java.text.DecimalFormat Scala Examples

The following examples show how to use java.text.DecimalFormat. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: TrafficMonitor.scala    From shadowsocksr-android   with GNU General Public License v3.0 5 votes vote down vote up
package com.github.shadowsocks.utils

import java.text.DecimalFormat

import com.github.shadowsocks.R
import com.github.shadowsocks.ShadowsocksApplication.app

/**
 * Process-wide traffic counters, plus helpers that derive transfer rates from them
 * and render byte counts as display strings.
 *
 * All state lives in plain mutable fields; only `dirty` is `@volatile`.
 */
object TrafficMonitor {
  // Bytes per second
  var txRate: Long = _
  var rxRate: Long = _

  // Bytes for the current session
  var txTotal: Long = _
  var rxTotal: Long = _

  // Bytes for the last query
  var txLast: Long = _
  var rxLast: Long = _
  var timestampLast: Long = _
  // Set by update()/reset() when the totals change; cleared by updateRate().
  @volatile var dirty = true

  private val units = Array("KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "BB", "NB", "DB", "CB")
  // NOTE(review): presumably relies on Android's ICU-backed DecimalFormat, where '@'
  // denotes a significant digit (three of them here) - confirm on the target platform.
  private val numberFormat = new DecimalFormat("@@@")

  /**
   * Renders a byte count for display.
   *
   * Values below 1000 are printed as a plain count followed by the pluralised "bytes"
   * resource. Larger values are divided by 1024 until they drop below 1000 and are
   * printed with the matching entry of `units`.
   *
   * @param size number of bytes
   * @return the formatted string, e.g. number + " " + unit
   */
  def formatTraffic(size: Long): String = {
    var n = size.toDouble
    var i = -1
    // Threshold is 1000 (not 1024) so the scaled value never needs four integer digits.
    while (n >= 1000) {
      n /= 1024
      i = i + 1
    }
    if (i < 0) s"$size ${app.getResources.getQuantityString(R.plurals.bytes, size.toInt)}"
    else s"${numberFormat.format(n)} ${units(i)}"
  }

  /**
   * Recomputes `txRate`/`rxRate` from the totals accumulated since the previous call.
   *
   * Does nothing when no wall-clock time has elapsed since `timestampLast`. When the
   * totals did not change since the last call (`dirty` is false) the rates are reset to 0.
   *
   * @return true if the rates were recomputed or reset to zero by this call
   */
  def updateRate(): Boolean = {
    val now = System.currentTimeMillis()
    val delta = now - timestampLast
    var updated = false
    if (delta != 0) {
      if (dirty) {
        // delta is in milliseconds, hence the * 1000 to get bytes per second.
        txRate = (txTotal - txLast) * 1000 / delta
        rxRate = (rxTotal - rxLast) * 1000 / delta
        txLast = txTotal
        rxLast = rxTotal
        dirty = false
        updated = true
      } else {
        if (txRate != 0) {
          txRate = 0
          updated = true
        }
        if (rxRate != 0) {
          rxRate = 0
          updated = true
        }
      }
      timestampLast = now
    }
    updated
  }

  /**
   * Stores new session totals and marks the monitor dirty if either one changed.
   *
   * @param tx total bytes sent
   * @param rx total bytes received
   */
  def update(tx: Long, rx: Long): Unit = {
    if (txTotal != tx) {
      txTotal = tx
      dirty = true
    }
    if (rxTotal != rx) {
      rxTotal = rx
      dirty = true
    }
  }

  /** Zeroes every counter and rate (but not `timestampLast`) and marks the monitor dirty. */
  def reset(): Unit = {
    txRate = 0
    rxRate = 0
    txTotal = 0
    rxTotal = 0
    txLast = 0
    rxLast = 0
    dirty = true
  }
}
Example 2
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0 5 votes vote down vote up
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
/**
 * Output writer that turns Spark rows into spreadsheet cells and hands them to a
 * Hadoop record writer.
 *
 * NOTE(review): `useHeader`, `currentRowNum`, `recordWriter`, `defaultSheetName` and
 * `simpleConverter` are used below but their declarations are not part of this excerpt.
 */
private[excel] class ExcelOutputWriter(
  path:       String,
  dataSchema: StructType,
  context:    TaskAttemptContext, options: Map[String, String]) extends OutputWriter {

  /**
   * Writes one row.
   *
   * On the first call with `useHeader` set, the row's field names are written as a
   * header line first. A column whose value is a five-element sequence is treated as an
   * explicit cell description (formatted value, comment, formula, address, sheet name);
   * any other value is passed to the converter unchanged.
   *
   * @param row the row to write
   */
  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      row.schema.fieldNames.zipWithIndex.foreach { case (fieldName, column) =>
        val headerColumnSCD = new SpreadSheetCellDAO(fieldName, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, column), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 until row.size) { // for each element of the row
        simpleObject(i) = row.get(i) match {
          case cell: Seq[_] if cell.length == 5 =>
            // The element type is erased at runtime, so it cannot be checked in the pattern;
            // a non-String element fails with ClassCastException on access, as before.
            val fields = cell.asInstanceOf[Seq[String]]
            new SpreadSheetCellDAO(fields(0), fields(1), fields(2), fields(3), fields(4))
          case other =>
            other.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x <- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    // The row counter advances even for an empty row.
    currentRowNum += 1
  }

  /** Closes the underlying record writer and resets the row counter. */
  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0
  }

}
Example 3
Source File: JavaMetricsScreen.scala    From Pi-Akka-Cluster   with Apache License 2.0 5 votes vote down vote up
package akka_oled

import java.lang.management.ManagementFactory
import java.text.DecimalFormat

import com.sun.management.OperatingSystemMXBean
import org.apache.commons.io.FileUtils

import scala.collection.mutable

/** Mixin that gathers a few JVM and host metrics as label/value string pairs. */
trait JavaMetricsScreen {
   /**
    * Collects the current metrics in a fixed order: max heap, used heap, system CPU
    * load, live thread count and loaded class count.
    *
    * @return one `Array(label, value)` per metric
    */
   def getJavaMetrics(): Array[Array[String]] = {
      val bean = ManagementFactory.getPlatformMXBean(classOf[OperatingSystemMXBean])
      val formatter = new DecimalFormat("#0.00")
      val heap = ManagementFactory.getMemoryMXBean.getHeapMemoryUsage

      // getSystemCpuLoad reports a fraction in [0.0, 1.0], or a negative value when the
      // load is unavailable, so it has to be scaled before a "%" suffix is attached.
      val cpuLoad = bean.getSystemCpuLoad
      val cpuText = if (cpuLoad < 0) "n/a" else formatter.format(cpuLoad * 100) + "%"

      Array(
         Array("Max mem:", FileUtils.byteCountToDisplaySize(heap.getMax)),
         Array("Curr mem:", FileUtils.byteCountToDisplaySize(heap.getUsed)),
         Array("CPU:", cpuText),
         Array("Threads:", ManagementFactory.getThreadMXBean.getThreadCount.toString),
         Array("Classes:", ManagementFactory.getClassLoadingMXBean.getLoadedClassCount.toString))
   }
}
Example 4
Source File: FileActorUtils.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.api.utils

import java.io.{BufferedOutputStream, File, FileOutputStream}
import java.net.InetAddress
import java.text.DecimalFormat
import java.util.function.Predicate

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.serving.api.constants.HttpConstant
import com.stratio.sparta.serving.core.config.SpartaConfig
import com.stratio.sparta.serving.core.models.files.SpartaFile
import spray.http.BodyPart

import scala.util.{Failure, Success, Try}

/**
 * Helpers to list, upload and delete the files kept under `targetDir`, and to build
 * the URL under which each file is exposed.
 */
trait FileActorUtils extends SLF4JLogging {

  //The dir where the files will be saved
  val targetDir: String
  val apiPath: String

  //Regexp for name validation
  val patternFileName: Option[Predicate[String]] = None

  /** True when no name filter is configured or the configured filter accepts `name`. */
  private def isAcceptedName(name: String): Boolean =
    patternFileName.forall(_.test(name))

  /** Deletes every regular file in `targetDir` whose name passes the name filter. */
  def deleteFiles(): Try[_] =
    Try {
      val directory = new File(targetDir)
      if (directory.exists && directory.isDirectory)
        directory.listFiles.filter(_.isFile).toList.foreach { file =>
          if (isAcceptedName(file.getName))
            file.delete()
        }
    }

  /**
   * Deletes `fileName` inside `targetDir` if it exists and is not a directory.
   *
   * NOTE(review): `fileName` is concatenated into the path unchanged and is not checked
   * against `patternFileName` here; a name containing ".." can resolve outside `targetDir`.
   */
  def deleteFile(fileName: String): Try[_] =
    Try {
      val plugin = new File(s"$targetDir/$fileName")
      if (plugin.exists && !plugin.isDirectory)
        plugin.delete()
    }

  /** Lists the regular files in `targetDir` that pass the name filter; empty if the directory is missing. */
  def browseDirectory(): Try[Seq[SpartaFile]] =
    Try {
      val directory = new File(targetDir)
      if (directory.exists && directory.isDirectory) {
        directory.listFiles.filter(_.isFile).toList
          .filter(file => isAcceptedName(file.getName))
          .map { file =>
            SpartaFile(file.getName, s"$url/${file.getName}", file.getAbsolutePath,
              sizeToMbFormat(file.length()))
          }
      } else Seq.empty[SpartaFile]
    }

  /**
   * Saves each uploaded part under `targetDir`.
   *
   * Parts without a name, parts rejected by the name filter and parts that fail to save
   * are logged and skipped; they do not fail the whole upload.
   *
   * NOTE(review): the file name comes from the request and is concatenated into the path
   * unchanged; unless `patternFileName` rejects separators and "..", it can escape `targetDir`.
   *
   * @param files the multipart body parts to store
   * @return descriptors of the files that were written
   */
  def uploadFiles(files: Seq[BodyPart]): Try[Seq[SpartaFile]] =
    Try {
      files.flatMap { file =>
        val fileNameOption = file.filename.orElse(file.name.orElse {
          log.warn(s"Is necessary one file name to upload files")
          None
        })
        fileNameOption.flatMap { fileName =>
          if (isAcceptedName(fileName)) {
            val localMachineDir = s"$targetDir/$fileName"

            Try(saveFile(file.entity.data.toByteArray, localMachineDir)) match {
              case Success(newFile) =>
                Option(SpartaFile(fileName, s"$url/$fileName", localMachineDir, sizeToMbFormat(newFile.length())))
              case Failure(e) =>
                log.error(s"Error saving file in path $localMachineDir", e)
                None
            }
          } else {
            log.warn(s"$fileName is Not a valid file name")
            None
          }
        }
      }
    }

  /** Formats a byte count as megabytes with at most two decimals, followed by " MB". */
  private def sizeToMbFormat(size: Long): String = {
    val formatter = new DecimalFormat("####.##")
    s"${formatter.format(size.toDouble / (1024 * 1024))} MB"
  }

  /** Writes `array` to `fileName`, creating parent directories first, and returns the file. */
  private def saveFile(array: Array[Byte], fileName: String): File = {
    log.info(s"Saving file to: $fileName")
    val target = new File(fileName)
    Option(target.getParentFile).foreach(_.mkdirs())
    val bos = new BufferedOutputStream(new FileOutputStream(target))
    // Close the stream even when the write fails, so the file handle is not leaked.
    try bos.write(array)
    finally bos.close()
    target
  }

  /** Base URL of this API; uses the local host name, falling back to the configured host. */
  private def url: String = {
    val host = Try(InetAddress.getLocalHost.getHostName).getOrElse(SpartaConfig.apiConfig.get.getString("host"))
    val port = SpartaConfig.apiConfig.get.getInt("port")

    s"http://$host:$port/${HttpConstant.SpartaRootPath}/$apiPath"
  }
}
Example 5
Source File: Encoder.scala    From censorinus   with MIT License 5 votes vote down vote up
package github.gphat.censorinus.statsd

import github.gphat.censorinus._
import java.text.DecimalFormat


/**
 * Encodes metrics into the textual form `name:value|type`, with an optional `|@rate`
 * suffix for sampled metrics.
 */
object Encoder extends MetricEncoder {

  // Locale-independent symbols: with the JVM default locale the decimal separator can be
  // a comma, which is not a valid number on the wire.
  // NOTE: java.text.DecimalFormat is documented as not synchronized; this instance is shared.
  val format = new DecimalFormat(
    "0.################",
    java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT))

  /**
   * Encodes a metric.
   *
   * @param metric the metric to encode
   * @return the encoded line, or None for a numeric metric whose value is NaN or infinite
   */
  def encode(metric: Metric): Option[String] = metric match {
    // Checked first so that sampled numeric metrics with an unusable value are dropped too.
    case nm: NumericMetric if nm.value.isInfinite || nm.value.isNaN =>
      None

    case sm: SampledMetric =>
      val sb = new StringBuilder()
      encodeBaseMetric(sb, metric)
      encodeSampleRate(sb, sm.sampleRate)
      Some(sb.toString)

    case _: Metric =>
      Some(encodeSimpleMetric(metric))

    // Only reachable for a null argument.
    case _ =>
      None
  }

  // Encodes the initial prefix used by all metrics.
  private def encodeBaseMetric(sb: StringBuilder, metric: Metric): Unit = {
    sb.append(metric.name)
    sb.append(':')
    val finalValue = metric match {
      case nm: NumericMetric => format.format(nm.value)
      // String-based metrics are emitted verbatim.
      case sm: StringMetric => sm.value
    }
    sb.append(finalValue)
    sb.append('|')
    val metricType = metric match {
      case _: CounterMetric => "c"
      case _: GaugeMetric => "g"
      case _: MeterMetric => "m"
      case _: SetMetric => "s"
      case _: TimerMetric => "ms"
    }
    val _ = sb.append(metricType)
  }

  // Appends "|@<rate>" when the sample rate is below 1.0; appends nothing otherwise.
  def encodeSampleRate(sb: StringBuilder, sampleRate: Double): Unit = {
    if (sampleRate < 1.0) {
      sb.append("|@")
      val _ = sb.append(format.format(sampleRate))
    }
  }

  // Encodes the base metric only (name, value and type). This covers unsampled metrics.
  private def encodeSimpleMetric(metric: Metric): String = {
    val sb = new StringBuilder()
    encodeBaseMetric(sb, metric)
    sb.toString
  }
}
Example 6
Source File: ComposedDataset.scala    From uberdata   with Apache License 2.0 5 votes vote down vote up
package eleflow.uberdata.data

import java.text.{DecimalFormatSymbols, DecimalFormat}
import java.util.Locale
import eleflow.uberdata.core.data.Dataset
import org.apache.spark.rdd.RDD


/** Bundles a train and a test dataset with the optional `(id, value)` result computed from them. */
class ComposedDataset(train: Dataset, test: Dataset, result: Option[RDD[(Double, Double)]]) {

  /**
   * Writes the result as `id,value` text lines to `path`, using a single partition.
   * Prints a notice and writes nothing when there is no result.
   *
   * @param path   output location handed to `saveAsTextFile`
   * @param locale locale whose decimal symbols are used to format the value column
   */
  def exportResult(path: String, locale: Locale = Locale.ENGLISH) = {
    val formatter =
      new DecimalFormat("###############.################", new DecimalFormatSymbols(locale))
    result match {
      case Some(predictions) =>
        val lines = predictions.coalesce(1).map {
          case (id, value) =>
            s"${BigDecimal(id.toString).toString},${formatter.format(value)}"
        }
        lines.saveAsTextFile(path)
      case None =>
        println("No result to export")
    }
  }
}
Example 7
Source File: FileHelper.scala    From spark-nlp   with Apache License 2.0 5 votes vote down vote up
package com.johnsnowlabs.util

import java.io.{File, IOException}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.security.MessageDigest
import java.text.DecimalFormat

import org.apache.commons.io.FileUtils
/** Small file utilities: write lines, delete, checksum and human-readable sizes. */
object FileHelper {
  /**
   * Writes `lines` to `file`, separated by the platform line separator.
   *
   * The separator is written between lines only, so there is no trailing separator.
   * An IOException raised while writing is printed and swallowed (best effort).
   *
   * @param file     path of the file to create or overwrite
   * @param lines    lines to write, in order
   * @param encoding charset name used for the output
   */
  def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = {
    val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding))
    try {
      var first = true
      for (line <- lines) {
        // The separator goes before every line except the first.
        if (!first)
          writer.write(System.lineSeparator())
        writer.write(line)
        first = false
      }
    }
    catch {
      case ex: IOException =>
        ex.printStackTrace()
    }
    finally writer.close()
  }

  /**
   * Deletes a file or a directory tree if it exists.
   *
   * @param file         path to delete
   * @param throwOnError when true, a failure is rethrown; otherwise the path is
   *                     scheduled for deletion on JVM exit
   */
  def delete(file: String, throwOnError: Boolean = false): Unit = {
    val f = new File(file)
    if (f.exists()) {
      try {
        if (f.isDirectory)
          FileUtils.deleteDirectory(f)
        else
          FileUtils.deleteQuietly(f)
      }
      catch {
        case e: Exception =>
          if (throwOnError)
            throw e
          else
            FileUtils.forceDeleteOnExit(f)
      }
    }

  }

  /** Returns the MD5 digest of the file at `path` as upper-case hexadecimal. */
  def generateChecksum(path: String): String = {
    val arr = Files readAllBytes (Paths get path)
    val checksum = MessageDigest.getInstance("MD5") digest arr
    checksum.map("%02X" format _).mkString
  }

  /**
   * Formats a byte count using 1024-based units ("B" to "EB") with at most one decimal.
   *
   * @param size number of bytes
   * @return "0" for sizes less than or equal to zero, otherwise the number plus " " plus the unit
   */
  def getHumanReadableFileSize(size: Long): String =
    if (size <= 0) "0"
    else {
      val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB")
      val digitGroups = (Math.log10(size) / Math.log10(1024)).toInt
      new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups)
    }
}
Example 8
Source File: Files.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.tools.data.utils.text

import java.text.DecimalFormat

/** Formats byte counts with 1024-based units (B, KB, MB, GB, TB). */
object Files {
  private[this] val B = 1L
  private[this] val K = 1024L
  private[this] val M = K * K
  private[this] val G = M * K
  private[this] val T = G * K

  /** Convenience overload: truncates `value` to a Long and formats it. */
  def toHumanReadable(value: Double): String = toHumanReadable(value.toLong)

  /**
   * Formats a byte count using the largest unit that keeps the number at or above 1.
   *
   * @param value number of bytes, must not be negative
   * @return e.g. "0B", "512B", "1KB", "1.5MB"
   * @throws IllegalArgumentException if `value` is negative
   */
  def toHumanReadable(value: Long): String = value match {
    case v if v < 0  => throw new IllegalArgumentException("Invalid file size: " + value)
    case v if v == 0 => "0B"
    case v if v < K  => format(v, B, "B")
    case v if v < M  => format(v, K, "KB")
    case v if v < G  => format(v, M, "MB")
    case v if v < T  => format(v, G, "GB")
    case v           => format(v, T, "TB")
  }

  /**
   * Divides `value` by `divider` and renders it with grouping and at most two decimals,
   * followed directly by `unit`.
   *
   * @param value   raw value
   * @param divider scale of the unit; a zero or negative divider leaves the value unscaled
   * @param unit    suffix appended verbatim
   */
  def format(value: Long, divider: Long, unit: String): String = {
    // Strictly positive: dividing by zero would produce Infinity or NaN.
    val result = if (divider > 0) value.toDouble / divider else value.toDouble
    // The unit is appended rather than spliced into the pattern, where characters such
    // as 'E' or '%' would be interpreted as pattern syntax.
    new DecimalFormat("#,##0.##").format(result) + unit
  }
}
Example 9
Source File: AmountFormatter.scala    From OUTDATED_ledger-wallet-android   with MIT License 5 votes vote down vote up
package co.ledger.wallet.core.bitcoin

import java.math.BigInteger
import java.text.DecimalFormat

/** Formats an amount expressed in the smallest unit of a currency. */
trait AmountFormatter {
  /**
   * @param value     amount in base units
   * @param precision number of fraction digits: a positive value forces exactly that many,
   *                  zero prints none, a negative value prints as many as are needed
   * @return the formatted amount
   */
  def format(value: BigInt, precision: Int = -1): String
}

object AmountFormatter {

  lazy val Bitcoin: AmountFormatter = new BitcoinFormatter

  /**
   * Formatter for a currency whose main unit is `10^magnitude` base units.
   *
   * The amount is formatted from an exact decimal, so values beyond the range of Long
   * or the precision of Double are rendered without overflow or rounding noise.
   */
  private class MagnitudeDependantFormatter(magnitude: Int) extends AmountFormatter {
    override def format(value: BigInt, precision: Int = -1): String = {
      val fractionPattern =
        if (precision > 0) "." + "0" * precision
        else if (precision < 0) "." + "#" * magnitude
        else ""
      val df = new DecimalFormat("###,###,###,##0" + fractionPattern)
      // unscaled value * 10^-magnitude, exactly.
      df.format(new java.math.BigDecimal(value.bigInteger, magnitude))
    }
  }

  private class BitcoinFormatter extends MagnitudeDependantFormatter(8)

}
Example 10
Source File: TrafficMonitor.scala    From shadowsocksr-android   with GNU General Public License v3.0 5 votes vote down vote up
package com.github.shadowsocks.utils

import java.text.DecimalFormat

import com.github.shadowsocks.R
import com.github.shadowsocks.ShadowsocksApplication.app

/**
 * Process-wide traffic counters, plus helpers that derive transfer rates from them
 * and render byte counts as display strings.
 *
 * All state lives in plain mutable fields; only `dirty` is `@volatile`.
 */
object TrafficMonitor {
  // Bytes per second
  var txRate: Long = _
  var rxRate: Long = _

  // Bytes for the current session
  var txTotal: Long = _
  var rxTotal: Long = _

  // Bytes for the last query
  var txLast: Long = _
  var rxLast: Long = _
  var timestampLast: Long = _
  // Set by update()/reset() when the totals change; cleared by updateRate().
  @volatile var dirty = true

  private val units = Array("KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "BB", "NB", "DB", "CB")
  // NOTE(review): presumably relies on Android's ICU-backed DecimalFormat, where '@'
  // denotes a significant digit (three of them here) - confirm on the target platform.
  private val numberFormat = new DecimalFormat("@@@")

  /**
   * Renders a byte count for display.
   *
   * Values below 1000 are printed as a plain count followed by the pluralised "bytes"
   * resource. Larger values are divided by 1024 until they drop below 1000 and are
   * printed with the matching entry of `units`.
   *
   * @param size number of bytes
   * @return the formatted string, e.g. number + " " + unit
   */
  def formatTraffic(size: Long): String = {
    var n = size.toDouble
    var i = -1
    // Threshold is 1000 (not 1024) so the scaled value never needs four integer digits.
    while (n >= 1000) {
      n /= 1024
      i = i + 1
    }
    if (i < 0) s"$size ${app.getResources.getQuantityString(R.plurals.bytes, size.toInt)}"
    else s"${numberFormat.format(n)} ${units(i)}"
  }

  /**
   * Recomputes `txRate`/`rxRate` from the totals accumulated since the previous call.
   *
   * Does nothing when no wall-clock time has elapsed since `timestampLast`. When the
   * totals did not change since the last call (`dirty` is false) the rates are reset to 0.
   *
   * @return true if the rates were recomputed or reset to zero by this call
   */
  def updateRate(): Boolean = {
    val now = System.currentTimeMillis()
    val delta = now - timestampLast
    var updated = false
    if (delta != 0) {
      if (dirty) {
        // delta is in milliseconds, hence the * 1000 to get bytes per second.
        txRate = (txTotal - txLast) * 1000 / delta
        rxRate = (rxTotal - rxLast) * 1000 / delta
        txLast = txTotal
        rxLast = rxTotal
        dirty = false
        updated = true
      } else {
        if (txRate != 0) {
          txRate = 0
          updated = true
        }
        if (rxRate != 0) {
          rxRate = 0
          updated = true
        }
      }
      timestampLast = now
    }
    updated
  }

  /**
   * Stores new session totals and marks the monitor dirty if either one changed.
   *
   * @param tx total bytes sent
   * @param rx total bytes received
   */
  def update(tx: Long, rx: Long): Unit = {
    if (txTotal != tx) {
      txTotal = tx
      dirty = true
    }
    if (rxTotal != rx) {
      rxTotal = rx
      dirty = true
    }
  }

  /** Zeroes every counter and rate (but not `timestampLast`) and marks the monitor dirty. */
  def reset(): Unit = {
    txRate = 0
    rxRate = 0
    txTotal = 0
    rxTotal = 0
    txLast = 0
    rxLast = 0
    dirty = true
  }
}
Example 11
Source File: Histogram.scala    From Scurses   with MIT License 5 votes vote down vote up
package net.team2xh.onions.components.widgets

import java.text.DecimalFormat

import net.team2xh.onions.{Symbols, Palettes}
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.onions.utils.{Drawing, Math}
import net.team2xh.scurses.Scurses

import scala.Numeric.Implicits._

/**
 * Widget that draws a scrolling bar chart of the most recent values.
 *
 * New values are prepended by `push`, so index 0 of `values` is the newest sample; it is
 * drawn in the right-most column of the graph.
 *
 * `min` / `max` fix the value axis when given; otherwise the bounds are derived from the
 * current values by helpers in the project's `Math` utility object.
 *
 * NOTE(review): `palette`, `showValues` and `df` are not referenced anywhere in this
 * class body as shown.
 */
case class Histogram[T: Numeric](parent: FramePanel, initialValues: Seq[T] = Seq[Double](),
                                 palette: Seq[Int] = Palettes.rainbow,
                                 min: Option[Int] = None, max: Option[Int] = None,
                                 labelY: String = "",  showLabels: Boolean = true,
                                 showValues: Boolean = true)
                                (implicit screen: Scurses) extends Widget(parent) {

  // Grid spacing passed to the drawing helpers; also the modulus of `counter`.
  val gridSize = 4

  override def focusable: Boolean = false

  override def innerHeight: Int = parent.innerHeight - 3

  // Maximum number of samples retained by push().
  val limit = 400
  var values = initialValues
  // Number of pushes modulo gridSize; shifts the grid horizontally in redraw().
  var counter = 0
  val df = new DecimalFormat("#.#")

  /** Prepends a sample, drops samples beyond `limit`, and requests a redraw. */
  def push(value: T): Unit = {
    values +:= value
    values = values.take(limit)
    counter = (counter + 1) % gridSize
    needsRedraw = true
  }

  /** Draws the grid, the vertical axis values, one bar per visible sample and, optionally, the axis labels. */
  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {

    // Explicit bounds win; with no data the axis defaults to 0..10.
    val valueMin = min.getOrElse(if (values.isEmpty) 0 else Math.aBitLessThanMin(values))
    val valueMax = max.getOrElse(if (values.isEmpty) 10 else Math.aBitMoreThanMax(values) + 1)

    // Width reserved for the axis numbers is the longer of the two bounds as text.
    val valuesLength = valueMax.toString.length max valueMin.toString.length
    val x0 = valuesLength + (if (showLabels) 2 else 0)
    val graphWidth = (if (showLabels) innerWidth - 3 else innerWidth - 1) - valuesLength
    val graphHeight = innerHeight - 1

    // Draw grid
    // NOTE(review): the literal 4 below equals gridSize today - confirm whether it should track it.
    Drawing.drawGrid(x0, 0, graphWidth, graphHeight, gridSize, theme.accent1, theme.background,
      showVertical = true, showHorizontal = true, gridOffsetX = graphWidth % 4 + (gridSize - counter))

    // Draw axis values
    Drawing.drawAxisValues(x0 - valuesLength, 0, graphHeight, gridSize,
      valueMin, valueMax, theme.accent3, theme.background, horizontal = false)

    // Draw bars
    // Value range covered by one character row.
    val charHeight = (valueMax - valueMin).toDouble / graphHeight
    for (i <- 0 until ((graphWidth - 1) min values.length)) {
      val v = values(i)
      // Top row of the bar: the value scaled to rows, measured down from the top of the graph.
      val ny = graphHeight - math.round((graphHeight * (v.toDouble - valueMin)) / (valueMax - valueMin)).toInt

      val color = Palettes.mapToRGB((v.toDouble - valueMin).abs, (valueMax - valueMin).abs)

      for (y <- ny to graphHeight) {
        // True when the value sits in the lower half of its character row.
        val isLower = v.toDouble % charHeight < charHeight / 2.0
        val s =
          if (y == graphHeight) Symbols.BLOCK_UPPER
          else if (y == ny && (!isLower || ny == 0)) Symbols.BLOCK_LOWER
          else Symbols.BLOCK
        // Sample i is placed i columns left of the right edge, so newer samples are further right.
        screen.put(x0 + (graphWidth - i - 1), y, s, color, theme.background)
      }
    }

    // Draw labels
    if (showLabels) {
      Drawing.drawAxisLabels(x0, graphWidth, graphHeight, labelY = labelY, theme = theme)
    }
  }

  // Key presses are ignored.
  override def handleKeypress(keypress: Int): Unit = {}

}
Example 12
Source File: VwFeatureNormalizer.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.dataset.vw

import java.text.DecimalFormat
import java.util.regex.Pattern

/**
 * Rewrites a VW input line so that every namespace carries a weight equal to the
 * inverse Euclidean norm of its feature values: `|n f1 f2:3` becomes `|n:<weight> f1 f2:3`.
 *
 * Only namespaces with a single word-character name followed by whitespace are rewritten;
 * everything else on the line is copied through unchanged.
 */
class VwFeatureNormalizer extends (CharSequence => CharSequence) with java.io.Serializable {
    private[this] val lineRegex = Pattern.compile("\\|(\\w)\\s+([^\\|]+)")
    private[this] val namespaceRegex = ".+:(.+)".r
    // Fixed symbols so the weight always uses '.' as decimal separator, whatever the JVM locale.
    private[this] val format =
        new DecimalFormat("0.00000", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT))

    /**
     * @param vwLine a line in VW input format
     * @return the line with a normalisation weight attached to each matched namespace
     */
    def apply(vwLine: CharSequence): CharSequence = {
        val matcher = lineRegex.matcher(vwLine)
        val sb = new StringBuffer
        while (matcher.find) {
            val name = matcher.group(1)
            val features = matcher.group(2)
            val replacement = "|" + name + ":" + format.format(normalizeNamespace(features)) + " " + features
            // Feature text is data, not a replacement template: quote it so that '$' and '\'
            // are copied literally instead of being read as group references.
            matcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(replacement))
        }
        matcher.appendTail(sb)
        sb
    }

    /**
     * Returns 1 / sqrt(sum of squared feature values) for a whitespace-separated feature list.
     * A feature without an explicit `:value` counts as 1. Returns 0 when the sum is 0.
     */
    private[this] def normalizeNamespace(namespace: String): Double = {
        var sum = 0d
        namespace.split("\\s+").foreach {
            case namespaceRegex(w) =>
                val currentWeight = w.toDouble
                sum += currentWeight * currentWeight
            case _ => sum += 1
        }
        if (sum == 0) 0
        else 1.0 / math.sqrt(sum)
    }
}

object VwFeatureNormalizer {
    /** Shared instance. */
    val instance = new VwFeatureNormalizer
}

 
Example 13
Source File: MovieData.scala    From Apache-Spark-2x-Machine-Learning-Cookbook   with MIT License 5 votes vote down vote up
package spark.ml.cookbook.chapter7

import java.text.DecimalFormat
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.jfree.chart.{ChartFactory, ChartFrame, JFreeChart}
import org.jfree.chart.axis.NumberAxis
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.xy.{XYSeries, XYSeriesCollection}


/** One record of the movies file: id, title, release year and list of genres. */
case class MovieData(movieId: Int, title: String, year: Int, genre: Seq[String])

/** Loads the movies file, prints the number of movies per year and plots it as a histogram. */
object MovieData {
  /** Opens `chart` in a new window. */
  def show(chart: JFreeChart): Unit = {
    val frame = new ChartFrame("plot", chart)
    frame.pack()
    frame.setVisible(true)
  }

  /**
   * Parses one `id::title (year)::genre|genre|...` line.
   *
   * @param str a raw line with exactly three `::`-separated columns
   * @return the parsed record
   */
  def parseMovie(str: String): MovieData = {
    val columns = str.split("::")
    assert(columns.size == 3)

    // Turn the parentheses around the year into spaces; the year is then the last token.
    val titleYearStriped = """\(|\)""".r.replaceAllIn(columns(1), " ")
    val titleYearData = titleYearStriped.split(" ")

    MovieData(columns(0).toInt,
      titleYearData.take(titleYearData.size - 1).mkString(" "),
      titleYearData.last.toInt,
      // String.split takes a regex: an unescaped "|" is an empty alternation and would
      // split the genres into single characters.
      columns(2).split("\\|"))
  }

  def main(args: Array[String]): Unit = {

    val movieFile = "../data/sparkml2/chapter7/movies.dat"

    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    // setup SparkSession to use for interactions with Spark
    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("MovieData App")
      .config("spark.sql.warehouse.dir",  ".")
      .config("spark.executor.memory", "2g")
      .getOrCreate()

    import spark.implicits._

    val movies = spark.read.textFile(movieFile).map(parseMovie)
    movies.createOrReplaceTempView("movies")

    val movieCount = movies.count()
    println("Number of movies:  %s".format(movieCount))

    val moviesByYear = spark.sql("select year, count(year) as count from movies group by year order by year")
    moviesByYear.show(25)

    // Copy the (year, count) rows into a JFreeChart series.
    val histogramDataset = new XYSeriesCollection()
    val xy = new XYSeries("")
    moviesByYear.collect().foreach({
      row => xy.add(row.getAs[Int]("year"), row.getAs[Long]("count"))
    })

    histogramDataset.addSeries(xy)

    val chart = ChartFactory.createHistogram(
      "", "Year", "Movies Per Year", histogramDataset, PlotOrientation.VERTICAL, false, false, false)
    val chartPlot = chart.getXYPlot()

    // Print years on the x axis without grouping separators.
    val xAxis = chartPlot.getDomainAxis().asInstanceOf[NumberAxis]
    xAxis.setNumberFormatOverride(new DecimalFormat("####"))

    show(chart)

    spark.stop()
  }
}
Example 14
Source File: RatingsData.scala    From Apache-Spark-2x-Machine-Learning-Cookbook   with MIT License 5 votes vote down vote up
package spark.ml.cookbook.chapter7

import java.text.DecimalFormat
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.jfree.chart.{ChartFactory, ChartFrame, JFreeChart}
import org.jfree.chart.axis.NumberAxis
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.xy.{XYSeries, XYSeriesCollection}



/** One record of the ratings file. */
case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

/** Loads the ratings file, prints the number of ratings per user and shows it as a scatter plot. */
object RatingsData {
  /** Opens `chart` in a new window. */
  def show(chart: JFreeChart): Unit = {
    val frame = new ChartFrame("plot", chart)
    frame.pack()
    frame.setVisible(true)
  }

  /**
   * Parses one `userId::movieId::rating::timestamp` line.
   *
   * @param str a raw line with exactly four `::`-separated columns
   * @return the parsed rating
   */
  def parseRating(str: String): Rating = {
    val columns = str.split("::")
    assert(columns.size == 4)
    Rating(columns(0).toInt, columns(1).toInt, columns(2).toFloat, columns(3).toLong)
  }

  def main(args: Array[String]): Unit = {

    val ratingsFile = "../data/sparkml2/chapter7/ratings.dat"

    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    // setup SparkSession to use for interactions with Spark
    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("MovieRating App")
      .config("spark.sql.warehouse.dir",  ".")
      .config("spark.executor.memory", "2g")
      .getOrCreate()

    import spark.implicits._

    val ratings = spark.read.textFile(ratingsFile).map(parseRating)

    val ratingCount = ratings.count()
    println("Number of ratings:  %s".format(ratingCount))

    ratings.createOrReplaceTempView("ratings")
    val resultDF = spark.sql("select ratings.userId, count(*) as count from ratings group by ratings.userId")
    resultDF.show(25, false)

    val scatterPlotDataset = new XYSeriesCollection()
    val xy = new XYSeries("")

    // count(*) yields a 64-bit column, so it must be read as Long; reading it as Integer
    // fails with a ClassCastException when the value is used.
    resultDF.collect().foreach({ r => xy.add(r.getAs[Int]("userId"), r.getAs[Long]("count")) })

    scatterPlotDataset.addSeries(xy)

    val chart = ChartFactory.createScatterPlot(
      "", "User", "Ratings Per User", scatterPlotDataset, PlotOrientation.VERTICAL, false, false, false)
    val chartPlot = chart.getXYPlot()

    // Print user ids on the x axis without grouping separators.
    val xAxis = chartPlot.getDomainAxis().asInstanceOf[NumberAxis]
    xAxis.setNumberFormatOverride(new DecimalFormat("####"))

    show(chart)

    spark.stop()
  }
}