java.text.DecimalFormat Scala Examples

Source File: TrafficMonitor.scala From shadowsocksr-android with GNU General Public License v3.0

5 votes

package com.github.shadowsocks.utils

import java.text.DecimalFormat

import com.github.shadowsocks.R
import com.github.shadowsocks.ShadowsocksApplication.app

/**
 * Global accumulator for transmitted/received byte counters and the transfer
 * rates derived from them.
 *
 * All state is held in plain mutable fields; only `dirty` is `@volatile`.
 */
object TrafficMonitor {
  // Bytes per second
  var txRate: Long = _
  var rxRate: Long = _

  // Bytes for the current session
  var txTotal: Long = _
  var rxTotal: Long = _

  // Bytes for the last query
  var txLast: Long = _
  var rxLast: Long = _
  var timestampLast: Long = _
  // Set whenever a total changes; cleared by updateRate() once consumed.
  @volatile var dirty = true

  private val units = Array("KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "BB", "NB", "DB", "CB")
  // NOTE(review): '@' is presumably the ICU "significant digit" pattern character
  // (supported by Android's DecimalFormat), i.e. three significant digits — confirm.
  private val numberFormat = new DecimalFormat("@@@")

  /**
   * Renders a byte count for display.
   *
   * The value is divided by 1024 until it drops below 1000; the number of
   * divisions selects the unit suffix. Values that never reach 1000 are shown
   * as a plain count with the localized "bytes" plural resource.
   *
   * @param size number of bytes
   * @return the formatted value followed by a space and its unit
   */
  def formatTraffic(size: Long): String = {
    var n = size.toDouble
    var i = -1
    while (n >= 1000) {
      n /= 1024
      i += 1
    }
    if (i < 0) s"$size ${app.getResources.getQuantityString(R.plurals.bytes, size.toInt)}"
    else s"${numberFormat.format(n)} ${units(i)}"
  }

  /**
   * Recomputes `txRate`/`rxRate` from the totals accumulated since the last call.
   *
   * Nothing happens when no time has elapsed since `timestampLast`. When the
   * totals are unchanged (`dirty` is false) any non-zero rate is reset to 0.
   *
   * @return true if either rate was recomputed or reset by this call
   */
  def updateRate(): Boolean = {
    val now = System.currentTimeMillis()
    val delta = now - timestampLast
    var updated = false
    if (delta != 0) {
      if (dirty) {
        // delta is in milliseconds, hence the factor of 1000 for bytes per second.
        txRate = (txTotal - txLast) * 1000 / delta
        rxRate = (rxTotal - rxLast) * 1000 / delta
        txLast = txTotal
        rxLast = rxTotal
        dirty = false
        updated = true
      } else {
        if (txRate != 0) {
          txRate = 0
          updated = true
        }
        if (rxRate != 0) {
          rxRate = 0
          updated = true
        }
      }
      timestampLast = now
    }
    updated
  }

  /**
   * Stores new session totals and marks the monitor dirty if either changed.
   *
   * @param tx total bytes transmitted
   * @param rx total bytes received
   */
  def update(tx: Long, rx: Long): Unit = {
    if (txTotal != tx) {
      txTotal = tx
      dirty = true
    }
    if (rxTotal != rx) {
      rxTotal = rx
      dirty = true
    }
  }

  /** Zeroes all rates and counters and marks the monitor dirty; `timestampLast` is left untouched. */
  def reset(): Unit = {
    txRate = 0
    rxRate = 0
    txTotal = 0
    rxTotal = 0
    txLast = 0
    rxLast = 0
    dirty = true
  }
}

Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0

5 votes

package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
/**
 * Writes Spark rows as spreadsheet cells through a Hadoop record writer.
 *
 * NOTE(review): `useHeader`, `currentRowNum`, `defaultSheetName`, `recordWriter`
 * and `simpleConverter` are used below but are not declared in the visible part
 * of this class — confirm their definitions before relying on this listing.
 */
private[excel] class ExcelOutputWriter(
  path:       String,
  dataSchema: StructType,
  context:    TaskAttemptContext, options: Map[String, String]) extends OutputWriter {

  /**
   * Writes one row, preceded by a header row the first time `useHeader` is set.
   *
   * A column whose value is a five-element sequence of strings is taken as an
   * explicit cell description (formatted value, comment, formula, address,
   * sheet name); every other value is handed to the simple-type converter.
   *
   * @param row the row to write
   */
  def write(row: Row): Unit = {
    // Emit the header once: one cell per schema field on the current row.
    if (useHeader) {
      row.schema.fieldNames.zipWithIndex.foreach { case (header, column) =>
        val headerColumnSCD = new SpreadSheetCellDAO(
          header, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, column), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
      }
      currentRowNum += 1
      useHeader = false
    }
    if (row.size > 0) {
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 until row.size) {
        simpleObject(i) = row.get(i) match {
          // The element type is erased at runtime, exactly as in an isInstanceOf check;
          // non-String elements still fail with a ClassCastException when read.
          case cell: Seq[String @unchecked] if cell.length == 5 =>
            new SpreadSheetCellDAO(cell(0), cell(1), cell(2), cell(3), cell(4))
          case other =>
            other.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow =
        simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (cell <- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), cell)
      }
    }
    // The row counter advances even for empty rows.
    currentRowNum += 1
  }

  /** Closes the underlying record writer and resets the row counter. */
  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0
  }

}

Source File: JavaMetricsScreen.scala From Pi-Akka-Cluster with Apache License 2.0

5 votes

package akka_oled

import java.lang.management.ManagementFactory
import java.text.DecimalFormat

import com.sun.management.OperatingSystemMXBean
import org.apache.commons.io.FileUtils

import scala.collection.mutable

trait JavaMetricsScreen {
   /**
    * Collects a few JVM/OS metrics as label/value pairs for display.
    *
    * @return one two-element array (label, value) per metric, in a fixed order:
    *         max heap, used heap, system CPU load, thread count, loaded classes
    */
   def getJavaMetrics(): Array[Array[String]] = {
      val osBean = ManagementFactory.getPlatformMXBean(classOf[OperatingSystemMXBean])
      val heap = ManagementFactory.getMemoryMXBean.getHeapMemoryUsage
      val percentFormat = new DecimalFormat("#0.00")

      // getSystemCpuLoad reports a fraction in [0.0, 1.0] (negative when the
      // value is unavailable), so it must be scaled before a "%" is appended.
      val cpuLoad = osBean.getSystemCpuLoad
      val cpu = if (cpuLoad < 0) "n/a" else percentFormat.format(cpuLoad * 100) + "%"

      Array(
         Array("Max mem:", FileUtils.byteCountToDisplaySize(heap.getMax)),
         Array("Curr mem:", FileUtils.byteCountToDisplaySize(heap.getUsed)),
         Array("CPU:", cpu),
         Array("Threads:", ManagementFactory.getThreadMXBean.getThreadCount.toString),
         Array("Classes:", ManagementFactory.getClassLoadingMXBean.getLoadedClassCount.toString))
   }
}

Source File: FileActorUtils.scala From sparta with Apache License 2.0

5 votes

package com.stratio.sparta.serving.api.utils

import java.io.{BufferedOutputStream, File, FileOutputStream}
import java.net.InetAddress
import java.text.DecimalFormat
import java.util.function.Predicate

import akka.event.slf4j.SLF4JLogging
import com.stratio.sparta.serving.api.constants.HttpConstant
import com.stratio.sparta.serving.core.config.SpartaConfig
import com.stratio.sparta.serving.core.models.files.SpartaFile
import spray.http.BodyPart

import scala.util.{Failure, Success, Try}

/**
 * Helpers for listing, uploading and deleting files in a single target directory.
 *
 * NOTE(review): file names coming from uploads and from `deleteFile` are joined
 * to `targetDir` without normalisation, so a name containing "../" can address
 * paths outside the directory when `patternFileName` is not set — confirm that
 * callers validate names.
 */
trait FileActorUtils extends SLF4JLogging {

  //The dir where the files will be saved
  val targetDir: String
  val apiPath: String

  //Regexp for name validation
  val patternFileName: Option[Predicate[String]] = None

  /** True when no name pattern is configured or the configured pattern accepts `name`. */
  private def isAcceptedName(name: String): Boolean =
    patternFileName.forall(_.test(name))

  /** Deletes every regular file in `targetDir` whose name is accepted by the pattern. */
  def deleteFiles(): Try[_] =
    Try {
      val directory = new File(targetDir)
      if (directory.exists && directory.isDirectory)
        directory.listFiles.filter(_.isFile).toList.foreach { file =>
          if (isAcceptedName(file.getName))
            file.delete()
        }
    }

  /**
   * Deletes `fileName` inside `targetDir` if it exists and is not a directory.
   *
   * @param fileName name of the file, relative to `targetDir`
   */
  def deleteFile(fileName: String): Try[_] =
    Try {
      val plugin = new File(s"$targetDir/$fileName")
      if (plugin.exists && !plugin.isDirectory)
        plugin.delete()
    }

  /**
   * Lists the regular files in `targetDir` accepted by the pattern.
   *
   * @return one [[SpartaFile]] per file, or an empty sequence when `targetDir`
   *         does not exist or is not a directory
   */
  def browseDirectory(): Try[Seq[SpartaFile]] =
    Try {
      val directory = new File(targetDir)
      if (directory.exists && directory.isDirectory) {
        directory.listFiles.filter(_.isFile).toList.collect {
          case file if isAcceptedName(file.getName) =>
            SpartaFile(file.getName, s"$url/${file.getName}", file.getAbsolutePath,
              sizeToMbFormat(file.length()))
        }
      } else Seq.empty[SpartaFile]
    }

  /**
   * Saves each uploaded part under `targetDir`.
   *
   * Parts without a file name, with a rejected name, or that fail to save are
   * logged and skipped; they do not fail the whole upload.
   *
   * @param files the multipart body parts to store
   * @return the files that were saved successfully
   */
  def uploadFiles(files: Seq[BodyPart]): Try[Seq[SpartaFile]] =
    Try {
      files.flatMap { file =>
        val fileNameOption = file.filename.orElse(file.name.orElse {
          log.warn(s"Is necessary one file name to upload files")
          None
        })
        fileNameOption.flatMap { fileName =>
          if (isAcceptedName(fileName)) {
            val localMachineDir = s"$targetDir/$fileName"

            Try(saveFile(file.entity.data.toByteArray, localMachineDir)) match {
              case Success(newFile) =>
                Option(SpartaFile(fileName, s"$url/$fileName", localMachineDir, sizeToMbFormat(newFile.length())))
              case Failure(e) =>
                log.error(s"Error saving file in path $localMachineDir", e)
                None
            }
          } else {
            log.warn(s"$fileName is Not a valid file name")
            None
          }
        }
      }
    }

  /** Formats a byte count as megabytes with at most two decimals, followed by " MB". */
  private def sizeToMbFormat(size: Long): String = {
    val formatter = new DecimalFormat("####.##")
    s"${formatter.format(size.toDouble / (1024 * 1024))} MB"
  }

  /**
   * Writes `array` to `fileName`, creating parent directories as needed.
   *
   * @return a [[File]] handle for the written path
   */
  private def saveFile(array: Array[Byte], fileName: String): File = {
    log.info(s"Saving file to: $fileName")
    val target = new File(fileName)
    target.getParentFile.mkdirs
    val bos = new BufferedOutputStream(new FileOutputStream(target))
    // Close in finally so a failed write does not leak the file handle.
    try bos.write(array)
    finally bos.close()
    target
  }

  /** Base URL of this API; falls back to the configured host when the local host name cannot be resolved. */
  private def url: String = {
    val host = Try(InetAddress.getLocalHost.getHostName).getOrElse(SpartaConfig.apiConfig.get.getString("host"))
    val port = SpartaConfig.apiConfig.get.getInt("port")

    s"http://$host:$port/${HttpConstant.SpartaRootPath}/$apiPath"
  }
}

Source File: Encoder.scala From censorinus with MIT License

5 votes

package github.gphat.censorinus.statsd

import github.gphat.censorinus._
import java.text.DecimalFormat


/** Encodes metrics into their textual wire form: `name:value|type[|@sampleRate]`. */
object Encoder extends MetricEncoder {

  // Locale-independent symbols: the decimal separator must always be '.',
  // whatever the JVM default locale is.
  val format = new DecimalFormat(
    "0.################",
    java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT))

  /**
   * Encodes a metric.
   *
   * @param metric the metric to encode
   * @return the encoded line, or `None` for a numeric metric whose value is
   *         NaN or infinite
   */
  def encode(metric: Metric): Option[String] = metric match {
    // Checked first so that sampled numeric metrics are filtered as well.
    // NOTE(review): assumes sampled metrics are NumericMetrics — confirm against the metric hierarchy.
    case nm: NumericMetric if nm.value.isInfinite || nm.value.isNaN =>
      None

    case sm: SampledMetric =>
      val sb = new StringBuilder()
      encodeBaseMetric(sb, metric)
      encodeSampleRate(sb, sm.sampleRate)
      Some(sb.toString)

    case _: Metric =>
      Some(encodeSimpleMetric(metric))

    case _ =>
      None
  }

  // Encodes the initial prefix used by all metrics.
  private def encodeBaseMetric(sb: StringBuilder, metric: Metric): Unit = {
    sb.append(metric.name)
    sb.append(':')
    val finalValue = metric match {
      case nm: NumericMetric => format.format(nm.value)
      // This is the only string based-metric
      case sm: StringMetric => sm.value
    }
    sb.append(finalValue)
    sb.append('|')
    val metricType = metric match {
      case _: CounterMetric => "c"
      case _: GaugeMetric => "g"
      case _: MeterMetric => "m"
      case _: SetMetric => "s"
      case _: TimerMetric => "ms"
    }
    val _ = sb.append(metricType)
  }

  // Encodes the sample rate, so that counters are adjusted appropriately.
  // Rates of 1.0 or more are omitted.
  def encodeSampleRate(sb: StringBuilder, sampleRate: Double): Unit = {
    if (sampleRate < 1.0) {
      sb.append("|@")
      val _ = sb.append(format.format(sampleRate))
    }
  }

  // Encodes the base metric only. This covers most metrics.
  private def encodeSimpleMetric(metric: Metric): String = {
    val sb = new StringBuilder()
    encodeBaseMetric(sb, metric)
    sb.toString
  }
}

Source File: ComposedDataset.scala From uberdata with Apache License 2.0

5 votes

package eleflow.uberdata.data

import java.text.{DecimalFormatSymbols, DecimalFormat}
import java.util.Locale
import eleflow.uberdata.core.data.Dataset
import org.apache.spark.rdd.RDD


/** Bundles a train dataset, a test dataset and an optional RDD of (id, value) result pairs. */
class ComposedDataset(train: Dataset, test: Dataset, result: Option[RDD[(Double, Double)]]) {

  /**
   * Saves the result pairs as text lines of the form `id,value` under `path`.
   *
   * The RDD is coalesced to a single partition before saving. When there is
   * no result, a message is printed and nothing is written.
   *
   * @param path   output location handed to `saveAsTextFile`
   * @param locale locale whose number symbols are used to format the value column
   */
  def exportResult(path: String, locale: Locale = Locale.ENGLISH) = {
    val valueFormat =
      new DecimalFormat("###############.################", new DecimalFormatSymbols(locale))

    result match {
      case Some(pairs) =>
        val lines = pairs.coalesce(1).map { pair =>
          val (id, value) = pair
          s"${BigDecimal(id.toString).toString},${valueFormat.format(value)}"
        }
        lines.saveAsTextFile(path)

      case None =>
        println("No result to export")
    }
  }
}

Source File: FileHelper.scala From spark-nlp with Apache License 2.0

5 votes

package com.johnsnowlabs.util

import java.io.{File, IOException}
import java.nio.charset.Charset
import java.nio.file.{Files, Paths}
import java.security.MessageDigest
import java.text.DecimalFormat

import org.apache.commons.io.FileUtils
/** Small file utilities: line writing, deletion, checksums and size formatting. */
object FileHelper {

  /**
   * Writes `lines` to `file`, separated by the platform line separator.
   *
   * No separator is written after the last line. An `IOException` raised
   * while writing is printed and swallowed.
   *
   * @param file     path of the file to create or overwrite
   * @param lines    lines to write, in order
   * @param encoding charset name used for the file
   */
  def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = {
    val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding))
    try {
      var first = true
      for (line <- lines) {
        // The separator goes between lines, i.e. before every line but the first.
        if (!first)
          writer.write(System.lineSeparator())
        writer.write(line)
        first = false
      }
    }
    catch {
      case ex: IOException =>
        ex.printStackTrace()
    }
    finally writer.close()
  }

  /**
   * Deletes a file or directory if it exists.
   *
   * NOTE(review): regular files go through `deleteQuietly`, which presumably never
   * throws, so `throwOnError` would only affect directories — confirm this is intended.
   *
   * @param file         path to delete
   * @param throwOnError rethrow a deletion failure instead of scheduling deletion on JVM exit
   */
  def delete(file: String, throwOnError: Boolean = false): Unit = {
    val f = new File(file)
    if (f.exists()) {
      try {
        if (f.isDirectory)
          FileUtils.deleteDirectory(f)
        else
          FileUtils.deleteQuietly(f)
      }
      catch {
        case e: Exception =>
          if (throwOnError)
            throw e
          else
            FileUtils.forceDeleteOnExit(f)
      }
    }

  }

  /**
   * Computes the MD5 digest of a file's full contents.
   *
   * @param path file to read
   * @return the digest as upper-case hexadecimal
   */
  def generateChecksum(path: String): String = {
    val arr = Files readAllBytes (Paths get path)
    val checksum = MessageDigest.getInstance("MD5") digest arr
    checksum.map("%02X" format _).mkString
  }

  /**
   * Formats a byte count with a binary unit (B, KB, ... EB).
   *
   * @param size number of bytes
   * @return "0" for non-positive sizes, otherwise the scaled value with at most
   *         one decimal, a space, and the unit
   */
  def getHumanReadableFileSize(size: Long): String = {
    if (size <= 0) "0"
    else {
      val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB")
      // floor(log2(size)) / 10 == floor(log1024(size)), computed exactly on
      // integers; a ratio of floating-point logarithms can round the wrong
      // way at exact powers of 1024.
      val digitGroups = (63 - java.lang.Long.numberOfLeadingZeros(size)) / 10
      new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups)
    }
  }
}

Source File: Files.scala From CM-Well with Apache License 2.0

5 votes

package cmwell.tools.data.utils.text

import java.text.DecimalFormat

/** Formats byte counts as human-readable sizes using binary (1024-based) units. */
object Files {
  private[this] val B = 1L
  private[this] val K = 1024L
  private[this] val M = K * K
  private[this] val G = M * K
  private[this] val T = G * K

  /** Truncates `value` to a whole number of bytes and formats it. */
  def toHumanReadable(value: Double): String = toHumanReadable(value.toLong)

  /**
   * Formats a byte count using the largest unit (B, KB, MB, GB, TB) that keeps
   * the number at 1 or above.
   *
   * @param value number of bytes
   * @return the formatted size, e.g. "0B" or "1KB"
   * @throws IllegalArgumentException if `value` is negative
   */
  def toHumanReadable(value: Long): String = value match {
    case v if v < 0  => throw new IllegalArgumentException("Invalid file size: " + value)
    case v if v == 0 => "0B"
    case v if v < K  => format(v, B, "B")
    case v if v < M  => format(v, K, "KB")
    case v if v < G  => format(v, M, "MB")
    case v if v < T  => format(v, G, "GB")
    case v           => format(v, T, "TB")
  }

  /**
   * Divides `value` by `divider` and renders it with grouping and at most two
   * decimals, followed directly by `unit`.
   *
   * @param value   raw amount
   * @param divider scale to divide by; a non-positive divider leaves the value unscaled
   * @param unit    suffix appended verbatim
   */
  def format(value: Long, divider: Long, unit: String): String = {
    // Strictly positive: a zero divider must not reach the division.
    val scaled = if (divider > 0) value.toDouble / divider else value.toDouble
    // The unit is appended rather than embedded in the pattern so that units
    // containing pattern characters (e.g. the 'E' in "EB") stay literal.
    new DecimalFormat("#,##0.##").format(scaled) + unit
  }
}

Source File: AmountFormatter.scala From OUTDATED_ledger-wallet-android with MIT License

5 votes

package co.ledger.wallet.core.bitcoin

import java.math.BigInteger
import java.text.DecimalFormat

/** Formats an integer amount expressed in a currency's smallest unit. */
trait AmountFormatter {
  /**
   * @param value     amount in the smallest unit
   * @param precision number of decimals: positive = exactly that many, zero = none,
   *                  negative = as many as needed
   * @return the formatted amount
   */
  def format(value: BigInt, precision: Int = -1): String
}

object AmountFormatter {

  /** Formatter for amounts with 8 decimal places. */
  lazy val Bitcoin: AmountFormatter = new BitcoinFormatter

  /**
   * Formats amounts whose smallest unit is 10^-magnitude of the displayed unit.
   *
   * @param magnitude number of decimal places between the smallest and the displayed unit
   */
  private class MagnitudeDependantFormatter(magnitude: Int) extends AmountFormatter {
    override def format(value: BigInt, precision: Int = -1): String = {
      val fraction =
        if (precision > 0) "." + "0" * precision
        // Guarded on magnitude so a zero magnitude does not leave a dangling '.'.
        else if (precision < 0 && magnitude > 0) "." + "#" * magnitude
        else ""
      val df = new DecimalFormat()
      df.applyPattern("###,###,###,##0" + fraction)
      // Shift the decimal point exactly instead of going through Long/Double,
      // which truncates or loses precision for large amounts.
      df.format(new java.math.BigDecimal(value.bigInteger, magnitude))
    }
  }

  private class BitcoinFormatter extends MagnitudeDependantFormatter(8)

}

Source File: TrafficMonitor.scala From shadowsocksr-android with GNU General Public License v3.0

5 votes

package com.github.shadowsocks.utils

import java.text.DecimalFormat

import com.github.shadowsocks.R
import com.github.shadowsocks.ShadowsocksApplication.app

/**
 * Global accumulator for transmitted/received byte counters and the transfer
 * rates derived from them.
 *
 * All state is held in plain mutable fields; only `dirty` is `@volatile`.
 */
object TrafficMonitor {
  // Bytes per second
  var txRate: Long = _
  var rxRate: Long = _

  // Bytes for the current session
  var txTotal: Long = _
  var rxTotal: Long = _

  // Bytes for the last query
  var txLast: Long = _
  var rxLast: Long = _
  var timestampLast: Long = _
  // Set whenever a total changes; cleared by updateRate() once consumed.
  @volatile var dirty = true

  private val units = Array("KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", "BB", "NB", "DB", "CB")
  // NOTE(review): '@' is presumably the ICU "significant digit" pattern character
  // (supported by Android's DecimalFormat), i.e. three significant digits — confirm.
  private val numberFormat = new DecimalFormat("@@@")

  /**
   * Renders a byte count for display.
   *
   * The value is divided by 1024 until it drops below 1000; the number of
   * divisions selects the unit suffix. Values that never reach 1000 are shown
   * as a plain count with the localized "bytes" plural resource.
   *
   * @param size number of bytes
   * @return the formatted value followed by a space and its unit
   */
  def formatTraffic(size: Long): String = {
    var n = size.toDouble
    var i = -1
    while (n >= 1000) {
      n /= 1024
      i += 1
    }
    if (i < 0) s"$size ${app.getResources.getQuantityString(R.plurals.bytes, size.toInt)}"
    else s"${numberFormat.format(n)} ${units(i)}"
  }

  /**
   * Recomputes `txRate`/`rxRate` from the totals accumulated since the last call.
   *
   * Nothing happens when no time has elapsed since `timestampLast`. When the
   * totals are unchanged (`dirty` is false) any non-zero rate is reset to 0.
   *
   * @return true if either rate was recomputed or reset by this call
   */
  def updateRate(): Boolean = {
    val now = System.currentTimeMillis()
    val delta = now - timestampLast
    var updated = false
    if (delta != 0) {
      if (dirty) {
        // delta is in milliseconds, hence the factor of 1000 for bytes per second.
        txRate = (txTotal - txLast) * 1000 / delta
        rxRate = (rxTotal - rxLast) * 1000 / delta
        txLast = txTotal
        rxLast = rxTotal
        dirty = false
        updated = true
      } else {
        if (txRate != 0) {
          txRate = 0
          updated = true
        }
        if (rxRate != 0) {
          rxRate = 0
          updated = true
        }
      }
      timestampLast = now
    }
    updated
  }

  /**
   * Stores new session totals and marks the monitor dirty if either changed.
   *
   * @param tx total bytes transmitted
   * @param rx total bytes received
   */
  def update(tx: Long, rx: Long): Unit = {
    if (txTotal != tx) {
      txTotal = tx
      dirty = true
    }
    if (rxTotal != rx) {
      rxTotal = rx
      dirty = true
    }
  }

  /** Zeroes all rates and counters and marks the monitor dirty; `timestampLast` is left untouched. */
  def reset(): Unit = {
    txRate = 0
    rxRate = 0
    txTotal = 0
    rxTotal = 0
    txLast = 0
    rxLast = 0
    dirty = true
  }
}

Source File: Histogram.scala From Scurses with MIT License

5 votes

package net.team2xh.onions.components.widgets

import java.text.DecimalFormat

import net.team2xh.onions.{Symbols, Palettes}
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.onions.utils.{Drawing, Math}
import net.team2xh.scurses.Scurses

import scala.Numeric.Implicits._

/**
 * Widget that draws a sequence of numeric values as vertical bars inside its
 * parent panel.
 *
 * @param parent        panel this widget is attached to
 * @param initialValues values present before anything is pushed
 * @param palette       colour palette (not read by the code in this class)
 * @param min           fixed lower bound of the value axis; derived from the values when empty
 * @param max           fixed upper bound of the value axis; derived from the values when empty
 * @param labelY        label passed to the axis-label drawing routine
 * @param showLabels    whether axis labels are drawn (also reserves horizontal space)
 * @param showValues    not read by the code in this class
 * @param screen        screen the bars are written to
 */
case class Histogram[T: Numeric](parent: FramePanel, initialValues: Seq[T] = Seq[Double](),
                                 palette: Seq[Int] = Palettes.rainbow,
                                 min: Option[Int] = None, max: Option[Int] = None,
                                 labelY: String = "",  showLabels: Boolean = true,
                                 showValues: Boolean = true)
                                (implicit screen: Scurses) extends Widget(parent) {

  // Grid spacing handed to the grid and axis-value drawing routines.
  val gridSize = 4

  override def focusable: Boolean = false

  override def innerHeight: Int = parent.innerHeight - 3

  // Maximum number of values retained by push().
  val limit = 400
  // Newest value first (push() prepends).
  var values = initialValues
  // Number of pushes modulo gridSize; used as the horizontal grid offset in redraw().
  var counter = 0
  // NOTE(review): not referenced anywhere in this class — confirm whether it is still needed.
  val df = new DecimalFormat("#.#")

  /**
   * Prepends `value`, drops values beyond `limit`, and requests a redraw.
   *
   * @param value the new sample
   */
  def push(value: T): Unit = {
    values +:= value
    values = values.take(limit)
    counter = (counter + 1) % gridSize
    needsRedraw = true
  }

  /**
   * Draws the grid, the axis values, one bar per retained value (newest at the
   * right edge) and, if enabled, the axis labels.
   */
  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {

    // Axis bounds: explicit min/max win; otherwise derived from the data,
    // with 0 and 10 as defaults when there is no data.
    val valueMin = min.getOrElse(if (values.isEmpty) 0 else Math.aBitLessThanMin(values))
    val valueMax = max.getOrElse(if (values.isEmpty) 10 else Math.aBitMoreThanMax(values) + 1)

    // Width reserved on the left for the widest axis value (plus 2 columns when labels are shown).
    val valuesLength = valueMax.toString.length max valueMin.toString.length
    val x0 = valuesLength + (if (showLabels) 2 else 0)
    val graphWidth = (if (showLabels) innerWidth - 3 else innerWidth - 1) - valuesLength
    val graphHeight = innerHeight - 1

    // Draw grid
    Drawing.drawGrid(x0, 0, graphWidth, graphHeight, gridSize, theme.accent1, theme.background,
      showVertical = true, showHorizontal = true, gridOffsetX = graphWidth % 4 + (gridSize - counter))

    // Draw axis values
    Drawing.drawAxisValues(x0 - valuesLength, 0, graphHeight, gridSize,
      valueMin, valueMax, theme.accent3, theme.background, horizontal = false)

    // Draw bars
    // Value range covered by one character row.
    val charHeight = (valueMax - valueMin).toDouble / graphHeight
    for (i <- 0 until ((graphWidth - 1) min values.length)) {
      val v = values(i)
      // Row of the bar's top: the value scaled into [0, graphHeight] and flipped, since row 0 is the top.
      val ny = graphHeight - math.round((graphHeight * (v.toDouble - valueMin)) / (valueMax - valueMin)).toInt

      val color = Palettes.mapToRGB((v.toDouble - valueMin).abs, (valueMax - valueMin).abs)

      for (y <- ny to graphHeight) {
        // Whether the value falls in the lower half of its character row; selects the top glyph below.
        val isLower = v.toDouble % charHeight < charHeight / 2.0
        val s =
          if (y == graphHeight) Symbols.BLOCK_UPPER
          else if (y == ny && (!isLower || ny == 0)) Symbols.BLOCK_LOWER
          else Symbols.BLOCK
        // Index 0 (the newest value) lands in the right-most column.
        screen.put(x0 + (graphWidth - i - 1), y, s, color, theme.background)
      }
    }

    // Draw labels
    if (showLabels) {
      Drawing.drawAxisLabels(x0, graphWidth, graphHeight, labelY = labelY, theme = theme)
    }
  }

  // Key presses are ignored.
  override def handleKeypress(keypress: Int): Unit = {}

}

Source File: VwFeatureNormalizer.scala From aloha with MIT License

5 votes

package com.eharmony.aloha.dataset.vw

import java.text.DecimalFormat
import java.util.regex.Pattern

/**
 * Rewrites each `|n features` section of a line as `|n:w features`, where `w`
 * is 1 / sqrt(sum of squared feature weights) for that section.
 *
 * NOTE(review): `DecimalFormat` is not synchronized and `instance` is shared;
 * confirm that callers do not use one instance from several threads at once.
 */
class VwFeatureNormalizer extends (CharSequence => CharSequence) with java.io.Serializable {
    // Group 1: single-character namespace name; group 2: everything up to the next '|'.
    private[this] val lineRegex = Pattern.compile("\\|(\\w)\\s+([^\\|]+)")
    // Captures the text after the last ':' of a feature token.
    private[this] val namespaceRegex = ".+:(.+)".r
    // Locale-independent symbols so the weight always uses '.' as decimal separator.
    private[this] val format = new DecimalFormat(
        "0.00000",
        java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT))

    /**
     * @param vwLine the line to normalize
     * @return the line with a weight inserted after every matched namespace name
     */
    def apply(vwLine: CharSequence): CharSequence = {
        val matcher = lineRegex.matcher(vwLine)
        val sb = new StringBuffer
        while (matcher.find) {
            val name = matcher.group(1)
            val features = matcher.group(2)
            val replacement = "|" + name + ":" + format.format(normalizeNamespace(features)) + " " + features
            // Quote the replacement: feature text may contain '$' or '\', which
            // appendReplacement would otherwise read as group references/escapes.
            matcher.appendReplacement(sb, java.util.regex.Matcher.quoteReplacement(replacement))
        }
        matcher.appendTail(sb)
        sb
    }

    /**
     * Computes 1 / sqrt(sum of w^2) over the whitespace-separated tokens of a
     * section. A token without an explicit `:weight` counts as weight 1.
     *
     * @return the normalization factor, or 0 when the sum of squares is 0
     */
    private[this] def normalizeNamespace(namespace: String): Double = {
        var sum = 0d
        namespace.split("\\s+").foreach {
            case namespaceRegex(w) =>
                val currentWeight = w.toDouble
                sum += currentWeight * currentWeight
            case _ => sum += 1
        }
        if (sum == 0) 0
        else 1.0 / math.sqrt(sum)
    }
}

object VwFeatureNormalizer {
    /** Shared normalizer instance. */
    val instance = new VwFeatureNormalizer
}

Source File: MovieData.scala From Apache-Spark-2x-Machine-Learning-Cookbook with MIT License

5 votes

package spark.ml.cookbook.chapter7

import java.text.DecimalFormat
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.jfree.chart.{ChartFactory, ChartFrame, JFreeChart}
import org.jfree.chart.axis.NumberAxis
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.xy.{XYSeries, XYSeriesCollection}


/** One parsed line of the movies file. */
case class MovieData(movieId: Int, title: String, year: Int, genre: Seq[String])

object MovieData {
  /** Opens a window showing `chart`. */
  def show(chart: JFreeChart): Unit = {
    val frame = new ChartFrame("plot", chart)
    frame.pack()
    frame.setVisible(true)
  }

  /**
   * Parses a line of the form `id::title (year)::genre1|genre2|...`.
   *
   * @param str one line of the movies file
   * @return the parsed record
   */
  def parseMovie(str: String): MovieData = {
    val columns = str.split("::")
    assert(columns.size == 3)

    // Replace parentheses by spaces so that the year becomes the last token.
    val titleYearStriped = """\(|\)""".r.replaceAllIn(columns(1), " ")
    val titleYearData = titleYearStriped.split(" ")

    MovieData(columns(0).toInt,
      titleYearData.take(titleYearData.size - 1).mkString(" "),
      titleYearData.last.toInt,
      // split takes a regular expression: an unescaped "|" is an empty
      // alternation and would split between every character.
      columns(2).split("\\|"))
  }

  /** Loads the movies file, prints the number of movies per year and plots it as a histogram. */
  def main(args: Array[String]): Unit = {

    val movieFile = "../data/sparkml2/chapter7/movies.dat"

    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    // setup SparkSession to use for interactions with Spark
    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("MovieData App")
      .config("spark.sql.warehouse.dir",  ".")
      .config("spark.executor.memory", "2g")
      .getOrCreate()

    import spark.implicits._

    val movies = spark.read.textFile(movieFile).map(parseMovie)
    movies.createOrReplaceTempView("movies")

    val movieCount = movies.count()
    println("Number of movies:  %s".format(movieCount))

    val moviesByYear = spark.sql("select year, count(year) as count from movies group by year order by year")
    moviesByYear.show(25)

    val histogramDataset = new XYSeriesCollection()
    val xy = new XYSeries("")
    moviesByYear.collect().foreach { row =>
      xy.add(row.getAs[Int]("year"), row.getAs[Long]("count"))
    }

    histogramDataset.addSeries(xy)

    val chart = ChartFactory.createHistogram(
      "", "Year", "Movies Per Year", histogramDataset, PlotOrientation.VERTICAL, false, false, false)
    val chartPlot = chart.getXYPlot()

    // Show years as plain integers on the x axis.
    val xAxis = chartPlot.getDomainAxis().asInstanceOf[NumberAxis]
    xAxis.setNumberFormatOverride(new DecimalFormat("####"))

    show(chart)

    spark.stop()
  }
}

Source File: RatingsData.scala From Apache-Spark-2x-Machine-Learning-Cookbook with MIT License

5 votes

package spark.ml.cookbook.chapter7

import java.text.DecimalFormat
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.SparkSession
import org.jfree.chart.{ChartFactory, ChartFrame, JFreeChart}
import org.jfree.chart.axis.NumberAxis
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.xy.{XYSeries, XYSeriesCollection}



/**
 * One parsed line of the ratings file.
 *
 * NOTE(review): the unit of `timestamp` is not visible here — presumably epoch seconds; confirm.
 */
case class Rating(userId: Int, movieId: Int, rating: Float, timestamp: Long)

object RatingsData {
  /** Opens a window showing `chart`. */
  def show(chart: JFreeChart): Unit = {
    val frame = new ChartFrame("plot", chart)
    frame.pack()
    frame.setVisible(true)
  }

  /**
   * Parses a line of the form `userId::movieId::rating::timestamp`.
   *
   * @param str one line of the ratings file
   * @return the parsed record
   */
  def parseRating(str: String): Rating = {
    val columns = str.split("::")
    assert(columns.size == 4)
    Rating(columns(0).toInt, columns(1).toInt, columns(2).toFloat, columns(3).toLong)
  }

  /** Loads the ratings file, prints the number of ratings per user and plots it as a scatter plot. */
  def main(args: Array[String]): Unit = {

    val ratingsFile = "../data/sparkml2/chapter7/ratings.dat"

    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    // setup SparkSession to use for interactions with Spark
    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("MovieRating App")
      .config("spark.sql.warehouse.dir",  ".")
      .config("spark.executor.memory", "2g")
      .getOrCreate()

    import spark.implicits._

    val ratings = spark.read.textFile(ratingsFile).map(parseRating)

    val ratingCount = ratings.count()
    println("Number of ratings:  %s".format(ratingCount))

    ratings.createOrReplaceTempView("ratings")
    val resultDF = spark.sql("select ratings.userId, count(*) as count from ratings group by ratings.userId")
    resultDF.show(25, false)

    val scatterPlotDataset = new XYSeriesCollection()
    val xy = new XYSeries("")

    // userId is an Int column; count(*) is a 64-bit (Long) column, so it must
    // not be read as Integer.
    resultDF.collect().foreach { r =>
      xy.add(r.getAs[Int]("userId"), r.getAs[Long]("count"))
    }

    scatterPlotDataset.addSeries(xy)

    val chart = ChartFactory.createScatterPlot(
      "", "User", "Ratings Per User", scatterPlotDataset, PlotOrientation.VERTICAL, false, false, false)
    val chartPlot = chart.getXYPlot()

    // Show user ids as plain integers on the x axis.
    val xAxis = chartPlot.getDomainAxis().asInstanceOf[NumberAxis]
    xAxis.setNumberFormatOverride(new DecimalFormat("####"))

    show(chart)

    spark.stop()
  }
}

java.text.DecimalFormat Scala Examples