java.text.NumberFormat Scala Examples
The following examples show how to use java.text.NumberFormat.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
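Before the project examples, here is a minimal, self-contained Scala sketch (not taken from any of the projects below; the values are illustrative) of the three NumberFormat uses that recur throughout this page: integer grouping, percentage formatting, and locale-aware parsing.

import java.text.NumberFormat
import java.util.Locale

object NumberFormatBasics extends App {
  // Integer instance with grouping, as used by the Spark SQLMetrics examples
  val intFormat = NumberFormat.getIntegerInstance(Locale.US)
  println(intFormat.format(1234567L)) // 1,234,567

  // Percent instance with two fraction digits, as used by the Linkis example
  val pctFormat = NumberFormat.getPercentInstance(Locale.US)
  pctFormat.setMinimumFractionDigits(2)
  println(pctFormat.format(0.4567)) // 45.67%

  // Locale-aware parsing, as used by the TypeCast examples
  val parsed = NumberFormat.getInstance(Locale.US).parse("1,234.5").doubleValue()
  println(parsed) // 1234.5
}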
Example 1
Source File: SQLMetrics.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}

class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}

object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.ENGLISH)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
}
Example 2
Source File: NumericPartialFunctions.scala From dsentric with Apache License 2.0 | 5 votes |
package dsentric

import java.text.NumberFormat

object NumericPartialFunctions {

  private val numericRegex =
    "^[\\+-]{0,1}(\\d{1,3}[\\.', ](\\d{3}[\\.', ])*\\d{3}([\\.,]\\d*)?|\\d*([\\.,]\\d*)?)$".r
  private val numberFormat = NumberFormat.getInstance()

  private def isNumeric(s: String) =
    !s.isEmpty && numericRegex.pattern.matcher(s).matches() &&
      s != "-" && s != "+" && s != "." && s != ","

  def byte: PartialFunction[Any, Byte] = {
    case n: Byte => n
    case n: Short if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n: Int if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n: Long if n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n: Double if n % 1 == 0 && n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
    case n: Float if n % 1 == 0 && n <= Byte.MaxValue && n >= Byte.MinValue => n.toByte
  }

  def short: PartialFunction[Any, Short] = {
    case n: Byte => n
    case n: Short => n
    case n: Int if n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n: Long if n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n: Double if n % 1 == 0 && n <= Short.MaxValue && n >= Short.MinValue => n.toShort
    case n: Float if n % 1 == 0 && n <= Short.MaxValue && n >= Short.MinValue => n.toShort
  }

  def int: PartialFunction[Any, Int] = {
    case n: Int => n
    case n: Long if n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n: Double if n % 1 == 0 && n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n: Float if n % 1 == 0 && n <= Int.MaxValue && n >= Int.MinValue => n.toInt
    case n: Short => n
    case n: Byte => n
  }

  def long: PartialFunction[Any, Long] = {
    case n: Int => n
    case n: Long => n
    case n: Double if n % 1 == 0 && n <= Long.MaxValue && n >= Long.MinValue => n.toLong
    case n: Float if n % 1 == 0 && n <= Long.MaxValue && n >= Long.MinValue => n.toLong
    case n: Short => n
    case n: Byte => n
  }

  def float: PartialFunction[Any, Float] = {
    case n: Float => n
    case n: Int => n
    case n: Long => n
    case n: Double if n <= Float.MaxValue && n >= Float.MinValue => n.toFloat
    case n: Short => n
    case n: Byte => n
  }

  def double: PartialFunction[Any, Double] = {
    case n: Double => n
    case n: Long => n
    case n: Float => n
    case n: Int => n
    case n: Short => n
    case n: Byte => n
  }

  def number: PartialFunction[Any, Number] = {
    case n: Double => n
    case n: Long => n
    case n: Float => n
    case n: Int => n
    case n: Short => n
    case n: Byte => n
  }

  def stringDouble: PartialFunction[Any, Double] = {
    case s: String if isNumeric(s) => numberFormat.parse(s).doubleValue()
  }
}
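As a hypothetical usage sketch (not part of dsentric itself), the partial functions above compose with orElse and lift into a total, Option-returning coercion; stringDouble routes locale-formatted strings through the shared NumberFormat instance, so the parsed value depends on the JVM's default locale.

import dsentric.NumericPartialFunctions

object NumericPartialFunctionsDemo extends App {
  // Combine the numeric and string coercions into one total function returning Option.
  val asDouble: Any => Option[Double] =
    (NumericPartialFunctions.double orElse NumericPartialFunctions.stringDouble).lift

  println(asDouble(42))        // Some(42.0)
  println(asDouble("1,234.5")) // Some(1234.5) in an English-style default locale
  println(asDouble("abc"))     // None
}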
Example 3
Source File: JobProgressUtil.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.spark.utils

import java.text.NumberFormat

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo
import org.apache.commons.lang.time.DateFormatUtils
import org.apache.spark.{JobExecutionStatus, SparkContext, SparkJobInfo}

object JobProgressUtil extends Logging {

  def progress(sc: SparkContext, jobGroup: String): Float = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val jobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
    val stages = jobs.flatMap { job => job.stageIds().flatMap(sc.statusTracker.getStageInfo) }
    val taskCount = stages.map(_.numTasks).sum
    val completedTaskCount = stages.map(_.numCompletedTasks).sum
    if (taskCount == 0) {
      0f
    } else {
      (completedTaskCount.toDouble / taskCount).toFloat
    }
  }

  def getActiveJobProgressInfo(sc: SparkContext, jobGroup: String): Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val activeJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
      .filter(_.status() == JobExecutionStatus.RUNNING)
    val progressInfos = activeJobs.map { job =>
      val jobProgressInfo = getJobProgressInfoByStages(job, sc, jobGroup)
      val timestamp = DateFormatUtils.format(System.currentTimeMillis, "yyyy-MM-dd HH:mm:ss")
      val progress = jobProgressInfo.succeedTasks * 1d / jobProgressInfo.totalTasks
      info(s"${jobProgressInfo.id} numTasks = ${jobProgressInfo.totalTasks}, numCompletedTasks = ${jobProgressInfo.succeedTasks}," +
        s" numActiveTasks = ${jobProgressInfo.runningTasks} , completed:${percentageFormat(progress)}")
      jobProgressInfo
    }
    progressInfos
  }

  def getCompletedJobProgressInfo(sc: SparkContext, jobGroup: String): Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val completedJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
      .filter(_.status() == JobExecutionStatus.SUCCEEDED)
    val progressInfos = completedJobs.map { job =>
      getJobProgressInfoByStages(job, sc, jobGroup)
    }
    progressInfos
  }

  private def getJobProgressInfoByStages(job: SparkJobInfo, sc: SparkContext, jobGroup: String): JobProgressInfo = {
    val stages = job.stageIds().flatMap(sc.statusTracker.getStageInfo)
    var numTasks = 0
    var numActiveTasks = 0
    var numFailedTasks = 0
    var numSucceedTasks = 0
    stages.foreach { stageInfo =>
      if (stageInfo.submissionTime() > 0) {
        numTasks += stageInfo.numTasks()
        numActiveTasks += stageInfo.numActiveTasks()
        numFailedTasks += stageInfo.numFailedTasks()
        numSucceedTasks += stageInfo.numCompletedTasks()
      }
    }
    JobProgressInfo(getJobId(job.jobId(), jobGroup), numTasks, numActiveTasks, numFailedTasks, numSucceedTasks)
  }

  private def getJobId(jobId: Int, jobGroup: String): String = "jobId-" + jobId + "(" + jobGroup + ")"

  private var _percentFormat: NumberFormat = _

  def percentageFormat(decimal: Double): String = {
    if (_percentFormat == null) {
      _percentFormat = NumberFormat.getPercentInstance
      _percentFormat.setMinimumFractionDigits(2)
    }
    _percentFormat.format(decimal)
  }
}
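The NumberFormat-relevant piece above is the lazily initialized percent formatter; the following stripped-down sketch (illustrative only, not Linkis code) shows the same pattern of a percent instance with two fraction digits applied to a task ratio.

import java.text.NumberFormat

object PercentFormatDemo extends App {
  // Same pattern as JobProgressUtil.percentageFormat: a percent instance with two fraction digits.
  private val percentFormat: NumberFormat = {
    val nf = NumberFormat.getPercentInstance
    nf.setMinimumFractionDigits(2)
    nf
  }

  val succeedTasks = 7
  val totalTasks = 20
  println(percentFormat.format(succeedTasks * 1d / totalTasks)) // 35.00% in an English locale
}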
Example 4
Source File: TypeCast.scala From spark-select with Apache License 2.0 | 5 votes |
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

// NOTE: only the toChar helper survives in this excerpt; the enclosing object declaration
// (matching the file name) is restored here so the snippet compiles.
object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1) match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
}
Example 5
Source File: ExcelOutputWriter.scala From spark-hadoopoffice-ds with Apache License 2.0 | 5 votes |
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.text.DecimalFormat
import java.text.NumberFormat
import java.util.Calendar
import java.util.Locale

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._
import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.mapreduce._
import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
    path: String,
    dataSchema: StructType,
    context: TaskAttemptContext,
    options: Map[String, String]) extends OutputWriter {

  // NOTE: fields such as useHeader, currentRowNum, defaultSheetName, simpleConverter and
  // recordWriter are initialized in parts of the original source file omitted from this excerpt.

  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "",
          MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      var currentColumnNum = 0;
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length == 5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue, comment, formula, address, sheetName)
        } else {
          simpleObject(i) = obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(
        simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x <- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }
}
Example 6
Source File: package.scala From CMAK with Apache License 2.0 | 5 votes |
package kafka.manager

import java.nio.charset.StandardCharsets
import java.text.NumberFormat

package object utils {

  import org.json4s._
  import org.json4s.jackson.JsonMethods._
  import org.json4s.jackson.Serialization.{read, write}

  implicit val formats = DefaultFormats

  private[this] val numberFormat = NumberFormat.getInstance()

  implicit class LongFormatted(val x: Long) {
    def formattedAsDecimal = numberFormat.format(x)
  }

  implicit def serializeString(data: String): Array[Byte] = {
    data.getBytes(StandardCharsets.UTF_8)
  }

  implicit def deserializeString(data: Array[Byte]): String = {
    new String(data, StandardCharsets.UTF_8)
  }

  def toJson(map: Map[String, Any]): String = {
    write(map)
  }

  def toJson(s: String): String = {
    "\"" + s + "\""
  }

  def fromJson[T](s: String): T = {
    read(s)
  }

  def parseJson(s: String): JValue = {
    parse(s)
  }

  @throws[UtilException]
  def checkCondition(cond: Boolean, error: UtilError): Unit = {
    if (!cond) {
      throw new UtilException(error)
    }
  }

  @throws[UtilException]
  def throwError[T](error: UtilError): T = {
    throw new UtilException(error)
  }
}
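A hypothetical usage note (not part of CMAK): importing kafka.manager.utils._ brings the LongFormatted implicit class into scope, so any Long can be rendered with grouping separators via the shared NumberFormat instance.

import kafka.manager.utils._

object LongFormattedDemo extends App {
  // Resolves to LongFormatted(1234567890L).formattedAsDecimal via the implicit class above.
  val pretty: String = 1234567890L.formattedAsDecimal
  println(pretty) // e.g. "1,234,567,890" in an English default locale
}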
Example 7
Source File: SQLMetrics.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}

class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}

object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.US)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
}
Example 8
Source File: JVMUtil.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.core.util

import java.io.{BufferedReader, InputStreamReader}
import java.net.URLClassLoader
import java.text.NumberFormat

object JVMUtil {

  def startSecondJVM[C](clazz: Class[C], jvmArgs: List[String], args: List[String], redirectStream: Boolean): Int = {
    val separator = System.getProperty("file.separator")
    val classpath = Thread.currentThread().getContextClassLoader.asInstanceOf[URLClassLoader].getURLs
      .map(_.getPath()).reduce((c1, c2) => c1 + java.io.File.pathSeparator + c2)
    val path = System.getProperty("java.home") + separator + "bin" + separator + "java"
    val commands: IList[String] =
      List(path) ::: jvmArgs ::: List("-cp", classpath, clazz.getCanonicalName.stripSuffix("$")) ::: args
    import scala.collection.JavaConverters._
    val processBuilder = new ProcessBuilder(commands.asJava)
    processBuilder.redirectErrorStream(redirectStream)
    val process = processBuilder.start()
    val is = process.getInputStream
    val isr = new InputStreamReader(is)
    val br = new BufferedReader(isr)
    var line = br.readLine()
    while (line != null) {
      println(line)
      line = br.readLine()
    }
    process.waitFor()
  }

  def showMemoryUsage(): Unit = {
    val runtime = Runtime.getRuntime
    val format = NumberFormat.getInstance()
    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()
    sb.append("free memory: " + format.format(freeMemory / 1024 / 1024) + " ")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024 / 1024) + " ")
    sb.append("max memory: " + format.format(maxMemory / 1024 / 1024) + " ")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024 / 1024) + " ")
    println(sb.toString())
  }
}
Example 9
Source File: SQLMetrics.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric

import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.AccumulableInfo
import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils}

class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] {
  // This is a workaround for SPARK-11013.
  // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will
  // update it at the end of task and the value will be at least 0. Then we can filter out the -1
  // values before calculate max, min, etc.
  private[this] var _value = initValue
  private var _zeroValue = initValue

  override def copy(): SQLMetric = {
    val newAcc = new SQLMetric(metricType, _value)
    newAcc._zeroValue = initValue
    newAcc
  }

  override def reset(): Unit = _value = _zeroValue

  override def merge(other: AccumulatorV2[Long, Long]): Unit = other match {
    case o: SQLMetric => _value += o.value
    case _ => throw new UnsupportedOperationException(
      s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}")
  }

  override def isZero(): Boolean = _value == _zeroValue

  override def add(v: Long): Unit = _value += v

  def +=(v: Long): Unit = _value += v

  override def value: Long = _value

  // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later
  override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = {
    new AccumulableInfo(
      id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER))
  }
}

object SQLMetrics {
  private val SUM_METRIC = "sum"
  private val SIZE_METRIC = "size"
  private val TIMING_METRIC = "timing"

  def createMetric(sc: SparkContext, name: String): SQLMetric = {
    val acc = new SQLMetric(SUM_METRIC)
    acc.register(sc, name = Some(name), countFailedValues = false)
    acc
  }

  def stringValue(metricsType: String, values: Seq[Long]): String = {
    if (metricsType == SUM_METRIC) {
      val numberFormat = NumberFormat.getIntegerInstance(Locale.US)
      numberFormat.format(values.sum)
    } else {
      val strFormat: Long => String = if (metricsType == SIZE_METRIC) {
        Utils.bytesToString
      } else if (metricsType == TIMING_METRIC) {
        Utils.msDurationToString
      } else {
        throw new IllegalStateException("unexpected metrics type: " + metricsType)
      }

      val validValues = values.filter(_ >= 0)
      val Seq(sum, min, med, max) = {
        val metric = if (validValues.isEmpty) {
          Seq.fill(4)(0L)
        } else {
          val sorted = validValues.sorted
          Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1))
        }
        metric.map(strFormat)
      }
      s"\n$sum ($min, $med, $max)"
    }
  }
}
Example 10
Source File: TypeCast.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
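A hedged sketch of how castTo behaves around NumberFormat (illustrative only; the demo object below is hypothetical and must live in the same package because castTo is package-private): plain numerals take the toInt/toFloat fast path, while grouped numerals fall back to NumberFormat.parse.

package mimir.exec.spark.datasource.google.spreadsheet

import org.apache.spark.sql.types.{DoubleType, IntegerType}

object TypeCastDemo extends App {
  // Plain numerals go through the fast toInt/toFloat path...
  println(TypeCast.castTo("42", IntegerType))     // 42
  // ...while locale-formatted numerals fall back to NumberFormat.parse.
  println(TypeCast.castTo("1,234.5", DoubleType)) // 1234.5 in an English default locale
}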
Example 11
Source File: SandboxApp.scala From bloom-filter-scala with MIT License | 5 votes |
import java.text.NumberFormat

import bloomfilter.mutable.{CuckooFilter, UnsafeTable8Bit}
import com.google.monitoring.runtime.instrumentation.{AllocationRecorder, Sampler}
import com.twitter.algebird.{BloomFilter => AlgebirdBloomFilter}

import scala.util.Random

object SandboxApp {

  def checkMemory(): Unit = {
    val runtime = Runtime.getRuntime
    val format = NumberFormat.getInstance()
    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()
    sb.append("free memory: " + format.format(freeMemory / 1024) + "\n")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024) + "\n")
    sb.append("max memory: " + format.format(maxMemory / 1024) + "\n")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n")
    System.out.println(sb.toString())
  }

  def main(args: Array[String]): Unit = {
    val sut = CuckooFilter[Long](1000)
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10)
    assert(sut.mightContain(10))
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10000)
    assert(sut.mightContain(10000))
  }

  def compareAlgebirdFPR(): Unit = {
    val random: Random = new Random()
    val itemsExpected = 10000L
    val falsePositiveRate = 0.1
    var bf = AlgebirdBloomFilter(itemsExpected.toInt, falsePositiveRate, 0).create("")
    val bf2 = bloomfilter.mutable.BloomFilter[String](itemsExpected, falsePositiveRate)
    var i = 0
    while (i < itemsExpected) {
      val str: String = random.nextString(1000)
      bf = bf.+(str)
      bf2.add(str)
      i += 1
    }
    i = 0
    var in, in2 = 0
    while (true) {
      val str = random.nextString(1000)
      if (bf.contains(str).isTrue) {
        in += 1
      }
      if (bf2.mightContain(str)) {
        in2 += 1
      }
      if (i % 1000 == 0) {
        println(s"in: $in; in2: $in2")
      }
    }
  }

  def checkAllocations(): Unit = {
    val sampler: Sampler = new Sampler() {
      def sampleAllocation(count: Int, desc: String, newObj: Object, size: Long) {
        System.out.println("I just allocated the object " + newObj + " of type " + desc + " whose size is " + size)
        if (count != -1) {
          System.out.println("It's an array of size " + count)
        }
      }
    }
    AllocationRecorder.addSampler(sampler)
    AllocationRecorder.removeSampler(sampler)
  }
}
Example 12
Source File: ThreadLocalNumberFormat.scala From perfolation with MIT License | 5 votes |
package perfolation

import java.math.RoundingMode
import java.text.NumberFormat
import java.util.Currency

object ThreadLocalNumberFormat {
  // Make sure the platform is initialized
  Platform

  private val threadLocalNumberFormat = new ThreadLocal[NumberFormat] {
    override protected def initialValue(): NumberFormat = NumberFormat.getInstance()
  }

  protected[perfolation] def apply(i: Int,
                                   f: Int,
                                   maxI: Int,
                                   maxF: Int,
                                   g: Boolean,
                                   c: Option[Currency],
                                   rm: RoundingMode): NumberFormat = {
    val nf = threadLocalNumberFormat.get()
    nf.setGroupingUsed(g)
    c.foreach(nf.setCurrency)
    nf.setMaximumFractionDigits(if (maxF == -1) f else maxF)
    nf.setMinimumFractionDigits(f)
    nf.setMaximumIntegerDigits(if (maxI == -1) i else maxI)
    nf.setMinimumIntegerDigits(i)
    nf.setParseIntegerOnly(maxF == 0)
    nf.setRoundingMode(rm)
    nf
  }
}
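The ThreadLocal here exists because java.text.NumberFormat instances are not thread-safe; the following standalone sketch (names are illustrative, not part of perfolation) shows the same per-thread caching idea in isolation.

import java.text.NumberFormat

// Minimal sketch of the same idea: each thread gets, and freely mutates, its own cached instance.
object PerThreadFormat {
  private val nf = new ThreadLocal[NumberFormat] {
    override protected def initialValue(): NumberFormat = NumberFormat.getInstance()
  }

  def format(value: Double, fractionDigits: Int): String = {
    val f = nf.get()
    f.setMinimumFractionDigits(fractionDigits)
    f.setMaximumFractionDigits(fractionDigits)
    f.format(value)
  }
}

object PerThreadFormatDemo extends App {
  println(PerThreadFormat.format(math.Pi, 4)) // e.g. 3.1416 in an English default locale
}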
Example 13
Source File: PlyOutputWriter.scala From spark-iqmulus with Apache License 2.0 | 5 votes |
package fr.ign.spark.iqmulus.ply

import org.apache.spark.sql.types._
import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext, JobContext }
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
import java.io.DataOutputStream
import org.apache.spark.sql.sources.OutputWriter
import org.apache.hadoop.io.{ NullWritable, BytesWritable }
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.fs.Path
import java.text.NumberFormat
import org.apache.spark.sql.{ Row, SQLContext, sources }
import fr.ign.spark.iqmulus.RowOutputStream

class PlyOutputWriter(
    name: String,
    context: TaskAttemptContext,
    dataSchema: StructType,
    element: String,
    littleEndian: Boolean
) extends OutputWriter {

  private val file = {
    val path = getDefaultWorkFile(s".ply.$element")
    val fs = path.getFileSystem(context.getConfiguration)
    fs.create(path)
  }

  private var count = 0L

  // strip out ids
  private val schema = StructType(dataSchema.filterNot { Seq("fid", "pid") contains _.name })

  private val recordWriter = new RowOutputStream(new DataOutputStream(file), littleEndian, schema, dataSchema)

  def getDefaultWorkFile(extension: String): Path = {
    val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID")
    val taskAttemptId: TaskAttemptID = context.getTaskAttemptID
    val split = taskAttemptId.getTaskID.getId
    new Path(name, f"$split%05d-$uniqueWriteJobId$extension")
  }

  override def write(row: Row): Unit = {
    recordWriter.write(row)
    count += 1
  }

  override def close(): Unit = {
    recordWriter.close

    // write header
    val path = getDefaultWorkFile(".ply.header")
    val fs = path.getFileSystem(context.getConfiguration)
    val dos = new java.io.DataOutputStream(fs.create(path))
    val header = new PlyHeader(path.toString, littleEndian, Map(element -> ((count, schema))))
    header.write(dos)
    dos.close
  }
}
Example 14
Source File: LasOutputWriter.scala From spark-iqmulus with Apache License 2.0 | 5 votes |
package fr.ign.spark.iqmulus.las

import org.apache.spark.sql.types._
import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext }
import java.io.DataOutputStream
import org.apache.spark.sql.sources.OutputWriter
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.hadoop.io.{ NullWritable, BytesWritable }
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.fs.Path
import java.text.NumberFormat
import org.apache.spark.sql.{ Row, SQLContext, sources }
import fr.ign.spark.iqmulus.RowOutputStream

class LasOutputWriter(
    name: String,
    context: TaskAttemptContext,
    dataSchema: StructType,
    formatOpt: Option[Byte] = None,
    version: Version = Version(),
    offset: Array[Double] = Array(0F, 0F, 0F),
    scale: Array[Double] = Array(0.01F, 0.01F, 0.01F)
) extends OutputWriter {

  private val file = {
    val path = getDefaultWorkFile("/1.pdr")
    val fs = path.getFileSystem(context.getConfiguration)
    fs.create(path)
  }

  private val pmin = Array.fill[Double](3)(Double.PositiveInfinity)
  private val pmax = Array.fill[Double](3)(Double.NegativeInfinity)
  private val countByReturn = Array.fill[Long](15)(0)

  private def count = countByReturn.sum

  private val format = formatOpt.getOrElse(LasHeader.formatFromSchema(dataSchema))

  // todo, extra bytes
  private val schema = LasHeader.schema(format)

  private def header =
    new LasHeader(name, format, count, pmin, pmax, scale, offset, countByReturn)

  private val recordWriter =
    new RowOutputStream(new DataOutputStream(file), littleEndian = true, schema, dataSchema)

  def getDefaultWorkFile(extension: String): Path = {
    val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID")
    val taskAttemptId: TaskAttemptID = context.getTaskAttemptID
    val split = taskAttemptId.getTaskID.getId
    new Path(name, f"$split%05d-$uniqueWriteJobId$extension")
  }

  override def write(row: Row): Unit = {
    recordWriter.write(row)

    // gather statistics for the header
    val x = offset(0) + scale(0) * row.getAs[Int]("x").toDouble
    val y = offset(1) + scale(1) * row.getAs[Int]("y").toDouble
    val z = offset(2) + scale(2) * row.getAs[Int]("z").toDouble
    val ret = row.getAs[Byte]("flags") & 0x3
    countByReturn(ret) += 1
    pmin(0) = Math.min(pmin(0), x)
    pmin(1) = Math.min(pmin(1), y)
    pmin(2) = Math.min(pmin(2), z)
    pmax(0) = Math.max(pmax(0), x)
    pmax(1) = Math.max(pmax(1), y)
    pmax(2) = Math.max(pmax(2), z)
  }

  override def close(): Unit = {
    recordWriter.close

    // write header
    val path = getDefaultWorkFile("/0.header")
    val fs = path.getFileSystem(context.getConfiguration)
    val dos = new java.io.DataOutputStream(fs.create(path))
    header.write(dos)
    dos.close

    // copy header and pdf to a final las file (1 per split)
    org.apache.hadoop.fs.FileUtil.copyMerge(
      fs, getDefaultWorkFile("/"),
      fs, getDefaultWorkFile(".las"),
      true, context.getConfiguration, ""
    )
  }
}
Example 15
Source File: TypeCast.scala From spark-google-spreadsheets with Apache License 2.0 | 5 votes |
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
      datum: String,
      castType: DataType,
      nullable: Boolean = true
  ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")
    }
  }
}
Example 16
Source File: Bencharts.scala From rtree2d with Apache License 2.0 | 5 votes |
import java.awt.{Color, Paint}
import java.text.NumberFormat

import javax.imageio.ImageIO
import org.jfree.chart.JFreeChart
import org.jfree.chart.axis.LogarithmicAxis
import org.jfree.chart.plot.{DefaultDrawingSupplier, XYPlot}
import org.jfree.chart.renderer.xy.XYErrorRenderer
import org.jfree.data.xy.{YIntervalSeries, YIntervalSeriesCollection}
import sbt._
import com.github.plokhotnyuk.jsoniter_scala.macros._
import com.github.plokhotnyuk.jsoniter_scala.core._
import com.github.plokhotnyuk.jsoniter_scala.macros.JsonCodecMaker._

import scala.collection.SortedMap

// NOTE: the opening object declaration was lost in this excerpt (its closing brace survived);
// it is restored here, named after the source file, so the members below have an enclosing scope.
object Bencharts {

  def apply(jmhReport: File, yAxisTitle: String, targetDir: File): Unit = {
    val allResults = readFromArray(IO.readBytes(jmhReport))(make[Seq[BenchmarkResult]](CodecMakerConfig))
    val constParams = allResults.flatMap(_.params.toSeq).groupBy(_._1).collect {
      case (_, kvs) if kvs.distinct.size == 1 => kvs.head
    }.toSeq
    allResults.groupBy(benchmarkName(constParams)).foreach { case (benchmark, results) =>
      val dataset = new YIntervalSeriesCollection {
        SortedMap(results.groupBy(otherParams(constParams)).toSeq: _*).foreach { case (params, iterations) =>
          addSeries(new YIntervalSeries(params) {
            iterations.foreach { iteration =>
              val x = iteration.params.get("size").fold(0.0)(_.toDouble)
              val y = Math.max(iteration.primaryMetric.score, 1.0)
              val yLow = Math.max(iteration.primaryMetric.scoreConfidence._1, 1.0)
              val yHigh = Math.max(iteration.primaryMetric.scoreConfidence._2, 1.0)
              add(x, y, yLow, yHigh)
            }
          })
        }
      }
      val renderer = new XYErrorRenderer {
        (0 to dataset.getSeriesCount).foreach(i => setSeriesLinesVisible(i, true))
      }
      val plot = new XYPlot(dataset, axis("Size"), axis(yAxisTitle), renderer) {
        setDrawingSupplier(new DefaultDrawingSupplier {
          override def getNextPaint: Paint = super.getNextPaint match {
            case x: Color if x.getRed > 200 && x.getGreen > 200 =>
              new Color(x.getRed, (x.getGreen * 0.8).toInt, x.getBlue, x.getAlpha)
            case x => x
          }
        })
      }
      val chart = new JFreeChart(benchmark, JFreeChart.DEFAULT_TITLE_FONT, plot, true)
      ImageIO.write(chart.createBufferedImage(1200, 900), "png", targetDir / s"$benchmark.png")
    }
  }

  private def axis(title: String): LogarithmicAxis = new LogarithmicAxis(title) {
    setAllowNegativesFlag(true)
    setNumberFormatOverride(NumberFormat.getInstance())
  }

  private def benchmarkName(constParams: Seq[(String, String)])(result: BenchmarkResult): String = {
    val benchName = result.benchmark.split("""\.""").last
    constParams.map { case (k, v) => s"$k=$v" }.sorted.mkString(s"$benchName[", ",", "]")
  }

  private def otherParams(constParams: Seq[(String, String)])(result: BenchmarkResult): String = {
    val constParamNames = constParams.map(_._1).toSet
    val benchSuitName = result.benchmark.split("""\.""").reverse.tail.head
    result.params.filterKeys(k => k != "size" && !constParamNames(k)).map { case (k, v) => s"$k=$v" }
      .toSeq.sorted.mkString(s"$benchSuitName[", ",", "]")
  }
}

case class BenchmarkMetric(score: Double, scoreConfidence: (Double, Double))

case class BenchmarkResult(benchmark: String, params: Map[String, String], primaryMetric: BenchmarkMetric)