scala.math.BigDecimal.RoundingMode Scala Examples
The following examples show how to use scala.math.BigDecimal.RoundingMode.
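Before the examples, here is a minimal sketch of what a rounding mode controls (values chosen for illustration): setScale fixes the number of decimal places, and the RoundingMode decides what happens to the digits that no longer fit.

import scala.math.BigDecimal.RoundingMode

val price = BigDecimal("2.345")
price.setScale(2, RoundingMode.HALF_UP) // 2.35: the dropped 5 rounds away from zero
price.setScale(2, RoundingMode.DOWN)    // 2.34: truncates toward zero
// RoundingMode.UNNECESSARY asserts that no rounding is needed and throws
// java.lang.ArithmeticException here, because 2.345 does not fit scale 2.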
Example 1
Source File: ParquetSink.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet

import com.sksamuel.exts.Logging
import com.sksamuel.exts.OptionImplicits._
import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.math.BigDecimal.RoundingMode.RoundingMode

case class ParquetWriteOptions(overwrite: Boolean = false,
                               permission: Option[FsPermission] = None,
                               dictionary: Boolean = true,
                               inheritPermissions: Option[Boolean] = None,
                               roundingMode: RoundingMode = RoundingMode.UNNECESSARY,
                               metadata: Map[String, String] = Map.empty) {

  def withOverwrite(overwrite: Boolean): ParquetWriteOptions = copy(overwrite = overwrite)
  def withDictionary(dictionary: Boolean): ParquetWriteOptions = copy(dictionary = dictionary)
  def withMetaData(map: Map[String, String]): ParquetWriteOptions = copy(metadata = map)
  def withPermission(permission: FsPermission): ParquetWriteOptions = copy(permission = permission.some)
  def withInheritPermission(inheritPermissions: Boolean): ParquetWriteOptions = copy(inheritPermissions = inheritPermissions.some)
  def withRoundingMode(mode: RoundingMode): ParquetWriteOptions = copy(roundingMode = mode)
}

case class ParquetSink(path: Path, options: ParquetWriteOptions = ParquetWriteOptions())
                      (implicit fs: FileSystem) extends Sink with Logging {

  // -- convenience methods --
  def withOverwrite(overwrite: Boolean): ParquetSink = copy(options = options.withOverwrite(overwrite))
  def withDictionary(dictionary: Boolean): ParquetSink = copy(options = options.copy(dictionary = dictionary))
  def withMetaData(map: Map[String, String]): ParquetSink = copy(options = options.copy(metadata = map))
  def withPermission(permission: FsPermission): ParquetSink = copy(options = options.copy(permission = permission.some))
  def withInheritPermission(inheritPermissions: Boolean): ParquetSink = copy(options = options.copy(inheritPermissions = inheritPermissions.some))
  def withRoundingMode(mode: RoundingMode): ParquetSink = copy(options = options.copy(roundingMode = mode))

  private def create(schema: StructType, path: Path): SinkWriter = new SinkWriter {

    if (options.overwrite && fs.exists(path))
      fs.delete(path, false)

    val writer = RowParquetWriterFn(path, schema, options.metadata, options.dictionary, options.roundingMode, fs.getConf)

    override def write(row: Row): Unit = {
      writer.write(row)
    }

    override def close(): Unit = {
      writer.close()
      options.permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (options.inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }

  override def open(schema: StructType, n: Int): Seq[SinkWriter] = {
    if (n == 1) Seq(create(schema, path))
    else List.tabulate(n) { k => create(schema, new Path(path.getParent, path.getName + "_" + k)) }
  }

  override def open(schema: StructType): SinkWriter = create(schema, path)
}

object ParquetSink {
  def apply(path: String)(implicit fs: FileSystem): ParquetSink = ParquetSink(new Path(path))
}
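A hypothetical usage sketch of the sink above (the file name and the local Hadoop setup are made up for illustration): the builder-style withRoundingMode swaps the default UNNECESSARY, which rejects any value that would need rounding, for HALF_UP, which rounds it to the schema scale instead.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import scala.math.BigDecimal.RoundingMode

implicit val fs: FileSystem = FileSystem.getLocal(new Configuration())

val sink = ParquetSink(new Path("people.parquet"))
  .withOverwrite(true)                    // replace any existing file
  .withRoundingMode(RoundingMode.HALF_UP) // round decimals instead of failing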
Example 2
Source File: DecimalWriterTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet

import io.eels.Row
import io.eels.schema.{DecimalType, Field, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.scalatest.FunSuite

import scala.math.BigDecimal.RoundingMode

class DecimalWriterTest extends FunSuite {

  test("negativeDecimalTest") {
    implicit val configuration = new Configuration
    val expectedBigDecimals = Seq(BigDecimal(-5025176.39), BigDecimal(-5), BigDecimal(-999.56434), BigDecimal(-10000.9890))
    assertBigDecimals("bigd_negative.parquet", expectedBigDecimals)
  }

  test("positiveDecimalTest") {
    implicit val configuration = new Configuration
    val expectedBigDecimals = Seq(BigDecimal(5025176.39), BigDecimal(5), BigDecimal(999.56434), BigDecimal(-10000.9890))
    assertBigDecimals("bigd_positive.parquet", expectedBigDecimals)
  }

  private def assertBigDecimals(filename: String, expectedBigDecimals: Seq[BigDecimal])(implicit configuration: Configuration): Unit = {
    val schema = StructType(Field(name = "bd", dataType = DecimalType(38, 10)))
    val path = new Path(filename)
    val fileSystem = path.getFileSystem(configuration)
    if (fileSystem.exists(path)) fileSystem.delete(path, false)

    // Write out the decimal values
    val parquetWriter = RowParquetWriterFn(path = path, schema = schema, metadata = Map.empty, dictionary = false, roundingMode = RoundingMode.UP, fileSystem.getConf)
    expectedBigDecimals.foreach { expectedBigDecimal =>
      println(s"Writing row with value $expectedBigDecimal")
      parquetWriter.write(Row.fromMap(schema, Map("bd" -> expectedBigDecimal)))
    }
    parquetWriter.close()

    // Read back all the writes and assert their values
    val parquetProjectionSchema = ParquetSchemaFns.toParquetMessageType(schema)
    val parquetReader = RowParquetReaderFn(path, None, Option(parquetProjectionSchema), dictionaryFiltering = true)
    for (i <- 0 until expectedBigDecimals.length) {
      val readRow = parquetReader.read
      println(s"read row: $readRow")
      assert(readRow.values.head == expectedBigDecimals(i))
    }
    parquetReader.close()
  }
}
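A side note on the roundingMode = RoundingMode.UP argument above: the writer has to coerce every value to the schema's declared scale of 10, and the mode decides how any extra decimal places are dropped. A small sketch with hypothetical inputs:

import scala.math.BigDecimal.RoundingMode

BigDecimal("-999.56434").setScale(10, RoundingMode.UP)    // -999.5643400000: widening the scale never rounds
BigDecimal("0.12345678905").setScale(10, RoundingMode.UP) // 0.1234567891: the 11th decimal rounds away from zero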
Example 3
Source File: ParquetVsOrcSpeedTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive

import java.io.File
import java.math.MathContext

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.orc.{OrcSink, OrcSource}
import io.eels.component.parquet.{ParquetSink, ParquetSource}
import io.eels.datastream.DataStream
import io.eels.schema._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.util.Random

object ParquetVsOrcSpeedTest extends App with Timed {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val size = 5000000

  val structType = StructType(
    Field("name", StringType),
    Field("age", IntType.Signed),
    Field("height", DoubleType),
    Field("amazing", BooleanType),
    Field("fans", LongType.Signed),
    Field("rating", DecimalType(4, 2))
  )

  def iter: Iterator[Vector[Any]] = Iterator.continually(Vector(
    Random.nextString(10),
    Random.nextInt(),
    Random.nextDouble(),
    Random.nextBoolean(),
    Random.nextLong(),
    BigDecimal(Random.nextDouble(), new MathContext(4)).setScale(2, RoundingMode.UP)
  ))

  def ds: DataStream = DataStream.fromIterator(structType, iter.take(size).map(Row(structType, _)))

  val ppath = new Path("parquet_speed.pq")
  fs.delete(ppath, false)

  val opath = new Path("orc_speed.orc")
  fs.delete(opath, false)

  new File(ppath.toString).deleteOnExit()
  new File(opath.toString).deleteOnExit()

  timed("Orc Insertion") {
    ds.to(OrcSink(opath))
  }

  timed("Parquet Insertion") {
    ds.to(ParquetSink(ppath))
  }

  while (true) {

    timed("Reading with OrcSource") {
      val actual = OrcSource(opath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(ppath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }
  }
}
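The interesting line above is the rating generator: MathContext(4) caps the value at four significant digits, and setScale(2, RoundingMode.UP) then fixes two decimal places, so every generated value fits the declared DecimalType(4, 2). A small sketch with a fixed input standing in for Random.nextDouble():

import java.math.MathContext
import scala.math.BigDecimal.RoundingMode

val raw = 0.123456789                          // stand-in for Random.nextDouble()
val sig4 = BigDecimal(raw, new MathContext(4)) // 0.1235: four significant digits
val rating = sig4.setScale(2, RoundingMode.UP) // 0.13: scale 2, rounded away from zero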
Example 4
Source File: RetainCompletenessRule.scala From deequ with Apache License 2.0
package com.amazon.deequ.suggestions.rules

import com.amazon.deequ.constraints.Constraint.completenessConstraint
import com.amazon.deequ.profiles._
import com.amazon.deequ.suggestions.ConstraintSuggestion

import scala.math.BigDecimal.RoundingMode

case class RetainCompletenessRule() extends ConstraintRule[ColumnProfile] {

  override def shouldBeApplied(profile: ColumnProfile, numRecords: Long): Boolean = {
    profile.completeness > 0.2 && profile.completeness < 1.0
  }

  override def candidate(profile: ColumnProfile, numRecords: Long): ConstraintSuggestion = {

    val p = profile.completeness
    val n = numRecords
    val z = 1.96

    // TODO this needs to be more robust for p's close to 0 or 1
    val targetCompleteness = BigDecimal(p - z * math.sqrt(p * (1 - p) / n))
      .setScale(2, RoundingMode.DOWN).toDouble

    val constraint = completenessConstraint(profile.column, _ >= targetCompleteness)

    val boundInPercent = ((1.0 - targetCompleteness) * 100).toInt

    val description = s"'${profile.column}' has less than $boundInPercent% missing values"

    ConstraintSuggestion(
      constraint,
      profile.column,
      "Completeness: " + profile.completeness.toString,
      description,
      this,
      s""".hasCompleteness("${profile.column}", _ >= $targetCompleteness,
         | Some("It should be above $targetCompleteness!"))"""
        .stripMargin.replaceAll("\n", "")
    )
  }

  override val ruleDescription: String = "If a column is incomplete in the sample, " +
    "we model its completeness as a binomial variable, estimate a confidence interval " +
    "and use this to define a lower bound for the completeness"
}
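To make the bound concrete, here is a worked instance of the calculation above with hypothetical numbers: an observed completeness of p = 0.9 over n = 1000 records.

import scala.math.BigDecimal.RoundingMode

val p = 0.9
val n = 1000
val z = 1.96 // ~95% confidence

// sqrt(0.9 * 0.1 / 1000) ≈ 0.009487, so p - z * 0.009487 ≈ 0.8814
val lower = BigDecimal(p - z * math.sqrt(p * (1 - p) / n))
  .setScale(2, RoundingMode.DOWN) // truncating toward zero keeps the suggested minimum conservative
// lower == 0.88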