scala.math.BigDecimal.RoundingMode Scala Examples

The following examples show how to use scala.math.BigDecimal.RoundingMode. They are drawn from open source projects; the source file, project, and license are noted above each example so you can trace it back to the original code.
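Before the project examples, a minimal self-contained primer (values chosen purely for illustration): RoundingMode is the enumeration passed to BigDecimal.setScale to control how excess decimal digits are discarded.

import scala.math.BigDecimal.RoundingMode

object RoundingModeBasics extends App {
  val x = BigDecimal("2.345")

  // HALF_UP rounds to the nearest neighbour, ties away from zero
  println(x.setScale(2, RoundingMode.HALF_UP)) // 2.35
  // DOWN simply truncates towards zero
  println(x.setScale(2, RoundingMode.DOWN))    // 2.34
  // UNNECESSARY asserts that no rounding is needed and
  // throws an ArithmeticException when it is
  // x.setScale(2, RoundingMode.UNNECESSARY)   // would throw here
}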
Example 1
Source File: ParquetSink.scala    From eel-sdk    with Apache License 2.0
package io.eels.component.parquet

import com.sksamuel.exts.Logging
import com.sksamuel.exts.OptionImplicits._
import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.math.BigDecimal.RoundingMode.RoundingMode

case class ParquetWriteOptions(overwrite: Boolean = false,
                               permission: Option[FsPermission] = None,
                               dictionary: Boolean = true,
                               inheritPermissions: Option[Boolean] = None,
                               roundingMode: RoundingMode = RoundingMode.UNNECESSARY,
                               metadata: Map[String, String] = Map.empty) {

  def withOverwrite(overwrite: Boolean): ParquetWriteOptions = copy(overwrite = overwrite)
  def withDictionary(dictionary: Boolean): ParquetWriteOptions = copy(dictionary = dictionary)
  def withMetaData(map: Map[String, String]): ParquetWriteOptions = copy(metadata = map)
  def withPermission(permission: FsPermission): ParquetWriteOptions = copy(permission = permission.some)
  def withInheritPermission(inheritPermissions: Boolean): ParquetWriteOptions = copy(inheritPermissions = inheritPermissions.some)
  def withRoundingMode(mode: RoundingMode): ParquetWriteOptions = copy(roundingMode = mode)
}

case class ParquetSink(path: Path, options: ParquetWriteOptions = ParquetWriteOptions())
                      (implicit fs: FileSystem) extends Sink with Logging {

  // -- convenience methods --
  def withOverwrite(overwrite: Boolean): ParquetSink = copy(options = options.withOverwrite(overwrite))
  def withDictionary(dictionary: Boolean): ParquetSink = copy(options = options.copy(dictionary = dictionary))
  def withMetaData(map: Map[String, String]): ParquetSink = copy(options = options.copy(metadata = map))
  def withPermission(permission: FsPermission): ParquetSink = copy(options = options.copy(permission = permission.some))
  def withInheritPermission(inheritPermissions: Boolean): ParquetSink = copy(options = options.copy(inheritPermissions = inheritPermissions.some))
  def withRoundingMode(mode: RoundingMode): ParquetSink = copy(options = options.copy(roundingMode = mode))

  private def create(schema: StructType, path: Path): SinkWriter = new SinkWriter {

    if (options.overwrite && fs.exists(path))
      fs.delete(path, false)

    val writer = RowParquetWriterFn(path, schema, options.metadata, options.dictionary, options.roundingMode, fs.getConf)

    override def write(row: Row): Unit = {
      writer.write(row)
    }

    override def close(): Unit = {
      writer.close()
      options.permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (options.inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }

  override def open(schema: StructType, n: Int): Seq[SinkWriter] = {
    if (n == 1) Seq(create(schema, path))
    else List.tabulate(n) { k => create(schema, new Path(path.getParent, path.getName + "_" + k)) }
  }

  override def open(schema: StructType): SinkWriter = create(schema, path)
}

object ParquetSink {
  def apply(path: String)(implicit fs: FileSystem): ParquetSink = ParquetSink(new Path(path))
} 
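A hedged usage sketch of the sink defined above: the rounding mode configured here is what RowParquetWriterFn receives when coercing BigDecimal values to the target decimal type. The output path and the DataStream ds are hypothetical, and a Hadoop FileSystem must be in implicit scope (as in Example 3 below).

import io.eels.component.parquet.ParquetSink
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import scala.math.BigDecimal.RoundingMode

implicit val fs: FileSystem = FileSystem.getLocal(new Configuration())

// Sketch only: "decimals.parquet" and `ds` are placeholders
val sink = ParquetSink("decimals.parquet")
  .withOverwrite(true)
  .withRoundingMode(RoundingMode.HALF_UP)
// ds.to(sink)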
Example 2
Source File: DecimalWriterTest.scala    From eel-sdk    with Apache License 2.0
package io.eels.component.parquet

import io.eels.Row
import io.eels.schema.{DecimalType, Field, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.scalatest.FunSuite

import scala.math.BigDecimal.RoundingMode

class DecimalWriterTest extends FunSuite {

  test("negativeDecimalTest") {
    implicit val configuration = new Configuration
    val expectedBigDecimals = Seq(BigDecimal(-5025176.39), BigDecimal(-5), BigDecimal(-999.56434), BigDecimal(-10000.9890))
    assertBigDecimals("bigd_negative.parquet", expectedBigDecimals)
  }

  test("positiveDecimalTest") {
    implicit val configuration = new Configuration
    val expectedBigDecimals = Seq(BigDecimal(5025176.39), BigDecimal(5), BigDecimal(999.56434), BigDecimal(-10000.9890))
    assertBigDecimals("bigd_positive.parquet", expectedBigDecimals)
  }

  private def assertBigDecimals(filename: String, expectedBigDecimals: Seq[BigDecimal])(implicit configuration: Configuration): Unit = {
    val schema = StructType(Field(name = "bd", dataType = DecimalType(38, 10)))
    val path = new Path(filename)
    val fileSystem = path.getFileSystem(configuration)
    if (fileSystem.exists(path)) fileSystem.delete(path, false)

    // Write out the decimal values
    val parquetWriter = RowParquetWriterFn(path = path, schema = schema, metadata = Map.empty, dictionary = false, roundingMode = RoundingMode.UP, fileSystem.getConf)
    expectedBigDecimals.foreach { expectedBigDecimal =>
      println(s"Writing row with value $expectedBigDecimal")
      parquetWriter.write(Row.fromMap(schema, Map("bd" -> expectedBigDecimal)))
    }
    parquetWriter.close()

    // Read back all the writes and assert their values
    val parquetProjectionSchema = ParquetSchemaFns.toParquetMessageType(schema)
    val parquetReader = RowParquetReaderFn(path, None, Option(parquetProjectionSchema), dictionaryFiltering = true)
    for (i <- 0 until expectedBigDecimals.length) {
      val readRow = parquetReader.read
      println(s"read row: $readRow")
      assert(readRow.values.head == expectedBigDecimals(i))
    }
    parquetReader.close()
  }

} 
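The test writes with RoundingMode.UP because the schema pins the column to DecimalType(38, 10): any value with more than ten decimal digits has to be rounded before it can be encoded. A small sketch of that underlying BigDecimal behaviour, independent of the writer:

import scala.math.BigDecimal.RoundingMode

// Padding up to the declared scale needs no rounding
println(BigDecimal("-999.56434").setScale(10, RoundingMode.UP))

// Excess precision is rounded away from zero by UP ...
println(BigDecimal("1.23456789012").setScale(10, RoundingMode.UP)) // 1.2345678902
// ... whereas RoundingMode.UNNECESSARY would throw an ArithmeticException instead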
Example 3
Source File: ParquetVsOrcSpeedTest.scala    From eel-sdk    with Apache License 2.0
package io.eels.component.hive

import java.io.File
import java.math.MathContext

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.orc.{OrcSink, OrcSource}
import io.eels.component.parquet.{ParquetSink, ParquetSource}
import io.eels.datastream.DataStream
import io.eels.schema._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.util.Random

object ParquetVsOrcSpeedTest extends App with Timed {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val size = 5000000

  val structType = StructType(
    Field("name", StringType),
    Field("age", IntType.Signed),
    Field("height", DoubleType),
    Field("amazing", BooleanType),
    Field("fans", LongType.Signed),
    Field("rating", DecimalType(4, 2))
  )

  def iter: Iterator[Vector[Any]] = Iterator.continually(Vector(
    Random.nextString(10),
    Random.nextInt(),
    Random.nextDouble(),
    Random.nextBoolean(),
    Random.nextLong(),
    BigDecimal(Random.nextDouble(), new MathContext(4)).setScale(2, RoundingMode.UP)
  ))

  def ds: DataStream = DataStream.fromIterator(structType, iter.take(size).map(Row(structType, _)))

  val ppath = new Path("parquet_speed.pq")
  fs.delete(ppath, false)

  val opath = new Path("orc_speed.orc")
  fs.delete(opath, false)

  new File(ppath.toString).deleteOnExit()
  new File(opath.toString).deleteOnExit()

  timed("Orc Insertion") {
    ds.to(OrcSink(opath))
  }

  timed("Parquet Insertion") {
    ds.to(ParquetSink(ppath))
  }

  while (true) {

    timed("Reading with OrcSource") {
      val actual = OrcSource(opath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(ppath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }
  }
} 
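The rating column is the RoundingMode-relevant part of this benchmark: MathContext(4) caps the random double at four significant digits, and setScale(2, RoundingMode.UP) forces it into DecimalType(4, 2). A standalone reproduction of just that step (printed values are illustrative):

import java.math.MathContext
import scala.math.BigDecimal.RoundingMode
import scala.util.Random

val raw = Random.nextDouble() // e.g. 0.12345678...
// 4 significant digits, then scale 2 so the value fits DecimalType(4, 2)
val rating = BigDecimal(raw, new MathContext(4)).setScale(2, RoundingMode.UP)
println(s"$raw -> $rating")   // e.g. 0.12345678... -> 0.13

UP is used rather than UNNECESSARY because the generated doubles essentially never have a scale of two or less, so an exactness assertion would throw.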
Example 4
Source File: RetainCompletenessRule.scala    From deequ   with Apache License 2.0
package com.amazon.deequ.suggestions.rules

import com.amazon.deequ.constraints.Constraint.completenessConstraint
import com.amazon.deequ.profiles._
import com.amazon.deequ.suggestions.ConstraintSuggestion
import scala.math.BigDecimal.RoundingMode


case class RetainCompletenessRule() extends ConstraintRule[ColumnProfile] {

  override def shouldBeApplied(profile: ColumnProfile, numRecords: Long): Boolean = {
    profile.completeness > 0.2 && profile.completeness < 1.0
  }

  override def candidate(profile: ColumnProfile, numRecords: Long): ConstraintSuggestion = {

    val p = profile.completeness
    val n = numRecords
    val z = 1.96 // z-score for an approximate 95% confidence interval (normal approximation)

    // TODO this needs to be more robust for p's close to 0 or 1
    val targetCompleteness = BigDecimal(p - z * math.sqrt(p * (1 - p) / n))
      .setScale(2, RoundingMode.DOWN).toDouble

    val constraint = completenessConstraint(profile.column, _ >= targetCompleteness)

    val boundInPercent = ((1.0 - targetCompleteness) * 100).toInt

    val description = s"'${profile.column}' has less than $boundInPercent% missing values"

    ConstraintSuggestion(
      constraint,
      profile.column,
      "Completeness: " + profile.completeness.toString,
      description,
      this,
      s""".hasCompleteness("${profile.column}", _ >= $targetCompleteness,
         | Some("It should be above $targetCompleteness!"))"""
        .stripMargin.replaceAll("\n", "")
    )
  }

  override val ruleDescription: String = "If a column is incomplete in the sample, " +
    "we model its completeness as a binomial variable, estimate a confidence interval " +
    "and use this to define a lower bound for the completeness"
}
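To make the bound concrete, a worked sketch with hypothetical inputs (an observed completeness of p = 0.8 over n = 1000 records): the normal-approximation lower bound is 0.8 - 1.96 * sqrt(0.8 * 0.2 / 1000) ≈ 0.7752, which RoundingMode.DOWN truncates to 0.77.

import scala.math.BigDecimal.RoundingMode

// Hypothetical inputs: 80% completeness observed over 1000 records
val p = 0.8
val n = 1000L
val z = 1.96

val targetCompleteness = BigDecimal(p - z * math.sqrt(p * (1 - p) / n))
  .setScale(2, RoundingMode.DOWN).toDouble

println(targetCompleteness) // 0.77

Rounding DOWN rather than HALF_UP keeps the suggested minimum completeness slightly below the raw estimate, which is the safer direction for an automatically generated constraint.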