java.math.BigDecimal Scala Examples

The following examples show how to use java.math.BigDecimal in Scala. Each example is drawn from an open-source project; the header above each one names the original source file, project, and license.
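For orientation, here is a minimal, standalone sketch of the basics (not taken from any of the projects below): java.math.BigDecimal is an immutable, arbitrary-precision signed decimal. Prefer the String constructor or BigDecimal.valueOf(double) over the double constructor, and use compareTo when scale should not affect equality.

import java.math.{BigDecimal, RoundingMode}

val a = new BigDecimal("0.1")
val b = BigDecimal.valueOf(0.2)        // goes through Double.toString, so exactly 0.2
val sum = a.add(b)                     // 0.3
sum.compareTo(new BigDecimal("0.30"))  // 0: numerically equal (equals is false, scales differ)
new BigDecimal("1").divide(new BigDecimal("3"), 10, RoundingMode.HALF_UP) // 0.3333333333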
Example 1
Source File: VectorAssemblerModelSpec.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.feature

import java.math.BigDecimal

import ml.combust.mleap.core.types._
import org.apache.spark.ml.linalg.Vectors
import org.scalatest.FunSpec


class VectorAssemblerModelSpec extends FunSpec {
  val assembler = VectorAssemblerModel(Seq(
    ScalarShape(), ScalarShape(),
    TensorShape(2),
    TensorShape(5)))

  describe("#apply") {
    it("assembles doubles and vectors into a new vector") {
      val expectedArray = Array(45.0, 76.8, 23.0, 45.6, 0.0, 22.3, 45.6, 0.0, 99.3)

      assert(assembler(Array(45.0,
        new BigDecimal(76.8),
        Vectors.dense(Array(23.0, 45.6)),
        Vectors.sparse(5, Array(1, 2, 4), Array(22.3, 45.6, 99.3)))).toArray.sameElements(expectedArray))
    }
  }

  describe("input/output schema") {
    it("has the right input schema") {
      assert(assembler.inputSchema.fields == Seq(
        StructField("input0", ScalarType.Double),
        StructField("input1", ScalarType.Double),
        StructField("input2", TensorType.Double(2)),
        StructField("input3", TensorType.Double(5))))
    }

    it("has the right output schema") {
      assert(assembler.outputSchema.fields == Seq(StructField("output", TensorType.Double(9))))
    }
  }
} 
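A note on new BigDecimal(76.8) in the test above: the double constructor captures the exact binary value of the nearest double, which is not exactly 76.8. The test still passes because the assembler converts the value back to Double, but when the printed digits matter the String form is safer:

new java.math.BigDecimal(76.8)     // the exact value of the double nearest 76.8, not 76.8
new java.math.BigDecimal("76.8")   // exactly 76.8
java.math.BigDecimal.valueOf(76.8) // also 76.8, via Double.toString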
Example 2
Source File: DataGens.scala    From spark-vector   with Apache License 2.0
package com.actian.spark_vector

import java.math.{BigDecimal, RoundingMode}
import java.{ sql => jsql }
import java.util.Calendar

import scala.collection.Seq
import scala.util.Try

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalacheck.Gen

import com.actian.spark_vector.colbuffer.util.MillisecondsInDay

object DataGens {
  import com.actian.spark_vector.DataTypeGens._
  import org.scalacheck.Arbitrary._
  import org.scalacheck.Gen._
  import scala.collection.JavaConverters._

  val DefaultMaxRows = 500

  val booleanGen: Gen[Boolean] = arbitrary[Boolean]

  val byteGen: Gen[Byte] = arbitrary[Byte]

  val shortGen: Gen[Short] = arbitrary[Short]

  val intGen: Gen[Int] = arbitrary[Int]

  val longGen: Gen[Long] = arbitrary[Long]

  // FIXME allow arbitrary doubles (and filter externally for vector tests)
  val floatGen: Gen[Float] = arbitrary[Float].map(f => if (f.abs > 1e-38) f else 0.0f)

  // FIXME allow arbitrary doubles (and filter externally for vector tests)
  val doubleGen: Gen[Double] = for {
    neg <- arbitrary[Boolean]
    digits <- listOfN(12, choose(0, 9))
  } yield s"${if (neg) "-" else ""}1.${digits.mkString("")}".toDouble

  val decimalGen: Gen[BigDecimal] = arbitrary[scala.BigDecimal].retryUntil(bd =>
    bd.scale <= 12 && bd.scale >= 0 && bd.precision <= 26 &&
    Try { new BigDecimal(bd.toString) }.isSuccess).map(bd => new BigDecimal(bd.toString))

  private val dateValueGen: Gen[Long] =
    choose(-3600L * 1000 * 24 * 100000L, 3600L * 1000 * 24 * 100000L)

  // @note normalize getTime so that we don't have diffs more than 1 day in between our {JDBC,Spark}results
  val dateGen: Gen[jsql.Date] = dateValueGen.map(d => new jsql.Date(d / MillisecondsInDay * MillisecondsInDay))

  val timestampGen: Gen[jsql.Timestamp] = for (ms <- dateValueGen) yield new jsql.Timestamp(ms)

  // FIXME allow empty strings (and filter externally for vector tests)
  // @note we do not allow invalid UTF8 chars to be generated (from D800 to DFFF incl)
  val stringGen: Gen[String] =
    listOfN(choose(1, 512).sample.getOrElse(1), arbitrary[Char])
      .map(_.mkString)
      .map(s => s.filter(c => Character.isDefined(c) && c != '\u0000' && (c < '\uD800' || c > '\uDFFF')))

  def valueGen(dataType: DataType): Gen[Any] = dataType match {
    case BooleanType => booleanGen
    case ByteType => byteGen
    case ShortType => shortGen
    case IntegerType => intGen
    case LongType => longGen
    case FloatType => floatGen
    case DoubleType => doubleGen
    case TimestampType => timestampGen
    case DateType => dateGen
    case StringType => stringGen
    case _: DecimalType => decimalGen
    case _ => throw new Exception("Invalid data type.")
  }

  def nullableValueGen(field: StructField): Gen[Any] = {
    val gen = valueGen(field.dataType)
    if (field.nullable) frequency(1 -> gen, 10 -> const(null)) else gen
  }

  def rowGen(schema: StructType): Gen[Row] =
    sequence(schema.fields.map(f => nullableValueGen(f))).map(l => Row.fromSeq(l.asScala)) // TODO Huh? Why ju.ArrayList?!?

  def dataGenFor(schema: StructType, maxRows: Int): Gen[Seq[Row]] = for {
    numRows <- choose(1, maxRows)
    rows <- listOfN(numRows, rowGen(schema))
  } yield rows

  case class TypedData(dataType: StructType, data: Seq[Row])

  val dataGen: Gen[TypedData] = for {
    schema <- schemaGen
    data <- dataGenFor(schema, DefaultMaxRows)
  } yield TypedData(schema, data)
  
  val allDataGen: Gen[TypedData] = for {
    schema <- allTypesSchemaGen
    data <- dataGenFor(schema, DefaultMaxRows)
  } yield TypedData(schema, data)
  
} 
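decimalGen above produces a java.math.BigDecimal by printing a generated scala.BigDecimal and reparsing it, with the Try in retryUntil doubling as a parseability check. When no reparse check is needed, scala.math.BigDecimal already wraps a java.math.BigDecimal and exposes it directly; a small sketch:

val sbd = scala.math.BigDecimal("12.345")
val jbd: java.math.BigDecimal = sbd.underlying // the wrapped java.math.BigDecimal (also available as sbd.bigDecimal)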
Example 3
Source File: KMScalaKit.scala    From Swallow   with Apache License 2.0
import java.math.BigDecimal

object KMScalaKit {

  def bigDecimalDoubleAdd(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.add(b).doubleValue()
  }

  def bigDecimalDoubleAdd(number1: Double, number2: Double, number3: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    val c = new BigDecimal(number3.toString)
    a.add(b).add(c).doubleValue()
  }

  def bigDecimalDoubleSub(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.subtract(b).doubleValue()
  }

  def bigDecimalDoubleMul(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.multiply(b).doubleValue()
  }

  def bigDecimalDoubleMul(number1: Double, number2: Double, number3: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    val c = new BigDecimal(number3.toString)
    a.multiply(b).multiply(c).doubleValue()
  }

  def bigDecimalDoubleDiv(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    // divide without a scale/RoundingMode throws ArithmeticException when the
    // exact quotient is non-terminating (e.g. 1 / 3)
    a.divide(b).doubleValue()
  }
}
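bigDecimalDoubleDiv above calls divide without a scale or rounding mode, so it throws ArithmeticException whenever the exact quotient does not terminate (for example 1 / 3). A safer variant, sketched here with a hypothetical name and an arbitrary scale of 16:

import java.math.{BigDecimal, RoundingMode}

def bigDecimalDoubleDivSafe(number1: Double, number2: Double): Double = {
  val a = new BigDecimal(number1.toString)
  val b = new BigDecimal(number2.toString)
  a.divide(b, 16, RoundingMode.HALF_UP).doubleValue() // round to 16 decimal places
}

bigDecimalDoubleDivSafe(1.0, 3.0) // 0.3333333333333333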
Example 4
Source File: TypeCast.scala    From spark-google-spreadsheets   with Apache License 2.0
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
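The DecimalType branch strips grouping commas before parsing, because the BigDecimal string constructor rejects them:

// new java.math.BigDecimal("1,234.56")                  // NumberFormatException
new java.math.BigDecimal("1,234.56".replaceAll(",", "")) // 1234.56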
Example 5
Source File: JsonHadoopFsRelationSuite.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        hiveContext.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = hiveContext.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        hiveContext.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = hiveContext.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        hiveContext.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
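One detail in the decimal test above (and in its near-duplicates among the later examples): the schema declares DecimalType(7, 2), so Spark normalizes every value to scale 2 when the DataFrame is built, and the written and read-back sides agree even for new BigDecimal("10000"). In plain java.math.BigDecimal terms:

new java.math.BigDecimal("10000").setScale(2) // 10000.00, the form DecimalType(7, 2) stores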
Example 6
Source File: TypeCast.scala    From mimir   with Apache License 2.0
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
Example 7
Source File: JsonHadoopFsRelationSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(
          CatalogUtils.URIToString(makeQualifiedPath(file.getCanonicalPath)), s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 8
Source File: JsonHadoopFsRelationSuite.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  import sqlContext._

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }
  // save()/load() - partitioned table - simple queries - partition columns in data
  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }
  // Save complex types to JSON
  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
  // Save decimal type to JSON
  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 9
Source File: JsonHadoopFsRelationSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 10
Source File: JsonHadoopFsRelationSuite.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 11
Source File: FileUtils.scala    From spark-distcp   with Apache License 2.0
package com.coxautodata.utils

import java.math.{BigDecimal, BigInteger}

// Adapted from: https://jira.apache.org/jira/secure/attachment/12542305/roundedByteCountToDisplaySize.patch
object FileUtils {

  val ONE_KB = 1024
  val ONE_KB_BI: BigInteger = BigInteger.valueOf(ONE_KB)
  val ONE_MB: Long = ONE_KB * ONE_KB
  val ONE_MB_BI: BigInteger = ONE_KB_BI.multiply(ONE_KB_BI)
  val ONE_GB: Long = ONE_KB * ONE_MB
  val ONE_GB_BI: BigInteger = ONE_KB_BI.multiply(ONE_MB_BI)
  val ONE_TB: Long = ONE_KB * ONE_GB
  val ONE_TB_BI: BigInteger = ONE_KB_BI.multiply(ONE_GB_BI)
  val ONE_PB: Long = ONE_KB * ONE_TB
  val ONE_PB_BI: BigInteger = ONE_KB_BI.multiply(ONE_TB_BI)
  val ONE_EB: Long = ONE_KB * ONE_PB
  val ONE_EB_BI: BigInteger = ONE_KB_BI.multiply(ONE_PB_BI)
  val ONE_ZB: BigInteger = BigInteger.valueOf(ONE_KB).multiply(BigInteger.valueOf(ONE_EB))
  val ONE_YB: BigInteger = ONE_KB_BI.multiply(ONE_ZB)

  def byteCountToDisplaySize(size: BigInteger): String = {
    val sizeBD = new BigDecimal(size)
    def display(divisor: BigInteger, unit: String): String =
      getThreeSigFigs(sizeBD.divide(new BigDecimal(divisor))) + s" $unit (${String.valueOf(size)} bytes)"

    if (size.divide(ONE_YB).compareTo(BigInteger.ZERO) > 0) display(ONE_YB, "YB")
    else if (size.divide(ONE_ZB).compareTo(BigInteger.ZERO) > 0) display(ONE_ZB, "ZB")
    else if (size.divide(ONE_EB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_EB_BI, "EB")
    else if (size.divide(ONE_PB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_PB_BI, "PB")
    else if (size.divide(ONE_TB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_TB_BI, "TB")
    else if (size.divide(ONE_GB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_GB_BI, "GB")
    else if (size.divide(ONE_MB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_MB_BI, "MB")
    else if (size.divide(ONE_KB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_KB_BI, "KB")
    else String.valueOf(size) + " bytes"
  }

  def byteCountToDisplaySize(size: Long): String = byteCountToDisplaySize(BigInteger.valueOf(size))

  private def getThreeSigFigs(size: BigDecimal): String = {
    val (isDecimal, _, sizeS) = size.toString.foldLeft((false, 0, "")) {
      case ((decimal, count, agg), c) =>
        if (c == '.' && !decimal) (true, count, agg + c)
        else if (count < 3 || !decimal) (decimal, count + 1, agg + c)
        else (decimal, count + 1, agg)
    }

    if (isDecimal) sizeS.reverse.dropWhile(c => c == '0').reverse.stripSuffix(".")
    else sizeS

  }

} 
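For illustration, a few expected outputs of byteCountToDisplaySize above (the values follow directly from the code; division by powers of 1024 always terminates, since they are powers of two):

FileUtils.byteCountToDisplaySize(500L)  // "500 bytes"
FileUtils.byteCountToDisplaySize(1024L) // "1 KB (1024 bytes)"
FileUtils.byteCountToDisplaySize(1536L) // "1.5 KB (1536 bytes)"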
Example 12
Source File: JsonHadoopFsRelationSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 13
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
    path: String,
    dataSchema: StructType,
    context: TaskAttemptContext,
    options: Map[String, String]) extends OutputWriter {
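  // Note: this excerpt omits the member definitions the methods below rely on
  // (recordWriter, useHeader, currentRowNum, defaultSheetName, simpleConverter);
  // they are initialized in the full source file.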
  
  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName)
        } else {
          simpleObject(i)=obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x<- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }

} 
Example 14
Source File: JsonHadoopFsRelationSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(
          CatalogUtils.URIToString(makeQualifiedPath(file.getCanonicalPath)), s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 15
Source File: package.scala    From Sidechains-SDK   with MIT License
package com.horizen

import java.math.{BigDecimal, BigInteger, MathContext}

import com.google.common.primitives.{Bytes, Ints}
import com.horizen.vrf.VrfOutput
import scorex.util.ModifierId
import supertagged.TaggedType

package object consensus {
  val merkleTreeHashLen: Int = 32
  val sha256HashLen: Int = 32

  val consensusHardcodedSaltString: Array[Byte] = "TEST".getBytes()
  val forgerStakePercentPrecision: BigDecimal = BigDecimal.valueOf(1000000) // 1 / forgerStakePercentPrecision is the smallest forger stake percentage that can still forge
  val stakeConsensusDivideMathContext: MathContext = MathContext.DECIMAL128 // used for division; without a MathContext, ArithmeticException is thrown when the quotient is non-terminating

  object ConsensusEpochNumber extends TaggedType[Int]
  type ConsensusEpochNumber = ConsensusEpochNumber.Type
  def intToConsensusEpochNumber(consensusEpochNumber: Int): ConsensusEpochNumber = ConsensusEpochNumber @@ consensusEpochNumber

  
  object ConsensusEpochId extends TaggedType[String]
  type ConsensusEpochId = ConsensusEpochId.Type
  def blockIdToEpochId(blockId: ModifierId): ConsensusEpochId = ConsensusEpochId @@ blockId
  def lastBlockIdInEpochId(epochId: ConsensusEpochId): ModifierId = ModifierId @@ epochId.untag(ConsensusEpochId)

  object ConsensusSlotNumber extends TaggedType[Int]
  type ConsensusSlotNumber = ConsensusSlotNumber.Type
  def intToConsensusSlotNumber(consensusSlotNumber: Int): ConsensusSlotNumber = ConsensusSlotNumber @@ consensusSlotNumber

  //Slot number starting from genesis block
  object ConsensusAbsoluteSlotNumber extends TaggedType[Int]
  type ConsensusAbsoluteSlotNumber = ConsensusAbsoluteSlotNumber.Type
  def intToConsensusAbsoluteSlotNumber(consensusSlotNumber: Int): ConsensusAbsoluteSlotNumber = ConsensusAbsoluteSlotNumber @@ consensusSlotNumber


  object ConsensusNonce extends TaggedType[Array[Byte]]
  type ConsensusNonce = ConsensusNonce.Type
  def byteArrayToConsensusNonce(bytes: Array[Byte]): ConsensusNonce = ConsensusNonce @@ bytes

  object VrfMessage extends TaggedType[Array[Byte]]
  type VrfMessage = VrfMessage.Type

  def buildVrfMessage(slotNumber: ConsensusSlotNumber, nonce: NonceConsensusEpochInfo): VrfMessage = {
    val slotNumberBytes = Ints.toByteArray(slotNumber)
    val nonceBytes = nonce.consensusNonce

    val resBytes = Bytes.concat(slotNumberBytes, nonceBytes, consensusHardcodedSaltString)
    VrfMessage @@ resBytes
  }

  def vrfOutputToPositiveBigInteger(vrfOutput: VrfOutput): BigInteger = {
    new BigInteger(1, vrfOutput.bytes())
  }

  def vrfProofCheckAgainstStake(vrfOutput: VrfOutput, actualStake: Long, totalStake: Long): Boolean = {
    val requiredStakePercentage: BigDecimal = vrfOutputToRequiredStakePercentage(vrfOutput)
    val actualStakePercentage: BigDecimal = new BigDecimal(actualStake).divide(new BigDecimal(totalStake), stakeConsensusDivideMathContext)

    requiredStakePercentage.compareTo(actualStakePercentage) match {
      case -1 => true //required percentage is less than actual
      case  0 => true //required percentage is equal to actual
      case  _ => false //any other case
    }
  }

  // @TODO shall be changed by adding "active slots coefficient" according to Ouroboros Praos Whitepaper (page 10)
  def vrfOutputToRequiredStakePercentage(vrfOutput: VrfOutput): BigDecimal = {
    val hashAsBigDecimal: BigDecimal = new BigDecimal(vrfOutputToPositiveBigInteger(vrfOutput))

    hashAsBigDecimal
      .remainder(forgerStakePercentPrecision) //got random number from 0 to forgerStakePercentPrecision - 1
      .divide(forgerStakePercentPrecision, stakeConsensusDivideMathContext) //got random number from 0 to 0.(9)
  }
} 
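The stakeConsensusDivideMathContext used above is what keeps vrfProofCheckAgainstStake from failing on non-terminating stake ratios: java.math.BigDecimal.divide without a MathContext throws on such quotients. A minimal sketch:

import java.math.{BigDecimal, MathContext}

val actual = new BigDecimal(1)
val total = new BigDecimal(3)
// actual.divide(total)                      // ArithmeticException: Non-terminating decimal expansion
actual.divide(total, MathContext.DECIMAL128) // 0.3333... rounded to 34 significant digits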
Example 16
Source File: MainchainApiRequests.scala    From Sidechains-SDK   with MIT License
package com.horizen.mainchain.api

import com.fasterxml.jackson.annotation.JsonView
import com.horizen.box.WithdrawalRequestBox
import com.horizen.params.NetworkParams
import com.horizen.serialization.Views
import java.math.BigDecimal

import com.horizen.utils.BytesUtils

@JsonView(Array(classOf[Views.Default]))
case class SidechainInfoResponse
  (sidechainId: Array[Byte],
   balance: String,
   creatingTxHash: Array[Byte],
   createdInBlock: Array[Byte],
   createdAtBlockHeight: Long,
   withdrawalEpochLength: Long
  )

@JsonView(Array(classOf[Views.Default]))
case class BackwardTransferEntry
  (pubkeyhash: Array[Byte],
   amount: String)
{
  require(pubkeyhash != null, "Address MUST be NOT NULL.")
}

@JsonView(Array(classOf[Views.Default]))
case class SendCertificateRequest
  (sidechainId: Array[Byte],
   epochNumber: Int,
   endEpochBlockHash: Array[Byte],
   proofBytes: Array[Byte],
   quality: Long,
   backwardTransfers: Seq[BackwardTransferEntry],
   fee: String = "0.00001")
{
  require(sidechainId.length == 32, "SidechainId MUST have length 32 bytes.")
  require(endEpochBlockHash != null, "End epoch block hash MUST be NOT NULL.")
}

case class SendCertificateResponse
  (certificateId: Array[Byte])

case class GetRawCertificateRequest
  (certificateId: Array[Byte])

@JsonView(Array(classOf[Views.Default]))
case class GetRawCertificateResponse
  (hex: Array[Byte])

object CertificateRequestCreator {

  val ZEN_COINS_DIVISOR: BigDecimal = new BigDecimal(100000000)

  def create(epochNumber: Int,
             endEpochBlockHash: Array[Byte],
             proofBytes: Array[Byte],
             quality: Long,
             withdrawalRequestBoxes: Seq[WithdrawalRequestBox],
             params: NetworkParams) : SendCertificateRequest = {
    SendCertificateRequest(
      params.sidechainId,
      epochNumber,
      endEpochBlockHash,
      proofBytes,
      quality,
      // Note: we should send BT entries public key hashes in reversed BE endianness.
      withdrawalRequestBoxes.map(wrb => BackwardTransferEntry(BytesUtils.reverseBytes(wrb.proposition().bytes()), new BigDecimal(wrb.value()).divide(ZEN_COINS_DIVISOR).toPlainString)))
  }
} 
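The last line of create converts box values from satoshis to ZEN by dividing by ZEN_COINS_DIVISOR (10^8); that division always terminates, and toPlainString keeps the result out of scientific notation. A small sketch with a hypothetical box value:

val satoshis = 1000000L // hypothetical WithdrawalRequestBox value
new java.math.BigDecimal(satoshis)
  .divide(new java.math.BigDecimal(100000000))
  .toPlainString // "0.01"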
Example 17
Source File: KahanSpec.scala    From flint   with Apache License 2.0
package com.twosigma.flint.math

import java.math.BigDecimal

import com.twosigma.flint.util.Timer
import org.scalatest.FlatSpec

import scala.util.Random

class KahanSpec extends FlatSpec {

  "Kahan" should "sum correctly in wiki example" in {
    val kahan = new Kahan()
    var i = 0
    while (i < 1000) {
      kahan.add(1.0)
      kahan.add(1.0e100)
      kahan.add(1.0)
      kahan.add(-1.0e100)
      i += 1
    }

    assert(kahan.value === 2000.0)
  }

  it should "sum correctly for constants of Double(s)" in {
    val kahan = new Kahan()
    val x = 1000.0002
    var sum = 0.0
    val bigDecimal = new BigDecimal(x)
    var bigDecimalSum = new BigDecimal(0.0)
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      sum += x
      kahan.add(x)
      bigDecimalSum = bigDecimalSum.add(bigDecimal)
      i += 1
    }
    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(kahan.value))
          .doubleValue()
      ) < 1.0e-5
    )

    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(sum))
          .doubleValue()
      ) > 1.0
    )
  }

  it should "subtract correctly" in {
    val kahan1 = new Kahan()
    val kahan2 = new Kahan()
    val x = 1000.0002
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      kahan1.add(x)
      kahan2.add(x)
      kahan2.add(x)
      i += 1
    }
    kahan2.subtract(kahan1)
    assert(kahan2.value === kahan1.value)
  }
} 
Example 18
Source File: TypeCast.scala    From spark-select   with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1)
      match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
} 
Example 19
Source File: MapToJavaPropertiesConversionSpec.scala    From kafka-configurator   with BSD 3-Clause "New" or "Revised" License
package com.sky.kafka.utils

import java.math.BigDecimal
import java.util.Properties

import common.BaseSpec

class MapToJavaPropertiesConversionSpec extends BaseSpec {

  "mapToProperties" should "convert objects into their string representation" in {
    MapToJavaPropertiesConversion.mapToProperties(Map[String, Object](
      "object" -> new BigDecimal("123.456")
    )) shouldBe new Properties {
      setProperty("object", "123.456")
    }
  }

  it should "convert classes into the full class name" in {
    MapToJavaPropertiesConversion.mapToProperties(Map[String, Object](
      "class" -> classOf[Exception]
    )) shouldBe new Properties {
      setProperty("class", "java.lang.Exception")
    }
  }
} 
Example 20
Source File: MatfastSerializer.scala    From MatRel   with Apache License 2.0
package org.apache.spark.sql.matfast.util

import java.math.BigDecimal
import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.ResourcePool

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.matfast.matrix._
import org.apache.spark.sql.types.Decimal
import org.apache.spark.util.MutablePair


private[matfast] class MatfastSerializer(conf: SparkConf) extends KryoSerializer(conf) {
  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[scala.math.BigDecimal], new ScalaBigDecimalSerializer)

    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])
    kryo.register(classOf[DenseMatrix])
    kryo.register(classOf[SparseMatrix])

    kryo.setReferences(false)
    kryo
  }
}

private[matfast] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) {
  val ser: MatfastSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new MatfastSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

private[matfast] object MatfastSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(50)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow()
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] = {
    acquireRelease { k =>
      k.serialize(o).array()
    }
  }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[matfast] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[matfast] class ScalaBigDecimalSerializer extends Serializer[scala.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: scala.math.BigDecimal): Unit = {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[scala.math.BigDecimal]): scala.math.BigDecimal = {
    scala.math.BigDecimal(input.readString())
  }
}
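A round-trip sketch using the pooled serializer above (it works without a running SparkEnv, since the pool falls back to a fresh SparkConf):

val bytes = MatfastSerializer.serialize(new java.math.BigDecimal("123.456"))
val bd = MatfastSerializer.deserialize[java.math.BigDecimal](bytes)
assert(bd == new java.math.BigDecimal("123.456"))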
Example 21
Source File: SQLCompatibilityFunctionSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql

import java.math.BigDecimal
import java.sql.Timestamp

import org.apache.spark.sql.test.SharedSQLContext


class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext {

  test("ifnull") {
    checkAnswer(
      sql("SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT ifnull(1, 2.1d), ifnull(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nullif") {
    checkAnswer(
      sql("SELECT nullif('x', 'x'), nullif('x', 'y')"),
      Row(null, "x"))

    // Type coercion
    checkAnswer(
      sql("SELECT nullif(1, 2.1d), nullif(1, 1.0d)"),
      Row(1.0, null))
  }

  test("nvl") {
    checkAnswer(
      sql("SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl(1, 2.1d), nvl(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nvl2") {
    checkAnswer(
      sql("SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)"),
      Row("y", "x", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"),
      Row(2.1, 1.0))
  }

  test("SPARK-16730 cast alias functions for Hive compatibility") {
    checkAnswer(
      sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"),
      Row(true, 1.toByte, 1.toShort, 1, 1L))

    checkAnswer(
      sql("SELECT float(1), double(1), decimal(1)"),
      Row(1.toFloat, 1.0, new BigDecimal(1)))

    checkAnswer(
      sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"),
      Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0)))

    checkAnswer(
      sql("SELECT string(1)"),
      Row("1"))

    // Error handling: only one argument
    val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage
    assert(errorMsg.contains("Function string accepts only one argument"))
  }
}