java.math.BigDecimal Scala Examples

The following examples show how to use java.math.BigDecimal in Scala. Each example is drawn from an open-source project; the header above each one names the original source file, project, and license.
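For orientation, here is a minimal, standalone sketch of the basics (not taken from any of the projects below): java.math.BigDecimal is an immutable, arbitrary-precision signed decimal. Prefer the String constructor or BigDecimal.valueOf(double) over the double constructor, and use compareTo when scale should not affect equality.

import java.math.{BigDecimal, RoundingMode}

val a = new BigDecimal("0.1")
val b = BigDecimal.valueOf(0.2)        // goes through Double.toString, so exactly 0.2
val sum = a.add(b)                     // 0.3
sum.compareTo(new BigDecimal("0.30"))  // 0: numerically equal (equals is false, scales differ)
new BigDecimal("1").divide(new BigDecimal("3"), 10, RoundingMode.HALF_UP) // 0.3333333333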
Example 1
Source File: VectorAssemblerModelSpec.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.feature

import java.math.BigDecimal

import ml.combust.mleap.core.types._
import org.apache.spark.ml.linalg.Vectors
import org.scalatest.FunSpec


class VectorAssemblerModelSpec extends FunSpec {
  val assembler = VectorAssemblerModel(Seq(
    ScalarShape(), ScalarShape(),
    TensorShape(2),
    TensorShape(5)))

  describe("#apply") {
    it("assembles doubles and vectors into a new vector") {
      val expectedArray = Array(45.0, 76.8, 23.0, 45.6, 0.0, 22.3, 45.6, 0.0, 99.3)

      assert(assembler(Array(45.0,
        new BigDecimal(76.8),
        Vectors.dense(Array(23.0, 45.6)),
        Vectors.sparse(5, Array(1, 2, 4), Array(22.3, 45.6, 99.3)))).toArray.sameElements(expectedArray))
    }
  }

  describe("input/output schema") {
    it("has the right input schema") {
      assert(assembler.inputSchema.fields == Seq(
        StructField("input0", ScalarType.Double),
        StructField("input1", ScalarType.Double),
        StructField("input2", TensorType.Double(2)),
        StructField("input3", TensorType.Double(5))))
    }

    it("has the right output schema") {
      assert(assembler.outputSchema.fields == Seq(StructField("output", TensorType.Double(9))))
    }
  }
} 
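A note on new BigDecimal(76.8) in the test above: the double constructor captures the exact binary value of the nearest double, which is not exactly 76.8. The test still passes because the assembler converts the value back to Double, but when the printed digits matter the String form is safer:

new java.math.BigDecimal(76.8)     // the exact value of the double nearest 76.8, not 76.8
new java.math.BigDecimal("76.8")   // exactly 76.8
java.math.BigDecimal.valueOf(76.8) // also 76.8, via Double.toString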
Example 2
Source File: DataGens.scala    From spark-vector   with Apache License 2.0
package com.actian.spark_vector

import java.math.{BigDecimal, RoundingMode}
import java.{ sql => jsql }
import java.util.Calendar

import scala.collection.Seq
import scala.util.Try

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalacheck.Gen

import com.actian.spark_vector.colbuffer.util.MillisecondsInDay

object DataGens {
  import com.actian.spark_vector.DataTypeGens._
  import org.scalacheck.Arbitrary._
  import org.scalacheck.Gen._
  import scala.collection.JavaConverters._

  val DefaultMaxRows = 500

  val booleanGen: Gen[Boolean] = arbitrary[Boolean]

  val byteGen: Gen[Byte] = arbitrary[Byte]

  val shortGen: Gen[Short] = arbitrary[Short]

  val intGen: Gen[Int] = arbitrary[Int]

  val longGen: Gen[Long] = arbitrary[Long]

  // FIXME allow arbitrary doubles (and filter externally for vector tests)
  val floatGen: Gen[Float] = arbitrary[Float].map(f => if (f.abs > 1e-38) f else 0.0f)

  // FIXME allow arbitrary doubles (and filter externally for vector tests)
  val doubleGen: Gen[Double] = for {
    neg <- arbitrary[Boolean]
    digits <- listOfN(12, choose(0, 9))
  } yield s"${if (neg) "-" else ""}1.${digits.mkString("")}".toDouble

  val decimalGen: Gen[BigDecimal] = arbitrary[scala.BigDecimal].retryUntil(bd =>
    bd.scale <= 12 && bd.scale >= 0 && bd.precision <= 26 &&
    Try { new BigDecimal(bd.toString) }.isSuccess).map(bd => new BigDecimal(bd.toString))

  private val dateValueGen: Gen[Long] =
    choose(-3600L * 1000 * 24 * 100000L, 3600L * 1000 * 24 * 100000L)

  // @note normalize getTime so that we don't have diffs more than 1 day in between our {JDBC,Spark}results
  val dateGen: Gen[jsql.Date] = dateValueGen.map(d => new jsql.Date(d / MillisecondsInDay * MillisecondsInDay))

  val timestampGen: Gen[jsql.Timestamp] = for (ms <- dateValueGen) yield new jsql.Timestamp(ms)

  // FIXME allow empty strings (and filter externally for vector tests)
  // @note we do not allow invalid UTF8 chars to be generated (from D800 to DFFF incl)
  val stringGen: Gen[String] =
    listOfN(choose(1, 512).sample.getOrElse(1), arbitrary[Char])
      .map(_.mkString)
      .map(s => s.filter(c => Character.isDefined(c) && c != '\u0000' && (c < '\uD800' || c > '\uDFFF')))

  def valueGen(dataType: DataType): Gen[Any] = dataType match {
    case BooleanType => booleanGen
    case ByteType => byteGen
    case ShortType => shortGen
    case IntegerType => intGen
    case LongType => longGen
    case FloatType => floatGen
    case DoubleType => doubleGen
    case TimestampType => timestampGen
    case DateType => dateGen
    case StringType => stringGen
    case _: DecimalType => decimalGen
    case _ => throw new Exception("Invalid data type.")
  }

  def nullableValueGen(field: StructField): Gen[Any] = {
    val gen = valueGen(field.dataType)
    if (field.nullable) frequency(1 -> gen, 10 -> const(null)) else gen
  }

  def rowGen(schema: StructType): Gen[Row] =
    sequence(schema.fields.map(f => nullableValueGen(f))).map(l => Row.fromSeq(l.asScala)) // TODO Huh? Why ju.ArrayList?!?

  def dataGenFor(schema: StructType, maxRows: Int): Gen[Seq[Row]] = for {
    numRows <- choose(1, maxRows)
    rows <- listOfN(numRows, rowGen(schema))
  } yield rows

  case class TypedData(dataType: StructType, data: Seq[Row])

  val dataGen: Gen[TypedData] = for {
    schema <- schemaGen
    data <- dataGenFor(schema, DefaultMaxRows)
  } yield TypedData(schema, data)
  
  val allDataGen: Gen[TypedData] = for {
    schema <- allTypesSchemaGen
    data <- dataGenFor(schema, DefaultMaxRows)
  } yield TypedData(schema, data)
  
} 
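decimalGen above produces a java.math.BigDecimal by printing a generated scala.BigDecimal and reparsing it, with the Try in retryUntil doubling as a parseability check. When no reparse check is needed, scala.math.BigDecimal already wraps a java.math.BigDecimal and exposes it directly; a small sketch:

val sbd = scala.math.BigDecimal("12.345")
val jbd: java.math.BigDecimal = sbd.underlying // the wrapped java.math.BigDecimal (also available as sbd.bigDecimal)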
Example 3
Source File: KMScalaKit.scala    From Swallow   with Apache License 2.0
import java.math.BigDecimal

object KMScalaKit {

  def bigDecimalDoubleAdd(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.add(b).doubleValue()
  }

  def bigDecimalDoubleAdd(number1: Double, number2: Double, number3: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    val c = new BigDecimal(number3.toString)
    a.add(b).add(c).doubleValue()
  }

  def bigDecimalDoubleSub(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.subtract(b).doubleValue()
  }

  def bigDecimalDoubleMul(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    a.multiply(b).doubleValue()
  }

  def bigDecimalDoubleMul(number1: Double, number2: Double, number3: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    val c = new BigDecimal(number3.toString)
    a.multiply(b).multiply(c).doubleValue()
  }

  def bigDecimalDoubleDiv(number1: Double, number2: Double): Double = {
    val a = new BigDecimal(number1.toString)
    val b = new BigDecimal(number2.toString)
    // divide without a scale/RoundingMode throws ArithmeticException when the
    // exact quotient is non-terminating (e.g. 1 / 3)
    a.divide(b).doubleValue()
  }
}
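bigDecimalDoubleDiv above calls divide without a scale or rounding mode, so it throws ArithmeticException whenever the exact quotient does not terminate (for example 1 / 3). A safer variant, sketched here with a hypothetical name and an arbitrary scale of 16:

import java.math.{BigDecimal, RoundingMode}

def bigDecimalDoubleDivSafe(number1: Double, number2: Double): Double = {
  val a = new BigDecimal(number1.toString)
  val b = new BigDecimal(number2.toString)
  a.divide(b, 16, RoundingMode.HALF_UP).doubleValue() // round to 16 decimal places
}

bigDecimalDoubleDivSafe(1.0, 3.0) // 0.3333333333333333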
Example 4
Source File: TypeCast.scala    From spark-google-spreadsheets   with Apache License 2.0
package com.github.potix2.spark.google.spreadsheets.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheets] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
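The DecimalType branch strips grouping commas before parsing, because the BigDecimal string constructor rejects them:

// new java.math.BigDecimal("1,234.56")                  // NumberFormatException
new java.math.BigDecimal("1,234.56".replaceAll(",", "")) // 1234.56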
Example 5
Source File: JsonHadoopFsRelationSuite.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        hiveContext.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = hiveContext.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        hiveContext.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = hiveContext.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        hiveContext.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
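One detail in the decimal test above (and in its near-duplicates among the later examples): the schema declares DecimalType(7, 2), so Spark normalizes every value to scale 2 when the DataFrame is built, and the written and read-back sides agree even for new BigDecimal("10000"). In plain java.math.BigDecimal terms:

new java.math.BigDecimal("10000").setScale(2) // 10000.00, the form DecimalType(7, 2) stores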
Example 6
Source File: TypeCast.scala    From mimir   with Apache License 2.0
package mimir.exec.spark.datasource.google.spreadsheet

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.NumberFormat
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  private[spreadsheet] def castTo(
                                   datum: String,
                                   castType: DataType,
                                   nullable: Boolean = true
                                 ): Any = {
    castType match {
      case _: ByteType => datum.toByte
      case _: ShortType => datum.toShort
      case _: IntegerType => datum.toInt
      case _: LongType => datum.toLong
      case _: FloatType => Try(datum.toFloat)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).floatValue())
      case _: DoubleType => Try(datum.toDouble)
        .getOrElse(NumberFormat.getInstance(Locale.getDefault()).parse(datum).doubleValue())
      case _: BooleanType => datum.toBoolean
      case _: DecimalType => new BigDecimal(datum.replaceAll(",", ""))
      case _: TimestampType => Timestamp.valueOf(datum)
      case _: DateType => Date.valueOf(datum)
      case _: StringType => datum
      case _ => throw new RuntimeException(s"Unsupported type: ${castType.typeName}")

    }
  }
} 
Example 7
Source File: JsonHadoopFsRelationSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(
          CatalogUtils.URIToString(makeQualifiedPath(file.getCanonicalPath)), s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 8
Source File: JsonHadoopFsRelationSuite.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  import sqlContext._

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }
  // save()/load() - partitioned table - simple queries - partition columns in data
  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }
  // Save complex types to JSON
  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
  // Save decimal type to JSON
  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 9
Source File: JsonHadoopFsRelationSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 10
Source File: JsonHadoopFsRelationSuite.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 11
Source File: FileUtils.scala    From spark-distcp   with Apache License 2.0
package com.coxautodata.utils

import java.math.{BigDecimal, BigInteger}

// Adapted from: https://jira.apache.org/jira/secure/attachment/12542305/roundedByteCountToDisplaySize.patch
object FileUtils {

  val ONE_KB = 1024
  val ONE_KB_BI: BigInteger = BigInteger.valueOf(ONE_KB)
  val ONE_MB: Long = ONE_KB * ONE_KB
  val ONE_MB_BI: BigInteger = ONE_KB_BI.multiply(ONE_KB_BI)
  val ONE_GB: Long = ONE_KB * ONE_MB
  val ONE_GB_BI: BigInteger = ONE_KB_BI.multiply(ONE_MB_BI)
  val ONE_TB: Long = ONE_KB * ONE_GB
  val ONE_TB_BI: BigInteger = ONE_KB_BI.multiply(ONE_GB_BI)
  val ONE_PB: Long = ONE_KB * ONE_TB
  val ONE_PB_BI: BigInteger = ONE_KB_BI.multiply(ONE_TB_BI)
  val ONE_EB: Long = ONE_KB * ONE_PB
  val ONE_EB_BI: BigInteger = ONE_KB_BI.multiply(ONE_PB_BI)
  val ONE_ZB: BigInteger = BigInteger.valueOf(ONE_KB).multiply(BigInteger.valueOf(ONE_EB))
  val ONE_YB: BigInteger = ONE_KB_BI.multiply(ONE_ZB)

  def byteCountToDisplaySize(size: BigInteger): String = {
    val sizeBD = new BigDecimal(size)
    def display(divisor: BigInteger, unit: String): String =
      getThreeSigFigs(sizeBD.divide(new BigDecimal(divisor))) + s" $unit (${String.valueOf(size)} bytes)"

    if (size.divide(ONE_YB).compareTo(BigInteger.ZERO) > 0) display(ONE_YB, "YB")
    else if (size.divide(ONE_ZB).compareTo(BigInteger.ZERO) > 0) display(ONE_ZB, "ZB")
    else if (size.divide(ONE_EB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_EB_BI, "EB")
    else if (size.divide(ONE_PB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_PB_BI, "PB")
    else if (size.divide(ONE_TB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_TB_BI, "TB")
    else if (size.divide(ONE_GB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_GB_BI, "GB")
    else if (size.divide(ONE_MB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_MB_BI, "MB")
    else if (size.divide(ONE_KB_BI).compareTo(BigInteger.ZERO) > 0) display(ONE_KB_BI, "KB")
    else String.valueOf(size) + " bytes"
  }

  def byteCountToDisplaySize(size: Long): String = byteCountToDisplaySize(BigInteger.valueOf(size))

  private def getThreeSigFigs(size: BigDecimal): String = {
    val (isDecimal, _, sizeS) = size.toString.foldLeft((false, 0, "")) {
      case ((decimal, count, agg), c) =>
        if (c == '.' && !decimal) (true, count, agg + c)
        else if (count < 3 || !decimal) (decimal, count + 1, agg + c)
        else (decimal, count + 1, agg)
    }

    if (isDecimal) sizeS.reverse.dropWhile(c => c == '0').reverse.stripSuffix(".")
    else sizeS

  }

} 
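For illustration, a few expected outputs of byteCountToDisplaySize above (the values follow directly from the code; division by powers of 1024 always terminates, since they are powers of two):

FileUtils.byteCountToDisplaySize(500L)  // "500 bytes"
FileUtils.byteCountToDisplaySize(1024L) // "1 KB (1024 bytes)"
FileUtils.byteCountToDisplaySize(1536L) // "1.5 KB (1536 bytes)"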
Example 12
Source File: JsonHadoopFsRelationSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      val basePath = new Path(file.getCanonicalPath)
      val fs = basePath.getFileSystem(SparkHadoopUtil.get.conf)
      val qualifiedBasePath = fs.makeQualified(basePath)

      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(qualifiedBasePath, s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 13
Source File: ExcelOutputWriter.scala    From spark-hadoopoffice-ds   with Apache License 2.0
package org.zuinnote.spark.office.excel

import java.math.BigDecimal
import java.sql.Date
import java.sql.Timestamp
import java.text.DateFormat
import java.text.SimpleDateFormat
import java.util.Calendar

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.mapreduce.RecordWriter
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.catalyst.{ CatalystTypeConverters, InternalRow }
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types._

import org.zuinnote.hadoop.office.format.common.dao.SpreadSheetCellDAO
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import org.zuinnote.hadoop.office.format.common.util.msexcel.MSExcelUtil
import org.zuinnote.hadoop.office.format.mapreduce._

import org.apache.commons.logging.LogFactory
import org.apache.commons.logging.Log
import org.zuinnote.hadoop.office.format.common.HadoopOfficeWriteConfiguration
import java.util.Locale
import java.text.DecimalFormat
import org.zuinnote.hadoop.office.format.common.converter.ExcelConverterSimpleSpreadSheetCellDAO
import java.text.NumberFormat

// NOTE: This class is instantiated and used on executor side only, no need to be serializable.
private[excel] class ExcelOutputWriter(
    path: String,
    dataSchema: StructType,
    context: TaskAttemptContext,
    options: Map[String, String]) extends OutputWriter {
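  // Note: this excerpt omits the member definitions the methods below rely on
  // (recordWriter, useHeader, currentRowNum, defaultSheetName, simpleConverter);
  // they are initialized in the full source file.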
  
  def write(row: Row): Unit = {
    // check useHeader
    if (useHeader) {
      val headers = row.schema.fieldNames
      var i = 0
      for (x <- headers) {
        val headerColumnSCD = new SpreadSheetCellDAO(x, "", "", MSExcelUtil.getCellAddressA1Format(currentRowNum, i), defaultSheetName)
        recordWriter.write(NullWritable.get(), headerColumnSCD)
        i += 1
      }
      currentRowNum += 1
      useHeader = false
    }
    // for each value in the row
    if (row.size > 0) {
      val simpleObject = new Array[AnyRef](row.size)
      for (i <- 0 to row.size - 1) { // for each element of the row
        val obj = row.get(i)
        if ((obj.isInstanceOf[Seq[String]]) && (obj.asInstanceOf[Seq[String]].length==5)) {
          val formattedValue = obj.asInstanceOf[Seq[String]](0)
          val comment = obj.asInstanceOf[Seq[String]](1)
          val formula = obj.asInstanceOf[Seq[String]](2)
          val address = obj.asInstanceOf[Seq[String]](3)
          val sheetName = obj.asInstanceOf[Seq[String]](4)
          simpleObject(i) = new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName)
        } else {
          simpleObject(i)=obj.asInstanceOf[AnyRef]
        }
      }
      // convert row to spreadsheetcellDAO
      val spreadSheetCellDAORow = simpleConverter.getSpreadSheetCellDAOfromSimpleDataType(simpleObject, defaultSheetName, currentRowNum)
      // write it
      for (x<- spreadSheetCellDAORow) {
        recordWriter.write(NullWritable.get(), x)
      }
    }
    currentRowNum += 1
  }

  override def close(): Unit = {
    recordWriter.close(context)
    currentRowNum = 0;
  }

} 
Example 14
Source File: JsonHadoopFsRelationSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.sources

import java.math.BigDecimal

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.types._

class JsonHadoopFsRelationSuite extends HadoopFsRelationTest {
  override val dataSourceName: String = "json"

  // JSON does not write data of NullType and does not play well with BinaryType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: BinaryType => false
    case _: CalendarIntervalType => false
    case _ => true
  }

  test("save()/load() - partitioned table - simple queries - partition columns in data") {
    withTempDir { file =>
      for (p1 <- 1 to 2; p2 <- Seq("foo", "bar")) {
        val partitionDir = new Path(
          CatalogUtils.URIToString(makeQualifiedPath(file.getCanonicalPath)), s"p1=$p1/p2=$p2")
        sparkContext
          .parallelize(for (i <- 1 to 3) yield s"""{"a":$i,"b":"val_$i"}""")
          .saveAsTextFile(partitionDir.toString)
      }

      val dataSchemaWithPartition =
        StructType(dataSchema.fields :+ StructField("p1", IntegerType, nullable = true))

      checkQueries(
        spark.read.format(dataSourceName)
          .option("dataSchema", dataSchemaWithPartition.json)
          .load(file.getCanonicalPath))
    }
  }

  test("SPARK-9894: save complex types to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("array", ArrayType(LongType))
          .add("map", MapType(StringType, new StructType().add("innerField", LongType)))

      val data =
        Row(Seq(1L, 2L, 3L), Map("m1" -> Row(4L))) ::
          Row(Seq(5L, 6L, 7L), Map("m2" -> Row(10L))) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }

  test("SPARK-10196: save decimal type to JSON") {
    withTempDir { file =>
      file.delete()

      val schema =
        new StructType()
          .add("decimal", DecimalType(7, 2))

      val data =
        Row(new BigDecimal("10.02")) ::
          Row(new BigDecimal("20000.99")) ::
          Row(new BigDecimal("10000")) :: Nil
      val df = spark.createDataFrame(sparkContext.parallelize(data), schema)

      // Write the data out.
      df.write.format(dataSourceName).save(file.getCanonicalPath)

      // Read it back and check the result.
      checkAnswer(
        spark.read.format(dataSourceName).schema(schema).load(file.getCanonicalPath),
        df
      )
    }
  }
} 
Example 15
Source File: package.scala    From Sidechains-SDK   with MIT License
package com.horizen

import java.math.{BigDecimal, BigInteger, MathContext}

import com.google.common.primitives.{Bytes, Ints}
import com.horizen.vrf.VrfOutput
import scorex.util.ModifierId
import supertagged.TaggedType

package object consensus {
  val merkleTreeHashLen: Int = 32
  val sha256HashLen: Int = 32

  val consensusHardcodedSaltString: Array[Byte] = "TEST".getBytes()
  val forgerStakePercentPrecision: BigDecimal = BigDecimal.valueOf(1000000) // 1 / forgerStakePercentPrecision is the smallest forger stake percentage that can still forge
  val stakeConsensusDivideMathContext: MathContext = MathContext.DECIMAL128 // used for division; without a MathContext, ArithmeticException is thrown when the quotient is non-terminating

  object ConsensusEpochNumber extends TaggedType[Int]
  type ConsensusEpochNumber = ConsensusEpochNumber.Type
  def intToConsensusEpochNumber(consensusEpochNumber: Int): ConsensusEpochNumber = ConsensusEpochNumber @@ consensusEpochNumber

  
  object ConsensusEpochId extends TaggedType[String]
  type ConsensusEpochId = ConsensusEpochId.Type
  def blockIdToEpochId(blockId: ModifierId): ConsensusEpochId = ConsensusEpochId @@ blockId
  def lastBlockIdInEpochId(epochId: ConsensusEpochId): ModifierId = ModifierId @@ epochId.untag(ConsensusEpochId)

  object ConsensusSlotNumber extends TaggedType[Int]
  type ConsensusSlotNumber = ConsensusSlotNumber.Type
  def intToConsensusSlotNumber(consensusSlotNumber: Int): ConsensusSlotNumber = ConsensusSlotNumber @@ consensusSlotNumber

  //Slot number starting from genesis block
  object ConsensusAbsoluteSlotNumber extends TaggedType[Int]
  type ConsensusAbsoluteSlotNumber = ConsensusAbsoluteSlotNumber.Type
  def intToConsensusAbsoluteSlotNumber(consensusSlotNumber: Int): ConsensusAbsoluteSlotNumber = ConsensusAbsoluteSlotNumber @@ consensusSlotNumber


  object ConsensusNonce extends TaggedType[Array[Byte]]
  type ConsensusNonce = ConsensusNonce.Type
  def byteArrayToConsensusNonce(bytes: Array[Byte]): ConsensusNonce = ConsensusNonce @@ bytes

  object VrfMessage extends TaggedType[Array[Byte]]
  type VrfMessage = VrfMessage.Type

  def buildVrfMessage(slotNumber: ConsensusSlotNumber, nonce: NonceConsensusEpochInfo): VrfMessage = {
    val slotNumberBytes = Ints.toByteArray(slotNumber)
    val nonceBytes = nonce.consensusNonce

    val resBytes = Bytes.concat(slotNumberBytes, nonceBytes, consensusHardcodedSaltString)
    VrfMessage @@ resBytes
  }

  def vrfOutputToPositiveBigInteger(vrfOutput: VrfOutput): BigInteger = {
    new BigInteger(1, vrfOutput.bytes())
  }

  def vrfProofCheckAgainstStake(vrfOutput: VrfOutput, actualStake: Long, totalStake: Long): Boolean = {
    val requiredStakePercentage: BigDecimal = vrfOutputToRequiredStakePercentage(vrfOutput)
    val actualStakePercentage: BigDecimal = new BigDecimal(actualStake).divide(new BigDecimal(totalStake), stakeConsensusDivideMathContext)

    requiredStakePercentage.compareTo(actualStakePercentage) match {
      case -1 => true //required percentage is less than actual
      case  0 => true //required percentage is equal to actual
      case  _ => false //any other case
    }
  }

  // @TODO shall be changed by adding "active slots coefficient" according to Ouroboros Praos Whitepaper (page 10)
  def vrfOutputToRequiredStakePercentage(vrfOutput: VrfOutput): BigDecimal = {
    val hashAsBigDecimal: BigDecimal = new BigDecimal(vrfOutputToPositiveBigInteger(vrfOutput))

    hashAsBigDecimal
      .remainder(forgerStakePercentPrecision) //got random number from 0 to forgerStakePercentPrecision - 1
      .divide(forgerStakePercentPrecision, stakeConsensusDivideMathContext) //got random number from 0 to 0.(9)
  }
} 
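The stakeConsensusDivideMathContext used above is what keeps vrfProofCheckAgainstStake from failing on non-terminating stake ratios: java.math.BigDecimal.divide without a MathContext throws on such quotients. A minimal sketch:

import java.math.{BigDecimal, MathContext}

val actual = new BigDecimal(1)
val total = new BigDecimal(3)
// actual.divide(total)                      // ArithmeticException: Non-terminating decimal expansion
actual.divide(total, MathContext.DECIMAL128) // 0.3333... rounded to 34 significant digits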
Example 16
Source File: MainchainApiRequests.scala    From Sidechains-SDK   with MIT License
package com.horizen.mainchain.api

import com.fasterxml.jackson.annotation.JsonView
import com.horizen.box.WithdrawalRequestBox
import com.horizen.params.NetworkParams
import com.horizen.serialization.Views
import java.math.BigDecimal

import com.horizen.utils.BytesUtils

@JsonView(Array(classOf[Views.Default]))
case class SidechainInfoResponse
  (sidechainId: Array[Byte],
   balance: String,
   creatingTxHash: Array[Byte],
   createdInBlock: Array[Byte],
   createdAtBlockHeight: Long,
   withdrawalEpochLength: Long
  )

@JsonView(Array(classOf[Views.Default]))
case class BackwardTransferEntry
  (pubkeyhash: Array[Byte],
   amount: String)
{
  require(pubkeyhash != null, "Address MUST be NOT NULL.")
}

@JsonView(Array(classOf[Views.Default]))
case class SendCertificateRequest
  (sidechainId: Array[Byte],
   epochNumber: Int,
   endEpochBlockHash: Array[Byte],
   proofBytes: Array[Byte],
   quality: Long,
   backwardTransfers: Seq[BackwardTransferEntry],
   fee: String = "0.00001")
{
  require(sidechainId.length == 32, "SidechainId MUST have length 32 bytes.")
  require(endEpochBlockHash != null, "End epoch block hash MUST be NOT NULL.")
}

case class SendCertificateResponse
  (certificateId: Array[Byte])

case class GetRawCertificateRequest
  (certificateId: Array[Byte])

@JsonView(Array(classOf[Views.Default]))
case class GetRawCertificateResponse
  (hex: Array[Byte])

object CertificateRequestCreator {

  val ZEN_COINS_DIVISOR: BigDecimal = new BigDecimal(100000000)

  def create(epochNumber: Int,
             endEpochBlockHash: Array[Byte],
             proofBytes: Array[Byte],
             quality: Long,
             withdrawalRequestBoxes: Seq[WithdrawalRequestBox],
             params: NetworkParams) : SendCertificateRequest = {
    SendCertificateRequest(
      params.sidechainId,
      epochNumber,
      endEpochBlockHash,
      proofBytes,
      quality,
      // Note: we should send BT entries public key hashes in reversed BE endianness.
      withdrawalRequestBoxes.map(wrb => BackwardTransferEntry(BytesUtils.reverseBytes(wrb.proposition().bytes()), new BigDecimal(wrb.value()).divide(ZEN_COINS_DIVISOR).toPlainString)))
  }
} 
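The last line of create converts box values from satoshis to ZEN by dividing by ZEN_COINS_DIVISOR (10^8); that division always terminates, and toPlainString keeps the result out of scientific notation. A small sketch with a hypothetical box value:

val satoshis = 1000000L // hypothetical WithdrawalRequestBox value
new java.math.BigDecimal(satoshis)
  .divide(new java.math.BigDecimal(100000000))
  .toPlainString // "0.01"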
Example 17
Source File: KahanSpec.scala    From flint   with Apache License 2.0
package com.twosigma.flint.math

import java.math.BigDecimal

import com.twosigma.flint.util.Timer
import org.scalatest.FlatSpec

import scala.util.Random

class KahanSpec extends FlatSpec {

  "Kahan" should "sum correctly in wiki example" in {
    val kahan = new Kahan()
    var i = 0
    while (i < 1000) {
      kahan.add(1.0)
      kahan.add(1.0e100)
      kahan.add(1.0)
      kahan.add(-1.0e100)
      i += 1
    }

    assert(kahan.value === 2000.0)
  }

  it should "sum correctly for constants of Double(s)" in {
    val kahan = new Kahan()
    val x = 1000.0002
    var sum = 0.0
    val bigDecimal = new BigDecimal(x)
    var bigDecimalSum = new BigDecimal(0.0)
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      sum += x
      kahan.add(x)
      bigDecimalSum = bigDecimalSum.add(bigDecimal)
      i += 1
    }
    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(kahan.value))
          .doubleValue()
      ) < 1.0e-5
    )

    assert(
      Math.abs(
        bigDecimalSum
          .subtract(new BigDecimal(sum))
          .doubleValue()
      ) > 1.0
    )
  }

  it should "subtract correctly" in {
    val kahan1 = new Kahan()
    val kahan2 = new Kahan()
    val x = 1000.0002
    var i = 0
    while (i < (Int.MaxValue >> 5)) {
      kahan1.add(x)
      kahan2.add(x)
      kahan2.add(x)
      i += 1
    }
    kahan2.subtract(kahan1)
    assert(kahan2.value === kahan1.value)
  }
} 
Example 18
Source File: TypeCast.scala    From spark-select   with Apache License 2.0
package io.minio.spark.select.util

import java.math.BigDecimal
import java.sql.{Date, Timestamp}
import java.text.{SimpleDateFormat, NumberFormat}
import java.util.Locale

import org.apache.spark.sql.types._

import scala.util.Try

object TypeCast {

  @throws[IllegalArgumentException]
  private[select] def toChar(str: String): Char = {
    if (str.charAt(0) == '\\') {
      str.charAt(1)
      match {
        case 't' => '\t'
        case 'r' => '\r'
        case 'b' => '\b'
        case 'f' => '\f'
        case '\"' => '\"' // In case user changes quote char and uses \" as delimiter in options
        case '\'' => '\''
        case 'u' if str == """\u0000""" => '\u0000'
        case _ =>
          throw new IllegalArgumentException(s"Unsupported special character for delimiter: $str")
      }
    } else if (str.length == 1) {
      str.charAt(0)
    } else {
      throw new IllegalArgumentException(s"Delimiter cannot be more than one character: $str")
    }
  }
} 
Example 19
Source File: MapToJavaPropertiesConversionSpec.scala    From kafka-configurator   with BSD 3-Clause "New" or "Revised" License
package com.sky.kafka.utils

import java.math.BigDecimal
import java.util.Properties

import common.BaseSpec

class MapToJavaPropertiesConversionSpec extends BaseSpec {

  "mapToProperties" should "convert objects into their string representation" in {
    MapToJavaPropertiesConversion.mapToProperties(Map[String, Object](
      "object" -> new BigDecimal("123.456")
    )) shouldBe new Properties {
      setProperty("object", "123.456")
    }
  }

  it should "convert classes into the full class name" in {
    MapToJavaPropertiesConversion.mapToProperties(Map[String, Object](
      "class" -> classOf[Exception]
    )) shouldBe new Properties {
      setProperty("class", "java.lang.Exception")
    }
  }
} 
Example 20
Source File: MatfastSerializer.scala    From MatRel   with Apache License 2.0
package org.apache.spark.sql.matfast.util

import java.math.BigDecimal
import java.nio.ByteBuffer
import java.util.{HashMap => JavaHashMap}

import scala.reflect.ClassTag

import com.esotericsoftware.kryo.{Kryo, Serializer}
import com.esotericsoftware.kryo.io.{Input, Output}
import com.twitter.chill.ResourcePool

import org.apache.spark.{SparkConf, SparkEnv}
import org.apache.spark.serializer.{KryoSerializer, SerializerInstance}
import org.apache.spark.sql.matfast.matrix._
import org.apache.spark.sql.types.Decimal
import org.apache.spark.util.MutablePair


private[matfast] class MatfastSerializer(conf: SparkConf) extends KryoSerializer(conf) {
  override def newKryo(): Kryo = {
    val kryo = super.newKryo()
    kryo.setRegistrationRequired(false)
    kryo.register(classOf[MutablePair[_, _]])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow])
    kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow])
    kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer)
    kryo.register(classOf[scala.math.BigDecimal], new ScalaBigDecimalSerializer)

    kryo.register(classOf[Decimal])
    kryo.register(classOf[JavaHashMap[_, _]])
    kryo.register(classOf[DenseMatrix])
    kryo.register(classOf[SparseMatrix])

    kryo.setReferences(false)
    kryo
  }
}

private[matfast] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) {
  val ser: MatfastSerializer = {
    val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf())
    new MatfastSerializer(sparkConf)
  }

  def newInstance(): SerializerInstance = ser.newInstance()
}

private[matfast] object MatfastSerializer {
  @transient lazy val resourcePool = new KryoResourcePool(50)

  private[this] def acquireRelease[O](fn: SerializerInstance => O): O = {
    val kryo = resourcePool.borrow()
    try {
      fn(kryo)
    } finally {
      resourcePool.release(kryo)
    }
  }

  def serialize[T: ClassTag](o: T): Array[Byte] = {
    acquireRelease { k =>
      k.serialize(o).array()
    }
  }

  def deserialize[T: ClassTag](bytes: Array[Byte]): T =
    acquireRelease { k =>
      k.deserialize[T](ByteBuffer.wrap(bytes))
    }
}

private[matfast] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = {
    new java.math.BigDecimal(input.readString())
  }
}

private[matfast] class ScalaBigDecimalSerializer extends Serializer[scala.math.BigDecimal] {
  def write(kryo: Kryo, output: Output, bd: scala.math.BigDecimal): Unit = {
    output.writeString(bd.toString)
  }

  def read(kryo: Kryo, input: Input, tpe: Class[scala.math.BigDecimal]): scala.math.BigDecimal = {
    scala.math.BigDecimal(input.readString())
  }
}
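A round-trip sketch using the pooled serializer above (it works without a running SparkEnv, since the pool falls back to a fresh SparkConf):

val bytes = MatfastSerializer.serialize(new java.math.BigDecimal("123.456"))
val bd = MatfastSerializer.deserialize[java.math.BigDecimal](bytes)
assert(bd == new java.math.BigDecimal("123.456"))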
Example 21
Source File: SQLCompatibilityFunctionSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql

import java.math.BigDecimal
import java.sql.Timestamp

import org.apache.spark.sql.test.SharedSQLContext


class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext {

  test("ifnull") {
    checkAnswer(
      sql("SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT ifnull(1, 2.1d), ifnull(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nullif") {
    checkAnswer(
      sql("SELECT nullif('x', 'x'), nullif('x', 'y')"),
      Row(null, "x"))

    // Type coercion
    checkAnswer(
      sql("SELECT nullif(1, 2.1d), nullif(1, 1.0d)"),
      Row(1.0, null))
  }

  test("nvl") {
    checkAnswer(
      sql("SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null)"),
      Row("x", "y", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl(1, 2.1d), nvl(null, 2.1d)"),
      Row(1.0, 2.1))
  }

  test("nvl2") {
    checkAnswer(
      sql("SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null)"),
      Row("y", "x", null))

    // Type coercion
    checkAnswer(
      sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"),
      Row(2.1, 1.0))
  }

  test("SPARK-16730 cast alias functions for Hive compatibility") {
    checkAnswer(
      sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"),
      Row(true, 1.toByte, 1.toShort, 1, 1L))

    checkAnswer(
      sql("SELECT float(1), double(1), decimal(1)"),
      Row(1.toFloat, 1.0, new BigDecimal(1)))

    checkAnswer(
      sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"),
      Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0)))

    checkAnswer(
      sql("SELECT string(1)"),
      Row("1"))

    // Error handling: only one argument
    val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage
    assert(errorMsg.contains("Function string accepts only one argument"))
  }
}