org.apache.spark.sql.types.Decimal Scala Examples
The following examples show how to use org.apache.spark.sql.types.Decimal.
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
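Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how a Decimal is usually constructed and inspected; the lines are meant to be pasted into a spark-shell session:

import org.apache.spark.sql.types.Decimal

val fromString   = Decimal("10.1")        // parsed literal: precision 3, scale 1
val fromUnscaled = Decimal(101L, 3, 1)    // unscaled value 101 with scale 1, i.e. 10.1
val fromDouble   = Decimal(0.30)          // converted from a Double

fromString == fromUnscaled                    // true: Decimal compares by numeric value
fromString.toUnscaledLong                     // 101
(fromUnscaled.precision, fromUnscaled.scale)  // (3, 1)

Most of the examples below exercise exactly these constructors, either directly or through Catalyst expressions such as UnscaledValue and MakeDecimal.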
Example 1
Source File: ColumnarTestUtils.scala from iolap with Apache License 2.0
package org.apache.spark.sql.columnar import java.sql.Timestamp import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericMutableRow import org.apache.spark.sql.types.{UTF8String, DataType, Decimal, AtomicType} object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[T <: DataType, JvmType](columnType: ColumnType[T, JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case STRING => UTF8String(Random.nextString(Random.nextInt(32))) case BOOLEAN => Random.nextBoolean() case BINARY => randomBytes(Random.nextInt(32)) case DATE => Random.nextInt() case TIMESTAMP => val timestamp = new Timestamp(Random.nextLong()) timestamp.setNanos(Random.nextInt(999999999)) timestamp case _ => // Using a random one-element map instead of an arbitrary object Map(Random.nextInt() -> Random.nextString(Random.nextInt(32))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[T <: DataType, JvmType]( columnType: ColumnType[T, JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_ <: DataType, _], tail: ColumnType[_ <: DataType, _]*): Row = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_ <: DataType, _]]): Row = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
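The FIXED_DECIMAL branch above builds a Decimal from a random unscaled Long together with the column's declared precision and scale; Random.nextLong() % 100 keeps the unscaled part within two digits (possibly negative), so the value fits any precision the column declares. A hedged usage sketch of the helpers, assuming the lines live in the same org.apache.spark.sql.columnar test package where FIXED_DECIMAL and these utilities are visible:

// Hypothetical usage inside the org.apache.spark.sql.columnar test sources
val decimalColumn = FIXED_DECIMAL(10, 2)                                    // precision 10, scale 2
val row = ColumnarTestUtils.makeRandomRow(INT, decimalColumn)               // a random Int and a random Decimal
val decimals = ColumnarTestUtils.makeUniqueRandomValues(decimalColumn, 5)   // five distinct random Decimals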
Example 2
Source File: tbl_demo_test.scala from huemul-bigdatagovernance with Apache License 2.0
package samples import com.huemulsolutions.bigdata.common._ import com.huemulsolutions.bigdata.control._ import com.huemulsolutions.bigdata.tables.huemul_Table import com.huemulsolutions.bigdata.tables.huemul_Columns import com.huemulsolutions.bigdata.tables.huemulType_StorageType import com.huemulsolutions.bigdata.tables.huemulType_Tables import com.huemulsolutions.bigdata.tables.huemulType_SecurityLevel import org.apache.spark.sql.types.DataTypes._ import org.apache.spark.sql.types.DecimalType import org.apache.spark.sql.types.Decimal class tbl_demo_test(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_Control) extends huemul_Table(huemulBigDataGov, Control) with Serializable { this.setAutoCast(true) this.setBusiness_ResponsibleName("Nombre 1") this.setDataBase(huemulBigDataGov.GlobalSettings.DIM_DataBase) this.setDescription("descripcion") this.setDQ_MaxNewRecords_Num(10) this.setDQ_MaxNewRecords_Perc(Decimal.apply(0.30)) this.setGlobalPaths(huemulBigDataGov.GlobalSettings.DIM_BigFiles_Path) this.setIT_ResponsibleName("IT Responsible") this.setLocalPath("demo/") this.setFrequency(huemulType_Frequency.MONTHLY) //this.setPartitionField("periodo_id") this.setStorageType(huemulType_StorageType.ORC) this.setTableType(huemulType_Tables.Reference) this.WhoCanRun_executeFull_addAccess("classname","package") this.WhoCanRun_executeOnlyInsert_addAccess("classname","package") this.WhoCanRun_executeOnlyUpdate_addAccess("classname","package") val codigo_id: huemul_Columns = new huemul_Columns(StringType, true, "descripción del campo") codigo_id.setIsPK (true) codigo_id.setIsUnique ( true) codigo_id.setDQ_MaxDateTimeValue ( "") codigo_id.setDQ_MinDateTimeValue ( "") codigo_id.setDQ_MaxDecimalValue ( Decimal.apply(10)) codigo_id.setDQ_MinDecimalValue ( Decimal.apply(10)) codigo_id.setDQ_MaxLen ( 10) codigo_id.setDQ_MinLen ( 9) codigo_id.setNullable ( true) codigo_id.setDefaultValue ( "'nada'") codigo_id.setSecurityLevel ( huemulType_SecurityLevel.Public) codigo_id.setEncryptedType ( "sin encriptar") codigo_id.setMDM_EnableOldValue ( false) codigo_id.setMDM_EnableDTLog( false) codigo_id.setMDM_EnableProcessLog( false) this.ApplyTableDefinition() }
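In this table definition the Decimal values are used as plain thresholds rather than as column data: setDQ_MaxNewRecords_Perc takes a percentage and setDQ_MaxDecimalValue / setDQ_MinDecimalValue take bounds. A small hedged sketch of just the Decimal side of those calls (the huemul setters themselves are exactly as shown above):

import org.apache.spark.sql.types.Decimal

val maxNewRecordsPerc = Decimal(0.30)   // 30% threshold, as passed to setDQ_MaxNewRecords_Perc
val maxDecimalValue   = Decimal(10)     // integer bound, as passed to setDQ_MaxDecimalValue
maxNewRecordsPerc.toDouble              // 0.3
maxDecimalValue.toBigDecimal            // 10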
Example 3
Source File: tbl_demo_test_padre.scala from huemul-bigdatagovernance with Apache License 2.0
package samples import com.huemulsolutions.bigdata.common._ import com.huemulsolutions.bigdata.control._ import com.huemulsolutions.bigdata.tables.huemul_Table import com.huemulsolutions.bigdata.tables.huemul_Columns import com.huemulsolutions.bigdata.tables.huemulType_StorageType import com.huemulsolutions.bigdata.tables.huemulType_Tables import com.huemulsolutions.bigdata.tables.huemulType_SecurityLevel import org.apache.spark.sql.types.DataTypes._ import org.apache.spark.sql.types.DecimalType import org.apache.spark.sql.types.Decimal import com.huemulsolutions.bigdata.tables.huemul_Table_Relationship import javax.naming.ldap.Control class tbl_demo_test_padre(huemulBigDataGov: huemul_BigDataGovernance, Control: huemul_Control) extends huemul_Table(huemulBigDataGov, Control) with Serializable { this.setAutoCast(true) this.setBusiness_ResponsibleName("Nombre 1") this.setDataBase(huemulBigDataGov.GlobalSettings.DIM_DataBase) this.setDescription("descripcion") this.setFrequency(huemulType_Frequency.MONTHLY) this.setDQ_MaxNewRecords_Num(10) this.setDQ_MaxNewRecords_Perc(Decimal.apply(0.20)) this.setGlobalPaths(huemulBigDataGov.GlobalSettings.DIM_BigFiles_Path) this.setIT_ResponsibleName("IT Responsible") this.setLocalPath("demo/") //this.setPartitionField("periodo_id") this.setStorageType(huemulType_StorageType.ORC) this.setTableType(huemulType_Tables.Reference) this.WhoCanRun_executeFull_addAccess("classname","package") this.WhoCanRun_executeOnlyInsert_addAccess("classname","package") this.WhoCanRun_executeOnlyUpdate_addAccess("classname","package") val miClave_id: huemul_Columns = new huemul_Columns(StringType, true, "descripción del campo") miClave_id.setIsPK(true) miClave_id.setIsUnique(true) miClave_id.setDQ_MaxDateTimeValue ("") miClave_id.setDQ_MinDateTimeValue ("") miClave_id.setDQ_MaxDecimalValue ( Decimal.apply(10)) miClave_id.setDQ_MinDecimalValue ( Decimal.apply(10)) miClave_id.setDQ_MaxLen ( 10) miClave_id.setDQ_MinLen ( 9) miClave_id.setNullable ( true) miClave_id.setDefaultValue ( "'nada'") miClave_id.setSecurityLevel ( huemulType_SecurityLevel.Public) miClave_id.setEncryptedType ( "nada") miClave_id.setMDM_EnableOldValue ( false) miClave_id.setMDM_EnableDTLog( false) miClave_id.setMDM_EnableProcessLog( false) val codigo_id_aca: huemul_Columns = new huemul_Columns(StringType, true, "descripción del campo fk") val instancia_tbl_demo_test = new tbl_demo_test(huemulBigDataGov, Control) val FK_Rel = new huemul_Table_Relationship(instancia_tbl_demo_test, false) FK_Rel.AddRelationship(instancia_tbl_demo_test.codigo_id, this.codigo_id_aca) this.ApplyTableDefinition() }
Example 4
Source File: ColumnarTestUtils.scala from BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow} import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
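Compared with the Spark 1.x version in Example 1, this fork distinguishes COMPACT_DECIMAL (precision small enough for the unscaled value to live in a Long) from LARGE_DECIMAL (backed by a BigDecimal). A hedged spark-shell sketch of that boundary, assuming the standard Decimal companion constants:

import org.apache.spark.sql.types.Decimal

Decimal.MAX_LONG_DIGITS                                                // 18: the largest precision a compact decimal can use
val compact = Decimal(12345L, 18, 2)                                   // representable by its unscaled Long
val large   = Decimal(BigDecimal("12345678901234567890.123"), 38, 3)   // needs BigDecimal storage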
Example 5
Source File: SparkSqlSerializer.scala from BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution import java.nio.ByteBuffer import java.util.{HashMap => JavaHashMap} import scala.reflect.ClassTag import com.esotericsoftware.kryo.io.{Input, Output} import com.esotericsoftware.kryo.{Kryo, Serializer} import com.twitter.chill.ResourcePool import org.apache.spark.serializer.{KryoSerializer, SerializerInstance} import org.apache.spark.sql.types.{Decimal, StructField, StructType} import org.apache.spark.util.MutablePair import org.apache.spark.{SparkConf, SparkEnv} //private[sql] class SparkSqlSerializer(conf: SparkConf) extends KryoSerializer(conf) { override def newKryo(): Kryo = { val kryo = super.newKryo() kryo.setRegistrationRequired(false) kryo.register(classOf[MutablePair[_, _]]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericMutableRow]) kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer) kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer) kryo.register(classOf[Decimal]) kryo.register(classOf[JavaHashMap[_, _]]) // APS kryo.register(classOf[StructType]) kryo.register(classOf[StructField]) kryo.setReferences(false) kryo } } private[execution] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) { val ser: SparkSqlSerializer = { val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf()) new SparkSqlSerializer(sparkConf) } def newInstance(): SerializerInstance = ser.newInstance() } //private[sql] object SparkSqlSerializer { @transient lazy val resourcePool = new KryoResourcePool(30) private[this] def acquireRelease[O](fn: SerializerInstance => O): O = { val kryo = resourcePool.borrow try { fn(kryo) } finally { resourcePool.release(kryo) } } def serialize[T: ClassTag](o: T): Array[Byte] = acquireRelease { k => k.serialize(o).array() } def deserialize[T: ClassTag](bytes: Array[Byte]): T = acquireRelease { k => k.deserialize[T](ByteBuffer.wrap(bytes)) } } private[sql] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] { def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) { // TODO: There are probably more efficient representations than strings... output.writeString(bd.toString) } def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = { new java.math.BigDecimal(input.readString()) } } private[sql] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] { def write(kryo: Kryo, output: Output, bd: BigDecimal) { // TODO: There are probably more efficient representations than strings... output.writeString(bd.toString) } def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = { new java.math.BigDecimal(input.readString()) } }
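Registering Decimal (together with java.math.BigDecimal and Scala's BigDecimal) lets Kryo round-trip decimal values through this serializer. A hypothetical round trip using the serialize/deserialize helpers defined above; the demo object name is made up, and the sketch assumes it is compiled in the same org.apache.spark.sql.execution package:

package org.apache.spark.sql.execution

import org.apache.spark.sql.types.Decimal

// Hypothetical demo object, not part of the original source file.
object SparkSqlSerializerRoundTrip {
  def main(args: Array[String]): Unit = {
    val d = Decimal("123.45")
    val bytes: Array[Byte] = SparkSqlSerializer.serialize(d)      // borrows a Kryo instance from the pool
    val back = SparkSqlSerializer.deserialize[Decimal](bytes)
    assert(back == d)                                             // Decimal compares by numeric value
  }
}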
Example 6
Source File: DecimalExpressionSuite.scala from BigDatalog with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{LongType, DecimalType, Decimal} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }
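The first two tests boil down to a simple identity on Decimal itself: UnscaledValue extracts the unscaled Long of a decimal, and MakeDecimal rebuilds the decimal from that Long plus a precision and scale. A hedged spark-shell sketch of the same identity outside the Catalyst expression framework:

import org.apache.spark.sql.types.Decimal

val d = Decimal("10.1")                 // precision 3, scale 1
val unscaled = d.toUnscaledLong         // 101, the value UnscaledValue evaluates to
val rebuilt  = Decimal(unscaled, 3, 1)  // the value MakeDecimal(101, 3, 1) evaluates to
rebuilt == d                            // true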
Example 7
Source File: ColumnarTestUtils.scala from Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 8
Source File: GenerateUnsafeProjectionSuite.scala from Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.BoundReference import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types.{DataType, Decimal, StringType, StructType} import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class GenerateUnsafeProjectionSuite extends SparkFunSuite { test("Test unsafe projection string access pattern") { val dataType = (new StructType).add("a", StringType) val exprs = BoundReference(0, dataType, nullable = true) :: Nil val projection = GenerateUnsafeProjection.generate(exprs) val result = projection.apply(InternalRow(AlwaysNull)) assert(!result.isNullAt(0)) assert(result.getStruct(0, 1).isNullAt(0)) } } object AlwaysNull extends InternalRow { override def numFields: Int = 1 override def setNullAt(i: Int): Unit = {} override def copy(): InternalRow = this override def anyNull: Boolean = true override def isNullAt(ordinal: Int): Boolean = true override def update(i: Int, value: Any): Unit = notSupported override def getBoolean(ordinal: Int): Boolean = notSupported override def getByte(ordinal: Int): Byte = notSupported override def getShort(ordinal: Int): Short = notSupported override def getInt(ordinal: Int): Int = notSupported override def getLong(ordinal: Int): Long = notSupported override def getFloat(ordinal: Int): Float = notSupported override def getDouble(ordinal: Int): Double = notSupported override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = notSupported override def getUTF8String(ordinal: Int): UTF8String = notSupported override def getBinary(ordinal: Int): Array[Byte] = notSupported override def getInterval(ordinal: Int): CalendarInterval = notSupported override def getStruct(ordinal: Int, numFields: Int): InternalRow = notSupported override def getArray(ordinal: Int): ArrayData = notSupported override def getMap(ordinal: Int): MapData = notSupported override def get(ordinal: Int, dataType: DataType): AnyRef = notSupported private def notSupported: Nothing = throw new UnsupportedOperationException }
Example 9
Source File: DecimalExpressionSuite.scala from Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }
Example 10
Source File: ColumnarTestUtils.scala from spark1.52 with Apache License 2.0
package org.apache.spark.sql.columnar

import scala.collection.immutable.HashSet
import scala.util.Random

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types.{DataType, Decimal, AtomicType}
import org.apache.spark.unsafe.types.UTF8String

// Column test utilities
object ColumnarTestUtils {
  def makeNullRow(length: Int): GenericMutableRow = {
    val row = new GenericMutableRow(length)
    (0 until length).foreach(row.setNullAt)
    row
  }

  // Generate a random value for the given column type
  def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = {
    def randomBytes(length: Int) = {
      val bytes = new Array[Byte](length)
      Random.nextBytes(bytes)
      bytes
    }

    (columnType match {
      case BOOLEAN => Random.nextBoolean()
      case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
      case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort
      case INT => Random.nextInt()
      case DATE => Random.nextInt()
      case LONG => Random.nextLong()
      case TIMESTAMP => Random.nextLong()
      case FLOAT => Random.nextFloat()
      case DOUBLE => Random.nextDouble()
      case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32)))
      case BINARY => randomBytes(Random.nextInt(32))
      case FIXED_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale)
      case _ =>
        // Using a random one-element map instead of an arbitrary object
        Map(Random.nextInt() -> Random.nextString(Random.nextInt(32)))
    }).asInstanceOf[JvmType]
  }

  def makeRandomValues(
      head: ColumnType[_],
      tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail)

  def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = {
    columnTypes.map(makeRandomValue(_))
  }

  // Generate `count` distinct random values
  def makeUniqueRandomValues[JvmType](
      columnType: ColumnType[JvmType],
      count: Int): Seq[JvmType] = {
    Iterator.iterate(HashSet.empty[JvmType]) { set =>
      set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next()
    }.drop(count).next().toSeq
  }

  def makeRandomRow(
      head: ColumnType[_],
      tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail)

  def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = {
    val row = new GenericMutableRow(columnTypes.length)
    makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) =>
      row(index) = value
    }
    row
  }

  // Unique values together with single-value rows wrapping them
  def makeUniqueValuesAndSingleValueRows[T <: AtomicType](
      columnType: NativeColumnType[T],
      count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = {
    val values = makeUniqueRandomValues(columnType, count)
    val rows = values.map { value =>
      val row = new GenericMutableRow(1)
      row(0) = value
      row
    }
    (values, rows)
  }
}
Example 11
Source File: DecimalExpressionSuite.scala from spark1.52 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{LongType, DecimalType, Decimal}

class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {

  // Unscaled value
  test("UnscaledValue") {
    val d1 = Decimal("10.1")
    checkEvaluation(UnscaledValue(Literal(d1)), 101L)
    val d2 = Decimal(101, 3, 1)
    checkEvaluation(UnscaledValue(Literal(d2)), 101L)
    checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null)
  }

  // Build a decimal from an unscaled long
  test("MakeDecimal") {
    checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1"))
    checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null)
  }

  // Promote precision
  test("PromotePrecision") {
    val d1 = Decimal("10.1")
    checkEvaluation(PromotePrecision(Literal(d1)), d1)
    val d2 = Decimal(101, 3, 1)
    checkEvaluation(PromotePrecision(Literal(d2)), d2)
    checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null)
  }

  // Check overflow
  test("CheckOverflow") {
    val d1 = Decimal("10.1")
    checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10"))
    checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1)
    checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1)
    checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null)

    val d2 = Decimal(101, 3, 1)
    checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10"))
    checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2)
    checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2)
    checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null)

    checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null)
  }
}
Example 12
Source File: MiscStatement.scala from spark-snowflake with Apache License 2.0
package net.snowflake.spark.snowflake.pushdowns.querygeneration

import net.snowflake.spark.snowflake.{
  ConstantString,
  EmptySnowflakeSQLStatement,
  IntVariable,
  SnowflakeSQLStatement
}
import org.apache.spark.sql.catalyst.expressions.{
  Alias, Ascending, Attribute, Cast, DenseRank, Descending, Expression, If, In,
  InSet, Literal, MakeDecimal, PercentRank, Rank, ScalarSubquery, ShiftLeft,
  ShiftRight, SortOrder, UnscaledValue, WindowExpression, WindowSpecDefinition
}
import org.apache.spark.sql.types.{Decimal, _}

// Excerpt: the original listing shows only the cast-type mapping; the enclosing
// MiscStatement object is assumed here so that the snippet compiles on its own.
object MiscStatement {

  // Maps a Catalyst DataType to the Snowflake type name used in a pushed-down CAST.
  private[querygeneration] final def getCastType(t: DataType): Option[String] =
    Option(t match {
      case StringType => "VARCHAR"
      case BinaryType => "BINARY"
      case DateType => "DATE"
      case TimestampType => "TIMESTAMP"
      case d: DecimalType => "DECIMAL(" + d.precision + ", " + d.scale + ")"
      case IntegerType | LongType => "NUMBER"
      case FloatType => "FLOAT"
      case DoubleType => "DOUBLE"
      case _ => null
    })
}
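A hedged sketch of what the mapping above returns for a few Catalyst types; the calls assume code living in the same querygeneration package, since getCastType is package-private:

import org.apache.spark.sql.types.{BinaryType, BooleanType, DecimalType, IntegerType, StringType}

MiscStatement.getCastType(DecimalType(10, 2))   // Some("DECIMAL(10, 2)")
MiscStatement.getCastType(IntegerType)          // Some("NUMBER")
MiscStatement.getCastType(StringType)           // Some("VARCHAR")
MiscStatement.getCastType(BinaryType)           // Some("BINARY")
MiscStatement.getCastType(BooleanType)          // None: unhandled types map to null, which Option() turns into None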
Example 13
Source File: ColumnarTestUtils.scala from multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 14
Source File: DecimalExpressionSuite.scala from multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }
Example 15
Source File: ColumnarTestUtils.scala from sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 16
Source File: DecimalExpressionSuite.scala from sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }
Example 17
Source File: ColumnarTestUtils.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 18
Source File: UnsafeRowWriterSuite.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.Decimal class UnsafeRowWriterSuite extends SparkFunSuite { def checkDecimalSizeInBytes(decimal: Decimal, numBytes: Int): Unit = { assert(decimal.toJavaBigDecimal.unscaledValue().toByteArray.length == numBytes) } test("SPARK-25538: zero-out all bits for decimals") { val decimal1 = Decimal(0.431) decimal1.changePrecision(38, 18) checkDecimalSizeInBytes(decimal1, 8) val decimal2 = Decimal(123456789.1232456789) decimal2.changePrecision(38, 18) checkDecimalSizeInBytes(decimal2, 11) // On an UnsafeRowWriter we write decimal2 first and then decimal1 val unsafeRowWriter1 = new UnsafeRowWriter(1) unsafeRowWriter1.resetRowWriter() unsafeRowWriter1.write(0, decimal2, decimal2.precision, decimal2.scale) unsafeRowWriter1.reset() unsafeRowWriter1.write(0, decimal1, decimal1.precision, decimal1.scale) val res1 = unsafeRowWriter1.getRow // On a second UnsafeRowWriter we write directly decimal1 val unsafeRowWriter2 = new UnsafeRowWriter(1) unsafeRowWriter2.resetRowWriter() unsafeRowWriter2.write(0, decimal1, decimal1.precision, decimal1.scale) val res2 = unsafeRowWriter2.getRow // The two rows should be the equal assert(res1 == res2) } }
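The helper and the first assertions above can be reproduced directly on Decimal, without an UnsafeRowWriter; a small hedged spark-shell sketch:

import org.apache.spark.sql.types.Decimal

val d = Decimal(0.431)
d.changePrecision(38, 18)                               // true: the value fits the new precision and scale
d.toJavaBigDecimal.unscaledValue().toByteArray.length   // 8, the size checked by checkDecimalSizeInBytes

The interesting part of the test is the writer reuse: after writing the 11-byte decimal2, reset() must zero out the slot so that writing the 8-byte decimal1 produces a row identical to one written from scratch.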
Example 19
Source File: DecimalExpressionSuite.scala from XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }
Example 20
Source File: MatfastSerializer.scala from MatRel with Apache License 2.0
package org.apache.spark.sql.matfast.util import java.math.BigDecimal import java.nio.ByteBuffer import java.util.{HashMap => JavaHashMap} import scala.reflect.ClassTag import com.esotericsoftware.kryo.{Kryo, Serializer} import com.esotericsoftware.kryo.io.{Input, Output} import com.twitter.chill.ResourcePool import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.serializer.{KryoSerializer, SerializerInstance} import org.apache.spark.sql.matfast.matrix._ import org.apache.spark.sql.types.Decimal import org.apache.spark.util.MutablePair private[matfast] class MatfastSerializer(conf: SparkConf) extends KryoSerializer(conf) { override def newKryo(): Kryo = { val kryo = super.newKryo() kryo.setRegistrationRequired(false) kryo.register(classOf[MutablePair[_, _]]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericRow]) kryo.register(classOf[org.apache.spark.sql.catalyst.expressions.GenericInternalRow]) kryo.register(classOf[java.math.BigDecimal], new JavaBigDecimalSerializer) kryo.register(classOf[BigDecimal], new ScalaBigDecimalSerializer) kryo.register(classOf[Decimal]) kryo.register(classOf[JavaHashMap[_, _]]) kryo.register(classOf[DenseMatrix]) kryo.register(classOf[SparseMatrix]) kryo.setReferences(false) kryo } } private[matfast] class KryoResourcePool(size: Int) extends ResourcePool[SerializerInstance](size) { val ser: MatfastSerializer = { val sparkConf = Option(SparkEnv.get).map(_.conf).getOrElse(new SparkConf()) new MatfastSerializer(sparkConf) } def newInstance(): SerializerInstance = ser.newInstance() } private[matfast] object MatfastSerializer { @transient lazy val resourcePool = new KryoResourcePool(50) private[this] def acquireRelease[O](fn: SerializerInstance => O): O = { val kryo = resourcePool.borrow() try { fn(kryo) } finally { resourcePool.release(kryo) } } def serialize[T: ClassTag](o: T): Array[Byte] = { acquireRelease { k => k.serialize(o).array() } } def deserialize[T: ClassTag](bytes: Array[Byte]): T = acquireRelease { k => k.deserialize[T](ByteBuffer.wrap(bytes)) } } private[matfast] class JavaBigDecimalSerializer extends Serializer[java.math.BigDecimal] { def write(kryo: Kryo, output: Output, bd: java.math.BigDecimal) { output.writeString(bd.toString) } def read(kryo: Kryo, input: Input, tpe: Class[java.math.BigDecimal]): java.math.BigDecimal = { new java.math.BigDecimal(input.readString()) } } private[matfast] class ScalaBigDecimalSerializer extends Serializer[BigDecimal] { def write(kryo: Kryo, output: Output, bd: BigDecimal): Unit = { output.writeString(bd.toString) } def read(kryo: Kryo, input: Input, tpe: Class[BigDecimal]): BigDecimal = { new java.math.BigDecimal(input.readString()) } }
Example 21
Source File: ColumnarTestUtils.scala from drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 22
Source File: DecimalExpressionSuite.scala from drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("UnscaledValue") { val d1 = Decimal("10.1") checkEvaluation(UnscaledValue(Literal(d1)), 101L) val d2 = Decimal(101, 3, 1) checkEvaluation(UnscaledValue(Literal(d2)), 101L) checkEvaluation(UnscaledValue(Literal.create(null, DecimalType(2, 1))), null) } test("MakeDecimal") { checkEvaluation(MakeDecimal(Literal(101L), 3, 1), Decimal("10.1")) checkEvaluation(MakeDecimal(Literal.create(null, LongType), 3, 1), null) } test("PromotePrecision") { val d1 = Decimal("10.1") checkEvaluation(PromotePrecision(Literal(d1)), d1) val d2 = Decimal(101, 3, 1) checkEvaluation(PromotePrecision(Literal(d2)), d2) checkEvaluation(PromotePrecision(Literal.create(null, DecimalType(2, 1))), null) } test("CheckOverflow") { val d1 = Decimal("10.1") checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 1)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 2)), d1) checkEvaluation(CheckOverflow(Literal(d1), DecimalType(4, 3)), null) val d2 = Decimal(101, 3, 1) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 0)), Decimal("10")) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 1)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 2)), d2) checkEvaluation(CheckOverflow(Literal(d2), DecimalType(4, 3)), null) checkEvaluation(CheckOverflow(Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2)), null) } }