org.apache.spark.sql.catalyst.util.GenericArrayData Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.util.GenericArrayData.
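Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what GenericArrayData is: an ArrayData implementation backed by a plain Array[Any] of values already in Catalyst's internal format (for example UTF8String rather than java.lang.String). The wrapper object name is illustrative only; GenericArrayData and UTF8String are the standard Spark Catalyst classes.

import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.unsafe.types.UTF8String

object GenericArrayDataBasics {
  def main(args: Array[String]): Unit = {
    // Wrap an Array[Any] of Catalyst-format values; nulls are allowed.
    val ints = new GenericArrayData(Array[Any](1, 2, null, 4))
    println(ints.numElements()) // 4
    println(ints.getInt(0))     // 1
    println(ints.isNullAt(2))   // true

    // Strings must already be UTF8String, not java.lang.String.
    val strings = new GenericArrayData(
      Array[Any](UTF8String.fromString("a"), UTF8String.fromString("b")))
    println(strings.getUTF8String(1)) // b
  }
}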
Example 1
Source File: ColumnarTestUtils.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 2
Source File: InRange.scala From Simba with Apache License 2.0
package org.apache.spark.sql.simba.expression import org.apache.spark.sql.simba.{ShapeSerializer, ShapeType} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, Predicate} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.simba.spatial.{MBR, Point, Shape} import org.apache.spark.sql.simba.util.ShapeUtils import org.apache.spark.sql.catalyst.util.GenericArrayData case class InRange(shape: Expression, range_low: Expression, range_high: Expression) extends Predicate with CodegenFallback{ override def nullable: Boolean = false override def eval(input: InternalRow): Any = { val eval_shape = ShapeUtils.getShape(shape, input) val eval_low = range_low.asInstanceOf[Literal].value.asInstanceOf[Point] val eval_high = range_high.asInstanceOf[Literal].value.asInstanceOf[Point] require(eval_shape.dimensions == eval_low.dimensions && eval_shape.dimensions == eval_high.dimensions) val mbr = MBR(eval_low, eval_high) mbr.intersects(eval_shape) } override def toString: String = s" **($shape) IN Rectangle ($range_low) - ($range_high)** " override def children: Seq[Expression] = Seq(shape, range_low, range_high) }
Example 3
Source File: ShapeType.scala From Simba with Apache License 2.0
package org.apache.spark.sql.simba import org.apache.spark.sql.types._ import org.apache.spark.sql.simba.spatial.Shape import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayData} private[simba] class ShapeType extends UserDefinedType[Shape] { override def sqlType: DataType = ArrayType(ByteType, containsNull = false) override def serialize(s: Shape): Any = { new GenericArrayData(ShapeSerializer.serialize(s)) } override def userClass: Class[Shape] = classOf[Shape] override def deserialize(datum: Any): Shape = { datum match { case values: ArrayData => ShapeSerializer.deserialize(values.toByteArray) } } } case object ShapeType extends ShapeType
Example 4
Source File: PlinkRowToInternalRowConverter.scala From glow with Apache License 2.0
package io.projectglow.plink

import org.apache.spark.sql.SQLUtils.structFieldsEqualExceptNullability
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.{ArrayType, StructType}
import org.apache.spark.unsafe.types.UTF8String

import io.projectglow.common.{GlowLogging, VariantSchemas}
import io.projectglow.sql.util.RowConverter

class PlinkRowToInternalRowConverter(schema: StructType) extends GlowLogging {

  private val homAlt = new GenericArrayData(Array(1, 1))
  private val missing = new GenericArrayData(Array(-1, -1))
  private val het = new GenericArrayData(Array(0, 1))
  private val homRef = new GenericArrayData(Array(0, 0))

  private def twoBitsToCalls(twoBits: Int): GenericArrayData = {
    twoBits match {
      case 0 => homAlt // Homozygous for first (alternate) allele
      case 1 => missing // Missing genotype
      case 2 => het // Heterozygous
      case 3 => homRef // Homozygous for second (reference) allele
    }
  }

  private val converter = {
    val fns = schema.map { field =>
      val fn: RowConverter.Updater[(Array[UTF8String], Array[Byte])] = field match {
        case f if f.name == VariantSchemas.genotypesFieldName =>
          val gSchema = f.dataType.asInstanceOf[ArrayType].elementType.asInstanceOf[StructType]
          val converter = makeGenotypeConverter(gSchema)
          (samplesAndBlock, r, i) => {
            val genotypes = new Array[Any](samplesAndBlock._1.length)
            var sampleIdx = 0
            while (sampleIdx < genotypes.length) {
              val sample = samplesAndBlock._1(sampleIdx)
              // Get the relevant 2 bits for the sample from the block
              // The i-th sample's call bits are the (i%4)-th pair within the (i/4)-th block
              val twoBits = samplesAndBlock._2(sampleIdx / 4) >> (2 * (sampleIdx % 4)) & 3
              genotypes(sampleIdx) = converter((sample, twoBits))
              sampleIdx += 1
            }
            r.update(i, new GenericArrayData(genotypes))
          }
        case _ =>
          // BED file only contains genotypes
          (_, _, _) => ()
      }
      fn
    }
    new RowConverter[(Array[UTF8String], Array[Byte])](schema, fns.toArray)
  }

  private def makeGenotypeConverter(gSchema: StructType): RowConverter[(UTF8String, Int)] = {
    val functions = gSchema.map { field =>
      val fn: RowConverter.Updater[(UTF8String, Int)] = field match {
        case f if structFieldsEqualExceptNullability(f, VariantSchemas.sampleIdField) =>
          (sampleAndTwoBits, r, i) => {
            r.update(i, sampleAndTwoBits._1)
          }
        case f if structFieldsEqualExceptNullability(f, VariantSchemas.callsField) =>
          (sampleAndTwoBits, r, i) => r.update(i, twoBitsToCalls(sampleAndTwoBits._2))
        case f =>
          logger.info(
            s"Genotype field $f cannot be derived from PLINK files. It will be null " +
              s"for each sample."
          )
          (_, _, _) => ()
      }
      fn
    }
    new RowConverter[(UTF8String, Int)](gSchema, functions.toArray)
  }

  def convertRow(
      bimRow: InternalRow,
      sampleIds: Array[UTF8String],
      gtBlock: Array[Byte]): InternalRow = {
    converter((sampleIds, gtBlock), bimRow)
  }
}
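The converter above decodes PLINK BED genotype blocks in which each byte packs the calls of four samples, two bits per sample, as described in the inline comment. A standalone sketch of just that bit arithmetic (hypothetical object and method names, not Glow code) makes the indexing concrete:

object TwoBitUnpackSketch {
  // The i-th sample's call code is the (i % 4)-th two-bit pair of byte (i / 4) in the block.
  def unpack(block: Array[Byte], sampleIdx: Int): Int =
    block(sampleIdx / 4) >> (2 * (sampleIdx % 4)) & 3

  def main(args: Array[String]): Unit = {
    // One byte packs four samples: 0xD8 = 0b11011000 encodes codes 0, 2, 1, 3 for samples 0..3.
    val block = Array(0xD8.toByte)
    println((0 until 4).map(unpack(block, _))) // Vector(0, 2, 1, 3)
  }
}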
Example 5
Source File: VectorUDT.scala From mleap with Apache License 2.0
package org.apache.spark.ml.mleap

import com.truecar.mleap.core.linalg
import com.truecar.mleap.core.linalg.{DenseVector, SparseVector}
import org.apache.spark.annotation.AlphaComponent
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types._

@AlphaComponent
class VectorUDT extends UserDefinedType[linalg.Vector] {

  override def sqlType: StructType = {
    // type: 0 = sparse, 1 = dense
    // We only use "values" for dense vectors, and "size", "indices", and "values" for sparse
    // vectors. The "values" field is nullable because we might want to add binary vectors later,
    // which uses "size" and "indices", but not "values".
    StructType(Seq(
      StructField("type", ByteType, nullable = false),
      StructField("size", IntegerType, nullable = true),
      StructField("indices", ArrayType(IntegerType, containsNull = false), nullable = true),
      StructField("values", ArrayType(DoubleType, containsNull = false), nullable = true)))
  }

  override def serialize(obj: Any): InternalRow = {
    obj match {
      case SparseVector(size, indices, values) =>
        val row = new GenericMutableRow(4)
        row.setByte(0, 0)
        row.setInt(1, size)
        row.update(2, new GenericArrayData(indices.map(_.asInstanceOf[Any])))
        row.update(3, new GenericArrayData(values.map(_.asInstanceOf[Any])))
        row
      case DenseVector(values) =>
        val row = new GenericMutableRow(4)
        row.setByte(0, 1)
        row.setNullAt(1)
        row.setNullAt(2)
        row.update(3, new GenericArrayData(values.map(_.asInstanceOf[Any])))
        row
    }
  }

  override def deserialize(datum: Any): linalg.Vector = {
    datum match {
      case row: InternalRow =>
        require(row.numFields == 4,
          s"VectorUDT.deserialize given row with length ${row.numFields} but requires length == 4")
        val tpe = row.getByte(0)
        tpe match {
          case 0 =>
            val size = row.getInt(1)
            val indices = row.getArray(2).toIntArray()
            val values = row.getArray(3).toDoubleArray()
            new SparseVector(size, indices, values)
          case 1 =>
            val values = row.getArray(3).toDoubleArray()
            new DenseVector(values)
        }
    }
  }

  override def pyUDT: String = "pyspark.mllib.linalg.VectorUDT"

  override def userClass: Class[linalg.Vector] = classOf[linalg.Vector]

  override def equals(o: Any): Boolean = {
    o match {
      case v: VectorUDT => true
      case _ => false
    }
  }

  // see [SPARK-8647], this achieves the needed constant hash code without constant no.
  override def hashCode(): Int = classOf[VectorUDT].getName.hashCode()

  override def typeName: String = "vector"

  private[spark] override def asNullable: VectorUDT = this
}
Example 6
Source File: ColumnarTestUtils.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow} import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
Example 7
Source File: ExamplePointUDT.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.test import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayData} import org.apache.spark.sql.types._ private[sql] class ExamplePointUDT extends UserDefinedType[ExamplePoint] { override def sqlType: DataType = ArrayType(DoubleType, false) override def pyUDT: String = "pyspark.sql.tests.ExamplePointUDT" override def serialize(obj: Any): GenericArrayData = { obj match { case p: ExamplePoint => val output = new Array[Any](2) output(0) = p.x output(1) = p.y new GenericArrayData(output) } } override def deserialize(datum: Any): ExamplePoint = { datum match { case values: ArrayData => new ExamplePoint(values.getDouble(0), values.getDouble(1)) } } override def userClass: Class[ExamplePoint] = classOf[ExamplePoint] private[spark] override def asNullable: ExamplePointUDT = this }
Example 8
Source File: ColumnarTestUtils.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 9
Source File: CatalystTypeConvertersSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ class CatalystTypeConvertersSuite extends SparkFunSuite { private val simpleTypes: Seq[DataType] = Seq( StringType, DateType, BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.SYSTEM_DEFAULT, DecimalType.USER_DEFAULT) test("null handling in rows") { val schema = StructType(simpleTypes.map(t => StructField(t.getClass.getName, t))) val convertToCatalyst = CatalystTypeConverters.createToCatalystConverter(schema) val convertToScala = CatalystTypeConverters.createToScalaConverter(schema) val scalaRow = Row.fromSeq(Seq.fill(simpleTypes.length)(null)) assert(convertToScala(convertToCatalyst(scalaRow)) === scalaRow) } test("null handling for individual values") { for (dataType <- simpleTypes) { assert(CatalystTypeConverters.createToScalaConverter(dataType)(null) === null) } } test("option handling in convertToCatalyst") { // convertToCatalyst doesn't handle unboxing from Options. This is inconsistent with // createToCatalystConverter but it may not actually matter as this is only called internally // in a handful of places where we don't expect to receive Options. assert(CatalystTypeConverters.convertToCatalyst(Some(123)) === Some(123)) } test("option handling in createToCatalystConverter") { assert(CatalystTypeConverters.createToCatalystConverter(IntegerType)(Some(123)) === 123) } test("primitive array handling") { val intArray = Array(1, 100, 10000) val intUnsafeArray = UnsafeArrayData.fromPrimitiveArray(intArray) val intArrayType = ArrayType(IntegerType, false) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intUnsafeArray) === intArray) val doubleArray = Array(1.1, 111.1, 11111.1) val doubleUnsafeArray = UnsafeArrayData.fromPrimitiveArray(doubleArray) val doubleArrayType = ArrayType(DoubleType, false) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleUnsafeArray) === doubleArray) } test("An array with null handling") { val intArray = Array(1, null, 100, null, 10000) val intGenericArray = new GenericArrayData(intArray) val intArrayType = ArrayType(IntegerType, true) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intGenericArray) === intArray) assert(CatalystTypeConverters.createToCatalystConverter(intArrayType)(intArray) == intGenericArray) val doubleArray = Array(1.1, null, 111.1, null, 11111.1) val doubleGenericArray = new GenericArrayData(doubleArray) val doubleArrayType = ArrayType(DoubleType, true) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleGenericArray) === doubleArray) assert(CatalystTypeConverters.createToCatalystConverter(doubleArrayType)(doubleArray) == doubleGenericArray) } }
Example 10
Source File: JacksonGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.json import java.io.CharArrayWriter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { val gmtId = DateTimeUtils.TimeZoneGMT.getID val option = new JSONOptions(Map.empty, gmtId) test("initial with StructType and write out a row") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = InternalRow(1) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """{"a":1}""") } test("initial with StructType and write out rows") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{"a":1},{"a":2}]""") } test("initial with StructType and write out an array with single empty row") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(InternalRow(null) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{}]""") } test("initial with StructType and write out an empty array") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[]""") } test("initial with Map and write out a map data") { val dataType = MapType(StringType, IntegerType) val input = ArrayBasedMapData(Map("a" -> 1)) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """{"a":1}""") } test("initial with Map and write out an array of maps") { val dataType = MapType(StringType, IntegerType) val input = new GenericArrayData( ArrayBasedMapData(Map("a" -> 1)) :: ArrayBasedMapData(Map("b" -> 2)) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{"a":1},{"b":2}]""") } test("error handling: initial with StructType but error calling write a map") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = ArrayBasedMapData(Map("a" -> 1)) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) intercept[UnsupportedOperationException] { gen.write(input) } } test("error handling: initial with MapType and write out a row") { val dataType = MapType(StringType, IntegerType) val input = InternalRow(1) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) intercept[UnsupportedOperationException] { gen.write(input) } } }
Example 11
Source File: ObjectExpressionsSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{IntegerType, ObjectType} class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-16622: The returned value of the called method in Invoke can be null") { val inputRow = InternalRow.fromSeq(Seq((false, null))) val cls = classOf[Tuple2[Boolean, java.lang.Integer]] val inputObject = BoundReference(0, ObjectType(cls), nullable = true) val invoke = Invoke(inputObject, "_2", IntegerType) checkEvaluationWithGeneratedMutableProjection(invoke, null, inputRow) } test("MapObjects should make copies of unsafe-backed data") { // test UnsafeRow-backed data val structEncoder = ExpressionEncoder[Array[Tuple2[java.lang.Integer, java.lang.Integer]]] val structInputRow = InternalRow.fromSeq(Seq(Array((1, 2), (3, 4)))) val structExpected = new GenericArrayData( Array(InternalRow.fromSeq(Seq(1, 2)), InternalRow.fromSeq(Seq(3, 4)))) checkEvalutionWithUnsafeProjection( structEncoder.serializer.head, structExpected, structInputRow) // test UnsafeArray-backed data val arrayEncoder = ExpressionEncoder[Array[Array[Int]]] val arrayInputRow = InternalRow.fromSeq(Seq(Array(Array(1, 2), Array(3, 4)))) val arrayExpected = new GenericArrayData( Array(new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(3, 4)))) checkEvalutionWithUnsafeProjection( arrayEncoder.serializer.head, arrayExpected, arrayInputRow) // test UnsafeMap-backed data val mapEncoder = ExpressionEncoder[Array[Map[Int, Int]]] val mapInputRow = InternalRow.fromSeq(Seq(Array( Map(1 -> 100, 2 -> 200), Map(3 -> 300, 4 -> 400)))) val mapExpected = new GenericArrayData(Seq( new ArrayBasedMapData( new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(100, 200))), new ArrayBasedMapData( new GenericArrayData(Array(3, 4)), new GenericArrayData(Array(300, 400))))) checkEvalutionWithUnsafeProjection( mapEncoder.serializer.head, mapExpected, mapInputRow) } }
Example 12
Source File: ColumnarTestUtils.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 13
Source File: CatalystTypeConvertersSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ class CatalystTypeConvertersSuite extends SparkFunSuite { private val simpleTypes: Seq[DataType] = Seq( StringType, DateType, BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.SYSTEM_DEFAULT, DecimalType.USER_DEFAULT) test("null handling in rows") { val schema = StructType(simpleTypes.map(t => StructField(t.getClass.getName, t))) val convertToCatalyst = CatalystTypeConverters.createToCatalystConverter(schema) val convertToScala = CatalystTypeConverters.createToScalaConverter(schema) val scalaRow = Row.fromSeq(Seq.fill(simpleTypes.length)(null)) assert(convertToScala(convertToCatalyst(scalaRow)) === scalaRow) } test("null handling for individual values") { for (dataType <- simpleTypes) { assert(CatalystTypeConverters.createToScalaConverter(dataType)(null) === null) } } test("option handling in convertToCatalyst") { // convertToCatalyst doesn't handle unboxing from Options. This is inconsistent with // createToCatalystConverter but it may not actually matter as this is only called internally // in a handful of places where we don't expect to receive Options. assert(CatalystTypeConverters.convertToCatalyst(Some(123)) === Some(123)) } test("option handling in createToCatalystConverter") { assert(CatalystTypeConverters.createToCatalystConverter(IntegerType)(Some(123)) === 123) } test("primitive array handling") { val intArray = Array(1, 100, 10000) val intUnsafeArray = UnsafeArrayData.fromPrimitiveArray(intArray) val intArrayType = ArrayType(IntegerType, false) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intUnsafeArray) === intArray) val doubleArray = Array(1.1, 111.1, 11111.1) val doubleUnsafeArray = UnsafeArrayData.fromPrimitiveArray(doubleArray) val doubleArrayType = ArrayType(DoubleType, false) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleUnsafeArray) === doubleArray) } test("An array with null handling") { val intArray = Array(1, null, 100, null, 10000) val intGenericArray = new GenericArrayData(intArray) val intArrayType = ArrayType(IntegerType, true) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intGenericArray) === intArray) assert(CatalystTypeConverters.createToCatalystConverter(intArrayType)(intArray) == intGenericArray) val doubleArray = Array(1.1, null, 111.1, null, 11111.1) val doubleGenericArray = new GenericArrayData(doubleArray) val doubleArrayType = ArrayType(DoubleType, true) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleGenericArray) === doubleArray) assert(CatalystTypeConverters.createToCatalystConverter(doubleArrayType)(doubleArray) == doubleGenericArray) } }
Example 14
Source File: ObjectExpressionsSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{IntegerType, ObjectType} class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-16622: The returned value of the called method in Invoke can be null") { val inputRow = InternalRow.fromSeq(Seq((false, null))) val cls = classOf[Tuple2[Boolean, java.lang.Integer]] val inputObject = BoundReference(0, ObjectType(cls), nullable = true) val invoke = Invoke(inputObject, "_2", IntegerType) checkEvaluationWithGeneratedMutableProjection(invoke, null, inputRow) } test("MapObjects should make copies of unsafe-backed data") { // test UnsafeRow-backed data val structEncoder = ExpressionEncoder[Array[Tuple2[java.lang.Integer, java.lang.Integer]]] val structInputRow = InternalRow.fromSeq(Seq(Array((1, 2), (3, 4)))) val structExpected = new GenericArrayData( Array(InternalRow.fromSeq(Seq(1, 2)), InternalRow.fromSeq(Seq(3, 4)))) checkEvalutionWithUnsafeProjection( structEncoder.serializer.head, structExpected, structInputRow) // test UnsafeArray-backed data val arrayEncoder = ExpressionEncoder[Array[Array[Int]]] val arrayInputRow = InternalRow.fromSeq(Seq(Array(Array(1, 2), Array(3, 4)))) val arrayExpected = new GenericArrayData( Array(new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(3, 4)))) checkEvalutionWithUnsafeProjection( arrayEncoder.serializer.head, arrayExpected, arrayInputRow) // test UnsafeMap-backed data val mapEncoder = ExpressionEncoder[Array[Map[Int, Int]]] val mapInputRow = InternalRow.fromSeq(Seq(Array( Map(1 -> 100, 2 -> 200), Map(3 -> 300, 4 -> 400)))) val mapExpected = new GenericArrayData(Seq( new ArrayBasedMapData( new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(100, 200))), new ArrayBasedMapData( new GenericArrayData(Array(3, 4)), new GenericArrayData(Array(300, 400))))) checkEvalutionWithUnsafeProjection( mapEncoder.serializer.head, mapExpected, mapInputRow) } }
Example 15
Source File: TypeConversion.scala From spark-dynamodb with Apache License 2.0
package com.audienceproject.spark.dynamodb.datasource

import com.amazonaws.services.dynamodbv2.document.{IncompatibleTypeException, Item}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

import scala.collection.JavaConverters._

private[dynamodb] object TypeConversion {

  def apply(attrName: String, sparkType: DataType): Item => Any = sparkType match {
    case BooleanType => nullableGet(_.getBOOL)(attrName)
    case StringType => nullableGet(item => attrName => UTF8String.fromString(item.getString(attrName)))(attrName)
    case IntegerType => nullableGet(_.getInt)(attrName)
    case LongType => nullableGet(_.getLong)(attrName)
    case DoubleType => nullableGet(_.getDouble)(attrName)
    case FloatType => nullableGet(_.getFloat)(attrName)
    case BinaryType => nullableGet(_.getBinary)(attrName)
    case DecimalType() => nullableGet(_.getNumber)(attrName)
    case ArrayType(innerType, _) =>
      nullableGet(_.getList)(attrName).andThen(extractArray(convertValue(innerType)))
    case MapType(keyType, valueType, _) =>
      if (keyType != StringType) throw new IllegalArgumentException(
        s"Invalid Map key type '${keyType.typeName}'. DynamoDB only supports String as Map key type.")
      nullableGet(_.getRawMap)(attrName).andThen(extractMap(convertValue(valueType)))
    case StructType(fields) =>
      val nestedConversions = fields.collect({
        case StructField(name, dataType, _, _) => name -> convertValue(dataType)
      })
      nullableGet(_.getRawMap)(attrName).andThen(extractStruct(nestedConversions))
    case _ => throw new IllegalArgumentException(
      s"Spark DataType '${sparkType.typeName}' could not be mapped to a corresponding DynamoDB data type.")
  }

  private val stringConverter = (value: Any) => UTF8String.fromString(value.asInstanceOf[String])

  private def convertValue(sparkType: DataType): Any => Any = sparkType match {
    case IntegerType => nullableConvert(_.intValue())
    case LongType => nullableConvert(_.longValue())
    case DoubleType => nullableConvert(_.doubleValue())
    case FloatType => nullableConvert(_.floatValue())
    case DecimalType() => nullableConvert(identity)
    case ArrayType(innerType, _) => extractArray(convertValue(innerType))
    case MapType(keyType, valueType, _) =>
      if (keyType != StringType) throw new IllegalArgumentException(
        s"Invalid Map key type '${keyType.typeName}'. DynamoDB only supports String as Map key type.")
      extractMap(convertValue(valueType))
    case StructType(fields) =>
      val nestedConversions = fields.collect({
        case StructField(name, dataType, _, _) => name -> convertValue(dataType)
      })
      extractStruct(nestedConversions)
    case BooleanType => {
      case boolean: Boolean => boolean
      case _ => null
    }
    case StringType => {
      case string: String => UTF8String.fromString(string)
      case _ => null
    }
    case BinaryType => {
      case byteArray: Array[Byte] => byteArray
      case _ => null
    }
    case _ => throw new IllegalArgumentException(
      s"Spark DataType '${sparkType.typeName}' could not be mapped to a corresponding DynamoDB data type.")
  }

  private def nullableGet(getter: Item => String => Any)(attrName: String): Item => Any = {
    case item if item.hasAttribute(attrName) => try getter(item)(attrName) catch {
      case _: NumberFormatException => null
      case _: IncompatibleTypeException => null
    }
    case _ => null
  }

  private def nullableConvert(converter: java.math.BigDecimal => Any): Any => Any = {
    case item: java.math.BigDecimal => converter(item)
    case _ => null
  }

  private def extractArray(converter: Any => Any): Any => Any = {
    case list: java.util.List[_] => new GenericArrayData(list.asScala.map(converter))
    case set: java.util.Set[_] => new GenericArrayData(set.asScala.map(converter).toSeq)
    case _ => null
  }

  private def extractMap(converter: Any => Any): Any => Any = {
    case map: java.util.Map[_, _] => ArrayBasedMapData(map, stringConverter, converter)
    case _ => null
  }

  private def extractStruct(conversions: Seq[(String, Any => Any)]): Any => Any = {
    case map: java.util.Map[_, _] => InternalRow.fromSeq(conversions.map({
      case (name, conv) => conv(map.get(name))
    }))
    case _ => null
  }
}
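As a small aside, the extractArray helper above turns DynamoDB's java.util.List and java.util.Set values into GenericArrayData. A standalone sketch of the same idea (illustrative names, not spark-dynamodb code), assuming the converted elements are already in Catalyst format:

import scala.collection.JavaConverters._
import org.apache.spark.sql.catalyst.util.GenericArrayData

object JavaListToCatalystArray {
  def main(args: Array[String]): Unit = {
    val javaList: java.util.List[Integer] = java.util.Arrays.asList(1, 2, 3)
    // Convert to an Array[Any] of unboxed Ints before wrapping.
    val arr = new GenericArrayData(javaList.asScala.map(i => i.intValue(): Any).toArray)
    println(arr.toIntArray().mkString(",")) // 1,2,3
  }
}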
Example 16
Source File: collect.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.aggregate import scala.collection.generic.Growable import scala.collection.mutable import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - Collects and returns a set of unique elements.") case class CollectSet( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends Collect { def this(child: Expression) = this(child, 0, 0) override def checkInputDataTypes(): TypeCheckResult = { if (!child.dataType.existsRecursively(_.isInstanceOf[MapType])) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure("collect_set() cannot have map type data") } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override def prettyName: String = "collect_set" override protected[this] val buffer: mutable.HashSet[Any] = mutable.HashSet.empty }
Example 17
Source File: CatalystTypeConvertersSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ class CatalystTypeConvertersSuite extends SparkFunSuite { private val simpleTypes: Seq[DataType] = Seq( StringType, DateType, BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.SYSTEM_DEFAULT, DecimalType.USER_DEFAULT) test("null handling in rows") { val schema = StructType(simpleTypes.map(t => StructField(t.getClass.getName, t))) val convertToCatalyst = CatalystTypeConverters.createToCatalystConverter(schema) val convertToScala = CatalystTypeConverters.createToScalaConverter(schema) val scalaRow = Row.fromSeq(Seq.fill(simpleTypes.length)(null)) assert(convertToScala(convertToCatalyst(scalaRow)) === scalaRow) } test("null handling for individual values") { for (dataType <- simpleTypes) { assert(CatalystTypeConverters.createToScalaConverter(dataType)(null) === null) } } test("option handling in convertToCatalyst") { // convertToCatalyst doesn't handle unboxing from Options. This is inconsistent with // createToCatalystConverter but it may not actually matter as this is only called internally // in a handful of places where we don't expect to receive Options. assert(CatalystTypeConverters.convertToCatalyst(Some(123)) === Some(123)) } test("option handling in createToCatalystConverter") { assert(CatalystTypeConverters.createToCatalystConverter(IntegerType)(Some(123)) === 123) } test("primitive array handling") { val intArray = Array(1, 100, 10000) val intUnsafeArray = UnsafeArrayData.fromPrimitiveArray(intArray) val intArrayType = ArrayType(IntegerType, false) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intUnsafeArray) === intArray) val doubleArray = Array(1.1, 111.1, 11111.1) val doubleUnsafeArray = UnsafeArrayData.fromPrimitiveArray(doubleArray) val doubleArrayType = ArrayType(DoubleType, false) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleUnsafeArray) === doubleArray) } test("An array with null handling") { val intArray = Array(1, null, 100, null, 10000) val intGenericArray = new GenericArrayData(intArray) val intArrayType = ArrayType(IntegerType, true) assert(CatalystTypeConverters.createToScalaConverter(intArrayType)(intGenericArray) === intArray) assert(CatalystTypeConverters.createToCatalystConverter(intArrayType)(intArray) == intGenericArray) val doubleArray = Array(1.1, null, 111.1, null, 11111.1) val doubleGenericArray = new GenericArrayData(doubleArray) val doubleArrayType = ArrayType(DoubleType, true) assert(CatalystTypeConverters.createToScalaConverter(doubleArrayType)(doubleGenericArray) === doubleArray) assert(CatalystTypeConverters.createToCatalystConverter(doubleArrayType)(doubleArray) == doubleGenericArray) } }
Example 18
Source File: ObjectExpressionsSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{IntegerType, ObjectType} class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-16622: The returned value of the called method in Invoke can be null") { val inputRow = InternalRow.fromSeq(Seq((false, null))) val cls = classOf[Tuple2[Boolean, java.lang.Integer]] val inputObject = BoundReference(0, ObjectType(cls), nullable = true) val invoke = Invoke(inputObject, "_2", IntegerType) checkEvaluationWithGeneratedMutableProjection(invoke, null, inputRow) } test("MapObjects should make copies of unsafe-backed data") { // test UnsafeRow-backed data val structEncoder = ExpressionEncoder[Array[Tuple2[java.lang.Integer, java.lang.Integer]]] val structInputRow = InternalRow.fromSeq(Seq(Array((1, 2), (3, 4)))) val structExpected = new GenericArrayData( Array(InternalRow.fromSeq(Seq(1, 2)), InternalRow.fromSeq(Seq(3, 4)))) checkEvalutionWithUnsafeProjection( structEncoder.serializer.head, structExpected, structInputRow) // test UnsafeArray-backed data val arrayEncoder = ExpressionEncoder[Array[Array[Int]]] val arrayInputRow = InternalRow.fromSeq(Seq(Array(Array(1, 2), Array(3, 4)))) val arrayExpected = new GenericArrayData( Array(new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(3, 4)))) checkEvalutionWithUnsafeProjection( arrayEncoder.serializer.head, arrayExpected, arrayInputRow) // test UnsafeMap-backed data val mapEncoder = ExpressionEncoder[Array[Map[Int, Int]]] val mapInputRow = InternalRow.fromSeq(Seq(Array( Map(1 -> 100, 2 -> 200), Map(3 -> 300, 4 -> 400)))) val mapExpected = new GenericArrayData(Seq( new ArrayBasedMapData( new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(100, 200))), new ArrayBasedMapData( new GenericArrayData(Array(3, 4)), new GenericArrayData(Array(300, 400))))) checkEvalutionWithUnsafeProjection( mapEncoder.serializer.head, mapExpected, mapInputRow) } }
Example 19
Source File: IndexerSuite.scala From magellan with Apache License 2.0
package magellan.catalyst import magellan.{MockPointExpr, Point, TestSparkContext} import magellan.index.ZOrderCurve import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, Indexer} import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.magellan.dsl.expressions._ import org.scalatest.FunSuite class IndexerSuite extends FunSuite with TestSparkContext { test("index points") { val sqlCtx = this.sqlContext val path = this.getClass.getClassLoader.getResource("testpoint/").getPath val df = sqlCtx.read.format("magellan").load(path) import sqlCtx.implicits._ val index = df.withColumn("index", $"point" index 25) .select($"index.curve") .take(1)(0)(0) .asInstanceOf[Seq[ZOrderCurve]] assert(index.map(_.toBase32()) === Seq("9z109")) try { df.withColumn("index", $"point" index 23) assert(false) } catch { case e: Error => assert(true) } } test("eval: Index") { val indexer = Indexer(MockPointExpr(Point(-122.3959313, 37.7912976)), 25) val result = indexer.eval(null).asInstanceOf[GenericArrayData] assert(result.numElements() === 1) val resultRow = result.get(0, Indexer.dataType).asInstanceOf[GenericInternalRow] val indexUDT = Indexer.indexUDT val curve = indexUDT.deserialize(resultRow.get(0, indexUDT)) assert(curve.toBase32() === "9q8yy") val relation = resultRow.getString(1) assert(relation === "Contains") } }
Example 20
Source File: ColumnarTestUtils.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 21
Source File: JacksonGeneratorSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.json import java.io.CharArrayWriter import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.types._ class JacksonGeneratorSuite extends SparkFunSuite { val gmtId = DateTimeUtils.TimeZoneGMT.getID val option = new JSONOptions(Map.empty, gmtId) test("initial with StructType and write out a row") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = InternalRow(1) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """{"a":1}""") } test("initial with StructType and write out rows") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(InternalRow(1) :: InternalRow(2) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{"a":1},{"a":2}]""") } test("initial with StructType and write out an array with single empty row") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(InternalRow(null) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{}]""") } test("initial with StructType and write out an empty array") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = new GenericArrayData(Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[]""") } test("initial with Map and write out a map data") { val dataType = MapType(StringType, IntegerType) val input = ArrayBasedMapData(Map("a" -> 1)) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """{"a":1}""") } test("initial with Map and write out an array of maps") { val dataType = MapType(StringType, IntegerType) val input = new GenericArrayData( ArrayBasedMapData(Map("a" -> 1)) :: ArrayBasedMapData(Map("b" -> 2)) :: Nil) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) gen.write(input) gen.flush() assert(writer.toString === """[{"a":1},{"b":2}]""") } test("error handling: initial with StructType but error calling write a map") { val dataType = StructType(StructField("a", IntegerType) :: Nil) val input = ArrayBasedMapData(Map("a" -> 1)) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) intercept[UnsupportedOperationException] { gen.write(input) } } test("error handling: initial with MapType and write out a row") { val dataType = MapType(StringType, IntegerType) val input = InternalRow(1) val writer = new CharArrayWriter() val gen = new JacksonGenerator(dataType, writer, option) intercept[UnsupportedOperationException] { gen.write(input) } } }
Example 22
Source File: collect.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.aggregate import scala.collection.generic.Growable import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - Collects and returns a set of unique elements.") case class CollectSet( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends Collect[mutable.HashSet[Any]] { def this(child: Expression) = this(child, 0, 0) override def checkInputDataTypes(): TypeCheckResult = { if (!child.dataType.existsRecursively(_.isInstanceOf[MapType])) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure("collect_set() cannot have map type data") } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override def prettyName: String = "collect_set" override def createAggregationBuffer(): mutable.HashSet[Any] = mutable.HashSet.empty }
Example 23
Source File: XmlDataToCatalyst.scala From spark-xml with Apache License 2.0
package com.databricks.spark.xml import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import com.databricks.spark.xml.parsers.StaxXmlParser case class XmlDataToCatalyst( child: Expression, schema: DataType, options: XmlOptions) extends UnaryExpression with CodegenFallback with ExpectsInputTypes { override lazy val dataType: DataType = schema @transient lazy val rowSchema: StructType = schema match { case st: StructType => st case ArrayType(st: StructType, _) => st } override def nullSafeEval(xml: Any): Any = xml match { case string: UTF8String => CatalystTypeConverters.convertToCatalyst( StaxXmlParser.parseColumn(string.toString, rowSchema, options)) case string: String => StaxXmlParser.parseColumn(string, rowSchema, options) case arr: GenericArrayData => CatalystTypeConverters.convertToCatalyst( arr.array.map(s => StaxXmlParser.parseColumn(s.toString, rowSchema, options))) case arr: Array[_] => arr.map(s => StaxXmlParser.parseColumn(s.toString, rowSchema, options)) case _ => null } override def inputTypes: Seq[DataType] = schema match { case _: StructType => Seq(StringType) case ArrayType(_: StructType, _) => Seq(ArrayType(StringType)) } }
Example 24
Source File: ArrowSummarizer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.rdd.function.summarize.summarizer

import java.io.ByteArrayOutputStream
import java.nio.channels.Channels
import java.util

import com.twosigma.flint.arrow.{ ArrowFieldWriter, ArrowPayload, ArrowUtils, ArrowWriter }
import org.apache.arrow.memory.{ BufferAllocator, RootAllocator }
import org.apache.arrow.vector.VectorSchemaRoot
import org.apache.arrow.vector.ipc.ArrowFileWriter
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.StructType

import scala.collection.JavaConverters._

case class ArrowSummarizer(inputSchema: StructType, outputSchema: StructType, includeBaseRows: Boolean)
  extends Summarizer[InternalRow, ArrowSummarizerState, ArrowSummarizerResult] {

  private[this] val size = outputSchema.size
  require(size > 0, "Cannot create summarizer with no input columns")

  // This function will allocate memory from the BufferAllocator to initialize arrow vectors.
  override def zero(): ArrowSummarizerState = {
    new ArrowSummarizerState(false, null, null, null, null)
  }

  private def init(u: ArrowSummarizerState): Unit = {
    if (!u.initialized) {
      val arrowSchema = ArrowUtils.toArrowSchema(outputSchema)
      val allocator = new RootAllocator(Int.MaxValue)
      val root = VectorSchemaRoot.create(arrowSchema, allocator)
      val arrowWriter = ArrowWriter.create(inputSchema, outputSchema, root)

      u.initialized = true
      u.baseRows = new util.ArrayList[InternalRow]()
      u.allocator = allocator
      u.root = root
      u.arrowWriter = arrowWriter
    }
  }

  override def add(u: ArrowSummarizerState, row: InternalRow): ArrowSummarizerState = {
    if (!u.initialized) {
      init(u)
    }

    if (includeBaseRows) {
      u.baseRows.add(row)
    }
    u.arrowWriter.write(row)
    u
  }

  override def merge(
    u1: ArrowSummarizerState,
    u2: ArrowSummarizerState
  ): ArrowSummarizerState = throw new UnsupportedOperationException()

  // This can only be called once
  override def render(u: ArrowSummarizerState): ArrowSummarizerResult = {
    if (u.initialized) {
      val out = new ByteArrayOutputStream()
      val writer = new ArrowFileWriter(u.root, null, Channels.newChannel(out))

      u.arrowWriter.finish()
      writer.writeBatch()

      writer.close()
      u.root.close()
      u.allocator.close()

      val rows = u.baseRows.toArray.asInstanceOf[Array[Any]]
      ArrowSummarizerResult(rows, out.toByteArray)
    } else {
      ArrowSummarizerResult(Array.empty, Array.empty)
    }
  }

  override def close(u: ArrowSummarizerState): Unit = {
    if (u.initialized) {
      u.arrowWriter.reset()
      u.root.close()
      u.allocator.close()
    }
  }
}
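The summarizer follows a zero -> add -> render lifecycle. A hypothetical driver, only as a sketch of that flow; the helper name is invented here, and the rows are assumed to already be InternalRows matching inputSchema:

import org.apache.spark.sql.catalyst.InternalRow

// Fold a batch of rows through the summarizer and return the serialized Arrow file bytes.
def summarizeToArrowBytes(summarizer: ArrowSummarizer, rows: Iterator[InternalRow]): Array[Byte] = {
  var state = summarizer.zero()
  rows.foreach { row => state = summarizer.add(state, row) }
  summarizer.render(state).arrowBatch // render() also closes the root and allocator
}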
Example 25
Source File: ArrowSummarizer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.timeseries.summarize.summarizer

import com.twosigma.flint.rdd.function.summarize.summarizer.{
  ArrowSummarizerResult, ArrowSummarizerState, ArrowSummarizer => ArrowSum
}
import com.twosigma.flint.timeseries.row.Schema
import com.twosigma.flint.timeseries.summarize.{ ColumnList, InputAlwaysValid, Summarizer, SummarizerFactory }
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.{ ArrayType, BinaryType, StructType }

object ArrowSummarizer {
  val baseRowsColumnName = "__baseRows"
  val arrowBatchColumnName = "arrow_bytes"
}

case class ArrowSummarizerFactory(columns: Seq[String], includeBaseRows: Boolean) extends SummarizerFactory {
  override val requiredColumns: ColumnList =
    if (includeBaseRows) {
      ColumnList.All
    } else {
      ColumnList.Sequence(columns)
    }

  override def apply(inputSchema: StructType): ArrowSummarizer = {
    val outputBatchSchema = StructType(columns.map(col => inputSchema(inputSchema.fieldIndex(col))))
    ArrowSummarizer(inputSchema, outputBatchSchema, includeBaseRows, prefixOpt, requiredColumns)
  }
}

case class ArrowSummarizer(
  override val inputSchema: StructType,
  outputBatchSchema: StructType,
  includeBaseRows: Boolean,
  override val prefixOpt: Option[String],
  requiredColumns: ColumnList
) extends Summarizer with InputAlwaysValid {
  override type T = InternalRow
  override type U = ArrowSummarizerState
  override type V = ArrowSummarizerResult

  override val summarizer = ArrowSum(inputSchema, outputBatchSchema, includeBaseRows)

  override val schema: StructType =
    if (includeBaseRows) {
      Schema.of(
        ArrowSummarizer.baseRowsColumnName -> ArrayType(inputSchema),
        ArrowSummarizer.arrowBatchColumnName -> BinaryType
      )
    } else {
      Schema.of(
        ArrowSummarizer.arrowBatchColumnName -> BinaryType
      )
    }

  override def toT(r: InternalRow): T = r

  override def fromV(v: V): InternalRow =
    if (includeBaseRows) {
      InternalRow(new GenericArrayData(v.baseRows), v.arrowBatch)
    } else {
      InternalRow(v.arrowBatch)
    }
}
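A short sketch of how the factory could be applied directly, assuming it runs alongside the classes above (same package and classpath); the schema and column names are placeholders:

import org.apache.spark.sql.types._

// Only "price" ends up in the Arrow batch, because includeBaseRows = false
// selects ColumnList.Sequence(columns) and prunes outputBatchSchema above.
val inputSchema = StructType(Seq(
  StructField("time", LongType),
  StructField("id", IntegerType),
  StructField("price", DoubleType)))
val factory = ArrowSummarizerFactory(Seq("price"), includeBaseRows = false)
val arrowSummarizer = factory.apply(inputSchema)
// With includeBaseRows = false, the output carries just the "arrow_bytes" BinaryType column.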
Example 26
Source File: StackSummarizerFactory.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.timeseries.summarize.summarizer

import com.twosigma.flint.timeseries.summarize._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types.{ ArrayType, StructField, StructType }

case class StackSummarizerFactory(factories: Seq[SummarizerFactory]) extends SummarizerFactory {

  factories.foreach { factory =>
    require(
      !factory.isInstanceOf[OverlappableSummarizerFactory],
      "Stacking overlappable summarizers is not supported"
    )
  }

  override val requiredColumns: ColumnList = factories.map(_.requiredColumns).reduce(_ ++ _)

  def apply(inputSchema: StructType): Summarizer = {
    val summarizers = factories.map(f => f.apply(inputSchema))
    new StackSummarizer(inputSchema, prefixOpt, requiredColumns, summarizers)
  }
}

class StackSummarizer(
  override val inputSchema: StructType,
  override val prefixOpt: Option[String],
  override val requiredColumns: ColumnList,
  summarizers: Seq[Summarizer]
) extends Summarizer with InputAlwaysValid {

  override type T = InternalRow
  override type U = Seq[Any]
  override type V = Seq[InternalRow]

  require(
    summarizers.forall(s => s.outputSchema == summarizers.head.outputSchema),
    s"Summarizers must have identical schemas to be stacked: " +
      s"${summarizers.map(_.outputSchema).mkString(" vs. ")}"
  )

  override val schema: StructType = StructType(
    StructField(StackSummarizer.stackColumn, ArrayType(summarizers.head.outputSchema)) :: Nil
  )

  override val summarizer =
    com.twosigma.flint.rdd.function.summarize.summarizer.StackSummarizer(summarizers)

  // Convert the output of `summarizer` to an InternalRow.
  override def fromV(v: V): InternalRow = InternalRow(new GenericArrayData(v))

  // Convert the InternalRow to the type of row expected by the `summarizer`.
  override def toT(r: InternalRow): T = r
}

object StackSummarizer {
  val stackColumn = "stack"
}
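A small hypothetical helper, assuming it lives next to the classes above so the imports from this file apply; the factories and input schema are placeholders supplied by the caller:

import org.apache.spark.sql.types.StructType

// Stack factories that share an output schema and inspect the combined schema.
def stackedOutputSchema(inputSchema: StructType, factories: Seq[SummarizerFactory]): StructType =
  StackSummarizerFactory(factories).apply(inputSchema).outputSchema
// The result exposes a single "stack" column typed ArrayType(<per-summarizer output schema>).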
Example 27
Source File: RowsSummarizer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.timeseries.summarize.summarizer

import java.util.ArrayDeque

import com.twosigma.flint.rdd.function.summarize.summarizer.subtractable
import com.twosigma.flint.timeseries.row.Schema
import com.twosigma.flint.timeseries.summarize.{ ColumnList, InputAlwaysValid, LeftSubtractableSummarizer, SummarizerFactory }
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.sql.types._

case class RowsSummarizerFactory(column: String) extends SummarizerFactory {
  override val requiredColumns: ColumnList = ColumnList.All

  override def apply(inputSchema: StructType): RowsSummarizer =
    RowsSummarizer(inputSchema, prefixOpt, requiredColumns, column)
}

case class RowsSummarizer(
  override val inputSchema: StructType,
  override val prefixOpt: Option[String],
  requiredColumns: ColumnList,
  column: String
) extends LeftSubtractableSummarizer with InputAlwaysValid {

  override type T = InternalRow
  override type U = ArrayDeque[InternalRow]
  override type V = Array[InternalRow]

  override val summarizer = subtractable.InternalRowsSummarizer()
  override val schema = Schema.of(column -> ArrayType(inputSchema))

  override def toT(r: InternalRow): T = r

  override def fromV(v: V): InternalRow = {
    val values = new GenericArrayData(v.asInstanceOf[Array[Any]])
    InternalRow(values)
  }
}
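A minimal sketch of the fromV packing above: a batch of rows becomes a single array-typed cell in the output row. The row contents here are placeholders, not tied to any particular schema:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.GenericArrayData

// Wrap collected rows as one GenericArrayData cell, as RowsSummarizer.fromV does.
val collected: Array[InternalRow] = Array(InternalRow(1L, 10), InternalRow(2L, 20))
val packed = InternalRow(new GenericArrayData(collected.asInstanceOf[Array[Any]]))
assert(packed.getArray(0).numElements() == 2)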
Example 29
Source File: ObjectExpressionsSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.objects.Invoke
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{IntegerType, ObjectType}

class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {

  test("SPARK-16622: The returned value of the called method in Invoke can be null") {
    val inputRow = InternalRow.fromSeq(Seq((false, null)))
    val cls = classOf[Tuple2[Boolean, java.lang.Integer]]
    val inputObject = BoundReference(0, ObjectType(cls), nullable = true)
    val invoke = Invoke(inputObject, "_2", IntegerType)
    checkEvaluationWithGeneratedMutableProjection(invoke, null, inputRow)
  }

  test("MapObjects should make copies of unsafe-backed data") {
    // test UnsafeRow-backed data
    val structEncoder = ExpressionEncoder[Array[Tuple2[java.lang.Integer, java.lang.Integer]]]
    val structInputRow = InternalRow.fromSeq(Seq(Array((1, 2), (3, 4))))
    val structExpected = new GenericArrayData(
      Array(InternalRow.fromSeq(Seq(1, 2)), InternalRow.fromSeq(Seq(3, 4))))
    checkEvalutionWithUnsafeProjection(
      structEncoder.serializer.head, structExpected, structInputRow)

    // test UnsafeArray-backed data
    val arrayEncoder = ExpressionEncoder[Array[Array[Int]]]
    val arrayInputRow = InternalRow.fromSeq(Seq(Array(Array(1, 2), Array(3, 4))))
    val arrayExpected = new GenericArrayData(
      Array(new GenericArrayData(Array(1, 2)), new GenericArrayData(Array(3, 4))))
    checkEvalutionWithUnsafeProjection(
      arrayEncoder.serializer.head, arrayExpected, arrayInputRow)

    // test UnsafeMap-backed data
    val mapEncoder = ExpressionEncoder[Array[Map[Int, Int]]]
    val mapInputRow = InternalRow.fromSeq(Seq(Array(
      Map(1 -> 100, 2 -> 200), Map(3 -> 300, 4 -> 400))))
    val mapExpected = new GenericArrayData(Seq(
      new ArrayBasedMapData(
        new GenericArrayData(Array(1, 2)),
        new GenericArrayData(Array(100, 200))),
      new ArrayBasedMapData(
        new GenericArrayData(Array(3, 4)),
        new GenericArrayData(Array(300, 400)))))
    checkEvalutionWithUnsafeProjection(
      mapEncoder.serializer.head, mapExpected, mapInputRow)
  }
}
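The copy semantics the second test relies on can be seen directly on GenericArrayData. A small sketch with arbitrary values, assuming nothing beyond the Catalyst API used above:

import org.apache.spark.sql.catalyst.util.GenericArrayData

// copy() returns an independent ArrayData, so later mutation of a reused
// buffer cannot change values that were already collected.
val arr = new GenericArrayData(Array[Any](1, 2, 3))
val snapshot = arr.copy()
assert(snapshot.numElements() == arr.numElements())
assert(snapshot.getInt(2) == 3)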