org.apache.spark.sql.catalyst.util.MapData Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.util.MapData.
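Before the project examples, here is a minimal, self-contained sketch (not taken from any project below; MapDataSketch is an illustrative name) showing how a MapData value can be built from the ArrayBasedMapData and GenericArrayData helpers in the same catalyst.util package and then traversed:

import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
import org.apache.spark.sql.types.{IntegerType, StringType}
import org.apache.spark.unsafe.types.UTF8String

object MapDataSketch {
  def main(args: Array[String]): Unit = {
    // A MapData is backed by two parallel ArrayData values: one for keys, one for values.
    val keys = new GenericArrayData(Array[Any](UTF8String.fromString("a"), UTF8String.fromString("b")))
    val values = new GenericArrayData(Array[Any](1, 2))
    val map: MapData = new ArrayBasedMapData(keys, values)

    // foreach takes the key and value DataTypes so it can hand entries back with the right accessors.
    map.foreach(StringType, IntegerType, (k, v) => println(s"$k -> $v"))
    println(map.numElements()) // 2
  }
}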
Example 1
Source File: InternalRow.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

// getAccessor is defined on the InternalRow companion object in the original source file
// (the rest of the file is omitted in this excerpt).
object InternalRow {
  def getAccessor(dataType: DataType): (SpecializedGetters, Int) => Any = dataType match {
    case BooleanType => (input, ordinal) => input.getBoolean(ordinal)
    case ByteType => (input, ordinal) => input.getByte(ordinal)
    case ShortType => (input, ordinal) => input.getShort(ordinal)
    case IntegerType | DateType => (input, ordinal) => input.getInt(ordinal)
    case LongType | TimestampType => (input, ordinal) => input.getLong(ordinal)
    case FloatType => (input, ordinal) => input.getFloat(ordinal)
    case DoubleType => (input, ordinal) => input.getDouble(ordinal)
    case StringType => (input, ordinal) => input.getUTF8String(ordinal)
    case BinaryType => (input, ordinal) => input.getBinary(ordinal)
    case CalendarIntervalType => (input, ordinal) => input.getInterval(ordinal)
    case t: DecimalType => (input, ordinal) => input.getDecimal(ordinal, t.precision, t.scale)
    case t: StructType => (input, ordinal) => input.getStruct(ordinal, t.size)
    case _: ArrayType => (input, ordinal) => input.getArray(ordinal)
    case _: MapType => (input, ordinal) => input.getMap(ordinal)
    case u: UserDefinedType[_] => getAccessor(u.sqlType)
    case _ => (input, ordinal) => input.get(ordinal, dataType)
  }
}
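A brief usage note, not part of the file above: assuming the single-argument getAccessor shown here (the signature gained a nullable parameter in later Spark versions), a caller can resolve a type-specific reader once and reuse it, for example in a REPL or test body:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.{IntegerType, StringType}
import org.apache.spark.unsafe.types.UTF8String

// InternalRow values hold Catalyst-internal types, so strings go in as UTF8String.
val row = InternalRow(42, UTF8String.fromString("hello"))
val readInt = InternalRow.getAccessor(IntegerType) // (SpecializedGetters, Int) => Any
val readString = InternalRow.getAccessor(StringType)
assert(readInt(row, 0) == 42)
assert(readString(row, 1) == UTF8String.fromString("hello"))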
Example 2
Source File: JavaConverter.scala From spark-dynamodb with Apache License 2.0
package com.audienceproject.spark.dynamodb.catalyst

import java.util

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

import scala.collection.JavaConverters._

object JavaConverter {

  def convertRowValue(row: InternalRow, index: Int, elementType: DataType): Any = {
    elementType match {
      case ArrayType(innerType, _) => convertArray(row.getArray(index), innerType)
      case MapType(keyType, valueType, _) => convertMap(row.getMap(index), keyType, valueType)
      case StructType(fields) => convertStruct(row.getStruct(index, fields.length), fields)
      case StringType => row.getString(index)
      case _ => row.get(index, elementType)
    }
  }

  def convertArray(array: ArrayData, elementType: DataType): Any = {
    elementType match {
      case ArrayType(innerType, _) =>
        array.toSeq[ArrayData](elementType).map(convertArray(_, innerType)).asJava
      case MapType(keyType, valueType, _) =>
        array.toSeq[MapData](elementType).map(convertMap(_, keyType, valueType)).asJava
      case structType: StructType =>
        array.toSeq[InternalRow](structType).map(convertStruct(_, structType.fields)).asJava
      case StringType => convertStringArray(array).asJava
      case _ => array.toSeq[Any](elementType).asJava
    }
  }

  def convertMap(map: MapData, keyType: DataType, valueType: DataType): util.Map[String, Any] = {
    if (keyType != StringType) throw new IllegalArgumentException(
      s"Invalid Map key type '${keyType.typeName}'. DynamoDB only supports String as Map key type.")
    val keys = convertStringArray(map.keyArray())
    val values = valueType match {
      case ArrayType(innerType, _) =>
        map.valueArray().toSeq[ArrayData](valueType).map(convertArray(_, innerType))
      case MapType(innerKeyType, innerValueType, _) =>
        map.valueArray().toSeq[MapData](valueType).map(convertMap(_, innerKeyType, innerValueType))
      case structType: StructType =>
        map.valueArray().toSeq[InternalRow](structType).map(convertStruct(_, structType.fields))
      case StringType => convertStringArray(map.valueArray())
      case _ => map.valueArray().toSeq[Any](valueType)
    }
    val kvPairs = for (i <- 0 until map.numElements()) yield keys(i) -> values(i)
    Map(kvPairs: _*).asJava
  }

  def convertStruct(row: InternalRow, fields: Seq[StructField]): util.Map[String, Any] = {
    val kvPairs = for (i <- 0 until row.numFields) yield
      if (row.isNullAt(i)) fields(i).name -> null
      else fields(i).name -> convertRowValue(row, i, fields(i).dataType)
    Map(kvPairs: _*).asJava
  }

  def convertStringArray(array: ArrayData): Seq[String] =
    array.toSeq[UTF8String](StringType).map(_.toString)

}
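A short, hypothetical call site for the converter above (the keys and values are invented for illustration), turning a MapData with String keys into a java.util.Map:

import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{IntegerType, StringType}
import org.apache.spark.unsafe.types.UTF8String

val mapData = new ArrayBasedMapData(
  new GenericArrayData(Array[Any](UTF8String.fromString("id"), UTF8String.fromString("count"))),
  new GenericArrayData(Array[Any](7, 3)))

// Keys must be StringType; any other key type makes convertMap throw IllegalArgumentException.
val javaMap = JavaConverter.convertMap(mapData, StringType, IntegerType)
// javaMap is a java.util.Map[String, Any] with entries "id" -> 7 and "count" -> 3.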
Example 3
Source File: GenerateUnsafeProjectionSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.codegen

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.BoundReference
import org.apache.spark.sql.catalyst.util.{ArrayData, MapData}
import org.apache.spark.sql.types.{DataType, Decimal, StringType, StructType}
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

class GenerateUnsafeProjectionSuite extends SparkFunSuite {
  test("Test unsafe projection string access pattern") {
    val dataType = (new StructType).add("a", StringType)
    val exprs = BoundReference(0, dataType, nullable = true) :: Nil
    val projection = GenerateUnsafeProjection.generate(exprs)
    val result = projection.apply(InternalRow(AlwaysNull))
    assert(!result.isNullAt(0))
    assert(result.getStruct(0, 1).isNullAt(0))
  }
}

object AlwaysNull extends InternalRow {
  override def numFields: Int = 1
  override def setNullAt(i: Int): Unit = {}
  override def copy(): InternalRow = this
  override def anyNull: Boolean = true
  override def isNullAt(ordinal: Int): Boolean = true
  override def update(i: Int, value: Any): Unit = notSupported
  override def getBoolean(ordinal: Int): Boolean = notSupported
  override def getByte(ordinal: Int): Byte = notSupported
  override def getShort(ordinal: Int): Short = notSupported
  override def getInt(ordinal: Int): Int = notSupported
  override def getLong(ordinal: Int): Long = notSupported
  override def getFloat(ordinal: Int): Float = notSupported
  override def getDouble(ordinal: Int): Double = notSupported
  override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = notSupported
  override def getUTF8String(ordinal: Int): UTF8String = notSupported
  override def getBinary(ordinal: Int): Array[Byte] = notSupported
  override def getInterval(ordinal: Int): CalendarInterval = notSupported
  override def getStruct(ordinal: Int, numFields: Int): InternalRow = notSupported
  override def getArray(ordinal: Int): ArrayData = notSupported
  override def getMap(ordinal: Int): MapData = notSupported
  override def get(ordinal: Int, dataType: DataType): AnyRef = notSupported
  private def notSupported: Nothing = throw new UnsupportedOperationException
}
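The suite above only covers a struct column; a similarly shaped, hypothetical check for a MapType column (not part of the original suite, and assuming the same codegen entry point) could look like this:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.BoundReference
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types.{IntegerType, MapType, StringType}
import org.apache.spark.unsafe.types.UTF8String

val mapType = MapType(StringType, IntegerType)
val projection = GenerateUnsafeProjection.generate(BoundReference(0, mapType, nullable = true) :: Nil)
val input = InternalRow(new ArrayBasedMapData(
  new GenericArrayData(Array[Any](UTF8String.fromString("k"))),
  new GenericArrayData(Array[Any](1))))
val result = projection.apply(input)

// The projected row exposes the column through the MapData interface again (as UnsafeMapData).
assert(result.getMap(0).numElements() == 1)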
Example 4
Source File: JacksonGenerator.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.datasources.json

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{MapData, ArrayData, DateTimeUtils}

import scala.collection.Map

import com.fasterxml.jackson.core._

import org.apache.spark.sql.Row
import org.apache.spark.sql.types._

private[sql] object JacksonGenerator {

  def apply(rowSchema: StructType, gen: JsonGenerator)(row: InternalRow): Unit = {
    def valWriter: (DataType, Any) => Unit = {
      case (_, null) | (NullType, _) => gen.writeNull()
      case (StringType, v) => gen.writeString(v.toString)
      case (TimestampType, v: Long) => gen.writeString(DateTimeUtils.toJavaTimestamp(v).toString)
      case (IntegerType, v: Int) => gen.writeNumber(v)
      case (ShortType, v: Short) => gen.writeNumber(v)
      case (FloatType, v: Float) => gen.writeNumber(v)
      case (DoubleType, v: Double) => gen.writeNumber(v)
      case (LongType, v: Long) => gen.writeNumber(v)
      case (DecimalType(), v: Decimal) => gen.writeNumber(v.toJavaBigDecimal)
      case (ByteType, v: Byte) => gen.writeNumber(v.toInt)
      case (BinaryType, v: Array[Byte]) => gen.writeBinary(v)
      case (BooleanType, v: Boolean) => gen.writeBoolean(v)
      case (DateType, v: Int) => gen.writeString(DateTimeUtils.toJavaDate(v).toString)

      // For UDT values, they should be in the SQL type's corresponding value type.
      // We should not see values in the user-defined class at here.
      // For example, VectorUDT's SQL type is an array of double. So, we should expect that v is
      // an ArrayData at here, instead of a Vector.
      case (udt: UserDefinedType[_], v) => valWriter(udt.sqlType, v)

      case (ArrayType(ty, _), v: ArrayData) =>
        gen.writeStartArray()
        v.foreach(ty, (_, value) => valWriter(ty, value))
        gen.writeEndArray()

      case (MapType(kt, vt, _), v: MapData) =>
        gen.writeStartObject()
        v.foreach(kt, vt, { (k, v) =>
          gen.writeFieldName(k.toString)
          valWriter(vt, v)
        })
        gen.writeEndObject()

      case (StructType(ty), v: InternalRow) =>
        gen.writeStartObject()
        var i = 0
        while (i < ty.length) {
          val field = ty(i)
          val value = v.get(i, field.dataType)
          if (value != null) {
            gen.writeFieldName(field.name)
            valWriter(field.dataType, value)
          }
          i += 1
        }
        gen.writeEndObject()

      case (dt, v) =>
        sys.error(s"Failed to convert value $v (class of ${v.getClass}) with the type of $dt to JSON.")
    }

    valWriter(rowSchema, row)
  }
}
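A hedged usage sketch for the generator above, not taken from the BigDatalog sources: it wires a Jackson JsonGenerator over a StringWriter and serializes one row whose only column is a map, assuming the calling code sits somewhere the private[sql] object is visible:

import java.io.StringWriter

import com.fasterxml.jackson.core.JsonFactory
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

val schema = StructType(Seq(StructField("m", MapType(StringType, IntegerType))))
val writer = new StringWriter()
val gen = new JsonFactory().createGenerator(writer)

val row = InternalRow(new ArrayBasedMapData(
  new GenericArrayData(Array[Any](UTF8String.fromString("a"))),
  new GenericArrayData(Array[Any](1))))

JacksonGenerator(schema, gen)(row)
gen.flush()
// writer.toString should now hold {"m":{"a":1}}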