org.apache.spark.sql.types.MapType Scala Examples
The following examples show how to use org.apache.spark.sql.types.MapType.
Each example names the project and source file it was taken from.
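Before the examples, a minimal sketch of what a MapType is: it describes a map column by its key type, its value type, and whether values may be null.

import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType}

val mapType = MapType(StringType, IntegerType, valueContainsNull = false)
val schema = StructType(Seq(StructField("scores", mapType, nullable = true)))
// schema.simpleString == "struct<scores:map<string,int>>"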
Example 1
Source File: MapDataSuite.scala From drizzle-spark with Apache License 2.0 (the same file appears verbatim in the sparkoscope and multi-tenancy-spark projects)
package org.apache.spark.sql.catalyst.expressions

import scala.collection._

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData
import org.apache.spark.sql.types.{DataType, IntegerType, MapType, StringType}
import org.apache.spark.unsafe.types.UTF8String

class MapDataSuite extends SparkFunSuite {

  test("inequality tests") {
    def u(str: String): UTF8String = UTF8String.fromString(str)

    // test data
    val testMap1 = Map(u("key1") -> 1)
    val testMap2 = Map(u("key1") -> 1, u("key2") -> 2)
    val testMap3 = Map(u("key1") -> 1)
    val testMap4 = Map(u("key1") -> 1, u("key2") -> 2)

    // ArrayBasedMapData: MapData does not define value-based equality,
    // so even maps built from identical contents compare as unequal.
    val testArrayMap1 = ArrayBasedMapData(testMap1.toMap)
    val testArrayMap2 = ArrayBasedMapData(testMap2.toMap)
    val testArrayMap3 = ArrayBasedMapData(testMap3.toMap)
    val testArrayMap4 = ArrayBasedMapData(testMap4.toMap)
    assert(testArrayMap1 !== testArrayMap3)
    assert(testArrayMap2 !== testArrayMap4)

    // UnsafeMapData: the same holds after converting to the unsafe binary format.
    val unsafeConverter =
      UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType)))
    val row = new GenericInternalRow(1)
    def toUnsafeMap(map: ArrayBasedMapData): UnsafeMapData = {
      row.update(0, map)
      val unsafeRow = unsafeConverter.apply(row)
      unsafeRow.getMap(0).copy
    }
    assert(toUnsafeMap(testArrayMap1) !== toUnsafeMap(testArrayMap3))
    assert(toUnsafeMap(testArrayMap2) !== toUnsafeMap(testArrayMap4))
  }
}
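For contrast, a minimal sketch (assuming a local SparkSession named spark) of the public DataFrame API, where a collected map column comes back as an ordinary Scala Map, which does support value-based equality:

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType}

val schema = StructType(Seq(StructField("m", MapType(StringType, IntegerType), nullable = false)))
val df = spark.createDataFrame(
  spark.sparkContext.parallelize(Seq(Row(Map("key1" -> 1)))),
  schema)
val collected = df.collect().head.getMap[String, Int](0)
assert(collected == Map("key1" -> 1)) // value equality holds at this level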
Example 2
Source File: DataTypeMapping.scala From azure-kusto-spark with Apache License 2.0
package com.microsoft.kusto.spark.utils

import org.apache.spark.sql.types.DataTypes._
import org.apache.spark.sql.types.{ArrayType, DataType, DataTypes, DecimalType, MapType, StructType}

object DataTypeMapping {

  val kustoTypeToSparkTypeMap: Map[String, DataType] = Map(
    "string" -> StringType,
    "long" -> LongType,
    "datetime" -> TimestampType, // Kusto datetime is equivalent to TimestampType
    "timespan" -> StringType,
    "bool" -> BooleanType,
    "real" -> DoubleType,
    // Can be partitioned differently between precision and scale; the total must be 34 to match .NET SqlDecimal
    "decimal" -> DataTypes.createDecimalType(20, 14),
    "guid" -> StringType,
    "int" -> IntegerType,
    "dynamic" -> StringType
  )

  val kustoJavaTypeToSparkTypeMap: Map[String, DataType] = Map(
    "string" -> StringType,
    "int64" -> LongType,
    "datetime" -> TimestampType,
    "timespan" -> StringType,
    "sbyte" -> BooleanType,
    "double" -> DoubleType,
    "sqldecimal" -> DataTypes.createDecimalType(20, 14),
    "guid" -> StringType,
    "int32" -> IntegerType,
    "object" -> StringType
  )

  val sparkTypeToKustoTypeMap: Map[DataType, String] = Map(
    StringType -> "string",
    BooleanType -> "bool",
    DateType -> "datetime",
    TimestampType -> "datetime",
    DataTypes.createDecimalType() -> "decimal",
    DoubleType -> "real",
    FloatType -> "real",
    ByteType -> "int",
    IntegerType -> "int",
    LongType -> "long",
    ShortType -> "int"
  )

  def getSparkTypeToKustoTypeMap(fieldType: DataType): String = {
    if (fieldType.isInstanceOf[DecimalType]) "decimal"
    else if (fieldType.isInstanceOf[ArrayType] || fieldType.isInstanceOf[StructType] || fieldType.isInstanceOf[MapType]) "dynamic"
    else DataTypeMapping.sparkTypeToKustoTypeMap.getOrElse(fieldType, "string")
  }
}
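A quick usage sketch of the mapping above: any Spark complex type, including MapType, is rendered as Kusto's dynamic type.

import org.apache.spark.sql.types.{IntegerType, MapType, StringType}

DataTypeMapping.getSparkTypeToKustoTypeMap(MapType(StringType, IntegerType)) // "dynamic"
DataTypeMapping.getSparkTypeToKustoTypeMap(IntegerType)                      // "int"
DataTypeMapping.getSparkTypeToKustoTypeMap(StringType)                       // "string"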
Example 3
Source File: ShortestPaths.scala From graphframes with Apache License 2.0
package org.graphframes.lib

import java.util

import scala.collection.JavaConverters._

import org.apache.spark.graphx.{lib => graphxlib}
import org.apache.spark.sql.{Column, DataFrame, Row}
import org.apache.spark.sql.api.java.UDF1
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{IntegerType, MapType}

import org.graphframes.GraphFrame

  // NOTE: this listing omits the enclosing ShortestPaths class declaration and
  // the Seq-based landmarks overload that this method delegates to.
  def landmarks(value: util.ArrayList[Any]): this.type = {
    landmarks(value.asScala)
  }

  def run(): DataFrame = {
    ShortestPaths.run(graph, check(lmarks, "landmarks"))
  }
}

private object ShortestPaths {

  private def run(graph: GraphFrame, landmarks: Seq[Any]): DataFrame = {
    val idType = graph.vertices.schema(GraphFrame.ID).dataType
    val longIdToLandmark = landmarks.map(l => GraphXConversions.integralId(graph, l) -> l).toMap
    val gx = graphxlib.ShortestPaths.run(
      graph.cachedTopologyGraphX,
      longIdToLandmark.keys.toSeq.sorted).mapVertices { case (_, m) => m.toSeq }
    val g = GraphXConversions.fromGraphX(graph, gx, vertexNames = Seq(DISTANCE_ID))
    val distanceCol: Column = if (graph.hasIntegralIdType) {
      // There seems to be no easy way to convert a sequence of pairs into a map.
      val mapToLandmark = udf { distances: Seq[Row] =>
        distances.map { case Row(k: Long, v: Int) => k -> v }.toMap
      }
      mapToLandmark(g.vertices(DISTANCE_ID))
    } else {
      val func = new UDF1[Seq[Row], Map[Any, Int]] {
        override def call(t1: Seq[Row]): Map[Any, Int] = {
          t1.map { case Row(k: Long, v: Int) => longIdToLandmark(k) -> v }.toMap
        }
      }
      val mapToLandmark = udf(func, MapType(idType, IntegerType, false))
      mapToLandmark(col(DISTANCE_ID))
    }
    val cols = graph.vertices.columns.map(col) :+ distanceCol.as(DISTANCE_ID)
    g.vertices.select(cols: _*)
  }

  private val DISTANCE_ID = "distances"
}
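The MapType-specific part of this example is the Java-style UDF1: it carries no Scala TypeTag, so when it returns a Map the result type must be supplied explicitly as a MapType. A minimal standalone sketch of that pattern (wordLengths and the words column are hypothetical names):

import org.apache.spark.sql.api.java.UDF1
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.{IntegerType, MapType, StringType}

val wordLengths = new UDF1[Seq[String], Map[String, Int]] {
  override def call(words: Seq[String]): Map[String, Int] =
    words.map(w => w -> w.length).toMap
}
val wordLengthsUdf = udf(wordLengths, MapType(StringType, IntegerType, valueContainsNull = false))
// usage: df.select(wordLengthsUdf(col("words")))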
Example 4
Source File: ForecastPipelineStage.scala From uberdata with Apache License 2.0
package org.apache.spark.ml

import eleflow.uberdata.IUberdataForecastUtil
import org.apache.spark.ml.param.shared.{HasNFutures, HasPredictionCol, HasValidationCol}
import org.apache.spark.ml.linalg.VectorUDT
import org.apache.spark.sql.types.{StructType, StringType, StructField, MapType}

trait ForecastPipelineStage
  extends PipelineStage
  with HasNFutures
  with HasPredictionCol
  with HasValidationCol {

  def setValidationCol(value: String): this.type = set(validationCol, value)

  override def transformSchema(schema: StructType): StructType = {
    schema
      .add(StructField($(validationCol), new VectorUDT))
      .add(StructField(IUberdataForecastUtil.ALGORITHM, StringType))
      .add(StructField(IUberdataForecastUtil.PARAMS, MapType(StringType, StringType)))
  }
}
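A small sketch of the schema shape transformSchema produces; the column names below stand in for the example's $(validationCol) and IUberdataForecastUtil constants, and the VectorUDT column is omitted since that class may not be accessible outside the org.apache.spark namespace:

import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType}

val base = StructType(Seq(StructField("prediction", StringType)))
val out = base
  .add(StructField("algorithm", StringType))
  .add(StructField("params", MapType(StringType, StringType)))
// out.simpleString == "struct<prediction:string,algorithm:string,params:map<string,string>>"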
Example 5
Source File: ComplexDataSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.util

import scala.collection._

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BoundReference, GenericInternalRow, SpecificInternalRow, UnsafeMapData, UnsafeProjection}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.types.{DataType, IntegerType, MapType, StringType}
import org.apache.spark.unsafe.types.UTF8String

class ComplexDataSuite extends SparkFunSuite {
  def utf8(str: String): UTF8String = UTF8String.fromString(str)

  test("inequality tests for MapData") {
    // test data
    val testMap1 = Map(utf8("key1") -> 1)
    val testMap2 = Map(utf8("key1") -> 1, utf8("key2") -> 2)
    val testMap3 = Map(utf8("key1") -> 1)
    val testMap4 = Map(utf8("key1") -> 1, utf8("key2") -> 2)

    // ArrayBasedMapData
    val testArrayMap1 = ArrayBasedMapData(testMap1.toMap)
    val testArrayMap2 = ArrayBasedMapData(testMap2.toMap)
    val testArrayMap3 = ArrayBasedMapData(testMap3.toMap)
    val testArrayMap4 = ArrayBasedMapData(testMap4.toMap)
    assert(testArrayMap1 !== testArrayMap3)
    assert(testArrayMap2 !== testArrayMap4)

    // UnsafeMapData
    val unsafeConverter =
      UnsafeProjection.create(Array[DataType](MapType(StringType, IntegerType)))
    val row = new GenericInternalRow(1)
    def toUnsafeMap(map: ArrayBasedMapData): UnsafeMapData = {
      row.update(0, map)
      val unsafeRow = unsafeConverter.apply(row)
      unsafeRow.getMap(0).copy
    }
    assert(toUnsafeMap(testArrayMap1) !== toUnsafeMap(testArrayMap3))
    assert(toUnsafeMap(testArrayMap2) !== toUnsafeMap(testArrayMap4))
  }

  test("GenericInternalRow.copy return a new instance that is independent from the old one") {
    val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true)))
    val unsafeRow = project.apply(InternalRow(utf8("a")))
    val genericRow = new GenericInternalRow(Array[Any](unsafeRow.getUTF8String(0)))
    val copiedGenericRow = genericRow.copy()
    assert(copiedGenericRow.getString(0) == "a")
    project.apply(InternalRow(UTF8String.fromString("b")))
    // The copied internal row should not be changed externally.
    assert(copiedGenericRow.getString(0) == "a")
  }

  test("SpecificMutableRow.copy return a new instance that is independent from the old one") {
    val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true)))
    val unsafeRow = project.apply(InternalRow(utf8("a")))
    val mutableRow = new SpecificInternalRow(Seq(StringType))
    mutableRow(0) = unsafeRow.getUTF8String(0)
    val copiedMutableRow = mutableRow.copy()
    assert(copiedMutableRow.getString(0) == "a")
    project.apply(InternalRow(UTF8String.fromString("b")))
    // The copied internal row should not be changed externally.
    assert(copiedMutableRow.getString(0) == "a")
  }

  test("GenericArrayData.copy return a new instance that is independent from the old one") {
    val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true)))
    val unsafeRow = project.apply(InternalRow(utf8("a")))
    val genericArray = new GenericArrayData(Array[Any](unsafeRow.getUTF8String(0)))
    val copiedGenericArray = genericArray.copy()
    assert(copiedGenericArray.getUTF8String(0).toString == "a")
    project.apply(InternalRow(UTF8String.fromString("b")))
    // The copied array data should not be changed externally.
    assert(copiedGenericArray.getUTF8String(0).toString == "a")
  }

  test("copy on nested complex type") {
    val project = GenerateUnsafeProjection.generate(Seq(BoundReference(0, StringType, true)))
    val unsafeRow = project.apply(InternalRow(utf8("a")))
    val arrayOfRow = new GenericArrayData(Array[Any](InternalRow(unsafeRow.getUTF8String(0))))
    val copied = arrayOfRow.copy()
    assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a")
    project.apply(InternalRow(UTF8String.fromString("b")))
    // The copied data should not be changed externally.
    assert(copied.getStruct(0, 1).getUTF8String(0).toString == "a")
  }
}
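The copy-related tests above all pin down the same contract, sketched minimally below: an unsafe projection reuses its output buffer across calls, so any result that must outlive the next apply() has to be copied first.

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.spark.unsafe.types.UTF8String

val project = UnsafeProjection.create(Array[DataType](StringType))
val first = project.apply(InternalRow(UTF8String.fromString("a")))
val saved = first.copy() // detach from the shared buffer
project.apply(InternalRow(UTF8String.fromString("b"))) // rewrites the buffer behind `first`
assert(saved.getUTF8String(0).toString == "a") // the copy is unaffected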
Example 6
Source File: DataTypeUtil.scala From sona with Apache License 2.0
package org.apache.spark.util

import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}

object DataTypeUtil {

  def sameType(left: DataType, right: DataType): Boolean =
    if (SQLConf.get.caseSensitiveAnalysis) {
      equalsIgnoreNullability(left, right)
    } else {
      equalsIgnoreCaseAndNullability(left, right)
    }

  private def equalsIgnoreNullability(left: DataType, right: DataType): Boolean = {
    (left, right) match {
      case (ArrayType(leftElementType, _), ArrayType(rightElementType, _)) =>
        equalsIgnoreNullability(leftElementType, rightElementType)
      case (MapType(leftKeyType, leftValueType, _), MapType(rightKeyType, rightValueType, _)) =>
        equalsIgnoreNullability(leftKeyType, rightKeyType) &&
          equalsIgnoreNullability(leftValueType, rightValueType)
      case (StructType(leftFields), StructType(rightFields)) =>
        leftFields.length == rightFields.length &&
          leftFields.zip(rightFields).forall { case (l, r) =>
            l.name == r.name && equalsIgnoreNullability(l.dataType, r.dataType)
          }
      case (l, r) => l == r
    }
  }

  private def equalsIgnoreCaseAndNullability(from: DataType, to: DataType): Boolean = {
    (from, to) match {
      case (ArrayType(fromElement, _), ArrayType(toElement, _)) =>
        equalsIgnoreCaseAndNullability(fromElement, toElement)
      case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) =>
        equalsIgnoreCaseAndNullability(fromKey, toKey) &&
          equalsIgnoreCaseAndNullability(fromValue, toValue)
      case (StructType(fromFields), StructType(toFields)) =>
        fromFields.length == toFields.length &&
          fromFields.zip(toFields).forall { case (l, r) =>
            l.name.equalsIgnoreCase(r.name) &&
              equalsIgnoreCaseAndNullability(l.dataType, r.dataType)
          }
      case (fromDataType, toDataType) => fromDataType == toDataType
    }
  }
}
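A quick usage sketch of sameType as defined above: both comparison modes ignore nullability, so two MapTypes that differ only in valueContainsNull count as the same type.

import org.apache.spark.sql.types.{IntegerType, MapType, StringType}

val a = MapType(StringType, IntegerType, valueContainsNull = true)
val b = MapType(StringType, IntegerType, valueContainsNull = false)
assert(DataTypeUtil.sameType(a, b)) // true: nullability is ignored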