org.apache.spark.sql.types.NullType Scala Examples
The following examples show how to use org.apache.spark.sql.types.NullType.
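Before the examples, here is a minimal orientation sketch (the object name is illustrative, not from any project below) showing where NullType most often appears: it is the data type Spark assigns to an untyped null literal, and it can surface in a schema, for instance for a bare `SELECT NULL` column.

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.types.{NullType, StructField, StructType}

object NullTypeBasics {
  def main(args: Array[String]): Unit = {
    // An untyped null literal carries NullType as its data type.
    val nullLit = Literal(null, NullType)
    println(nullLit.dataType) // NullType

    // NullType can also show up in a schema, e.g. for a `SELECT NULL` column.
    val schema = StructType(Seq(StructField("c", NullType)))
    println(schema.treeString)
  }
}
```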
Example 1
Source File: DeltaInvariantCheckerExec.scala From delta with Apache License 2.0
```scala
package org.apache.spark.sql.delta.schema

import org.apache.spark.sql.delta.DeltaErrors
import org.apache.spark.sql.delta.schema.Invariants.NotNull

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BindReferences,
  Expression, GetStructField, Literal, SortOrder}
import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.types.{NullType, StructType}

  private def buildExtractors(invariant: Invariant): Option[Expression] = {
    assert(invariant.column.nonEmpty)
    val topLevelColumn = invariant.column.head
    val topLevelRefOpt = output.collectFirst {
      case a: AttributeReference if SchemaUtils.DELTA_COL_RESOLVER(a.name, topLevelColumn) => a
    }
    val rejectColumnNotFound = isNullNotOkay(invariant)
    if (topLevelRefOpt.isEmpty) {
      if (rejectColumnNotFound) {
        throw DeltaErrors.notNullInvariantException(invariant)
      }
    }

    if (invariant.column.length == 1) {
      topLevelRefOpt.map(BindReferences.bindReference[Expression](_, output))
    } else {
      topLevelRefOpt.flatMap { topLevelRef =>
        val boundTopLevel = BindReferences.bindReference[Expression](topLevelRef, output)
        try {
          val nested = invariant.column.tail.foldLeft(boundTopLevel) { case (e, fieldName) =>
            e.dataType match {
              case StructType(fields) =>
                val ordinal = fields.indexWhere(f =>
                  SchemaUtils.DELTA_COL_RESOLVER(f.name, fieldName))
                if (ordinal == -1) {
                  throw new IndexOutOfBoundsException(s"Not nullable column not found in struct: " +
                    s"${fields.map(_.name).mkString("[", ",", "]")}")
                }
                GetStructField(e, ordinal, Some(fieldName))
              case _ =>
                throw new UnsupportedOperationException(
                  "Invariants on nested fields other than StructTypes are not supported.")
            }
          }
          Some(nested)
        } catch {
          case i: IndexOutOfBoundsException if rejectColumnNotFound =>
            throw InvariantViolationException(invariant, i.getMessage)
          case _: IndexOutOfBoundsException if !rejectColumnNotFound => None
        }
      }
    }
  }

  override protected def doExecute(): RDD[InternalRow] = {
    if (invariants.isEmpty) return child.execute()
    val boundRefs = invariants.map { invariant =>
      CheckDeltaInvariant(buildExtractors(invariant).getOrElse(Literal(null, NullType)), invariant)
    }

    child.execute().mapPartitionsInternal { rows =>
      val assertions = GenerateUnsafeProjection.generate(boundRefs)
      rows.map { row =>
        assertions(row)
        row
      }
    }
  }

  override def outputOrdering: Seq[SortOrder] = child.outputOrdering

  override def outputPartitioning: Partitioning = child.outputPartitioning
}
```
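When an invariant's column is missing from the output and nulls are acceptable, the example falls back to `Literal(null, NullType)` as the bound extractor. As a hedged end-to-end illustration (assuming a Spark session with the Delta Lake package configured; the table name and path are made up), a NOT NULL constraint is what ultimately drives `CheckDeltaInvariant` to throw:

```scala
import org.apache.spark.sql.SparkSession

object NotNullInvariantDemo {
  def main(args: Array[String]): Unit = {
    // Assumes delta-core is on the classpath and registered as a data source.
    val spark = SparkSession.builder()
      .appName("not-null-invariant-demo")
      .master("local[*]")
      .getOrCreate()

    spark.sql("CREATE TABLE events (id INT NOT NULL) USING delta LOCATION '/tmp/events'")

    // Expected to fail: the NotNull invariant on `id` is checked per row
    // by the physical plan shown above.
    spark.sql("INSERT INTO events VALUES (NULL)")
  }
}
```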
Example 2
Source File: S3AParquetRelationScaleSuite.scala From cloud-integration with Apache License 2.0
```scala
package org.apache.spark.sql.hive.orc.cloud

import com.cloudera.spark.cloud.s3.S3ATestSetup

import org.apache.spark.sql.sources.CloudRelationScaleTest
import org.apache.spark.sql.types.{CalendarIntervalType, DataType, NullType}

class S3AParquetRelationScaleSuite extends CloudRelationScaleTest with S3ATestSetup {

  init()

  def init(): Unit = {
    // propagate S3 credentials
    if (enabled) {
      initFS()
    }
  }

  override def enabled: Boolean = super.enabled && isScaleTestEnabled

  override val dataSourceName: String = "parquet"

  // Parquet does not play well with NullType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: CalendarIntervalType => false
    case _ => true
  }
}
```
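The `supportsDataType` override mirrors a real Parquet limitation: Parquet has no physical encoding for NullType, so Spark rejects such writes at analysis time. A hedged sketch (the output path is illustrative):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.lit

object ParquetNullTypeDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    // `lit(null)` with no cast yields a NullType column.
    val df = spark.range(3).withColumn("n", lit(null))
    df.printSchema() // n: null (nullable = true)

    // This write is expected to fail with an AnalysisException, because
    // Parquet cannot store NullType:
    // df.write.parquet("/tmp/nulltype-demo")

    // Casting to a concrete type first makes the write succeed.
    df.withColumn("n", lit(null).cast("string")).write.parquet("/tmp/nulltype-demo")
  }
}
```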
Example 3
Source File: S3AParquetRelationSuite.scala From cloud-integration with Apache License 2.0
```scala
package org.apache.spark.sql.hive.orc.cloud

import com.cloudera.spark.cloud.s3.S3ATestSetup

import org.apache.spark.sql.sources.CloudRelationBasicSuite
import org.apache.spark.sql.types.{CalendarIntervalType, DataType, NullType}

class S3AParquetRelationSuite extends CloudRelationBasicSuite with S3ATestSetup {

  init()

  def init(): Unit = {
    // propagate S3 credentials
    if (enabled) {
      initFS()
    }
  }

  override val dataSourceName: String = "parquet"

  // Parquet does not play well with NullType.
  override protected def supportsDataType(dataType: DataType): Boolean = dataType match {
    case _: NullType => false
    case _: CalendarIntervalType => false
    case _ => true
  }
}
```
Example 4
Source File: SimplifyConditionalSuite.scala From Spark-2.3.1 with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{IntegerType, NullType}

class SimplifyConditionalSuite extends PlanTest with PredicateHelper {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyConditionals", FixedPoint(50), SimplifyConditionals) :: Nil
  }

  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
    val correctAnswer = Project(Alias(e2, "out")() :: Nil, OneRowRelation()).analyze
    val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, OneRowRelation()).analyze)
    comparePlans(actual, correctAnswer)
  }

  private val trueBranch = (TrueLiteral, Literal(5))
  private val normalBranch = (NonFoldableLiteral(true), Literal(10))
  private val unreachableBranch = (FalseLiteral, Literal(20))
  private val nullBranch = (Literal.create(null, NullType), Literal(30))

  test("simplify if") {
    assertEquivalent(
      If(TrueLiteral, Literal(10), Literal(20)),
      Literal(10))

    assertEquivalent(
      If(FalseLiteral, Literal(10), Literal(20)),
      Literal(20))

    assertEquivalent(
      If(Literal.create(null, NullType), Literal(10), Literal(20)),
      Literal(20))
  }

  test("remove unreachable branches") {
    // i.e. removing branches whose conditions are always false
    assertEquivalent(
      CaseWhen(unreachableBranch :: normalBranch :: unreachableBranch :: nullBranch :: Nil, None),
      CaseWhen(normalBranch :: Nil, None))
  }

  test("remove entire CaseWhen if only the else branch is reachable") {
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: Nil, Some(Literal(30))),
      Literal(30))

    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: Nil, None),
      Literal.create(null, IntegerType))
  }

  test("remove entire CaseWhen if the first branch is always true") {
    assertEquivalent(
      CaseWhen(trueBranch :: normalBranch :: nullBranch :: Nil, None),
      Literal(5))

    // Test branch elimination and simplification in combination
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: trueBranch
        :: normalBranch :: Nil, None),
      Literal(5))

    // Make sure this doesn't trigger if there is a non-foldable branch before the true branch
    assertEquivalent(
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None),
      CaseWhen(normalBranch :: trueBranch :: Nil, None))
  }

  test("simplify CaseWhen, prune branches following a definite true") {
    assertEquivalent(
      CaseWhen(normalBranch :: unreachableBranch :: unreachableBranch :: nullBranch
        :: trueBranch :: normalBranch :: Nil, None),
      CaseWhen(normalBranch :: trueBranch :: Nil, None))
  }
}
```
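The `nullBranch` cases above encode the SQL rule that a NULL predicate is treated like false, which is why the optimizer may prune such branches. The same behavior is observable from plain SQL; a small hedged sketch:

```scala
import org.apache.spark.sql.SparkSession

object NullConditionDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    // A NULL condition selects the else branch, as in the `simplify if` test.
    spark.sql("SELECT IF(NULL, 10, 20)").show() // 20

    // A CASE branch whose condition is NULL is never taken.
    spark.sql("SELECT CASE WHEN NULL THEN 30 ELSE 40 END").show() // 40
  }
}
```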
Example 5
Source File: ResolveInlineTablesSuite.scala From Spark-2.3.1 with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.analysis

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types.{LongType, NullType, TimestampType}

class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter {

  private def lit(v: Any): Literal = Literal(v)

  test("validate inputs are foldable") {
    ResolveInlineTables(conf).validateInputEvaluable(
      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))

    // nondeterministic (rand) should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
    }

    // aggregate should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
    }

    // unresolved attribute should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
    }
  }

  test("validate input dimensions") {
    ResolveInlineTables(conf).validateInputDimension(
      UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2)))))

    // num alias != data dimension
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputDimension(
        UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2)))))
    }

    // num alias == data dimension, but data themselves are inconsistent
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputDimension(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22)))))
    }
  }

  test("do not fire the rule if not all expressions are resolved") {
    val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A"))))
    assert(ResolveInlineTables(conf)(table) == table)
  }

  test("convert") {
    val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted = ResolveInlineTables(conf).convert(table)

    assert(converted.output.map(_.dataType) == Seq(LongType))
    assert(converted.data.size == 2)
    assert(converted.data(0).getLong(0) == 1L)
    assert(converted.data(1).getLong(0) == 2L)
  }

  test("convert TimeZoneAwareExpression") {
    val table = UnresolvedInlineTable(Seq("c1"),
      Seq(Seq(Cast(lit("1991-12-06 00:00:00.0"), TimestampType))))
    val withTimeZone = ResolveTimeZone(conf).apply(table)
    val LocalRelation(output, data, _) = ResolveInlineTables(conf).apply(withTimeZone)
    val correct = Cast(lit("1991-12-06 00:00:00.0"), TimestampType)
      .withTimeZone(conf.sessionLocalTimeZone).eval().asInstanceOf[Long]
    assert(output.map(_.dataType) == Seq(TimestampType))
    assert(data.size == 1)
    assert(data.head.getLong(0) == correct)
  }

  test("nullability inference in convert") {
    val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted1 = ResolveInlineTables(conf).convert(table1)
    assert(!converted1.schema.fields(0).nullable)

    val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
    val converted2 = ResolveInlineTables(conf).convert(table2)
    assert(converted2.schema.fields(0).nullable)
  }
}
```
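The `nullability inference in convert` test corresponds to what users see with SQL inline tables: a `NULL` cell starts as NullType, makes the resolved column nullable, and is coerced to the type fixed by the other rows. A hedged sketch:

```scala
import org.apache.spark.sql.SparkSession

object InlineTableDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    // The NULL literal begins as NullType and is coerced to the column type.
    val df = spark.sql("SELECT * FROM VALUES (1), (NULL) AS t(c1)")
    df.printSchema() // c1: int (nullable = true)
    df.show()
  }
}
```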
Example 6
Source File: SparkExecuteStatementOperationSuite.scala From Spark-2.3.1 with Apache License 2.0
```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{IntegerType, NullType, StringType, StructField, StructType}

class SparkExecuteStatementOperationSuite extends SparkFunSuite {
  test("SPARK-17112 `select null` via JDBC triggers IllegalArgumentException in ThriftServer") {
    val field1 = StructField("NULL", NullType)
    val field2 = StructField("(IF(true, NULL, NULL))", NullType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
  }

  test("SPARK-20146 Comment should be preserved") {
    val field1 = StructField("column1", StringType).withComment("comment 1")
    val field2 = StructField("column2", IntegerType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.STRING_TYPE)
    assert(columns.get(0).getComment() == "comment 1")
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.INT_TYPE)
    assert(columns.get(1).getComment() == "")
  }
}
```
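The schemas the test feeds to `getTableSchema` are exactly what a bare `SELECT NULL` produces. A hedged sketch confirming that shape from the public API:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.NullType

object SelectNullSchemaDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    // A bare NULL projection yields a single NullType column -- the case
    // SPARK-17112 taught the Thrift server to map to Hive's NULL_TYPE.
    val schema = spark.sql("SELECT NULL").schema
    assert(schema.head.dataType == NullType)
    println(schema.treeString)
  }
}
```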
Example 7
Source File: SimplifyConditionalSuite.scala From multi-tenancy-spark with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{IntegerType, NullType}

class SimplifyConditionalSuite extends PlanTest with PredicateHelper {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyConditionals", FixedPoint(50), SimplifyConditionals) :: Nil
  }

  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
    val correctAnswer = Project(Alias(e2, "out")() :: Nil, OneRowRelation).analyze
    val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, OneRowRelation).analyze)
    comparePlans(actual, correctAnswer)
  }

  private val trueBranch = (TrueLiteral, Literal(5))
  private val normalBranch = (NonFoldableLiteral(true), Literal(10))
  private val unreachableBranch = (FalseLiteral, Literal(20))
  private val nullBranch = (Literal.create(null, NullType), Literal(30))

  test("simplify if") {
    assertEquivalent(
      If(TrueLiteral, Literal(10), Literal(20)),
      Literal(10))

    assertEquivalent(
      If(FalseLiteral, Literal(10), Literal(20)),
      Literal(20))

    assertEquivalent(
      If(Literal.create(null, NullType), Literal(10), Literal(20)),
      Literal(20))
  }

  test("remove unreachable branches") {
    // i.e. removing branches whose conditions are always false
    assertEquivalent(
      CaseWhen(unreachableBranch :: normalBranch :: unreachableBranch :: nullBranch :: Nil, None),
      CaseWhen(normalBranch :: Nil, None))
  }

  test("remove entire CaseWhen if only the else branch is reachable") {
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: Nil, Some(Literal(30))),
      Literal(30))

    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: Nil, None),
      Literal.create(null, IntegerType))
  }

  test("remove entire CaseWhen if the first branch is always true") {
    assertEquivalent(
      CaseWhen(trueBranch :: normalBranch :: nullBranch :: Nil, None),
      Literal(5))

    // Test branch elimination and simplification in combination
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: trueBranch
        :: normalBranch :: Nil, None),
      Literal(5))

    // Make sure this doesn't trigger if there is a non-foldable branch before the true branch
    assertEquivalent(
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None),
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None))
  }
}
```
Example 8
Source File: ResolveInlineTablesSuite.scala From multi-tenancy-spark with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.analysis

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.types.{LongType, NullType}

class ResolveInlineTablesSuite extends PlanTest with BeforeAndAfter {

  private def lit(v: Any): Literal = Literal(v)

  test("validate inputs are foldable") {
    ResolveInlineTables.validateInputEvaluable(
      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))

    // nondeterministic (rand) should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
    }

    // aggregate should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
    }

    // unresolved attribute should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
    }
  }

  test("validate input dimensions") {
    ResolveInlineTables.validateInputDimension(
      UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2)))))

    // num alias != data dimension
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2)))))
    }

    // num alias == data dimension, but data themselves are inconsistent
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22)))))
    }
  }

  test("do not fire the rule if not all expressions are resolved") {
    val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A"))))
    assert(ResolveInlineTables(table) == table)
  }

  test("convert") {
    val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted = ResolveInlineTables.convert(table)

    assert(converted.output.map(_.dataType) == Seq(LongType))
    assert(converted.data.size == 2)
    assert(converted.data(0).getLong(0) == 1L)
    assert(converted.data(1).getLong(0) == 2L)
  }

  test("nullability inference in convert") {
    val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted1 = ResolveInlineTables.convert(table1)
    assert(!converted1.schema.fields(0).nullable)

    val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
    val converted2 = ResolveInlineTables.convert(table2)
    assert(converted2.schema.fields(0).nullable)
  }
}
```
Example 9
Source File: SparkExecuteStatementOperationSuite.scala From multi-tenancy-spark with Apache License 2.0
```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{NullType, StructField, StructType}

class SparkExecuteStatementOperationSuite extends SparkFunSuite {
  test("SPARK-17112 `select null` via JDBC triggers IllegalArgumentException in ThriftServer") {
    val field1 = StructField("NULL", NullType)
    val field2 = StructField("(IF(true, NULL, NULL))", NullType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
  }
}
```
Example 10
Source File: CheckDeltaInvariant.scala From delta with Apache License 2.0
```scala
package org.apache.spark.sql.delta.schema

import org.apache.spark.sql.delta.schema.Invariants.{ArbitraryExpression, NotNull}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{Block, CodegenContext, ExprCode,
  JavaCode, TrueLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types.{DataType, NullType}

case class CheckDeltaInvariant(
    child: Expression,
    invariant: Invariant) extends UnaryExpression with NonSQLExpression {

  override def dataType: DataType = NullType
  override def foldable: Boolean = false
  override def nullable: Boolean = true

  override def flatArguments: Iterator[Any] = Iterator(child)

  private def assertRule(input: InternalRow): Unit = invariant.rule match {
    case NotNull if child.eval(input) == null =>
      throw InvariantViolationException(invariant, "")
    case ArbitraryExpression(expr) =>
      val resolvedExpr = expr.transform {
        case _: UnresolvedAttribute => child
      }
      val result = resolvedExpr.eval(input)
      if (result == null || result == false) {
        throw InvariantViolationException(
          invariant, s"Value ${child.eval(input)} violates requirement.")
      }
  }

  override def eval(input: InternalRow): Any = {
    assertRule(input)
    null
  }

  private def generateNotNullCode(ctx: CodegenContext): Block = {
    val childGen = child.genCode(ctx)
    val invariantField = ctx.addReferenceObj("errMsg", invariant)
    code"""${childGen.code}
       |
       |if (${childGen.isNull}) {
       |  throw org.apache.spark.sql.delta.schema.InvariantViolationException.apply(
       |    $invariantField, "");
       |}
     """.stripMargin
  }

  private def generateExpressionValidationCode(expr: Expression, ctx: CodegenContext): Block = {
    val resolvedExpr = expr.transform {
      case _: UnresolvedAttribute => child
    }
    val elementValue = child.genCode(ctx)
    val childGen = resolvedExpr.genCode(ctx)
    val invariantField = ctx.addReferenceObj("errMsg", invariant)
    val eValue = ctx.freshName("elementResult")
    code"""${elementValue.code}
       |${childGen.code}
       |
       |if (${childGen.isNull} || ${childGen.value} == false) {
       |  Object $eValue = "null";
       |  if (!${elementValue.isNull}) {
       |    $eValue = (Object) ${elementValue.value};
       |  }
       |  throw org.apache.spark.sql.delta.schema.InvariantViolationException.apply(
       |    $invariantField, "Value " + $eValue + " violates requirement.");
       |}
     """.stripMargin
  }

  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val code = invariant.rule match {
      case NotNull => generateNotNullCode(ctx)
      case ArbitraryExpression(expr) => generateExpressionValidationCode(expr, ctx)
    }
    ev.copy(code = code, isNull = TrueLiteral, value = JavaCode.literal("null", NullType))
  }
}
```
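CheckDeltaInvariant illustrates a common catalyst idiom: an expression evaluated only for its side effect declares `NullType` as its data type and always returns null. A stripped-down sketch of the same idiom (illustrative, not Delta's code; written against the Spark 2.x catalyst API used above, with `CodegenFallback` standing in for the hand-written codegen):

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.types.{DataType, NullType}

// Runs purely for its side effect: reports NullType and always yields null.
case class SideEffectCheck(child: Expression)
  extends UnaryExpression with NonSQLExpression with CodegenFallback {

  override def dataType: DataType = NullType
  override def nullable: Boolean = true

  override def eval(input: InternalRow): Any = {
    if (child.eval(input) == null) {
      throw new IllegalStateException("null where a non-null value was required")
    }
    null // nothing meaningful to return
  }
}
```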
Example 11
Source File: SparkExecuteStatementOperationSuite.scala From drizzle-spark with Apache License 2.0
```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{NullType, StructField, StructType}

class SparkExecuteStatementOperationSuite extends SparkFunSuite {
  test("SPARK-17112 `select null` via JDBC triggers IllegalArgumentException in ThriftServer") {
    val field1 = StructField("NULL", NullType)
    val field2 = StructField("(IF(true, NULL, NULL))", NullType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
  }
}
```
Example 12
Source File: DeltaSink.scala From delta with Apache License 2.0
```scala
package org.apache.spark.sql.delta.sources

import org.apache.spark.sql.delta._
import org.apache.spark.sql.delta.actions.SetTransaction
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.spark.sql.delta.schema.{ImplicitMetadataOperation, SchemaUtils}
import org.apache.hadoop.fs.Path

import org.apache.spark.SparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.execution.SQLExecution
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
import org.apache.spark.sql.execution.metric.SQLMetrics.createMetric
import org.apache.spark.sql.execution.streaming.{Sink, StreamExecution}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.NullType

class DeltaSink(
    sqlContext: SQLContext,
    path: Path,
    partitionColumns: Seq[String],
    outputMode: OutputMode,
    options: DeltaOptions)
  extends Sink
    with ImplicitMetadataOperation
    with DeltaLogging {

  private val deltaLog = DeltaLog.forTable(sqlContext.sparkSession, path)

  private val sqlConf = sqlContext.sparkSession.sessionState.conf

  override protected val canOverwriteSchema: Boolean =
    outputMode == OutputMode.Complete() && options.canOverwriteSchema

  override protected val canMergeSchema: Boolean = options.canMergeSchema

  override def addBatch(batchId: Long, data: DataFrame): Unit = deltaLog.withNewTransaction { txn =>
    val sc = data.sparkSession.sparkContext
    val metrics = Map[String, SQLMetric](
      "numAddedFiles" -> createMetric(sc, "number of files added"),
      "numRemovedFiles" -> createMetric(sc, "number of files removed")
    )
    val queryId = sqlContext.sparkContext.getLocalProperty(StreamExecution.QUERY_ID_KEY)
    assert(queryId != null)

    if (SchemaUtils.typeExistsRecursively(data.schema)(_.isInstanceOf[NullType])) {
      throw DeltaErrors.streamWriteNullTypeException
    }

    // If the batch reads the same Delta table as this sink is going to write to, then this
    // write has dependencies. Then make sure that this commit set hasDependencies to true
    // by injecting a read on the whole table. This needs to be done explicitly because
    // MicroBatchExecution has already enforced all the data skipping (by forcing the generation
    // of the executed plan) even before the transaction was started.
    val selfScan = data.queryExecution.analyzed.collectFirst {
      case DeltaTable(index) if index.deltaLog.isSameLogAs(txn.deltaLog) => true
    }.nonEmpty
    if (selfScan) {
      txn.readWholeTable()
    }

    // Streaming sinks can't blindly overwrite schema.
    // See Schema Management design doc for details.
    updateMetadata(
      txn,
      data,
      partitionColumns,
      configuration = Map.empty,
      outputMode == OutputMode.Complete())

    val currentVersion = txn.txnVersion(queryId)
    if (currentVersion >= batchId) {
      logInfo(s"Skipping already complete epoch $batchId, in query $queryId")
      return
    }

    val deletedFiles = outputMode match {
      case o if o == OutputMode.Complete() =>
        deltaLog.assertRemovable()
        txn.filterFiles().map(_.remove)
      case _ => Nil
    }
    val newFiles = txn.writeFiles(data, Some(options))
    val setTxn = SetTransaction(queryId, batchId, Some(deltaLog.clock.getTimeMillis())) :: Nil
    val info = DeltaOperations.StreamingUpdate(outputMode, queryId, batchId, options.userMetadata)
    metrics("numRemovedFiles").set(deletedFiles.size)
    metrics("numAddedFiles").set(newFiles.size)
    txn.registerSQLMetrics(sqlContext.sparkSession, metrics)
    txn.commit(setTxn ++ newFiles ++ deletedFiles, info)
    // This is needed to make the SQL metrics visible in the Spark UI
    val executionId = sqlContext.sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY)
    SQLMetrics.postDriverMetricUpdates(sqlContext.sparkContext, executionId, metrics.values.toSeq)
  }

  override def toString(): String = s"DeltaSink[$path]"
}
```
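The sink rejects NullType anywhere in the schema before writing, using a recursive type scan. A standalone sketch of that check (assumption: re-implemented here for illustration rather than calling Delta's SchemaUtils):

```scala
import org.apache.spark.sql.types._

object NullTypeScan {
  // Recursively look for NullType inside structs, arrays and maps.
  def containsNullType(dt: DataType): Boolean = dt match {
    case _: NullType => true
    case s: StructType => s.fields.exists(f => containsNullType(f.dataType))
    case a: ArrayType => containsNullType(a.elementType)
    case m: MapType => containsNullType(m.keyType) || containsNullType(m.valueType)
    case _ => false
  }

  def main(args: Array[String]): Unit = {
    val schema = new StructType()
      .add("id", LongType)
      .add("payload", new StructType().add("raw", NullType))
    println(containsNullType(schema)) // true
  }
}
```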
Example 13
Source File: SimplifyConditionalSuite.scala From sparkoscope with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{IntegerType, NullType}

class SimplifyConditionalSuite extends PlanTest with PredicateHelper {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyConditionals", FixedPoint(50), SimplifyConditionals) :: Nil
  }

  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
    val correctAnswer = Project(Alias(e2, "out")() :: Nil, OneRowRelation).analyze
    val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, OneRowRelation).analyze)
    comparePlans(actual, correctAnswer)
  }

  private val trueBranch = (TrueLiteral, Literal(5))
  private val normalBranch = (NonFoldableLiteral(true), Literal(10))
  private val unreachableBranch = (FalseLiteral, Literal(20))
  private val nullBranch = (Literal.create(null, NullType), Literal(30))

  test("simplify if") {
    assertEquivalent(
      If(TrueLiteral, Literal(10), Literal(20)),
      Literal(10))

    assertEquivalent(
      If(FalseLiteral, Literal(10), Literal(20)),
      Literal(20))

    assertEquivalent(
      If(Literal.create(null, NullType), Literal(10), Literal(20)),
      Literal(20))
  }

  test("remove unreachable branches") {
    // i.e. removing branches whose conditions are always false
    assertEquivalent(
      CaseWhen(unreachableBranch :: normalBranch :: unreachableBranch :: nullBranch :: Nil, None),
      CaseWhen(normalBranch :: Nil, None))
  }

  test("remove entire CaseWhen if only the else branch is reachable") {
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: Nil, Some(Literal(30))),
      Literal(30))

    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: Nil, None),
      Literal.create(null, IntegerType))
  }

  test("remove entire CaseWhen if the first branch is always true") {
    assertEquivalent(
      CaseWhen(trueBranch :: normalBranch :: nullBranch :: Nil, None),
      Literal(5))

    // Test branch elimination and simplification in combination
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: trueBranch
        :: normalBranch :: Nil, None),
      Literal(5))

    // Make sure this doesn't trigger if there is a non-foldable branch before the true branch
    assertEquivalent(
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None),
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None))
  }
}
```
Example 14
Source File: ResolveInlineTablesSuite.scala From sparkoscope with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.analysis

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.types.{LongType, NullType}

class ResolveInlineTablesSuite extends PlanTest with BeforeAndAfter {

  private def lit(v: Any): Literal = Literal(v)

  test("validate inputs are foldable") {
    ResolveInlineTables.validateInputEvaluable(
      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))

    // nondeterministic (rand) should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
    }

    // aggregate should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
    }

    // unresolved attribute should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
    }
  }

  test("validate input dimensions") {
    ResolveInlineTables.validateInputDimension(
      UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2)))))

    // num alias != data dimension
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2)))))
    }

    // num alias == data dimension, but data themselves are inconsistent
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22)))))
    }
  }

  test("do not fire the rule if not all expressions are resolved") {
    val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A"))))
    assert(ResolveInlineTables(table) == table)
  }

  test("convert") {
    val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted = ResolveInlineTables.convert(table)

    assert(converted.output.map(_.dataType) == Seq(LongType))
    assert(converted.data.size == 2)
    assert(converted.data(0).getLong(0) == 1L)
    assert(converted.data(1).getLong(0) == 2L)
  }

  test("nullability inference in convert") {
    val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted1 = ResolveInlineTables.convert(table1)
    assert(!converted1.schema.fields(0).nullable)

    val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
    val converted2 = ResolveInlineTables.convert(table2)
    assert(converted2.schema.fields(0).nullable)
  }
}
```
Example 15
Source File: SparkExecuteStatementOperationSuite.scala From sparkoscope with Apache License 2.0
```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{NullType, StructField, StructType}

class SparkExecuteStatementOperationSuite extends SparkFunSuite {
  test("SPARK-17112 `select null` via JDBC triggers IllegalArgumentException in ThriftServer") {
    val field1 = StructField("NULL", NullType)
    val field2 = StructField("(IF(true, NULL, NULL))", NullType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
  }
}
```
Example 16
Source File: StreamingGlobalLimitExec.scala From XSQL with Apache License 2.0
```scala
package org.apache.spark.sql.execution.streaming

import java.util.concurrent.TimeUnit.NANOSECONDS

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning}
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.execution.streaming.state.StateStoreOps
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{LongType, NullType, StructField, StructType}
import org.apache.spark.util.CompletionIterator

case class StreamingGlobalLimitExec(
    streamLimit: Long,
    child: SparkPlan,
    stateInfo: Option[StatefulOperatorStateInfo] = None,
    outputMode: Option[OutputMode] = None)
  extends UnaryExecNode with StateStoreWriter {

  private val keySchema = StructType(Array(StructField("key", NullType)))
  private val valueSchema = StructType(Array(StructField("value", LongType)))

  override protected def doExecute(): RDD[InternalRow] = {
    metrics // force lazy init at driver

    assert(outputMode.isDefined && outputMode.get == InternalOutputModes.Append,
      "StreamingGlobalLimitExec is only valid for streams in Append output mode")

    child.execute().mapPartitionsWithStateStore(
      getStateInfo,
      keySchema,
      valueSchema,
      indexOrdinal = None,
      sqlContext.sessionState,
      Some(sqlContext.streams.stateStoreCoordinator)) { (store, iter) =>
      val key = UnsafeProjection.create(keySchema)(new GenericInternalRow(Array[Any](null)))
      val numOutputRows = longMetric("numOutputRows")
      val numUpdatedStateRows = longMetric("numUpdatedStateRows")
      val allUpdatesTimeMs = longMetric("allUpdatesTimeMs")
      val commitTimeMs = longMetric("commitTimeMs")
      val updatesStartTimeNs = System.nanoTime

      val preBatchRowCount: Long = Option(store.get(key)).map(_.getLong(0)).getOrElse(0L)
      var cumulativeRowCount = preBatchRowCount

      val result = iter.filter { r =>
        val x = cumulativeRowCount < streamLimit
        if (x) {
          cumulativeRowCount += 1
        }
        x
      }

      CompletionIterator[InternalRow, Iterator[InternalRow]](result, {
        if (cumulativeRowCount > preBatchRowCount) {
          numUpdatedStateRows += 1
          numOutputRows += cumulativeRowCount - preBatchRowCount
          store.put(key, getValueRow(cumulativeRowCount))
        }
        allUpdatesTimeMs += NANOSECONDS.toMillis(System.nanoTime - updatesStartTimeNs)
        commitTimeMs += timeTakenMs { store.commit() }
        setStoreMetrics(store)
      })
    }
  }

  override def output: Seq[Attribute] = child.output

  override def outputPartitioning: Partitioning = child.outputPartitioning

  override def requiredChildDistribution: Seq[Distribution] = AllTuples :: Nil

  private def getValueRow(value: Long): UnsafeRow = {
    UnsafeProjection.create(valueSchema)(new GenericInternalRow(Array[Any](value)))
  }
}
```
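Note the trick in `keySchema`: a single NullType field gives the state store exactly one possible key, so the operator maintains one global row count. A minimal sketch of how that dummy key is materialized:

```scala
import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection}
import org.apache.spark.sql.types.{NullType, StructField, StructType}

object DummyKeyDemo {
  def main(args: Array[String]): Unit = {
    // A one-field NullType schema always projects the same fixed UnsafeRow,
    // so every partition addresses a single shared state-store entry.
    val keySchema = StructType(Array(StructField("key", NullType)))
    val key = UnsafeProjection.create(keySchema)(new GenericInternalRow(Array[Any](null)))
    println(key.numFields) // 1
  }
}
```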
Example 17
Source File: ResolveInlineTablesSuite.scala From XSQL with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.analysis

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types.{LongType, NullType, TimestampType}

class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter {

  private def lit(v: Any): Literal = Literal(v)

  test("validate inputs are foldable") {
    ResolveInlineTables(conf).validateInputEvaluable(
      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))

    // nondeterministic (rand) should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
    }

    // aggregate should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
    }

    // unresolved attribute should not work
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
    }
  }

  test("validate input dimensions") {
    ResolveInlineTables(conf).validateInputDimension(
      UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2)))))

    // num alias != data dimension
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputDimension(
        UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2)))))
    }

    // num alias == data dimension, but data themselves are inconsistent
    intercept[AnalysisException] {
      ResolveInlineTables(conf).validateInputDimension(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22)))))
    }
  }

  test("do not fire the rule if not all expressions are resolved") {
    val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A"))))
    assert(ResolveInlineTables(conf)(table) == table)
  }

  test("convert") {
    val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted = ResolveInlineTables(conf).convert(table)

    assert(converted.output.map(_.dataType) == Seq(LongType))
    assert(converted.data.size == 2)
    assert(converted.data(0).getLong(0) == 1L)
    assert(converted.data(1).getLong(0) == 2L)
  }

  test("convert TimeZoneAwareExpression") {
    val table = UnresolvedInlineTable(Seq("c1"),
      Seq(Seq(Cast(lit("1991-12-06 00:00:00.0"), TimestampType))))
    val withTimeZone = ResolveTimeZone(conf).apply(table)
    val LocalRelation(output, data, _) = ResolveInlineTables(conf).apply(withTimeZone)
    val correct = Cast(lit("1991-12-06 00:00:00.0"), TimestampType)
      .withTimeZone(conf.sessionLocalTimeZone).eval().asInstanceOf[Long]
    assert(output.map(_.dataType) == Seq(TimestampType))
    assert(data.size == 1)
    assert(data.head.getLong(0) == correct)
  }

  test("nullability inference in convert") {
    val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted1 = ResolveInlineTables(conf).convert(table1)
    assert(!converted1.schema.fields(0).nullable)

    val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
    val converted2 = ResolveInlineTables(conf).convert(table2)
    assert(converted2.schema.fields(0).nullable)
  }
}
```
Example 18
Source File: SparkExecuteStatementOperationSuite.scala From XSQL with Apache License 2.0
```scala
package org.apache.spark.sql.hive.thriftserver

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{IntegerType, NullType, StringType, StructField, StructType}

class SparkExecuteStatementOperationSuite extends SparkFunSuite {
  test("SPARK-17112 `select null` via JDBC triggers IllegalArgumentException in ThriftServer") {
    val field1 = StructField("NULL", NullType)
    val field2 = StructField("(IF(true, NULL, NULL))", NullType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.NULL_TYPE)
  }

  test("SPARK-20146 Comment should be preserved") {
    val field1 = StructField("column1", StringType).withComment("comment 1")
    val field2 = StructField("column2", IntegerType)
    val tableSchema = StructType(Seq(field1, field2))
    val columns = SparkExecuteStatementOperation.getTableSchema(tableSchema).getColumnDescriptors()
    assert(columns.size() == 2)
    assert(columns.get(0).getType() == org.apache.hive.service.cli.Type.STRING_TYPE)
    assert(columns.get(0).getComment() == "comment 1")
    assert(columns.get(1).getType() == org.apache.hive.service.cli.Type.INT_TYPE)
    assert(columns.get(1).getComment() == "")
  }
}
```
Example 19
Source File: SimplifyConditionalSuite.scala From drizzle-spark with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{IntegerType, NullType}

class SimplifyConditionalSuite extends PlanTest with PredicateHelper {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyConditionals", FixedPoint(50), SimplifyConditionals) :: Nil
  }

  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
    val correctAnswer = Project(Alias(e2, "out")() :: Nil, OneRowRelation).analyze
    val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, OneRowRelation).analyze)
    comparePlans(actual, correctAnswer)
  }

  private val trueBranch = (TrueLiteral, Literal(5))
  private val normalBranch = (NonFoldableLiteral(true), Literal(10))
  private val unreachableBranch = (FalseLiteral, Literal(20))
  private val nullBranch = (Literal.create(null, NullType), Literal(30))

  test("simplify if") {
    assertEquivalent(
      If(TrueLiteral, Literal(10), Literal(20)),
      Literal(10))

    assertEquivalent(
      If(FalseLiteral, Literal(10), Literal(20)),
      Literal(20))

    assertEquivalent(
      If(Literal.create(null, NullType), Literal(10), Literal(20)),
      Literal(20))
  }

  test("remove unreachable branches") {
    // i.e. removing branches whose conditions are always false
    assertEquivalent(
      CaseWhen(unreachableBranch :: normalBranch :: unreachableBranch :: nullBranch :: Nil, None),
      CaseWhen(normalBranch :: Nil, None))
  }

  test("remove entire CaseWhen if only the else branch is reachable") {
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: Nil, Some(Literal(30))),
      Literal(30))

    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: Nil, None),
      Literal.create(null, IntegerType))
  }

  test("remove entire CaseWhen if the first branch is always true") {
    assertEquivalent(
      CaseWhen(trueBranch :: normalBranch :: nullBranch :: Nil, None),
      Literal(5))

    // Test branch elimination and simplification in combination
    assertEquivalent(
      CaseWhen(unreachableBranch :: unreachableBranch :: nullBranch :: trueBranch
        :: normalBranch :: Nil, None),
      Literal(5))

    // Make sure this doesn't trigger if there is a non-foldable branch before the true branch
    assertEquivalent(
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None),
      CaseWhen(normalBranch :: trueBranch :: normalBranch :: Nil, None))
  }
}
```
Example 20
Source File: ResolveInlineTablesSuite.scala From drizzle-spark with Apache License 2.0
```scala
package org.apache.spark.sql.catalyst.analysis

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Literal, Rand}
import org.apache.spark.sql.catalyst.expressions.aggregate.Count
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.types.{LongType, NullType}

class ResolveInlineTablesSuite extends PlanTest with BeforeAndAfter {

  private def lit(v: Any): Literal = Literal(v)

  test("validate inputs are foldable") {
    ResolveInlineTables.validateInputEvaluable(
      UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)))))

    // nondeterministic (rand) should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1)))))
    }

    // aggregate should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1))))))
    }

    // unresolved attribute should not work
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputEvaluable(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A")))))
    }
  }

  test("validate input dimensions") {
    ResolveInlineTables.validateInputDimension(
      UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2)))))

    // num alias != data dimension
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2)))))
    }

    // num alias == data dimension, but data themselves are inconsistent
    intercept[AnalysisException] {
      ResolveInlineTables.validateInputDimension(
        UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22)))))
    }
  }

  test("do not fire the rule if not all expressions are resolved") {
    val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A"))))
    assert(ResolveInlineTables(table) == table)
  }

  test("convert") {
    val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted = ResolveInlineTables.convert(table)

    assert(converted.output.map(_.dataType) == Seq(LongType))
    assert(converted.data.size == 2)
    assert(converted.data(0).getLong(0) == 1L)
    assert(converted.data(1).getLong(0) == 2L)
  }

  test("nullability inference in convert") {
    val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L))))
    val converted1 = ResolveInlineTables.convert(table1)
    assert(!converted1.schema.fields(0).nullable)

    val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType))))
    val converted2 = ResolveInlineTables.convert(table2)
    assert(converted2.schema.fields(0).nullable)
  }
}
```