org.apache.spark.sql.execution.ProjectExec Scala Examples
The following examples show how to use org.apache.spark.sql.execution.ProjectExec.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: HiveTypeCoercionSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

/**
 * Registers query tests (through `createQueryTest`) for additions and CASE WHEN expressions
 * built from differently-typed literals, plus a regression test for SPARK-2210.
 */
class HiveTypeCoercionSuite extends HiveComparisonTest {

  /** Pairs of (label used in the generated test name, SQL text used in the query). */
  val baseTypes = Seq(
    "1" -> "1",
    "1.0" -> "CAST(1.0 AS DOUBLE)",
    "1L" -> "1L",
    "1S" -> "1S",
    "1Y" -> "1Y",
    "'1'" -> "'1'")

  // One addition test per ordered pair of base types (every type is also paired with itself).
  for {
    (leftLabel, leftSql) <- baseTypes
    (rightLabel, rightSql) <- baseTypes
  } {
    createQueryTest(
      s"$leftLabel + $rightLabel",
      s"SELECT $leftSql + $rightSql FROM src LIMIT 1")
  }

  val nullVal = "null"

  // CASE WHEN tests with the null literal in either branch. `init` drops the last entry of
  // `baseTypes` (the quoted string literal), so it is not covered here.
  for ((label, sqlText) <- baseTypes.init) {
    createQueryTest(
      s"case when then $label else $nullVal end ",
      s"SELECT case when true then $sqlText else $nullVal end FROM src limit 1")
    createQueryTest(
      s"case when then $nullVal else $label end ",
      s"SELECT case when true then $nullVal else $sqlText end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val query = "select cast(cast(key=0 as boolean) as boolean) from src"
    val projections = TestHive.sql(query).queryExecution.sparkPlan.collect {
      case p: ProjectExec => p
    }
    val project = projections.head

    // No cast expression introduced: the traversal aborts the test on the first Cast it meets.
    project.transformAllExpressions {
      case unexpected: Cast => fail(s"unexpected cast $unexpected")
    }

    // Only one equality check: count EqualTo nodes while leaving the plan untouched.
    var equalityCount = 0
    project.transformAllExpressions {
      case equality: EqualTo =>
        equalityCount += 1
        equality
    }
    assert(equalityCount === 1)
  }
}
Example 2
Source File: HiveTypeCoercionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

/**
 * Registers query tests (through `createQueryTest`) for additions and CASE WHEN expressions
 * built from differently-typed literals, plus a regression test for SPARK-2210.
 */
class HiveTypeCoercionSuite extends HiveComparisonTest {

  /** Pairs of (label used in the generated test name, SQL text used in the query). */
  val baseTypes = Seq(
    "1" -> "1",
    "1.0" -> "CAST(1.0 AS DOUBLE)",
    "1L" -> "1L",
    "1S" -> "1S",
    "1Y" -> "1Y",
    "'1'" -> "'1'")

  // One addition test per ordered pair of base types (every type is also paired with itself).
  for {
    (leftLabel, leftSql) <- baseTypes
    (rightLabel, rightSql) <- baseTypes
  } {
    createQueryTest(
      s"$leftLabel + $rightLabel",
      s"SELECT $leftSql + $rightSql FROM src LIMIT 1")
  }

  val nullVal = "null"

  // CASE WHEN tests with the null literal in either branch. `init` drops the last entry of
  // `baseTypes` (the quoted string literal), so it is not covered here.
  for ((label, sqlText) <- baseTypes.init) {
    createQueryTest(
      s"case when then $label else $nullVal end ",
      s"SELECT case when true then $sqlText else $nullVal end FROM src limit 1")
    createQueryTest(
      s"case when then $nullVal else $label end ",
      s"SELECT case when true then $nullVal else $sqlText end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val query = "select cast(cast(key=0 as boolean) as boolean) from src"
    val projections = TestHive.sql(query).queryExecution.sparkPlan.collect {
      case p: ProjectExec => p
    }
    val project = projections.head

    // No cast expression introduced: the traversal aborts the test on the first Cast it meets.
    project.transformAllExpressions {
      case unexpected: Cast => fail(s"unexpected cast $unexpected")
    }

    // Only one equality check: count EqualTo nodes while leaving the plan untouched.
    var equalityCount = 0
    project.transformAllExpressions {
      case equality: EqualTo =>
        equalityCount += 1
        equality
    }
    assert(equalityCount === 1)
  }
}
Example 3
Source File: DataSourceV2Strategy.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.v2

import scala.collection.mutable

import org.apache.spark.sql.{sources, Strategy}
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression}
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, Repartition}
import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan}
import org.apache.spark.sql.execution.datasources.DataSourceStrategy
import org.apache.spark.sql.execution.streaming.continuous.{ContinuousCoalesceExec, WriteToContinuousDataSource, WriteToContinuousDataSourceExec}
import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, SupportsPushDownFilters, SupportsPushDownRequiredColumns}
import org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader

/**
 * Planning strategy that turns logical plans over DataSourceV2 relations (batch reads,
 * streaming reads, writes, and single-partition repartitions of continuous streams) into
 * physical plans. Plans it does not recognise yield `Nil`.
 */
object DataSourceV2Strategy extends Strategy {

  /**
   * Offers `filters` to the reader for push-down.
   *
   * NOTE(review): `apply` calls this helper, but it was missing from the excerpt this file was
   * restored from. It is reconstructed from the call site and from the otherwise unused imports
   * (`mutable`, `sources`, `DataSourceStrategy`, `SupportsPushDownFilters`) -- confirm against
   * the upstream Spark source.
   *
   * @param reader  the reader created for the relation being planned
   * @param filters Catalyst predicates collected above the relation
   * @return `(pushedFilters, postScanFilters)`: the predicates the reader reports as pushed, and
   *         the predicates that must still be evaluated after the scan. The two may overlap.
   */
  private def pushFilters(
      reader: DataSourceReader,
      filters: Seq[Expression]): (Seq[Expression], Seq[Expression]) = {
    reader match {
      case r: SupportsPushDownFilters =>
        // Maps each translated data source filter back to the Catalyst predicate it came from.
        val translatedFilterToExpr = mutable.HashMap.empty[sources.Filter, Expression]
        // Catalyst predicates that have no data source filter equivalent.
        val untranslatableExprs = mutable.ArrayBuffer.empty[Expression]

        filters.foreach { filterExpr =>
          DataSourceStrategy.translateFilter(filterExpr) match {
            case Some(translated) => translatedFilterToExpr(translated) = filterExpr
            case None => untranslatableExprs += filterExpr
          }
        }

        // Filters handed back by the reader have to be re-evaluated by Spark after the scan.
        val postScanFilters =
          r.pushFilters(translatedFilterToExpr.keys.toArray).map(translatedFilterToExpr)
        // Filters the reader reports as pushed down.
        val pushedFilters = r.pushedFilters().map(translatedFilterToExpr)
        (pushedFilters.toSeq, untranslatableExprs ++ postScanFilters)

      case _ => (Nil, filters)
    }
  }

  /**
   * Computes the scan output after column pruning.
   *
   * When the reader supports required-column push-down and `exprs` reference only a subset of
   * `relation.output`, the reader is told which columns are needed and the output is rebuilt
   * from the schema it then reports. Otherwise `relation.output` is returned unchanged.
   *
   * @param reader   the reader created for the relation being planned
   * @param relation the relation whose output may be pruned
   * @param exprs    expressions whose attribute references decide which columns are needed
   * @return the attributes the scan will produce
   */
  // TODO: nested column pruning.
  private def pruneColumns(
      reader: DataSourceReader,
      relation: DataSourceV2Relation,
      exprs: Seq[Expression]): Seq[AttributeReference] = {
    reader match {
      case r: SupportsPushDownRequiredColumns =>
        val requiredColumns = AttributeSet(exprs.flatMap(_.references))
        val neededOutput = relation.output.filter(requiredColumns.contains)
        if (neededOutput != relation.output) {
          // Order matters: the reader must be told what to prune before its schema is re-read.
          r.pruneColumns(neededOutput.toStructType)
          val nameToAttr = relation.output.map(attr => attr.name -> attr).toMap
          r.readSchema().toAttributes.map {
            // We have to keep the attribute id during transformation.
            a => a.withExprId(nameToAttr(a.name).exprId)
          }
        } else {
          relation.output
        }

      case _ => relation.output
    }
  }

  /**
   * Plans `plan` if it is one of the DataSourceV2 shapes handled here.
   *
   * @param plan the logical plan to translate
   * @return a single-element list holding the physical plan, or `Nil` when `plan` is not handled
   */
  override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case PhysicalOperation(project, filters, relation: DataSourceV2Relation) =>
      val reader = relation.newReader()
      // `pushedFilters` will be pushed down and evaluated in the underlying data sources.
      // `postScanFilters` need to be evaluated after the scan.
      // `postScanFilters` and `pushedFilters` can overlap, e.g. the parquet row group filter.
      val (pushedFilters, postScanFilters) = pushFilters(reader, filters)
      val output = pruneColumns(reader, relation, project ++ postScanFilters)
      logInfo(
        s"""
           |Pushing operators to ${relation.source.getClass}
           |Pushed Filters: ${pushedFilters.mkString(", ")}
           |Post-Scan Filters: ${postScanFilters.mkString(",")}
           |Output: ${output.mkString(", ")}
         """.stripMargin)

      val scan = DataSourceV2ScanExec(
        output, relation.source, relation.options, pushedFilters, reader)

      // AND the remaining predicates together; with none left the scan is used as-is.
      val filterCondition = postScanFilters.reduceLeftOption(And)
      val withFilter = filterCondition.map(FilterExec(_, scan)).getOrElse(scan)

      // always add the projection, which will produce unsafe rows required by some operators
      ProjectExec(project, withFilter) :: Nil

    case r: StreamingDataSourceV2Relation =>
      // ensure there is a projection, which will produce unsafe rows required by some operators
      ProjectExec(r.output,
        DataSourceV2ScanExec(r.output, r.source, r.options, r.pushedFilters, r.reader)) :: Nil

    case WriteToDataSourceV2(writer, query) =>
      WriteToDataSourceV2Exec(writer, planLater(query)) :: Nil

    case AppendData(r: DataSourceV2Relation, query, _) =>
      WriteToDataSourceV2Exec(r.newWriter(), planLater(query)) :: Nil

    case WriteToContinuousDataSource(writer, query) =>
      WriteToContinuousDataSourceExec(writer, planLater(query)) :: Nil

    case Repartition(1, false, child) =>
      // Only planned here when the child contains a streaming relation backed by a
      // ContinuousReader; any other child is left unplanned by this strategy.
      val isContinuous = child.collectFirst {
        case StreamingDataSourceV2Relation(_, _, _, r: ContinuousReader) => r
      }.isDefined

      if (isContinuous) {
        ContinuousCoalesceExec(1, planLater(child)) :: Nil
      } else {
        Nil
      }

    case _ => Nil
  }
}
Example 4
Source File: HiveTypeCoercionSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

/**
 * Registers query tests (through `createQueryTest`) for additions and CASE WHEN expressions
 * built from differently-typed literals, plus a regression test for SPARK-2210.
 */
class HiveTypeCoercionSuite extends HiveComparisonTest {

  /** Pairs of (label used in the generated test name, SQL text used in the query). */
  val baseTypes = Seq(
    "1" -> "1",
    "1.0" -> "CAST(1.0 AS DOUBLE)",
    "1L" -> "1L",
    "1S" -> "1S",
    "1Y" -> "1Y",
    "'1'" -> "'1'")

  // One addition test per ordered pair of base types (every type is also paired with itself).
  for {
    (leftLabel, leftSql) <- baseTypes
    (rightLabel, rightSql) <- baseTypes
  } {
    createQueryTest(
      s"$leftLabel + $rightLabel",
      s"SELECT $leftSql + $rightSql FROM src LIMIT 1")
  }

  val nullVal = "null"

  // CASE WHEN tests with the null literal in either branch. `init` drops the last entry of
  // `baseTypes` (the quoted string literal), so it is not covered here.
  for ((label, sqlText) <- baseTypes.init) {
    createQueryTest(
      s"case when then $label else $nullVal end ",
      s"SELECT case when true then $sqlText else $nullVal end FROM src limit 1")
    createQueryTest(
      s"case when then $nullVal else $label end ",
      s"SELECT case when true then $nullVal else $sqlText end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val query = "select cast(cast(key=0 as boolean) as boolean) from src"
    val projections = TestHive.sql(query).queryExecution.sparkPlan.collect {
      case p: ProjectExec => p
    }
    val project = projections.head

    // No cast expression introduced: the traversal aborts the test on the first Cast it meets.
    project.transformAllExpressions {
      case unexpected: Cast => fail(s"unexpected cast $unexpected")
    }

    // Only one equality check: count EqualTo nodes while leaving the plan untouched.
    var equalityCount = 0
    project.transformAllExpressions {
      case equality: EqualTo =>
        equalityCount += 1
        equality
    }
    assert(equalityCount === 1)
  }
}
Example 5
Source File: HiveTypeCoercionSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

/**
 * Registers query tests (through `createQueryTest`) for additions and CASE WHEN expressions
 * built from differently-typed literals, plus a regression test for SPARK-2210.
 */
class HiveTypeCoercionSuite extends HiveComparisonTest {

  /** Pairs of (label used in the generated test name, SQL text used in the query). */
  val baseTypes = Seq(
    "1" -> "1",
    "1.0" -> "CAST(1.0 AS DOUBLE)",
    "1L" -> "1L",
    "1S" -> "1S",
    "1Y" -> "1Y",
    "'1'" -> "'1'")

  // One addition test per ordered pair of base types (every type is also paired with itself).
  for {
    (leftLabel, leftSql) <- baseTypes
    (rightLabel, rightSql) <- baseTypes
  } {
    createQueryTest(
      s"$leftLabel + $rightLabel",
      s"SELECT $leftSql + $rightSql FROM src LIMIT 1")
  }

  val nullVal = "null"

  // CASE WHEN tests with the null literal in either branch. `init` drops the last entry of
  // `baseTypes` (the quoted string literal), so it is not covered here.
  for ((label, sqlText) <- baseTypes.init) {
    createQueryTest(
      s"case when then $label else $nullVal end ",
      s"SELECT case when true then $sqlText else $nullVal end FROM src limit 1")
    createQueryTest(
      s"case when then $nullVal else $label end ",
      s"SELECT case when true then $nullVal else $sqlText end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val query = "select cast(cast(key=0 as boolean) as boolean) from src"
    val projections = TestHive.sql(query).queryExecution.sparkPlan.collect {
      case p: ProjectExec => p
    }
    val project = projections.head

    // No cast expression introduced: the traversal aborts the test on the first Cast it meets.
    project.transformAllExpressions {
      case unexpected: Cast => fail(s"unexpected cast $unexpected")
    }

    // Only one equality check: count EqualTo nodes while leaving the plan untouched.
    var equalityCount = 0
    project.transformAllExpressions {
      case equality: EqualTo =>
        equalityCount += 1
        equality
    }
    assert(equalityCount === 1)
  }
}
Example 6
Source File: HiveTypeCoercionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

/**
 * Registers query tests (through `createQueryTest`) for additions and CASE WHEN expressions
 * built from differently-typed literals, plus a regression test for SPARK-2210.
 */
class HiveTypeCoercionSuite extends HiveComparisonTest {

  /** Pairs of (label used in the generated test name, SQL text used in the query). */
  val baseTypes = Seq(
    "1" -> "1",
    "1.0" -> "CAST(1.0 AS DOUBLE)",
    "1L" -> "1L",
    "1S" -> "1S",
    "1Y" -> "1Y",
    "'1'" -> "'1'")

  // One addition test per ordered pair of base types (every type is also paired with itself).
  for {
    (leftLabel, leftSql) <- baseTypes
    (rightLabel, rightSql) <- baseTypes
  } {
    createQueryTest(
      s"$leftLabel + $rightLabel",
      s"SELECT $leftSql + $rightSql FROM src LIMIT 1")
  }

  val nullVal = "null"

  // CASE WHEN tests with the null literal in either branch. `init` drops the last entry of
  // `baseTypes` (the quoted string literal), so it is not covered here.
  for ((label, sqlText) <- baseTypes.init) {
    createQueryTest(
      s"case when then $label else $nullVal end ",
      s"SELECT case when true then $sqlText else $nullVal end FROM src limit 1")
    createQueryTest(
      s"case when then $nullVal else $label end ",
      s"SELECT case when true then $nullVal else $sqlText end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val query = "select cast(cast(key=0 as boolean) as boolean) from src"
    val projections = TestHive.sql(query).queryExecution.sparkPlan.collect {
      case p: ProjectExec => p
    }
    val project = projections.head

    // No cast expression introduced: the traversal aborts the test on the first Cast it meets.
    project.transformAllExpressions {
      case unexpected: Cast => fail(s"unexpected cast $unexpected")
    }

    // Only one equality check: count EqualTo nodes while leaving the plan untouched.
    var equalityCount = 0
    project.transformAllExpressions {
      case equality: EqualTo =>
        equalityCount += 1
        equality
    }
    assert(equalityCount === 1)
  }
}