org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.
Each example notes its source file, the open-source project it comes from, and that project's license.
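Before the examples, a minimal sketch of how UnresolvedAttribute instances are typically constructed; the column names here are illustrative, not taken from any project below:

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute

// apply(String) splits on dots: "t.col" becomes name parts Seq("t", "col")
val dotted = UnresolvedAttribute("t.col")
// quoted(String) keeps the whole string as a single name part: Seq("t.col")
val single = UnresolvedAttribute.quoted("t.col")
// quotedString(String) honors backtick escaping: `a.b`.c becomes Seq("a.b", "c")
val escaped = UnresolvedAttribute.quotedString("`a.b`.c")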
Example 1
Source File: DeltaTableOperations.scala From delta with Apache License 2.0

package io.delta.tables.execution

import scala.collection.Map

import org.apache.spark.sql.delta.{DeltaErrors, DeltaHistoryManager, DeltaLog, PreprocessTableUpdate}
import org.apache.spark.sql.delta.commands.{DeleteCommand, DeltaGenerateCommand, VacuumCommand}
import org.apache.spark.sql.delta.util.AnalysisHelper
import io.delta.tables.DeltaTable
import org.apache.spark.sql.{functions, Column, DataFrame, Dataset}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.logical._

trait DeltaTableOperations extends AnalysisHelper { self: DeltaTable =>

  protected def executeDelete(condition: Option[Expression]): Unit = improveUnsupportedOpError {
    val delete = DeleteFromTable(self.toDF.queryExecution.analyzed, condition)
    toDataset(sparkSession, delete)
  }

  protected def executeHistory(deltaLog: DeltaLog, limit: Option[Int]): DataFrame = {
    val history = new DeltaHistoryManager(deltaLog)
    val spark = self.toDF.sparkSession
    spark.createDataFrame(history.getHistory(limit))
  }

  protected def executeGenerate(tblIdentifier: String, mode: String): Unit = {
    val tableId: TableIdentifier = sparkSession
      .sessionState
      .sqlParser
      .parseTableIdentifier(tblIdentifier)
    val generate = DeltaGenerateCommand(mode, tableId)
    generate.run(sparkSession)
  }

  protected def executeUpdate(
      set: Map[String, Column],
      condition: Option[Column]): Unit = improveUnsupportedOpError {
    val assignments = set.map { case (targetColName, column) =>
      Assignment(UnresolvedAttribute.quotedString(targetColName), column.expr)
    }.toSeq
    val update = UpdateTable(self.toDF.queryExecution.analyzed, assignments, condition.map(_.expr))
    toDataset(sparkSession, update)
  }

  protected def executeVacuum(
      deltaLog: DeltaLog,
      retentionHours: Option[Double]): DataFrame = {
    VacuumCommand.gc(sparkSession, deltaLog, false, retentionHours)
    sparkSession.emptyDataFrame
  }

  protected def toStrColumnMap(map: Map[String, String]): Map[String, Column] = {
    map.toSeq.map { case (k, v) => k -> functions.expr(v) }.toMap
  }

  protected def sparkSession = self.toDF.sparkSession
}
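For context, a hedged sketch of how executeUpdate above is reached through the public DeltaTable API. It assumes a SparkSession named spark is in scope and uses an example table path; each key in the set map becomes an Assignment on UnresolvedAttribute.quotedString(key):

import io.delta.tables.DeltaTable
import org.apache.spark.sql.functions.expr

// Assumed table location, for illustration only.
val deltaTable = DeltaTable.forPath(spark, "/tmp/delta/events")
deltaTable.update(
  condition = expr("id % 2 == 0"),
  set = Map("count" -> expr("count + 1")))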
Example 2
Source File: package.scala From glow with Apache License 2.0

package io.projectglow.sql

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.DataType

package object dsl {

  trait ImplicitOperators {
    def expr: Expression

    private def makeLambdaFunction(f: Expression => Expression): LambdaFunction = {
      val x = UnresolvedNamedLambdaVariable(Seq("x"))
      LambdaFunction(f(x), Seq(x))
    }

    private def makeLambdaFunction(f: (Expression, Expression) => Expression): LambdaFunction = {
      val x = UnresolvedNamedLambdaVariable(Seq("x"))
      val y = UnresolvedNamedLambdaVariable(Seq("y"))
      LambdaFunction(f(x, y), Seq(x, y))
    }

    def arrayTransform(fn: Expression => Expression): Expression = {
      ArrayTransform(expr, makeLambdaFunction(fn))
    }

    def arrayTransform(fn: (Expression, Expression) => Expression): Expression = {
      ArrayTransform(expr, makeLambdaFunction(fn))
    }

    def filter(f: Expression => Expression): Expression = {
      ArrayFilter(expr, makeLambdaFunction(f))
    }

    def filter(f: (Expression, Expression) => Expression): Expression = {
      ArrayFilter(expr, makeLambdaFunction(f))
    }

    def aggregate(
        initialValue: Expression,
        merge: (Expression, Expression) => Expression,
        finish: Expression => Expression = identity): Expression = {
      ArrayAggregate(
        expr,
        initialValue,
        makeLambdaFunction(merge),
        makeLambdaFunction(finish)
      )
    }
  }

  implicit class GlowExpression(val expr: Expression) extends ImplicitOperators
}
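A short, hedged sketch of the DSL above in use; the column name and the transform function are illustrative. GlowExpression lifts any Expression, so arrayTransform builds an ArrayTransform over an unresolved lambda variable:

import io.projectglow.sql.dsl._
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Add, Literal}

// Increment every element of an (as yet unresolved) array column.
val values = UnresolvedAttribute("values")
val plusOne = values.arrayTransform(x => Add(x, Literal(1)))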
Example 3
Source File: LocalNodeTest.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.execution.local

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Expression, AttributeReference}
import org.apache.spark.sql.types.{IntegerType, StringType}

class LocalNodeTest extends SparkFunSuite {

  protected val conf: SQLConf = new SQLConf
  protected val kvIntAttributes = Seq(
    AttributeReference("k", IntegerType)(),
    AttributeReference("v", IntegerType)())
  protected val joinNameAttributes = Seq(
    AttributeReference("id1", IntegerType)(),
    AttributeReference("name", StringType)())
  protected val joinNicknameAttributes = Seq(
    AttributeReference("id2", IntegerType)(),
    AttributeReference("nickname", StringType)())

  protected def resolveExpressions(
      expressions: Seq[Expression],
      localNode: LocalNode): Seq[Expression] = {
    require(localNode.expressions.forall(_.resolved))
    val inputMap = localNode.output.map { a => (a.name, a) }.toMap
    expressions.map { expression =>
      expression.transformUp {
        case UnresolvedAttribute(Seq(u)) =>
          inputMap.getOrElse(u,
            sys.error(s"Invalid Test: Cannot resolve $u given input $inputMap"))
      }
    }
  }
}
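The same resolution idiom, sketched outside the test harness with an illustrative attribute and predicate: single-part UnresolvedAttributes are replaced, bottom-up, by matching concrete attributes.

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
import org.apache.spark.sql.types.IntegerType

val k = AttributeReference("k", IntegerType)()
// 'k = 1, with the unresolved 'k swapped for the concrete attribute.
val resolved = EqualTo(UnresolvedAttribute("k"), Literal(1)).transformUp {
  case UnresolvedAttribute(Seq(name)) if name == k.name => k
}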
Example 4
Source File: ConvertToLocalRelationSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 5
Source File: GPlanExpander.scala From ingraph with Eclipse Public License 1.0

package ingraph.compiler.cypher2gplan

import ingraph.model.{expr, gplan}
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.{expressions => cExpr}

object GPlanExpander {

  def expandGPlan(rawQueryPlan: gplan.GNode): gplan.GNode = {
    // Should there be other rule sets (partial functions), combine them using orElse,
    // e.g. pfunc1 orElse pfunc2.
    // Expanding GetVertices involves creating other GetVertices, so we use transformUp
    // to avoid infinite recursion.
    val full = rawQueryPlan.transformUp(gplanExpander)
    full.asInstanceOf[gplan.GNode]
  }

  val gplanExpander: PartialFunction[LogicalPlan, LogicalPlan] = {
    // Nullary
    case gplan.GetVertices(vertexAttribute) if vertexAttribute.properties.nonEmpty => {
      val condition: Expression =
        propertyMapToCondition(vertexAttribute.properties, vertexAttribute.name)
      gplan.Selection(condition, gplan.GetVertices(vertexAttribute))
    }
    case gplan.Expand(srcVertexAttribute, trgVertexAttribute, edge, dir, child)
        if edge.properties.nonEmpty || trgVertexAttribute.properties.nonEmpty => {
      val selectionOnEdge = gplan.Selection(
        propertyMapToCondition(edge.properties, edge.name),
        gplan.Expand(srcVertexAttribute, trgVertexAttribute, edge, dir, child))
      val selectionOnTargetVertex = gplan.Selection(
        propertyMapToCondition(trgVertexAttribute.properties, trgVertexAttribute.name),
        selectionOnEdge)
      selectionOnTargetVertex
    }
  }

  def propertyMapToCondition(properties: expr.types.TPropertyMap, baseName: String): Expression = {
    properties.map( (p) => cExpr.EqualTo(UnresolvedAttribute(Seq(baseName, p._1)), p._2) )
      .foldLeft[Expression]( cExpr.Literal(true) )( (b, a) => cExpr.And(b, a) )
  }
}
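For a single property, the fold in propertyMapToCondition above produces a conjunction anchored at Literal(true). Written out by hand with illustrative names:

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{And, EqualTo, Literal}

// propertyMapToCondition(Map("name" -> Literal("Alice")), "v") amounts to:
val cond = And(
  Literal(true),
  EqualTo(UnresolvedAttribute(Seq("v", "name")), Literal("Alice")))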
Example 6
Source File: OptimizerStructuralIntegrityCheckerSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project}
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf

class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {

  object OptimizeRuleBreakSI extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan transform {
      case Project(projectList, child) =>
        val newAttr = UnresolvedAttribute("unresolvedAttr")
        Project(projectList ++ Seq(newAttr), child)
    }
  }

  object Optimize extends Optimizer(
    new SessionCatalog(
      new InMemoryCatalog,
      EmptyFunctionRegistry,
      new SQLConf())) {
    val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI)
    override def batches: Seq[Batch] = Seq(newBatch) ++ super.batches
  }

  test("check for invalid plan after execution of rule") {
    val analyzed = Project(Alias(Literal(10), "attr")() :: Nil, OneRowRelation()).analyze
    assert(analyzed.resolved)
    val message = intercept[TreeNodeException[LogicalPlan]] {
      Optimize.execute(analyzed)
    }.getMessage
    val ruleName = OptimizeRuleBreakSI.ruleName
    assert(message.contains(s"After applying rule $ruleName in batch OptimizeRuleBreakSI"))
    assert(message.contains("the structural integrity of the plan is broken"))
  }
}
Example 7
Source File: ConvertToLocalRelationSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 8
Source File: OptimizeInSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("ConstantFolding", Once,
        ConstantFolding,
        BooleanSimplification,
        OptimizeIn) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  // OptimizedIn test: the In clause is not optimized to InSet when it has fewer than 10 items
  test("OptimizedIn test: In clause not optimized to InSet when less than 10 items") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    comparePlans(optimized, originalQuery)
  }

  // OptimizedIn test: the In clause is optimized to InSet when it has more than 10 items
  test("OptimizedIn test: In clause optimized to InSet when more than 10 items") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), (1 to 11).map(Literal(_))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(InSet(UnresolvedAttribute("a"), (1 to 11).toSet))
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  // OptimizedIn test: the In clause is not optimized when the filter contains attributes
  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 9
Source File: ConvertToLocalRelationSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 10
Source File: OptimizeInSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("ConstantFolding", Once,
        ConstantFolding,
        BooleanSimplification,
        OptimizeIn) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("OptimizedIn test: In clause optimized to InSet") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(InSet(UnresolvedAttribute("a"), HashSet[Any]() + 1 + 2))
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer =
      testRelation
        .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
        .analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 11
Source File: ConvertToLocalRelationSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      Row(1, 2) :: Row(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      Row(1, 3) :: Row(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 12
Source File: ConvertToLocalRelationSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 13
Source File: joinTypes.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.plans

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Attribute

object JoinType {
  def apply(typ: String): JoinType = typ.toLowerCase.replace("_", "") match {
    case "inner" => Inner
    case "outer" | "full" | "fullouter" => FullOuter
    case "leftouter" | "left" => LeftOuter
    case "rightouter" | "right" => RightOuter
    case "leftsemi" => LeftSemi
    case "leftanti" => LeftAnti
    case "cross" => Cross
    case _ =>
      val supported = Seq(
        "inner",
        "outer", "full", "fullouter",
        "leftouter", "left",
        "rightouter", "right",
        "leftsemi",
        "leftanti",
        "cross")

      throw new IllegalArgumentException(s"Unsupported join type '$typ'. " +
        "Supported join types include: " + supported.mkString("'", "', '", "'") + ".")
  }
}

sealed abstract class JoinType {
  def sql: String
}

sealed abstract class InnerLike extends JoinType {
  def explicitCartesian: Boolean
}

case object Inner extends InnerLike {
  override def explicitCartesian: Boolean = false
  override def sql: String = "INNER"
}

case object Cross extends InnerLike {
  override def explicitCartesian: Boolean = true
  override def sql: String = "CROSS"
}

case object LeftOuter extends JoinType {
  override def sql: String = "LEFT OUTER"
}

case object RightOuter extends JoinType {
  override def sql: String = "RIGHT OUTER"
}

case object FullOuter extends JoinType {
  override def sql: String = "FULL OUTER"
}

case object LeftSemi extends JoinType {
  override def sql: String = "LEFT SEMI"
}

case object LeftAnti extends JoinType {
  override def sql: String = "LEFT ANTI"
}

case class ExistenceJoin(exists: Attribute) extends JoinType {
  override def sql: String = {
    // This join type is only used at the end of the optimizer and in physical plans;
    // we will not generate SQL for it.
    throw new UnsupportedOperationException
  }
}

case class NaturalJoin(tpe: JoinType) extends JoinType {
  require(Seq(Inner, LeftOuter, RightOuter, FullOuter).contains(tpe),
    "Unsupported natural join type " + tpe)
  override def sql: String = "NATURAL " + tpe.sql
}

case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
  require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
    "Unsupported using join type " + tpe)
  override def sql: String = "USING " + tpe.sql
}

object LeftExistence {
  def unapply(joinType: JoinType): Option[JoinType] = joinType match {
    case LeftSemi | LeftAnti => Some(joinType)
    case j: ExistenceJoin => Some(joinType)
    case _ => None
  }
}
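JoinType.apply lower-cases the input and strips underscores before matching, so several spellings map to the same join type; a quick sketch:

import org.apache.spark.sql.catalyst.plans.{JoinType, LeftOuter}

// All of these normalize to "leftouter" and return LeftOuter.
assert(JoinType("LEFT_OUTER") == LeftOuter)
assert(JoinType("leftOuter") == LeftOuter)
assert(JoinType("left") == LeftOuter)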
Example 14
Source File: joinTypes.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.plans

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Attribute

object JoinType {
  def apply(typ: String): JoinType = typ.toLowerCase.replace("_", "") match {
    case "inner" => Inner
    case "outer" | "full" | "fullouter" => FullOuter
    case "leftouter" | "left" => LeftOuter
    case "rightouter" | "right" => RightOuter
    case "leftsemi" => LeftSemi
    case "leftanti" => LeftAnti
    case "cross" => Cross
    case _ =>
      val supported = Seq(
        "inner",
        "outer", "full", "fullouter",
        "leftouter", "left",
        "rightouter", "right",
        "leftsemi",
        "leftanti",
        "cross")

      throw new IllegalArgumentException(s"Unsupported join type '$typ'. " +
        "Supported join types include: " + supported.mkString("'", "', '", "'") + ".")
  }
}

sealed abstract class JoinType {
  def sql: String
}

sealed abstract class InnerLike extends JoinType {
  def explicitCartesian: Boolean
}

case object Inner extends InnerLike {
  override def explicitCartesian: Boolean = false
  override def sql: String = "INNER"
}

case object Cross extends InnerLike {
  override def explicitCartesian: Boolean = true
  override def sql: String = "CROSS"
}

case object LeftOuter extends JoinType {
  override def sql: String = "LEFT OUTER"
}

case object RightOuter extends JoinType {
  override def sql: String = "RIGHT OUTER"
}

case object FullOuter extends JoinType {
  override def sql: String = "FULL OUTER"
}

case object LeftSemi extends JoinType {
  override def sql: String = "LEFT SEMI"
}

case object LeftAnti extends JoinType {
  override def sql: String = "LEFT ANTI"
}

case class ExistenceJoin(exists: Attribute) extends JoinType {
  override def sql: String = {
    // This join type is only used at the end of the optimizer and in physical plans;
    // we will not generate SQL for it.
    throw new UnsupportedOperationException
  }
}

case class NaturalJoin(tpe: JoinType) extends JoinType {
  require(Seq(Inner, LeftOuter, RightOuter, FullOuter).contains(tpe),
    "Unsupported natural join type " + tpe)
  override def sql: String = "NATURAL " + tpe.sql
}

// Note: unlike the previous example, this version keeps the USING columns
// as UnresolvedAttributes rather than plain strings.
case class UsingJoin(tpe: JoinType, usingColumns: Seq[UnresolvedAttribute]) extends JoinType {
  require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
    "Unsupported using join type " + tpe)
  override def sql: String = "USING " + tpe.sql
}

object LeftExistence {
  def unapply(joinType: JoinType): Option[JoinType] = joinType match {
    case LeftSemi | LeftAnti => Some(joinType)
    case j: ExistenceJoin => Some(joinType)
    case _ => None
  }
}
Example 15
Source File: CheckDeltaInvariant.scala From delta with Apache License 2.0

package org.apache.spark.sql.delta.schema

import org.apache.spark.sql.delta.schema.Invariants.{ArbitraryExpression, NotNull}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Expression, NonSQLExpression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{Block, CodegenContext, ExprCode, JavaCode, TrueLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types.{DataType, NullType}

case class CheckDeltaInvariant(
    child: Expression,
    invariant: Invariant) extends UnaryExpression with NonSQLExpression {

  override def dataType: DataType = NullType
  override def foldable: Boolean = false
  override def nullable: Boolean = true

  override def flatArguments: Iterator[Any] = Iterator(child)

  private def assertRule(input: InternalRow): Unit = invariant.rule match {
    case NotNull if child.eval(input) == null =>
      throw InvariantViolationException(invariant, "")
    case ArbitraryExpression(expr) =>
      val resolvedExpr = expr.transform {
        case _: UnresolvedAttribute => child
      }
      val result = resolvedExpr.eval(input)
      if (result == null || result == false) {
        throw InvariantViolationException(
          invariant, s"Value ${child.eval(input)} violates requirement.")
      }
  }

  override def eval(input: InternalRow): Any = {
    assertRule(input)
    null
  }

  private def generateNotNullCode(ctx: CodegenContext): Block = {
    val childGen = child.genCode(ctx)
    val invariantField = ctx.addReferenceObj("errMsg", invariant)
    code"""${childGen.code}
       |
       |if (${childGen.isNull}) {
       |  throw org.apache.spark.sql.delta.schema.InvariantViolationException.apply(
       |    $invariantField, "");
       |}
     """.stripMargin
  }

  private def generateExpressionValidationCode(expr: Expression, ctx: CodegenContext): Block = {
    val resolvedExpr = expr.transform {
      case _: UnresolvedAttribute => child
    }
    val elementValue = child.genCode(ctx)
    val childGen = resolvedExpr.genCode(ctx)
    val invariantField = ctx.addReferenceObj("errMsg", invariant)
    val eValue = ctx.freshName("elementResult")
    code"""${elementValue.code}
       |${childGen.code}
       |
       |if (${childGen.isNull} || ${childGen.value} == false) {
       |  Object $eValue = "null";
       |  if (!${elementValue.isNull}) {
       |    $eValue = (Object) ${elementValue.value};
       |  }
       |  throw org.apache.spark.sql.delta.schema.InvariantViolationException.apply(
       |    $invariantField, "Value " + $eValue + " violates requirement.");
       |}
     """.stripMargin
  }

  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val code = invariant.rule match {
      case NotNull => generateNotNullCode(ctx)
      case ArbitraryExpression(expr) => generateExpressionValidationCode(expr, ctx)
    }
    ev.copy(code = code, isNull = TrueLiteral, value = JavaCode.literal("null", NullType))
  }
}
Example 16
Source File: DeltaSourceUtils.scala From delta with Apache License 2.0

package org.apache.spark.sql.delta.sources

import java.util.Locale

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.sources
import org.apache.spark.sql.sources.Filter

object DeltaSourceUtils {
  val NAME = "delta"
  val ALT_NAME = "delta"

  // Batch relations don't pass partitioning columns to `CreatableRelationProvider`s, therefore
  // as a hack, we pass in the partitioning columns among the options.
  val PARTITIONING_COLUMNS_KEY = "__partition_columns"

  def isDeltaDataSourceName(name: String): Boolean = {
    name.toLowerCase(Locale.ROOT) == NAME || name.toLowerCase(Locale.ROOT) == ALT_NAME
  }

  def translateFilters(filters: Array[Filter]): Expression = filters.map {
    case sources.EqualTo(attribute, value) =>
      expressions.EqualTo(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.EqualNullSafe(attribute, value) =>
      expressions.EqualNullSafe(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.GreaterThan(attribute, value) =>
      expressions.GreaterThan(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.GreaterThanOrEqual(attribute, value) =>
      expressions.GreaterThanOrEqual(
        UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.LessThan(attribute, value) =>
      expressions.LessThanOrEqual(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.LessThanOrEqual(attribute, value) =>
      expressions.LessThanOrEqual(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.In(attribute, values) =>
      expressions.In(UnresolvedAttribute(attribute), values.map(createLiteral))
    case sources.IsNull(attribute) => expressions.IsNull(UnresolvedAttribute(attribute))
    case sources.IsNotNull(attribute) => expressions.IsNotNull(UnresolvedAttribute(attribute))
    case sources.Not(otherFilter) => expressions.Not(translateFilters(Array(otherFilter)))
    case sources.And(filter1, filter2) =>
      expressions.And(translateFilters(Array(filter1)), translateFilters(Array(filter2)))
    case sources.Or(filter1, filter2) =>
      expressions.Or(translateFilters(Array(filter1)), translateFilters(Array(filter2)))
    case sources.StringStartsWith(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"${value}%"))
    case sources.StringEndsWith(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}"))
    case sources.StringContains(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}%"))
    case sources.AlwaysTrue() => expressions.Literal.TrueLiteral
    case sources.AlwaysFalse() => expressions.Literal.FalseLiteral
  }.reduce(expressions.And)
}
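A minimal sketch of translateFilters above, with assumed attribute names and values; the filters reduce to a single And of comparisons over UnresolvedAttributes:

import org.apache.spark.sql.sources

// Becomes roughly: ('year = 2020) AND isnotnull('month)
val predicate = DeltaSourceUtils.translateFilters(Array(
  sources.EqualTo("year", 2020),
  sources.IsNotNull("month")))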
Example 17
Source File: ConvertToLocalRelationSuite.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 18
Source File: joinTypes.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.plans

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.Attribute

object JoinType {
  def apply(typ: String): JoinType = typ.toLowerCase.replace("_", "") match {
    case "inner" => Inner
    case "outer" | "full" | "fullouter" => FullOuter
    case "leftouter" | "left" => LeftOuter
    case "rightouter" | "right" => RightOuter
    case "leftsemi" => LeftSemi
    case "leftanti" => LeftAnti
    case "cross" => Cross
    case _ =>
      val supported = Seq(
        "inner",
        "outer", "full", "fullouter",
        "leftouter", "left",
        "rightouter", "right",
        "leftsemi",
        "leftanti",
        "cross")

      throw new IllegalArgumentException(s"Unsupported join type '$typ'. " +
        "Supported join types include: " + supported.mkString("'", "', '", "'") + ".")
  }
}

sealed abstract class JoinType {
  def sql: String
}

sealed abstract class InnerLike extends JoinType {
  def explicitCartesian: Boolean
}

case object Inner extends InnerLike {
  override def explicitCartesian: Boolean = false
  override def sql: String = "INNER"
}

case object Cross extends InnerLike {
  override def explicitCartesian: Boolean = true
  override def sql: String = "CROSS"
}

case object LeftOuter extends JoinType {
  override def sql: String = "LEFT OUTER"
}

case object RightOuter extends JoinType {
  override def sql: String = "RIGHT OUTER"
}

case object FullOuter extends JoinType {
  override def sql: String = "FULL OUTER"
}

case object LeftSemi extends JoinType {
  override def sql: String = "LEFT SEMI"
}

case object LeftAnti extends JoinType {
  override def sql: String = "LEFT ANTI"
}

case class ExistenceJoin(exists: Attribute) extends JoinType {
  override def sql: String = {
    // This join type is only used at the end of the optimizer and in physical plans;
    // we will not generate SQL for it.
    throw new UnsupportedOperationException
  }
}

case class NaturalJoin(tpe: JoinType) extends JoinType {
  require(Seq(Inner, LeftOuter, RightOuter, FullOuter).contains(tpe),
    "Unsupported natural join type " + tpe)
  override def sql: String = "NATURAL " + tpe.sql
}

case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType {
  require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe),
    "Unsupported using join type " + tpe)
  override def sql: String = "USING " + tpe.sql
}

object LeftExistence {
  def unapply(joinType: JoinType): Option[JoinType] = joinType match {
    case LeftSemi | LeftAnti => Some(joinType)
    case j: ExistenceJoin => Some(joinType)
    case _ => None
  }
}
Example 19
Source File: AnnotationParsingUtils.scala From HANAVora-Extensions with Apache License 2.0

package org.apache.spark.util

import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedAlias}
import org.apache.spark.sql.catalyst.expressions._
import org.scalatest.FunSuite

trait AnnotationParsingUtils extends FunSuite {

  def assertAnnotatedAttribute(
      expectedAliasName: String,
      expectedAliasChild: Expression,
      expectedAnnotations: Map[String, Expression],
      actual: NamedExpression): Unit = {
    assert(actual.isInstanceOf[UnresolvedAlias])
    assert(actual.asInstanceOf[UnresolvedAlias].child.isInstanceOf[AnnotatedAttribute])
    val attribute = actual.asInstanceOf[UnresolvedAlias].child.asInstanceOf[AnnotatedAttribute]
    assertResult(expectedAnnotations.keySet)(attribute.annotations.keySet)
    expectedAnnotations.foreach({
      case (k, v: Literal) => assert(v.semanticEquals(attribute.annotations.get(k).get))
    })
    assert(attribute.child.isInstanceOf[Alias])
    val alias = attribute.child.asInstanceOf[Alias]
    assertResult(expectedAliasName)(alias.name)
    assertResult(expectedAliasChild)(alias.child)
  }

  def assertAnnotatedProjection(expected: Seq[(String, UnresolvedAttribute, Map[String, Literal])])
                               (actual: Seq[NamedExpression]): Unit = {
    actual.zip(expected).foreach {
      case (exp: NamedExpression,
            values: (String, UnresolvedAttribute, Map[String, Expression])) =>
        assertAnnotatedAttribute(values._1, values._2, values._3, exp)
    }
  }
}
Example 20
Source File: AnnotationFilter.scala From HANAVora-Extensions with Apache License 2.0

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.catalyst.analysis.{UnresolvedException, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback
import org.apache.spark.sql.catalyst.{InternalRow, trees}
import org.apache.spark.sql.types._

case class AnnotationFilter(child: Expression)(
    val filters: Set[String] = Set.empty,
    val exprId: ExprId = NamedExpression.newExprId)
  extends UnaryExpression
  with NamedExpression
  with CodegenFallback {

  override def name: String = child match {
    case e: NamedExpression => e.name
    case _ => throw new UnresolvedException(this, "name of AnnotationFilter with non-named child")
  }

  override lazy val resolved = childrenResolved

  override def toAttribute: Attribute = {
    if (resolved) {
      child.transform ({
        case a: Alias =>
          a.copy(a.child, a.name)(a.exprId,
            qualifiers = a.qualifiers,
            explicitMetadata = Some(MetadataAccessor.filterMetadata(a.metadata, filters)))
        case a: AttributeReference =>
          a.copy(a.name, a.dataType, a.nullable,
            metadata = MetadataAccessor.filterMetadata(a.metadata, filters))(a.exprId, a.qualifiers)
        case p => p
      }) match {
        case e: NamedExpression => e.toAttribute
        case _ => throw new UnresolvedException(this, "toAttribute of AnnotationFilter with " +
          "no-named child")
      }
    } else {
      UnresolvedAttribute(name)
    }
  }

  override def equals(other: Any): Boolean = other match {
    case aa: AnnotationFilter => child == aa.child && filters == aa.filters &&
      exprId == aa.exprId
    case _ => false
  }

  // scalastyle:off magic.number
  override def hashCode: Int = {
    List[Int](child.hashCode, filters.hashCode, exprId.hashCode)
      .foldLeft(17)((l, r) => 31 * l + r)
  }

  override def metadata: Metadata = {
    child match {
      case named: NamedExpression => MetadataAccessor.filterMetadata(named.metadata, filters)
      case _ => Metadata.empty
    }
  }

  override def qualifiers: Seq[String] = Nil

  override def eval(input: InternalRow): Any = child.eval(input)

  override def nullable: Boolean = child.nullable

  override def dataType: DataType = child.dataType

  override protected final def otherCopyArgs: Seq[AnyRef] = filters :: exprId :: Nil
}
Example 21
Source File: AnnotationParser.scala From HANAVora-Extensions with Apache License 2.0

package org.apache.spark.sql.parser

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.AbstractSparkSQLParser
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{AnnotationReference, Expression, Literal}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String

// Excerpt: the enclosing parser class declaration is not included in this example.
protected def toTableMetadata(metadata: Map[String, Expression]): Metadata = {
  val res = new MetadataBuilder()
  metadata.foreach {
    case (k, v: Literal) =>
      v.dataType match {
        case StringType =>
          if (k.equals("?")) {
            sys.error("column metadata key can not be ?")
          }
          if (k.equals("*")) {
            sys.error("column metadata key can not be *")
          }
          res.putString(k, v.value.asInstanceOf[UTF8String].toString)
        case LongType => res.putLong(k, v.value.asInstanceOf[Long])
        case DoubleType => res.putDouble(k, v.value.asInstanceOf[Double])
        case NullType => res.putString(k, null)
        case a: ArrayType => res.putString(k, v.value.toString)
      }
    case (k, v: AnnotationReference) =>
      sys.error("column metadata can not have a reference to another column metadata")
  }
  res.build()
}
Example 22
Source File: OptimizerStructuralIntegrityCheckerSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project}
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf

class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {

  object OptimizeRuleBreakSI extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan transform {
      case Project(projectList, child) =>
        val newAttr = UnresolvedAttribute("unresolvedAttr")
        Project(projectList ++ Seq(newAttr), child)
    }
  }

  object Optimize extends Optimizer(
    new SessionCatalog(
      new InMemoryCatalog,
      EmptyFunctionRegistry,
      new SQLConf())) {
    val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI)
    override def defaultBatches: Seq[Batch] = Seq(newBatch) ++ super.defaultBatches
  }

  test("check for invalid plan after execution of rule") {
    val analyzed = Project(Alias(Literal(10), "attr")() :: Nil, OneRowRelation()).analyze
    assert(analyzed.resolved)
    val message = intercept[TreeNodeException[LogicalPlan]] {
      Optimize.execute(analyzed)
    }.getMessage
    val ruleName = OptimizeRuleBreakSI.ruleName
    assert(message.contains(s"After applying rule $ruleName in batch OptimizeRuleBreakSI"))
    assert(message.contains("the structural integrity of the plan is broken"))
  }
}
Example 23
Source File: ConvertToLocalRelationSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{LessThan, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }

  test("Filter on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: Nil)

    val filterAndProjectOnLocal = testRelation
      .select(UnresolvedAttribute("a").as("a1"), (UnresolvedAttribute("b") + 1).as("b1"))
      .where(LessThan(UnresolvedAttribute("b1"), Literal.create(6)))

    val optimized = Optimize.execute(filterAndProjectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 24
Source File: DeltaPushFilter.scala From connectors with Apache License 2.0

package org.apache.spark.sql.delta

import scala.collection.immutable.HashSet
import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, SerializationUtilities}
import org.apache.hadoop.hive.ql.lib._
import org.apache.hadoop.hive.ql.parse.SemanticException
import org.apache.hadoop.hive.ql.plan.{ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeGenericFuncDesc}
import org.apache.hadoop.hive.ql.udf.generic._
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{And, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, InSet, LessThan, LessThanOrEqual, Like, Literal, Not}

object DeltaPushFilter extends Logging {

  lazy val supportedPushDownUDFs = Array(
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS",
    "org.apache.hadoop.hive.ql.udf.UDFLike",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"
  )

  def partitionFilterConverter(hiveFilterExprSeriablized: String): Seq[Expression] = {
    if (hiveFilterExprSeriablized != null) {
      val filterExpr = SerializationUtilities.deserializeExpression(hiveFilterExprSeriablized)
      val opRules = new java.util.LinkedHashMap[Rule, NodeProcessor]()
      val nodeProcessor = new NodeProcessor() {
        @throws[SemanticException]
        def process(nd: Node, stack: java.util.Stack[Node],
            procCtx: NodeProcessorCtx, nodeOutputs: Object*): Object = {
          nd match {
            case e: ExprNodeGenericFuncDesc if FunctionRegistry.isOpAnd(e) =>
              nodeOutputs.map(_.asInstanceOf[Expression]).reduce(And)
            case e: ExprNodeGenericFuncDesc =>
              val (columnDesc, constantDesc) =
                if (nd.getChildren.get(0).isInstanceOf[ExprNodeColumnDesc]) {
                  (nd.getChildren.get(0), nd.getChildren.get(1))
                } else {
                  (nd.getChildren.get(1), nd.getChildren.get(0))
                }

              val columnAttr = UnresolvedAttribute(
                columnDesc.asInstanceOf[ExprNodeColumnDesc].getColumn)
              val constantVal = Literal(constantDesc.asInstanceOf[ExprNodeConstantDesc].getValue)
              nd.asInstanceOf[ExprNodeGenericFuncDesc].getGenericUDF match {
                case f: GenericUDFOPNotEqualNS =>
                  Not(EqualNullSafe(columnAttr, constantVal))
                case f: GenericUDFOPNotEqual =>
                  Not(EqualTo(columnAttr, constantVal))
                case f: GenericUDFOPEqualNS =>
                  EqualNullSafe(columnAttr, constantVal)
                case f: GenericUDFOPEqual =>
                  EqualTo(columnAttr, constantVal)
                case f: GenericUDFOPGreaterThan =>
                  GreaterThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrGreaterThan =>
                  GreaterThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFOPLessThan =>
                  LessThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrLessThan =>
                  LessThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFBridge if f.getUdfName.equals("like") =>
                  Like(columnAttr, constantVal)
                case f: GenericUDFIn =>
                  val inConstantVals = nd.getChildren.asScala
                    .filter(_.isInstanceOf[ExprNodeConstantDesc])
                    .map(_.asInstanceOf[ExprNodeConstantDesc].getValue)
                    .map(Literal(_)).toSet
                  InSet(columnAttr, HashSet() ++ inConstantVals)
                case _ =>
                  throw new RuntimeException(s"Unsupported func(${nd.getName}) " +
                    s"which can not be pushed down to delta")
              }
            case _ => null
          }
        }
      }

      val disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null)
      val ogw = new DefaultGraphWalker(disp)
      val topNodes = new java.util.ArrayList[Node]()
      topNodes.add(filterExpr)
      val nodeOutput = new java.util.HashMap[Node, Object]()
      try {
        ogw.startWalking(topNodes, nodeOutput)
      } catch {
        case ex: Exception =>
          throw new RuntimeException(ex)
      }
      logInfo(s"converted partition filter expr:" +
        s"${nodeOutput.get(filterExpr).asInstanceOf[Expression].toJSON}")
      Seq(nodeOutput.get(filterExpr).asInstanceOf[Expression])
    } else Seq.empty[org.apache.spark.sql.catalyst.expressions.Expression]
  }
}
Example 25
Source File: ConvertToLocalRelationSuite.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}