org.apache.spark.sql.catalyst.expressions.LessThan Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.expressions.LessThan.
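A minimal, self-contained sketch of constructing and evaluating a LessThan predicate is shown below; it is not taken from any of the example projects, and the column ordinal and literal value are purely illustrative.

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BoundReference, LessThan, Literal}
import org.apache.spark.sql.types.IntegerType

// Illustrative predicate: "column at ordinal 1 < 6", evaluated directly against InternalRows.
val lessThanSix = LessThan(BoundReference(1, IntegerType, nullable = true), Literal(6))
lessThanSix.eval(InternalRow(1, 2))  // true
lessThanSix.eval(InternalRow(4, 7))  // false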
Example 1
Source File: DeltaPushFilter.scala From connectors with Apache License 2.0
package org.apache.spark.sql.delta

import scala.collection.immutable.HashSet
import scala.collection.JavaConverters._

import org.apache.hadoop.hive.ql.exec.{FunctionRegistry, SerializationUtilities}
import org.apache.hadoop.hive.ql.lib._
import org.apache.hadoop.hive.ql.parse.SemanticException
import org.apache.hadoop.hive.ql.plan.{ExprNodeColumnDesc, ExprNodeConstantDesc, ExprNodeGenericFuncDesc}
import org.apache.hadoop.hive.ql.udf.generic._

import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{And, EqualNullSafe, EqualTo, Expression, GreaterThan, GreaterThanOrEqual, InSet, LessThan, LessThanOrEqual, Like, Literal, Not}

object DeltaPushFilter extends Logging {

  // Hive UDFs that can be translated into Catalyst predicates and pushed down.
  lazy val supportedPushDownUDFs = Array(
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS",
    "org.apache.hadoop.hive.ql.udf.UDFLike",
    "org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn"
  )

  // Converts a serialized Hive filter expression into the equivalent Catalyst expressions.
  def partitionFilterConverter(hiveFilterExprSeriablized: String): Seq[Expression] = {
    if (hiveFilterExprSeriablized != null) {
      val filterExpr = SerializationUtilities.deserializeExpression(hiveFilterExprSeriablized)
      val opRules = new java.util.LinkedHashMap[Rule, NodeProcessor]()

      // Walks the Hive expression tree and maps each supported comparison UDF
      // to its Catalyst counterpart.
      val nodeProcessor = new NodeProcessor() {
        @throws[SemanticException]
        def process(nd: Node, stack: java.util.Stack[Node], procCtx: NodeProcessorCtx,
            nodeOutputs: Object*): Object = {
          nd match {
            case e: ExprNodeGenericFuncDesc if FunctionRegistry.isOpAnd(e) =>
              nodeOutputs.map(_.asInstanceOf[Expression]).reduce(And)
            case e: ExprNodeGenericFuncDesc =>
              val (columnDesc, constantDesc) =
                if (nd.getChildren.get(0).isInstanceOf[ExprNodeColumnDesc]) {
                  (nd.getChildren.get(0), nd.getChildren.get(1))
                } else {
                  (nd.getChildren.get(1), nd.getChildren.get(0))
                }

              val columnAttr = UnresolvedAttribute(
                columnDesc.asInstanceOf[ExprNodeColumnDesc].getColumn)
              val constantVal = Literal(constantDesc.asInstanceOf[ExprNodeConstantDesc].getValue)
              nd.asInstanceOf[ExprNodeGenericFuncDesc].getGenericUDF match {
                case f: GenericUDFOPNotEqualNS =>
                  Not(EqualNullSafe(columnAttr, constantVal))
                case f: GenericUDFOPNotEqual =>
                  Not(EqualTo(columnAttr, constantVal))
                case f: GenericUDFOPEqualNS =>
                  EqualNullSafe(columnAttr, constantVal)
                case f: GenericUDFOPEqual =>
                  EqualTo(columnAttr, constantVal)
                case f: GenericUDFOPGreaterThan =>
                  GreaterThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrGreaterThan =>
                  GreaterThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFOPLessThan =>
                  LessThan(columnAttr, constantVal)
                case f: GenericUDFOPEqualOrLessThan =>
                  LessThanOrEqual(columnAttr, constantVal)
                case f: GenericUDFBridge if f.getUdfName.equals("like") =>
                  Like(columnAttr, constantVal)
                case f: GenericUDFIn =>
                  val inConstantVals = nd.getChildren.asScala
                    .filter(_.isInstanceOf[ExprNodeConstantDesc])
                    .map(_.asInstanceOf[ExprNodeConstantDesc].getValue)
                    .map(Literal(_)).toSet
                  InSet(columnAttr, HashSet() ++ inConstantVals)
                case _ =>
                  throw new RuntimeException(s"Unsupported func(${nd.getName}) " +
                    s"which can not be pushed down to delta")
              }
            case _ => null
          }
        }
      }

      val disp = new DefaultRuleDispatcher(nodeProcessor, opRules, null)
      val ogw = new DefaultGraphWalker(disp)
      val topNodes = new java.util.ArrayList[Node]()
      topNodes.add(filterExpr)
      val nodeOutput = new java.util.HashMap[Node, Object]()
      try {
        ogw.startWalking(topNodes, nodeOutput)
      } catch {
        case ex: Exception =>
          throw new RuntimeException(ex)
      }

      logInfo(s"converted partition filter expr:" +
        s"${nodeOutput.get(filterExpr).asInstanceOf[Expression].toJSON}")

      Seq(nodeOutput.get(filterExpr).asInstanceOf[Expression])
    } else Seq.empty[org.apache.spark.sql.catalyst.expressions.Expression]
  }
}
Example 2
Source File: ConvertToLocalRelationSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{LessThan, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }

  test("Filter on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: Nil)

    val filterAndProjectOnLocal = testRelation
      .select(UnresolvedAttribute("a").as("a1"), (UnresolvedAttribute("b") + 1).as("b1"))
      .where(LessThan(UnresolvedAttribute("b1"), Literal.create(6)))

    val optimized = Optimize.execute(filterAndProjectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 3
Source File: SemiJoinSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.execution.joins

import org.apache.spark.sql.{SQLConf, DataFrame, Row}
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans.Inner
import org.apache.spark.sql.catalyst.plans.logical.Join
import org.apache.spark.sql.catalyst.expressions.{And, LessThan, Expression}
import org.apache.spark.sql.execution.{EnsureRequirements, SparkPlan, SparkPlanTest}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{DoubleType, IntegerType, StructType}

// Semi-join test suite
class SemiJoinSuite extends SparkPlanTest with SharedSQLContext {

  private lazy val left = ctx.createDataFrame(
    ctx.sparkContext.parallelize(Seq(
      Row(1, 2.0),
      Row(1, 2.0),
      Row(2, 1.0),
      Row(2, 1.0),
      Row(3, 3.0),
      Row(null, null),
      Row(null, 5.0),
      Row(6, null)
    )), new StructType().add("a", IntegerType).add("b", DoubleType))

  private lazy val right = ctx.createDataFrame(
    ctx.sparkContext.parallelize(Seq(
      Row(2, 3.0),
      Row(2, 3.0),
      Row(3, 2.0),
      Row(4, 1.0),
      Row(null, null),
      Row(null, 5.0),
      Row(6, null)
    )), new StructType().add("c", IntegerType).add("d", DoubleType))

  private lazy val condition = {
    And((left.col("a") === right.col("c")).expr,
      LessThan(left.col("b").expr, right.col("d").expr))
  }

  // Note: the input dataframes and expression must be evaluated lazily because
  // the SQLContext should be used only within a test to keep SQL tests stable
  private def testLeftSemiJoin(
      testName: String,
      leftRows: => DataFrame,
      rightRows: => DataFrame,
      condition: => Expression,
      expectedAnswer: Seq[Product]): Unit = {

    def extractJoinParts(): Option[ExtractEquiJoinKeys.ReturnType] = {
      val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition))
      ExtractEquiJoinKeys.unapply(join)
    }

    test(s"$testName using LeftSemiJoinHash") {
      extractJoinParts().foreach { case (joinType, leftKeys, rightKeys, boundCondition, _, _) =>
        withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
          checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
            EnsureRequirements(left.sqlContext).apply(
              LeftSemiJoinHash(leftKeys, rightKeys, left, right, boundCondition)),
            expectedAnswer.map(Row.fromTuple),
            sortAnswers = true)
        }
      }
    }

    test(s"$testName using BroadcastLeftSemiJoinHash") {
      extractJoinParts().foreach { case (joinType, leftKeys, rightKeys, boundCondition, _, _) =>
        withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
          checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
            BroadcastLeftSemiJoinHash(leftKeys, rightKeys, left, right, boundCondition),
            expectedAnswer.map(Row.fromTuple),
            sortAnswers = true)
        }
      }
    }

    test(s"$testName using LeftSemiJoinBNL") {
      withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
        checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
          LeftSemiJoinBNL(left, right, Some(condition)),
          expectedAnswer.map(Row.fromTuple),
          sortAnswers = true)
      }
    }
  }

  // Basic left semi-join test
  testLeftSemiJoin(
    "basic test",
    left,
    right,
    condition,
    Seq(
      (2, 1.0),
      (2, 1.0)
    )
  )
}
Example 4
Source File: SemiJoinSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.execution.joins

import org.apache.spark.sql.{SQLConf, DataFrame, Row}
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans.Inner
import org.apache.spark.sql.catalyst.plans.logical.Join
import org.apache.spark.sql.catalyst.expressions.{And, LessThan, Expression}
import org.apache.spark.sql.execution.{EnsureRequirements, SparkPlan, SparkPlanTest}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{DoubleType, IntegerType, StructType}

class SemiJoinSuite extends SparkPlanTest with SharedSQLContext {

  private lazy val left = sqlContext.createDataFrame(
    sparkContext.parallelize(Seq(
      Row(1, 2.0),
      Row(1, 2.0),
      Row(2, 1.0),
      Row(2, 1.0),
      Row(3, 3.0),
      Row(null, null),
      Row(null, 5.0),
      Row(6, null)
    )), new StructType().add("a", IntegerType).add("b", DoubleType))

  private lazy val right = sqlContext.createDataFrame(
    sparkContext.parallelize(Seq(
      Row(2, 3.0),
      Row(2, 3.0),
      Row(3, 2.0),
      Row(4, 1.0),
      Row(null, null),
      Row(null, 5.0),
      Row(6, null)
    )), new StructType().add("c", IntegerType).add("d", DoubleType))

  private lazy val condition = {
    And((left.col("a") === right.col("c")).expr,
      LessThan(left.col("b").expr, right.col("d").expr))
  }

  // Note: the input dataframes and expression must be evaluated lazily because
  // the SQLContext should be used only within a test to keep SQL tests stable
  private def testLeftSemiJoin(
      testName: String,
      leftRows: => DataFrame,
      rightRows: => DataFrame,
      condition: => Expression,
      expectedAnswer: Seq[Product]): Unit = {

    def extractJoinParts(): Option[ExtractEquiJoinKeys.ReturnType] = {
      val join = Join(leftRows.logicalPlan, rightRows.logicalPlan, Inner, Some(condition))
      ExtractEquiJoinKeys.unapply(join)
    }

    test(s"$testName using LeftSemiJoinHash") {
      extractJoinParts().foreach { case (joinType, leftKeys, rightKeys, boundCondition, _, _) =>
        withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
          checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
            EnsureRequirements(left.sqlContext).apply(
              LeftSemiJoinHash(leftKeys, rightKeys, left, right, boundCondition)),
            expectedAnswer.map(Row.fromTuple),
            sortAnswers = true)
        }
      }
    }

    test(s"$testName using BroadcastLeftSemiJoinHash") {
      extractJoinParts().foreach { case (joinType, leftKeys, rightKeys, boundCondition, _, _) =>
        withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
          checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
            BroadcastLeftSemiJoinHash(leftKeys, rightKeys, left, right, boundCondition),
            expectedAnswer.map(Row.fromTuple),
            sortAnswers = true)
        }
      }
    }

    test(s"$testName using LeftSemiJoinBNL") {
      withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") {
        checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) =>
          LeftSemiJoinBNL(left, right, Some(condition)),
          expectedAnswer.map(Row.fromTuple),
          sortAnswers = true)
      }
    }
  }

  testLeftSemiJoin(
    "basic test",
    left,
    right,
    condition,
    Seq(
      (2, 1.0),
      (2, 1.0)
    )
  )
}