org.apache.spark.sql.catalyst.rules.RuleExecutor Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.rules.RuleExecutor.
The project and source file each example comes from are noted in the header above its code.
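Before the examples, here is a minimal sketch of the usage pattern they all share (adapted from the RuleExecutorSuite examples further down, and assuming a Spark Catalyst dependency on the classpath): define a Rule that transforms a tree, register it in a Batch inside a RuleExecutor subclass, and call execute on the tree.

import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

// A rule that decrements every positive integer literal in an expression tree.
object DecrementLiterals extends Rule[Expression] {
  def apply(e: Expression): Expression = e transform {
    case IntegerLiteral(i) if i > 0 => Literal(i - 1)
  }
}

// A RuleExecutor that applies the rule until the tree stops changing,
// or at most 100 times (FixedPoint(100)); use Once for a single pass instead.
object Decrement extends RuleExecutor[Expression] {
  val batches = Batch("decrement", FixedPoint(100), DecrementLiterals) :: Nil
}

// Decrement.execute(Literal(3)) applies the rule repeatedly and returns Literal(0).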
Example 1
Source File: AggregateOptimizeSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 2
Source File: AggregateOptimizeSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct, LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      ReplaceDistinctWithAggregate,
      RemoveLiteralFromGroupExpressions) :: Nil
  }

  // Replace Distinct with Aggregate
  test("replace distinct with aggregate") {
    val input = LocalRelation('a.int, 'b.int)
    val query = Distinct(input)
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = Aggregate(input.output, input.output, input)
    comparePlans(optimized, correctAnswer)
  }

  // Remove literals from grouping expressions
  test("remove literals in grouping expression") {
    val input = LocalRelation('a.int, 'b.int)
    val query = input.groupBy('a, Literal(1), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(query)
    val correctAnswer = input.groupBy('a)(sum('b))
    comparePlans(optimized, correctAnswer)
  }
}
Example 3
Source File: ProjectCollapsingSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.Rand
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ProjectCollapsingSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Subqueries", FixedPoint(10), EliminateSubQueries) ::
      Batch("ProjectCollapsing", Once, ProjectCollapsing) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int)

  test("collapse two deterministic, independent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select('a_plus_1, ('b + 1).as('b_plus_1))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(('a + 1).as('a_plus_1), ('b + 1).as('b_plus_1)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two deterministic, dependent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select(('a_plus_1 + 1).as('a_plus_2), 'b)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(
      (('a + 1).as('a_plus_1) + 1).as('a_plus_2), 'b).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not collapse nondeterministic projects") {
    val query = testRelation
      .select(Rand(10).as('rand))
      .select(('rand + 1).as('rand1), ('rand + 2).as('rand2))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = query.analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two nondeterministic, independent projects into one") {
    val query = testRelation
      .select(Rand(10).as('rand))
      .select(Rand(20).as('rand2))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation
      .select(Rand(20).as('rand2)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse one nondeterministic, one deterministic, independent projects into one") {
    val query = testRelation
      .select(Rand(10).as('rand), 'a)
      .select(('a + 1).as('a_plus_1))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation
      .select(('a + 1).as('a_plus_1)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 4
Source File: OptimizeInSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once, EliminateSubQueries) ::
      Batch("ConstantFolding", Once, ConstantFolding, BooleanSimplification, OptimizeIn) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  // OptimizedIn test: an In clause with fewer than 10 items is not optimized to InSet
  test("OptimizedIn test: In clause not optimized to InSet when less than 10 items") {
    val originalQuery = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))
      .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    comparePlans(optimized, originalQuery)
  }

  // OptimizedIn test: an In clause with more than 10 items is optimized to InSet
  test("OptimizedIn test: In clause optimized to InSet when more than 10 items") {
    val originalQuery = testRelation
      .where(In(UnresolvedAttribute("a"), (1 to 11).map(Literal(_))))
      .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(InSet(UnresolvedAttribute("a"), (1 to 11).toSet))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  // OptimizedIn test: an In clause is not optimized when the filter contains attributes
  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
      .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
      .analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 5
Source File: ColumnPruningSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.expressions.Explode
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation, Generate, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.types.StringType

class ColumnPruningSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Column pruning", FixedPoint(100), ColumnPruning) :: Nil
  }

  test("Column pruning for Generate when Generate.join = false") {
    val input = LocalRelation('a.int, 'b.array(StringType))

    val query = Generate(Explode('b), false, false, None, 's.string :: Nil, input).analyze
    val optimized = Optimize.execute(query)

    val correctAnswer =
      Generate(Explode('b), false, false, None, 's.string :: Nil,
        Project('b.attr :: Nil, input)).analyze

    comparePlans(optimized, correctAnswer)
  }

  // Column pruning for Generate when Generate.join = true
  test("Column pruning for Generate when Generate.join = true") {
    val input = LocalRelation('a.int, 'b.int, 'c.array(StringType))

    val query =
      Project(Seq('a, 's),
        Generate(Explode('c), true, false, None, 's.string :: Nil, input)).analyze
    val optimized = Optimize.execute(query)

    val correctAnswer =
      Project(Seq('a, 's),
        Generate(Explode('c), true, false, None, 's.string :: Nil,
          Project(Seq('a, 'c), input))).analyze

    comparePlans(optimized, correctAnswer)
  }

  // Turn Generate.join to false if possible
  test("Turn Generate.join to false if possible") {
    val input = LocalRelation('b.array(StringType))

    val query =
      Project(('s + 1).as("s+1") :: Nil,
        Generate(Explode('b), true, false, None, 's.string :: Nil, input)).analyze
    val optimized = Optimize.execute(query)

    val correctAnswer =
      Project(('s + 1).as("s+1") :: Nil,
        Generate(Explode('b), false, false, None, 's.string :: Nil, input)).analyze

    comparePlans(optimized, correctAnswer)
  }

  // todo: add more tests for column pruning
}
Example 6
Source File: ConvertToLocalRelationSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 7
Source File: RuleExecutorSuite.scala From spark1.52 with Apache License 2.0

package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }
    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 8
Source File: ProjectCollapsingSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.Rand
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ProjectCollapsingSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Subqueries", FixedPoint(10), EliminateSubQueries) ::
      Batch("ProjectCollapsing", Once, ProjectCollapsing) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int)

  test("collapse two deterministic, independent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select('a_plus_1, ('b + 1).as('b_plus_1))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(('a + 1).as('a_plus_1), ('b + 1).as('b_plus_1)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two deterministic, dependent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select(('a_plus_1 + 1).as('a_plus_2), 'b)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(
      (('a + 1).as('a_plus_1) + 1).as('a_plus_2), 'b).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not collapse nondeterministic projects") {
    val query = testRelation
      .select(Rand(10).as('rand))
      .select(('rand + 1).as('rand1), ('rand + 2).as('rand2))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = query.analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 9
Source File: OptimizeInSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.collection.immutable.HashSet

import org.apache.spark.sql.catalyst.analysis.{EliminateSubQueries, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

// For implicit conversions
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class OptimizeInSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once, EliminateSubQueries) ::
      Batch("ConstantFolding", Once, ConstantFolding, BooleanSimplification, OptimizeIn) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("OptimizedIn test: In clause optimized to InSet") {
    val originalQuery = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2))))
      .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(InSet(UnresolvedAttribute("a"), HashSet[Any]() + 1 + 2))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("OptimizedIn test: In clause not optimized in case filter has attributes") {
    val originalQuery = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
      .analyze

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(In(UnresolvedAttribute("a"), Seq(Literal(1), Literal(2), UnresolvedAttribute("b"))))
      .analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 10
Source File: ConvertToLocalRelationSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      Row(1, 2) :: Row(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      Row(1, 3) :: Row(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 11
Source File: RuleExecutorSuite.scala From iolap with Apache License 2.0

package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }
    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 12
Source File: HBaseSQLContext.scala From Spark-SQL-on-HBase with Apache License 2.0

package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.spark.SparkContext
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.OverrideCatalog
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.{EnsureRequirements, SparkPlan}
import org.apache.spark.sql.hbase.execution.{AddCoprocessor, HBaseStrategies}

class HBaseSQLContext(sc: SparkContext) extends SQLContext(sc) {
  self =>

  def this(sparkContext: JavaSparkContext) = this(sparkContext.sc)

  protected[sql] override lazy val conf: SQLConf = new HBaseSQLConf

  HBaseConfiguration.merge(
    sc.hadoopConfiguration, HBaseConfiguration.create(sc.hadoopConfiguration))

  @transient
  override protected[sql] lazy val catalog: HBaseCatalog =
    new HBaseCatalog(this, sc.hadoopConfiguration) with OverrideCatalog

  experimental.extraStrategies = Seq((new SparkPlanner with HBaseStrategies).HBaseDataSource)

  @transient
  override protected[sql] val prepareForExecution = new RuleExecutor[SparkPlan] {
    val batches =
      Batch("Add exchange", Once, EnsureRequirements(self)) ::
      Batch("Add coprocessor", Once, AddCoprocessor(self)) :: Nil
  }
}
Example 13
Source File: ReorderAssociativeOperatorSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ReorderAssociativeOperatorSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("ReorderAssociativeOperator", Once, ReorderAssociativeOperator) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Reorder associative operators") {
    val originalQuery =
      testRelation
        .select(
          (Literal(3) + ((Literal(1) + 'a) + 2)) + 4,
          'b * 1 * 2 * 3 * 4,
          ('b + 1) * 2 * 3 * 4,
          'a + 1 + 'b + 2 + 'c + 3,
          'a + 1 + 'b * 2 + 'c + 3,
          Rand(0) * 1 * 2 * 3 * 4)

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer =
      testRelation
        .select(
          ('a + 10).as("((3 + ((1 + a) + 2)) + 4)"),
          ('b * 24).as("((((b * 1) * 2) * 3) * 4)"),
          (('b + 1) * 24).as("((((b + 1) * 2) * 3) * 4)"),
          ('a + 'b + 'c + 6).as("(((((a + 1) + b) + 2) + c) + 3)"),
          ('a + 'b * 2 + 'c + 4).as("((((a + 1) + (b * 2)) + c) + 3)"),
          Rand(0) * 1 * 2 * 3 * 4)
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("nested expression with aggregate operator") {
    val originalQuery =
      testRelation.as("t1")
        .join(testRelation.as("t2"), Inner, Some("t1.a".attr === "t2.a".attr))
        .groupBy("t1.a".attr + 1, "t2.a".attr + 1)(
          (("t1.a".attr + 1) + ("t2.a".attr + 1)).as("col"))

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer = originalQuery.analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 14
Source File: SimplifyCastsSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

class SimplifyCastsSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyCasts", FixedPoint(50), SimplifyCasts) :: Nil
  }

  test("non-nullable element array to nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, false)))
    val plan = input.select('a.cast(ArrayType(IntegerType, true)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('a.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable element to non-nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, true)))
    val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }

  test("non-nullable value map to nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, false)))
    val plan = input.select('m.cast(MapType(StringType, StringType, true))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('m.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable value map to non-nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, true)))
    val plan = input.select('m.cast(MapType(StringType, StringType, false))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }
}
Example 15
Source File: ComputeCurrentTimeSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.DateTimeUtils

class ComputeCurrentTimeSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Seq(Batch("ComputeCurrentTime", Once, ComputeCurrentTime))
  }

  test("analyzer should replace current_timestamp with literals") {
    val in = Project(Seq(Alias(CurrentTimestamp(), "a")(), Alias(CurrentTimestamp(), "b")()),
      LocalRelation())

    val min = System.currentTimeMillis() * 1000
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = (System.currentTimeMillis() + 1) * 1000

    val lits = new scala.collection.mutable.ArrayBuffer[Long]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Long]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }

  test("analyzer should replace current_date with literals") {
    val in = Project(Seq(Alias(CurrentDate(), "a")(), Alias(CurrentDate(), "b")()), LocalRelation())

    val min = DateTimeUtils.millisToDays(System.currentTimeMillis())
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = DateTimeUtils.millisToDays(System.currentTimeMillis())

    val lits = new scala.collection.mutable.ArrayBuffer[Int]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Int]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }
}
Example 16
Source File: EliminateSerializationSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor

case class OtherTuple(_1: Int, _2: Int)

class EliminateSerializationSuite extends PlanTest {
  private object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Serialization", FixedPoint(100), EliminateSerialization) :: Nil
  }

  implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]()
  implicit private def intEncoder = ExpressionEncoder[Int]()

  test("back to back serialization") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('obj.as("obj")).analyze
    comparePlans(optimized, expected)
  }

  test("back to back serialization with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val plan = input.serialize[OtherTuple].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }

  test("back to back serialization in AppendColumns") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[(Int, Int)]).analyze

    val optimized = Optimize.execute(plan)

    val expected = AppendColumnsWithObject(
      func.asInstanceOf[Any => Any],
      productEncoder[(Int, Int)].namedExpressions,
      intEncoder.namedExpressions,
      input).analyze

    comparePlans(optimized, expected)
  }

  test("back to back serialization in AppendColumns with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[OtherTuple]).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }
}
Example 17
Source File: ReplaceOperatorSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ReplaceOperatorSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Replace Operators", FixedPoint(100),
        ReplaceDistinctWithAggregate,
        ReplaceExceptWithAntiJoin,
        ReplaceIntersectWithSemiJoin) :: Nil
  }

  test("replace Intersect with Left-semi Join") {
    val table1 = LocalRelation('a.int, 'b.int)
    val table2 = LocalRelation('c.int, 'd.int)

    val query = Intersect(table1, table2)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer =
      Aggregate(table1.output, table1.output,
        Join(table1, table2, LeftSemi, Option('a <=> 'c && 'b <=> 'd))).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("replace Except with Left-anti Join") {
    val table1 = LocalRelation('a.int, 'b.int)
    val table2 = LocalRelation('c.int, 'd.int)

    val query = Except(table1, table2)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer =
      Aggregate(table1.output, table1.output,
        Join(table1, table2, LeftAnti, Option('a <=> 'c && 'b <=> 'd))).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("replace Distinct with Aggregate") {
    val input = LocalRelation('a.int, 'b.int)

    val query = Distinct(input)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer = Aggregate(input.output, input.output, input)

    comparePlans(optimized, correctAnswer)
  }
}
Example 18
Source File: CollapseRepartitionSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class CollapseRepartitionSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("CollapseRepartition", FixedPoint(10), CollapseRepartition) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int)

  test("collapse two adjacent repartitions into one") {
    val query = testRelation
      .repartition(10)
      .repartition(20)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.repartition(20).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse repartition and repartitionBy into one") {
    val query = testRelation
      .repartition(10)
      .distribute('a)(20)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.distribute('a)(20).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse repartitionBy and repartition into one") {
    val query = testRelation
      .distribute('a)(20)
      .repartition(10)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.distribute('a)(10).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two adjacent repartitionBys into one") {
    val query = testRelation
      .distribute('b)(10)
      .distribute('a)(20)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.distribute('a)(20).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 19
Source File: CollapseWindowSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class CollapseWindowSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("CollapseWindow", FixedPoint(10), CollapseWindow) :: Nil
  }

  val testRelation = LocalRelation('a.double, 'b.double, 'c.string)
  val a = testRelation.output(0)
  val b = testRelation.output(1)
  val c = testRelation.output(2)
  val partitionSpec1 = Seq(c)
  val partitionSpec2 = Seq(c + 1)
  val orderSpec1 = Seq(c.asc)
  val orderSpec2 = Seq(c.desc)

  test("collapse two adjacent windows with the same partition/order") {
    val query = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec1)
      .window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1)
      .window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1)

    val analyzed = query.analyze
    val optimized = Optimize.execute(analyzed)
    assert(analyzed.output === optimized.output)

    val correctAnswer = testRelation.window(Seq(
      min(a).as('min_a),
      max(a).as('max_a),
      sum(b).as('sum_b),
      avg(b).as('avg_b)), partitionSpec1, orderSpec1)

    comparePlans(optimized, correctAnswer)
  }

  test("Don't collapse adjacent windows with different partitions or orders") {
    val query1 = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec2)

    val optimized1 = Optimize.execute(query1.analyze)
    val correctAnswer1 = query1.analyze
    comparePlans(optimized1, correctAnswer1)

    val query2 = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec2, orderSpec1)

    val optimized2 = Optimize.execute(query2.analyze)
    val correctAnswer2 = query2.analyze
    comparePlans(optimized2, correctAnswer2)
  }
}
Example 20
Source File: ConvertToLocalRelationSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 21
Source File: RuleExecutorSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }
    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    val message = intercept[TreeNodeException[LogicalPlan]] {
      ToFixedPoint.execute(Literal(100))
    }.getMessage
    assert(message.contains("Max iterations (10) reached for batch fixedPoint"))
  }
}
Example 22
Source File: EliminateSerializationSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor

case class OtherTuple(_1: Int, _2: Int)

class EliminateSerializationSuite extends PlanTest {
  private object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Serialization", FixedPoint(100), EliminateSerialization) :: Nil
  }

  implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]()
  implicit private def intEncoder = ExpressionEncoder[Int]()

  test("back to back serialization") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('obj.as("obj")).analyze
    comparePlans(optimized, expected)
  }

  test("back to back serialization with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val plan = input.serialize[OtherTuple].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }

  test("back to back serialization in AppendColumns") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[(Int, Int)]).analyze

    val optimized = Optimize.execute(plan)

    val expected = AppendColumnsWithObject(
      func.asInstanceOf[Any => Any],
      productEncoder[(Int, Int)].namedExpressions,
      intEncoder.namedExpressions,
      input).analyze

    comparePlans(optimized, expected)
  }

  test("back to back serialization in AppendColumns with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[OtherTuple]).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }
}
Example 23
Source File: AggregateOptimizeSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Distinct, LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      ReplaceDistinctWithAggregate,
      RemoveLiteralFromGroupExpressions) :: Nil
  }

  test("replace distinct with aggregate") {
    val input = LocalRelation('a.int, 'b.int)
    val query = Distinct(input)
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = Aggregate(input.output, input.output, input)
    comparePlans(optimized, correctAnswer)
  }

  test("remove literals in grouping expression") {
    val input = LocalRelation('a.int, 'b.int)
    val query = input.groupBy('a, Literal(1), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(query)
    val correctAnswer = input.groupBy('a)(sum('b))
    comparePlans(optimized, correctAnswer)
  }
}
Example 24
Source File: ProjectCollapsingSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.Rand
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ProjectCollapsingSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Subqueries", FixedPoint(10), EliminateSubQueries) ::
      Batch("ProjectCollapsing", Once, ProjectCollapsing) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int)

  test("collapse two deterministic, independent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select('a_plus_1, ('b + 1).as('b_plus_1))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(('a + 1).as('a_plus_1), ('b + 1).as('b_plus_1)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two deterministic, dependent projects into one") {
    val query = testRelation
      .select(('a + 1).as('a_plus_1), 'b)
      .select(('a_plus_1 + 1).as('a_plus_2), 'b)

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation.select(
      (('a + 1).as('a_plus_1) + 1).as('a_plus_2), 'b).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not collapse nondeterministic projects") {
    val query = testRelation
      .select(Rand(10).as('rand))
      .select(('rand + 1).as('rand1), ('rand + 2).as('rand2))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = query.analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse two nondeterministic, independent projects into one") {
    val query = testRelation
      .select(Rand(10).as('rand))
      .select(Rand(20).as('rand2))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation
      .select(Rand(20).as('rand2)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("collapse one nondeterministic, one deterministic, independent projects into one") {
    val query = testRelation
      .select(Rand(10).as('rand), 'a)
      .select(('a + 1).as('a_plus_1))

    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = testRelation
      .select(('a + 1).as('a_plus_1)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 25
Source File: ColumnPruningSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.expressions.Explode
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.types.StringType

class ColumnPruningSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Column pruning", FixedPoint(100), ColumnPruning) :: Nil
  }

  test("Column pruning for Generate when Generate.join = false") {
    val input = LocalRelation('a.int, 'b.array(StringType))

    val query = input.generate(Explode('b), join = false).analyze
    val optimized = Optimize.execute(query)

    val correctAnswer = input.select('b).generate(Explode('b), join = false).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("Column pruning for Generate when Generate.join = true") {
    val input = LocalRelation('a.int, 'b.int, 'c.array(StringType))

    val query =
      input
        .generate(Explode('c), join = true, outputNames = "explode" :: Nil)
        .select('a, 'explode)
        .analyze

    val optimized = Optimize.execute(query)

    val correctAnswer =
      input
        .select('a, 'c)
        .generate(Explode('c), join = true, outputNames = "explode" :: Nil)
        .select('a, 'explode)
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("Turn Generate.join to false if possible") {
    val input = LocalRelation('b.array(StringType))

    val query =
      input
        .generate(Explode('b), join = true, outputNames = "explode" :: Nil)
        .select(('explode + 1).as("result"))
        .analyze

    val optimized = Optimize.execute(query)

    val correctAnswer =
      input
        .generate(Explode('b), join = false, outputNames = "explode" :: Nil)
        .select(('explode + 1).as("result"))
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("Column pruning for Project on Sort") {
    val input = LocalRelation('a.int, 'b.string, 'c.double)

    val query = input.orderBy('b.asc).select('a).analyze
    val optimized = Optimize.execute(query)

    val correctAnswer = input.select('a, 'b).orderBy('b.asc).select('a).analyze

    comparePlans(optimized, correctAnswer)
  }

  // todo: add more tests for column pruning
}
Example 26
Source File: ConvertToLocalRelationSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 27
Source File: RuleExecutorSuite.scala From BigDatalog with Apache License 2.0

package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }
    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }
    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 28
Source File: BenchmarkQueryTest.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.util.Utils

abstract class BenchmarkQueryTest extends QueryTest with SharedSQLContext with BeforeAndAfterAll {

  // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting
  // the max iteration of analyzer/optimizer batches.
  assert(Utils.isTesting, "spark.testing is not set to true")

  protected override def afterAll(): Unit = {
    try {
      // For debugging dump some statistics about how much time was spent in various optimizer rules
      logWarning(RuleExecutor.dumpTimeSpent())
      spark.sessionState.catalog.reset()
    } finally {
      super.afterAll()
    }
  }

  override def beforeAll() {
    super.beforeAll()
    RuleExecutor.resetMetrics()
  }

  protected def checkGeneratedCode(plan: SparkPlan): Unit = {
    val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
    plan foreach {
      case s: WholeStageCodegenExec =>
        codegenSubtrees += s
      case s => s
    }
    codegenSubtrees.toSeq.foreach { subtree =>
      val code = subtree.doCodeGen()._2
      try {
        // Just check the generated code can be properly compiled
        CodeGenerator.compile(code)
      } catch {
        case e: Exception =>
          val msg =
            s"""
               |failed to compile:
               |Subtree:
               |$subtree
               |Generated code:
               |${CodeFormatter.format(code)}
             """.stripMargin
          throw new Exception(msg, e)
      }
    }
  }
}
Example 29
Source File: AggregateOptimizeSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}

class AggregateOptimizeSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 30
Source File: ReorderAssociativeOperatorSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ReorderAssociativeOperatorSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("ReorderAssociativeOperator", Once, ReorderAssociativeOperator) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Reorder associative operators") {
    val originalQuery =
      testRelation
        .select(
          (Literal(3) + ((Literal(1) + 'a) + 2)) + 4,
          'b * 1 * 2 * 3 * 4,
          ('b + 1) * 2 * 3 * 4,
          'a + 1 + 'b + 2 + 'c + 3,
          'a + 1 + 'b * 2 + 'c + 3,
          Rand(0) * 1 * 2 * 3 * 4)

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer =
      testRelation
        .select(
          ('a + 10).as("((3 + ((1 + a) + 2)) + 4)"),
          ('b * 24).as("((((b * 1) * 2) * 3) * 4)"),
          (('b + 1) * 24).as("((((b + 1) * 2) * 3) * 4)"),
          ('a + 'b + 'c + 6).as("(((((a + 1) + b) + 2) + c) + 3)"),
          ('a + 'b * 2 + 'c + 4).as("((((a + 1) + (b * 2)) + c) + 3)"),
          Rand(0) * 1 * 2 * 3 * 4)
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("nested expression with aggregate operator") {
    val originalQuery =
      testRelation.as("t1")
        .join(testRelation.as("t2"), Inner, Some("t1.a".attr === "t2.a".attr))
        .groupBy("t1.a".attr + 1, "t2.a".attr + 1)(
          (("t1.a".attr + 1) + ("t2.a".attr + 1)).as("col"))

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer = originalQuery.analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 31
Source File: SimplifyCastsSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

class SimplifyCastsSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyCasts", FixedPoint(50), SimplifyCasts) :: Nil
  }

  test("non-nullable element array to nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, false)))
    val plan = input.select('a.cast(ArrayType(IntegerType, true)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('a.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable element to non-nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, true)))
    val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    // Though cast from `ArrayType(IntegerType, true)` to `ArrayType(IntegerType, false)` is not
    // allowed, here we just ensure that `SimplifyCasts` rule respect the plan.
    comparePlans(optimized, plan, checkAnalysis = false)
  }

  test("non-nullable value map to nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, false)))
    val plan = input.select('m.cast(MapType(StringType, StringType, true))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('m.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable value map to non-nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, true)))
    val plan = input.select('m.cast(MapType(StringType, StringType, false))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    // Though cast from `MapType(StringType, StringType, true)` to
    // `MapType(StringType, StringType, false)` is not allowed, here we just ensure that
    // `SimplifyCasts` rule respect the plan.
    comparePlans(optimized, plan, checkAnalysis = false)
  }
}
Example 32
Source File: ComputeCurrentTimeSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.DateTimeUtils

class ComputeCurrentTimeSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Seq(Batch("ComputeCurrentTime", Once, ComputeCurrentTime))
  }

  test("analyzer should replace current_timestamp with literals") {
    val in = Project(Seq(Alias(CurrentTimestamp(), "a")(), Alias(CurrentTimestamp(), "b")()),
      LocalRelation())

    val min = System.currentTimeMillis() * 1000
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = (System.currentTimeMillis() + 1) * 1000

    val lits = new scala.collection.mutable.ArrayBuffer[Long]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Long]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }

  test("analyzer should replace current_date with literals") {
    val in = Project(Seq(Alias(CurrentDate(), "a")(), Alias(CurrentDate(), "b")()), LocalRelation())

    val min = DateTimeUtils.millisToDays(System.currentTimeMillis())
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = DateTimeUtils.millisToDays(System.currentTimeMillis())

    val lits = new scala.collection.mutable.ArrayBuffer[Int]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Int]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }
}
Example 33
Source File: PruneFileSourcePartitionsSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.getAbsolutePath}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta)) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } }
Example 34
Source File: RewriteSubquerySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.ListQuery import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class RewriteSubquerySuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Column Pruning", FixedPoint(100), ColumnPruning) :: Batch("Rewrite Subquery", FixedPoint(1), RewritePredicateSubquery, ColumnPruning, CollapseProject, RemoveRedundantProject) :: Nil } test("Column pruning after rewriting predicate subquery") { val relation = LocalRelation('a.int, 'b.int) val relInSubquery = LocalRelation('x.int, 'y.int, 'z.int) val query = relation.where('a.in(ListQuery(relInSubquery.select('x)))).select('a) val optimized = Optimize.execute(query.analyze) val correctAnswer = relation .select('a) .join(relInSubquery.select('x), LeftSemi, Some('a === 'x)) .analyze comparePlans(optimized, correctAnswer) } }
Example 35
Source File: EliminateMapObjectsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{DeserializeToObject, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ class EliminateMapObjectsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = { Batch("EliminateMapObjects", FixedPoint(50), NullPropagation, SimplifyCasts, EliminateMapObjects) :: Nil } } implicit private def intArrayEncoder = ExpressionEncoder[Array[Int]]() implicit private def doubleArrayEncoder = ExpressionEncoder[Array[Double]]() test("SPARK-20254: Remove unnecessary data conversion for primitive array") { val intObjType = ObjectType(classOf[Array[Int]]) val intInput = LocalRelation('a.array(ArrayType(IntegerType, false))) val intQuery = intInput.deserialize[Array[Int]].analyze val intOptimized = Optimize.execute(intQuery) val intExpected = DeserializeToObject( Invoke(intInput.output(0), "toIntArray", intObjType, Nil, true, false), AttributeReference("obj", intObjType, true)(), intInput) comparePlans(intOptimized, intExpected) val doubleObjType = ObjectType(classOf[Array[Double]]) val doubleInput = LocalRelation('a.array(ArrayType(DoubleType, false))) val doubleQuery = doubleInput.deserialize[Array[Double]].analyze val doubleOptimized = Optimize.execute(doubleQuery) val doubleExpected = DeserializeToObject( Invoke(doubleInput.output(0), "toDoubleArray", doubleObjType, Nil, true, false), AttributeReference("obj", doubleObjType, true)(), doubleInput) comparePlans(doubleOptimized, doubleExpected) } }
Example 36
Source File: CollapseWindowSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class CollapseWindowSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("CollapseWindow", FixedPoint(10), CollapseWindow) :: Nil } val testRelation = LocalRelation('a.double, 'b.double, 'c.string) val a = testRelation.output(0) val b = testRelation.output(1) val c = testRelation.output(2) val partitionSpec1 = Seq(c) val partitionSpec2 = Seq(c + 1) val orderSpec1 = Seq(c.asc) val orderSpec2 = Seq(c.desc) test("collapse two adjacent windows with the same partition/order") { val query = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec1) .window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1) .window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1) val analyzed = query.analyze val optimized = Optimize.execute(analyzed) assert(analyzed.output === optimized.output) val correctAnswer = testRelation.window(Seq( min(a).as('min_a), max(a).as('max_a), sum(b).as('sum_b), avg(b).as('avg_b)), partitionSpec1, orderSpec1) comparePlans(optimized, correctAnswer) } test("Don't collapse adjacent windows with different partitions or orders") { val query1 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec2) val optimized1 = Optimize.execute(query1.analyze) val correctAnswer1 = query1.analyze comparePlans(optimized1, correctAnswer1) val query2 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec2, orderSpec1) val optimized2 = Optimize.execute(query2.analyze) val correctAnswer2 = query2.analyze comparePlans(optimized2, correctAnswer2) } test("Don't collapse adjacent windows with dependent columns") { val query = testRelation .window(Seq(sum(a).as('sum_a)), partitionSpec1, orderSpec1) .window(Seq(max('sum_a).as('max_sum_a)), partitionSpec1, orderSpec1) .analyze val expected = query.analyze val optimized = Optimize.execute(query.analyze) comparePlans(optimized, expected) } }
Example 37
Source File: EliminateDistinctSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateDistinctSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Operator Optimizations", Once, EliminateDistinct) :: Nil } val testRelation = LocalRelation('a.int) test("Eliminate Distinct in Max") { val query = testRelation .select(maxDistinct('a).as('result)) .analyze val answer = testRelation .select(max('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) } test("Eliminate Distinct in Min") { val query = testRelation .select(minDistinct('a).as('result)) .analyze val answer = testRelation .select(min('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) } }
Example 38
Source File: CheckCartesianProductsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.scalatest.Matchers._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf.CROSS_JOINS_ENABLED class CheckCartesianProductsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Check Cartesian Products", Once, CheckCartesianProducts) :: Nil } val testRelation1 = LocalRelation('a.int, 'b.int) val testRelation2 = LocalRelation('c.int, 'd.int) val joinTypesWithRequiredCondition = Seq(Inner, LeftOuter, RightOuter, FullOuter) val joinTypesWithoutRequiredCondition = Seq(LeftSemi, LeftAnti, ExistenceJoin('exists)) test("CheckCartesianProducts doesn't throw an exception if cross joins are enabled)") { withSQLConf(CROSS_JOINS_ENABLED.key -> "true") { noException should be thrownBy { for (joinType <- joinTypesWithRequiredCondition ++ joinTypesWithoutRequiredCondition) { performCartesianProductCheck(joinType) } } } } test("CheckCartesianProducts throws an exception for join types that require a join condition") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithRequiredCondition) { val thrownException = the [AnalysisException] thrownBy { performCartesianProductCheck(joinType) } assert(thrownException.message.contains("Detected implicit cartesian product")) } } } test("CheckCartesianProducts doesn't throw an exception if a join condition is present") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithRequiredCondition) { noException should be thrownBy { performCartesianProductCheck(joinType, Some('a === 'd)) } } } } test("CheckCartesianProducts doesn't throw an exception if join types don't require conditions") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithoutRequiredCondition) { noException should be thrownBy { performCartesianProductCheck(joinType) } } } } private def performCartesianProductCheck( joinType: JoinType, condition: Option[Expression] = None): Unit = { val analyzedPlan = testRelation1.join(testRelation2, joinType, condition).analyze val optimizedPlan = Optimize.execute(analyzedPlan) comparePlans(analyzedPlan, optimizedPlan) } }
Example 39
Source File: PullupCorrelatedPredicatesSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{In, ListQuery} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PullupCorrelatedPredicates", Once, PullupCorrelatedPredicates) :: Nil } val testRelation = LocalRelation('a.int, 'b.double) val testRelation2 = LocalRelation('c.int, 'd.double) test("PullupCorrelatedPredicates should not produce unresolved plan") { val correlatedSubquery = testRelation2 .where('b < 'd) .select('c) val outerQuery = testRelation .where(In('a, Seq(ListQuery(correlatedSubquery)))) .select('a).analyze assert(outerQuery.resolved) val optimized = Optimize.execute(outerQuery) assert(optimized.resolved) } }
Example 40
Source File: ConvertToLocalRelationSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class ConvertToLocalRelationSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil } test("Project on LocalRelation should be turned into a single LocalRelation") { val testRelation = LocalRelation( LocalRelation('a.int, 'b.int).output, InternalRow(1, 2) :: InternalRow(4, 5) :: Nil) val correctAnswer = LocalRelation( LocalRelation('a1.int, 'b1.int).output, InternalRow(1, 3) :: InternalRow(4, 6) :: Nil) val projectOnLocal = testRelation.select( UnresolvedAttribute("a").as("a1"), (UnresolvedAttribute("b") + 1).as("b1")) val optimized = Optimize.execute(projectOnLocal.analyze) comparePlans(optimized, correctAnswer) } }
Example 41
Source File: RuleExecutorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} class RuleExecutorSuite extends SparkFunSuite { object DecrementLiterals extends Rule[Expression] { def apply(e: Expression): Expression = e transform { case IntegerLiteral(i) if i > 0 => Literal(i - 1) } } test("only once") { object ApplyOnce extends RuleExecutor[Expression] { val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(ApplyOnce.execute(Literal(10)) === Literal(9)) } test("to fixed point") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil } assert(ToFixedPoint.execute(Literal(10)) === Literal(0)) } test("to maxIterations") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil } val message = intercept[TreeNodeException[LogicalPlan]] { ToFixedPoint.execute(Literal(100)) }.getMessage assert(message.contains("Max iterations (10) reached for batch fixedPoint")) } test("structural integrity checker") { object WithSIChecker extends RuleExecutor[Expression] { override protected def isPlanIntegral(expr: Expression): Boolean = expr match { case IntegerLiteral(_) => true case _ => false } val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(WithSIChecker.execute(Literal(10)) === Literal(9)) val message = intercept[TreeNodeException[LogicalPlan]] { WithSIChecker.execute(Literal(10.1)) }.getMessage assert(message.contains("the structural integrity of the plan is broken")) } }
Example 42
Source File: PruneFileSourcePartitionsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } }
Example 43
Source File: AggregateOptimizeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.SimpleCatalystConf import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class AggregateOptimizeSuite extends PlanTest { val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false) val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) val analyzer = new Analyzer(catalog, conf) object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Aggregate", FixedPoint(100), FoldablePropagation, RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) test("remove literals in grouping expression") { val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b)) val optimized = Optimize.execute(analyzer.execute(query)) val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze comparePlans(optimized, correctAnswer) } test("do not remove all grouping expressions if they are all literals") { val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b)) val optimized = Optimize.execute(analyzer.execute(query)) val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b))) comparePlans(optimized, correctAnswer) } test("Remove aliased literals") { val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b)) val optimized = Optimize.execute(analyzer.execute(query)) val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze comparePlans(optimized, correctAnswer) } test("remove repetition in grouping expression") { val input = LocalRelation('a.int, 'b.int, 'c.int) val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c)) val optimized = Optimize.execute(analyzer.execute(query)) val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze comparePlans(optimized, correctAnswer) } }
Example 44
Source File: EliminateMapObjectsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{DeserializeToObject, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ class EliminateMapObjectsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = { Batch("EliminateMapObjects", FixedPoint(50), NullPropagation, SimplifyCasts, EliminateMapObjects) :: Nil } } implicit private def intArrayEncoder = ExpressionEncoder[Array[Int]]() implicit private def doubleArrayEncoder = ExpressionEncoder[Array[Double]]() test("SPARK-20254: Remove unnecessary data conversion for primitive array") { val intObjType = ObjectType(classOf[Array[Int]]) val intInput = LocalRelation('a.array(ArrayType(IntegerType, false))) val intQuery = intInput.deserialize[Array[Int]].analyze val intOptimized = Optimize.execute(intQuery) val intExpected = DeserializeToObject( Invoke(intInput.output(0), "toIntArray", intObjType, Nil, true, false), AttributeReference("obj", intObjType, true)(), intInput) comparePlans(intOptimized, intExpected) val doubleObjType = ObjectType(classOf[Array[Double]]) val doubleInput = LocalRelation('a.array(ArrayType(DoubleType, false))) val doubleQuery = doubleInput.deserialize[Array[Double]].analyze val doubleOptimized = Optimize.execute(doubleQuery) val doubleExpected = DeserializeToObject( Invoke(doubleInput.output(0), "toDoubleArray", doubleObjType, Nil, true, false), AttributeReference("obj", doubleObjType, true)(), doubleInput) comparePlans(doubleOptimized, doubleExpected) } }
Example 45
Source File: CollapseWindowSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class CollapseWindowSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("CollapseWindow", FixedPoint(10), CollapseWindow) :: Nil } val testRelation = LocalRelation('a.double, 'b.double, 'c.string) val a = testRelation.output(0) val b = testRelation.output(1) val c = testRelation.output(2) val partitionSpec1 = Seq(c) val partitionSpec2 = Seq(c + 1) val orderSpec1 = Seq(c.asc) val orderSpec2 = Seq(c.desc) test("collapse two adjacent windows with the same partition/order") { val query = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec1) .window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1) .window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1) val analyzed = query.analyze val optimized = Optimize.execute(analyzed) assert(analyzed.output === optimized.output) val correctAnswer = testRelation.window(Seq( min(a).as('min_a), max(a).as('max_a), sum(b).as('sum_b), avg(b).as('avg_b)), partitionSpec1, orderSpec1) comparePlans(optimized, correctAnswer) } test("Don't collapse adjacent windows with different partitions or orders") { val query1 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec2) val optimized1 = Optimize.execute(query1.analyze) val correctAnswer1 = query1.analyze comparePlans(optimized1, correctAnswer1) val query2 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec2, orderSpec1) val optimized2 = Optimize.execute(query2.analyze) val correctAnswer2 = query2.analyze comparePlans(optimized2, correctAnswer2) } test("Don't collapse adjacent windows with dependent columns") { val query = testRelation .window(Seq(sum(a).as('sum_a)), partitionSpec1, orderSpec1) .window(Seq(max('sum_a).as('max_sum_a)), partitionSpec1, orderSpec1) .analyze val expected = query.analyze val optimized = Optimize.execute(query.analyze) comparePlans(optimized, expected) } }
Example 46
Source File: EliminateDistinctSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateDistinctSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Operator Optimizations", Once, EliminateDistinct) :: Nil } val testRelation = LocalRelation('a.int) test("Eliminate Distinct in Max") { val query = testRelation .select(maxDistinct('a).as('result)) .analyze val answer = testRelation .select(max('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) } test("Eliminate Distinct in Min") { val query = testRelation .select(minDistinct('a).as('result)) .analyze val answer = testRelation .select(min('a).as('result)) .analyze assert(query != answer) comparePlans(Optimize.execute(query), answer) } }
Example 47
Source File: CheckCartesianProductsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.scalatest.Matchers._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf.CROSS_JOINS_ENABLED class CheckCartesianProductsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Check Cartesian Products", Once, CheckCartesianProducts) :: Nil } val testRelation1 = LocalRelation('a.int, 'b.int) val testRelation2 = LocalRelation('c.int, 'd.int) val joinTypesWithRequiredCondition = Seq(Inner, LeftOuter, RightOuter, FullOuter) val joinTypesWithoutRequiredCondition = Seq(LeftSemi, LeftAnti, ExistenceJoin('exists)) test("CheckCartesianProducts doesn't throw an exception if cross joins are enabled)") { withSQLConf(CROSS_JOINS_ENABLED.key -> "true") { noException should be thrownBy { for (joinType <- joinTypesWithRequiredCondition ++ joinTypesWithoutRequiredCondition) { performCartesianProductCheck(joinType) } } } } test("CheckCartesianProducts throws an exception for join types that require a join condition") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithRequiredCondition) { val thrownException = the [AnalysisException] thrownBy { performCartesianProductCheck(joinType) } assert(thrownException.message.contains("Detected implicit cartesian product")) } } } test("CheckCartesianProducts doesn't throw an exception if a join condition is present") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithRequiredCondition) { noException should be thrownBy { performCartesianProductCheck(joinType, Some('a === 'd)) } } } } test("CheckCartesianProducts doesn't throw an exception if join types don't require conditions") { withSQLConf(CROSS_JOINS_ENABLED.key -> "false") { for (joinType <- joinTypesWithoutRequiredCondition) { noException should be thrownBy { performCartesianProductCheck(joinType) } } } } private def performCartesianProductCheck( joinType: JoinType, condition: Option[Expression] = None): Unit = { val analyzedPlan = testRelation1.join(testRelation2, joinType, condition).analyze val optimizedPlan = Optimize.execute(analyzedPlan) comparePlans(analyzedPlan, optimizedPlan) } }
Example 48
Source File: PullupCorrelatedPredicatesSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{InSubquery, ListQuery} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PullupCorrelatedPredicates", Once, PullupCorrelatedPredicates) :: Nil } val testRelation = LocalRelation('a.int, 'b.double) val testRelation2 = LocalRelation('c.int, 'd.double) test("PullupCorrelatedPredicates should not produce unresolved plan") { val correlatedSubquery = testRelation2 .where('b < 'd) .select('c) val outerQuery = testRelation .where(InSubquery(Seq('a), ListQuery(correlatedSubquery))) .select('a).analyze assert(outerQuery.resolved) val optimized = Optimize.execute(outerQuery) assert(optimized.resolved) } }
Example 49
Source File: ConvertToLocalRelationSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{LessThan, Literal} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class ConvertToLocalRelationSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil } test("Project on LocalRelation should be turned into a single LocalRelation") { val testRelation = LocalRelation( LocalRelation('a.int, 'b.int).output, InternalRow(1, 2) :: InternalRow(4, 5) :: Nil) val correctAnswer = LocalRelation( LocalRelation('a1.int, 'b1.int).output, InternalRow(1, 3) :: InternalRow(4, 6) :: Nil) val projectOnLocal = testRelation.select( UnresolvedAttribute("a").as("a1"), (UnresolvedAttribute("b") + 1).as("b1")) val optimized = Optimize.execute(projectOnLocal.analyze) comparePlans(optimized, correctAnswer) } test("Filter on LocalRelation should be turned into a single LocalRelation") { val testRelation = LocalRelation( LocalRelation('a.int, 'b.int).output, InternalRow(1, 2) :: InternalRow(4, 5) :: Nil) val correctAnswer = LocalRelation( LocalRelation('a1.int, 'b1.int).output, InternalRow(1, 3) :: Nil) val filterAndProjectOnLocal = testRelation .select(UnresolvedAttribute("a").as("a1"), (UnresolvedAttribute("b") + 1).as("b1")) .where(LessThan(UnresolvedAttribute("b1"), Literal.create(6))) val optimized = Optimize.execute(filterAndProjectOnLocal.analyze) comparePlans(optimized, correctAnswer) } }
Example 50
Source File: ResolveLambdaVariablesSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types.{ArrayType, IntegerType} class ResolveLambdaVariablesSuite extends PlanTest { import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ object Analyzer extends RuleExecutor[LogicalPlan] { val batches = Batch("Resolution", FixedPoint(4), ResolveLambdaVariables(conf)) :: Nil } private val key = 'key.int private val values1 = 'values1.array(IntegerType) private val values2 = 'values2.array(ArrayType(ArrayType(IntegerType))) private val data = LocalRelation(Seq(key, values1, values2)) private val lvInt = NamedLambdaVariable("x", IntegerType, nullable = true) private val lvHiddenInt = NamedLambdaVariable("col0", IntegerType, nullable = true) private val lvArray = NamedLambdaVariable("x", ArrayType(IntegerType), nullable = true) private def plan(e: Expression): LogicalPlan = data.select(e.as("res")) private def checkExpression(e1: Expression, e2: Expression): Unit = { comparePlans(Analyzer.execute(plan(e1)), plan(e2)) } private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name)) test("resolution - no op") { checkExpression(key, key) } test("resolution - simple") { val in = ArrayTransform(values1, LambdaFunction(lv('x) + 1, lv('x) :: Nil)) val out = ArrayTransform(values1, LambdaFunction(lvInt + 1, lvInt :: Nil)) checkExpression(in, out) } test("resolution - nested") { val in = ArrayTransform(values2, LambdaFunction( ArrayTransform(lv('x), LambdaFunction(lv('x) + 1, lv('x) :: Nil)), lv('x) :: Nil)) val out = ArrayTransform(values2, LambdaFunction( ArrayTransform(lvArray, LambdaFunction(lvInt + 1, lvInt :: Nil)), lvArray :: Nil)) checkExpression(in, out) } test("resolution - hidden") { val in = ArrayTransform(values1, key) val out = ArrayTransform(values1, LambdaFunction(key, lvHiddenInt :: Nil, hidden = true)) checkExpression(in, out) } test("fail - name collisions") { val p = plan(ArrayTransform(values1, LambdaFunction(lv('x) + lv('X), lv('x) :: lv('X) :: Nil))) val msg = intercept[AnalysisException](Analyzer.execute(p)).getMessage assert(msg.contains("arguments should not have names that are semantically the same")) } test("fail - lambda arguments") { val p = plan(ArrayTransform(values1, LambdaFunction(lv('x) + lv('y) + lv('z), lv('x) :: lv('y) :: lv('z) :: Nil))) val msg = intercept[AnalysisException](Analyzer.execute(p)).getMessage assert(msg.contains("does not match the number of arguments expected")) } }
Example 51
Source File: RuleExecutorSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} class RuleExecutorSuite extends SparkFunSuite { object DecrementLiterals extends Rule[Expression] { def apply(e: Expression): Expression = e transform { case IntegerLiteral(i) if i > 0 => Literal(i - 1) } } test("only once") { object ApplyOnce extends RuleExecutor[Expression] { val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(ApplyOnce.execute(Literal(10)) === Literal(9)) } test("to fixed point") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil } assert(ToFixedPoint.execute(Literal(10)) === Literal(0)) } test("to maxIterations") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil } val message = intercept[TreeNodeException[LogicalPlan]] { ToFixedPoint.execute(Literal(100)) }.getMessage assert(message.contains("Max iterations (10) reached for batch fixedPoint")) } test("structural integrity checker") { object WithSIChecker extends RuleExecutor[Expression] { override protected def isPlanIntegral(expr: Expression): Boolean = expr match { case IntegerLiteral(_) => true case _ => false } val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(WithSIChecker.execute(Literal(10)) === Literal(9)) val message = intercept[TreeNodeException[LogicalPlan]] { WithSIChecker.execute(Literal(10.1)) }.getMessage assert(message.contains("the structural integrity of the plan is broken")) } }
Example 52
Source File: PruneFileSourcePartitionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.scalatest.Matchers._ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } test("SPARK-26576 Broadcast hint not applied to partitioned table") { withTable("tbl") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") val df = spark.table("tbl") val qe = df.join(broadcast(df), "p").queryExecution qe.optimizedPlan.collect { case _: ResolvedHint => } should have size 1 qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1 } } } }
Example 53
Source File: HBaseSQLContext.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.spark.SparkContext import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.OverrideCatalog import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.{EnsureRequirements, SparkPlan} import org.apache.spark.sql.hbase.execution.{AddCoprocessor, HBaseStrategies} class HBaseSQLContext(sc: SparkContext) extends SQLContext(sc) { self => def this(sparkContext: JavaSparkContext) = this(sparkContext.sc) protected[sql] override lazy val conf: SQLConf = new HBaseSQLConf HBaseConfiguration.merge( sc.hadoopConfiguration, HBaseConfiguration.create(sc.hadoopConfiguration)) @transient override protected[sql] lazy val catalog: HBaseCatalog = new HBaseCatalog(this, sc.hadoopConfiguration) with OverrideCatalog experimental.extraStrategies = Seq((new SparkPlanner with HBaseStrategies).HBaseDataSource) @transient override protected[sql] val prepareForExecution = new RuleExecutor[SparkPlan] { val batches = Batch("Add exchange", Once, EnsureRequirements(self)) :: Batch("Add coprocessor", Once, AddCoprocessor(self)) :: Nil } }
Example 54
Source File: RewriteSubquerySuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.ListQuery import org.apache.spark.sql.catalyst.plans.{LeftSemi, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class RewriteSubquerySuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Column Pruning", FixedPoint(100), ColumnPruning) :: Batch("Rewrite Subquery", FixedPoint(1), RewritePredicateSubquery, ColumnPruning, CollapseProject, RemoveRedundantProject) :: Nil } test("Column pruning after rewriting predicate subquery") { val relation = LocalRelation('a.int, 'b.int) val relInSubquery = LocalRelation('x.int, 'y.int, 'z.int) val query = relation.where('a.in(ListQuery(relInSubquery.select('x)))).select('a) val optimized = Optimize.execute(query.analyze) val correctAnswer = relation .select('a) .join(relInSubquery.select('x), LeftSemi, Some('a === 'x)) .analyze comparePlans(optimized, correctAnswer) } }
Example 55
Source File: ReorderAssociativeOperatorSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class ReorderAssociativeOperatorSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("ReorderAssociativeOperator", Once, ReorderAssociativeOperator) :: Nil } val testRelation = LocalRelation('a.int, 'b.int, 'c.int) test("Reorder associative operators") { val originalQuery = testRelation .select( (Literal(3) + ((Literal(1) + 'a) + 2)) + 4, 'b * 1 * 2 * 3 * 4, ('b + 1) * 2 * 3 * 4, 'a + 1 + 'b + 2 + 'c + 3, 'a + 1 + 'b * 2 + 'c + 3, Rand(0) * 1 * 2 * 3 * 4) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation .select( ('a + 10).as("((3 + ((1 + a) + 2)) + 4)"), ('b * 24).as("((((b * 1) * 2) * 3) * 4)"), (('b + 1) * 24).as("((((b + 1) * 2) * 3) * 4)"), ('a + 'b + 'c + 6).as("(((((a + 1) + b) + 2) + c) + 3)"), ('a + 'b * 2 + 'c + 4).as("((((a + 1) + (b * 2)) + c) + 3)"), Rand(0) * 1 * 2 * 3 * 4) .analyze comparePlans(optimized, correctAnswer) } test("nested expression with aggregate operator") { val originalQuery = testRelation.as("t1") .join(testRelation.as("t2"), Inner, Some("t1.a".attr === "t2.a".attr)) .groupBy("t1.a".attr + 1, "t2.a".attr + 1)( (("t1.a".attr + 1) + ("t2.a".attr + 1)).as("col")) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = originalQuery.analyze comparePlans(optimized, correctAnswer) } }
Example 56
Source File: SimplifyCastsSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ class SimplifyCastsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("SimplifyCasts", FixedPoint(50), SimplifyCasts) :: Nil } test("non-nullable element array to nullable element array cast") { val input = LocalRelation('a.array(ArrayType(IntegerType, false))) val plan = input.select('a.cast(ArrayType(IntegerType, true)).as("casted")).analyze val optimized = Optimize.execute(plan) val expected = input.select('a.as("casted")).analyze comparePlans(optimized, expected) } test("nullable element to non-nullable element array cast") { val input = LocalRelation('a.array(ArrayType(IntegerType, true))) val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze val optimized = Optimize.execute(plan) comparePlans(optimized, plan) } test("non-nullable value map to nullable value map cast") { val input = LocalRelation('m.map(MapType(StringType, StringType, false))) val plan = input.select('m.cast(MapType(StringType, StringType, true)) .as("casted")).analyze val optimized = Optimize.execute(plan) val expected = input.select('m.as("casted")).analyze comparePlans(optimized, expected) } test("nullable value map to non-nullable value map cast") { val input = LocalRelation('m.map(MapType(StringType, StringType, true))) val plan = input.select('m.cast(MapType(StringType, StringType, false)) .as("casted")).analyze val optimized = Optimize.execute(plan) comparePlans(optimized, plan) } }
Example 57
Source File: ComputeCurrentTimeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.DateTimeUtils class ComputeCurrentTimeSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Seq(Batch("ComputeCurrentTime", Once, ComputeCurrentTime)) } test("analyzer should replace current_timestamp with literals") { val in = Project(Seq(Alias(CurrentTimestamp(), "a")(), Alias(CurrentTimestamp(), "b")()), LocalRelation()) val min = System.currentTimeMillis() * 1000 val plan = Optimize.execute(in.analyze).asInstanceOf[Project] val max = (System.currentTimeMillis() + 1) * 1000 val lits = new scala.collection.mutable.ArrayBuffer[Long] plan.transformAllExpressions { case e: Literal => lits += e.value.asInstanceOf[Long] e } assert(lits.size == 2) assert(lits(0) >= min && lits(0) <= max) assert(lits(1) >= min && lits(1) <= max) assert(lits(0) == lits(1)) } test("analyzer should replace current_date with literals") { val in = Project(Seq(Alias(CurrentDate(), "a")(), Alias(CurrentDate(), "b")()), LocalRelation()) val min = DateTimeUtils.millisToDays(System.currentTimeMillis()) val plan = Optimize.execute(in.analyze).asInstanceOf[Project] val max = DateTimeUtils.millisToDays(System.currentTimeMillis()) val lits = new scala.collection.mutable.ArrayBuffer[Int] plan.transformAllExpressions { case e: Literal => lits += e.value.asInstanceOf[Int] e } assert(lits.size == 2) assert(lits(0) >= min && lits(0) <= max) assert(lits(1) >= min && lits(1) <= max) assert(lits(0) == lits(1)) } }
Example 58
Source File: EliminateSerializationSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.rules.RuleExecutor case class OtherTuple(_1: Int, _2: Int) class EliminateSerializationSuite extends PlanTest { private object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Serialization", FixedPoint(100), EliminateSerialization) :: Nil } implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]() implicit private def intEncoder = ExpressionEncoder[Int]() test("back to back serialization") { val input = LocalRelation('obj.obj(classOf[(Int, Int)])) val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze val optimized = Optimize.execute(plan) val expected = input.select('obj.as("obj")).analyze comparePlans(optimized, expected) } test("back to back serialization with object change") { val input = LocalRelation('obj.obj(classOf[OtherTuple])) val plan = input.serialize[OtherTuple].deserialize[(Int, Int)].analyze val optimized = Optimize.execute(plan) comparePlans(optimized, plan) } test("back to back serialization in AppendColumns") { val input = LocalRelation('obj.obj(classOf[(Int, Int)])) val func = (item: (Int, Int)) => item._1 val plan = AppendColumns(func, input.serialize[(Int, Int)]).analyze val optimized = Optimize.execute(plan) val expected = AppendColumnsWithObject( func.asInstanceOf[Any => Any], productEncoder[(Int, Int)].namedExpressions, intEncoder.namedExpressions, input).analyze comparePlans(optimized, expected) } test("back to back serialization in AppendColumns with object change") { val input = LocalRelation('obj.obj(classOf[OtherTuple])) val func = (item: (Int, Int)) => item._1 val plan = AppendColumns(func, input.serialize[OtherTuple]).analyze val optimized = Optimize.execute(plan) comparePlans(optimized, plan) } }
Example 59
Source File: ReplaceOperatorSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor class ReplaceOperatorSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Replace Operators", FixedPoint(100), ReplaceDistinctWithAggregate, ReplaceExceptWithAntiJoin, ReplaceIntersectWithSemiJoin) :: Nil } test("replace Intersect with Left-semi Join") { val table1 = LocalRelation('a.int, 'b.int) val table2 = LocalRelation('c.int, 'd.int) val query = Intersect(table1, table2) val optimized = Optimize.execute(query.analyze) val correctAnswer = Aggregate(table1.output, table1.output, Join(table1, table2, LeftSemi, Option('a <=> 'c && 'b <=> 'd))).analyze comparePlans(optimized, correctAnswer) } test("replace Except with Left-anti Join") { val table1 = LocalRelation('a.int, 'b.int) val table2 = LocalRelation('c.int, 'd.int) val query = Except(table1, table2) val optimized = Optimize.execute(query.analyze) val correctAnswer = Aggregate(table1.output, table1.output, Join(table1, table2, LeftAnti, Option('a <=> 'c && 'b <=> 'd))).analyze comparePlans(optimized, correctAnswer) } test("replace Distinct with Aggregate") { val input = LocalRelation('a.int, 'b.int) val query = Distinct(input) val optimized = Optimize.execute(query.analyze) val correctAnswer = Aggregate(input.output, input.output, input) comparePlans(optimized, correctAnswer) } }
Example 60
Source File: CollapseRepartitionSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class CollapseRepartitionSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("CollapseRepartition", FixedPoint(10), CollapseRepartition) :: Nil } val testRelation = LocalRelation('a.int, 'b.int) test("collapse two adjacent repartitions into one") { val query = testRelation .repartition(10) .repartition(20) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation.repartition(20).analyze comparePlans(optimized, correctAnswer) } test("collapse repartition and repartitionBy into one") { val query = testRelation .repartition(10) .distribute('a)(20) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation.distribute('a)(20).analyze comparePlans(optimized, correctAnswer) } test("collapse repartitionBy and repartition into one") { val query = testRelation .distribute('a)(20) .repartition(10) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation.distribute('a)(10).analyze comparePlans(optimized, correctAnswer) } test("collapse two adjacent repartitionBys into one") { val query = testRelation .distribute('b)(10) .distribute('a)(20) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation.distribute('a)(20).analyze comparePlans(optimized, correctAnswer) } }
Example 61
Source File: CollapseWindowSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class CollapseWindowSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("CollapseWindow", FixedPoint(10), CollapseWindow) :: Nil } val testRelation = LocalRelation('a.double, 'b.double, 'c.string) val a = testRelation.output(0) val b = testRelation.output(1) val c = testRelation.output(2) val partitionSpec1 = Seq(c) val partitionSpec2 = Seq(c + 1) val orderSpec1 = Seq(c.asc) val orderSpec2 = Seq(c.desc) test("collapse two adjacent windows with the same partition/order") { val query = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec1) .window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1) .window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1) val optimized = Optimize.execute(query.analyze) val correctAnswer = testRelation.window(Seq( avg(b).as('avg_b), sum(b).as('sum_b), max(a).as('max_a), min(a).as('min_a)), partitionSpec1, orderSpec1) comparePlans(optimized, correctAnswer) } test("Don't collapse adjacent windows with different partitions or orders") { val query1 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec2) val optimized1 = Optimize.execute(query1.analyze) val correctAnswer1 = query1.analyze comparePlans(optimized1, correctAnswer1) val query2 = testRelation .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1) .window(Seq(max(a).as('max_a)), partitionSpec2, orderSpec1) val optimized2 = Optimize.execute(query2.analyze) val correctAnswer2 = query2.analyze comparePlans(optimized2, correctAnswer2) } }
Example 62
Source File: ConvertToLocalRelationSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ConvertToLocalRelationSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("LocalRelation", FixedPoint(100),
        ConvertToLocalRelation) :: Nil
  }

  test("Project on LocalRelation should be turned into a single LocalRelation") {
    val testRelation = LocalRelation(
      LocalRelation('a.int, 'b.int).output,
      InternalRow(1, 2) :: InternalRow(4, 5) :: Nil)

    val correctAnswer = LocalRelation(
      LocalRelation('a1.int, 'b1.int).output,
      InternalRow(1, 3) :: InternalRow(4, 6) :: Nil)

    val projectOnLocal = testRelation.select(
      UnresolvedAttribute("a").as("a1"),
      (UnresolvedAttribute("b") + 1).as("b1"))

    val optimized = Optimize.execute(projectOnLocal.analyze)

    comparePlans(optimized, correctAnswer)
  }
}
Example 63
Source File: RuleExecutorSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    val message = intercept[TreeNodeException[LogicalPlan]] {
      ToFixedPoint.execute(Literal(100))
    }.getMessage
    assert(message.contains("Max iterations (10) reached for batch fixedPoint"))
  }
}
Example 65
Source File: RangerSparkOptimizer.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class RangerSparkOptimizer(spark: SparkSession) extends RuleExecutor[LogicalPlan] {

  override def batches: Seq[Batch] = {
    val optimizer = spark.sessionState.optimizer
    val extRules = optimizer.extendedOperatorOptimizationRules
    optimizer.batches.map { batch =>
      val ruleSet = batch.rules.toSet -- extRules
      Batch(batch.name, FixedPoint(batch.strategy.maxIterations), ruleSet.toSeq: _*)
    }
  }
}
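Because RangerSparkOptimizer is itself a RuleExecutor[LogicalPlan], it can be applied directly to an analyzed plan. Below is a minimal usage sketch, assuming a local SparkSession; the session setup, the name rangerOptimizer, and the sample query are illustrative assumptions rather than part of the original project:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.optimizer.RangerSparkOptimizer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Illustrative local session; in the real project the session is supplied by the caller.
val spark = SparkSession.builder().master("local[*]").appName("ranger-optimizer-sketch").getOrCreate()
val rangerOptimizer = new RangerSparkOptimizer(spark)

// Re-run the session's optimizer batches (minus the extended operator optimization rules)
// over the analyzed plan of an arbitrary query.
val analyzed: LogicalPlan = spark.range(10).selectExpr("id + 1 AS x").queryExecution.analyzed
val optimized: LogicalPlan = rangerOptimizer.execute(analyzed)
println(optimized.treeString)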
Example 66
Source File: AggregateOptimizeSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 67
Source File: ReorderAssociativeOperatorSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ReorderAssociativeOperatorSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("ReorderAssociativeOperator", Once,
        ReorderAssociativeOperator) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Reorder associative operators") {
    val originalQuery =
      testRelation
        .select(
          (Literal(3) + ((Literal(1) + 'a) + 2)) + 4,
          'b * 1 * 2 * 3 * 4,
          ('b + 1) * 2 * 3 * 4,
          'a + 1 + 'b + 2 + 'c + 3,
          'a + 1 + 'b * 2 + 'c + 3,
          Rand(0) * 1 * 2 * 3 * 4)

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer =
      testRelation
        .select(
          ('a + 10).as("((3 + ((1 + a) + 2)) + 4)"),
          ('b * 24).as("((((b * 1) * 2) * 3) * 4)"),
          (('b + 1) * 24).as("((((b + 1) * 2) * 3) * 4)"),
          ('a + 'b + 'c + 6).as("(((((a + 1) + b) + 2) + c) + 3)"),
          ('a + 'b * 2 + 'c + 4).as("((((a + 1) + (b * 2)) + c) + 3)"),
          Rand(0) * 1 * 2 * 3 * 4)
        .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("nested expression with aggregate operator") {
    val originalQuery =
      testRelation.as("t1")
        .join(testRelation.as("t2"), Inner, Some("t1.a".attr === "t2.a".attr))
        .groupBy("t1.a".attr + 1, "t2.a".attr + 1)(
          (("t1.a".attr + 1) + ("t2.a".attr + 1)).as("col"))

    val optimized = Optimize.execute(originalQuery.analyze)

    val correctAnswer = originalQuery.analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 68
Source File: SimplifyCastsSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

class SimplifyCastsSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyCasts", FixedPoint(50), SimplifyCasts) :: Nil
  }

  test("non-nullable element array to nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, false)))
    val plan = input.select('a.cast(ArrayType(IntegerType, true)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('a.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable element to non-nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, true)))
    val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }

  test("non-nullable value map to nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, false)))
    val plan = input.select('m.cast(MapType(StringType, StringType, true))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('m.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable value map to non-nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, true)))
    val plan = input.select('m.cast(MapType(StringType, StringType, false))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }
}
Example 69
Source File: ComputeCurrentTimeSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.DateTimeUtils

class ComputeCurrentTimeSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Seq(Batch("ComputeCurrentTime", Once, ComputeCurrentTime))
  }

  test("analyzer should replace current_timestamp with literals") {
    val in = Project(Seq(Alias(CurrentTimestamp(), "a")(), Alias(CurrentTimestamp(), "b")()),
      LocalRelation())

    val min = System.currentTimeMillis() * 1000
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = (System.currentTimeMillis() + 1) * 1000

    val lits = new scala.collection.mutable.ArrayBuffer[Long]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Long]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }

  test("analyzer should replace current_date with literals") {
    val in = Project(Seq(Alias(CurrentDate(), "a")(), Alias(CurrentDate(), "b")()), LocalRelation())

    val min = DateTimeUtils.millisToDays(System.currentTimeMillis())
    val plan = Optimize.execute(in.analyze).asInstanceOf[Project]
    val max = DateTimeUtils.millisToDays(System.currentTimeMillis())

    val lits = new scala.collection.mutable.ArrayBuffer[Int]
    plan.transformAllExpressions { case e: Literal =>
      lits += e.value.asInstanceOf[Int]
      e
    }
    assert(lits.size == 2)
    assert(lits(0) >= min && lits(0) <= max)
    assert(lits(1) >= min && lits(1) <= max)
    assert(lits(0) == lits(1))
  }
}
Example 70
Source File: EliminateSerializationSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules.RuleExecutor

case class OtherTuple(_1: Int, _2: Int)

class EliminateSerializationSuite extends PlanTest {
  private object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Serialization", FixedPoint(100),
        EliminateSerialization) :: Nil
  }

  implicit private def productEncoder[T <: Product : TypeTag] = ExpressionEncoder[T]()
  implicit private def intEncoder = ExpressionEncoder[Int]()

  test("back to back serialization") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val plan = input.serialize[(Int, Int)].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('obj.as("obj")).analyze
    comparePlans(optimized, expected)
  }

  test("back to back serialization with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val plan = input.serialize[OtherTuple].deserialize[(Int, Int)].analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }

  test("back to back serialization in AppendColumns") {
    val input = LocalRelation('obj.obj(classOf[(Int, Int)]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[(Int, Int)]).analyze

    val optimized = Optimize.execute(plan)

    val expected = AppendColumnsWithObject(
      func.asInstanceOf[Any => Any],
      productEncoder[(Int, Int)].namedExpressions,
      intEncoder.namedExpressions,
      input).analyze

    comparePlans(optimized, expected)
  }

  test("back to back serialization in AppendColumns with object change") {
    val input = LocalRelation('obj.obj(classOf[OtherTuple]))
    val func = (item: (Int, Int)) => item._1
    val plan = AppendColumns(func, input.serialize[OtherTuple]).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, plan)
  }
}
Example 71
Source File: ReplaceOperatorSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi, PlanTest}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class ReplaceOperatorSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Replace Operators", FixedPoint(100),
        ReplaceDistinctWithAggregate,
        ReplaceExceptWithAntiJoin,
        ReplaceIntersectWithSemiJoin) :: Nil
  }

  test("replace Intersect with Left-semi Join") {
    val table1 = LocalRelation('a.int, 'b.int)
    val table2 = LocalRelation('c.int, 'd.int)

    val query = Intersect(table1, table2)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer =
      Aggregate(table1.output, table1.output,
        Join(table1, table2, LeftSemi, Option('a <=> 'c && 'b <=> 'd))).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("replace Except with Left-anti Join") {
    val table1 = LocalRelation('a.int, 'b.int)
    val table2 = LocalRelation('c.int, 'd.int)

    val query = Except(table1, table2)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer =
      Aggregate(table1.output, table1.output,
        Join(table1, table2, LeftAnti, Option('a <=> 'c && 'b <=> 'd))).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("replace Distinct with Aggregate") {
    val input = LocalRelation('a.int, 'b.int)

    val query = Distinct(input)
    val optimized = Optimize.execute(query.analyze)

    val correctAnswer = Aggregate(input.output, input.output, input)

    comparePlans(optimized, correctAnswer)
  }
}
Example 73
Source File: CollapseWindowSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class CollapseWindowSuite extends PlanTest {
  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("CollapseWindow", FixedPoint(10),
        CollapseWindow) :: Nil
  }

  val testRelation = LocalRelation('a.double, 'b.double, 'c.string)
  val a = testRelation.output(0)
  val b = testRelation.output(1)
  val c = testRelation.output(2)
  val partitionSpec1 = Seq(c)
  val partitionSpec2 = Seq(c + 1)
  val orderSpec1 = Seq(c.asc)
  val orderSpec2 = Seq(c.desc)

  test("collapse two adjacent windows with the same partition/order") {
    val query = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec1)
      .window(Seq(sum(b).as('sum_b)), partitionSpec1, orderSpec1)
      .window(Seq(avg(b).as('avg_b)), partitionSpec1, orderSpec1)

    val analyzed = query.analyze
    val optimized = Optimize.execute(analyzed)
    assert(analyzed.output === optimized.output)

    val correctAnswer = testRelation.window(Seq(
      min(a).as('min_a),
      max(a).as('max_a),
      sum(b).as('sum_b),
      avg(b).as('avg_b)), partitionSpec1, orderSpec1)

    comparePlans(optimized, correctAnswer)
  }

  test("Don't collapse adjacent windows with different partitions or orders") {
    val query1 = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec1, orderSpec2)

    val optimized1 = Optimize.execute(query1.analyze)
    val correctAnswer1 = query1.analyze

    comparePlans(optimized1, correctAnswer1)

    val query2 = testRelation
      .window(Seq(min(a).as('min_a)), partitionSpec1, orderSpec1)
      .window(Seq(max(a).as('max_a)), partitionSpec2, orderSpec1)

    val optimized2 = Optimize.execute(query2.analyze)
    val correctAnswer2 = query2.analyze

    comparePlans(optimized2, correctAnswer2)
  }
}
Example 75
Source File: PruneFileSourcePartitionsSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions, TableFileCatalog}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
            |CREATE EXTERNAL TABLE test(i int)
            |PARTITIONED BY (p int)
            |STORED AS parquet
            |LOCATION '${dir.getAbsolutePath}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val tableFileCatalog = new TableFileCatalog(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })
        val relation = HadoopFsRelation(
          location = tableFileCatalog,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta))
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }
}
Example 76
Source File: PruneFileSourcePartitionsSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
            |CREATE EXTERNAL TABLE test(i int)
            |PARTITIONED BY (p int)
            |STORED AS parquet
            |LOCATION '${dir.getAbsolutePath}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })
        val relation = HadoopFsRelation(
          location = catalogFileIndex,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta))
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }
}
Example 77
Source File: BenchmarkQueryTest.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.catalyst.expressions.codegen.{CodeFormatter, CodeGenerator}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.{SparkPlan, WholeStageCodegenExec}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.util.Utils

abstract class BenchmarkQueryTest extends QueryTest with SharedSQLContext with BeforeAndAfterAll {

  // When Utils.isTesting is true, the RuleExecutor will issue an exception when hitting
  // the max iteration of analyzer/optimizer batches.
  assert(Utils.isTesting, "spark.testing is not set to true")

  protected override def afterAll(): Unit = {
    try {
      // For debugging dump some statistics about how much time was spent in various optimizer rules
      logWarning(RuleExecutor.dumpTimeSpent())
      spark.sessionState.catalog.reset()
    } finally {
      super.afterAll()
    }
  }

  override def beforeAll() {
    super.beforeAll()
    RuleExecutor.resetMetrics()
  }

  protected def checkGeneratedCode(plan: SparkPlan): Unit = {
    val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]()
    plan foreach {
      case s: WholeStageCodegenExec =>
        codegenSubtrees += s
      case _ =>
    }
    codegenSubtrees.toSeq.foreach { subtree =>
      val code = subtree.doCodeGen()._2
      try {
        // Just check the generated code can be properly compiled
        CodeGenerator.compile(code)
      } catch {
        case e: Exception =>
          val msg =
            s"""
               |failed to compile:
               |Subtree:
               |$subtree
               |Generated code:
               |${CodeFormatter.format(code)}
             """.stripMargin
          throw new Exception(msg, e)
      }
    }
  }
}
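The RuleExecutor.resetMetrics() / RuleExecutor.dumpTimeSpent() pair used above can also be handy outside a test harness for ad-hoc inspection of where analyzer and optimizer time goes. A short sketch, assuming an existing SparkSession named spark; the query itself is purely illustrative:

import org.apache.spark.sql.catalyst.rules.RuleExecutor

// Clear the accumulated per-rule timing counters, run some work, then print the totals.
RuleExecutor.resetMetrics()
spark.range(1000).selectExpr("id % 10 AS k", "id AS v").groupBy("k").count().collect()
println(RuleExecutor.dumpTimeSpent())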
Example 78
Source File: AggregateOptimizeSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}

class AggregateOptimizeSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 80
Source File: SimplifyCastsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.types._

class SimplifyCastsSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("SimplifyCasts", FixedPoint(50), SimplifyCasts) :: Nil
  }

  test("non-nullable element array to nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, false)))
    val plan = input.select('a.cast(ArrayType(IntegerType, true)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('a.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable element to non-nullable element array cast") {
    val input = LocalRelation('a.array(ArrayType(IntegerType, true)))
    val plan = input.select('a.cast(ArrayType(IntegerType, false)).as("casted")).analyze
    val optimized = Optimize.execute(plan)
    // Though cast from `ArrayType(IntegerType, true)` to `ArrayType(IntegerType, false)` is not
    // allowed, here we just ensure that `SimplifyCasts` rule respect the plan.
    comparePlans(optimized, plan, checkAnalysis = false)
  }

  test("non-nullable value map to nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, false)))
    val plan = input.select('m.cast(MapType(StringType, StringType, true))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    val expected = input.select('m.as("casted")).analyze
    comparePlans(optimized, expected)
  }

  test("nullable value map to non-nullable value map cast") {
    val input = LocalRelation('m.map(MapType(StringType, StringType, true)))
    val plan = input.select('m.cast(MapType(StringType, StringType, false))
      .as("casted")).analyze
    val optimized = Optimize.execute(plan)
    // Though cast from `MapType(StringType, StringType, true)` to
    // `MapType(StringType, StringType, false)` is not allowed, here we just ensure that
    // `SimplifyCasts` rule respect the plan.
    comparePlans(optimized, plan, checkAnalysis = false)
  }
}
Example 81
Source File: UpdateNullabilityInAttributeReferencesSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{CreateArray, GetArrayItem}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class UpdateNullabilityInAttributeReferencesSuite extends PlanTest {

  object Optimizer extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Constant Folding", FixedPoint(10),
        NullPropagation,
        ConstantFolding,
        BooleanSimplification,
        SimplifyConditionals,
        SimplifyBinaryComparison,
        SimplifyExtractValueOps) ::
      Batch("UpdateAttributeReferences", Once,
        UpdateNullabilityInAttributeReferences) :: Nil
  }

  test("update nullability in AttributeReference") {
    val rel = LocalRelation('a.long.notNull)
    // In the 'original' plans below, the Aggregate node produced by groupBy() has a
    // nullable AttributeReference to `b`, because both array indexing and map lookup are
    // nullable expressions. After optimization, the same attribute is now non-nullable,
    // but the AttributeReference is not updated to reflect this. So, we need to update nullability
    // by the `UpdateNullabilityInAttributeReferences` rule.
    val original = rel
      .select(GetArrayItem(CreateArray(Seq('a, 'a + 1L)), 0) as "b")
      .groupBy($"b")("1")
    val expected = rel.select('a as "b").groupBy($"b")("1").analyze
    val optimized = Optimizer.execute(original.analyze)
    comparePlans(optimized, expected)
  }
}
Example 84
Source File: PushProjectThroughUnionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class PushProjectThroughUnionSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Optimizer Batch", FixedPoint(100),
      PushProjectionThroughUnion,
      FoldablePropagation) :: Nil
  }

  test("SPARK-25450 PushProjectThroughUnion rule uses the same exprId for project expressions " +
    "in each Union child, causing mistakes in constant propagation") {
    val testRelation1 = LocalRelation('a.string, 'b.int, 'c.string)
    val testRelation2 = LocalRelation('d.string, 'e.int, 'f.string)
    val query = testRelation1
      .union(testRelation2.select("bar".as("d"), 'e, 'f))
      .select('a.as("n"))
      .select('n, "dummy").analyze
    val optimized = Optimize.execute(query)

    val expected = testRelation1
      .select('a.as("n"))
      .select('n, "dummy")
      .union(testRelation2
        .select("bar".as("d"), 'e, 'f)
        .select("bar".as("n"))
        .select("bar".as("n"), "dummy")).analyze

    comparePlans(optimized, expected)
  }
}