org.apache.spark.sql.catalyst.catalog.SessionCatalog Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.catalog.SessionCatalog.
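All of the examples below wire up a SessionCatalog from three pieces: an ExternalCatalog implementation (usually InMemoryCatalog), a FunctionRegistry, and a SQL configuration. The following is a minimal sketch of that pattern for orientation only, assuming a Spark 2.x catalyst test dependency on the classpath; the view name "people" and its schema are illustrative placeholders, not taken from any example below.

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EmptyFunctionRegistry
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.internal.SQLConf

object SessionCatalogSketch extends App {
  // Build a SessionCatalog backed by an in-memory external catalog and an
  // empty function registry, mirroring how the test suites below do it.
  val conf = new SQLConf()
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)

  // Register a temporary view and resolve it again. "people" and its schema
  // are hypothetical placeholders used only for this sketch.
  val relation = LocalRelation('a.int, 'b.string)
  catalog.createTempView("people", relation, overrideIfExists = true)
  val plan = catalog.lookupRelation(TableIdentifier("people"))
  println(plan.treeString)
}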
Example 1
Source File: EliminateSortsSuite.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

class EliminateSortsSuite extends PlanTest {
  val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Eliminate Sorts", FixedPoint(10),
        FoldablePropagation,
        EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation
    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze
    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)
    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation
    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))
    comparePlans(optimized, correctAnswer)
  }
}
Example 2
Source File: SimbaOptimizer.scala From Simba with Apache License 2.0

package org.apache.spark.sql.simba

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.expressions.{And, Expression, PredicateHelper}
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkOptimizer
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.simba.plans.SpatialJoin

class SimbaOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends SparkOptimizer(catalog, conf, experimentalMethods) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("SpatialJoinPushDown", FixedPoint(100), PushPredicateThroughSpatialJoin)
}

object PushPredicateThroughSpatialJoin extends Rule[LogicalPlan] with PredicateHelper {
  private def split(condition: Seq[Expression], left: LogicalPlan, right: LogicalPlan) = {
    val (leftEvaluateCondition, rest) =
      condition.partition(_.references subsetOf left.outputSet)
    val (rightEvaluateCondition, commonCondition) =
      rest.partition(_.references subsetOf right.outputSet)
    (leftEvaluateCondition, rightEvaluateCondition, commonCondition)
  }

  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // push the where condition down into join filter
    case f @ Filter(filterCondition, SpatialJoin(left, right, joinType, joinCondition)) =>
      val (leftFilterConditions, rightFilterConditions, commonFilterCondition) =
        split(splitConjunctivePredicates(filterCondition), left, right)
      val newLeft = leftFilterConditions.reduceLeftOption(And).map(Filter(_, left)).getOrElse(left)
      val newRight = rightFilterConditions.reduceLeftOption(And).map(Filter(_, right)).getOrElse(right)
      val newJoinCond = (commonFilterCondition ++ joinCondition).reduceLeftOption(And)
      SpatialJoin(newLeft, newRight, joinType, newJoinCond)

    // push down the join filter into sub query scanning if applicable
    case f @ SpatialJoin(left, right, joinType, joinCondition) =>
      val (leftJoinConditions, rightJoinConditions, commonJoinCondition) =
        split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right)
      val newLeft = leftJoinConditions.reduceLeftOption(And).map(Filter(_, left)).getOrElse(left)
      val newRight = rightJoinConditions.reduceLeftOption(And).map(Filter(_, right)).getOrElse(right)
      val newJoinCond = commonJoinCondition.reduceLeftOption(And)
      SpatialJoin(newLeft, newRight, joinType, newJoinCond)
  }
}
Example 3
Source File: SeQuiLaAnalyzer.scala From bdg-sequila with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.ResolveTableValuedFunctionsSeq
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

import scala.util.Random

class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf)
  extends Analyzer(catalog, conf, conf.optimizerMaxIterations) {

  //override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq(ResolveTableValuedFunctionsSeq)

  //  override lazy val batches: Seq[Batch] = Seq(
  //    Batch("Custeom", fixedPoint, ResolveTableValuedFunctionsSeq),
  //    Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf),
  //      ResolveHints.RemoveAllHints))

  var sequilaOptmazationRules: Seq[Rule[LogicalPlan]] = Nil

  override lazy val batches: Seq[Batch] = Seq(
    Batch("Hints", fixedPoint,
      new ResolveHints.ResolveBroadcastHints(conf),
      ResolveHints.RemoveAllHints),
    Batch("Simple Sanity Check", Once,
      LookupFunctions),
    Batch("Substitution", fixedPoint,
      CTESubstitution,
      WindowsSubstitution,
      EliminateUnions,
      new SubstituteUnresolvedOrdinals(conf)),
    Batch("Resolution", fixedPoint,
      ResolveTableValuedFunctionsSeq ::
        ResolveRelations ::
        ResolveReferences ::
        ResolveCreateNamedStruct ::
        ResolveDeserializer ::
        ResolveNewInstance ::
        ResolveUpCast ::
        ResolveGroupingAnalytics ::
        ResolvePivot ::
        ResolveOrdinalInOrderByAndGroupBy ::
        ResolveAggAliasInGroupBy ::
        ResolveMissingReferences ::
        ExtractGenerator ::
        ResolveGenerate ::
        ResolveFunctions ::
        ResolveAliases ::
        ResolveSubquery ::
        ResolveSubqueryColumnAliases ::
        ResolveWindowOrder ::
        ResolveWindowFrame ::
        ResolveNaturalAndUsingJoin ::
        ExtractWindowExpressions ::
        GlobalAggregates ::
        ResolveAggregateFunctions ::
        TimeWindowing ::
        ResolveInlineTables(conf) ::
        ResolveTimeZone(conf) ::
        TypeCoercion.typeCoercionRules(conf) ++
          extendedResolutionRules: _*),
    Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*),
    Batch("SeQuiLa", Once, sequilaOptmazationRules: _*), // SeQuiLa optimization rules
    Batch("View", Once,
      AliasViewChild(conf)),
    Batch("Nondeterministic", Once,
      PullOutNondeterministic),
    Batch("UDF", Once,
      HandleNullInputsForUDF),
    Batch("FixNullability", Once,
      FixNullability),
    Batch("Subquery", Once,
      UpdateOuterReferences),
    Batch("Cleanup", fixedPoint,
      CleanupAliases)
  )
}
Example 4
Source File: AggregateOptimizeSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}

class AggregateOptimizeSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 5
Source File: EliminateSortsSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL}

class EliminateSortsSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> true, ORDER_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Eliminate Sorts", FixedPoint(10),
        FoldablePropagation,
        EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation
    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze
    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)
    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation
    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))
    comparePlans(optimized, correctAnswer)
  }
}
Example 6
Source File: RewriteDistinctAggregatesSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}
Example 7
Source File: OptimizerStructuralIntegrityCheckerSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project}
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf

class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {

  object OptimizeRuleBreakSI extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan transform {
      case Project(projectList, child) =>
        val newAttr = UnresolvedAttribute("unresolvedAttr")
        Project(projectList ++ Seq(newAttr), child)
    }
  }

  object Optimize extends Optimizer(
    new SessionCatalog(
      new InMemoryCatalog,
      EmptyFunctionRegistry,
      new SQLConf())) {
    val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI)
    override def batches: Seq[Batch] = Seq(newBatch) ++ super.batches
  }

  test("check for invalid plan after execution of rule") {
    val analyzed = Project(Alias(Literal(10), "attr")() :: Nil, OneRowRelation()).analyze
    assert(analyzed.resolved)
    val message = intercept[TreeNodeException[LogicalPlan]] {
      Optimize.execute(analyzed)
    }.getMessage
    val ruleName = OptimizeRuleBreakSI.ruleName
    assert(message.contains(s"After applying rule $ruleName in batch OptimizeRuleBreakSI"))
    assert(message.contains("the structural integrity of the plan is broken"))
  }
}
Example 8
Source File: AnalysisTest.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import java.net.URI
import java.util.Locale

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
    catalog.createDatabase(
      CatalogDatabase("default", "", new URI("loc"), Map.empty),
      ignoreIfExists = false)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
    catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.executeAndCheck(inputPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected override def comparePlans(
      plan1: LogicalPlan,
      plan2: LogicalPlan,
      checkAnalysis: Boolean = false): Unit = {
    // Analysis tests may have not been fully resolved, so skip checkAnalysis.
    super.comparePlans(plan1, plan2, checkAnalysis)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
             |Failed to Analyze Plan
             |$inputPlan
             |
             |Partial Analysis
             |$analysisAttempt
           """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall(
        e.getMessage.toLowerCase(Locale.ROOT).contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}
Example 9
Source File: SparkOptimizer.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}
Example 10
Source File: AggregateOptimizeSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 11
Source File: EliminateSortsSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

class EliminateSortsSuite extends PlanTest {
  val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Eliminate Sorts", FixedPoint(10),
        FoldablePropagation,
        EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation
    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze
    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)
    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation
    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))
    comparePlans(optimized, correctAnswer)
  }
}
Example 12
Source File: RewriteDistinctAggregatesSuite.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{If, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectSet, Count}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with non-partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        CollectSet('b).toAggregateExpression().as('agg2))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}
Example 13
Source File: AnalysisTest.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SimpleCatalystConf(caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
             |Failed to Analyze Plan
             |$inputPlan
             |
             |Partial Analysis
             |$analysisAttempt
           """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    if (!expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}
Example 14
Source File: CarbonAnalyzer.scala From carbondata with Apache License 2.0

package org.apache.spark.sql.hive

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.util.CarbonReflectionUtils

class CarbonAnalyzer(
    catalog: SessionCatalog,
    conf: SQLConf,
    sparkSession: SparkSession,
    analyzer: Analyzer) extends Analyzer(catalog, conf) {

  val mvPlan = try {
    CarbonReflectionUtils.createObject(
      "org.apache.carbondata.mv.extension.MVAnalyzerRule",
      sparkSession)._1.asInstanceOf[Rule[LogicalPlan]]
  } catch {
    case e: Exception =>
      null
  }

  override def execute(plan: LogicalPlan): LogicalPlan = {
    val logicalPlan = analyzer.execute(plan)
    if (mvPlan != null) {
      mvPlan.apply(logicalPlan)
    } else {
      logicalPlan
    }
  }
}
Example 15
Source File: SparkOptimizer.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}
Example 16
Source File: AggregateOptimizeSuite.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 17
Source File: AnalysisTest.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SimpleCatalystConf(caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
             |Failed to Analyze Plan
             |$inputPlan
             |
             |Partial Analysis
             |$analysisAttempt
           """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    if (!expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}
Example 18
Source File: RewriteDistinctAggregatesSuite.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{If, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectSet, Count}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with non-partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        CollectSet('b).toAggregateExpression().as('agg2))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}
Example 19
Source File: AnalysisTest.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SimpleCatalystConf(caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
             |Failed to Analyze Plan
             |$inputPlan
             |
             |Partial Analysis
             |$analysisAttempt
           """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    if (!expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}
Example 20
Source File: SparkOptimizer.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruning
import org.apache.spark.sql.execution.python.{ExtractPythonUDFFromAggregate, ExtractPythonUDFs}

class SparkOptimizer(
    catalog: SessionCatalog,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog) {

  override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+
    Batch("Extract Python UDFs", Once, Seq(ExtractPythonUDFFromAggregate, ExtractPythonUDFs): _*) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("Parquet Schema Pruning", Once, ParquetSchemaPruning)) ++
    postHocOptimizationBatches :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)

  override def nonExcludableRules: Seq[String] =
    super.nonExcludableRules :+ ExtractPythonUDFFromAggregate.ruleName

  def postHocOptimizationBatches: Seq[Batch] = Nil
}
Example 21
Source File: AggregateOptimizeSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}

class AggregateOptimizeSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, 'b, Literal(1).as('y)).groupBy('a)(sum('b)).analyze
    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze
    comparePlans(optimized, correctAnswer)
  }
}
Example 22
Source File: EliminateSortsSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL}

class EliminateSortsSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> true, ORDER_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Eliminate Sorts", FixedPoint(10),
        FoldablePropagation,
        EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation
    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze
    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)
    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation
    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))
    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation
    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))
    comparePlans(optimized, correctAnswer)
  }
}
Example 23
Source File: RewriteDistinctAggregatesSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}
Example 24
Source File: OptimizerStructuralIntegrityCheckerSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.analysis.{EmptyFunctionRegistry, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Alias, Literal}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project}
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.internal.SQLConf

class OptimizerStructuralIntegrityCheckerSuite extends PlanTest {

  object OptimizeRuleBreakSI extends Rule[LogicalPlan] {
    def apply(plan: LogicalPlan): LogicalPlan = plan transform {
      case Project(projectList, child) =>
        val newAttr = UnresolvedAttribute("unresolvedAttr")
        Project(projectList ++ Seq(newAttr), child)
    }
  }

  object Optimize extends Optimizer(
    new SessionCatalog(
      new InMemoryCatalog,
      EmptyFunctionRegistry,
      new SQLConf())) {
    val newBatch = Batch("OptimizeRuleBreakSI", Once, OptimizeRuleBreakSI)
    override def defaultBatches: Seq[Batch] = Seq(newBatch) ++ super.defaultBatches
  }

  test("check for invalid plan after execution of rule") {
    val analyzed = Project(Alias(Literal(10), "attr")() :: Nil, OneRowRelation()).analyze
    assert(analyzed.resolved)
    val message = intercept[TreeNodeException[LogicalPlan]] {
      Optimize.execute(analyzed)
    }.getMessage
    val ruleName = OptimizeRuleBreakSI.ruleName
    assert(message.contains(s"After applying rule $ruleName in batch OptimizeRuleBreakSI"))
    assert(message.contains("the structural integrity of the plan is broken"))
  }
}
Example 25
Source File: LookupFunctionsSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import java.net.URI

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf

class LookupFunctionsSuite extends PlanTest {

  test("SPARK-23486: the functionExists for the Persistent function check") {
    val externalCatalog = new CustomInMemoryCatalog
    val conf = new SQLConf()
    val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin, conf)
    val analyzer = {
      catalog.createDatabase(
        CatalogDatabase("default", "", new URI("loc"), Map.empty),
        ignoreIfExists = false)
      new Analyzer(catalog, conf)
    }

    def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref))
    val unresolvedPersistentFunc = UnresolvedFunction("func", Seq.empty, false)
    val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false)
    val plan = Project(
      Seq(Alias(unresolvedPersistentFunc, "call1")(), Alias(unresolvedPersistentFunc, "call2")(),
        Alias(unresolvedPersistentFunc, "call3")(), Alias(unresolvedRegisteredFunc, "call4")(),
        Alias(unresolvedRegisteredFunc, "call5")()),
      table("TaBlE"))
    analyzer.LookupFunctions.apply(plan)

    assert(externalCatalog.getFunctionExistsCalledTimes == 1)
    assert(analyzer.LookupFunctions.normalizeFuncName(
      unresolvedPersistentFunc.name).database == Some("default"))
  }

  test("SPARK-23486: the functionExists for the Registered function check") {
    val externalCatalog = new InMemoryCatalog
    val conf = new SQLConf()
    val customerFunctionReg = new CustomerFunctionRegistry
    val catalog = new SessionCatalog(externalCatalog, customerFunctionReg, conf)
    val analyzer = {
      catalog.createDatabase(
        CatalogDatabase("default", "", new URI("loc"), Map.empty),
        ignoreIfExists = false)
      new Analyzer(catalog, conf)
    }

    def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref))
    val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false)
    val plan = Project(
      Seq(Alias(unresolvedRegisteredFunc, "call1")(), Alias(unresolvedRegisteredFunc, "call2")()),
      table("TaBlE"))
    analyzer.LookupFunctions.apply(plan)

    assert(customerFunctionReg.getIsRegisteredFunctionCalledTimes == 2)
    assert(analyzer.LookupFunctions.normalizeFuncName(
      unresolvedRegisteredFunc.name).database == Some("default"))
  }
}

class CustomerFunctionRegistry extends SimpleFunctionRegistry {
  private var isRegisteredFunctionCalledTimes: Int = 0

  override def functionExists(funcN: FunctionIdentifier): Boolean = synchronized {
    isRegisteredFunctionCalledTimes = isRegisteredFunctionCalledTimes + 1
    true
  }

  def getIsRegisteredFunctionCalledTimes: Int = isRegisteredFunctionCalledTimes
}

class CustomInMemoryCatalog extends InMemoryCatalog {
  private var functionExistsCalledTimes: Int = 0

  override def functionExists(db: String, funcName: String): Boolean = synchronized {
    functionExistsCalledTimes = functionExistsCalledTimes + 1
    true
  }

  def getFunctionExistsCalledTimes: Int = functionExistsCalledTimes
}
Example 26
Source File: AnalysisTest.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.catalyst.analysis

import java.net.URI
import java.util.Locale

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf)
    catalog.createDatabase(
      CatalogDatabase("default", "", new URI("loc"), Map.empty),
      ignoreIfExists = false)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    catalog.createTempView("TaBlE2", TestRelations.testRelation2, overrideIfExists = true)
    catalog.createTempView("TaBlE3", TestRelations.testRelation3, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.executeAndCheck(inputPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected override def comparePlans(
      plan1: LogicalPlan,
      plan2: LogicalPlan,
      checkAnalysis: Boolean = false): Unit = {
    // Analysis tests may have not been fully resolved, so skip checkAnalysis.
    super.comparePlans(plan1, plan2, checkAnalysis)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
             |Failed to Analyze Plan
             |$inputPlan
             |
             |Partial Analysis
             |$analysisAttempt
           """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall(
        e.getMessage.toLowerCase(Locale.ROOT).contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}
Example 27
Source File: SparkWrapper.scala From tispark with Apache License 2.0 | 5 votes |
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.4"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
}
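A brief usage sketch (hypothetical, not from the tispark source) of the wrapper's factory methods; the package name, attribute name, and literal below are made up for illustration.

package com.pingcap.tispark.examples

import com.pingcap.tispark.SparkWrapper
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types.{IntegerType, Metadata}

object SparkWrapperUsage {
  // Build catalyst nodes through the version-specific wrapper instead of
  // calling the version-dependent constructors directly.
  val idAttr = SparkWrapper.newAttributeReference("id", IntegerType, nullable = false, Metadata.empty)
  val aliased = SparkWrapper.newAlias(Literal(1), "one")
  val subquery = SparkWrapper.newSubqueryAlias("t", LocalRelation(idAttr))
}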
Example 28
Source File: SparkWrapper.scala From tispark with Apache License 2.0 | 5 votes |
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.3"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
}
Example 29
Source File: SparkOptimizer.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}
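The last batch picks up whatever rules are registered on ExperimentalMethods. A minimal sketch, assuming a spark-shell-style session named spark, of feeding a rule into that batch; NoOpRule is a made-up placeholder, not part of the drizzle-spark source.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// A placeholder rule that leaves the plan untouched; a real rule would
// transform the LogicalPlan here.
object NoOpRule extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

val spark = SparkSession.builder().master("local[*]").appName("extra-optimizer").getOrCreate()
// Registered rules end up in the "User Provided Optimizers" batch above.
spark.experimental.extraOptimizations = Seq(NoOpRule)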
Example 30
Source File: AggregateOptimizeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 31
Source File: EliminateSortsSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

class EliminateSortsSuite extends PlanTest {
  val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Eliminate Sorts", FixedPoint(10),
      FoldablePropagation,
      EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation

    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze

    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation

    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)

    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation

    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation

    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))

    comparePlans(optimized, correctAnswer)
  }
}
Example 32
Source File: RewriteDistinctAggregatesSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{If, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectSet, Count}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with non-partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        CollectSet('b).toAggregateExpression().as('agg2))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}