org.apache.spark.sql.catalyst.SimpleCatalystConf Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.SimpleCatalystConf.
Each example is taken from an open-source project; the originating project and source file are noted above each listing.
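Before the full test suites below, here is a minimal sketch of the pattern they all share: construct a SimpleCatalystConf, feed it to a catalog and an Analyzer, and run plans built with the Catalyst test DSL through them. This is an illustration, not code from any of the projects below: it assumes a Spark 2.1-era spark-catalyst test classpath (SimpleCatalystConf is an internal helper that was removed from later Spark versions), and the object name SimpleCatalystConfSketch is invented for the example.

package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

// Illustrative object name; placed inside a catalyst package because these
// classes are internal to Spark SQL.
object SimpleCatalystConfSketch {
  // SimpleCatalystConf is a case class: set the flags a test cares about here,
  // and use conf.copy(...) to flip a single flag for one assertion.
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)

  // The same conf drives both name resolution in the catalog and the analyzer.
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  // A small relation built with the Catalyst test DSL.
  val testRelation = LocalRelation('a.int, 'b.int)

  def main(args: Array[String]): Unit = {
    // Resolve attribute references in a tiny plan and print the analyzed tree.
    val analyzed = analyzer.execute(testRelation.select('a))
    println(analyzed.treeString)
  }
}

Every suite below is a variation on this skeleton, differing mainly in which analyzer or optimizer rules it exercises and in how old the Catalyst API it targets is.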
Example 1
Source File: OptimizeCodegenSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.Literal._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

class OptimizeCodegenSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("OptimizeCodegen", Once, OptimizeCodegen(SimpleCatalystConf(true))) :: Nil
  }

  protected def assertEquivalent(e1: Expression, e2: Expression): Unit = {
    val correctAnswer = Project(Alias(e2, "out")() :: Nil, OneRowRelation).analyze
    val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, OneRowRelation).analyze)
    comparePlans(actual, correctAnswer)
  }

  test("Codegen only when the number of branches is small.") {
    assertEquivalent(
      CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)),
      CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)).toCodegen())

    assertEquivalent(
      CaseWhen(List.fill(100)(TrueLiteral, Literal(1)), Literal(2)),
      CaseWhen(List.fill(100)(TrueLiteral, Literal(1)), Literal(2)))
  }

  test("Nested CaseWhen Codegen.") {
    assertEquivalent(
      CaseWhen(
        Seq((CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)), Literal(3))),
        CaseWhen(Seq((TrueLiteral, Literal(4))), Literal(5))),
      CaseWhen(
        Seq((CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)).toCodegen(), Literal(3))),
        CaseWhen(Seq((TrueLiteral, Literal(4))), Literal(5)).toCodegen()).toCodegen())
  }

  test("Multiple CaseWhen in one operator.") {
    val plan = OneRowRelation
      .select(
        CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)),
        CaseWhen(Seq((FalseLiteral, Literal(3))), Literal(4)),
        CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)),
        CaseWhen(Seq((TrueLiteral, Literal(5))), Literal(6))).analyze
    val correctAnswer = OneRowRelation
      .select(
        CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)).toCodegen(),
        CaseWhen(Seq((FalseLiteral, Literal(3))), Literal(4)).toCodegen(),
        CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)),
        CaseWhen(Seq((TrueLiteral, Literal(5))), Literal(6)).toCodegen()).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, correctAnswer)
  }

  test("Multiple CaseWhen in different operators") {
    val plan = OneRowRelation
      .select(
        CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)),
        CaseWhen(Seq((FalseLiteral, Literal(3))), Literal(4)),
        CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)))
      .where(
        LessThan(
          CaseWhen(Seq((TrueLiteral, Literal(5))), Literal(6)),
          CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)))
      ).analyze
    val correctAnswer = OneRowRelation
      .select(
        CaseWhen(Seq((TrueLiteral, Literal(1))), Literal(2)).toCodegen(),
        CaseWhen(Seq((FalseLiteral, Literal(3))), Literal(4)).toCodegen(),
        CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)))
      .where(
        LessThan(
          CaseWhen(Seq((TrueLiteral, Literal(5))), Literal(6)).toCodegen(),
          CaseWhen(List.fill(20)((TrueLiteral, Literal(0))), Literal(0)))
      ).analyze
    val optimized = Optimize.execute(plan)
    comparePlans(optimized, correctAnswer)
  }
}
Example 2
Source File: AnalysisTest.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.{TableIdentifier, SimpleCatalystConf}

trait AnalysisTest extends PlanTest {

  val (caseSensitiveAnalyzer, caseInsensitiveAnalyzer) = {
    val caseSensitiveConf = new SimpleCatalystConf(true)
    val caseInsensitiveConf = new SimpleCatalystConf(false)

    val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf)
    val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf)

    caseSensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation)
    caseInsensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation)

    new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    } ->
    new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    analyzer.checkAnalysis(analyzer.execute(inputPlan))
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    assert(expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains),
      s"Expected to throw Exception contains: ${expectedErrors.mkString(", ")}, " +
        s"actually we get ${e.getMessage}")
  }
}
Example 3
Source File: BooleanSimplificationSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.dsl.expressions._

class BooleanSimplificationSuite extends PlanTest with PredicateHelper {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("AnalysisNodes", Once,
        EliminateSubQueries) ::
      Batch("Constant Folding", FixedPoint(50),
        NullPropagation,
        ConstantFolding,
        BooleanSimplification,
        SimplifyFilters) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int, 'd.string)

  private def checkCondition(input: Expression, expected: Expression): Unit = {
    val plan = testRelation.where(input).analyze
    val actual = Optimize.execute(plan)
    val correctAnswer = testRelation.where(expected).analyze
    comparePlans(actual, correctAnswer)
  }

  test("a && a => a") {
    checkCondition(Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
    checkCondition(Literal(1) < 'a && Literal(1) < 'a && Literal(1) < 'a, Literal(1) < 'a)
  }

  test("a || a => a") {
    checkCondition(Literal(1) < 'a || Literal(1) < 'a, Literal(1) < 'a)
    checkCondition(Literal(1) < 'a || Literal(1) < 'a || Literal(1) < 'a, Literal(1) < 'a)
  }

  test("(a && b && c && ...) || (a && b && d && ...) || (a && b && e && ...) ...") {
    checkCondition('b > 3 || 'c > 5, 'b > 3 || 'c > 5)

    checkCondition(('a < 2 && 'a > 3 && 'b > 5) || 'a < 2, 'a < 2)

    checkCondition('a < 2 || ('a < 2 && 'a > 3 && 'b > 5), 'a < 2)

    val input = ('a === 'b && 'b > 3 && 'c > 2) ||
      ('a === 'b && 'c < 1 && 'a === 5) ||
      ('a === 'b && 'b < 5 && 'a > 1)
    val expected = 'a === 'b && (
      ('b > 3 && 'c > 2) || ('c < 1 && 'a === 5) || ('b < 5 && 'a > 1))
    checkCondition(input, expected)
  }

  test("(a || b || c || ...) && (a || b || d || ...) && (a || b || e || ...) ...") {
    checkCondition('b > 3 && 'c > 5, 'b > 3 && 'c > 5)

    checkCondition(('a < 2 || 'a > 3 || 'b > 5) && 'a < 2, 'a < 2)

    checkCondition('a < 2 && ('a < 2 || 'a > 3 || 'b > 5), 'a < 2)

    checkCondition(('a < 2 || 'b > 3) && ('a < 2 || 'c > 5), 'a < 2 || ('b > 3 && 'c > 5))

    checkCondition(
      ('a === 'b || 'b > 3) && ('a === 'b || 'a > 3) && ('a === 'b || 'a < 5),
      ('a === 'b || 'b > 3 && 'a > 3 && 'a < 5))
  }

  private val caseInsensitiveAnalyzer =
    new Analyzer(EmptyCatalog, EmptyFunctionRegistry, new SimpleCatalystConf(false))

  test("(a && b) || (a && c) => a && (b || c) when case insensitive") {
    val plan = caseInsensitiveAnalyzer.execute(
      testRelation.where(('a > 2 && 'b > 3) || ('A > 2 && 'b < 5)))
    val actual = Optimize.execute(plan)
    val expected = caseInsensitiveAnalyzer.execute(
      testRelation.where('a > 2 && ('b > 3 || 'b < 5)))
    comparePlans(actual, expected)
  }

  test("(a || b) && (a || c) => a || (b && c) when case insensitive") {
    val plan = caseInsensitiveAnalyzer.execute(
      testRelation.where(('a > 2 || 'b > 3) && ('A > 2 || 'b < 5)))
    val actual = Optimize.execute(plan)
    val expected = caseInsensitiveAnalyzer.execute(
      testRelation.where('a > 2 || ('b > 3 && 'b < 5)))
    comparePlans(actual, expected)
  }
}
Example 4
Source File: AnalysisTest.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.SimpleCatalystConf

trait AnalysisTest extends PlanTest {

  val (caseSensitiveAnalyzer, caseInsensitiveAnalyzer) = {
    val caseSensitiveConf = new SimpleCatalystConf(true)
    val caseInsensitiveConf = new SimpleCatalystConf(false)

    val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf)
    val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf)

    caseSensitiveCatalog.registerTable(Seq("TaBlE"), TestRelations.testRelation)
    caseInsensitiveCatalog.registerTable(Seq("TaBlE"), TestRelations.testRelation)

    new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    } ->
    new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    analyzer.checkAnalysis(analyzer.execute(inputPlan))
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    // todo: make sure we throw AnalysisException during analysis
    val e = intercept[Exception] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    assert(expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains),
      s"Expected to throw Exception contains: ${expectedErrors.mkString(", ")}, " +
        s"actually we get ${e.getMessage}")
  }
}
Example 5
Source File: AggregateOptimizeSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.RuleExecutor

class AggregateOptimizeSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("Aggregate", FixedPoint(100),
      FoldablePropagation,
      RemoveLiteralFromGroupExpressions,
      RemoveRepetitionFromGroupExpressions) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("remove literals in grouping expression") {
    val query = testRelation.groupBy('a, Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("do not remove all grouping expressions if they are all literals") {
    val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b)))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove aliased literals") {
    val query = testRelation.select('a, Literal(1).as('y)).groupBy('a, 'y)(sum('b))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = testRelation.select('a, Literal(1).as('y)).groupBy('a)(sum('b)).analyze

    comparePlans(optimized, correctAnswer)
  }

  test("remove repetition in grouping expression") {
    val input = LocalRelation('a.int, 'b.int, 'c.int)
    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 'B)(sum('c))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze

    comparePlans(optimized, correctAnswer)
  }
}
Example 6
Source File: EliminateSortsSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

class EliminateSortsSuite extends PlanTest {
  val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Eliminate Sorts", FixedPoint(10),
        FoldablePropagation,
        EliminateSorts) :: Nil
  }

  val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

  test("Empty order by clause") {
    val x = testRelation

    val query = x.orderBy()
    val optimized = Optimize.execute(query.analyze)
    val correctAnswer = x.analyze

    comparePlans(optimized, correctAnswer)
  }

  test("All the SortOrder are no-op") {
    val x = testRelation

    val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending))
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x)

    comparePlans(optimized, correctAnswer)
  }

  test("Partial order-by clauses contain no-op SortOrder") {
    val x = testRelation

    val query = x.orderBy(SortOrder(3, Ascending), 'a.asc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(x.orderBy('a.asc))

    comparePlans(optimized, correctAnswer)
  }

  test("Remove no-op alias") {
    val x = testRelation

    val query = x.select('a.as('x), Year(CurrentDate()).as('y), 'b)
      .orderBy('x.asc, 'y.asc, 'b.desc)
    val optimized = Optimize.execute(analyzer.execute(query))
    val correctAnswer = analyzer.execute(
      x.select('a.as('x), Year(CurrentDate()).as('y), 'b).orderBy('x.asc, 'b.desc))

    comparePlans(optimized, correctAnswer)
  }
}
Example 7
Source File: RewriteDistinctAggregatesSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry}
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{If, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectSet, Count}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan}
import org.apache.spark.sql.types.{IntegerType, StringType}

class RewriteDistinctAggregatesSuite extends PlanTest {
  val conf = SimpleCatalystConf(caseSensitiveAnalysis = false, groupByOrdinal = false)
  val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
  val analyzer = new Analyzer(catalog, conf)

  val nullInt = Literal(null, IntegerType)
  val nullString = Literal(null, StringType)
  val testRelation = LocalRelation('a.string, 'b.string, 'c.string, 'd.string, 'e.int)

  private def checkRewrite(rewrite: LogicalPlan): Unit = rewrite match {
    case Aggregate(_, _, Aggregate(_, _, _: Expand)) =>
    case _ => fail(s"Plan is not rewritten:\n$rewrite")
  }

  test("single distinct group") {
    val input = testRelation
      .groupBy('a)(countDistinct('e))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        max('b).as('agg2))
      .analyze
    val rewrite = RewriteDistinctAggregates(input)
    comparePlans(input, rewrite)
  }

  test("single distinct group with non-partial aggregates") {
    val input = testRelation
      .groupBy('a, 'd)(
        countDistinct('e, 'c).as('agg1),
        CollectSet('b).toAggregateExpression().as('agg2))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with partial aggregates") {
    val input = testRelation
      .groupBy('a)(countDistinct('b, 'c), countDistinct('d), sum('e))
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }

  test("multiple distinct groups with non-partial aggregates") {
    val input = testRelation
      .groupBy('a)(
        countDistinct('b, 'c),
        countDistinct('d),
        CollectSet('b).toAggregateExpression())
      .analyze
    checkRewrite(RewriteDistinctAggregates(input))
  }
}
Example 8
Source File: SubstituteUnresolvedOrdinalsSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.analysis.TestRelations.testRelation2
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.SimpleCatalystConf

class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest {
  private lazy val conf = SimpleCatalystConf(caseSensitiveAnalysis = true)
  private lazy val a = testRelation2.output(0)
  private lazy val b = testRelation2.output(1)

  test("unresolved ordinal should not be unresolved") {
    // Expression OrderByOrdinal is unresolved.
    assert(!UnresolvedOrdinal(0).resolved)
  }

  test("order by ordinal") {
    // Tests order by ordinal, apply single rule.
    val plan = testRelation2.orderBy(Literal(1).asc, Literal(2).asc)
    comparePlans(
      new SubstituteUnresolvedOrdinals(conf).apply(plan),
      testRelation2.orderBy(UnresolvedOrdinal(1).asc, UnresolvedOrdinal(2).asc))

    // Tests order by ordinal, do full analysis
    checkAnalysis(plan, testRelation2.orderBy(a.asc, b.asc))

    // order by ordinal can be turned off by config
    comparePlans(
      new SubstituteUnresolvedOrdinals(conf.copy(orderByOrdinal = false)).apply(plan),
      testRelation2.orderBy(Literal(1).asc, Literal(2).asc))
  }

  test("group by ordinal") {
    // Tests group by ordinal, apply single rule.
    val plan2 = testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)
    comparePlans(
      new SubstituteUnresolvedOrdinals(conf).apply(plan2),
      testRelation2.groupBy(UnresolvedOrdinal(1), UnresolvedOrdinal(2))('a, 'b))

    // Tests group by ordinal, do full analysis
    checkAnalysis(plan2, testRelation2.groupBy(a, b)(a, b))

    // group by ordinal can be turned off by config
    comparePlans(
      new SubstituteUnresolvedOrdinals(conf.copy(groupByOrdinal = false)).apply(plan2),
      testRelation2.groupBy(Literal(1), Literal(2))('a, 'b))
  }
}
Example 9
Source File: AnalysisTest.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.SimpleCatalystConf
import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._

trait AnalysisTest extends PlanTest {

  protected val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true)
  protected val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false)

  private def makeAnalyzer(caseSensitive: Boolean): Analyzer = {
    val conf = new SimpleCatalystConf(caseSensitive)
    val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf)
    catalog.createTempView("TaBlE", TestRelations.testRelation, overrideIfExists = true)
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules = EliminateSubqueryAliases :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val analysisAttempt = analyzer.execute(inputPlan)
    try analyzer.checkAnalysis(analysisAttempt) catch {
      case a: AnalysisException =>
        fail(
          s"""
            |Failed to Analyze Plan
            |$inputPlan
            |
            |Partial Analysis
            |$analysisAttempt
          """.stripMargin, a)
    }
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }

    if (!expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains)) {
      fail(
        s"""Exception message should contain the following substrings:
           |
           |  ${expectedErrors.mkString("\n  ")}
           |
           |Actual exception message:
           |
           |  ${e.getMessage}
         """.stripMargin)
    }
  }
}