org.apache.spark.sql.catalyst.optimizer.Optimizer Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.optimizer.Optimizer.
Each example lists the source file it was taken from and the project and license it comes from.
Example 1
Source File: SparkOptimizer.scala from drizzle-spark (Apache License 2.0)
package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}
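The final batch above runs whatever rules were registered through ExperimentalMethods. As a rough sketch of how that hook is usually fed from application code (the rule and session setup below are illustrative, not part of the drizzle-spark sources):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// Illustrative rule that returns the plan unchanged; a real rule would
// rewrite nodes via `plan transform { ... }`.
object NoOpOptimization extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

val spark = SparkSession.builder().master("local[*]").appName("extra-optimizations").getOrCreate()

// Rules registered here are what the "User Provided Optimizers" batch picks up
// through experimentalMethods.extraOptimizations.
spark.experimental.extraOptimizations = Seq(NoOpOptimization)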
Example 2
Source File: SparkOptimizer.scala from XSQL (Apache License 2.0)
package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaPruning
import org.apache.spark.sql.execution.python.{ExtractPythonUDFFromAggregate, ExtractPythonUDFs}

class SparkOptimizer(
    catalog: SessionCatalog,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog) {

  override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+
    Batch("Extract Python UDFs", Once, Seq(ExtractPythonUDFFromAggregate, ExtractPythonUDFs): _*) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("Parquet Schema Pruning", Once, ParquetSchemaPruning)) ++
    postHocOptimizationBatches :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)

  override def nonExcludableRules: Seq[String] =
    super.nonExcludableRules :+ ExtractPythonUDFFromAggregate.ruleName

  def postHocOptimizationBatches: Seq[Batch] = Nil
}
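This variant also overrides nonExcludableRules, which interacts with the spark.sql.optimizer.excludedRules setting: users can ask for optimizer rules to be skipped by name, but rules reported as non-excludable are always kept. A minimal sketch of excluding a rule at session creation, assuming a Spark 2.4-era build where this config exists (the excluded rule name is only an example):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("excluded-rules")
  // Comma-separated list of optimizer rule names to skip. Rules returned by
  // nonExcludableRules (such as ExtractPythonUDFFromAggregate above) are kept
  // even if listed here.
  .config("spark.sql.optimizer.excludedRules",
    "org.apache.spark.sql.catalyst.optimizer.PushDownPredicate")
  .getOrCreate()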
Example 3
Source File: ExtendableHiveContext.scala from HANAVora-Extensions (Apache License 2.0)
package org.apache.spark.sql.hive

import org.apache.spark.SparkContext
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.ParserDialect
import org.apache.spark.sql.catalyst.analysis.{Analyzer, _}
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.ui.SQLListener
import org.apache.spark.sql.execution.{CacheManager, ExtractPythonUDFs}
import org.apache.spark.sql.extension._
import org.apache.spark.sql.hive.client.{ClientInterface, ClientWrapper}
import org.apache.spark.sql.sources.commands.hive.HiveEmulationCatalog

  @transient
  override protected[sql] lazy val analyzer: Analyzer =
    new Analyzer(catalog, functionRegistry, conf) {
      override val extendedResolutionRules =
        resolutionRules(this) ++
          (catalog.ParquetConversions ::
            catalog.CreateTables ::
            catalog.PreInsertionCasts ::
            ExtractPythonUDFs ::
            ResolveHiveWindowFunction ::
            PreInsertCastAndRename ::
            Nil)

      override val extendedCheckRules = ExtendableHiveContext.this.extendedCheckRules(this)
    }

  @transient
  override protected[sql] lazy val optimizer: Optimizer = OptimizerFactory.produce(
    earlyBatches = optimizerEarlyBatches,
    mainBatchRules = optimizerMainBatchRules,
    postBatches = optimizerPostBatches
  )

  @transient
  override protected[sql] val planner: SparkPlanner with HiveStrategies =
    new SparkPlanner with HiveStrategies with ExtendedPlanner {

      def baseStrategies(hiveContext: HiveContext): Seq[Strategy] = Seq(
        DataSourceStrategy,
        HiveCommandStrategy(self),
        HiveDDLStrategy,
        DDLStrategy,
        TakeOrderedAndProject,
        InMemoryScans,
        HiveTableScans,
        DataSinks,
        Scripts,
        Aggregation,
        LeftSemiJoin,
        EquiJoinSelection,
        BasicOperators,
        BroadcastNestedLoop,
        CartesianProduct,
        DefaultJoin
      )

      override def strategies: Seq[Strategy] =
        self.strategies(this) ++
          experimental.extraStrategies ++
          baseStrategies(self)

      override val hiveContext = self
    }
}
Example 4
Source File: OptimizerFactory.scala from HANAVora-Extensions (Apache License 2.0)
package org.apache.spark.sql.extension

import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

  def produce(earlyBatches: Seq[ExtendableOptimizerBatch] = Nil,
              mainBatchRules: Seq[Rule[LogicalPlan]] = Nil,
              postBatches: Seq[ExtendableOptimizerBatch] = Nil): Optimizer = {
    if (org.apache.spark.SPARK_VERSION.contains("1.6.2")) {
      new ExtendableOptimizer162(earlyBatches, mainBatchRules, postBatches)
    } else {
      new ExtendableOptimizer161(earlyBatches, mainBatchRules, postBatches)
    }
  }
}
Example 5
Source File: ExtendableSQLContext.scala from HANAVora-Extensions (Apache License 2.0)
package org.apache.spark.sql.extension

import org.apache.spark.SparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.ParserDialect
import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.ExtractPythonUDFs
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.sources.commands.hive.HiveEmulationCatalog

  @transient
  override protected[sql] val planner =
    // HiveStrategies defines its own strategies, we should be back to SparkPlanner strategies
    new SparkPlanner with ExtendedPlanner {

      def baseStrategies: Seq[Strategy] =
        DataSourceStrategy ::
          DDLStrategy ::
          TakeOrderedAndProject ::
          Aggregation ::
          LeftSemiJoin ::
          EquiJoinSelection ::
          InMemoryScans ::
          BasicOperators ::
          BroadcastNestedLoop ::
          CartesianProduct ::
          DefaultJoin :: Nil

      override def strategies: Seq[Strategy] =
        self.strategies(this) ++
          experimental.extraStrategies ++
          baseStrategies
    }
}
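The planner above folds experimental.extraStrategies into its strategy list, which is the standard SQLContext hook for injecting custom physical planning alongside a custom Optimizer. As a minimal sketch of that hook in plain Spark, under the Spark 1.6-era API this project targets (the strategy below is illustrative and deliberately matches nothing):

import org.apache.spark.sql.{SQLContext, Strategy}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan

// Illustrative strategy that declines every plan; a real strategy would
// pattern-match specific logical operators and return physical plans for them.
object DeclineAllStrategy extends Strategy {
  def apply(plan: LogicalPlan): Seq[SparkPlan] = Nil
}

def register(sqlContext: SQLContext): Unit = {
  // Strategies registered here are merged into planner.strategies, just like
  // experimental.extraStrategies in the planner definition above.
  sqlContext.experimental.extraStrategies = Seq(DeclineAllStrategy)
}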
Example 6
Source File: ExtendableOptimizerSuite.scala from HANAVora-Extensions (Apache License 2.0)
package org.apache.spark.sql.extension

import org.apache.spark.sql.catalyst.optimizer.{FiltersReduction, Optimizer}
import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch
import org.scalatest.{FunSuite, PrivateMethodTester}

class ExtendableOptimizerSuite extends FunSuite with PrivateMethodTester {

  implicit class OptimizerOps(opt: Optimizer) {
    private val nameMethod = PrivateMethod[String]('name)

    private def batches: Seq[AnyRef] = {
      val clazz = opt.getClass
      val batchesMethod = clazz.getMethods.find(_.getName == "batches").get
      batchesMethod.setAccessible(true)
      batchesMethod.invoke(opt).asInstanceOf[Seq[AnyRef]]
    }

    def batchNames: Seq[String] = batches map { b => b invokePrivate nameMethod() }
  }

  test("No rules is equivalent to DefaultOptimizer") {
    val extOpt = OptimizerFactory.produce()
    val defOpt = OptimizerFactoryForTests.default()
    assert(extOpt.batchNames == defOpt.batchNames)
  }

  test("One early batch is added before the main optimizer batch") {
    val extOpt = OptimizerFactory.produce(
      earlyBatches = ExtendableOptimizerBatch("FOO", 1, FiltersReduction :: Nil) :: Nil
    )
    assert(extOpt.batchNames match {
      case subQueries :: early :: other => early.equals("FOO")
    })
  }

  test("Several early batches are added before the main optimizer batch") {
    val extOpt = OptimizerFactory.produce(
      earlyBatches =
        ExtendableOptimizerBatch("FOO", 1, FiltersReduction :: Nil) ::
          ExtendableOptimizerBatch("BAR", 1, FiltersReduction :: Nil) :: Nil
    )
    assert(extOpt.batchNames match {
      case subQueries :: firstEarly :: secondEarly :: other =>
        firstEarly.equals("FOO") && secondEarly.equals("BAR")
    })
  }

  test("Expression rules are added") {
    val extOpt = OptimizerFactory.produce(mainBatchRules = FiltersReduction :: Nil)
    val defOpt = OptimizerFactoryForTests.default()
    assert(extOpt.batchNames == defOpt.batchNames)
  }

  test("Both rules are added") {
    val extOpt = OptimizerFactory.produce(
      earlyBatches = ExtendableOptimizerBatch("FOO", 1, FiltersReduction :: Nil) :: Nil,
      mainBatchRules = FiltersReduction :: Nil
    )
    val defOpt = OptimizerFactoryForTests.default()
    assert(extOpt.batchNames.toSet == defOpt.batchNames.toSet ++ Seq("FOO"))
  }
}
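The FiltersReduction rule used throughout these tests ships with the project itself; any Rule[LogicalPlan] can be handed to produce in the same way. As an illustrative sketch of such a rule (not the project's actual FiltersReduction implementation), here is one that collapses two stacked Filter operators into a single conjunction:

import org.apache.spark.sql.catalyst.expressions.And
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule

// Merges two adjacent Filter nodes into one Filter with a combined predicate,
// so downstream rules see a single condition instead of two.
object CombineAdjacentFilters extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Filter(outer, Filter(inner, child)) => Filter(And(inner, outer), child)
  }
}

// It could then be registered as a main-batch rule:
// OptimizerFactory.produce(mainBatchRules = CombineAdjacentFilters :: Nil)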
Example 7
Source File: SparkOptimizer.scala from sparkoscope (Apache License 2.0)
package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}
Example 8
Source File: SparkOptimizer.scala from multi-tenancy-spark (Apache License 2.0)
package org.apache.spark.sql.execution

import org.apache.spark.sql.ExperimentalMethods
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.optimizer.Optimizer
import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions
import org.apache.spark.sql.execution.python.ExtractPythonUDFFromAggregate
import org.apache.spark.sql.internal.SQLConf

class SparkOptimizer(
    catalog: SessionCatalog,
    conf: SQLConf,
    experimentalMethods: ExperimentalMethods)
  extends Optimizer(catalog, conf) {

  override def batches: Seq[Batch] = super.batches :+
    Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog, conf)) :+
    Batch("Extract Python UDF from Aggregate", Once, ExtractPythonUDFFromAggregate) :+
    Batch("Prune File Source Table Partitions", Once, PruneFileSourcePartitions) :+
    Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
}