org.apache.spark.sql.catalyst.rules.Rule Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.rules.Rule.
Each example names the open-source project and source file it was taken from.
Example 1
Source File: SQLPushdownRule.scala From memsql-spark-connector with Apache License 2.0 | 5 votes |
package com.memsql.spark

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule

class SQLPushdownRule extends Rule[LogicalPlan] {
  override def apply(root: LogicalPlan): LogicalPlan = {
    val needsPushdown = root
      .find({
        case SQLGen.Relation(r: SQLGen.Relation) if !r.reader.isFinal => true
        case _ => false
      })
      .isDefined

    if (!needsPushdown) {
      return root
    }

    if (log.isTraceEnabled) {
      log.trace(s"Optimizing plan:\n${root.treeString(true)}")
    }

    // We first need to rename the outputs of each MemSQL relation in the tree. This transform is
    // done to ensure that we can handle projections which involve ambiguous column name references.
    var ptr, nextPtr = root.transform({
      case SQLGen.Relation(relation) => relation.renameOutput
    })

    val transforms = List(
      // do single node rewrites, e.g. Project([a,b,c], Relation(select * from foo))
      SQLGen.fromLogicalPlan.andThen(_.asLogicalPlan()),
      // do multi node rewrites, e.g. Sort(a, Limit(10, Relation(select * from foo)))
      SQLGen.fromNestedLogicalPlan.andThen(_.asLogicalPlan()),
      // do single node rewrites of sort & limit (so the multi-node rewrite can match first)
      SQLGen.fromSingleLimitSort.andThen(_.asLogicalPlan())
    )

    // Run our transforms in a loop until the tree converges
    do {
      ptr = nextPtr
      nextPtr = transforms.foldLeft(ptr)(_.transformUp(_))
    } while (!ptr.fastEquals(nextPtr))

    // Finalize all the relations in the tree and perform casts into the expected output datatype for Spark
    val out = ptr.transformDown({
      case SQLGen.Relation(relation) if !relation.isFinal => relation.castOutputAndFinalize
    })

    if (log.isTraceEnabled) {
      log.trace(s"Optimized Plan:\n${out.treeString(true)}")
    }

    out
  }
}

object SQLPushdownRule {
  def injected(session: SparkSession): Boolean = {
    session.experimental.extraOptimizations
      .exists(s => s.isInstanceOf[SQLPushdownRule])
  }

  def ensureInjected(session: SparkSession): Unit = {
    if (!injected(session)) {
      session.experimental.extraOptimizations ++= Seq(new SQLPushdownRule)
    }
  }

  def ensureRemoved(session: SparkSession): Unit = {
    session.experimental.extraOptimizations = session.experimental.extraOptimizations
      .filterNot(s => s.isInstanceOf[SQLPushdownRule])
  }
}
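The companion object above is the entry point for wiring the rule into a running session via `experimental.extraOptimizations`. A minimal usage sketch, assuming the connector is on the classpath (the builder settings below are illustrative placeholders, not part of the connector's documentation):

import org.apache.spark.sql.SparkSession
import com.memsql.spark.SQLPushdownRule

val spark = SparkSession.builder()
  .master("local[*]")          // placeholder
  .appName("pushdown-demo")    // placeholder
  .getOrCreate()

// Register the rule once per session; ensureInjected is idempotent.
SQLPushdownRule.ensureInjected(spark)

// ... run queries; eligible subtrees are rewritten by the rule during optimization ...

// Remove the rule again if pushdown should be disabled for this session.
SQLPushdownRule.ensureRemoved(spark)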
Example 2
Source File: RuleExecutorSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 3
Source File: Exchange.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType

case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls.
    val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]()
    plan.transformUp {
      case exchange: Exchange =>
        // the exchanges that have same results usually also have same schemas (same column names).
        val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]())
        val samePlan = sameSchema.find { e =>
          exchange.sameResult(e)
        }
        if (samePlan.isDefined) {
          // Keep the output of this exchange, the following plans require that to resolve
          // attributes.
          ReusedExchangeExec(exchange.output, samePlan.get, plan.user)
        } else {
          sameSchema += exchange
          exchange
        }
    }
  }
}
Example 4
Source File: PruneFileSourcePartitions.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, logicalRelation @ LogicalRelation(fsRelation @ HadoopFsRelation( catalogFileIndex: CatalogFileIndex, partitionSchema, _, _, _, _), _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => // The attribute name of predicate could be different than the one in schema in case of // case insensitive, we should change them to match the one in schema, so we donot need to // worry about case sensitivity anymore. val normalizedFilters = filters.map { e => e transform { case a: AttributeReference => a.withName(logicalRelation.output.find(_.semanticEquals(a)).get.name) } } val sparkSession = fsRelation.sparkSession val partitionColumns = logicalRelation.resolve( partitionSchema, sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) val partitionKeyFilters = ExpressionSet(normalizedFilters.filter(_.references.subsetOf(partitionSet))) if (partitionKeyFilters.nonEmpty) { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileIndex)(sparkSession) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, expectedOutputAttributes = Some(logicalRelation.output)) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) Project(projects, filter) } else { op } } }
Example 5
Source File: OptimizerExtendableSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

class OptimizerExtendableSuite extends SparkFunSuite {

  // No-op rule used only to populate the extra batches below.
  object DummyRule extends Rule[LogicalPlan] {
    def apply(p: LogicalPlan): LogicalPlan = p
  }

  class ExtendedOptimizer extends SimpleTestOptimizer {
    // rules set to DummyRule, would not be executed anyways
    val myBatches: Seq[Batch] = {
      Batch("once", Once, DummyRule) ::
        Batch("fixedPoint", FixedPoint(100), DummyRule) :: Nil
    }

    override def batches: Seq[Batch] = super.batches ++ myBatches
  }

  test("Extending batches possible") {
    // test simply instantiates the new extended optimizer
    val extendedOptimizer = new ExtendedOptimizer()
  }
}
Example 6
Source File: RuleExecutorSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    val message = intercept[TreeNodeException[LogicalPlan]] {
      ToFixedPoint.execute(Literal(100))
    }.getMessage
    assert(message.contains("Max iterations (10) reached for batch fixedPoint"))
  }
}
Example 7
Source File: ResolveInlineTables.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import scala.util.control.NonFatal

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.types.{StructField, StructType}

  private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = {
    // For each column, traverse all the values and find a common data type and nullability.
    val fields = table.rows.transpose.zip(table.names).map { case (column, name) =>
      val inputTypes = column.map(_.dataType)
      val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse {
        table.failAnalysis(s"incompatible types found in column $name for inline table")
      }
      StructField(name, tpe, nullable = column.exists(_.nullable))
    }
    val attributes = StructType(fields).toAttributes
    assert(fields.size == table.names.size)

    val newRows: Seq[InternalRow] = table.rows.map { row =>
      InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) =>
        val targetType = fields(ci).dataType
        try {
          if (e.dataType.sameType(targetType)) {
            e.eval()
          } else {
            Cast(e, targetType).eval()
          }
        } catch {
          case NonFatal(ex) =>
            table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}")
        }
      })
    }

    LocalRelation(attributes, newRows)
  }
}
Example 8
Source File: SubstituteUnresolvedOrdinals.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.CatalystConf
import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin
import org.apache.spark.sql.types.IntegerType

class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] {
  private def isIntLiteral(e: Expression) = e match {
    case Literal(_, IntegerType) => true
    case _ => false
  }

  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) =>
      val newOrders = s.order.map {
        case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _) =>
          val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index))
          withOrigin(order.origin)(order.copy(child = newOrdinal))
        case other => other
      }
      withOrigin(s.origin)(s.copy(order = newOrders))

    case a: Aggregate if conf.groupByOrdinal && a.groupingExpressions.exists(isIntLiteral) =>
      val newGroups = a.groupingExpressions.map {
        case ordinal @ Literal(index: Int, IntegerType) =>
          withOrigin(ordinal.origin)(UnresolvedOrdinal(index))
        case other => other
      }
      withOrigin(a.origin)(a.copy(groupingExpressions = newGroups))
  }
}
Example 9
Source File: PreprocessTableUpdate.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.commands.UpdateCommand

import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

case class PreprocessTableUpdate(conf: SQLConf)
  extends Rule[LogicalPlan] with UpdateExpressionsSupport {

  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
    case u: DeltaUpdateTable if u.resolved =>
      u.condition.foreach { cond =>
        if (SubqueryExpression.hasSubquery(cond)) {
          throw DeltaErrors.subqueryNotSupportedException("UPDATE", cond)
        }
      }
      toCommand(u)
  }

  def toCommand(update: DeltaUpdateTable): UpdateCommand = {
    val index = EliminateSubqueryAliases(update.child) match {
      case DeltaFullTable(tahoeFileIndex) => tahoeFileIndex
      case o => throw DeltaErrors.notADeltaSourceException("UPDATE", Some(o))
    }

    val targetColNameParts = update.updateColumns.map(DeltaUpdateTable.getTargetColNameParts(_))
    val alignedUpdateExprs = generateUpdateExpressions(
      update.child.output, targetColNameParts, update.updateExpressions, conf.resolver)
    UpdateCommand(index, update.child, alignedUpdateExprs, update.condition)
  }
}
Example 10
Source File: PreprocessTableDelete.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.commands.DeleteCommand

import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
import org.apache.spark.sql.catalyst.plans.logical.{DeltaDelete, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

case class PreprocessTableDelete(conf: SQLConf) extends Rule[LogicalPlan] {

  override def apply(plan: LogicalPlan): LogicalPlan = {
    plan.resolveOperators {
      case d: DeltaDelete if d.resolved =>
        d.condition.foreach { cond =>
          if (SubqueryExpression.hasSubquery(cond)) {
            throw DeltaErrors.subqueryNotSupportedException("DELETE", cond)
          }
        }
        toCommand(d)
    }
  }

  def toCommand(d: DeltaDelete): DeleteCommand = EliminateSubqueryAliases(d.child) match {
    case DeltaFullTable(tahoeFileIndex) =>
      DeleteCommand(tahoeFileIndex, d.child, d.condition)
    case o =>
      throw DeltaErrors.notADeltaSourceException("DELETE", Some(o))
  }
}
Example 11
Source File: AuthzHelper.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.author

import org.apache.kyuubi.Logging
import org.apache.spark.KyuubiConf._
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

import yaooqinn.kyuubi.utils.ReflectUtils

private[kyuubi] class AuthzHelper(conf: SparkConf) extends Logging {

  def rule: Seq[Rule[LogicalPlan]] = {
    try {
      val authzMethod = conf.get(AUTHORIZATION_METHOD.key)
      val maybeRule = ReflectUtils.reflectModule(authzMethod, silent = true)
      maybeRule match {
        case Some(authz) if authz.isInstanceOf[Rule[_]] =>
          Seq(authz.asInstanceOf[Rule[LogicalPlan]])
        case _ => Nil
      }
    } catch {
      case _: NoSuchElementException =>
        error(s"${AUTHORIZATION_METHOD.key} is not configured")
        Nil
    }
  }
}

private[kyuubi] object AuthzHelper extends Logging {

  private[this] var instance: Option[AuthzHelper] = None

  def get: Option[AuthzHelper] = instance

  def init(conf: SparkConf): Unit = {
    if (conf.get(AUTHORIZATION_ENABLE.key).toBoolean) {
      instance = Some(new AuthzHelper(conf))
      debug("AuthzHelper inited.")
    }
  }
}
Example 12
Source File: CarbonIUDRule.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.optimizer import org.apache.spark.sql.ProjectForUpdate import org.apache.spark.sql.catalyst.expressions.{NamedExpression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command.mutation.CarbonProjectForUpdateCommand import org.apache.carbondata.core.constants.CarbonCommonConstants class CarbonIUDRule extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = { processPlan(plan) } private def processPlan(plan: LogicalPlan): LogicalPlan = { plan transform { case ProjectForUpdate(table, cols, Seq(updatePlan)) => var isTransformed = false val newPlan = updatePlan transform { case Project(pList, child) if !isTransformed => var (dest: Seq[NamedExpression], source: Seq[NamedExpression]) = pList .splitAt(pList.size - cols.size) // check complex column cols.foreach { col => val complexExists = "\"name\":\"" + col + "\"" if (dest.exists(m => m.dataType.json.contains(complexExists))) { throw new UnsupportedOperationException( "Unsupported operation on Complex data type") } } // check updated columns exists in table val diff = cols.diff(dest.map(_.name.toLowerCase)) if (diff.nonEmpty) { sys.error(s"Unknown column(s) ${ diff.mkString(",") } in table ${ table.tableName }") } // modify plan for updated column *in place* isTransformed = true source.foreach { col => val colName = col.name.substring(0, col.name.lastIndexOf(CarbonCommonConstants.UPDATED_COL_EXTENSION)) val updateIdx = dest.indexWhere(_.name.equalsIgnoreCase(colName)) dest = dest.updated(updateIdx, col) } Project(dest, child) } CarbonProjectForUpdateCommand( newPlan, table.tableIdentifier.database, table.tableIdentifier.table, cols) } } }
Example 13
Source File: CarbonUDFTransformRule.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.optimizer import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, PredicateHelper, ScalaUDF} import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.types.StringType import org.apache.carbondata.core.constants.CarbonCommonConstants class CarbonUDFTransformRule extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = { pushDownUDFToJoinLeftRelation(plan) } private def pushDownUDFToJoinLeftRelation(plan: LogicalPlan): LogicalPlan = { val output = plan.transform { case proj@Project(cols, Join( left, right, jointype: org.apache.spark.sql.catalyst.plans.JoinType, condition)) => var projectionToBeAdded: Seq[org.apache.spark.sql.catalyst.expressions.Alias] = Seq.empty var udfExists = false val newCols = cols.map { case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) || name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) => udfExists = true projectionToBeAdded :+= a AttributeReference(name, StringType, nullable = true)().withExprId(a.exprId) case other => other } if (udfExists) { val newLeft = left match { case Project(columns, logicalPlan) => Project(columns ++ projectionToBeAdded, logicalPlan) case filter: Filter => Project(filter.output ++ projectionToBeAdded, filter) case relation: LogicalRelation => Project(relation.output ++ projectionToBeAdded, relation) case other => other } Project(newCols, Join(newLeft, right, jointype, condition)) } else { proj } case other => other } output } }
Example 14
Source File: CarbonFileIndexReplaceRule.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.carbondata.execution.datasources

import scala.collection.mutable.ArrayBuffer

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, InMemoryFileIndex, InsertIntoHadoopFsRelationCommand, LogicalRelation}
import org.apache.spark.sql.sources.BaseRelation

import org.apache.carbondata.core.datastore.filesystem.CarbonFile
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.core.util.path.CarbonTablePath

  private def getDataFolders(
      tableFolder: CarbonFile,
      dataFolders: ArrayBuffer[CarbonFile]): Unit = {
    val files = tableFolder.listFiles()
    files.foreach { f =>
      if (f.isDirectory) {
        val files = f.listFiles()
        if (files.nonEmpty && !files(0).isDirectory) {
          dataFolders += f
        } else {
          getDataFolders(f, dataFolders)
        }
      }
    }
  }
}
Example 15
Source File: CarbonExtensions.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.strategy.{CarbonLateDecodeStrategy, DDLStrategy, StreamingTableStrategy}
import org.apache.spark.sql.hive.{CarbonIUDAnalysisRule, CarbonPreInsertionCasts}
import org.apache.spark.sql.parser.CarbonExtensionSqlParser

class CarbonExtensions extends (SparkSessionExtensions => Unit) {

  override def apply(extensions: SparkSessionExtensions): Unit = {
    // Carbon internal parser
    extensions
      .injectParser((sparkSession: SparkSession, parser: ParserInterface) =>
        new CarbonExtensionSqlParser(new SQLConf, sparkSession, parser))

    // carbon analyzer rules
    extensions
      .injectResolutionRule((session: SparkSession) => CarbonIUDAnalysisRule(session))
    extensions
      .injectResolutionRule((session: SparkSession) => CarbonPreInsertionCasts(session))

    // carbon optimizer rules
    extensions.injectPostHocResolutionRule((session: SparkSession) => CarbonOptimizerRule(session))

    // carbon planner strategies
    extensions
      .injectPlannerStrategy((session: SparkSession) => new StreamingTableStrategy(session))
    extensions
      .injectPlannerStrategy((_: SparkSession) => new CarbonLateDecodeStrategy)
    extensions
      .injectPlannerStrategy((session: SparkSession) => new DDLStrategy(session))

    // init CarbonEnv
    CarbonEnv.init()
  }
}

case class CarbonOptimizerRule(session: SparkSession) extends Rule[LogicalPlan] {
  self =>

  var notAdded = true

  override def apply(plan: LogicalPlan): LogicalPlan = {
    if (notAdded) {
      self.synchronized {
        if (notAdded) {
          notAdded = false

          val sessionState = session.sessionState
          val field = sessionState.getClass.getDeclaredField("optimizer")
          field.setAccessible(true)
          field.set(sessionState,
            new CarbonOptimizer(session, sessionState.catalog, sessionState.optimizer))
        }
      }
    }
    plan
  }
}
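Extension classes like this are normally activated when the SparkSession is built, by pointing the standard spark.sql.extensions setting at the class. A minimal sketch, assuming the Carbon jars are on the classpath (master and app name below are illustrative placeholders):

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")                      // placeholder
  .appName("carbon-extensions-demo")       // placeholder
  // Spark instantiates the class and invokes apply(SparkSessionExtensions)
  .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions")
  .getOrCreate()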
Example 16
Source File: CarbonAnalyzer.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.util.CarbonReflectionUtils

class CarbonAnalyzer(catalog: SessionCatalog,
    conf: SQLConf,
    sparkSession: SparkSession,
    analyzer: Analyzer) extends Analyzer(catalog, conf) {

  val mvPlan = try {
    CarbonReflectionUtils.createObject(
      "org.apache.carbondata.mv.extension.MVAnalyzerRule",
      sparkSession)._1.asInstanceOf[Rule[LogicalPlan]]
  } catch {
    case e: Exception =>
      null
  }

  override def execute(plan: LogicalPlan): LogicalPlan = {
    val logicalPlan = analyzer.execute(plan)
    if (mvPlan != null) {
      mvPlan.apply(logicalPlan)
    } else {
      logicalPlan
    }
  }
}
Example 17
Source File: CarbonMVRules.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.util.CarbonReflectionUtils

case class CarbonMVRules(sparkSession: SparkSession) extends Rule[LogicalPlan] {

  val mvPlan = try {
    CarbonReflectionUtils.createObject(
      "org.apache.carbondata.mv.extension.MVAnalyzerRule",
      sparkSession)._1.asInstanceOf[Rule[LogicalPlan]]
  } catch {
    case e: Exception =>
      null
  }

  override def apply(plan: LogicalPlan): LogicalPlan = {
    plan match {
      case _: Command => plan
      case _ =>
        if (mvPlan != null) {
          mvPlan.apply(plan)
        } else {
          plan
        }
    }
  }
}
Example 18
Source File: Exchange.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
Example 19
Source File: PruneFileSourcePartitions.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, logicalRelation @ LogicalRelation(fsRelation @ HadoopFsRelation( catalogFileIndex: CatalogFileIndex, partitionSchema, _, _, _, _), _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => // The attribute name of predicate could be different than the one in schema in case of // case insensitive, we should change them to match the one in schema, so we donot need to // worry about case sensitivity anymore. val normalizedFilters = filters.map { e => e transform { case a: AttributeReference => a.withName(logicalRelation.output.find(_.semanticEquals(a)).get.name) } } val sparkSession = fsRelation.sparkSession val partitionColumns = logicalRelation.resolve( partitionSchema, sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) val partitionKeyFilters = ExpressionSet(normalizedFilters.filter(_.references.subsetOf(partitionSet))) if (partitionKeyFilters.nonEmpty) { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileIndex)(sparkSession) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, expectedOutputAttributes = Some(logicalRelation.output)) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) Project(projects, filter) } else { op } } }
Example 20
Source File: RuleExecutorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    val message = intercept[TreeNodeException[LogicalPlan]] {
      ToFixedPoint.execute(Literal(100))
    }.getMessage
    assert(message.contains("Max iterations (10) reached for batch fixedPoint"))
  }

  test("structural integrity checker") {
    object WithSIChecker extends RuleExecutor[Expression] {
      override protected def isPlanIntegral(expr: Expression): Boolean = expr match {
        case IntegerLiteral(_) => true
        case _ => false
      }
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(WithSIChecker.execute(Literal(10)) === Literal(9))

    val message = intercept[TreeNodeException[LogicalPlan]] {
      WithSIChecker.execute(Literal(10.1))
    }.getMessage
    assert(message.contains("the structural integrity of the plan is broken"))
  }
}
Example 21
Source File: SimbaOptimizer.scala From Simba with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.simba import org.apache.spark.sql.ExperimentalMethods import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions.{And, Expression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkOptimizer import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.simba.plans.SpatialJoin class SimbaOptimizer(catalog: SessionCatalog, conf: SQLConf, experimentalMethods: ExperimentalMethods) extends SparkOptimizer(catalog, conf, experimentalMethods) { override def batches: Seq[Batch] = super.batches :+ Batch("SpatialJoinPushDown", FixedPoint(100), PushPredicateThroughSpatialJoin) } object PushPredicateThroughSpatialJoin extends Rule[LogicalPlan] with PredicateHelper { private def split(condition: Seq[Expression], left: LogicalPlan, right: LogicalPlan) = { val (leftEvaluateCondition, rest) = condition.partition(_.references subsetOf left.outputSet) val (rightEvaluateCondition, commonCondition) = rest.partition(_.references subsetOf right.outputSet) (leftEvaluateCondition, rightEvaluateCondition, commonCondition) } def apply(plan: LogicalPlan): LogicalPlan = plan transform { // push the where condition down into join filter case f @ Filter(filterCondition, SpatialJoin(left, right, joinType, joinCondition)) => val (leftFilterConditions, rightFilterConditions, commonFilterCondition) = split(splitConjunctivePredicates(filterCondition), left, right) val newLeft = leftFilterConditions.reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) val newRight = rightFilterConditions.reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) val newJoinCond = (commonFilterCondition ++ joinCondition).reduceLeftOption(And) SpatialJoin(newLeft, newRight, joinType, newJoinCond) // push down the join filter into sub query scanning if applicable case f @ SpatialJoin(left, right, joinType, joinCondition) => val (leftJoinConditions, rightJoinConditions, commonJoinCondition) = split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right) val newLeft = leftJoinConditions.reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) val newRight = rightJoinConditions.reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) val newJoinCond = commonJoinCondition.reduceLeftOption(And) SpatialJoin(newLeft, newRight, joinType, newJoinCond) } }
Example 22
Source File: HiveAcidAutoConvert.scala From spark-acid with Apache License 2.0 | 5 votes |
package com.qubole.spark.hiveacid import java.util.Locale import com.qubole.spark.datasources.hiveacid.sql.execution.SparkAcidSqlParser import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.{Filter, InsertIntoTable, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation import com.qubole.spark.hiveacid.datasource.HiveAcidDataSource case class HiveAcidAutoConvert(spark: SparkSession) extends Rule[LogicalPlan] { private def isConvertible(relation: HiveTableRelation): Boolean = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) relation.tableMeta.properties.getOrElse("transactional", "false").toBoolean } private def convert(relation: HiveTableRelation): LogicalRelation = { val options = relation.tableMeta.properties ++ relation.tableMeta.storage.properties ++ Map("table" -> relation.tableMeta.qualifiedName) val newRelation = new HiveAcidDataSource().createRelation(spark.sqlContext, options) LogicalRelation(newRelation, isStreaming = false) } override def apply(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { // Write path case InsertIntoTable(r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && isConvertible(r) => InsertIntoTable(convert(r), partition, query, overwrite, ifPartitionNotExists) // Read path case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => convert(relation) } } } class HiveAcidAutoConvertExtension extends (SparkSessionExtensions => Unit) { def apply(extension: SparkSessionExtensions): Unit = { extension.injectResolutionRule(HiveAcidAutoConvert.apply) extension.injectParser { (session, parser) => SparkAcidSqlParser(parser) } } }
Example 23
Source File: SeQuiLaAnalyzer.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.ResolveTableValuedFunctionsSeq import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import scala.util.Random class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf) extends Analyzer(catalog, conf, conf.optimizerMaxIterations){ //override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq(ResolveTableValuedFunctionsSeq) // override lazy val batches: Seq[Batch] = Seq( // Batch("Custeom", fixedPoint, ResolveTableValuedFunctionsSeq), // Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), // ResolveHints.RemoveAllHints)) var sequilaOptmazationRules: Seq[Rule[LogicalPlan]] = Nil override lazy val batches: Seq[Batch] = Seq( Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), ResolveHints.RemoveAllHints), Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Substitution", fixedPoint, CTESubstitution, WindowsSubstitution, EliminateUnions, new SubstituteUnresolvedOrdinals(conf)), Batch("Resolution", fixedPoint, ResolveTableValuedFunctionsSeq :: ResolveRelations :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: ResolveNewInstance :: ResolveUpCast :: ResolveGroupingAnalytics :: ResolvePivot :: ResolveOrdinalInOrderByAndGroupBy :: ResolveAggAliasInGroupBy :: ResolveMissingReferences :: ExtractGenerator :: ResolveGenerate :: ResolveFunctions :: ResolveAliases :: ResolveSubquery :: ResolveSubqueryColumnAliases :: ResolveWindowOrder :: ResolveWindowFrame :: ResolveNaturalAndUsingJoin :: ExtractWindowExpressions :: GlobalAggregates :: ResolveAggregateFunctions :: TimeWindowing :: ResolveInlineTables(conf) :: ResolveTimeZone(conf) :: TypeCoercion.typeCoercionRules(conf) ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules Batch("View", Once, AliasViewChild(conf)), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, HandleNullInputsForUDF), Batch("FixNullability", Once, FixNullability), Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, CleanupAliases) ) }
Example 24
Source File: rowFormatConverters.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.rules.Rule

case class ConvertToSafe(child: SparkPlan) extends UnaryNode {
  override def output: Seq[Attribute] = child.output

  override def outputPartitioning: Partitioning = child.outputPartitioning

  override def outputOrdering: Seq[SortOrder] = child.outputOrdering

  override def outputsUnsafeRows: Boolean = false

  override def canProcessUnsafeRows: Boolean = true

  override def canProcessSafeRows: Boolean = false

  override protected def doExecute(): RDD[InternalRow] = {
    child.execute().mapPartitions { iter =>
      val convertToSafe = FromUnsafeProjection(child.output.map(_.dataType))
      iter.map(convertToSafe)
    }
  }
}

private[sql] object EnsureRowFormats extends Rule[SparkPlan] {

  private def onlyHandlesSafeRows(operator: SparkPlan): Boolean =
    operator.canProcessSafeRows && !operator.canProcessUnsafeRows

  private def onlyHandlesUnsafeRows(operator: SparkPlan): Boolean =
    operator.canProcessUnsafeRows && !operator.canProcessSafeRows

  private def handlesBothSafeAndUnsafeRows(operator: SparkPlan): Boolean =
    operator.canProcessSafeRows && operator.canProcessUnsafeRows

  override def apply(operator: SparkPlan): SparkPlan = operator.transformUp {
    case operator: SparkPlan if onlyHandlesSafeRows(operator) =>
      if (operator.children.exists(_.outputsUnsafeRows)) {
        operator.withNewChildren {
          operator.children.map { c => if (c.outputsUnsafeRows) ConvertToSafe(c) else c }
        }
      } else {
        operator
      }
    case operator: SparkPlan if onlyHandlesUnsafeRows(operator) =>
      if (operator.children.exists(!_.outputsUnsafeRows)) {
        operator.withNewChildren {
          operator.children.map { c => if (!c.outputsUnsafeRows) ConvertToUnsafe(c) else c }
        }
      } else {
        operator
      }
    case operator: SparkPlan if handlesBothSafeAndUnsafeRows(operator) =>
      if (operator.children.map(_.outputsUnsafeRows).toSet.size != 1) {
        // If this operator's children produce both unsafe and safe rows,
        // convert everything into unsafe rows.
        operator.withNewChildren {
          operator.children.map { c => if (!c.outputsUnsafeRows) ConvertToUnsafe(c) else c }
        }
      } else {
        operator
      }
  }
}
Example 25
Source File: RuleExecutorSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 26
Source File: Exchange.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
Example 27
Source File: PruneFileSourcePartitions.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.catalog.CatalogStatistics import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, logicalRelation @ LogicalRelation(fsRelation @ HadoopFsRelation( catalogFileIndex: CatalogFileIndex, partitionSchema, _, _, _, _), _, _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => // The attribute name of predicate could be different than the one in schema in case of // case insensitive, we should change them to match the one in schema, so we donot need to // worry about case sensitivity anymore. val normalizedFilters = filters.map { e => e transform { case a: AttributeReference => a.withName(logicalRelation.output.find(_.semanticEquals(a)).get.name) } } val sparkSession = fsRelation.sparkSession val partitionColumns = logicalRelation.resolve( partitionSchema, sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) val partitionKeyFilters = ExpressionSet(normalizedFilters .filterNot(SubqueryExpression.hasSubquery(_)) .filter(_.references.subsetOf(partitionSet))) if (partitionKeyFilters.nonEmpty) { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileIndex)(sparkSession) // Change table stats based on the sizeInBytes of pruned files val withStats = logicalRelation.catalogTable.map(_.copy( stats = Some(CatalogStatistics(sizeInBytes = BigInt(prunedFileIndex.sizeInBytes))))) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, catalogTable = withStats) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) Project(projects, filter) } else { op } } }
Example 28
Source File: OptimizerExtendableSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

class OptimizerExtendableSuite extends SparkFunSuite {

  // No-op rule used only to populate the extra batches below.
  object DummyRule extends Rule[LogicalPlan] {
    def apply(p: LogicalPlan): LogicalPlan = p
  }

  class ExtendedOptimizer extends SimpleTestOptimizer {
    // rules set to DummyRule, would not be executed anyways
    val myBatches: Seq[Batch] = {
      Batch("once", Once, DummyRule) ::
        Batch("fixedPoint", FixedPoint(100), DummyRule) :: Nil
    }

    override def batches: Seq[Batch] = super.batches ++ myBatches
  }

  test("Extending batches possible") {
    // test simply instantiates the new extended optimizer
    val extendedOptimizer = new ExtendedOptimizer()
  }
}
Example 29
Source File: OptimizerExtendableSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

class OptimizerExtendableSuite extends SparkFunSuite {

  // No-op rule used only to populate the extra batches below.
  object DummyRule extends Rule[LogicalPlan] {
    def apply(p: LogicalPlan): LogicalPlan = p
  }

  class ExtendedOptimizer extends SimpleTestOptimizer {
    // rules set to DummyRule, would not be executed anyways
    val myBatches: Seq[Batch] = {
      Batch("once", Once, DummyRule) ::
        Batch("fixedPoint", FixedPoint(100), DummyRule) :: Nil
    }

    override def batches: Seq[Batch] = super.batches ++ myBatches
  }

  test("Extending batches possible") {
    // test simply instantiates the new extended optimizer
    val extendedOptimizer = new ExtendedOptimizer()
  }
}
Example 30
Source File: ReplaceExceptWithFilter.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import scala.annotation.tailrec import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule object ReplaceExceptWithFilter extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { if (!plan.conf.replaceExceptWithFilter) { return plan } plan.transform { case e @ Except(left, right) if isEligible(left, right) => val newCondition = transformCondition(left, skipProject(right)) newCondition.map { c => Distinct(Filter(Not(c), left)) }.getOrElse { e } } } private def transformCondition(left: LogicalPlan, right: LogicalPlan): Option[Expression] = { val filterCondition = InferFiltersFromConstraints(combineFilters(right)).asInstanceOf[Filter].condition val attributeNameMap: Map[String, Attribute] = left.output.map(x => (x.name, x)).toMap if (filterCondition.references.forall(r => attributeNameMap.contains(r.name))) { Some(filterCondition.transform { case a: AttributeReference => attributeNameMap(a.name) }) } else { None } } // TODO: This can be further extended in the future. private def isEligible(left: LogicalPlan, right: LogicalPlan): Boolean = (left, right) match { case (_, right @ (Project(_, _: Filter) | Filter(_, _))) => verifyConditions(left, right) case _ => false } private def verifyConditions(left: LogicalPlan, right: LogicalPlan): Boolean = { val leftProjectList = projectList(left) val rightProjectList = projectList(right) left.output.size == left.output.map(_.name).distinct.size && left.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && right.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && Project(leftProjectList, nonFilterChild(skipProject(left))).sameResult( Project(rightProjectList, nonFilterChild(skipProject(right)))) } private def projectList(node: LogicalPlan): Seq[NamedExpression] = node match { case p: Project => p.projectList case x => x.output } private def skipProject(node: LogicalPlan): LogicalPlan = node match { case p: Project => p.child case x => x } private def nonFilterChild(plan: LogicalPlan) = plan.find(!_.isInstanceOf[Filter]).getOrElse { throw new IllegalStateException("Leaf node is expected") } private def combineFilters(plan: LogicalPlan): LogicalPlan = { @tailrec def iterate(plan: LogicalPlan, acc: LogicalPlan): LogicalPlan = { if (acc.fastEquals(plan)) acc else iterate(acc, CombineFilters(acc)) } iterate(plan, CombineFilters(plan)) } }
Example 31
Source File: ResolveInlineTables.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StructField, StructType} private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = { // For each column, traverse all the values and find a common data type and nullability. val fields = table.rows.transpose.zip(table.names).map { case (column, name) => val inputTypes = column.map(_.dataType) val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse { table.failAnalysis(s"incompatible types found in column $name for inline table") } StructField(name, tpe, nullable = column.exists(_.nullable)) } val attributes = StructType(fields).toAttributes assert(fields.size == table.names.size) val newRows: Seq[InternalRow] = table.rows.map { row => InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) => val targetType = fields(ci).dataType try { val castedExpr = if (e.dataType.sameType(targetType)) { e } else { cast(e, targetType) } castedExpr.eval() } catch { case NonFatal(ex) => table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex) } }) } LocalRelation(attributes, newRows) } }
Example 32
Source File: SubstituteUnresolvedOrdinals.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false } def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other } withOrigin(s.origin)(s.copy(order = newOrders)) case a: Aggregate if conf.groupByOrdinal && a.groupingExpressions.exists(isIntLiteral) => val newGroups = a.groupingExpressions.map { case ordinal @ Literal(index: Int, IntegerType) => withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) case other => other } withOrigin(a.origin)(a.copy(groupingExpressions = newGroups)) } }
Example 33
Source File: HiveSessionStateBuilder.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlanner
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.hive.client.HiveClient
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState}

  override protected def planner: SparkPlanner = {
    new SparkPlanner(session.sparkContext, conf, experimentalMethods) with HiveStrategies {
      override val sparkSession: SparkSession = session

      override def extraPlanningStrategies: Seq[Strategy] =
        super.extraPlanningStrategies ++ customPlanningStrategies ++ Seq(HiveTableScans, Scripts)
    }
  }

  override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _)
}

class HiveSessionResourceLoader(
    session: SparkSession,
    client: HiveClient)
  extends SessionResourceLoader(session) {
  override def addJar(path: String): Unit = {
    client.addJar(path)
    super.addJar(path)
  }
}
Example 34
Source File: HBaseSparkSession.scala From Heracles with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import org.apache.hadoop.hbase.HBaseConfiguration import org.apache.spark.SparkContext import org.apache.spark.api.java.JavaSparkContext import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.ExternalCatalog import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.SparkPlanner import org.apache.spark.sql.hbase.execution.{HBaseSourceAnalysis, HBaseStrategies} import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SQLConf, SessionState, SharedState} class HBaseSparkSession(sc: SparkContext) extends SparkSession(sc) { self => def this(sparkContext: JavaSparkContext) = this(sparkContext.sc) @transient override lazy val sessionState: SessionState = new HBaseSessionStateBuilder(this).build() HBaseConfiguration.merge( sc.hadoopConfiguration, HBaseConfiguration.create(sc.hadoopConfiguration)) @transient override lazy val sharedState: SharedState = new HBaseSharedState(sc, this.sqlContext) } class HBaseSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None) extends BaseSessionStateBuilder(session) { override lazy val conf: SQLConf = new HBaseSQLConf override protected def newBuilder: NewBuilder = new HBaseSessionStateBuilder(_, _) override lazy val experimentalMethods: ExperimentalMethods = { val result = new ExperimentalMethods; result.extraStrategies = Seq((new SparkPlanner(session.sparkContext, conf, new ExperimentalMethods) with HBaseStrategies).HBaseDataSource) result } override lazy val analyzer: Analyzer = { new Analyzer(catalog, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = new FindDataSourceTable(session) +: new ResolveSQLOnFile(session) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = PreprocessTableCreation(session) +: PreprocessTableInsertion(conf) +: DataSourceAnalysis(conf) +: HBaseSourceAnalysis(session) +: customPostHocResolutionRules override val extendedCheckRules = customCheckRules } } } class HBaseSharedState(sc: SparkContext, sqlContext: SQLContext) extends SharedState(sc) { override lazy val externalCatalog: ExternalCatalog = new HBaseCatalog(sqlContext, sc.hadoopConfiguration) }
Example 35
Source File: RuleExecutorSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal}
import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor}

class RuleExecutorSuite extends SparkFunSuite {
  object DecrementLiterals extends Rule[Expression] {
    def apply(e: Expression): Expression = e transform {
      case IntegerLiteral(i) if i > 0 => Literal(i - 1)
    }
  }

  test("only once") {
    object ApplyOnce extends RuleExecutor[Expression] {
      val batches = Batch("once", Once, DecrementLiterals) :: Nil
    }

    assert(ApplyOnce.execute(Literal(10)) === Literal(9))
  }

  test("to fixed point") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(10)) === Literal(0))
  }

  test("to maxIterations") {
    object ToFixedPoint extends RuleExecutor[Expression] {
      val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil
    }

    assert(ToFixedPoint.execute(Literal(100)) === Literal(90))
  }
}
Example 36
Source File: AddSourceToAttributes.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.rule import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types.MetadataBuilder import tech.sourced.engine.{GitRelation, MetadataRelation, Sources} import tech.sourced.engine.compat def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case compat.LogicalRelation(rel @ GitRelation(_, _, _, schemaSource), out, catalogTable) => withMetadata(rel, schemaSource, out, catalogTable) case compat.LogicalRelation( rel @ MetadataRelation(_, _, _, _, schemaSource), out, catalogTable) => withMetadata(rel, schemaSource, out, catalogTable) } private def withMetadata(relation: BaseRelation, schemaSource: Option[String], out: Seq[AttributeReference], catalogTable: Option[CatalogTable]): LogicalRelation = { val processedOut = schemaSource match { case Some(table) => out.map( _.withMetadata(new MetadataBuilder().putString(SOURCE, table).build() ).asInstanceOf[AttributeReference] ) case None => out } compat.LogicalRelation(relation, processedOut, catalogTable) } }
Example 37
Source File: AuthzHelperSuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.author import org.apache.spark.{KyuubiConf, KyuubiSparkUtil, SparkConf, SparkFunSuite} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule class AuthzHelperSuite extends SparkFunSuite { test("test Rule") { // NoSuchElementException val conf = new SparkConf(loadDefaults = true) val authzHelper1 = new AuthzHelper(conf) assert(authzHelper1.rule.isEmpty) // reflect failure KyuubiSparkUtil.setupCommonConfig(conf) val authzHelper2 = new AuthzHelper(conf) assert(authzHelper2.rule.isEmpty) // success conf.set(KyuubiConf.AUTHORIZATION_METHOD.key, "yaooqinn.kyuubi.TestRule") val authzHelper3 = new AuthzHelper(conf) assert(authzHelper3.rule.nonEmpty) assert(authzHelper3.rule.head.isInstanceOf[Rule[LogicalPlan]]) // type miss match conf.set(KyuubiConf.AUTHORIZATION_METHOD.key, "yaooqinn.kyuubi.TestWrongRule") val authzHelper4 = new AuthzHelper(conf) assert(authzHelper4.rule.isEmpty) } test("test Get") { assert(AuthzHelper.get.isEmpty) } test("test Init") { val conf = new SparkConf(loadDefaults = true) .set(KyuubiConf.AUTHORIZATION_METHOD.key, "yaooqinn.kyuubi.TestRule") .set(KyuubiConf.AUTHORIZATION_ENABLE.key, "false") AuthzHelper.init(conf) assert(AuthzHelper.get.isEmpty) conf.set(KyuubiConf.AUTHORIZATION_ENABLE.key, "true") AuthzHelper.init(conf) assert(AuthzHelper.get.nonEmpty) assert(AuthzHelper.get.get.rule.nonEmpty) assert(AuthzHelper.get.get.rule.head.isInstanceOf[Rule[LogicalPlan]]) } }
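The yaooqinn.kyuubi.TestRule and TestWrongRule classes loaded reflectively here are not part of the excerpt; any class extending Rule[LogicalPlan] with the constructor the reflection code expects would satisfy the positive case. A minimal hypothetical stand-in (not the actual Kyuubi test class, whose constructor signature is not shown) could look like:

package yaooqinn.kyuubi

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// A rule that leaves the plan untouched; enough to pass the isInstanceOf[Rule[LogicalPlan]] check.
class TestRule extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}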
Example 38
Source File: view.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf object EliminateView extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { // The child should have the same output attributes with the View operator, so we simply // remove the View operator. case View(_, output, child) => assert(output == child.output, s"The output of the child ${child.output.mkString("[", ",", "]")} is different from the " + s"view output ${output.mkString("[", ",", "]")}") child } }
Example 39
Source File: DruidRule.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.druid import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.expressions.{ Attribute, Expression => SExpression, Literal, NamedExpression, SortOrder } import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule object DruidRule extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case Aggregate(ges, aes, p @ Project(_, _)) => ProjectAndAggregate(ges, aes, p) case s @ Sort(orders, _, child) => if (child.isInstanceOf[ProjectAndAggregate]) { child.asInstanceOf[ProjectAndAggregate].copy(orders = orders) } else { s } case l @ LocalLimit(Literal(v, t), child) => val value: Any = convertToScala(v, t) val limit = value.asInstanceOf[Int] if (limit < 0) { throw new SparkException(s"Aggregate limit must great than zero!") } if (child.isInstanceOf[ProjectAndAggregate]) { child.asInstanceOf[ProjectAndAggregate].copy(limit = limit) } else { l } case g @ GlobalLimit(_, child) => if (child.isInstanceOf[ProjectAndAggregate]) { child } else { g } } } case class ProjectAndAggregate( groupingExpressions: Seq[SExpression], aggregateExpressions: Seq[NamedExpression], child: LogicalPlan, orders: Seq[SortOrder] = null, limit: Int = 20) extends UnaryNode { override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) }
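DruidRule is a plain object extending Rule[LogicalPlan], so one straightforward way to activate a rule of this shape is through the experimental optimizer hook (a sketch; how XSQL actually wires it in is not shown in this excerpt):

// Register the rewrite as an extra optimizer rule on an existing SparkSession.
spark.experimental.extraOptimizations ++= Seq(DruidRule)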
Example 40
Source File: subquery.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType, StructType} case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of subqueries to avoid O(N*N) sameResult calls. val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]() plan transformAllExpressions { case sub: ExecSubqueryExpression => val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]()) val sameResult = sameSchema.find(_.sameResult(sub.plan)) if (sameResult.isDefined) { sub.withNewPlan(sameResult.get) } else { sameSchema += sub.plan sub } } } }
Example 41
Source File: Exchange.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
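Both this ReuseExchange rule and the ReuseSubquery rule above return the plan unchanged unless conf.exchangeReuseEnabled is set; that flag corresponds to the spark.sql.exchange.reuse setting:

// reuse is on by default; set to "false" to disable both exchange and subquery reuse
spark.conf.set("spark.sql.exchange.reuse", "true")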
Example 42
Source File: PruneFileSourcePartitions.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.catalog.CatalogStatistics import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, logicalRelation @ LogicalRelation(fsRelation @ HadoopFsRelation( catalogFileIndex: CatalogFileIndex, partitionSchema, _, _, _, _), _, _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => // The attribute name of predicate could be different than the one in schema in case of // case insensitive, we should change them to match the one in schema, so we donot need to // worry about case sensitivity anymore. val normalizedFilters = filters.map { e => e transform { case a: AttributeReference => a.withName(logicalRelation.output.find(_.semanticEquals(a)).get.name) } } val sparkSession = fsRelation.sparkSession val partitionColumns = logicalRelation.resolve( partitionSchema, sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) val partitionKeyFilters = ExpressionSet(normalizedFilters .filterNot(SubqueryExpression.hasSubquery(_)) .filter(_.references.subsetOf(partitionSet))) if (partitionKeyFilters.nonEmpty) { val prunedFileIndex = catalogFileIndex.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileIndex)(sparkSession) // Change table stats based on the sizeInBytes of pruned files val withStats = logicalRelation.catalogTable.map(_.copy( stats = Some(CatalogStatistics(sizeInBytes = BigInt(prunedFileIndex.sizeInBytes))))) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, catalogTable = withStats) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) Project(projects, filter) } else { op } } }
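The rule only fires when there are filters over partition columns of a file-source relation; a small example that would exercise it (table and column names are illustrative):

spark.sql("CREATE TABLE events (id BIGINT, day STRING) USING parquet PARTITIONED BY (day)")
spark.sql("SELECT id FROM events WHERE day = '2020-01-01'").explain(true)
// the optimized plan should show a pruned file index covering only the day=2020-01-01 partition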
Example 43
Source File: OptimizerExtendableSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule class ExtendedOptimizer extends SimpleTestOptimizer { // rules set to DummyRule, would not be executed anyways val myBatches: Seq[Batch] = { Batch("once", Once, DummyRule) :: Batch("fixedPoint", FixedPoint(100), DummyRule) :: Nil } override def defaultBatches: Seq[Batch] = super.defaultBatches ++ myBatches } test("Extending batches possible") { // test simply instantiates the new extended optimizer val extendedOptimizer = new ExtendedOptimizer() } }
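This excerpt references a DummyRule and closes a class that is not shown. In the upstream Spark test suite the elided context is a SparkFunSuite wrapper and a no-op rule along these lines (a sketch of the missing pieces, not a verbatim quote):

class OptimizerExtendableSuite extends SparkFunSuite {
  // A rule that leaves the plan unchanged; it only exists to populate the test batches.
  object DummyRule extends Rule[LogicalPlan] {
    def apply(p: LogicalPlan): LogicalPlan = p
  }
  // ... the ExtendedOptimizer and the test from the excerpt above go here ...
}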
Example 44
Source File: RuleExecutorSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} class RuleExecutorSuite extends SparkFunSuite { object DecrementLiterals extends Rule[Expression] { def apply(e: Expression): Expression = e transform { case IntegerLiteral(i) if i > 0 => Literal(i - 1) } } test("only once") { object ApplyOnce extends RuleExecutor[Expression] { val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(ApplyOnce.execute(Literal(10)) === Literal(9)) } test("to fixed point") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil } assert(ToFixedPoint.execute(Literal(10)) === Literal(0)) } test("to maxIterations") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil } val message = intercept[TreeNodeException[LogicalPlan]] { ToFixedPoint.execute(Literal(100)) }.getMessage assert(message.contains("Max iterations (10) reached for batch fixedPoint")) } test("structural integrity checker") { object WithSIChecker extends RuleExecutor[Expression] { override protected def isPlanIntegral(expr: Expression): Boolean = expr match { case IntegerLiteral(_) => true case _ => false } val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(WithSIChecker.execute(Literal(10)) === Literal(9)) val message = intercept[TreeNodeException[LogicalPlan]] { WithSIChecker.execute(Literal(10.1)) }.getMessage assert(message.contains("the structural integrity of the plan is broken")) } }
Example 45
Source File: ReplaceExceptWithFilter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import scala.annotation.tailrec import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule object ReplaceExceptWithFilter extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { if (!plan.conf.replaceExceptWithFilter) { return plan } plan.transform { case e @ Except(left, right, false) if isEligible(left, right) => val filterCondition = combineFilters(skipProject(right)).asInstanceOf[Filter].condition if (filterCondition.deterministic) { transformCondition(left, filterCondition).map { c => Distinct(Filter(Not(c), left)) }.getOrElse { e } } else { e } } } private def transformCondition(plan: LogicalPlan, condition: Expression): Option[Expression] = { val attributeNameMap: Map[String, Attribute] = plan.output.map(x => (x.name, x)).toMap if (condition.references.forall(r => attributeNameMap.contains(r.name))) { val rewrittenCondition = condition.transform { case a: AttributeReference => attributeNameMap(a.name) } // We need to consider as False when the condition is NULL, otherwise we do not return those // rows containing NULL which are instead filtered in the Except right plan Some(Coalesce(Seq(rewrittenCondition, Literal.FalseLiteral))) } else { None } } // TODO: This can be further extended in the future. private def isEligible(left: LogicalPlan, right: LogicalPlan): Boolean = (left, right) match { case (_, right @ (Project(_, _: Filter) | Filter(_, _))) => verifyConditions(left, right) case _ => false } private def verifyConditions(left: LogicalPlan, right: LogicalPlan): Boolean = { val leftProjectList = projectList(left) val rightProjectList = projectList(right) left.output.size == left.output.map(_.name).distinct.size && left.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && right.find(_.expressions.exists(SubqueryExpression.hasSubquery)).isEmpty && Project(leftProjectList, nonFilterChild(skipProject(left))).sameResult( Project(rightProjectList, nonFilterChild(skipProject(right)))) } private def projectList(node: LogicalPlan): Seq[NamedExpression] = node match { case p: Project => p.projectList case x => x.output } private def skipProject(node: LogicalPlan): LogicalPlan = node match { case p: Project => p.child case x => x } private def nonFilterChild(plan: LogicalPlan) = plan.find(!_.isInstanceOf[Filter]).getOrElse { throw new IllegalStateException("Leaf node is expected") } private def combineFilters(plan: LogicalPlan): LogicalPlan = { @tailrec def iterate(plan: LogicalPlan, acc: LogicalPlan): LogicalPlan = { if (acc.fastEquals(plan)) acc else iterate(acc, CombineFilters(acc)) } iterate(plan, CombineFilters(plan)) } }
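A worked example of the rewrite this rule performs, using a throwaway view:

spark.sql("CREATE TEMPORARY VIEW t AS SELECT id AS a FROM range(10)")
spark.sql("SELECT * FROM t WHERE a > 1 EXCEPT SELECT * FROM t WHERE a > 2").explain(true)
// with plan.conf.replaceExceptWithFilter enabled (the default), the Except node is replaced by roughly
// Distinct(Filter(a > 1 AND NOT coalesce(a > 2, false), t)), matching the transformCondition logic above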
Example 46
Source File: ResolveInlineTables.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StructField, StructType} private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = { // For each column, traverse all the values and find a common data type and nullability. val fields = table.rows.transpose.zip(table.names).map { case (column, name) => val inputTypes = column.map(_.dataType) val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse { table.failAnalysis(s"incompatible types found in column $name for inline table") } StructField(name, tpe, nullable = column.exists(_.nullable)) } val attributes = StructType(fields).toAttributes assert(fields.size == table.names.size) val newRows: Seq[InternalRow] = table.rows.map { row => InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) => val targetType = fields(ci).dataType try { val castedExpr = if (e.dataType.sameType(targetType)) { e } else { cast(e, targetType) } castedExpr.eval() } catch { case NonFatal(ex) => table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex) } }) } LocalRelation(attributes, newRows) } }
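Inline tables come from the SQL VALUES syntax; the per-column type widening described above can be observed directly:

spark.sql("SELECT * FROM VALUES (1, 'a'), (CAST(2 AS BIGINT), 'b') AS data(x, y)").printSchema()
// column x resolves to BIGINT: findWiderTypeWithoutStringPromotion picks the widest common type per column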
Example 47
Source File: SubstituteUnresolvedOrdinals.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other } withOrigin(s.origin)(s.copy(order = newOrders)) case a: Aggregate if conf.groupByOrdinal && a.groupingExpressions.exists(isIntLiteral) => val newGroups = a.groupingExpressions.map { case ordinal @ Literal(index: Int, IntegerType) => withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) case other => other } withOrigin(a.origin)(a.copy(groupingExpressions = newGroups)) } }
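The ordinals this rule substitutes are the column positions allowed in GROUP BY and ORDER BY when the corresponding flags are on; for example:

spark.conf.set("spark.sql.groupByOrdinal", "true")   // on by default
spark.conf.set("spark.sql.orderByOrdinal", "true")   // on by default
spark.range(10).selectExpr("id % 3 AS dept").createOrReplaceTempView("emp")
spark.sql("SELECT dept, count(*) AS c FROM emp GROUP BY 1 ORDER BY 2 DESC").show()
// the literals 1 and 2 are wrapped in UnresolvedOrdinal by this rule before resolution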
Example 48
Source File: OptimizerFactory.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.extension import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule def produce(earlyBatches: Seq[ExtendableOptimizerBatch] = Nil, mainBatchRules: Seq[Rule[LogicalPlan]] = Nil, postBatches: Seq[ExtendableOptimizerBatch] = Nil): Optimizer = { if (org.apache.spark.SPARK_VERSION.contains("1.6.2")) { new ExtendableOptimizer162(earlyBatches, mainBatchRules, postBatches) } else { new ExtendableOptimizer161(earlyBatches, mainBatchRules, postBatches) } } }
Example 49
Source File: HiveSessionStateBuilder.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.spark.annotation.{Experimental, InterfaceStability} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.Analyzer import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkPlanner import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState} override protected def planner: SparkPlanner = { new SparkPlanner(session.sparkContext, conf, experimentalMethods) with HiveStrategies { override val sparkSession: SparkSession = session override def extraPlanningStrategies: Seq[Strategy] = super.extraPlanningStrategies ++ customPlanningStrategies ++ Seq(HiveTableScans, Scripts) } } override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _) } class HiveSessionResourceLoader( session: SparkSession, clientBuilder: () => HiveClient) extends SessionResourceLoader(session) { private lazy val client = clientBuilder() override def addJar(path: String): Unit = { client.addJar(path) super.addJar(path) } }
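From user code this builder is reached through the standard Hive-enabled entry point rather than being instantiated directly:

val spark = SparkSession.builder()
  .enableHiveSupport()   // selects the Hive session state builder under the hood
  .getOrCreate()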
Example 50
Source File: rules.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.extensions import com.pingcap.tispark.statistics.StatisticsManager import com.pingcap.tispark.utils.ReflectionUtil._ import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.{AnalysisException, _} case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)( sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val meta: MetaManager = tiContext.meta private lazy val autoLoad = tiContext.autoLoad private lazy val tiCatalog = tiContext.tiCatalog private lazy val tiSession = tiContext.tiSession private lazy val sqlContext = tiContext.sqlContext protected val tiContext: TiContext = getOrCreateTiContext(sparkSession) protected val resolveTiDBRelation: TableIdentifier => LogicalPlan = tableIdentifier => { val dbName = getDatabaseFromIdentifier(tableIdentifier) val tableName = tableIdentifier.table val table = meta.getTable(dbName, tableName) if (table.isEmpty) { throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'") } if (autoLoad) { StatisticsManager.loadStatisticsInfo(table.get) } val sizeInBytes = StatisticsManager.estimateTableSize(table.get) val tiDBRelation = TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)( sqlContext) // Use SubqueryAlias so that projects and joins can correctly resolve // UnresolvedAttributes in JoinConditions, Projects, Filters, etc. newSubqueryAlias(tableName, LogicalRelation(tiDBRelation)) } override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp resolveTiDBRelations protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = { case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier))) case UnresolvedRelation(tableIdentifier) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => resolveTiDBRelation(tableIdentifier) } private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String = tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase) } case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession) override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // TODO: support other commands that may concern TiSpark catalog. case sd: ShowDatabasesCommand => TiShowDatabasesCommand(tiContext, sd) case sd: SetDatabaseCommand => TiSetDatabaseCommand(tiContext, sd) case st: ShowTablesCommand => TiShowTablesCommand(tiContext, st) case st: ShowColumnsCommand => TiShowColumnsCommand(tiContext, st) case dt: DescribeTableCommand => TiDescribeTablesCommand(tiContext, dt) case dc: DescribeColumnCommand => TiDescribeColumnCommand(tiContext, dc) case ct: CreateTableLikeCommand => TiCreateTableLikeCommand(tiContext, ct) } }
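Because both rules take their SparkSession in a second parameter list, they match the SparkSession => Rule[LogicalPlan] builder shape used by SparkSessionExtensions. One plausible wiring (TiSpark's real extension class is not shown in this excerpt, and the TiContext constructor below is an assumption):

import org.apache.spark.sql.{SparkSession, TiContext}
import org.apache.spark.sql.extensions.{TiDDLRule, TiResolutionRule}

// Assumed factory; the real extension presumably caches one TiContext per session.
def getOrCreateTiContext(spark: SparkSession): TiContext = new TiContext(spark)

val spark = SparkSession.builder()
  .withExtensions { e =>
    e.injectResolutionRule(TiResolutionRule(getOrCreateTiContext))
    e.injectPostHocResolutionRule(TiDDLRule(getOrCreateTiContext))
  }
  .getOrCreate()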
Example 51
Source File: subquery.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType, StructType} case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]() plan transformAllExpressions { case sub: ExecSubqueryExpression => val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]()) val sameResult = sameSchema.find(_.sameResult(sub.plan)) if (sameResult.isDefined) { sub.withNewPlan(sameResult.get) } else { sameSchema += sub.plan sub } } } }
Example 52
Source File: Exchange.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
Example 53
Source File: PruneFileSourcePartitions.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case op @ PhysicalOperation(projects, filters, logicalRelation @ LogicalRelation(fsRelation @ HadoopFsRelation( tableFileCatalog: TableFileCatalog, partitionSchema, _, _, _, _), _, _)) if filters.nonEmpty && fsRelation.partitionSchemaOption.isDefined => // The attribute name of predicate could be different than the one in schema in case of // case insensitive, we should change them to match the one in schema, so we donot need to // worry about case sensitivity anymore. val normalizedFilters = filters.map { e => e transform { case a: AttributeReference => a.withName(logicalRelation.output.find(_.semanticEquals(a)).get.name) } } val sparkSession = fsRelation.sparkSession val partitionColumns = logicalRelation.resolve( partitionSchema, sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) val partitionKeyFilters = ExpressionSet(normalizedFilters.filter(_.references.subsetOf(partitionSet))) if (partitionKeyFilters.nonEmpty) { val prunedFileCatalog = tableFileCatalog.filterPartitions(partitionKeyFilters.toSeq) val prunedFsRelation = fsRelation.copy(location = prunedFileCatalog)(sparkSession) val prunedLogicalRelation = logicalRelation.copy( relation = prunedFsRelation, expectedOutputAttributes = Some(logicalRelation.output)) // Keep partition-pruning predicates so that they are visible in physical planning val filterExpression = filters.reduceLeft(And) val filter = Filter(filterExpression, prunedLogicalRelation) Project(projects, filter) } else { op } } }
Example 54
Source File: OptimizerExtendableSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule class ExtendedOptimizer extends SimpleTestOptimizer { // rules set to DummyRule, would not be executed anyways val myBatches: Seq[Batch] = { Batch("once", Once, DummyRule) :: Batch("fixedPoint", FixedPoint(100), DummyRule) :: Nil } override def batches: Seq[Batch] = super.batches ++ myBatches } test("Extending batches possible") { // test simply instantiates the new extended optimizer val extendedOptimizer = new ExtendedOptimizer() } }
Example 55
Source File: RuleExecutorSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} class RuleExecutorSuite extends SparkFunSuite { object DecrementLiterals extends Rule[Expression] { def apply(e: Expression): Expression = e transform { case IntegerLiteral(i) if i > 0 => Literal(i - 1) } } test("only once") { object ApplyOnce extends RuleExecutor[Expression] { val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(ApplyOnce.execute(Literal(10)) === Literal(9)) } test("to fixed point") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil } assert(ToFixedPoint.execute(Literal(10)) === Literal(0)) } test("to maxIterations") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil } val message = intercept[TreeNodeException[LogicalPlan]] { ToFixedPoint.execute(Literal(100)) }.getMessage assert(message.contains("Max iterations (10) reached for batch fixedPoint")) } }
Example 56
Source File: ResolveInlineTables.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.{StructField, StructType} private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = { // For each column, traverse all the values and find a common data type and nullability. val fields = table.rows.transpose.zip(table.names).map { case (column, name) => val inputTypes = column.map(_.dataType) val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse { table.failAnalysis(s"incompatible types found in column $name for inline table") } StructField(name, tpe, nullable = column.exists(_.nullable)) } val attributes = StructType(fields).toAttributes assert(fields.size == table.names.size) val newRows: Seq[InternalRow] = table.rows.map { row => InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) => val targetType = fields(ci).dataType try { if (e.dataType.sameType(targetType)) { e.eval() } else { Cast(e, targetType).eval() } } catch { case NonFatal(ex) => table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}") } }) } LocalRelation(attributes, newRows) } }
Example 57
Source File: ResolveCountDistinctStar.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule case class ResolveCountDistinctStar(analyzer: Analyzer) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case a@Aggregate(_, aggregateExpressions, child) => analyzer.ResolveAliases( a.copy(aggregateExpressions = aggregateExpressions.collect { case u@UnresolvedAlias( aggExp@AggregateExpression(c@Count((star: UnresolvedStar) :: Nil),_ , true)) => val expanded = star.expand(child, analyzer.resolver) u.copy(aggExp.copy(c.copy(expanded))) case default => default }) ) } }
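The non-standard form this rule resolves is COUNT(DISTINCT *); with a HANAVora-extended context and an existing table t, the query would look like:

// assuming sqlContext is a HANAVora-extended SQLContext and t is a registered table
sqlContext.sql("SELECT COUNT(DISTINCT *) FROM t").show()
// the UnresolvedStar inside the aggregate is expanded to t's columns before resolution continues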
Example 58
Source File: RuleExecutorSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.trees import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Expression, IntegerLiteral, Literal} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} class RuleExecutorSuite extends SparkFunSuite { object DecrementLiterals extends Rule[Expression] { def apply(e: Expression): Expression = e transform { case IntegerLiteral(i) if i > 0 => Literal(i - 1) } } test("only once") { object ApplyOnce extends RuleExecutor[Expression] { val batches = Batch("once", Once, DecrementLiterals) :: Nil } assert(ApplyOnce.execute(Literal(10)) === Literal(9)) } test("to fixed point") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(100), DecrementLiterals) :: Nil } assert(ToFixedPoint.execute(Literal(10)) === Literal(0)) } test("to maxIterations") { object ToFixedPoint extends RuleExecutor[Expression] { val batches = Batch("fixedPoint", FixedPoint(10), DecrementLiterals) :: Nil } val message = intercept[TreeNodeException[LogicalPlan]] { ToFixedPoint.execute(Literal(100)) }.getMessage assert(message.contains("Max iterations (10) reached for batch fixedPoint")) } }
Example 59
Source File: ResolveInlineTables.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.{StructField, StructType} private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = { // For each column, traverse all the values and find a common data type and nullability. val fields = table.rows.transpose.zip(table.names).map { case (column, name) => val inputTypes = column.map(_.dataType) val tpe = TypeCoercion.findWiderTypeWithoutStringPromotion(inputTypes).getOrElse { table.failAnalysis(s"incompatible types found in column $name for inline table") } StructField(name, tpe, nullable = column.exists(_.nullable)) } val attributes = StructType(fields).toAttributes assert(fields.size == table.names.size) val newRows: Seq[InternalRow] = table.rows.map { row => InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) => val targetType = fields(ci).dataType try { if (e.dataType.sameType(targetType)) { e.eval() } else { Cast(e, targetType).eval() } } catch { case NonFatal(ex) => table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}") } }) } LocalRelation(attributes, newRows) } }
Example 60
Source File: SubstituteUnresolvedOrdinals.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.types.IntegerType class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false } def apply(plan: LogicalPlan): LogicalPlan = plan transform { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other } withOrigin(s.origin)(s.copy(order = newOrders)) case a: Aggregate if conf.groupByOrdinal && a.groupingExpressions.exists(isIntLiteral) => val newGroups = a.groupingExpressions.map { case ordinal @ Literal(index: Int, IntegerType) => withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) case other => other } withOrigin(a.origin)(a.copy(groupingExpressions = newGroups)) } }
Example 61
Source File: ColumnarPlugin.scala From OAP with Apache License 2.0 | 5 votes |
package com.intel.sparkColumnarPlugin import com.intel.sparkColumnarPlugin.execution._ import org.apache.spark.internal.Logging import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.{RowToColumnarExec, ColumnarToRowExec} import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} case class ColumnarPreOverrides(conf: SparkConf) extends Rule[SparkPlan] { val columnarConf = ColumnarPluginConfig.getConf(conf) def replaceWithColumnarPlan(plan: SparkPlan): SparkPlan = plan match { case plan: BatchScanExec => logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") new ColumnarBatchScanExec(plan.output, plan.scan) case plan: ProjectExec => //new ColumnarProjectExec(plan.projectList, replaceWithColumnarPlan(plan.child)) val columnarPlan = replaceWithColumnarPlan(plan.child) val res = if (!columnarPlan.isInstanceOf[ColumnarConditionProjectExec]) { new ColumnarConditionProjectExec(null, plan.projectList, columnarPlan) } else { val cur_plan = columnarPlan.asInstanceOf[ColumnarConditionProjectExec] new ColumnarConditionProjectExec(cur_plan.condition, plan.projectList, cur_plan.child) } logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") res case plan: FilterExec => val child = replaceWithColumnarPlan(plan.child) logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") new ColumnarConditionProjectExec(plan.condition, null, child) case plan: HashAggregateExec => val child = replaceWithColumnarPlan(plan.child) logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") new ColumnarHashAggregateExec( plan.requiredChildDistributionExpressions, plan.groupingExpressions, plan.aggregateExpressions, plan.aggregateAttributes, plan.initialInputBufferOffset, plan.resultExpressions, child) case plan: SortExec => if (columnarConf.enableColumnarSort) { val child = replaceWithColumnarPlan(plan.child) logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") new ColumnarSortExec(plan.sortOrder, plan.global, child, plan.testSpillFrequency) } else { val children = plan.children.map(replaceWithColumnarPlan) logDebug(s"Columnar Processing for ${plan.getClass} is not currently supported.") plan.withNewChildren(children) } class ColumnarPlugin extends Function1[SparkSessionExtensions, Unit] with Logging { override def apply(extensions: SparkSessionExtensions): Unit = { logWarning( "Installing extensions to enable columnar CPU support." + " To disable this set `org.apache.spark.example.columnar.enabled` to false") extensions.injectColumnar((session) => ColumnarOverrideRules(session)) } }
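The excerpt is truncated, but the entry point is the ColumnarPlugin class at the end, a SparkSessionExtensions => Unit; a plugin of this shape is typically enabled through the extensions configuration, for example:

val spark = SparkSession.builder()
  .config("spark.sql.extensions", "com.intel.sparkColumnarPlugin.ColumnarPlugin")
  .getOrCreate()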
Example 62
Source File: RangeQueryRewrite.scala From magellan with Apache License 2.0 | 5 votes |
package magellan.catalyst import magellan.{BoundingBox, Point} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule private[magellan] case class RangeQueryRewrite(session: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { plan transformUp { case p @ Filter(condition, child) => val transformedCondition = condition transformUp { case WithinRange((PointConverter(xexpr, yexpr)), boundingBox) => prune(boundingBox, xexpr, yexpr) case q @ WithinCircleRange((PointConverter(xexpr, yexpr)), point, radius) => val (x, y) = (point.getX(), point.getY()) val boundingBox = BoundingBox(x - radius, y - radius, x + radius, y + radius) And(prune(boundingBox, xexpr, yexpr), new WithinCircleRangePostOpt((PointConverter(xexpr, yexpr)), point, radius)) case q : WithinCircleRangePostOpt => q } Filter(transformedCondition, child) } } private def prune(boundingBox: BoundingBox, xexpr: Expression, yexpr: Expression) = { val xpredicate = And(LessThanOrEqual(xexpr, Literal(boundingBox.xmax)), GreaterThanOrEqual(xexpr, Literal(boundingBox.xmin))) val ypredicate = And(LessThanOrEqual(yexpr, Literal(boundingBox.ymax)), GreaterThanOrEqual(yexpr, Literal(boundingBox.ymin))) And(xpredicate, ypredicate) } } private [magellan] class WithinCircleRangePostOpt(child: Expression, point: Point, radius: Double) extends WithinCircleRange(child, point, radius)
Example 63
Source File: ResolveHierarchy.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.NodeType private[this] def resolveHierarchyNodeAttr(h: Hierarchy): Hierarchy = { val a = h.node h.copy(node = AttributeReference(a.name, NodeType, nullable = false)()) } private[this] def resolveSpec(h: Hierarchy): Hierarchy = h match { case h @ Hierarchy(spec: LevelBasedHierarchySpec, _) => h.copy(spec = spec.copy(levels = spec.levels.map { case u@UnresolvedAttribute(nameParts) => spec.resolveSpec(nameParts, analyzer.resolver).getOrElse(u) case other => other })) case h @ Hierarchy(spec: AdjacencyListHierarchySpec, _) => h.copy(spec = spec.copy(parenthoodExp = spec.parenthoodExp.mapChildren { case u@UnresolvedAttribute(nameParts) => spec.resolveSpec(nameParts, analyzer.resolver).getOrElse(u) case other => other })) } }
Example 64
Source File: ResolveStarAnnotations.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule private[sql] case class ResolveStarAnnotations(analyzer: Analyzer) extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { plan transformUp { // If the projection list contains Stars, expand it. case p@Project((a@AnnotatedAttribute(s: Star)) :: Nil, child) => Project( s.expand(child, analyzer.resolver).map(e => AnnotatedAttribute(e)(a.annotations)), child) case p@Project((a@AnnotationFilter(s: Star)) :: Nil, child) => Project( s.expand(child, analyzer.resolver).map(e => AnnotationFilter(e)(a.filters)), child) } } }
Example 65
Source File: ResolveTableFunctions.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.tablefunctions.UnresolvedTableFunction import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule case class ResolveTableFunctions( analyzer: Analyzer, registry: TableFunctionRegistry = TableFunctionRegistry) extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case UnresolvedTableFunction(name, arguments) => val lookup = registry.lookupFunction(name) lookup match { case Some(f) => val analyzed = f.analyze(analyzer, arguments) ResolvedTableFunction(f, analyzed) case None => throw new AnalysisException(s"Undefined function $name") } } }
Example 66
Source File: SubstituteUnresolvedOrdinals.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.types.IntegerType class SubstituteUnresolvedOrdinals(conf: CatalystConf) extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false } def apply(plan: LogicalPlan): LogicalPlan = plan transform { case s: Sort if conf.orderByOrdinal && s.order.exists(o => isIntLiteral(o.child)) => val newOrders = s.order.map { case order @ SortOrder(ordinal @ Literal(index: Int, IntegerType), _, _) => val newOrdinal = withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) withOrigin(order.origin)(order.copy(child = newOrdinal)) case other => other } withOrigin(s.origin)(s.copy(order = newOrders)) case a: Aggregate if conf.groupByOrdinal && a.groupingExpressions.exists(isIntLiteral) => val newGroups = a.groupingExpressions.map { case ordinal @ Literal(index: Int, IntegerType) => withOrigin(ordinal.origin)(UnresolvedOrdinal(index)) case other => other } withOrigin(a.origin)(a.copy(groupingExpressions = newGroups)) } }
Example 67
Source File: ResolveInferSchemaCommand.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.{AnalysisException, SQLContext} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.sources.commands.{InferSchemaCommand, Orc, Parquet, UnresolvedInferSchemaCommand} import scala.util.Try case class ResolveInferSchemaCommand(sqlContext: SQLContext) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transform { case UnresolvedInferSchemaCommand(path, explicitFileType) => val fileType = explicitFileType.getOrElse(path.toLowerCase match { case p if p.endsWith(".orc") => Orc case p if p.endsWith(".parquet") => Parquet case invalid => throw new AnalysisException(s"Could not determine file format of '$path'") }) InferSchemaCommand(path, fileType) } }
Example 68
Source File: ResolveAppendCommand.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.{AppendRunnableCommand, LogicalRelation} import org.apache.spark.sql.sources.AppendRelation import org.apache.spark.sql.sources.commands.UnresolvedAppendCommand case class ResolveAppendCommand(analyzer: Analyzer) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { case UnresolvedAppendCommand(table, options) => val resolvedTable = analyzer.execute(table) resolvedTable.collectFirst { case LogicalRelation(appendRelation: AppendRelation, _) => AppendRunnableCommand(appendRelation, options) }.getOrElse { throw new AnalysisException(s"Cannot append ${resolvedTable.treeString}") } } }
Example 69
Source File: ResolveSelectUsing.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SelectUsing, UnresolvedSelectUsing} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.sources.RawSqlSourceProvider import org.apache.spark.sql.{DatasourceResolver, SQLContext} private[sql] case class ResolveSelectUsing(sqlContext: SQLContext) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case UnresolvedSelectUsing(sqlCommand, provider, expectedSchema, options) => { val resolver = DatasourceResolver.resolverFor(sqlContext) val rawSqlProvider = resolver.newInstanceOfTyped[RawSqlSourceProvider](provider) val execution = rawSqlProvider.executionOf(sqlContext, options, sqlCommand, expectedSchema) SelectUsing(execution) } } }
Example 70
Source File: ResolveReferencesWithHierarchies.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule case class ResolveReferencesWithHierarchies(analyzer: Analyzer) extends Rule[LogicalPlan] { // scalastyle:off cyclomatic.complexity def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p: LogicalPlan if !p.childrenResolved => p // Special handling for cases when self-join introduce duplicate expression ids. case j @ Join(left, right, _, _) if !j.selfJoinResolved => val conflictingAttributes = left.outputSet.intersect(right.outputSet) logDebug(s"Conflicting attributes ${conflictingAttributes.mkString(",")} in $j") right.collect { case oldVersion@Hierarchy(_, nodeAttr) if conflictingAttributes.contains(nodeAttr) => (oldVersion, oldVersion.copy(node = nodeAttr.newInstance())) } .headOption match { case None => j case Some((oldRelation, newRelation)) => val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output)) val newRight = right transformUp { case r if r == oldRelation => newRelation } transformUp { case other => other transformExpressions { case a: Attribute => attributeRewrites.get(a).getOrElse(a) } } j.copy(right = newRight) } case q: LogicalPlan => q } // scalastyle:on cyclomatic.complexity }
Example 71
Source File: ResolveDropCommand.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.sources.commands.UnresolvedDropCommand import org.apache.spark.sql.sources.{DropRelation, RelationKind, Table} import scala.util.Try case class ResolveDropCommand(analyzer: Analyzer, catalog: Catalog) extends Rule[LogicalPlan] with TableDependencyCalculator { private def failAnalysis(reason: String) = throw new AnalysisException(reason) override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case UnresolvedDropCommand(kind, allowNotExisting, tableIdent, cascade) => val plan = resolvePlan(kind, tableIdent, allowNotExisting) val affected = plan.map { lp => val targetKind = RelationKind.kindOf(lp).getOrElse(Table) checkValidKind(kind, tableIdent, targetKind) buildDependentsMap(catalog, tableIdent) } affected.foreach(checkAllowedToDrop(cascade)) DropRunnableCommand(affected.getOrElse(Map.empty)) } private def getDropRelation(plan: LogicalPlan): Option[DropRelation] = plan.collectFirst { case d: LogicalPlan with DropRelation => d case LogicalRelation(d: DropRelation, _) => d } private def resolvePlan(kind: DropTarget, tableIdent: TableIdentifier, allowNotExisting: Boolean): Option[LogicalPlan] = { Try(catalog.lookupRelation(tableIdent)).toOption match { case Some(plan) => Some(plan) case None if allowNotExisting => None case None => failAnalysis( s"""${kind.targetName.toLowerCase} ${tableIdent.unquotedString} does not exist. To " |DROP a ${kind.targetName.toLowerCase} regardless if it exists of not, use |DROP ${kind.targetName.toUpperCase} IF EXISTS.""".stripMargin) } } private def checkAllowedToDrop(cascade: Boolean) (dependents: Map[String, Option[DropRelation]]) = { if (dependents.size > 1 && !cascade) { failAnalysis("Can not drop because more than one relation has " + s"references to the target relation: ${dependents.keys.mkString(",")}. " + s"to force drop use 'CASCADE'.") } } private def checkValidKind(kind: DropTarget, tableIdent: TableIdentifier, targetKind: RelationKind): Unit = { if (!kind.accepts(targetKind)) { failAnalysis( s"Relation '${tableIdent.unquotedString} of kind" + s"$targetKind is not a ${kind.targetName}. " + s"Please use DROP ${targetKind.name.toUpperCase()} to drop it.") } } private def buildDependentsMap(catalog: Catalog, identifier: TableIdentifier): Map[String, Option[DropRelation]] = { val tables = getTables(catalog, identifier.database) val tablesAndDependents = buildDependentsMap(tables) def aggregate(acc: Set[TableIdentifier], next: List[TableIdentifier]): Set[TableIdentifier] = next match { case Nil => acc case ident :: rest => val dependents = tablesAndDependents(ident) aggregate(acc ++ dependents, rest ++ dependents.diff(acc)) } val dependentsSet = aggregate(Set(identifier), identifier :: Nil) dependentsSet.flatMap { dependent => tables.get(dependent).map(dependent.table -> getDropRelation(_)) }.toMap } }
Example 72
Source File: UseAliasesForFunctionsInGroupings.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Subquery} import org.apache.spark.sql.catalyst.rules.Rule object UseAliasesForFunctionsInGroupings extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case agg@Aggregate(groupingExpressions, aggregateExpressions, child) => val fixedGroupingExpressions = groupingExpressions.map({ case e: AttributeReference => e case e => val aliasOpt = aggregateExpressions.find({ case Alias(aliasChild, aliasName) => aliasChild == e case _ => false }) aliasOpt match { case Some(alias) => alias.toAttribute case None => sys.error(s"Cannot resolve Alias for $e") } }) agg.copy(groupingExpressions = fixedGroupingExpressions) } }
Example 73
Source File: SQLContextExtensionBase.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.extension import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{ParserDialect, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, SimpleFunctionRegistry} import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.DDLParser import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch import org.apache.spark.util.Utils import scala.util.Try import scala.util.control.NonFatal override protected def extendedParserDialect: ParserDialect = try { val clazz = Utils.classForName(dialectClassName) clazz.newInstance().asInstanceOf[ParserDialect] } catch { case NonFatal(e) => // Since we didn't find the available SQL Dialect, it will fail even for SET command: // SET spark.sql.dialect=sql; Let's reset as default dialect automatically. val dialect = conf.dialect // reset the sql dialect conf.unsetConf(SQLConf.DIALECT) // throw out the exception, and the default sql dialect will take effect for next query. throw new DialectException( s""" |Instantiating dialect '$dialect' failed. |Reverting to default dialect '${conf.dialect}'""".stripMargin, e) } // (suggestion) make this implicit to FunctionRegistry. protected def registerBuiltins(registry: FunctionRegistry): Unit = { FunctionRegistry.expressions.foreach { case (name, (info, builder)) => registry.registerFunction(name, builder) } } override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser = new DDLParser(sqlParser.parse(_)) override protected def registerFunctions(registry: FunctionRegistry): Unit = { } }
Example 74
Source File: SapSQLContextExtension.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.extension import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.ParserDialect import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.optimizer._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SelfJoinStrategy import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.tablefunctions.TableFunctionsStrategy import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch import org.apache.spark.sql.hierarchy.HierarchyStrategy import org.apache.spark.sql.parser.{SapDDLParser, SapParserDialect} private[sql] trait SapSQLContextExtension extends SQLContextExtension { this: SQLContext => protected def catalog: Catalog override protected def resolutionRules(analyzer: Analyzer): List[Rule[LogicalPlan]] = FixCaseSensitivity(analyzer) :: ResolveViews(analyzer) :: ResolveSystemTables(analyzer, this) :: ResolveReferencesWithHierarchies(analyzer) :: ResolveHierarchy(analyzer) :: ExcludeHierarchyNodeFromSelectStar(analyzer) :: ResolveStarAnnotations(analyzer) :: ResolveAnnotations(analyzer) :: ResolveTableFunctions(analyzer) :: ResolveCountDistinctStar(analyzer) :: ResolveDeepDescribe(analyzer) :: ResolveSelectUsing(this) :: ResolveDropCommand(analyzer, catalog) :: ResolveInferSchemaCommand(this) :: ResolveAppendCommand(analyzer) :: Nil override protected def optimizerEarlyBatches: List[ExtendableOptimizerBatch] = ExtendableOptimizerBatch( name = "Redundant pushable filters", iterations = 1, rules = BooleanSimplification :: RedundantDownPushableFilters :: Nil ) :: Nil override protected def optimizerMainBatchRules: List[Rule[LogicalPlan]] = FiltersReduction :: AssureRelationsColocality :: Nil override protected def optimizerPostBatches: List[ExtendableOptimizerBatch] = Nil override protected def strategies(planner: ExtendedPlanner): List[Strategy] = CreateTableStrategy(this) :: CatalystSourceStrategy :: HierarchyStrategy(planner) :: TableFunctionsStrategy(planner) :: RawSqlSourceStrategy :: SelfJoinStrategy(planner) :: Nil override protected def extendedParserDialect: ParserDialect = new SapParserDialect override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser = new SapDDLParser(parser) override protected def registerFunctions(registry: FunctionRegistry): Unit = { RegisterHierarchyFunctions(registry) RegisterCustomFunctions(registry) } }