org.apache.spark.sql.catalyst.expressions Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.expressions.
You can go to the original project or source file by following the references above each example.
Example 1
Source File: subquery.scala, from drizzle-spark (Apache License 2.0)
package org.apache.spark.sql.execution

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType, StructType}

case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of subqueries to avoid O(N*N) sameResult calls.
    val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]()
    plan transformAllExpressions {
      case sub: ExecSubqueryExpression =>
        val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]())
        val sameResult = sameSchema.find(_.sameResult(sub.plan))
        if (sameResult.isDefined) {
          sub.withNewPlan(sameResult.get)
        } else {
          sameSchema += sub.plan
          sub
        }
    }
  }
}
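The core of this rule is a bucketing trick: candidate subqueries are grouped by schema first, so the relatively expensive sameResult comparison only runs within a bucket instead of against every previously seen plan. The sketch below is a simplified, self-contained illustration of that deduplication pattern in plain Scala, not the Spark API; Plan, schema, and sameResult are hypothetical stand-ins for SubqueryExec, StructType, and SparkPlan.sameResult.

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object ReusePatternSketch {
  // Simplified stand-in for a physical subquery plan.
  final case class Plan(schema: String, body: String) {
    def sameResult(other: Plan): Boolean = this == other
  }

  // For each input plan, return either a previously seen equivalent plan or the plan itself.
  def reuse(plans: Seq[Plan]): Seq[Plan] = {
    // Group candidates by schema so sameResult is only called within a bucket.
    val seen = mutable.HashMap[String, ArrayBuffer[Plan]]()
    plans.map { p =>
      val bucket = seen.getOrElseUpdate(p.schema, ArrayBuffer[Plan]())
      bucket.find(_.sameResult(p)) match {
        case Some(existing) => existing // reuse the instance seen earlier
        case None =>
          bucket += p                   // first occurrence: remember it
          p
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val a = Plan("int", "select max(x)")
    val b = Plan("int", "select max(x)")
    val c = Plan("string", "select min(s)")
    // a and b are equivalent, so the second occurrence resolves to the first instance.
    println(reuse(Seq(a, b, c)).map(System.identityHashCode(_)))
  }
}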
Example 2
Source File: subquery.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.execution

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType, StructType}

case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of subqueries to avoid O(N*N) sameResult calls.
    val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]()
    plan transformAllExpressions {
      case sub: ExecSubqueryExpression =>
        val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]())
        val sameResult = sameSchema.find(_.sameResult(sub.plan))
        if (sameResult.isDefined) {
          sub.withNewPlan(sameResult.get)
        } else {
          sameSchema += sub.plan
          sub
        }
    }
  }
}
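This variant is effectively the same rule as in Example 1. Note that it short-circuits when exchange reuse is disabled: the flag checked through conf.exchangeReuseEnabled is the internal configuration spark.sql.exchange.reuse (key name assumed from SQLConf; verify it for your Spark version, and note that newer versions add a separate subquery-reuse flag). A minimal session setup for experimenting with it might look like this:

import org.apache.spark.sql.SparkSession

// spark.sql.exchange.reuse defaults to true; setting it to false disables the
// reuse rules that check exchangeReuseEnabled, including ReuseSubquery above.
// (Internal configuration; key name assumed, verify for your Spark version.)
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("subquery-reuse-demo")
  .config("spark.sql.exchange.reuse", "false")
  .getOrCreate()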
Example 3
Source File: ColumnarSubquery.scala, from OAP (Apache License 2.0)
package com.intel.sparkColumnarPlugin.expression

import org.apache.arrow.gandiva.evaluator._
import org.apache.arrow.gandiva.exceptions.GandivaException
import org.apache.arrow.gandiva.expression._
import org.apache.arrow.vector.types.pojo.ArrowType
import org.apache.arrow.vector.types.pojo.Field

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.execution.BaseSubqueryExec
import org.apache.spark.sql.execution.ExecSubqueryExpression
import org.apache.spark.sql.execution.ScalarSubquery
import org.apache.spark.sql.types._

import scala.collection.mutable.ListBuffer

class ColumnarScalarSubquery(
    query: ScalarSubquery)
  extends Expression with ColumnarExpression {

  override def dataType: DataType = query.dataType
  override def children: Seq[Expression] = Nil
  override def nullable: Boolean = true
  override def toString: String = query.toString
  override def eval(input: InternalRow): Any = query.eval(input)
  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = query.doGenCode(ctx, ev)
  override def canEqual(that: Any): Boolean = query.canEqual(that)
  override def productArity: Int = query.productArity
  override def productElement(n: Int): Any = query.productElement(n)

  override def doColumnarCodeGen(args: java.lang.Object): (TreeNode, ArrowType) = {
    val value = query.eval(null)
    val resultType = CodeGeneration.getResultType(query.dataType)
    query.dataType match {
      case t: StringType =>
        (TreeBuilder.makeStringLiteral(value.toString().asInstanceOf[String]), resultType)
      case t: IntegerType =>
        (TreeBuilder.makeLiteral(value.asInstanceOf[Integer]), resultType)
      case t: LongType =>
        (TreeBuilder.makeLiteral(value.asInstanceOf[java.lang.Long]), resultType)
      case t: DoubleType =>
        (TreeBuilder.makeLiteral(value.asInstanceOf[java.lang.Double]), resultType)
      case d: DecimalType =>
        val v = value.asInstanceOf[Decimal]
        (TreeBuilder.makeDecimalLiteral(v.toString, v.precision, v.scale), resultType)
      case d: DateType =>
        throw new UnsupportedOperationException(s"DateType is not supported yet.")
    }
  }
}
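The interesting part of this wrapper is doColumnarCodeGen: the scalar subquery is evaluated once and its result is re-emitted as a Gandiva literal node, with an explicit per-type dispatch. The sketch below mirrors that dispatch using Spark SQL types only, with a hypothetical BackendLiteral standing in for Gandiva's TreeNode; it illustrates the pattern rather than the OAP API.

import org.apache.spark.sql.types._

object LiteralMappingSketch {
  // Hypothetical backend literal representation standing in for Gandiva's TreeNode.
  sealed trait BackendLiteral
  final case class IntLit(v: Int) extends BackendLiteral
  final case class LongLit(v: Long) extends BackendLiteral
  final case class DoubleLit(v: Double) extends BackendLiteral
  final case class StringLit(v: String) extends BackendLiteral
  final case class DecimalLit(v: String, precision: Int, scale: Int) extends BackendLiteral

  // Mirrors the type dispatch in ColumnarScalarSubquery.doColumnarCodeGen:
  // only a fixed set of Catalyst types is supported, anything else fails fast.
  def toBackendLiteral(value: Any, dataType: DataType): BackendLiteral = dataType match {
    case IntegerType => IntLit(value.asInstanceOf[Int])
    case LongType    => LongLit(value.asInstanceOf[Long])
    case DoubleType  => DoubleLit(value.asInstanceOf[Double])
    case StringType  => StringLit(value.toString)
    case d: DecimalType =>
      val dec = value.asInstanceOf[Decimal]
      DecimalLit(dec.toString, dec.precision, dec.scale)
    case other =>
      throw new UnsupportedOperationException(s"$other is not supported yet.")
  }
}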
Example 4
Source File: DeltaSourceUtils.scala, from delta (Apache License 2.0)
package org.apache.spark.sql.delta.sources

import java.util.Locale

import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.sources
import org.apache.spark.sql.sources.Filter

object DeltaSourceUtils {
  val NAME = "delta"
  val ALT_NAME = "delta"

  // Batch relations don't pass partitioning columns to `CreatableRelationProvider`s, therefore
  // as a hack, we pass in the partitioning columns among the options.
  val PARTITIONING_COLUMNS_KEY = "__partition_columns"

  def isDeltaDataSourceName(name: String): Boolean = {
    name.toLowerCase(Locale.ROOT) == NAME || name.toLowerCase(Locale.ROOT) == ALT_NAME
  }

  def translateFilters(filters: Array[Filter]): Expression = filters.map {
    case sources.EqualTo(attribute, value) =>
      expressions.EqualTo(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.EqualNullSafe(attribute, value) =>
      expressions.EqualNullSafe(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.GreaterThan(attribute, value) =>
      expressions.GreaterThan(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.GreaterThanOrEqual(attribute, value) =>
      expressions.GreaterThanOrEqual(
        UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.LessThan(attribute, value) =>
      expressions.LessThan(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.LessThanOrEqual(attribute, value) =>
      expressions.LessThanOrEqual(UnresolvedAttribute(attribute), expressions.Literal.create(value))
    case sources.In(attribute, values) =>
      expressions.In(UnresolvedAttribute(attribute), values.map(createLiteral))
    case sources.IsNull(attribute) => expressions.IsNull(UnresolvedAttribute(attribute))
    case sources.IsNotNull(attribute) => expressions.IsNotNull(UnresolvedAttribute(attribute))
    case sources.Not(otherFilter) => expressions.Not(translateFilters(Array(otherFilter)))
    case sources.And(filter1, filter2) =>
      expressions.And(translateFilters(Array(filter1)), translateFilters(Array(filter2)))
    case sources.Or(filter1, filter2) =>
      expressions.Or(translateFilters(Array(filter1)), translateFilters(Array(filter2)))
    case sources.StringStartsWith(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"${value}%"))
    case sources.StringEndsWith(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}"))
    case sources.StringContains(attribute, value) =>
      new expressions.Like(
        UnresolvedAttribute(attribute), expressions.Literal.create(s"%${value}%"))
    case sources.AlwaysTrue() => expressions.Literal.TrueLiteral
    case sources.AlwaysFalse() => expressions.Literal.FalseLiteral
  }.reduce(expressions.And)
}
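A small usage sketch (assuming a Spark SQL dependency and the object above on the classpath): two data source Filters are translated into a single Catalyst Expression over unresolved attributes, conjoined with And. The column names are made up for the example.

import org.apache.spark.sql.sources
import org.apache.spark.sql.delta.sources.DeltaSourceUtils

// Filters as a data source v1 relation would receive them.
val filters: Array[sources.Filter] = Array(
  sources.EqualTo("country", "DE"),
  sources.GreaterThan("age", 21))

// An unresolved Catalyst predicate, roughly: ('country = DE) AND ('age > 21).
val predicate = DeltaSourceUtils.translateFilters(filters)
println(predicate)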