org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression.
Example 1
Source File: Aggregator.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.expressions

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql.{Dataset, Encoder, TypedColumn}
import org.apache.spark.sql.catalyst.encoders.encoderFor
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete}
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression

// The enclosing class declaration and its abstract members, elided in the
// original excerpt, are restored here from Spark's Aggregator source.
abstract class Aggregator[-IN, BUF, OUT] extends Serializable {

  def zero: BUF
  def reduce(b: BUF, a: IN): BUF
  def merge(b1: BUF, b2: BUF): BUF
  def finish(reduction: BUF): OUT
  def bufferEncoder: Encoder[BUF]
  def outputEncoder: Encoder[OUT]

  // Wraps this aggregator in a Catalyst AggregateExpression with Complete
  // mode, so it can be applied to a Dataset as a typed column.
  def toColumn: TypedColumn[IN, OUT] = {
    implicit val bEncoder = bufferEncoder
    implicit val cEncoder = outputEncoder

    val expr = AggregateExpression(
      TypedAggregateExpression(this),
      Complete,
      isDistinct = false)

    new TypedColumn[IN, OUT](expr, encoderFor[OUT])
  }
}
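For context, a minimal usage sketch showing how a concrete aggregator reaches the toColumn path above. The Purchase type, the SumAmount object, and the local SparkSession are hypothetical, introduced only for illustration:

import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.apache.spark.sql.expressions.Aggregator

// Hypothetical input record type.
case class Purchase(user: String, amount: Long)

// Sums the amount field of each Purchase.
object SumAmount extends Aggregator[Purchase, Long, Long] {
  def zero: Long = 0L
  def reduce(b: Long, a: Purchase): Long = b + a.amount
  def merge(b1: Long, b2: Long): Long = b1 + b2
  def finish(reduction: Long): Long = reduction
  def bufferEncoder: Encoder[Long] = Encoders.scalaLong
  def outputEncoder: Encoder[Long] = Encoders.scalaLong
}

val spark = SparkSession.builder().master("local[*]").appName("agg-demo").getOrCreate()
import spark.implicits._

// toColumn wraps SumAmount in an AggregateExpression with Complete mode,
// so it can be selected like any other typed column.
val total = Seq(Purchase("a", 3L), Purchase("b", 4L)).toDS()
  .select(SumAmount.toColumn)
  .head()   // 7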
Example 2
Source File: TiAggregation.scala From tispark with Apache License 2.0
package org.apache.spark.sql

import com.pingcap.tispark.TiDBRelation
import com.pingcap.tispark.utils.ReflectionUtil
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources.LogicalRelation

object TiAggregation {
  type ReturnType =
    (Seq[NamedExpression], Seq[AggregateExpression], Seq[NamedExpression], LogicalPlan)

  def unapply(plan: LogicalPlan): Option[ReturnType] =
    ReflectionUtil.callTiAggregationImplUnapply(plan)
}

object TiAggregationProjection {
  type ReturnType = (Seq[Expression], LogicalPlan, TiDBRelation, Seq[NamedExpression])

  def unapply(plan: LogicalPlan): Option[ReturnType] = plan match {
    // Only push aggregate projections down when all filters can be applied and
    // all projection expressions are column references.
    case PhysicalOperation(projects, filters,
        rel @ LogicalRelation(source: TiDBRelation, _, _, _))
        if projects.forall(_.isInstanceOf[Attribute]) =>
      Some((filters, rel, source, projects))
    case _ => Option.empty[ReturnType]
  }
}
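A hedged sketch of how such extractors are typically consumed from a Catalyst planner strategy. The strategy object below is an assumption for illustration, not TiSpark's actual planner, and the tuple order follows TiAggregation.ReturnType (which appears to mirror Spark's PhysicalAggregation extractor: grouping expressions, aggregate expressions, result expressions, child plan):

import org.apache.spark.sql.{Strategy, TiAggregation}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan

object ExampleTiAggregationStrategy extends Strategy {
  override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
    case TiAggregation(grouping, aggregates, resultExprs, child) =>
      // A real strategy would plan a TiKV-side aggregation here; returning
      // Nil defers to Spark's built-in strategies.
      Nil
    case _ =>
      Nil
  }
}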
Example 3
Source File: Aggregator.scala From XSQL with Apache License 2.0
This file is identical to the Aggregator.scala excerpt shown in Example 1.
Example 4
Source File: ResolveCountDistinctStar.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule

case class ResolveCountDistinctStar(analyzer: Analyzer) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    case a @ Aggregate(_, aggregateExpressions, child) =>
      analyzer.ResolveAliases(
        a.copy(aggregateExpressions = aggregateExpressions.collect {
          // Match COUNT(DISTINCT *): a distinct Count whose only child is an
          // unresolved star, and expand the star against the child plan.
          case u @ UnresolvedAlias(
              aggExp @ AggregateExpression(c @ Count((star: UnresolvedStar) :: Nil), _, true)) =>
            val expanded = star.expand(child, analyzer.resolver)
            u.copy(aggExp.copy(c.copy(expanded)))
          case default => default
        })
      )
  }
}
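Schematically, the rule takes COUNT(DISTINCT *) as the parser produces it and expands the star into the child's output attributes, so that over a relation persons(age, name) the query effectively becomes COUNT(DISTINCT age, name). A minimal sketch of the unresolved expression, mirroring the construction used in the test suite below:

import org.apache.spark.sql.catalyst.analysis.UnresolvedStar
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count}

// COUNT(DISTINCT *) before resolution: a Count over an UnresolvedStar, with
// isDistinct = true on the enclosing AggregateExpression. The rule above
// replaces the star with the expanded child attributes.
val countDistinctStar = AggregateExpression(
  Count(UnresolvedStar(None) :: Nil),
  Complete,
  isDistinct = true)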
Example 5
Source File: ResolveCountDistinctStarSuite.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count}
import org.apache.spark.sql.catalyst.plans.logical.Aggregate
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import org.scalatest.Inside._
import org.scalatest.mock.MockitoSugar

class ResolveCountDistinctStarSuite extends FunSuite with MockitoSugar {
  val persons = new LogicalRelation(new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]

    override def schema: StructType = StructType(Seq(
      StructField("age", IntegerType),
      StructField("name", StringType)
    ))
  })

  test("Count distinct star is resolved correctly") {
    val projection = persons.select(UnresolvedAlias(
      AggregateExpression(Count(UnresolvedStar(None) :: Nil), Complete, true)))
    val stillNotCompletelyResolvedAggregate = SimpleAnalyzer.execute(projection)
    val resolvedAggregate = ResolveCountDistinctStar(SimpleAnalyzer)
      .apply(stillNotCompletelyResolvedAggregate)

    inside(resolvedAggregate) {
      case Aggregate(Nil,
          ArrayBuffer(Alias(AggregateExpression(Count(expressions), Complete, true), _)), _) =>
        assert(expressions.collect { case a: AttributeReference => a.name }.toSet ==
          Set("name", "age"))
    }
    assert(resolvedAggregate.resolved)
  }
}
Example 6
Source File: OapAggUtils.scala From OAP with Apache License 2.0
package org.apache.spark.sql.execution.aggregate

import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Final, Partial}
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.datasources.oap.OapAggregationFileScanExec

object OapAggUtils {
  private def createAggregate(
      requiredChildDistributionExpressions: Option[Seq[Expression]] = None,
      groupingExpressions: Seq[NamedExpression] = Nil,
      aggregateExpressions: Seq[AggregateExpression] = Nil,
      aggregateAttributes: Seq[Attribute] = Nil,
      initialInputBufferOffset: Int = 0,
      resultExpressions: Seq[NamedExpression] = Nil,
      child: SparkPlan): SparkPlan = {
    if (requiredChildDistributionExpressions.isDefined) {
      // Final aggregate: fall back to Spark's HashAggregateExec.
      HashAggregateExec(
        requiredChildDistributionExpressions = requiredChildDistributionExpressions,
        groupingExpressions = groupingExpressions,
        aggregateExpressions = aggregateExpressions,
        aggregateAttributes = aggregateAttributes,
        initialInputBufferOffset = initialInputBufferOffset,
        resultExpressions = resultExpressions,
        child = child)
    } else {
      // Partial aggregate: apply OAP's partial-aggregate optimizations.
      OapAggregateExec(
        requiredChildDistributionExpressions = None,
        groupingExpressions = groupingExpressions,
        aggregateExpressions = aggregateExpressions,
        aggregateAttributes = aggregateAttributes,
        initialInputBufferOffset = initialInputBufferOffset,
        resultExpressions = resultExpressions,
        child = child)
    }
  }

  def planAggregateWithoutDistinct(
      groupingExpressions: Seq[NamedExpression],
      aggregateExpressions: Seq[AggregateExpression],
      resultExpressions: Seq[NamedExpression],
      child: SparkPlan): Seq[SparkPlan] = {
    val useHash = HashAggregateExec.supportsAggregate(
      aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes))
    if (!child.isInstanceOf[OapAggregationFileScanExec] || !useHash) {
      // The child cannot leverage OAP-optimized scanning, so plan nothing here.
      Nil
    } else {
      // 1. Create an aggregate operator for partial aggregations.
      val groupingAttributes = groupingExpressions.map(_.toAttribute)
      val partialAggregateExpressions = aggregateExpressions.map(_.copy(mode = Partial))
      val partialAggregateAttributes =
        partialAggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)
      val partialResultExpressions =
        groupingAttributes ++
          partialAggregateExpressions.flatMap(_.aggregateFunction.inputAggBufferAttributes)
      val partialAggregate = createAggregate(
        requiredChildDistributionExpressions = None,
        groupingExpressions = groupingExpressions,
        aggregateExpressions = partialAggregateExpressions,
        aggregateAttributes = partialAggregateAttributes,
        initialInputBufferOffset = 0,
        resultExpressions = partialResultExpressions,
        child = child)

      // 2. Create an aggregate operator for final aggregations.
      val finalAggregateExpressions = aggregateExpressions.map(_.copy(mode = Final))
      // The attributes of the final aggregation buffer, which is presented as
      // input to the result projection:
      val finalAggregateAttributes = finalAggregateExpressions.map(_.resultAttribute)
      val finalAggregate = createAggregate(
        requiredChildDistributionExpressions = Some(groupingAttributes),
        groupingExpressions = groupingAttributes,
        aggregateExpressions = finalAggregateExpressions,
        aggregateAttributes = finalAggregateAttributes,
        initialInputBufferOffset = groupingExpressions.length,
        resultExpressions = resultExpressions,
        child = partialAggregate)

      finalAggregate :: Nil
    }
  }
}
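The Partial/Final split that planAggregateWithoutDistinct performs can be seen in isolation. A minimal sketch, assuming a simple COUNT over a constant (the Literal input is arbitrary):

import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count, Final, Partial}

// One logical aggregate expression is planned twice: a Partial copy that runs
// next to the scan and fills aggregation buffers, and a Final copy that merges
// those buffers into the result.
val complete = AggregateExpression(Count(Literal(1) :: Nil), Complete, isDistinct = false)
val partial  = complete.copy(mode = Partial)
val finalAgg = complete.copy(mode = Final)

// The partial side exposes its buffer attributes as output; this is why the
// method wires inputAggBufferAttributes into partialResultExpressions.
val bufferAttrs = partial.aggregateFunction.aggBufferAttributes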
Example 7
Source File: Aggregator.scala From sparkoscope with Apache License 2.0
This file is identical to the Aggregator.scala excerpt shown in Example 1.
Example 8
Source File: CarbonCatalystOperators.scala From carbondata with Apache License 2.0
package org.apache.spark.sql

import scala.collection.mutable

import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.types.StringType

abstract class CarbonProfile(attributes: Seq[Attribute]) extends Serializable {
  def isEmpty: Boolean = attributes.isEmpty
}

case class IncludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes)

case class ExcludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes)

case class ProjectForUpdate(
    table: UnresolvedRelation,
    columns: List[String],
    children: Seq[LogicalPlan]) extends LogicalPlan {
  override def output: Seq[Attribute] = Seq.empty
}

case class UpdateTable(
    table: UnresolvedRelation,
    columns: List[String],
    selectStmt: String,
    alias: Option[String] = None,
    filer: String) extends LogicalPlan {
  override def children: Seq[LogicalPlan] = Seq.empty
  override def output: Seq[Attribute] = Seq.empty
}

case class DeleteRecords(
    statement: String,
    alias: Option[String] = None,
    table: UnresolvedRelation) extends LogicalPlan {
  override def children: Seq[LogicalPlan] = Seq.empty
  override def output: Seq[AttributeReference] = Seq.empty
}

// Note: in the original file this method belongs to an enclosing helper object
// that the excerpt elides. It returns true only for a single, ungrouped,
// unfiltered aggregate computation, i.e. a strict COUNT(*)-style query.
def strictCountStar(
    groupingExpressions: Seq[Expression],
    partialComputation: Seq[NamedExpression],
    child: LogicalPlan): Boolean = {
  if (groupingExpressions.nonEmpty) {
    return false
  }
  if (partialComputation.isEmpty) {
    return false
  }
  if (partialComputation.size > 1) {
    return false
  }
  // Any Filter anywhere below the aggregate disqualifies the plan.
  child collect {
    case _: Filter => return false
  }
  true
}
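A hedged sketch of what strictCountStar accepts and rejects; the LocalRelation and expressions below are hypothetical, and the calls assume the method's elided enclosing object is in scope:

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count}
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation}
import org.apache.spark.sql.types.IntegerType

val relation  = LocalRelation(AttributeReference("a", IntegerType)())
val countStar = Alias(
  AggregateExpression(Count(Literal(1) :: Nil), Complete, isDistinct = false), "cnt")()

strictCountStar(Nil, Seq(countStar), relation)                         // true
strictCountStar(relation.output, Seq(countStar), relation)             // false: grouped
strictCountStar(Nil, Seq(countStar), Filter(Literal(true), relation))  // false: filtered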
Example 9
Source File: Aggregator.scala From multi-tenancy-spark with Apache License 2.0
This file is identical to the Aggregator.scala excerpt shown in Example 1.
Example 10
Source File: Aggregator.scala From Spark-2.3.1 with Apache License 2.0
This file is identical to the Aggregator.scala excerpt shown in Example 1.
Example 11
Source File: Aggregator.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.expressions

import org.apache.spark.sql.catalyst.encoders.encoderFor
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete}
import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression
import org.apache.spark.sql.{DataFrame, Dataset, Encoder, TypedColumn}

// The enclosing class declaration, elided in the original excerpt, is restored
// here; the abstract members (zero, reduce, merge, finish) remain omitted.
abstract class Aggregator[-I, B, O] extends Serializable {

  def toColumn(
      implicit bEncoder: Encoder[B],
      cEncoder: Encoder[O]): TypedColumn[I, O] = {
    val expr = new AggregateExpression(
      TypedAggregateExpression(this),
      Complete,
      false)

    new TypedColumn[I, O](expr, encoderFor[O])
  }
}
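Note how this older (pre-2.0 Spark) variant differs from the Aggregator excerpts above: the type parameters are I, B and O rather than IN, BUF and OUT, the encoders arrive as implicit parameters of toColumn instead of abstract members, and the AggregateExpression is built with new and a positional false instead of the named isDistinct argument.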