org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction.
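AggregateFunction is the abstract base class for Catalyst aggregate expressions; concrete aggregates extend one of its subclasses, most commonly DeclarativeAggregate (buffer updates expressed as Catalyst expressions) or TypedImperativeAggregate (arbitrary Java objects as buffers). As orientation before the examples, here is a minimal sketch of a declarative subclass. CountNonNull and count_non_null are hypothetical names for this illustration, not Spark built-ins, and the sketch assumes the Spark 2.x Catalyst API used throughout this page.

import org.apache.spark.sql.Column
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, If, IsNull, Literal}
import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
import org.apache.spark.sql.types.{DataType, LongType}

// Hypothetical aggregate: counts the non-null values of `child`, like COUNT(child).
case class CountNonNull(child: Expression) extends DeclarativeAggregate {
  override def children: Seq[Expression] = child :: Nil
  override def nullable: Boolean = false
  override def dataType: DataType = LongType

  // The aggregation buffer holds a single long slot.
  private lazy val count = AttributeReference("count", LongType, nullable = false)()
  override lazy val aggBufferAttributes: Seq[AttributeReference] = count :: Nil

  override lazy val initialValues: Seq[Expression] = Seq(Literal(0L))
  override lazy val updateExpressions: Seq[Expression] =
    Seq(If(IsNull(child), count, count + 1L))
  // `left` and `right` address the two partial buffers being merged.
  override lazy val mergeExpressions: Seq[Expression] = Seq(count.left + count.right)
  override lazy val evaluateExpression: Expression = count
}

// Wrapped in an AggregateExpression, it can be used like any built-in aggregate:
def count_non_null(c: Column): Column =
  new Column(CountNonNull(c.expr).toAggregateExpression())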
Example 1
Source File: PropagateEmptyRelation.scala From drizzle-spark with Apache License 2.0 (identical copies of this file appear in the sparkoscope and multi-tenancy-spark projects)
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._

object PropagateEmptyRelation extends Rule[LogicalPlan] with PredicateHelper {
  private def isEmptyLocalRelation(plan: LogicalPlan): Boolean = plan match {
    case p: LocalRelation => p.data.isEmpty
    case _ => false
  }

  private def containsAggregateExpression(e: Expression): Boolean = {
    e.collectFirst { case _: AggregateFunction => () }.isDefined
  }

  private def empty(plan: LogicalPlan) = LocalRelation(plan.output, data = Seq.empty)

  def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
    case p: Union if p.children.forall(isEmptyLocalRelation) =>
      empty(p)

    case p @ Join(_, _, joinType, _) if p.children.exists(isEmptyLocalRelation) => joinType match {
      case _: InnerLike => empty(p)
      // Intersect is handled as LeftSemi by `ReplaceIntersectWithSemiJoin` rule.
      // Except is handled as LeftAnti by `ReplaceExceptWithAntiJoin` rule.
      case LeftOuter | LeftSemi | LeftAnti if isEmptyLocalRelation(p.left) => empty(p)
      case RightOuter if isEmptyLocalRelation(p.right) => empty(p)
      case FullOuter if p.children.forall(isEmptyLocalRelation) => empty(p)
      case _ => p
    }

    case p: UnaryNode if p.children.nonEmpty && p.children.forall(isEmptyLocalRelation) => p match {
      case _: Project => empty(p)
      case _: Filter => empty(p)
      case _: Sample => empty(p)
      case _: Sort => empty(p)
      case _: GlobalLimit => empty(p)
      case _: LocalLimit => empty(p)
      case _: Repartition => empty(p)
      case _: RepartitionByExpression => empty(p)
      // An Aggregate over empty input can still produce a row (e.g. COUNT(*) returns 0),
      // so only prune it when none of its expressions contains an aggregate function.
      case Aggregate(_, ae, _) if !ae.exists(containsAggregateExpression) => empty(p)
      // Generators like Hive-style UDTF may return their records within `close`.
      case Generate(_: Explode, _, _, _, _, _) => empty(p)
      case _ => p
    }
  }
}
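The rule can be exercised directly against a hand-built plan. The sketch below is modeled on the style of Catalyst's optimizer test suites and is an illustration, not part of the file above; it assumes the Catalyst DSL imports shown.

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.optimizer.PropagateEmptyRelation
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}

// A relation with schema (a: int) and no rows.
val emptyRelation = LocalRelation('a.int)

// A Filter over an empty LocalRelation matches the UnaryNode case above
// and collapses to an empty LocalRelation with the same output.
val query: LogicalPlan = emptyRelation.where('a > 1).analyze
val optimized = PropagateEmptyRelation(query)
assert(optimized.isInstanceOf[LocalRelation])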
Example 2
Source File: ObjectAggregationMap.scala From XSQL with Apache License 2.0 (an identical copy appears in the Spark-2.3.1 project)
package org.apache.spark.sql.execution.aggregate

import java.{util => ju}

import org.apache.spark.{SparkEnv, TaskContext}
import org.apache.spark.internal.config
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection, UnsafeRow}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, TypedImperativeAggregate}
import org.apache.spark.sql.execution.UnsafeKVExternalSorter
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter

// An aggregation map keyed by unsafe grouping rows whose values can hold arbitrary
// aggregation buffer objects. The original excerpt omits the enclosing class and
// its in-memory map; the declarations below are a minimal reconstruction following
// the upstream Spark 2.3 source so the snippet compiles.
class ObjectAggregationMap() {
  // Insertion-ordered map from grouping key to its aggregation buffer entry.
  private[this] val hashMap = new ju.LinkedHashMap[UnsafeRow, AggregationBufferEntry]

  def iterator: ju.Iterator[AggregationBufferEntry] = hashMap.values().iterator()

  // Dumps all entries into a newly created external sorter and clears the map,
  // so that aggregation can continue in sort-based fallback mode.
  def dumpToExternalSorter(
      groupingAttributes: Seq[Attribute],
      aggregateFunctions: Seq[AggregateFunction]): UnsafeKVExternalSorter = {
    val aggBufferAttributes = aggregateFunctions.flatMap(_.aggBufferAttributes)
    val sorter = new UnsafeKVExternalSorter(
      StructType.fromAttributes(groupingAttributes),
      StructType.fromAttributes(aggBufferAttributes),
      SparkEnv.get.blockManager,
      SparkEnv.get.serializerManager,
      TaskContext.get().taskMemoryManager().pageSizeBytes,
      SparkEnv.get.conf.get(config.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD),
      null
    )

    val mapIterator = iterator
    val unsafeAggBufferProjection =
      UnsafeProjection.create(aggBufferAttributes.map(_.dataType).toArray)

    while (mapIterator.hasNext) {
      val entry = mapIterator.next()
      // Typed buffers hold arbitrary objects; serialize them in place before spilling.
      aggregateFunctions.foreach {
        case agg: TypedImperativeAggregate[_] =>
          agg.serializeAggregateBufferInPlace(entry.aggregationBuffer)
        case _ =>
      }

      sorter.insertKV(
        entry.groupingKey,
        unsafeAggBufferProjection(entry.aggregationBuffer)
      )
    }

    hashMap.clear()
    sorter
  }

  def clear(): Unit = {
    hashMap.clear()
  }
}

// Stores the grouping key and aggregation buffer
class AggregationBufferEntry(var groupingKey: UnsafeRow, var aggregationBuffer: InternalRow)
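For context, dumpToExternalSorter implements the sort-based fallback of ObjectHashAggregateExec: once the in-memory map holds too many groups, each TypedImperativeAggregate buffer is serialized in place and the map is spilled into an UnsafeKVExternalSorter. A rough sketch of how user code reaches this path, assuming a SparkSession named spark and a DataFrame df with columns k and v (all hypothetical names):

// The number of in-memory groups tolerated before falling back to sorting
// is configurable; Spark 2.3's default is 128.
spark.conf.set("spark.sql.objectHashAggregate.sortBased.fallbackThreshold", "64")

// TypedImperativeAggregate functions such as collect_list are planned as
// ObjectHashAggregateExec, which owns an ObjectAggregationMap like the one above.
import org.apache.spark.sql.functions.collect_list
df.groupBy("k").agg(collect_list("v")).show()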
Example 3
Source File: DeequFunctions.scala From deequ with Apache License 2.0
package org.apache.spark.sql

import com.amazon.deequ.analyzers.KLLSketch
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, StatefulApproxQuantile, StatefulHyperloglogPlus}
import org.apache.spark.sql.catalyst.expressions.Literal

// Helpers that expose deequ's stateful aggregate functions as Columns.
// StatefulDataType and StatefulKLLSketch are UDAF-style helpers defined
// elsewhere in deequ; the enclosing object declaration, omitted from the
// original excerpt, is restored here.
object DeequFunctions {

  def stateful_datatype(column: Column): Column = {
    val statefulDataType = new StatefulDataType()
    statefulDataType(column)
  }

  def stateful_kll(
      column: Column,
      sketchSize: Int,
      shrinkingFactor: Double): Column = {
    val statefulKLL = new StatefulKLLSketch(sketchSize, shrinkingFactor)
    statefulKLL(column)
  }
}
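A hypothetical call site for these helpers, assuming a DataFrame df with a numeric column value (both names are placeholders) and that DeequFunctions is visible to the caller; the sketch-size and shrinking-factor values are illustrative only. Each helper returns an aggregate Column, so it composes with agg like a built-in function:

import org.apache.spark.sql.DeequFunctions
import org.apache.spark.sql.functions.col

// Aggregates the whole column into a single serialized sketch/state value.
val sketched = df.agg(
  DeequFunctions.stateful_kll(col("value"), sketchSize = 2048, shrinkingFactor = 0.64))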