org.apache.spark.sql.expressions.Aggregator Scala Examples
The following examples show how to use org.apache.spark.sql.expressions.Aggregator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: typedaggregators.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.aggregate import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines internal implementations for aggregators. //////////////////////////////////////////////////////////////////////////////////////////////////// class TypedSumDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] { override def zero: Double = 0.0 override def reduce(b: Double, a: IN): Double = b + f(a) override def merge(b1: Double, b2: Double): Double = b1 + b2 override def finish(reduction: Double): Double = reduction override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } } class TypedSumLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] { override def zero: Long = 0L override def reduce(b: Long, a: IN): Long = b + f(a) override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, java.lang.Long]) = this(x => f.call(x).asInstanceOf[Long]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedCount[IN](val f: IN => Any) extends Aggregator[IN, Long, Long] { override def zero: Long = 0 override def reduce(b: Long, a: IN): Long = { if (f(a) == null) b else b + 1 } override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, Object]) = this(x => f.call(x)) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedAverage[IN](val f: IN => Double) extends Aggregator[IN, (Double, Long), Double] { override def zero: (Double, Long) = (0.0, 0L) override def reduce(b: (Double, Long), a: IN): (Double, Long) = (f(a) + b._1, 1 + b._2) override def finish(reduction: (Double, Long)): Double = reduction._1 / reduction._2 override def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = { (b1._1 + b2._1, b1._2 + b2._2) } override def bufferEncoder: Encoder[(Double, Long)] = ExpressionEncoder[(Double, Long)]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } }
Example 2
Source File: typedaggregators.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.aggregate import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines internal implementations for aggregators. //////////////////////////////////////////////////////////////////////////////////////////////////// class TypedSumDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] { override def zero: Double = 0.0 override def reduce(b: Double, a: IN): Double = b + f(a) override def merge(b1: Double, b2: Double): Double = b1 + b2 override def finish(reduction: Double): Double = reduction override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } } class TypedSumLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] { override def zero: Long = 0L override def reduce(b: Long, a: IN): Long = b + f(a) override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, java.lang.Long]) = this((x: IN) => f.call(x).asInstanceOf[Long]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedCount[IN](val f: IN => Any) extends Aggregator[IN, Long, Long] { override def zero: Long = 0 override def reduce(b: Long, a: IN): Long = { if (f(a) == null) b else b + 1 } override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, Object]) = this((x: IN) => f.call(x).asInstanceOf[Any]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedAverage[IN](val f: IN => Double) extends Aggregator[IN, (Double, Long), Double] { override def zero: (Double, Long) = (0.0, 0L) override def reduce(b: (Double, Long), a: IN): (Double, Long) = (f(a) + b._1, 1 + b._2) override def finish(reduction: (Double, Long)): Double = reduction._1 / reduction._2 override def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = { (b1._1 + b2._1, b1._2 + b2._2) } override def bufferEncoder: Encoder[(Double, Long)] = ExpressionEncoder[(Double, Long)]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } }
Example 3
Source File: typedaggregators.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.aggregate import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines internal implementations for aggregators. //////////////////////////////////////////////////////////////////////////////////////////////////// class TypedSumDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] { override def zero: Double = 0.0 override def reduce(b: Double, a: IN): Double = b + f(a) override def merge(b1: Double, b2: Double): Double = b1 + b2 override def finish(reduction: Double): Double = reduction override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } } class TypedSumLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] { override def zero: Long = 0L override def reduce(b: Long, a: IN): Long = b + f(a) override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, java.lang.Long]) = this(x => f.call(x).asInstanceOf[Long]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedCount[IN](val f: IN => Any) extends Aggregator[IN, Long, Long] { override def zero: Long = 0 override def reduce(b: Long, a: IN): Long = { if (f(a) == null) b else b + 1 } override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, Object]) = this(x => f.call(x)) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedAverage[IN](val f: IN => Double) extends Aggregator[IN, (Double, Long), Double] { override def zero: (Double, Long) = (0.0, 0L) override def reduce(b: (Double, Long), a: IN): (Double, Long) = (f(a) + b._1, 1 + b._2) override def finish(reduction: (Double, Long)): Double = reduction._1 / reduction._2 override def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = { (b1._1 + b2._1, b1._2 + b2._2) } override def bufferEncoder: Encoder[(Double, Long)] = ExpressionEncoder[(Double, Long)]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } }
Example 4
Source File: typedaggregators.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.aggregate import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines internal implementations for aggregators. //////////////////////////////////////////////////////////////////////////////////////////////////// class TypedSumDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] { override def zero: Double = 0.0 override def reduce(b: Double, a: IN): Double = b + f(a) override def merge(b1: Double, b2: Double): Double = b1 + b2 override def finish(reduction: Double): Double = reduction override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } } class TypedSumLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] { override def zero: Long = 0L override def reduce(b: Long, a: IN): Long = b + f(a) override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, java.lang.Long]) = this(x => f.call(x).asInstanceOf[Long]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedCount[IN](val f: IN => Any) extends Aggregator[IN, Long, Long] { override def zero: Long = 0 override def reduce(b: Long, a: IN): Long = { if (f(a) == null) b else b + 1 } override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, Object]) = this(x => f.call(x)) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedAverage[IN](val f: IN => Double) extends Aggregator[IN, (Double, Long), Double] { override def zero: (Double, Long) = (0.0, 0L) override def reduce(b: (Double, Long), a: IN): (Double, Long) = (f(a) + b._1, 1 + b._2) override def finish(reduction: (Double, Long)): Double = reduction._1 / reduction._2 override def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = { (b1._1 + b2._1, b1._2 + b2._2) } override def bufferEncoder: Encoder[(Double, Long)] = ExpressionEncoder[(Double, Long)]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this(x => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } }
Example 5
Source File: TopByKeyAggregator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.recommendation import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.util.BoundedPriorityQueue private[recommendation] class TopByKeyAggregator[K1: TypeTag, K2: TypeTag, V: TypeTag] (num: Int, ord: Ordering[(K2, V)]) extends Aggregator[(K1, K2, V), BoundedPriorityQueue[(K2, V)], Array[(K2, V)]] { override def zero: BoundedPriorityQueue[(K2, V)] = new BoundedPriorityQueue[(K2, V)](num)(ord) override def reduce( q: BoundedPriorityQueue[(K2, V)], a: (K1, K2, V)): BoundedPriorityQueue[(K2, V)] = { q += {(a._2, a._3)} } override def merge( q1: BoundedPriorityQueue[(K2, V)], q2: BoundedPriorityQueue[(K2, V)]): BoundedPriorityQueue[(K2, V)] = { q1 ++= q2 } override def finish(r: BoundedPriorityQueue[(K2, V)]): Array[(K2, V)] = { r.toArray.sorted(ord.reverse) } override def bufferEncoder: Encoder[BoundedPriorityQueue[(K2, V)]] = { Encoders.kryo[BoundedPriorityQueue[(K2, V)]] } override def outputEncoder: Encoder[Array[(K2, V)]] = ExpressionEncoder[Array[(K2, V)]]() }
Example 6
Source File: typedaggregators.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.aggregate import org.apache.spark.api.java.function.MapFunction import org.apache.spark.sql.{Encoder, TypedColumn} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.expressions.Aggregator //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines internal implementations for aggregators. //////////////////////////////////////////////////////////////////////////////////////////////////// class TypedSumDouble[IN](val f: IN => Double) extends Aggregator[IN, Double, Double] { override def zero: Double = 0.0 override def reduce(b: Double, a: IN): Double = b + f(a) override def merge(b1: Double, b2: Double): Double = b1 + b2 override def finish(reduction: Double): Double = reduction override def bufferEncoder: Encoder[Double] = ExpressionEncoder[Double]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } } class TypedSumLong[IN](val f: IN => Long) extends Aggregator[IN, Long, Long] { override def zero: Long = 0L override def reduce(b: Long, a: IN): Long = b + f(a) override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, java.lang.Long]) = this((x: IN) => f.call(x).asInstanceOf[Long]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedCount[IN](val f: IN => Any) extends Aggregator[IN, Long, Long] { override def zero: Long = 0 override def reduce(b: Long, a: IN): Long = { if (f(a) == null) b else b + 1 } override def merge(b1: Long, b2: Long): Long = b1 + b2 override def finish(reduction: Long): Long = reduction override def bufferEncoder: Encoder[Long] = ExpressionEncoder[Long]() override def outputEncoder: Encoder[Long] = ExpressionEncoder[Long]() // Java api support def this(f: MapFunction[IN, Object]) = this((x: IN) => f.call(x).asInstanceOf[Any]) def toColumnJava: TypedColumn[IN, java.lang.Long] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Long]] } } class TypedAverage[IN](val f: IN => Double) extends Aggregator[IN, (Double, Long), Double] { override def zero: (Double, Long) = (0.0, 0L) override def reduce(b: (Double, Long), a: IN): (Double, Long) = (f(a) + b._1, 1 + b._2) override def finish(reduction: (Double, Long)): Double = reduction._1 / reduction._2 override def merge(b1: (Double, Long), b2: (Double, Long)): (Double, Long) = { (b1._1 + b2._1, b1._2 + b2._2) } override def bufferEncoder: Encoder[(Double, Long)] = ExpressionEncoder[(Double, Long)]() override def outputEncoder: Encoder[Double] = ExpressionEncoder[Double]() // Java api support def this(f: MapFunction[IN, java.lang.Double]) = this((x: IN) => f.call(x).asInstanceOf[Double]) def toColumnJava: TypedColumn[IN, java.lang.Double] = { toColumn.asInstanceOf[TypedColumn[IN, java.lang.Double]] } }
Example 7
Source File: A_9_MyAverageByAggregator.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.sql import org.apache.spark.sql.{Encoder, Encoders, SparkSession} import org.apache.spark.sql.expressions.Aggregator case class Employee(name:String, salary:Long) case class Average(var sum:Long, var count:Long) object A_9_MyAverageByAggregator extends Aggregator[Employee, Average, Double]{ override def zero: Average = Average(0L,0L) override def reduce(b: Average, a: Employee): Average = { b.sum += a.salary b.count+=1 b } override def merge(b1: Average, b2: Average): Average = { b1.count+=b2.count b1.sum+=b2.sum b1 } override def finish(reduction: Average): Double = reduction.sum.toDouble/reduction.count override def bufferEncoder: Encoder[Average] = Encoders.product override def outputEncoder: Encoder[Double] = Encoders.scalaDouble def main(args: Array[String]): Unit = { val sparkSession = SparkSession.builder().master("local").appName("MyAverageByAggregator") .getOrCreate() //隐式转换 import sparkSession.implicits._ val dataFrame = sparkSession.read.json("src/main/resources/sparkresource/employees.json").as[Employee] dataFrame.show() val salary_average = A_9_MyAverageByAggregator.toColumn.name("salary_average") val frame = dataFrame.select(salary_average) frame.show() } }