org.apache.spark.util.BoundedPriorityQueue Scala Examples
The following examples show how to use org.apache.spark.util.BoundedPriorityQueue.
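org.apache.spark.util.BoundedPriorityQueue is a bounded min-heap built on java.util.PriorityQueue: it holds at most maxSize elements under an implicit Ordering, and once full it only admits a new element by evicting the current minimum, so it always retains the largest elements seen. The class is private[spark], which is why every example below lives under an org.apache.spark package. A minimal sketch of the core behaviour (the package and object names here are made up for illustration):

package org.apache.spark.example // hypothetical: must live under org.apache.spark

import org.apache.spark.util.BoundedPriorityQueue

object TopKDemo {
  def main(args: Array[String]): Unit = {
    // Keep only the 3 largest values; inserting into a full queue drops the current minimum.
    val pq = new BoundedPriorityQueue[Int](3)
    Seq(5, 1, 9, 7, 3, 8).foreach(pq += _)
    println(pq.toArray.sorted.reverse.mkString(", ")) // 9, 8, 7
  }
}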
Example 1
Source File: GraphXUtils.scala from spark1.52 (Apache License 2.0)
package org.apache.spark.graphx

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.{OpenHashSet, BitSet}
import org.apache.spark.util.BoundedPriorityQueue

object GraphXUtils {
  // Registers GraphX's internal types, including BoundedPriorityQueue, with Kryo
  // so they can be serialized efficiently.
  def registerKryoClasses(conf: SparkConf) {
    conf.registerKryoClasses(Array(
      classOf[Edge[Object]],
      classOf[(VertexId, Object)],
      classOf[EdgePartition[Object, Object]],
      classOf[BitSet],
      classOf[VertexIdToIndexMap],
      classOf[VertexAttributeBlock[Object]],
      classOf[PartitionStrategy],
      classOf[BoundedPriorityQueue[Object]],
      classOf[EdgeDirection],
      classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]],
      classOf[OpenHashSet[Int]],
      classOf[OpenHashSet[Long]]))
  }
}
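To put the helper to work, register the classes on a SparkConf before the context is created. A brief sketch (the app name and master are placeholders; the Kryo serializer must be enabled explicitly):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.GraphXUtils

val conf = new SparkConf()
  .setAppName("graphx-kryo") // placeholder name
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
GraphXUtils.registerKryoClasses(conf)
val sc = new SparkContext(conf)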
Example 2
Source File: BKJSpark.scala from Simba (Apache License 2.0)
package org.apache.spark.sql.simba.execution.join

import org.apache.spark.sql.simba.execution.SimbaPlan
import org.apache.spark.sql.simba.partitioner.MapDPartition
import org.apache.spark.sql.simba.spatial.Point
import org.apache.spark.sql.simba.util.ShapeUtils
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, JoinedRow, Literal}
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.util.BoundedPriorityQueue

import scala.collection.mutable
import scala.util.Random

// Block nested-loop kNN join: replicates both sides onto a num_partitions x num_partitions
// grid so every pair of blocks meets exactly once, then keeps the k nearest right rows
// for each left row.
case class BKJSpark(left_key: Expression, right_key: Expression, l: Literal,
                    left: SparkPlan, right: SparkPlan) extends SimbaPlan {
  override def output: Seq[Attribute] = left.output ++ right.output

  final val num_partitions = simbaSessionState.simbaConf.joinPartitions
  final val k = l.value.asInstanceOf[Number].intValue()

  // Orders candidate pairs by descending distance, so the bounded queue evicts the
  // farthest candidate first and ends up holding the k nearest neighbours.
  private class DisOrdering extends Ordering[(InternalRow, Double)] {
    override def compare(x: (InternalRow, Double), y: (InternalRow, Double)): Int =
      -x._2.compare(y._2)
  }

  override protected def doExecute(): RDD[InternalRow] = {
    // Tag rows with their side (0 = left, 1 = right), then replicate each row to a
    // row or column of the partition grid.
    val tot_rdd = left.execute().map((0, _)).union(right.execute().map((1, _)))
    val tot_dup_rdd = tot_rdd.flatMap { x =>
      val rand_no = new Random().nextInt(num_partitions)
      val ans = mutable.ListBuffer[(Int, (Int, InternalRow))]()
      if (x._1 == 0) {
        val base = rand_no * num_partitions
        for (i <- 0 until num_partitions) ans += ((base + i, x))
      } else {
        for (i <- 0 until num_partitions) ans += ((i * num_partitions + rand_no, x))
      }
      ans
    }
    val tot_dup_partitioned = MapDPartition(tot_dup_rdd, num_partitions * num_partitions)

    tot_dup_partitioned.mapPartitions { iter =>
      val left_data = mutable.ListBuffer[(Point, InternalRow)]()
      val right_data = mutable.ListBuffer[(Point, InternalRow)]()
      while (iter.hasNext) {
        val data = iter.next()
        if (data._2._1 == 0) {
          val tmp_point = ShapeUtils.getShape(left_key, left.output, data._2._2).asInstanceOf[Point]
          left_data += ((tmp_point, data._2._2))
        } else {
          val tmp_point = ShapeUtils.getShape(right_key, right.output, data._2._2).asInstanceOf[Point]
          right_data += ((tmp_point, data._2._2))
        }
      }
      // Local kNN: for every left row, keep the k closest right rows in a
      // BoundedPriorityQueue keyed by distance.
      val joined_ans = mutable.ListBuffer[(InternalRow, Array[(InternalRow, Double)])]()
      left_data.foreach { left =>
        val pq = new BoundedPriorityQueue[(InternalRow, Double)](k)(new DisOrdering)
        right_data.foreach(right => pq += ((right._2, right._1.minDist(left._1))))
        joined_ans += ((left._2, pq.toArray))
      }
      joined_ans.iterator
    }.reduceByKey((left, right) => (left ++ right).sortWith(_._2 < _._2).take(k), num_partitions)
      .flatMap { now => now._2.map(x => new JoinedRow(now._1, x._1)) }
  }

  override def children: Seq[SparkPlan] = Seq(left, right)
}
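The DisOrdering above is the whole trick: BoundedPriorityQueue retains the largest elements under its ordering, so negating the distance comparison makes it retain the k smallest distances. A standalone sketch of the same pattern on plain strings with made-up distances (hypothetical names, placed under org.apache.spark only because the queue is private[spark]):

package org.apache.spark.example // hypothetical: BoundedPriorityQueue is private[spark]

import org.apache.spark.util.BoundedPriorityQueue

object KnnQueueDemo {
  def main(args: Array[String]): Unit = {
    // Descending-by-distance ordering: the queue's "largest" element is the farthest
    // candidate, so a full queue evicts the farthest first and keeps the 2 nearest.
    val byDistanceDesc = Ordering.by[(String, Double), Double](_._2).reverse
    val knn = new BoundedPriorityQueue[(String, Double)](2)(byDistanceDesc)
    Seq(("a", 3.0), ("b", 0.5), ("c", 1.2), ("d", 9.9)).foreach(knn += _)
    println(knn.toArray.sortBy(_._2).mkString(", ")) // (b,0.5), (c,1.2)
  }
}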
Example 3
Source File: GraphXUtils.scala from zen (Apache License 2.0)
package org.apache.spark.graphx2

import org.apache.spark.SparkConf
import org.apache.spark.graphx2.impl._
import org.apache.spark.graphx2.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.{OpenHashSet, BitSet}
import org.apache.spark.util.BoundedPriorityQueue

object GraphXUtils {
  def registerKryoClasses(conf: SparkConf) {
    conf.registerKryoClasses(Array(
      classOf[Edge[Object]],
      classOf[(VertexId, Object)],
      classOf[EdgePartition[Object, Object]],
      classOf[BitSet],
      classOf[VertexIdToIndexMap],
      classOf[VertexAttributeBlock[Object]],
      classOf[PartitionStrategy],
      classOf[BoundedPriorityQueue[Object]],
      classOf[EdgeDirection],
      classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]],
      classOf[OpenHashSet[Int]],
      classOf[OpenHashSet[Long]]))
  }
}
Example 4
Source File: MLPairRDDFunctions.scala from sona (Apache License 2.0)
package com.tencent.angel.sona.ml.rdd

import scala.language.implicitConversions
import scala.reflect.ClassTag

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.util.BoundedPriorityQueue

/**
 * :: DeveloperApi ::
 * Machine learning specific Pair RDD functions.
 */
@DeveloperApi
class MLPairRDDFunctions[K: ClassTag, V: ClassTag](self: RDD[(K, V)]) extends Serializable {

  /**
   * Returns the top k (largest) elements for each key from this RDD as defined by the
   * specified implicit Ordering[T]. If the number of elements for a certain key is less
   * than k, all of them will be returned.
   *
   * @param num k, the number of top elements to return
   * @param ord the implicit ordering for T
   * @return an RDD that contains the top k values for each key
   */
  def topByKey(num: Int)(implicit ord: Ordering[V]): RDD[(K, Array[V])] = {
    self.aggregateByKey(new BoundedPriorityQueue[V](num)(ord))(
      seqOp = (queue, item) => queue += item,
      combOp = (queue1, queue2) => queue1 ++= queue2
    ).mapValues(_.toArray.sorted(ord.reverse)) // This is a min-heap, so we reverse the order.
  }
}

/**
 * :: DeveloperApi ::
 */
@DeveloperApi
object MLPairRDDFunctions {
  implicit def fromPairRDD[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)]): MLPairRDDFunctions[K, V] =
    new MLPairRDDFunctions[K, V](rdd)
}
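A hedged usage sketch of topByKey on a toy RDD (the local master, user names, and scores are all made up for illustration):

import org.apache.spark.{SparkConf, SparkContext}
import com.tencent.angel.sona.ml.rdd.MLPairRDDFunctions._

val sc = new SparkContext(new SparkConf().setAppName("topByKey").setMaster("local[*]"))
val scores = sc.parallelize(Seq(
  ("alice", 3.0), ("alice", 9.0), ("alice", 5.0), ("bob", 2.0), ("bob", 7.0)))
// Two highest scores per user, descending: alice -> 9.0, 5.0 and bob -> 7.0, 2.0
scores.topByKey(2).collect().foreach { case (user, top) =>
  println(s"$user -> ${top.mkString(", ")}")
}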
Example 5
Source File: TakeOrderedAndProjectNode.scala from BigDatalog (Apache License 2.0)
package org.apache.spark.sql.execution.local

import org.apache.spark.sql.SQLConf
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.util.BoundedPriorityQueue

case class TakeOrderedAndProjectNode(
    conf: SQLConf,
    limit: Int,
    sortOrder: Seq[SortOrder],
    projectList: Option[Seq[NamedExpression]],
    child: LocalNode) extends UnaryLocalNode(conf) {

  private[this] var projection: Option[Projection] = _
  private[this] var ord: InterpretedOrdering = _
  private[this] var iterator: Iterator[InternalRow] = _
  private[this] var currentRow: InternalRow = _

  override def output: Seq[Attribute] = {
    val projectOutput = projectList.map(_.map(_.toAttribute))
    projectOutput.getOrElse(child.output)
  }

  override def open(): Unit = {
    child.open()
    projection = projectList.map(new InterpretedProjection(_, child.output))
    ord = new InterpretedOrdering(sortOrder, child.output)
    // The priority queue keeps the largest elements, so reverse the ordering
    // to retain the first `limit` rows of the sort order.
    val queue = new BoundedPriorityQueue[InternalRow](limit)(ord.reverse)
    while (child.next()) {
      queue += child.fetch()
    }
    // Close the child eagerly since we don't need it anymore.
    child.close()
    iterator = queue.toArray.sorted(ord).iterator
  }

  override def next(): Boolean = {
    if (iterator.hasNext) {
      val _currentRow = iterator.next()
      currentRow = projection match {
        case Some(p) => p(_currentRow)
        case None => _currentRow
      }
      true
    } else {
      false
    }
  }

  override def fetch(): InternalRow = currentRow

  override def close(): Unit = child.close()
}
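The ord.reverse call above is the same inversion seen throughout this page, used here to keep the first `limit` rows of an ORDER BY rather than the last. A minimal sketch of the trick on plain integers (hypothetical package, as before):

package org.apache.spark.example // hypothetical: BoundedPriorityQueue is private[spark]

import org.apache.spark.util.BoundedPriorityQueue

object TakeOrderedDemo {
  def main(args: Array[String]): Unit = {
    val ord = Ordering.Int
    // The queue keeps the largest elements under its ordering, so ord.reverse
    // makes it keep the 3 smallest under ord, i.e. the first 3 rows of an ORDER BY.
    val queue = new BoundedPriorityQueue[Int](3)(ord.reverse)
    Seq(42, 7, 19, 3, 25).foreach(queue += _)
    println(queue.toArray.sorted(ord).mkString(", ")) // 3, 7, 19
  }
}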
Example 6
Source File: GraphXUtils.scala from BigDatalog (Apache License 2.0)
package org.apache.spark.graphx

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.{OpenHashSet, BitSet}
import org.apache.spark.util.BoundedPriorityQueue

object GraphXUtils {
  def registerKryoClasses(conf: SparkConf) {
    conf.registerKryoClasses(Array(
      classOf[Edge[Object]],
      classOf[(VertexId, Object)],
      classOf[EdgePartition[Object, Object]],
      classOf[BitSet],
      classOf[VertexIdToIndexMap],
      classOf[VertexAttributeBlock[Object]],
      classOf[PartitionStrategy],
      classOf[BoundedPriorityQueue[Object]],
      classOf[EdgeDirection],
      classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]],
      classOf[OpenHashSet[Int]],
      classOf[OpenHashSet[Long]]))
  }
}
Example 7
Source File: GraphKryoRegistrator.scala from BigDatalog (Apache License 2.0)
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet

@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
}
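Since the registrator is deprecated in favour of GraphXUtils.registerKryoClasses, it would be wired in through configuration rather than code. A sketch of that legacy setup:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator", "org.apache.spark.graphx.GraphKryoRegistrator")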
Example 8
Source File: TopByKeyAggregator.scala from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.ml.recommendation

import scala.language.implicitConversions
import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.util.BoundedPriorityQueue

// Dataset Aggregator that keeps the top `num` (K2, V) pairs per K1 key, buffering
// candidates in a Kryo-serialized BoundedPriorityQueue and emitting them largest first.
private[recommendation] class TopByKeyAggregator[K1: TypeTag, K2: TypeTag, V: TypeTag]
    (num: Int, ord: Ordering[(K2, V)])
  extends Aggregator[(K1, K2, V), BoundedPriorityQueue[(K2, V)], Array[(K2, V)]] {

  override def zero: BoundedPriorityQueue[(K2, V)] = new BoundedPriorityQueue[(K2, V)](num)(ord)

  override def reduce(
      q: BoundedPriorityQueue[(K2, V)],
      a: (K1, K2, V)): BoundedPriorityQueue[(K2, V)] = {
    q += {(a._2, a._3)}
  }

  override def merge(
      q1: BoundedPriorityQueue[(K2, V)],
      q2: BoundedPriorityQueue[(K2, V)]): BoundedPriorityQueue[(K2, V)] = {
    q1 ++= q2
  }

  override def finish(r: BoundedPriorityQueue[(K2, V)]): Array[(K2, V)] = {
    r.toArray.sorted(ord.reverse)
  }

  override def bufferEncoder: Encoder[BoundedPriorityQueue[(K2, V)]] = {
    Encoders.kryo[BoundedPriorityQueue[(K2, V)]]
  }

  override def outputEncoder: Encoder[Array[(K2, V)]] = ExpressionEncoder[Array[(K2, V)]]()
}
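Because the class is private[recommendation], only code inside that package (such as ALS's recommend-for-all path) can drive it. A sketch of how such code might apply it to a Dataset of (user, item, score) triples; the ratings Dataset, its element types, and the function name are assumptions for illustration:

import org.apache.spark.sql.Dataset

// Hypothetical helper: ratings is a Dataset[(Int, Int, Float)] of (user, item, score).
def topItemsPerUser(ratings: Dataset[(Int, Int, Float)], k: Int): Dataset[(Int, Array[(Int, Float)])] = {
  import ratings.sparkSession.implicits._
  // Rank (item, score) pairs by score and keep the k best per user.
  val topK = new TopByKeyAggregator[Int, Int, Float](k, Ordering.by(_._2))
  ratings.groupByKey(_._1).agg(topK.toColumn)
}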
Example 9
Source File: GraphXUtils.scala from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.graphx

import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.{BitSet, OpenHashSet}

object GraphXUtils {
  private[graphx] def mapReduceTriplets[VD: ClassTag, ED: ClassTag, A: ClassTag](
      g: Graph[VD, ED],
      mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
      reduceFunc: (A, A) => A,
      activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None): VertexRDD[A] = {
    def sendMsg(ctx: EdgeContext[VD, ED, A]) {
      mapFunc(ctx.toEdgeTriplet).foreach { kv =>
        val id = kv._1
        val msg = kv._2
        if (id == ctx.srcId) {
          ctx.sendToSrc(msg)
        } else {
          assert(id == ctx.dstId)
          ctx.sendToDst(msg)
        }
      }
    }
    g.aggregateMessagesWithActiveSet(
      sendMsg, reduceFunc, TripletFields.All, activeSetOpt)
  }
}
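mapReduceTriplets is private[graphx] and merely adapts the older map/reduce signature onto aggregateMessagesWithActiveSet; user code would call the public aggregateMessages instead. A sketch computing in-degrees that way (the graph contents and master are made up):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx._

val sc = new SparkContext(new SparkConf().setAppName("in-degrees").setMaster("local[*]"))
val graph = Graph.fromEdgeTuples(sc.parallelize(Seq((1L, 2L), (2L, 3L), (1L, 3L))), defaultValue = 0)
// Equivalent of mapReduceTriplets(graph, t => Iterator((t.dstId, 1)), _ + _):
// send a 1 along each edge to its destination and sum per vertex.
val inDegrees: VertexRDD[Int] = graph.aggregateMessages[Int](ctx => ctx.sendToDst(1), _ + _)
inDegrees.collect().foreach(println) // e.g. (2,1), (3,2)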
Example 10
Source File: GraphXUtils.scala from drizzle-spark (Apache License 2.0)
package org.apache.spark.graphx

import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.{BitSet, OpenHashSet}

object GraphXUtils {
  private[graphx] def mapReduceTriplets[VD: ClassTag, ED: ClassTag, A: ClassTag](
      g: Graph[VD, ED],
      mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
      reduceFunc: (A, A) => A,
      activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None): VertexRDD[A] = {
    def sendMsg(ctx: EdgeContext[VD, ED, A]) {
      mapFunc(ctx.toEdgeTriplet).foreach { kv =>
        val id = kv._1
        val msg = kv._2
        if (id == ctx.srcId) {
          ctx.sendToSrc(msg)
        } else {
          assert(id == ctx.dstId)
          ctx.sendToDst(msg)
        }
      }
    }
    g.aggregateMessagesWithActiveSet(
      sendMsg, reduceFunc, TripletFields.All, activeSetOpt)
  }
}
Example 11
Source File: GraphKryoRegistrator.scala from spark1.52 (Apache License 2.0)
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet

@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
}
Example 12
Source File: GraphXUtils.scala from iolap (Apache License 2.0)
package org.apache.spark.graphx

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.{OpenHashSet, BitSet}
import org.apache.spark.util.BoundedPriorityQueue

object GraphXUtils {
  def registerKryoClasses(conf: SparkConf) {
    conf.registerKryoClasses(Array(
      classOf[Edge[Object]],
      classOf[(VertexId, Object)],
      classOf[EdgePartition[Object, Object]],
      classOf[BitSet],
      classOf[VertexIdToIndexMap],
      classOf[VertexAttributeBlock[Object]],
      classOf[PartitionStrategy],
      classOf[BoundedPriorityQueue[Object]],
      classOf[EdgeDirection],
      classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]],
      classOf[OpenHashSet[Int]],
      classOf[OpenHashSet[Long]]))
  }
}
Example 13
Source File: GraphKryoRegistrator.scala from iolap (Apache License 2.0)
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet

@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
}
Example 14
Source File: GraphXUtils.scala from multi-tenancy-spark (Apache License 2.0)
package org.apache.spark.graphx

import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.{BitSet, OpenHashSet}

object GraphXUtils {
  private[graphx] def mapReduceTriplets[VD: ClassTag, ED: ClassTag, A: ClassTag](
      g: Graph[VD, ED],
      mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
      reduceFunc: (A, A) => A,
      activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None): VertexRDD[A] = {
    def sendMsg(ctx: EdgeContext[VD, ED, A]) {
      mapFunc(ctx.toEdgeTriplet).foreach { kv =>
        val id = kv._1
        val msg = kv._2
        if (id == ctx.srcId) {
          ctx.sendToSrc(msg)
        } else {
          assert(id == ctx.dstId)
          ctx.sendToDst(msg)
        }
      }
    }
    g.aggregateMessagesWithActiveSet(
      sendMsg, reduceFunc, TripletFields.All, activeSetOpt)
  }
}
Example 15
Source File: GraphXUtils.scala from sparkoscope (Apache License 2.0)
package org.apache.spark.graphx

import scala.reflect.ClassTag

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.{BitSet, OpenHashSet}

object GraphXUtils {
  private[graphx] def mapReduceTriplets[VD: ClassTag, ED: ClassTag, A: ClassTag](
      g: Graph[VD, ED],
      mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
      reduceFunc: (A, A) => A,
      activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None): VertexRDD[A] = {
    def sendMsg(ctx: EdgeContext[VD, ED, A]) {
      mapFunc(ctx.toEdgeTriplet).foreach { kv =>
        val id = kv._1
        val msg = kv._2
        if (id == ctx.srcId) {
          ctx.sendToSrc(msg)
        } else {
          assert(id == ctx.dstId)
          ctx.sendToDst(msg)
        }
      }
    }
    g.aggregateMessagesWithActiveSet(
      sendMsg, reduceFunc, TripletFields.All, activeSetOpt)
  }
}
Example 16
Source File: GraphXUtils.scala from graphx-algorithm (GNU General Public License v2.0)
package org.apache.spark.graphx

import org.apache.spark.SparkConf
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.{OpenHashSet, BitSet}
import org.apache.spark.util.BoundedPriorityQueue

object GraphXUtils {
  def registerKryoClasses(conf: SparkConf) {
    conf.registerKryoClasses(Array(
      classOf[Edge[Object]],
      classOf[(VertexId, Object)],
      classOf[EdgePartition[Object, Object]],
      classOf[BitSet],
      classOf[VertexIdToIndexMap],
      classOf[VertexAttributeBlock[Object]],
      classOf[PartitionStrategy],
      classOf[BoundedPriorityQueue[Object]],
      classOf[EdgeDirection],
      classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]],
      classOf[OpenHashSet[Int]],
      classOf[OpenHashSet[Long]]))
  }
}
Example 17
Source File: GraphKryoRegistrator.scala from graphx-algorithm (GNU General Public License v2.0)
package org.apache.spark.graphx

import com.esotericsoftware.kryo.Kryo

import org.apache.spark.serializer.KryoRegistrator
import org.apache.spark.util.BoundedPriorityQueue
import org.apache.spark.util.collection.BitSet
import org.apache.spark.graphx.impl._
import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap
import org.apache.spark.util.collection.OpenHashSet

@deprecated("Register GraphX classes with Kryo using GraphXUtils.registerKryoClasses", "1.2.0")
class GraphKryoRegistrator extends KryoRegistrator {
  def registerClasses(kryo: Kryo) {
    kryo.register(classOf[Edge[Object]])
    kryo.register(classOf[(VertexId, Object)])
    kryo.register(classOf[EdgePartition[Object, Object]])
    kryo.register(classOf[BitSet])
    kryo.register(classOf[VertexIdToIndexMap])
    kryo.register(classOf[VertexAttributeBlock[Object]])
    kryo.register(classOf[PartitionStrategy])
    kryo.register(classOf[BoundedPriorityQueue[Object]])
    kryo.register(classOf[EdgeDirection])
    kryo.register(classOf[GraphXPrimitiveKeyOpenHashMap[VertexId, Int]])
    kryo.register(classOf[OpenHashSet[Int]])
    kryo.register(classOf[OpenHashSet[Long]])
  }
}