org.apache.spark.util.collection.CompactBuffer Scala Examples
The following examples show how to use org.apache.spark.util.collection.CompactBuffer. CompactBuffer is an append-only buffer similar to ArrayBuffer but memory-efficient for small collections: it keeps its first two elements in fields of the object itself and only allocates a backing array once a third element is added, which matters when grouping values by key produces many small per-key buffers.
Example 1
Source File: VOrderedRDDFunctions.scala, from spark-vlbfgs (Apache License 2.0)
package org.apache.spark.rdd

import org.apache.spark.Partitioner
import org.apache.spark.internal.Logging
import org.apache.spark.util.collection.CompactBuffer

import scala.reflect.ClassTag

class VOrderedRDDFunctions[K, V](self: RDD[(K, V)])
    (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K])
  extends Logging with Serializable {

  // Groups values by key using shuffle-and-sort rather than a hash map:
  // after repartitionAndSortWithinPartitions, all records with the same key
  // are adjacent, so each group can be collected into a CompactBuffer in a
  // single forward pass.
  def groupByKeyUsingSort(partitioner: Partitioner): RDD[(K, Iterable[V])] = {
    self.repartitionAndSortWithinPartitions(partitioner)
      .mapPartitions { (iter: Iterator[(K, V)]) =>
        new Iterator[(K, CompactBuffer[V])] {
          // Holds the first record of the next group, read while scanning
          // for the end of the current one.
          private var firstElemInNextGroup: (K, V) = null

          override def hasNext: Boolean = firstElemInNextGroup != null || iter.hasNext

          override def next(): (K, CompactBuffer[V]) = {
            if (firstElemInNextGroup == null) {
              firstElemInNextGroup = iter.next()
            }
            val key = firstElemInNextGroup._1
            val group = CompactBuffer[V](firstElemInNextGroup._2)
            firstElemInNextGroup = null
            var reachNewGroup = false
            while (iter.hasNext && !reachNewGroup) {
              val currElem = iter.next()
              if (currElem._1 == key) {
                group += currElem._2
              } else {
                firstElemInNextGroup = currElem
                reachNewGroup = true
              }
            }
            (key, group)
          }
        }
      }
  }
}

private[spark] object VOrderedRDDFunctions {

  implicit def fromRDD[K: ClassTag, V: ClassTag](rdd: RDD[(K, V)])
      (implicit ord: Ordering[K]): VOrderedRDDFunctions[K, V] = {
    new VOrderedRDDFunctions(rdd)
  }
}
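A possible way to call this from driver code, sketched under assumptions: the SparkContext setup and sample data are invented, and because the companion object holding the implicit conversion is private[spark], the wrapper is constructed explicitly here instead of being imported.

import org.apache.spark.{HashPartitioner, SparkConf, SparkContext}
import org.apache.spark.rdd.VOrderedRDDFunctions

object GroupBySortDemo {
  def main(args: Array[String]): Unit = {
    // Hypothetical local setup; any SparkContext would do.
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("demo"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3), ("b", 4)))

    // Each group arrives as a contiguous run after the shuffle-and-sort,
    // so no per-key hash map is built on the reduce side.
    val grouped = new VOrderedRDDFunctions(pairs).groupByKeyUsingSort(new HashPartitioner(2))
    grouped.collect().foreach { case (k, vs) => println(s"$k -> ${vs.mkString(", ")}") }

    sc.stop()
  }
}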
Example 2
Source File: HashJoin.scala, from iolap (Apache License 2.0)
package org.apache.spark.sql.execution.joins

import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.util.collection.CompactBuffer

trait HashJoin {
  self: SparkPlan =>

  val leftKeys: Seq[Expression]
  val rightKeys: Seq[Expression]
  val buildSide: BuildSide
  val left: SparkPlan
  val right: SparkPlan

  // The build side is hashed into a HashedRelation; the streamed side is
  // probed against it one row at a time.
  protected lazy val (buildPlan, streamedPlan) = buildSide match {
    case BuildLeft => (left, right)
    case BuildRight => (right, left)
  }

  protected lazy val (buildKeys, streamedKeys) = buildSide match {
    case BuildLeft => (leftKeys, rightKeys)
    case BuildRight => (rightKeys, leftKeys)
  }

  override def output: Seq[Attribute] = left.output ++ right.output

  @transient protected lazy val buildSideKeyGenerator: Projection =
    newProjection(buildKeys, buildPlan.output)

  @transient protected lazy val streamSideKeyGenerator: () => MutableProjection =
    newMutableProjection(streamedKeys, streamedPlan.output)

  protected def hashJoin(
      streamIter: Iterator[Row],
      hashedRelation: HashedRelation): Iterator[Row] = {
    new Iterator[Row] {
      private[this] var currentStreamedRow: Row = _
      private[this] var currentHashMatches: CompactBuffer[Row] = _
      private[this] var currentMatchPosition: Int = -1

      // Mutable per row objects.
      private[this] val joinRow = new JoinedRow2

      private[this] val joinKeys = streamSideKeyGenerator()

      // Either there is another match for the current streamed row, or another
      // streamed row with at least one match can be fetched.
      override final def hasNext: Boolean =
        (currentMatchPosition != -1 && currentMatchPosition < currentHashMatches.size) ||
          (streamIter.hasNext && fetchNext())

      override final def next(): Row = {
        val ret = buildSide match {
          case BuildRight => joinRow(currentStreamedRow, currentHashMatches(currentMatchPosition))
          case BuildLeft => joinRow(currentHashMatches(currentMatchPosition), currentStreamedRow)
        }
        currentMatchPosition += 1
        ret
      }

      // Advances the streamed side until a row with a non-null join key hits
      // the hashed relation; returns false once the stream is drained.
      private final def fetchNext(): Boolean = {
        currentHashMatches = null
        currentMatchPosition = -1

        while (currentHashMatches == null && streamIter.hasNext) {
          currentStreamedRow = streamIter.next()
          if (!joinKeys(currentStreamedRow).anyNull) {
            currentHashMatches = hashedRelation.get(joinKeys.currentValue)
          }
        }

        if (currentHashMatches == null) {
          false
        } else {
          currentMatchPosition = 0
          true
        }
      }
    }
  }
}
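To see the build/probe pattern without Spark's internals, here is a deliberately simplified, self-contained sketch: standard mutable collections stand in for HashedRelation and CompactBuffer, and all names are invented. It is an illustration of the technique, not Spark's actual implementation.

import scala.collection.mutable

object HashJoinSketch {

  // Build phase: hash every row of the (smaller) build side by its key,
  // keeping all rows that share a key, as HashedRelation does.
  def buildHashedRelation[K, R](build: Iterator[R])(key: R => K)
      : mutable.Map[K, mutable.ArrayBuffer[R]] = {
    val relation = mutable.Map.empty[K, mutable.ArrayBuffer[R]]
    build.foreach { row =>
      relation.getOrElseUpdate(key(row), mutable.ArrayBuffer.empty[R]) += row
    }
    relation
  }

  // Probe phase: stream the (larger) side and emit one output pair per match.
  def hashJoin[K, L, R](stream: Iterator[L],
      hashed: mutable.Map[K, mutable.ArrayBuffer[R]])(key: L => K): Iterator[(L, R)] = {
    stream.flatMap { l =>
      hashed.get(key(l)) match {
        case Some(matches) => matches.iterator.map(r => (l, r))
        case None          => Iterator.empty
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val hashed = buildHashedRelation(Iterator(("a", 1), ("b", 2), ("a", 3)))(_._1)
    val joined = hashJoin(Iterator(("a", "x"), ("c", "y"), ("b", "z")), hashed)(_._1)
    joined.foreach(println) // ((a,x),(a,1)), ((a,x),(a,3)), ((b,z),(b,2))
  }
}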
Example 3
Source File: HashedRelationSuite.scala, from iolap (Apache License 2.0)
package org.apache.spark.sql.execution.joins

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Projection, Row}
import org.apache.spark.util.collection.CompactBuffer

class HashedRelationSuite extends SparkFunSuite {

  // Key is simply the record itself
  private val keyProjection = new Projection {
    override def apply(row: Row): Row = row
  }

  test("GeneralHashedRelation") {
    val data = Array(Row(0), Row(1), Row(2), Row(2))
    val hashed = HashedRelation(data.iterator, keyProjection)
    assert(hashed.isInstanceOf[GeneralHashedRelation])

    assert(hashed.get(data(0)) == CompactBuffer[Row](data(0)))
    assert(hashed.get(data(1)) == CompactBuffer[Row](data(1)))
    assert(hashed.get(Row(10)) === null)

    val data2 = CompactBuffer[Row](data(2))
    data2 += data(2)
    assert(hashed.get(data(2)) == data2)
  }

  test("UniqueKeyHashedRelation") {
    val data = Array(Row(0), Row(1), Row(2))
    val hashed = HashedRelation(data.iterator, keyProjection)
    assert(hashed.isInstanceOf[UniqueKeyHashedRelation])

    assert(hashed.get(data(0)) == CompactBuffer[Row](data(0)))
    assert(hashed.get(data(1)) == CompactBuffer[Row](data(1)))
    assert(hashed.get(data(2)) == CompactBuffer[Row](data(2)))
    assert(hashed.get(Row(10)) === null)

    val uniqHashed = hashed.asInstanceOf[UniqueKeyHashedRelation]
    assert(uniqHashed.getValue(data(0)) == data(0))
    assert(uniqHashed.getValue(data(1)) == data(1))
    assert(uniqHashed.getValue(data(2)) == data(2))
    assert(uniqHashed.getValue(Row(10)) == null)
  }
}
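As the isInstanceOf assertions show, HashedRelation picks its concrete implementation from the build-side data: when every key occurs exactly once it produces a UniqueKeyHashedRelation, which adds a getValue shortcut returning the single matching row, and otherwise a GeneralHashedRelation, whose get returns a CompactBuffer holding every row for the key (two copies of Row(2) in the first test).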