org.apache.spark.util.CollectionsUtils Scala Examples
The following examples show how to use org.apache.spark.util.CollectionsUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: HBasePartitioner.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.serializer.JavaSerializer
import org.apache.spark.util.{CollectionsUtils, Utils}
import org.apache.spark.{Partitioner, SparkEnv}

object HBasePartitioner {
  /** Orders raw HBase values by delegating to `Bytes.compareTo`. */
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType): Int = Bytes.compareTo(a, b)
  }
}

/**
 * Partitioner that places a raw key by locating it among the table's split keys.
 *
 * @param splitKeys the split keys; the first entry is dropped before searching
 *                  (see `realSplitKeys`)
 */
class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For a pre-split table splitKeys(0) = bytes[0]; it is removed here,
  // otherwise partition 0 would always be empty and
  // we would miss the last region's data when bulk loading.
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  /** One partition per split key, or a single partition when there are no split keys. */
  def numPartitions: Int = if (len == 0) 1 else len

  // Determine which binary search method to use only once.
  // The field is @transient, so it must also be lazy: a plain transient val would be
  // null after Java deserialization and getPartition would fail with a
  // NullPointerException on the binary-search path.
  @transient private lazy val binarySearch: ((Array[t], t) => Int) =
    CollectionsUtils.makeBinarySearch[t]

  /**
   * Returns the partition index for `key`.
   *
   * @param key the record key; it is cast to `HBaseRawType`
   * @return an index in `[0, realSplitKeys.length]`
   */
  def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // With at most 128 split keys a linear scan is used instead of binary search.
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  /**
   * Two partitioners are equal when their split keys have the same contents.
   * The keys are arrays, so they are compared element-wise with
   * `java.util.Arrays.equals`; `==` on arrays only compares references.
   */
  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.length == splitKeys.length &&
        splitKeys.indices.forall(i => java.util.Arrays.equals(r.splitKeys(i), splitKeys(i)))
    case _ => false
  }

  /**
   * Content-based hash consistent with `equals`: each key contributes
   * `java.util.Arrays.hashCode` rather than the array's identity hash.
   */
  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + java.util.Arrays.hashCode(splitKeys(i))
      i += 1
    }
    result = prime * result
    result
  }
}
Example 2
Source File: HBasePartitioner.scala From Heracles with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner
import org.apache.spark.util.CollectionsUtils

object HBasePartitioner {
  /** Orders raw HBase values by delegating to `Bytes.compareTo`. */
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType): Int = Bytes.compareTo(a, b)
  }
}

/**
 * Partitioner that places a raw key by locating it among the table's split keys.
 *
 * @param splitKeys the split keys; the first entry is dropped before searching
 *                  (see `realSplitKeys`)
 */
class HBasePartitioner (val splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For a pre-split table splitKeys(0) = bytes[0]; it is removed here,
  // otherwise partition 0 would always be empty and
  // we would miss the last region's data when bulk loading.
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  /** One partition per split key, or a single partition when there are no split keys. */
  override def numPartitions: Int = if (len == 0) 1 else len

  // Determine which binary search method to use only once.
  // Transient and lazy: not serialized, rebuilt on first use.
  @transient private lazy val binarySearch: ((Array[t], t) => Int) =
    CollectionsUtils.makeBinarySearch[t]

  /**
   * Returns the partition index for `key`.
   *
   * @param key the record key; it is cast to `HBaseRawType`
   * @return an index in `[0, realSplitKeys.length]`
   */
  override def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // With at most 128 split keys a linear scan is used instead of binary search.
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  /**
   * Two partitioners are equal when their split keys have the same contents.
   * The keys are arrays, so they are compared element-wise with
   * `java.util.Arrays.equals`; `==` on arrays only compares references.
   */
  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.length == splitKeys.length &&
        splitKeys.indices.forall(i => java.util.Arrays.equals(r.splitKeys(i), splitKeys(i)))
    case _ => false
  }

  /**
   * Content-based hash consistent with `equals`: each key contributes
   * `java.util.Arrays.hashCode` rather than the array's identity hash.
   */
  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + java.util.Arrays.hashCode(splitKeys(i))
      i += 1
    }
    result = prime * result
    result
  }
}
Example 3
Source File: HBasePartitioner.scala From Spark-SQL-on-HBase with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner
import org.apache.spark.util.CollectionsUtils

object HBasePartitioner {
  /** Orders raw HBase values by delegating to `Bytes.compareTo`. */
  implicit object HBaseRawOrdering extends Ordering[HBaseRawType] {
    def compare(a: HBaseRawType, b: HBaseRawType): Int = Bytes.compareTo(a, b)
  }
}

/**
 * Partitioner that places a raw key by locating it among the table's split keys.
 *
 * @param splitKeys the split keys; the first entry is dropped before searching
 *                  (see `realSplitKeys`)
 */
class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner {
  import HBasePartitioner.HBaseRawOrdering

  type t = HBaseRawType

  lazy private val len = splitKeys.length

  // For a pre-split table splitKeys(0) = bytes[0]; it is removed here,
  // otherwise partition 0 would always be empty and
  // we would miss the last region's data when bulk loading.
  lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail

  /** One partition per split key, or a single partition when there are no split keys. */
  def numPartitions: Int = if (len == 0) 1 else len

  // Determine which binary search method to use only once.
  // Transient and lazy: not serialized, rebuilt on first use.
  @transient private lazy val binarySearch: ((Array[t], t) => Int) =
    CollectionsUtils.makeBinarySearch[t]

  /**
   * Returns the partition index for `key`.
   *
   * @param key the record key; it is cast to `HBaseRawType`
   * @return an index in `[0, realSplitKeys.length]`
   */
  def getPartition(key: Any): Int = {
    val k = key.asInstanceOf[t]
    var partition = 0
    if (len <= 128 && len > 0) {
      // With at most 128 split keys a linear scan is used instead of binary search.
      val ordering = implicitly[Ordering[t]]
      while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) {
        partition += 1
      }
    } else {
      partition = binarySearch(realSplitKeys, k)
      // binarySearch either returns the match location or -[insertion point]-1
      if (partition < 0) {
        partition = -partition - 1
      }
      if (partition > realSplitKeys.length) {
        partition = realSplitKeys.length
      }
    }
    partition
  }

  /**
   * Two partitioners are equal when their split keys have the same contents.
   * The keys are arrays, so they are compared element-wise with
   * `java.util.Arrays.equals`; `==` on arrays only compares references.
   */
  override def equals(other: Any): Boolean = other match {
    case r: HBasePartitioner =>
      r.splitKeys.length == splitKeys.length &&
        splitKeys.indices.forall(i => java.util.Arrays.equals(r.splitKeys(i), splitKeys(i)))
    case _ => false
  }

  /**
   * Content-based hash consistent with `equals`: each key contributes
   * `java.util.Arrays.hashCode` rather than the array's identity hash.
   */
  override def hashCode(): Int = {
    val prime = 31
    var result = 1
    var i = 0
    while (i < splitKeys.length) {
      result = prime * result + java.util.Arrays.hashCode(splitKeys(i))
      i += 1
    }
    result = prime * result
    result
  }
}
Example 4
Source File: RangeDPartitioner.scala From Simba with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.simba.partitioner

import org.apache.spark.util.CollectionsUtils
import org.apache.spark.{Partitioner, SparkEnv}
import org.apache.spark.rdd.{RDD, ShuffledRDD}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.util.MutablePair

import scala.reflect.ClassTag

object RangeDPartition {
  /** True when the active `SparkEnv` uses a `SortShuffleManager`. */
  def sortBasedShuffleOn: Boolean = SparkEnv.get.shuffleManager match {
    case _: SortShuffleManager => true
    case _ => false
  }

  /**
   * Shuffles `origin` with a [[RangeDPartitioner]] built from `range_bounds` in ascending order.
   *
   * Each record's `InternalRow` is copied before the shuffle. When sort-based shuffle is
   * off, one `MutablePair` per partition is reused for every emitted record.
   *
   * @param origin       keyed input records
   * @param range_bounds partition boundaries handed to the partitioner
   * @return the shuffled RDD
   */
  def apply[K: Ordering: ClassTag, T](origin: RDD[(K, (T, InternalRow))],
                                      range_bounds: Array[K]): RDD[(K, (T, InternalRow))] = {
    val copied =
      if (sortBasedShuffleOn) {
        origin.mapPartitions { records =>
          records.map { record =>
            val payload = record._2
            (record._1, (payload._1, payload._2.copy()))
          }
        }
      } else {
        origin.mapPartitions { records =>
          val reusable = new MutablePair[K, (T, InternalRow)]()
          records.map { record =>
            val payload = record._2
            reusable.update(record._1, (payload._1, payload._2.copy()))
          }
        }
      }

    val partitioner = new RangeDPartitioner(range_bounds, ascending = true)
    new ShuffledRDD[K, (T, InternalRow), (T, InternalRow)](copied, partitioner)
  }
}

/**
 * Partitioner that maps a key to the range delimited by `range_bounds`.
 *
 * @param range_bounds boundaries between consecutive partitions
 * @param ascending    when false, the partition index is mirrored
 *                     (`range_bounds.length - index`)
 */
class RangeDPartitioner[K: Ordering: ClassTag](range_bounds: Array[K], ascending: Boolean)
  extends Partitioner {

  /** One more partition than there are boundaries. */
  def numPartitions: Int = range_bounds.length + 1

  private val binarySearch: ((Array[K], K) => Int) = CollectionsUtils.makeBinarySearch[K]

  /**
   * Returns the partition index for `key`, which is cast to `K`.
   * Fewer than 128 bounds are scanned linearly; otherwise binary search is used.
   */
  def getPartition(key: Any): Int = {
    val target = key.asInstanceOf[K]
    val boundCount = range_bounds.length

    val slot =
      if (boundCount < 128) {
        val keyOrdering = implicitly[Ordering[K]]

        // Index of the first bound that `target` is not greater than, or boundCount.
        @scala.annotation.tailrec
        def scan(i: Int): Int =
          if (i < boundCount && keyOrdering.gt(target, range_bounds(i))) scan(i + 1) else i

        scan(0)
      } else {
        val found = binarySearch(range_bounds, target)
        // A negative result encodes -[insertion point]-1.
        val insertion = if (found < 0) -found - 1 else found
        math.min(insertion, boundCount)
      }

    if (ascending) slot else boundCount - slot
  }
}