java.util.Comparator Scala Examples

The following examples show how to use java.util.Comparator from Scala. Each example is taken from an open-source project; the project name, source file, and license are listed above the code.
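Before diving into the project examples, here is a minimal, self-contained sketch of the basic pattern (not taken from any of the projects below; all names and values are illustrative): implement compare, then hand the comparator to a Java API that orders elements.

import java.util.{Arrays, Comparator}

object ComparatorSketch extends App {
  // Order strings by length, breaking ties alphabetically.
  val byLength: Comparator[String] = new Comparator[String] {
    override def compare(a: String, b: String): Int = {
      val c = Integer.compare(a.length, b.length)
      if (c != 0) c else a.compareTo(b)
    }
  }

  val words = Array("pear", "fig", "apple", "kiwi")
  Arrays.sort(words, byLength)
  println(words.mkString(", ")) // fig, kiwi, pear, apple
}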
Example 1
Source File: ConcurrentRingSet.scala    From rflows   with Apache License 2.0
package api.routing.metrics

import java.util.Comparator
import java.util.concurrent.ConcurrentSkipListSet
import java.util.concurrent.atomic.AtomicLong


final class ConcurrentRingSet[K](val maxSize: Int)(val onRemove: K => Unit) extends Iterable[K] {

  @volatile private var isActive = true

  private lazy val set = new ConcurrentSkipListSet[PositionedKey](new Comparator[PositionedKey] {
    override def compare(o1: PositionedKey, o2: PositionedKey): Int = if (o1.key == o2.key) 0 else if (o1.position > o2.position) 1 else -1
  })

  private case class PositionedKey(position: Long, key: K)

  private val count = new AtomicLong(0)

  def put(k: K): Boolean = if (isActive) {
    // $COVERAGE-OFF$
    if (count.get() == Long.MaxValue + 1) { set.clear(); count.set(0) } // just in case the counter wraps: Long.MaxValue + 1 overflows to Long.MinValue
    // $COVERAGE-ON$
    val index = count.incrementAndGet()
    while (set.size >= maxSize) Option(set.pollFirst()).map(_.key).foreach(onRemove(_))
    set.add(PositionedKey(index, k))
  }
  else false

  def close() = {
    isActive = false
    this
  }

  import scala.collection.JavaConverters._
  override def iterator: Iterator[K] = set.descendingIterator().asScala.map(_.key)
} 
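A brief usage sketch of the ring set above (the keys are illustrative): once maxSize entries are present, the oldest key is evicted and passed to onRemove before the new key is added.

object RingSetSketch extends App {
  val evicted = scala.collection.mutable.ArrayBuffer.empty[String]
  val ring = new ConcurrentRingSet[String](maxSize = 2)(k => evicted += k)

  ring.put("a"); ring.put("b"); ring.put("c")
  println(ring.toList) // List(c, b) -- newest first, via descendingIterator
  println(evicted)     // ArrayBuffer(a)
}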
Example 2
Source File: Utils.scala    From shc   with Apache License 2.0
package org.apache.spark.sql.execution.datasources.hbase

import java.util
import java.util.Comparator

import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.catalyst.expressions.MutableRow
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.SparkSqlSerializer
import org.apache.spark.sql.types._

import scala.collection.mutable.ArrayBuffer
import scala.math.Ordering

object Utils {

  def setRowCol(
      row: MutableRow,
      field: (Field, Int),
      src: HBaseType,
      offset: Int,
      length: Int): Unit = {
    val index = field._2
    val f = field._1
    if (f.sedes.isDefined) {
      // If we already have a sedes defined, use it.
      val m = f.sedes.get.deserialize(src, offset, length)
      row.update(index, m)
    } else if (f.exeSchema.isDefined) {
      // println("avro schema is defined to do deserialization")
      // If we have an avro schema defined, use it to get the record, then convert it to a catalyst data type
      val m = AvroSedes.deserialize(src, f.exeSchema.get)
      // println(m)
      val n = f.avroToCatalyst.map(_(m))
      row.update(index, n.get)
    } else  {
      // Fall back to atomic type
      f.dt match {
        case BooleanType => row.setBoolean(index, toBoolean(src, offset))
        case ByteType => row.setByte(index, src(offset))
        case DoubleType => row.setDouble(index, Bytes.toDouble(src, offset))
        case FloatType => row.setFloat(index, Bytes.toFloat(src, offset))
        case IntegerType => row.setInt(index, Bytes.toInt(src, offset))
        case LongType => row.setLong(index, Bytes.toLong(src, offset))
        case ShortType => row.setShort(index, Bytes.toShort(src, offset))
        case StringType => row.update(index, toUTF8String(src, offset, length))
        case BinaryType =>
          val newArray = new Array[Byte](length)
          System.arraycopy(src, offset, newArray, 0, length)
          row.update(index, newArray)
        case _ => row.update(index, SparkSqlSerializer.deserialize[Any](src)) //TODO
      }
    }
  }

  // convert input to data type
  def toBytes(input: Any, field: Field): Array[Byte] = {
    if (field.sedes.isDefined) {
      field.sedes.get.serialize(input)
    } else if (field.schema.isDefined) {
      // Here we assume the top level type is structType
      val record = field.catalystToAvro(input)
      AvroSedes.serialize(record, field.schema.get)
    } else {
      input match {
        case data: Boolean => Bytes.toBytes(data)
        case data: Byte => Array(data)
        case data: Array[Byte] => data
        case data: Double => Bytes.toBytes(data)
        case data: Float => Bytes.toBytes(data)
        case data: Int => Bytes.toBytes(data)
        case data: Long => Bytes.toBytes(data)
        case data: Short => Bytes.toBytes(data)
        case data: UTF8String => data.getBytes
        case data: String => Bytes.toBytes(data)
          //Bytes.toBytes(input.asInstanceOf[String])//input.asInstanceOf[UTF8String].getBytes
        case _ => throw new Exception(s"unsupported data type ${field.dt}") //TODO
      }
    }
  }

  def toBoolean(input: HBaseType, offset: Int): Boolean = {
    input(offset) != 0
  }

  def toUTF8String(input: HBaseType, offset: Int, length: Int): UTF8String = {
    UTF8String(input.slice(offset, offset + length))
  }
} 
Example 3
Source File: BulkLoadPartitioner.scala    From hbase-connectors   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.util
import java.util.Comparator

import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner


@InterfaceAudience.Public
class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
  extends Partitioner {
  // when the table does not exist, startKeys is an empty Array[Array[Byte]]
  override def numPartitions: Int = if (startKeys.length == 0) 1 else startKeys.length

  override def getPartition(key: Any): Int = {

    val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
      override def compare(o1: Array[Byte], o2: Array[Byte]): Int = {
        Bytes.compareTo(o1, o2)
      }
    }

    val rowKey:Array[Byte] =
      key match {
        case qualifier: KeyFamilyQualifier =>
          qualifier.rowKey
        case wrapper: ByteArrayWrapper =>
          wrapper.value
        case _ =>
          key.asInstanceOf[Array[Byte]]
      }
    var partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
    if (partition < 0)
      partition = partition * -1 + -2
    if (partition < 0)
      partition = 0
    partition
  }
} 
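The negative-result handling above relies on java.util.Arrays.binarySearch returning -(insertionPoint) - 1 when the key is not an exact match, so partition * -1 + -2 maps that result to insertionPoint - 1, i.e. the region whose start key precedes the row key. A small sketch of the same arithmetic with plain integers (illustrative values, not from the project):

import java.util

object BinarySearchPartitionSketch extends App {
  val startKeys = Array(0, 10, 20, 30) // analogous to the HBase region start keys

  def partitionFor(key: Int): Int = {
    var partition = util.Arrays.binarySearch(startKeys, key)
    if (partition < 0)
      partition = partition * -1 + -2
    if (partition < 0)
      partition = 0
    partition
  }

  println(partitionFor(10)) // exact match on a start key -> 1
  println(partitionFor(15)) // falls after start key 10   -> 1
  println(partitionFor(5))  // falls after start key 0    -> 0
}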
Example 4
Source File: PartitionedPairBuffer.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY = Int.MaxValue / 2 // 2 ^ 30 - 1
} 
Example 5
Source File: SortAndTopNode.scala    From ingraph   with Eclipse Public License 1.0
package ingraph.ire.nodes.unary

import java.util.Comparator

import ingraph.ire.datatypes.Tuple
import ingraph.ire.math.GenericMath
import ingraph.ire.messages.{ChangeSet, ReteMessage}
import ingraph.ire.util.SizeCounter
import ingraph.ire.messages.SingleForwarder

import scala.collection.immutable.VectorBuilder

class SortAndTopNode(override val next: (ReteMessage) => Unit,
                     tupleLength: Int,
                     selectionMask: Vector[(Tuple) => Any],
                     skip: Option[Long],
                     limit: Option[Long],
                     ascendingOrder: Vector[Boolean])
  extends UnaryNode with SingleForwarder {
  val longSkip = skip.getOrElse(0L)
  val longLimit = limit.getOrElse(Long.MaxValue / 2L)
  //implicit val order = new Ordering[Tuple] {
  val comparator = new Comparator[Tuple] {
    override def compare(x: Tuple, y: Tuple): Int = {
      if (x == y)
        return 0
      for ((x, y, ascending) <- (keyLookup(x), keyLookup(y), ascendingOrder).zipped) {
        val cmp = GenericMath.compare(x, y)
        if (cmp != 0) {
          return if (ascending) cmp else -cmp
        }
      }
      // TreeMap uses the comparator (not equals) to detect duplicate keys, so we need to make sure
      // that distinct tuples with equal sort keys still compare as different
      GenericMath.compare(x, y)
    }
  }

  // we can use Scala Tree once we migrate to 2.12
  val data: java.util.Map[Tuple, Int] = new java.util.TreeMap(comparator)
  //null //mutable.TreeMap[Tuple, Int]().withDefault(t => 0)

  def keyLookup(t: Tuple): Vector[Any] = selectionMask.map(m => m(t))

  def getTuplesInOrder: Vector[Tuple] = {
    var total: Long = 0
    val iterator = data.entrySet.iterator
    val builder = new VectorBuilder[Tuple]
    while (total < longLimit + longSkip && iterator.hasNext) {
      val entry = iterator.next
      val tuple = entry.getKey
      val count = entry.getValue
      for (i <- 0L until math.min(count, longLimit + longSkip - total))
        builder += tuple
      total += count
    }
    builder.result().drop(longSkip.toInt)
  }

  override def onChangeSet(changeSet: ChangeSet): Unit = {
    // TODO maybe checking the changed elements against the lowest forwarded element would speed things up
    val prevTop = getTuplesInOrder
    for (tuple <- changeSet.positive) {
      val count: Int = data.getOrDefault(tuple, 0)
      data.put(tuple, count + 1)
    }
    for (tuple <- changeSet.negative) {
      val count = data.get(tuple)
      if (count > 1) {
        data.put(tuple, count - 1)
      } else {
        data.remove(tuple)
      }
    }
    val topN = getTuplesInOrder
    if (topN != prevTop) {
      forward(ChangeSet(positive = topN, negative = prevTop))
    }
  }

  // TODO we can simplify this in 2.12
  override def onSizeRequest(): Long = SizeCounter.count(data.keySet)
} 
Example 6
Source File: PartitionedPairBuffer.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.unsafe.Platform
import org.apache.spark.unsafe.array.ByteArrayMethods
import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 2
} 
Example 7
Source File: DynamicExecution.scala    From hazelcast-scala   with Apache License 2.0
package com.hazelcast.Scala.serialization

import java.util.{ Arrays, Comparator }
import java.util.concurrent.Callable

import scala.reflect.ClassTag

import com.hazelcast.Scala.{ Aggregator, Pipe }
import com.hazelcast.core.IFunction
import com.hazelcast.map.{ EntryBackupProcessor, EntryProcessor }
import com.hazelcast.nio.{ ObjectDataInput, ObjectDataOutput }
import com.hazelcast.query.Predicate


object DynamicExecution extends DynamicExecution {
  protected def serializeBytecodeFor(cls: Class[_]) = true
}

abstract class DynamicExecution extends SerializerEnum(Defaults) {
  protected def serializeBytecodeFor(cls: Class[_]): Boolean
  private[this] val loaderByClass = new ClassValue[Option[ByteArrayClassLoader]] {
    private[this] val excludePackages = Set("com.hazelcast.", "scala.", "java.", "javax.")
    private def include(cls: Class[_]): Boolean = !excludePackages.exists(cls.getName.startsWith) && serializeBytecodeFor(cls)
    def computeValue(cls: Class[_]): Option[ByteArrayClassLoader] =
      if (include(cls)) {
        try {
          Some(ByteArrayClassLoader(cls))
        } catch {
          case ncdf: NoClassDefFoundError =>
            classByName.get(cls.getName) match {
              case Some((bytes, classForBytes)) if cls == classForBytes => Some(new ByteArrayClassLoader(cls.getName, bytes))
              case _ => throw ncdf
            }
        }
      } else None
  }
  private[this] val classByName = new collection.concurrent.TrieMap[String, (Array[Byte], Class[_])]

  private class ClassBytesSerializer[T: ClassTag] extends StreamSerializer[T] {
    def write(out: ObjectDataOutput, any: T): Unit = {
      out.writeUTF(any.getClass.getName)
      loaderByClass.get(any.getClass) match {
        case Some(cl) => out.writeByteArray(cl.bytes)
        case _ => out.writeByteArray(Array.emptyByteArray)
      }
      UnsafeSerializer.write(out, any)
    }
    def read(inp: ObjectDataInput): T = {
      val className = inp.readUTF()
      val classBytes = inp.readByteArray()
      val cls =
        if (classBytes.length == 0) {
          Class.forName(className)
        } else {
          classByName.get(className) match {
            case Some((bytes, cls)) if Arrays.equals(classBytes, bytes) => cls
            case _ =>
              val cl = new ByteArrayClassLoader(className, classBytes)
              val cls = Class.forName(className, true, cl)
              classByName.put(className, classBytes -> cls)
              cls
          }
        }
      UnsafeSerializer.read(inp, cls).asInstanceOf[T]
    }
  }

  type S[T] = StreamSerializer[T]

  val Function0Ser: S[Function0[_]] = new ClassBytesSerializer
  val Function1Ser: S[Function1[_, _]] = new ClassBytesSerializer
  val Function2Ser: S[Function2[_, _, _]] = new ClassBytesSerializer
  val Function3Ser: S[Function3[_, _, _, _]] = new ClassBytesSerializer
  val PartialFunctionSer: S[PartialFunction[_, _]] = new ClassBytesSerializer
  val EntryProcessorSer: S[EntryProcessor[_, _]] = new ClassBytesSerializer
  val EntryBackupProcessorSer: S[EntryBackupProcessor[_, _]] = new ClassBytesSerializer
  val CallableSer: S[Callable[_]] = new ClassBytesSerializer
  val RunnableSer: S[Runnable] = new ClassBytesSerializer
  val PredicateSer: S[Predicate[_, _]] = new ClassBytesSerializer
  val PipeSer: S[Pipe[_]] = new ClassBytesSerializer
  val AggregatorSer: S[Aggregator[_, _]] = new ClassBytesSerializer
  val ComparatorSer: S[Comparator[_]] = new ClassBytesSerializer
  val IFunctionSer: S[IFunction[_, _]] = new ClassBytesSerializer

} 
Example 8
Source File: PartitionedPairBuffer.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY = Int.MaxValue / 2 // 2 ^ 30 - 1, i.e. 1073741823
} 
Example 9
Source File: BulkLoadPartitioner.scala    From SparkOnHBase   with Apache License 2.0
package org.apache.hadoop.hbase.spark

import java.util
import java.util.Comparator

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner


class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
  extends Partitioner {

  override def numPartitions: Int = startKeys.length

  override def getPartition(key: Any): Int = {

    val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
      override def compare(o1: Array[Byte], o2: Array[Byte]): Int = {
        Bytes.compareTo(o1, o2)
      }
    }

    val rowKey:Array[Byte] =
      key match {
        case qualifier: KeyFamilyQualifier =>
          qualifier.rowKey
        case wrapper: ByteArrayWrapper =>
          wrapper.value
        case _ =>
          key.asInstanceOf[Array[Byte]]
      }
    val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
    if (partition < 0) partition * -1 + -2
    else partition
  }
} 
Example 10
Source File: PartitionedPairBuffer.scala    From iolap   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.storage.BlockObjectWriter
import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  override def writablePartitionedIterator(): WritablePartitionedIterator = {
    WritablePartitionedIterator.fromIterator(iterator)
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
} 
Example 11
Source File: PartitionedPairBuffer.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY = Int.MaxValue / 2 // 2 ^ 30 - 1
} 
Example 12
Source File: SameLengthByteArrayComparator.scala    From hail   with MIT License
package is.hail.shuffler

import java.util.Comparator

class SameLengthByteArrayComparator extends Comparator[Array[Byte]] {
  def compare(l: Array[Byte], r: Array[Byte]): Int = {
    assert(l.length == r.length)
    val length = l.length
    var i = 0
    while (i < length) {
      if (l(i) < r(i)) {
        return -1
      }
      if (l(i) > r(i)) {
        return 1
      }
      i += 1
    }
    return 0
  }
} 
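A brief usage sketch (the byte values are illustrative), assuming the class above is on the classpath: the comparator can be passed directly to java.util.Arrays.sort as long as every array has the same length, which the assert enforces.

import java.util

import is.hail.shuffler.SameLengthByteArrayComparator

object SameLengthSortSketch extends App {
  val rows: Array[Array[Byte]] = Array(
    Array[Byte](2, 9),
    Array[Byte](2, 1),
    Array[Byte](1, 5))

  util.Arrays.sort(rows, new SameLengthByteArrayComparator)
  println(rows.map(_.mkString("[", ",", "]")).mkString(" ")) // [1,5] [2,1] [2,9]
}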
Example 13
Source File: PartitionedPairBuffer.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY = Int.MaxValue / 2 // 2 ^ 30 - 1
} 
Example 14
Source File: JBasicUtils.scala    From scala-commons   with MIT License
package com.avsystem.commons
package jiop

import java.util.Comparator
import java.util.concurrent.Callable
import java.{lang => jl, math => jm, util => ju}

import com.avsystem.commons.misc.{Sam, TimestampConversions}

trait JBasicUtils {
  def jRunnable(code: => Any) = Sam[Runnable](code)
  def jCallable[T](expr: => T) = Sam[Callable[T]](expr)
  def jComparator[T](cmp: (T, T) => Int) = Sam[Comparator[T]](cmp)

  implicit def jDateTimestampConversions(date: JDate): TimestampConversions =
    new TimestampConversions(date.getTime)

  type JByte = jl.Byte
  type JShort = jl.Short
  type JInteger = jl.Integer
  type JLong = jl.Long
  type JFloat = jl.Float
  type JDouble = jl.Double
  type JBoolean = jl.Boolean
  type JCharacter = jl.Character
  type JBigInteger = jm.BigInteger
  type JBigDecimal = jm.BigDecimal
  type JDate = ju.Date
  type JNumber = jl.Number
  type JVoid = jl.Void
  type JEnum[E <: jl.Enum[E]] = jl.Enum[E]
  type JStringBuilder = jl.StringBuilder
} 
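A brief usage sketch of the jComparator helper above (mixing the trait in directly is just for illustration; it assumes the Sam helper materializes the Comparator from the function literal as in the snippet above):

import java.{util => ju}

import com.avsystem.commons.jiop.JBasicUtils

object JComparatorSketch extends App with JBasicUtils {
  // Build a java.util.Comparator from a plain Scala function.
  val byLength = jComparator[String]((a, b) => Integer.compare(a.length, b.length))

  val names = new ju.ArrayList[String](ju.Arrays.asList("pear", "fig", "apple"))
  names.sort(byLength)
  println(names) // [fig, pear, apple]
}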
Example 15
Source File: TimeBasedUUIDs.scala    From akka-persistence-couchbase   with Apache License 2.0
package akka.persistence.couchbase.internal

import java.time.Instant
import java.time.format.{DateTimeFormatterBuilder, SignStyle}
import java.time.temporal.ChronoField
import java.util.{Comparator, UUID}

import akka.annotation.InternalApi


  // Note: the enclosing object declaration and the SortableTimeFormatter used below are
  // elided in this excerpt.
  def toSortableString(id: UUID): String = {
    require(id.version() == 1)
    val builder = new StringBuilder()
    val instant = UUIDTimestamp(id.timestamp()).toInstant
    builder.append(SortableTimeFormatter.format(instant))
    builder.append('_')
    builder.append("%20s".format(java.lang.Long.toUnsignedString(id.getLeastSignificantBits)))
    builder.toString()
  }

  def fromSortableString(text: String): UUID = {
    val parts = text.split('_')
    val parsed = SortableTimeFormatter.parse(parts(0))
    val instant = Instant.from(parsed).atZone(UUIDTimestamp.GMT)
    val timestamp = UUIDTimestamp(instant)
    val lsb = java.lang.Long.parseUnsignedLong(parts(1).trim)
    TimeBasedUUIDs.create(timestamp, lsb)
  }
} 
Example 16
Source File: SortedArraySet.scala    From sangria   with Apache License 2.0
package sangria.validation.rules.experimental.overlappingfields

import java.util
import java.util.Comparator
import java.util.function.Consumer


class SortedArraySet[T](private val sortedMembers: util.ArrayList[T]) extends java.lang.Iterable[T] {

  //cache the hashCode for faster handling
  override val hashCode: Int = {
    sortedMembers.hashCode()
  }

  //equals and hash code delegate to the members
  override def equals(obj: Any): Boolean = {
    obj match {
      case other: SortedArraySet[_] => eq(other) || (hashCode == other.hashCode && size() == other.size() && sortedMembers == other.sortedMembers)
      case _                        => false
    }
  }

  def isEmpty: Boolean = {
    sortedMembers.isEmpty
  }

  def size(): Int = {
    sortedMembers.size()
  }

  override def iterator(): util.Iterator[T] = {
    sortedMembers.iterator()
  }

  override def forEach(action: Consumer[_ >: T]): Unit = {
    sortedMembers.forEach(action)
  }
}

object SortedArraySet {

  def newBuilder[T: Ordering](sizeHint: Int): Builder[T] = {
    new Builder[T](sizeHint, implicitly[Ordering[T]])
  }

  def newBuilder[T: Ordering](): Builder[T] = {
    new Builder[T](implicitly[Ordering[T]])
  }

  //Beware:
  //The comparator won't be used in the final set for equality or for removing duplicates; it is only used here for sorting.
  //As such it has to be compatible with the standard equality and hashCode implementations.
  class Builder[T] private(private val members: util.ArrayList[T], private val comparator: Comparator[T]) {

    def this(sizeHint: Int, ordering: Ordering[T]) {
      this(new util.ArrayList[T](sizeHint), ordering)
    }

    def this(ordering: Ordering[T]) {
      this(new util.ArrayList[T](), ordering)
    }


    def add(value: T): this.type = {
      members.add(value)
      this
    }

    def addAll(values: util.Collection[T]): this.type = {
      members.addAll(values)
      this
    }

    def build(): SortedArraySet[T] = {
      sortAndRemoveDuplicates()
      new SortedArraySet(members)
    }

    private def sortAndRemoveDuplicates(): Unit = {
      members.sort(comparator)
      var into = 0
      var from = 0
      while (from < members.size()) {
        val first_from = members.get(from)
        members.set(into, first_from)
        into += 1
        do {
          from += 1
        } while (from < members.size() && members.get(from) == first_from)
      }
      members.subList(into, members.size()).clear()
    }
  }

} 
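A brief usage sketch of the builder (illustrative values): members are sorted with the Ordering-backed comparator and adjacent duplicates are dropped at build time.

import sangria.validation.rules.experimental.overlappingfields.SortedArraySet

object SortedArraySetSketch extends App {
  val set = SortedArraySet.newBuilder[Int]()
    .add(3).add(1).add(3).add(2)
    .build()

  val it = set.iterator()
  while (it.hasNext) println(it.next()) // prints 1, 2, 3 -- sorted, duplicates removed
}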
Example 17
Source File: FieldNameAndArguments.scala    From sangria   with Apache License 2.0
package sangria.validation.rules.experimental.overlappingfields

import java.util
import java.util.{Comparator, Objects}

import sangria.ast
import sangria.renderer.QueryRenderer


final class FieldNameAndArguments(private val field: ast.Field) {

  private val fieldName: String = field.name
  private val arguments: util.ArrayList[(String, String)] = argumentsKey(field.arguments)

  override val hashCode: Int = {
    Objects.hash(fieldName, arguments)
  }

  override def equals(obj: Any): Boolean = {
    obj match {
      case other: FieldNameAndArguments => fieldName == other.fieldName && arguments == other.arguments
      case _                            => false
    }
  }

  def conflictReason(other: FieldNameAndArguments): String = {
    if (fieldName != other.fieldName) {
      s"'$fieldName' and '${other.fieldName}' are different fields"
    } else if (arguments != other.arguments) {
      "of differing arguments"
    } else {
      throw new IllegalArgumentException("no conflict between keys")
    }
  }

  private def argumentsKey(arguments: Vector[ast.Argument]): util.ArrayList[(String, String)] = {
    val key = new util.ArrayList[(String, String)](arguments.size)
    arguments.foreach { argument =>
      key.add(argument.name -> QueryRenderer.render(argument.value, QueryRenderer.Compact))
    }
    key.sort(new Comparator[(String, String)] {
      override def compare(a: (String, String), b: (String, String)): Int = a._1.compareTo(b._1)
    })
    key
  }
} 
Example 18
Source File: PartitionedPairBuffer.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.util.collection.WritablePartitionedPairCollection._


  // Note: the enclosing PartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
    : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object PartitionedPairBuffer {
  val MAXIMUM_CAPACITY = Int.MaxValue / 2 // 2 ^ 30 - 1
} 
Example 19
Source File: RPartitionedPairBuffer.scala    From OAP   with Apache License 2.0
package org.apache.spark.util.collection

import java.util.Comparator

import org.apache.spark.unsafe.array.ByteArrayMethods
import org.apache.spark.util.collection.RWritablePartitionedPairCollection._


  // Note: the enclosing RPartitionedPairBuffer class declaration and the members that populate
  // the data array and curSize (used below) are elided in this excerpt.
  override def partitionedDestructiveSortedIterator(keyComparator: Option[Comparator[K]])
  : Iterator[((Int, K), V)] = {
    val comparator = keyComparator.map(partitionKeyComparator).getOrElse(partitionComparator)
    new Sorter(new KVArraySortDataFormat[(Int, K), AnyRef]).sort(data, 0, curSize, comparator)
    iterator
  }

  private def iterator(): Iterator[((Int, K), V)] = new Iterator[((Int, K), V)] {
    var pos = 0

    override def hasNext: Boolean = pos < curSize

    override def next(): ((Int, K), V) = {
      if (!hasNext) {
        throw new NoSuchElementException
      }
      val pair = (data(2 * pos).asInstanceOf[(Int, K)], data(2 * pos + 1).asInstanceOf[V])
      pos += 1
      pair
    }
  }
}

private object RPartitionedPairBuffer {
  val MAXIMUM_CAPACITY: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 2
} 
Example 20
Source File: SplashUtils.scala    From splash   with Apache License 2.0
package org.apache.spark.shuffle

import java.io.{InputStream, OutputStream}
import java.util.Comparator

import org.apache.spark.internal.Logging

import scala.util.control.NonFatal

object SplashUtils extends Logging {
  def withResources[T <: AutoCloseable, V](r: => T)(f: T => V): V = {
    val resource: T = r
    require(resource != null, "resource is null")
    var exception: Throwable = null
    try {
      f(resource)
    } catch {
      case NonFatal(e) =>
        exception = e
        throw e
      case e: Throwable =>
        logError("fatal error received.", e)
        throw e
    } finally {
      closeAndAddSuppressed(exception, resource)
    }
  }

  private def closeAndAddSuppressed(e: Throwable,
      resource: AutoCloseable): Unit = {
    if (e != null) {
      try {
        resource.close()
      } catch {
        case NonFatal(suppressed) =>
          e.addSuppressed(suppressed)
      }
    } else {
      resource.close()
    }
  }

  
  // Note: the original object also defines the hash() helper used by SplashHashComparator
  // below, plus the object's closing brace; both are elided in this excerpt. A minimal
  // stand-in for the helper (an assumption, not the project's exact code):
  def hash[T](obj: T): Int = if (obj == null) 0 else obj.hashCode()
}


class SplashHashComparator[K] extends Comparator[K] {
  def compare(key1: K, key2: K): Int = {
    val hash1 = SplashUtils.hash(key1)
    val hash2 = SplashUtils.hash(key2)
    if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1
  }
}


class SplashSpillableIterator[T](var upstream: Iterator[T],
    val spillInMemoryIterator: Iterator[T] => SpilledFile,
    val getNextUpstream: SpilledFile => Iterator[T])
    extends Iterator[T] with Logging {
  private val spillLock = new Object
  private var spilledFileOpt: Option[SpilledFile] = None
  private var cur: T = readNext()

  def spill(): Option[SpilledFile] = spillLock.synchronized {
    spilledFileOpt match {
      case Some(_) =>
        // has spilled, return None
        None
      case None =>
        // never spilled, now spilling
        val spilledFile = spillInMemoryIterator(upstream)
        spilledFileOpt = Some(spilledFile)
        spilledFileOpt
    }
  }

  def readNext(): T = spillLock.synchronized {
    spilledFileOpt match {
      case Some(spilledFile) =>
        upstream = getNextUpstream(spilledFile)
        spilledFileOpt = None
      case None =>
      // do nothing
    }
    if (upstream.hasNext) {
      upstream.next()
    } else {
      null.asInstanceOf[T]
    }
  }

  override def hasNext: Boolean = cur != null

  override def next(): T = {
    val ret = cur
    cur = readNext()
    ret
  }
} 
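A brief usage sketch of withResources above (the input data is illustrative): the resource is closed on both the success and failure paths, and a failure during close is added as a suppressed exception instead of masking the original one.

import java.io.ByteArrayInputStream

import org.apache.spark.shuffle.SplashUtils

object WithResourcesSketch extends App {
  val firstByte = SplashUtils.withResources(new ByteArrayInputStream(Array[Byte](42, 7))) { in =>
    in.read()
  }
  println(firstByte) // 42
}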
Example 21
Source File: Equiv.scala    From perf_tester   with Apache License 2.0
package scala
package math

import java.util.Comparator


trait Equiv[T] extends Any with Serializable {
  def equiv(x: T, y: T): Boolean
}

trait LowPriorityEquiv {
  self: Equiv.type =>

  implicit def universalEquiv[T] : Equiv[T] = universal[T]
}

object Equiv extends LowPriorityEquiv {
  def reference[T <: AnyRef]: Equiv[T] = new Equiv[T] {
    def equiv(x: T, y: T) = x eq y
  }
  def universal[T]: Equiv[T] = new Equiv[T] {
    def equiv(x: T, y: T) = x == y
  }
  def fromComparator[T](cmp: Comparator[T]): Equiv[T] = new Equiv[T] {
    def equiv(x: T, y: T) = cmp.compare(x, y) == 0
  }
  def fromFunction[T](cmp: (T, T) => Boolean): Equiv[T] = new Equiv[T] {
    def equiv(x: T, y: T) = cmp(x, y)
  }
  def by[T, S: Equiv](f: T => S): Equiv[T] =
    fromFunction((x, y) => implicitly[Equiv[S]].equiv(f(x), f(y)))

  @inline def apply[T: Equiv]: Equiv[T] = implicitly[Equiv[T]]
} 
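A brief usage sketch of fromComparator, using a Comparator shipped with the JDK:

object EquivSketch extends App {
  val caseInsensitive: Equiv[String] = Equiv.fromComparator(String.CASE_INSENSITIVE_ORDER)
  println(caseInsensitive.equiv("Spark", "SPARK")) // true
  println(caseInsensitive.equiv("Spark", "Flink")) // false
}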
Example 22
Source File: DistributedScanner.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.counter.helper

import java.util
import java.util.Comparator

import com.google.common.primitives.SignedBytes
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes

object DistributedScanner {
   val BUCKET_BYTE_SIZE = Bytes.SIZEOF_BYTE

   def getRealRowKey(result: Result): Array[Byte] = {
     result.getRow.drop(BUCKET_BYTE_SIZE)
   }
 }

class DistributedScanner(table: Table, scan: Scan) extends AbstractClientScanner {
   import DistributedScanner._

   private val BYTE_MAX = BigInt(256)

   private[helper] val scanners = {
     for {
       i <- 0 until BYTE_MAX.pow(BUCKET_BYTE_SIZE).toInt
     } yield {
       val bucketBytes: Array[Byte] = Bytes.toBytes(i).takeRight(BUCKET_BYTE_SIZE)
       val newScan = new Scan(scan).setStartRow(bucketBytes ++ scan.getStartRow).setStopRow(bucketBytes ++ scan.getStopRow)
       table.getScanner(newScan)
     }
   }

   val resultCache = new util.TreeMap[Result, java.util.Iterator[Result]](new Comparator[Result] {
     val comparator = SignedBytes.lexicographicalComparator()
     override def compare(o1: Result, o2: Result): Int = {
       comparator.compare(getRealRowKey(o1), getRealRowKey(o2))
     }
   })

   lazy val initialized = {
     val iterators = scanners.map(_.iterator()).filter(_.hasNext)
     iterators.foreach { it =>
       resultCache.put(it.next(), it)
     }
     iterators.nonEmpty
   }

   override def next(): Result = {
     if (initialized) {
       Option(resultCache.pollFirstEntry()).map { entry =>
         val it = entry.getValue
         if (it.hasNext) {
           // fill cache
           resultCache.put(it.next(), it)
         }
         entry.getKey
       }.orNull
     } else {
       null
     }
   }

   override def close(): Unit = {
     for {
       scanner <- scanners
     } {
       scanner.close()
     }
   }

  override def renewLease(): Boolean = true
} 
Example 23
Source File: BulkLoadPartitioner.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.loader.spark

import java.util
import java.util.Comparator

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner


class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
  extends Partitioner {

  override def numPartitions: Int = startKeys.length

  override def getPartition(key: Any): Int = {

    val rowKey:Array[Byte] =
      key match {
        case qualifier: KeyFamilyQualifier =>
          qualifier.rowKey
        case _ =>
          key.asInstanceOf[Array[Byte]]
      }

    val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
      override def compare(o1: Array[Byte], o2: Array[Byte]): Int = {
        Bytes.compareTo(o1, o2)
      }
    }
    val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
    if (partition < 0) partition * -1 + -2
    else partition
  }
} 
Example 24
Source File: BulkLoadPartitioner.scala    From incubator-s2graph   with Apache License 2.0
package org.apache.s2graph.s2jobs.spark

import java.util
import java.util.Comparator

import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.Partitioner


class BulkLoadPartitioner(startKeys:Array[Array[Byte]])
  extends Partitioner {

  override def numPartitions: Int = startKeys.length

  override def getPartition(key: Any): Int = {

    val rowKey:Array[Byte] =
      key match {
        case qualifier: KeyFamilyQualifier =>
          qualifier.rowKey
        case _ =>
          key.asInstanceOf[Array[Byte]]
      }

    val comparator: Comparator[Array[Byte]] = new Comparator[Array[Byte]] {
      override def compare(o1: Array[Byte], o2: Array[Byte]): Int = {
        Bytes.compareTo(o1, o2)
      }
    }
    val partition = util.Arrays.binarySearch(startKeys, rowKey, comparator)
    if (partition < 0) partition * -1 + -2
    else partition
  }
}