scala.collection.mutable.PriorityQueue Scala Examples

Source File: TopElementsAggregator.scala From salt-core with Apache License 2.0

5 votes

package software.uncharted.salt.core.analytic.collection

import software.uncharted.salt.core.analytic.Aggregator

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.{Map => MutableMap}
import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag


/**
 * Aggregator that counts how often each element occurs and reports the most
 * frequent ones.
 *
 * The intermediate value is typed as a read-only `scala.collection.Map`, but
 * `add` and `merge` switch to (and then keep reusing) a mutable map so that
 * repeated calls update counts in place instead of copying.
 *
 * @param elementLimit maximum number of (element, count) pairs returned by `finish`
 * @tparam ET type of the elements being counted
 */
class TopElementsAggregator[ET: ClassTag](elementLimit: Int)
extends Aggregator[Seq[ET], Map[ET, Int], List[(ET, Int)]] {

  /** Returns an empty count map as the starting intermediate value. */
  def default(): Map[ET, Int] = {
    Map[ET, Int]()
  }

  /**
   * Adds one record's elements to the running counts.
   *
   * @param current counts accumulated so far; updated in place when it is already mutable
   * @param next    elements of the next record, or `None` when the record has no value
   * @return the updated counts, or `current` itself when `next` is `None`
   */
  override def add(current: Map[ET, Int], next: Option[Seq[ET]]): Map[ET, Int] =
    next match {
      case Some(elements) =>
        val counts = asMutableCounts(current)
        elements.foreach(element => counts.put(element, counts.getOrElse(element, 0) + 1))
        counts
      case None =>
        current
    }

  /**
   * Combines two count maps by summing the counts of equal elements.
   *
   * If either input is mutable it is updated in place and returned (the left
   * one is preferred); otherwise a mutable copy of `left` is created.
   */
  override def merge(left: Map[ET, Int], right: Map[ET, Int]): Map[ET, Int] = {
    val (target, source) = (left, right) match {
      case (mutableLeft: MutableMap[ET @unchecked, Int @unchecked], _) => (mutableLeft, right)
      case (_, mutableRight: MutableMap[ET @unchecked, Int @unchecked]) => (mutableRight, left)
      case _ => (asMutableCounts(left), right)
    }
    source.foreach { case (element, count) =>
      target.put(element, target.getOrElse(element, 0) + count)
    }
    target
  }

  /**
   * Produces the final result: at most `elementLimit` (element, count) pairs,
   * highest count first.
   */
  override def finish(intermediate: Map[ET, Int]): List[(ET, Int)] = {
    // Max-heap keyed on the count, so each dequeue yields the largest remaining count.
    val byCount = new PriorityQueue[(ET, Int)]()(Ordering.by(_._2))
    intermediate.foreach(entry => byCount.enqueue(entry))
    // The size is read once, before any dequeue; a non-positive limit yields an empty list.
    List.fill(Math.min(elementLimit, byCount.size))(byCount.dequeue())
  }

  /**
   * Returns `counts` itself when it is already a mutable map, otherwise a new
   * mutable hash map holding the same entries.
   */
  private def asMutableCounts(counts: Map[ET, Int]): MutableMap[ET, Int] = counts match {
    // Type arguments are erased at runtime; the static type of `counts` vouches for them.
    case alreadyMutable: MutableMap[ET @unchecked, Int @unchecked] => alreadyMutable
    case _ =>
      val copy = new HashMap[ET, Int]()
      copy ++= counts
      copy
  }
}

Source File: Huffman.scala From bonsai with MIT License

5 votes

package com.stripe.bonsai
package example

import org.github.jamm.MemoryMeter

import scala.collection.BitSet
import scala.collection.mutable.PriorityQueue
import scala.util.Random.nextGaussian

/**
 * Example application: builds a Huffman tree over the printable ASCII range
 * with random weights, converts it with `Tree(...)`, and prints the deep
 * memory footprint of both representations as measured by `MemoryMeter`.
 */
object HuffmanExample extends App {
  /** Binary Huffman tree: either a `Branch` with two subtrees or a `Leaf` carrying a symbol. */
  sealed trait HuffmanTree[+A]

  object HuffmanTree {
    case class Branch[+A](zero: HuffmanTree[A], one: HuffmanTree[A]) extends HuffmanTree[A]
    case class Leaf[+A](value: A) extends HuffmanTree[A]

    /**
     * Builds a Huffman tree from a symbol -> weight table.
     *
     * @param symbols non-empty table of weights; an empty table fails the `require`
     * @return the single tree left after repeatedly merging the two lightest trees
     */
    def apply[A](symbols: Map[A, Double]): HuffmanTree[A] = {
      require(symbols.nonEmpty)

      // Negating the weight turns the max-heap into a "lightest first" queue.
      val pending = new PriorityQueue[(HuffmanTree[A], Double)]()(Ordering.by(-_._2))

      // Every symbol starts out as its own single-leaf tree.
      symbols.foreach { entry =>
        pending.enqueue((Leaf(entry._1), entry._2))
      }

      // Merge the two lightest trees until only one remains.
      while (pending.size > 1) {
        val lightest = pending.dequeue()
        val nextLightest = pending.dequeue()
        val merged = Branch(lightest._1, nextLightest._1)
        pending.enqueue((merged, lightest._2 + nextLightest._2))
      }

      pending.dequeue()._1
    }

    /** `TreeOps` instance exposing a `HuffmanTree` as a tree whose leaves are labelled with `Some(symbol)`. */
    implicit def huffmanTreeOps[A] = new TreeOps[HuffmanTree[A], Option[A]] {
      type Node = HuffmanTree[A]

      // The tree is its own root node.
      def root(tree: HuffmanTree[A]): Option[HuffmanTree[A]] = Some(tree)

      // A branch has exactly two children (zero first, then one); anything else has none.
      def children(tree: HuffmanTree[A]): Iterable[HuffmanTree[A]] = tree match {
        case Branch(zero, one) => List(zero, one)
        case _ => Nil
      }

      // Only leaves carry a label.
      def label(tree: HuffmanTree[A]): Option[A] =
        Some(tree).collect { case Leaf(value) => value }
    }
  }

  /** Adds `decode` to any tree type that has a `TreeOps` instance with `Option[A]` labels. */
  implicit class HuffmanTreeOps[T, A](tree: T)(implicit treeOps: TreeOps[T, Option[A]]) {
    import HuffmanTree.{ Branch, Leaf }
    import treeOps._

    /**
     * Walks the tree once for each index in `0 until len` and collects the
     * labels encountered.
     *
     * At each index: a labelled node emits its value and restarts from the
     * root (without reading `bits(i)`); an unlabelled node moves to its first
     * child when `bits(i)` is set and to its second child otherwise.
     *
     * NOTE(review): a label reached on the very last step is not emitted, and
     * a set bit selects the first child — confirm against the encoder, which
     * is not part of this file.
     */
    def decode(bits: BitSet, len: Int): Vector[A] = {
      val start = tree.root.get
      val finalState = (0 until len).foldLeft((start, Vector.empty[A])) { (state, i) =>
        val (node, decoded) = state
        node.label
          .map(value => (start, decoded :+ value))
          .getOrElse {
            val kids = node.children
            val next = if (bits(i)) kids.head else kids.iterator.drop(1).next()
            (next, decoded)
          }
      }
      finalState._2
    }
  }

  // One random non-negative weight per printable ASCII character.
  val symbols = (' ' to '~').map(symbol => symbol -> math.abs(nextGaussian)).toMap
  val bigTree = HuffmanTree(symbols)
  val smallTree = Tree(bigTree)

  // Compare the deep sizes of the two representations.
  val meter = new MemoryMeter()
  val bigTreeSize = meter.measureDeep(bigTree)
  val smallTreeSize = meter.measureDeep(smallTree)
  println(s"big tree:   ${bigTreeSize} bytes")
  println(s"small tree: ${smallTreeSize} bytes")
  println(f"${bigTreeSize / smallTreeSize.toDouble}%.1fx reduction")
}

Source File: ExtremesSummarizer.scala From flint with Apache License 2.0

5 votes

package com.twosigma.flint.rdd.function.summarize.summarizer

import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag


/**
 * Summarizer that keeps the `k` largest elements (according to `ordering`)
 * in a bounded priority queue.
 *
 * @param k        maximum number of elements retained
 * @param tag      class tag used to build the output array
 * @param ordering ordering that defines "largest"
 */
case class ExtremesSummarizer[T](val k: Int, implicit val tag: ClassTag[T], ordering: Ordering[T])
  extends FlippableSummarizer[T, PriorityQueue[T], Array[T]] {

  // To find the k largest, we use a min heap, so the order needs to be reversed.
  override def zero(): PriorityQueue[T] = PriorityQueue.empty(ordering.reverse)

  /**
   * Offers `t` to the queue `u`, keeping at most `k` elements.
   *
   * `t` is accepted while the queue still has room, or when it is larger than
   * the smallest retained element (the head of the min heap). The previous
   * condition only accepted `t` when it beat the current head, which silently
   * dropped values while fewer than `k` elements had been collected.
   *
   * @return `u` itself, updated in place
   */
  override def add(u: PriorityQueue[T], t: T): PriorityQueue[T] = {
    val hasRoom = u.size < k
    // `u.nonEmpty` guards `u.head` when k <= 0 and the queue is empty.
    if (hasRoom || (u.nonEmpty && ordering.gt(t, u.head))) {
      u.enqueue(t)
      // Evict the smallest elements until the bound holds again.
      while (u.size > k) {
        u.dequeue()
      }
    }

    u
  }

  /**
   * Combines two queues into a new one holding the `k` largest elements of both.
   */
  override def merge(u1: PriorityQueue[T], u2: PriorityQueue[T]): PriorityQueue[T] = {
    val u = (u1 ++ u2)
    // If there are more than k items after merge, keep the top k items
    while (u.size > k) {
      u.dequeue()
    }

    u
  }

  // Output elements in order defined by ordering
  override def render(u: PriorityQueue[T]): Array[T] = u.toArray.sorted(ordering)

}

Source File: MockScheduler.scala From Spark-2.3.1 with Apache License 2.0

5 votes

package org.apache.spark.streaming.kafka010.mocks

import java.util.concurrent.TimeUnit

import scala.collection.mutable.PriorityQueue

import kafka.utils.{Scheduler, Time}


      if(curr.periodic) {
        curr.nextExecution += curr.period
        this.tasks += curr
      }
    }
  }

  /**
   * Registers a task and immediately runs `tick()`.
   *
   * `delay` and `period` are interpreted in `unit` and converted to
   * milliseconds before being combined with `time.milliseconds`; previously
   * `unit` was accepted but ignored. Calls that use the default unit behave
   * exactly as before.
   *
   * @param name   name stored on the created task
   * @param fun    the function stored on the created task
   * @param delay  time before the first execution, in `unit`
   * @param period interval between executions, in `unit`; a negative value means "not periodic"
   * @param unit   time unit of `delay` and `period`
   */
  def schedule(
      name: String,
      fun: () => Unit,
      delay: Long = 0,
      period: Long = -1,
      unit: TimeUnit = TimeUnit.MILLISECONDS): Unit = synchronized {
    val delayMs = unit.toMillis(delay)
    // Keep negative and zero periods untouched. A positive period must stay
    // positive after truncation to milliseconds, otherwise the rescheduled
    // task's `nextExecution` would stop advancing.
    val periodMs = if (period > 0) math.max(1L, unit.toMillis(period)) else period
    tasks += MockTask(name, fun, time.milliseconds + delayMs, period = periodMs)
    tick()
  }

}

/**
 * A scheduled task.
 *
 * @param name          name of the task
 * @param fun           the function held by the task
 * @param nextExecution time of the next execution; mutable so it can be advanced
 * @param period        interval between executions; negative means the task is not periodic
 */
case class MockTask(
    val name: String,
    val fun: () => Unit,
    var nextExecution: Long,
    val period: Long) extends Ordered[MockTask] {

  /** True when the task has a non-negative period. */
  def periodic: Boolean = !(period < 0)

  /**
   * Inverted comparison on `nextExecution`: the task that runs earlier
   * compares as the greater one.
   */
  def compare(t: MockTask): Int = {
    val theirs = t.nextExecution
    val mine = nextExecution
    if (theirs < mine) -1
    else if (theirs == mine) 0
    else 1
  }
}

Source File: MedianHeap.scala From Spark-2.3.1 with Apache License 2.0

5 votes

package org.apache.spark.util.collection

import scala.collection.mutable.PriorityQueue


  // Heap receiving the values that `insert` classifies as larger than the current median.
  // It is ordered by `ord.reverse`; NOTE(review): `ord` is declared outside this view —
  // presumably the natural Double ordering, which would make `head` the smallest value
  // of this half. Confirm against the declaration.
  private[this] var largerHalf = PriorityQueue.empty[Double](ord.reverse)

  /** True when neither half holds any value. */
  def isEmpty(): Boolean =
    !(smallerHalf.nonEmpty || largerHalf.nonEmpty)

  /** Total number of values held across both halves. */
  def size(): Int = {
    val lowerCount = smallerHalf.size
    val upperCount = largerHalf.size
    lowerCount + upperCount
  }

  /**
   * Inserts `x` into one of the two halves and then rebalances them.
   *
   * The very first value goes to `largerHalf` (an arbitrary choice); after
   * that, values strictly greater than the current median go to `largerHalf`
   * and all others to `smallerHalf`.
   */
  def insert(x: Double): Unit = {
    // `||` short-circuits, so `median` is never evaluated on an empty heap.
    val target = if (isEmpty() || x > median) largerHalf else smallerHalf
    target.enqueue(x)
    rebalance()
  }

  /**
   * Moves one value from the bigger half to the other whenever their sizes
   * differ by more than one.
   */
  private[this] def rebalance(): Unit = {
    // Positive when `largerHalf` holds more values, negative when `smallerHalf` does.
    val surplus = largerHalf.size - smallerHalf.size
    if (surplus > 1) {
      smallerHalf.enqueue(largerHalf.dequeue())
    } else if (surplus < -1) {
      largerHalf.enqueue(smallerHalf.dequeue())
    }
  }

  /**
   * Returns the current median.
   *
   * With equally sized halves this is the mean of the two heads; otherwise it
   * is the head of whichever half holds more values.
   *
   * @throws NoSuchElementException if no value has been inserted
   */
  def median: Double = {
    if (isEmpty()) {
      throw new NoSuchElementException("MedianHeap is empty.")
    }
    val upperCount = largerHalf.size
    val lowerCount = smallerHalf.size
    if (upperCount > lowerCount) {
      largerHalf.head
    } else if (upperCount < lowerCount) {
      smallerHalf.head
    } else {
      (largerHalf.head + smallerHalf.head) / 2.0
    }
  }
}

scala.collection.mutable.PriorityQueue Scala Examples