scala.util.hashing.MurmurHash3 Scala Example

Source File: EthereumTransactionRouter.scala From Raphtory with Apache License 2.0

5 votes

package com.raphtory.examples.blockchain.routers

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.EdgeAddWithProperties
import com.raphtory.core.model.communication.Properties
import com.raphtory.core.model.communication.StringProperty
import com.raphtory.core.model.communication.VertexAddWithProperties

import scala.util.hashing.MurmurHash3

/**
  * Router worker that turns one Ethereum transaction tuple into graph updates.
  *
  * The incoming value is rendered with `toString`, its first and last
  * characters are dropped, and the remainder is split on commas. The fields
  * are read positionally: 0 = sending wallet, 1 = receiving wallet (may be
  * empty), 2 = transferred value, 3 = timestamp in seconds.
  */
class EthereumTransactionRouter(override val routerId: Int,override val workerID:Int, val initialManagerCount: Int) extends RouterWorker {

  /**
    * Emits a vertex for the sender, a vertex for the receiver and an edge
    * between them carrying the transferred value.
    *
    * @param value the raw tuple; only its `toString` form is used
    */
  override protected def parseTuple(value: Any): Unit = {
    val components   = value.toString.drop(1).dropRight(1).split(",")
    val creationDate = components(3).toLong * 1000 // seconds to milliseconds
    val sourceWallet = components(0)
    // An empty recipient means the money was burnt; those transfers all point
    // at one shared placeholder wallet whose id is the literal string "null".
    val targetWallet = if (components(1).nonEmpty) components(1) else "null"

    // Wallet ids are hashed to obtain the vertex ids.
    val sourceNode = MurmurHash3.stringHash(sourceWallet)
    val targetNode = MurmurHash3.stringHash(targetWallet)

    // Create both endpoints, keeping the wallet id as a property on each.
    sendGraphUpdate(
            VertexAddWithProperties(creationDate, sourceNode, Properties(StringProperty("id", sourceWallet)))
    )
    sendGraphUpdate(
            VertexAddWithProperties(creationDate, targetNode, Properties(StringProperty("id", targetWallet)))
    )
    // Create the edge between them. The transferred amount is stored under
    // "value" for regular and burnt transfers alike (the regular branch used
    // to store it under "id", contradicting its own comment).
    sendGraphUpdate(
            EdgeAddWithProperties(
                    creationDate,
                    sourceNode,
                    targetNode,
                    Properties(StringProperty("value", components(2)))
            )
    )
  }
}

Source File: SignRandomProjectionLSH.scala From lexrank-summarizer with MIT License

5 votes

package io.github.karlhigley.lexrank

import scala.collection.immutable.BitSet
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.Logging

/**
  * Sign-random-projection signatures computed against a shared pool of
  * Gaussian values instead of one explicit random vector per bit.
  *
  * @param poolSize number of Gaussian values generated for the pool
  */
class SignRandomProjectionLSH(poolSize: Int = 10000) extends Serializable with Logging {
  val pool = SignRandomProjectionLSH.generatePool(poolSize)

  /**
    * Computes the signature of `vector` as the set of bit positions
    * (1 to `length`) whose projection is strictly positive.
    *
    * A vector without stored entries projects to 0 on every bit and therefore
    * yields an empty signature (previously `reduce` threw on it).
    *
    * @param vector the sparse vector to sign
    * @param length number of signature bits to evaluate
    * @return the positions of the bits that are set
    */
  def computeSignature(vector: SparseVector, length: Int): BitSet = {
    val elements = vector.indices.zip(vector.values)

    val setBits = (1 to length).filter { bit =>
      val dotProduct = elements.map { case (index, weight) =>
        // Pick the pseudo-random component for this (bit, index) pair; the
        // double modulo keeps the pool index non-negative for negative hashes.
        val hash      = MurmurHash3.productHash((bit, index))
        val poolIndex = ((hash % poolSize) + poolSize) % poolSize
        weight * pool(poolIndex)
      }.sum
      dotProduct > 0
    }

    BitSet(setBits: _*)
  }

}

/** Helpers for working with sign-random-projection signatures. */
object SignRandomProjectionLSH {

  /** Every possible signature over the bit positions 1 to `length`. */
  def signatureSet(length: Int): Set[BitSet] = {
    val allBits = BitSet((1 to length): _*)
    allBits.subsets().toSet
  }

  /**
    * Estimates the cosine between the vectors behind two signatures from the
    * fraction of bit positions on which the signatures differ.
    *
    * @param length total number of bits in each signature
    */
  def estimateCosine(a: BitSet, b: BitSet, length: Int): Double = {
    val differingBits = (a ^ b).size
    val fraction      = differingBits.toDouble / length.toDouble
    math.cos(fraction * math.Pi)
  }

  /** Draws `size` values from a freshly created, unseeded Gaussian source. */
  private def generatePool(size: Int): Array[Double] = {
    val gaussian = new Random()
    Array.fill[Double](size)(gaussian.nextGaussian())
  }
}

Source File: ShardedSparkeyReader.scala From scio with Apache License 2.0

5 votes

package com.spotify.scio.extra.sparkey.instances

import java.util

import com.spotify.sparkey.{IndexHeader, LogHeader, SparkeyReader}

import scala.util.hashing.MurmurHash3
import scala.jdk.CollectionConverters._


/**
  * A [[SparkeyReader]] that spreads lookups over several underlying readers,
  * choosing the reader by hashing the key.
  *
  * @param sparkeys  the underlying readers, keyed by shard index
  * @param numShards the modulus applied to the key hash to pick a shard
  */
class ShardedSparkeyReader(val sparkeys: Map[Short, SparkeyReader], val numShards: Short)
    extends SparkeyReader {
  // NOTE(review): `%` keeps the sign of the hash, so the shard index can be
  // negative. Left untouched because it presumably has to agree with the
  // indices used when the shards were written -- confirm against the writer.
  def hashKey(arr: Array[Byte]): Short = (MurmurHash3.bytesHash(arr, 1) % numShards).toShort

  def hashKey(str: String): Short = (MurmurHash3.stringHash(str, 1) % numShards).toShort

  /** Looks `key` up in its shard; returns `null` when that shard is absent. */
  override def getAsString(key: String): String =
    sparkeys.get(hashKey(key)).map(_.getAsString(key)).orNull

  /** Looks `key` up in its shard; returns `null` when that shard is absent. */
  override def getAsByteArray(key: Array[Byte]): Array[Byte] =
    sparkeys.get(hashKey(key)).map(_.getAsByteArray(key)).orNull

  /** Looks `key` up in its shard; returns `null` when that shard is absent. */
  override def getAsEntry(key: Array[Byte]): SparkeyReader.Entry =
    sparkeys.get(hashKey(key)).map(_.getAsEntry(key)).orNull

  override def getIndexHeader: IndexHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getIndexHeader.")

  override def getLogHeader: LogHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getLogHeader.")

  /** Duplicates every underlying reader and wraps the copies in a new sharded reader. */
  override def duplicate(): SparkeyReader =
    new ShardedSparkeyReader(sparkeys.map { case (k, v) => (k, v.duplicate) }, numShards)

  override def close(): Unit = sparkeys.values.foreach(_.close())

  /**
    * Concatenation of the iterators of all shards. With no shards at all the
    * result is an empty iterator (previously `reduce` threw on an empty map).
    */
  override def iterator(): util.Iterator[SparkeyReader.Entry] =
    sparkeys.values
      .map(_.iterator.asScala)
      .reduceOption(_ ++ _)
      .getOrElse(Iterator.empty)
      .asJava
}

Source File: HashedCategoricalDistribution.scala From aloha with MIT License

5 votes

package com.eharmony.aloha.util.rand

import scala.util.hashing.MurmurHash3

object HashedCategoricalDistribution {
    // Divisor that scales a non-negative hash to a float used as the sampling probability.
    private val MaxVal = Int.MaxValue.toFloat

    /** Builds a distribution from the given class probabilities. */
    def apply(probabilities: Double*): HashedCategoricalDistribution =
        new HashedCategoricalDistribution(new IntAliasMethodSampler(probabilities))
}

/**
  * A categorical distribution whose "random" draw is derived from the hash of
  * the input data, so the same data always yields the same class.
  *
  * @param sampler the alias-method sampler that performs the actual draw
  */
case class HashedCategoricalDistribution(sampler: IntAliasMethodSampler) extends (TraversableOnce[Any] => Int) {
    import HashedCategoricalDistribution.MaxVal

    /** Number of classes, as reported by the sampler (`numClasses` was otherwise unresolved here). */
    def numClasses = sampler.getNumClasses

    /**
      * Samples a class for `data`, using the ordered hash of its elements as
      * the only source of randomness.
      */
    def apply(data: TraversableOnce[Any]): Int = {
        val h = MurmurHash3 orderedHash data

        // Take the absolute value because we want (hAbs % numClasses) to give a non-negative index.  This
        // also makes the computation of p easier.  It will lower the entropy of the output of the hash function but
        // this is acceptable.  math.abs(Int.MinValue) is still negative, so that single value is saturated to
        // Int.MaxValue instead.
        val hAbs = if (h == Int.MinValue) Int.MaxValue else math.abs(h)
        val p = hAbs / MaxVal
        val k = hAbs % numClasses
        sampler.sample(k, p)
    }
}

Source File: SubString.scala From typed-schema with Apache License 2.0

5 votes

package ru.tinkoff.tschema.utils

import ru.tinkoff.tschema.utils.SubString.outOfBound

import scala.annotation.tailrec
import scala.util.hashing.MurmurHash3

/**
  * A `CharSequence` view over the half-open range `[from, to)` of a shared
  * character array; sub-sequences reuse the array instead of copying it.
  */
final class SubString private[SubString] (private val arr: Array[Char], private val from: Int, private val to: Int)
    extends CharSequence {
  @inline private[this] def size: Int = to - from

  def length(): Int = size

  /** Character at `index`, relative to the start of this view. */
  def charAt(index: Int): Char = {
    if (index < 0 || index >= size) SubString.outOfBound(index)
    arr(from + index)
  }

  /** View over `[start, end)` of this view; returns `this` when the range covers everything. */
  def subSequence(start: Int, end: Int): CharSequence = {
    if (start < 0) SubString.outOfBound(start)
    if (end > size) SubString.outOfBound(end)
    val len = end - start
    if (len < 0) SubString.outOfBound(len)
    val coversAll = start == 0 && end == size
    if (coversAll) this else new SubString(arr, from + start, from + end)
  }

  /** Equal to any `CharSequence` of the same length holding the same characters. */
  override def equals(obj: Any): Boolean = obj match {
    case other: CharSequence if other.length() == size =>
      var i = 0
      while (i < size && other.charAt(i) == charAt(i)) i += 1
      i == size
    case _ => false
  }

  /** Murmur hash over the viewed characters, mixing them two at a time. */
  override def hashCode(): Int = {
    var acc = MurmurHash3.stringSeed
    var pos = from
    while (pos + 1 < to) {
      // pack two consecutive chars into one 32-bit word
      acc = MurmurHash3.mix(acc, (arr(pos) << 16) + arr(pos + 1))
      pos += 2
    }
    // an odd number of chars leaves one trailing char to mix in
    if (pos < to) acc = MurmurHash3.mixLast(acc, arr(pos).toInt)
    MurmurHash3.finalizeHash(acc, size)
  }

  override def toString: String = new String(arr, from, size)

}

object SubString {
  @inline private def outOfBound(index: Int) = throw new StringIndexOutOfBoundsException(index)

  /** Wraps the whole of `s`. */
  def apply(s: String): SubString = new SubString(s.toCharArray, 0, s.length)
}

Source File: RawJson.scala From almond with BSD 3-Clause "New" or "Revised" License

5 votes

package almond.protocol

import java.nio.charset.StandardCharsets
import java.{util => ju}

import scala.util.hashing.MurmurHash3
import scala.util.Try

// adapted from https://github.com/plokhotnyuk/jsoniter-scala/blob/209d918a030b188f064ee55505a6c47257731b4b/jsoniter-scala-macros/src/test/scala/com/github/plokhotnyuk/jsoniter_scala/macros/JsonCodecMakerSpec.scala#L645-L666
/**
  * Wrapper around the raw bytes of a JSON value. Equality and hashing are
  * based on the byte contents rather than on the array reference.
  */
final case class RawJson(value: Array[Byte]) {
  // computed once on first use
  override lazy val hashCode: Int = MurmurHash3.arrayHash(value)

  override def equals(obj: Any): Boolean = obj match {
    case RawJson(otherBytes) => ju.Arrays.equals(value, otherBytes)
    case _                   => false
  }

  /** The bytes decoded as UTF-8, falling back to the array's own `toString` if decoding throws. */
  override def toString: String =
    Try(new String(value, StandardCharsets.UTF_8)).getOrElse(value.toString)
}

object RawJson {

  import com.github.plokhotnyuk.jsoniter_scala.core._

  /** Codec that reads and writes the JSON value verbatim as raw bytes. */
  implicit val codec: JsonValueCodec[RawJson] = new JsonValueCodec[RawJson] {
    def decodeValue(in: JsonReader, default: RawJson): RawJson = {
      val rawBytes = in.readRawValAsBytes()
      new RawJson(rawBytes)
    }

    def encodeValue(x: RawJson, out: JsonWriter): Unit = out.writeRawVal(x.value)

    // the null value is a RawJson holding zero bytes
    val nullValue: RawJson = new RawJson(Array.empty[Byte])
  }

  /** The empty JSON object, `{}`. */
  val emptyObj: RawJson = {
    val bytes = "{}".getBytes(StandardCharsets.UTF_8)
    RawJson(bytes)
  }
}

Source File: XORShiftRandom.scala From Mastering-Spark-for-Data-Science with MIT License

5 votes

package io.gzet.story.linalg

import java.nio.ByteBuffer
import java.util.{Random => JavaRandom}

import scala.util.Random
import scala.util.hashing.MurmurHash3


  /**
    * Scrambles `seed` by hashing its eight big-endian bytes with MurmurHash3.
    *
    * The buffer is sized with `Long.BYTES` (8); `Long.SIZE` is the width in
    * bits (64) and made the hash cover 56 extra zero bytes.
    *
    * @param seed the user-supplied seed
    * @return the 32-bit hash widened to a `Long`
    */
  private[this] def hashSeed(seed: Long): Long = {
    val bytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(seed).array()
    MurmurHash3.bytesHash(bytes).toLong
  }

  // Only `next` has to be overridden: nextInt, nextDouble, nextGaussian,
  // nextLong, etc. are all built on top of it.
  override protected def next(bits: Int): Int = {
    // three xor-shift steps: left 21, unsigned right 35, left 4
    val afterFirst  = seed ^ (seed << 21)
    val afterSecond = afterFirst ^ (afterFirst >>> 35)
    val afterThird  = afterSecond ^ (afterSecond << 4)
    seed = afterThird
    // keep only the requested number of low-order bits
    val mask = (1L << bits) - 1
    (afterThird & mask).toInt
  }
}

object XORShiftRandom {

  /** A shared, unseeded `scala.util.Random` instance. */
  val random: Random = new Random()
}

Source File: DatasetClient.scala From elastiknn with Apache License 2.0

5 votes

package com.klibisz.elastiknn.benchmarks

import java.util.zip.GZIPInputStream

import com.amazonaws.services.s3.AmazonS3
import com.klibisz.elastiknn.api.{ElasticsearchCodec, Vec}
import com.klibisz.elastiknn.benchmarks.Dataset._
import io.circe
import zio._
import zio.stream._

import scala.io.Source
import scala.util.Random
import scala.util.hashing.MurmurHash3

object DatasetClient {

  /** Provides the train and test vectors of a dataset as streams. */
  trait Service {
    def streamTrain(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
    def streamTest(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
  }

  /**
    * Layer whose service generates the random datasets locally and reads every
    * other dataset from a gzipped JSON-lines object in S3.
    *
    * @param bucket    bucket holding the dataset files
    * @param keyPrefix prefix placed before `<dataset name>/<train|test>.json.gz`
    */
  def s3(bucket: String, keyPrefix: String): ZLayer[Has[AmazonS3], Throwable, DatasetClient] =
    ZLayer.fromService[AmazonS3, Service] { s3Client =>
      new Service {
        private def stream(dataset: Dataset, name: String, limit: Option[Int]): Stream[Throwable, Vec] =
          dataset match {
            // Random datasets: the generator is seeded from (dims, name), so a
            // given split is reproducible.
            case sparse: RandomSparseBool =>
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(sparse.dims, name)))
              val count = if (name == "train") sparse.train else sparse.test
              Stream.range(0, count).map(_ => Vec.SparseBool.random(sparse.dims, sparse.bias))
            case dense: RandomDenseFloat =>
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(dense.dims, name)))
              val count = if (name == "train") dense.train else dense.test
              Stream.range(0, count).map(_ => Vec.DenseFloat.random(dense.dims))
            // Everything else is read from S3, one JSON vector per line.
            case _ =>
              def parseDecode(s: String): Either[circe.Error, Vec] =
                ElasticsearchCodec.parse(s).flatMap(j => ElasticsearchCodec.decode[Vec](j.hcursor))
              val s3Object = s3Client.getObject(bucket, s"$keyPrefix/${dataset.name}/${name}.json.gz")
              val managedSource =
                Managed.makeEffect(Source.fromInputStream(new GZIPInputStream(s3Object.getObjectContent)))(_.close())
              val lines = Stream.fromIteratorManaged(
                managedSource.map(src => limit.fold(src.getLines())(n => src.getLines().take(n)))
              )
              // drop whatever precedes the first '{' on each line before parsing
              lines
                .map(_.dropWhile(_ != '{'))
                .mapM(s => ZIO.fromEither(parseDecode(s)))
          }

        override def streamTrain(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] =
          stream(dataset, "train", limit)

        override def streamTest(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] =
          stream(dataset, "test", limit)
      }
    }

}

Source File: versionspecific.scala From kittens with Apache License 2.0

5 votes

package cats.derived.util

import scala.util.hashing.MurmurHash3

object VersionSpecific {
  type Lazy[+A] = shapeless.Lazy[A]

  /** Seed for hashing products; the product itself is not consulted here. */
  private[derived] def productSeed(x: Product): Int = MurmurHash3.productSeed

  /** Holds either a primary value of type `A` or a fallback of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    def fold[C](prim: A => C, sec: B => C): C

    /** Collapses both alternatives into their common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold[C](a => a, b => ev(b))
  }

  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  // the fallback is by-name, so it is evaluated each time it is folded
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): OrElse[A, B] = new Primary[A](a)
  }

  // declared in a parent class so that `primary` takes precedence in implicit search
  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: Lazy[B]): OrElse[A, B] = new Secondary[B](b.value)
  }
}

Source File: versionspecific.scala From kittens with Apache License 2.0

5 votes

package cats.derived.util

import scala.annotation.implicitNotFound
import scala.util.hashing.MurmurHash3

object VersionSpecific {

  /** Seed for hashing products, mixed with the hash of the product's prefix. */
  private[derived] def productSeed(x: Product): Int = {
    val prefixHash = x.productPrefix.hashCode
    MurmurHash3.mix(MurmurHash3.productSeed, prefixHash)
  }

  /** A deferred value of type `A`. */
  @implicitNotFound("could not find Lazy implicit value of type ${A}")
  abstract class Lazy[+A] extends Serializable {
    def value(): A
  }

  object Lazy {
    // wraps a by-name implicit; it is re-evaluated on every `value()` call
    implicit def instance[A](implicit ev: => A): Lazy[A] = new Lazy[A] {
      def value(): A = ev
    }
  }

  /** Holds either a primary value of type `A` or a fallback of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    def fold[C](prim: A => C, sec: B => C): C

    /** Collapses both alternatives into their common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold[C](a => a, b => ev(b))
  }

  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  // the fallback is by-name, so it is evaluated each time it is folded
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): OrElse[A, B] = new Primary[A](a)
  }

  // declared in a parent class so that `primary` takes precedence in implicit search
  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: => B): OrElse[A, B] = new Secondary[B](b)
  }
}

Source File: Port.scala From ip4s with Apache License 2.0

5 votes

package com.comcast.ip4s

import scala.util.Try
import scala.util.hashing.MurmurHash3

import cats.{Order, Show}


/**
  * A port number. Instances are only created through the companion, which
  * validates the range; the class hand-implements the product, equality and
  * ordering members.
  */
final class Port private (val value: Int) extends Product with Serializable with Ordered[Port] {

  /** Re-validates `value`, so the copy may be `None`. */
  def copy(value: Int): Option[Port] = Port(value)

  def compare(that: Port): Int = java.lang.Integer.compare(value, that.value)

  override def toString: String = value.toString

  override def hashCode: Int = MurmurHash3.productHash(this, productPrefix.hashCode)

  override def equals(other: Any): Boolean = other match {
    case port: Port => port.value == value
    case _          => false
  }

  override def canEqual(other: Any): Boolean = other.isInstanceOf[Port]

  override def productArity: Int = 1

  // the port number is the only product element
  override def productElement(n: Int): Any = n match {
    case 0 => value
    case _ => throw new IndexOutOfBoundsException
  }
}

object Port {
  val MinValue: Int = 0
  val MaxValue: Int = 65535

  /** A port for `value`, or `None` when it lies outside `[MinValue, MaxValue]`. */
  def apply(value: Int): Option[Port] =
    if (value < MinValue || value > MaxValue) None
    else Some(new Port(value))

  /** Parses a decimal port number; `None` when it is not an int or is out of range. */
  def fromString(value: String): Option[Port] =
    Try(value.toInt).toOption.flatMap(number => apply(number))

  def unapply(p: Port): Option[Int] = Some(p.value)

  implicit val order: Order[Port] = Order.fromComparable[Port]
  implicit val show: Show[Port] = Show.fromToString[Port]
}

Source File: Hostname.scala From ip4s with Apache License 2.0

5 votes

package com.comcast.ip4s

import scala.util.hashing.MurmurHash3

import cats.{Order, Show}


  /**
    * Parses a hostname. Returns `None` for an empty string, for one longer
    * than 253 characters, or for one that does not match `Pattern`.
    */
  def apply(value: String): Option[Hostname] = {
    val len = value.size
    if (len == 0 || len > 253) None
    else
      value match {
        case Pattern(_*) =>
          // split into dot-separated labels
          val labels = value.split('.').iterator.map(new Label(_)).toList
          if (labels.isEmpty) None else Option(new Hostname(labels, value))
        case _ => None
      }
  }

  // cats instances: ordering is taken from Hostname's own comparison, and
  // rendering from its toString.
  implicit val order: Order[Hostname] = Order.fromComparable[Hostname]
  implicit val show: Show[Hostname] = Show.fromToString[Hostname]
}

Source File: Orientation.scala From morpheus with Apache License 2.0

5 votes

package org.opencypher.okapi.ir.api.pattern

import cats.Eq

import scala.util.hashing.MurmurHash3

/**
  * Equality and hashing for a pair of endpoints, depending on how the
  * connection between them is oriented.
  */
sealed trait Orientation[E <: Endpoints] extends Eq[E] {

  /** Hash of `ends` that agrees with this orientation's `eqv`. */
  def hash(ends: E, seed: Int): Int
}

object Orientation {

  /** Source and target are distinguished: both must match position by position. */
  case object Directed extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.orderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean =
      x.source == y.source && x.target == y.target
  }

  /** Source and target are interchangeable: a swapped pair is considered equal. */
  case object Undirected extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.unorderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean = {
      val sameWay = x.source == y.source && x.target == y.target
      sameWay || (x.source == y.target && x.target == y.source)
    }
  }

  /** Both ends are the same field, so only that field is compared and hashed. */
  case object Cyclic extends Orientation[IdenticalEndpoints] {
    override def hash(ends: IdenticalEndpoints, seed: Int): Int = {
      val fieldHash = ends.field.hashCode()
      MurmurHash3.mix(seed, fieldHash)
    }

    override def eqv(x: IdenticalEndpoints, y: IdenticalEndpoints): Boolean =
      x.field == y.field
  }
}

Source File: Rule.scala From CM-Well with Apache License 2.0

5 votes

package cmwell.rts

import scala.util.hashing.MurmurHash3
import cmwell.domain.{FieldValue, Infoton}
import cmwell.formats.FormatType


// message

/** How results are transmitted: handed to a callback (`Push`) or described by a format (`Pull`). */
sealed abstract class TransmitType
/** Transmission through a callback that receives a batch of strings. */
case class Push(handler: (Seq[String]) => Unit) extends TransmitType
/** Transmission identified only by the requested format. */
case class Pull(format: FormatType) extends TransmitType

/**
  * A path filter: matches the path itself and the paths below it.
  *
  * @param path      the path prefix to match
  * @param recursive when false, paths with a further '/' beyond the character
  *                  that follows the prefix are rejected
  */
class Path(path: String, val recursive: Boolean) extends Serializable {
  val hashValue: Int = MurmurHash3.stringHash(path)
  def length = path.length
  def hash: Int = hashValue

  override def toString() = s"path [$path] recursive [$recursive]"

  /**
    * Tests whether `p` is covered by this filter.
    *
    * The prefix is compared as a string. It used to be compared by hash only,
    * which accepts any same-length prefix whose hash collides with this path's
    * and costs a full hash computation per call.
    *
    * @param p the candidate path
    * @return true when `p` equals the path, or starts with it and satisfies
    *         the recursion rule
    */
  def check(p: String): Boolean =
    p.startsWith(path) && {
      // exact match, or anything below when recursive; otherwise only direct
      // children: skip the character after the prefix and allow no further '/'
      p.length == length || recursive || !p.drop(length + 1).contains('/')
    }
}

/**
  * A field filter. Each entry names a field and the values accepted for it;
  * an empty value set accepts any value of that field.
  */
class MatchMap(fields: Map[String, Set[FieldValue]]) extends Serializable {

  /**
    * Tests the fields of a candidate against this filter.
    *
    * @param f the candidate's fields and their values
    * @return true when the filter is empty, or when at least one filtered
    *         field is present in `f` with an accepted value
    */
  def check(f: Map[String, Set[FieldValue]]): Boolean =
    fields.isEmpty || fields.exists { case (fieldName, accepted) =>
      f.get(fieldName).exists { present =>
        accepted.isEmpty || accepted.intersect(present).nonEmpty
      }
    }
}

/** A subscription rule: no filtering, filtering by path, by fields, or by both. */
sealed abstract class Rule
/** Rule carrying no filter at all. */
case object NoFilter extends Rule
/** Rule carrying only a path filter. */
case class PathFilter(path: Path) extends Rule
/** Rule carrying only a field filter. */
case class MatchFilter(matchMap: MatchMap) extends Rule
/** Rule carrying both a path filter and a field filter. */
case class PMFilter(path: Path, matchMap: MatchMap) extends Rule

/** Factory overloads that pick the rule variant from the arguments supplied. */
object Rule {

  /** No arguments: no filtering. */
  def apply(): NoFilter.type = NoFilter

  /** Path only. */
  def apply(path: String, recursive: Boolean): PathFilter = {
    val pathFilter = new Path(path, recursive)
    PathFilter(pathFilter)
  }

  /** Fields only. */
  def apply(fields: Map[String, Set[FieldValue]]): MatchFilter = {
    val fieldFilter = new MatchMap(fields)
    MatchFilter(fieldFilter)
  }

  /** Path and fields together. */
  def apply(path: String, recursive: Boolean, fields: Map[String, Set[FieldValue]]): PMFilter = {
    val pathFilter  = new Path(path, recursive)
    val fieldFilter = new MatchMap(fields)
    PMFilter(pathFilter, fieldFilter)
  }
}

Source File: CategoricalColHashBucket.scala From BigDL with Apache License 2.0

5 votes

package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3



/**
  * Maps delimited string features to integer bucket ids by hashing.
  *
  * @param hashBucketSize number of buckets; every id lies in `[0, hashBucketSize)`
  * @param strDelimiter   separator passed to `String.split` for each input row
  * @param isSparse       whether the output is left sparse or converted to dense
  */
class CategoricalColHashBucket[T: ClassTag](
  val hashBucketSize: Int,
  val strDelimiter: String = ",",
  val isSparse: Boolean = true
  )(implicit ev: TensorNumeric[T])
  extends Operation[Tensor[String], Tensor[Int], T] {

  output = Tensor[Int]()

  /**
    * Splits the string in column 1 of every row and hashes each piece into a
    * bucket. The result has one row per input row and as many columns as the
    * longest feature list.
    */
  override def updateOutput(input: Tensor[String]): Tensor[Int] = {
    val rows = input.size(dim = 1)
    val rowIndices = new ArrayBuffer[Int]()
    val colIndices = new ArrayBuffer[Int]()
    val buckets = new ArrayBuffer[Int]()
    var maxFeatureCount = 0
    // the input is read with 1-based row numbers; output indices are 0-based
    for (row <- 1 to rows) {
      val features = input.valueAt(row, 1).split(strDelimiter)
      maxFeatureCount = math.max(maxFeatureCount, features.length)
      for (col <- features.indices) {
        val remainder = MurmurHash3.stringHash(features(col)) % hashBucketSize
        // `%` keeps the sign of the hash, so shift negative remainders up
        val bucket = if (remainder < 0) remainder + hashBucketSize else remainder
        rowIndices += row - 1
        colIndices += col
        buckets += bucket
      }
    }
    val indices = Array(rowIndices.toArray, colIndices.toArray)
    val shape = Array(rows, maxFeatureCount)
    output =
      if (isSparse) Tensor.sparse(indices, buckets.toArray, shape)
      else Tensor.dense(Tensor.sparse(indices, buckets.toArray, shape))
    output
  }
}

object CategoricalColHashBucket{

  /** Factory with the same parameters and defaults as the class constructor. */
  def apply[T: ClassTag](
      hashBucketSize: Int,
      strDelimiter: String = ",",
      isSparse: Boolean = true)
      (implicit ev: TensorNumeric[T])
  : CategoricalColHashBucket[T] =
    new CategoricalColHashBucket[T](hashBucketSize, strDelimiter, isSparse)
}

Source File: Instances.scala From radixtree with Apache License 2.0

5 votes

package com.rklaehn.radixtree

import algebra.Eq
import cats.kernel.Hash

import scala.util.hashing.MurmurHash3

object Instances {

  /** Element-wise equality and hashing for arrays, derived from the element instance. */
  implicit def ArrayHash[@specialized A](implicit aHash: Hash[A]): Hash[Array[A]] = new Hash[Array[A]] {

    /** True when both arrays have the same length and pairwise-equal elements. */
    def eqv(x: Array[A], y: Array[A]): Boolean =
      if (x.length != y.length) false
      else {
        // advance while elements agree; reaching the end means all matched
        var i = 0
        while (i < x.length && aHash.eqv(x(i), y(i))) i += 1
        i == x.length
      }

    /** Mixes the element hashes in order, starting from the array seed. */
    override def hash(a: Array[A]): Int = {
      var acc = MurmurHash3.arraySeed
      var idx = 0
      while (idx < a.length) {
        acc = MurmurHash3.mix(acc, aHash.hash(a(idx)))
        idx += 1
      }
      acc
    }
  }
}

Source File: package.scala From radixtree with Apache License 2.0

5 votes

package com.rklaehn

import algebra.Eq
import cats.kernel.Hash

import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

// scalastyle:off return
package object radixtree {

  /** True when both arrays have the same length and pairwise-equal elements. */
  private[radixtree] def arrayEqv[A: Eq](x: Array[A], y: Array[A]): Boolean =
    if (x.length != y.length) false
    else {
      // advance while elements agree; reaching the end means all matched
      var i = 0
      while (i < x.length && Eq.eqv(x(i), y(i))) i += 1
      i == x.length
    }

  /** Mixes the element hashes in order, starting from the array seed. */
  private[radixtree] def arrayHash[A: Hash](a: Array[A]): Int = {
    var acc = MurmurHash3.arraySeed
    var idx = 0
    while (idx < a.length) {
      acc = MurmurHash3.mix(acc, Hash.hash(a(idx)))
      idx += 1
    }
    acc
  }

  private[radixtree] implicit class ArrayOps[T](private val underlying: Array[T]) extends AnyVal {

    /** Copy of the array with the element at `index` replaced by `value`. */
    def updated(index: Int, value: T): Array[T] = {
      val copy = underlying.clone
      copy(index) = value
      copy
    }

    /** Copy of the array, one element longer, with `value` inserted at `index`. */
    def patched(index: Int, value: T)(implicit c: ClassTag[T]): Array[T] = {
      val oldLength = underlying.length
      val grown = new Array[T](oldLength + 1)
      // elements before the insertion point keep their position
      System.arraycopy(underlying, 0, grown, 0, index)
      grown(index) = value
      // the remaining elements shift one slot to the right
      if (index < oldLength)
        System.arraycopy(underlying, index, grown, index + 1, oldLength - index)
      grown
    }

    /** The array itself when it already has length `n`, otherwise a new array of length `n` holding the leading elements. */
    def resizeInPlace(n: Int)(implicit c: ClassTag[T]): Array[T] =
      if (underlying.length == n) underlying
      else {
        val resized = c.newArray(n)
        System.arraycopy(underlying, 0, resized, 0, math.min(n, underlying.length))
        resized
      }
  }
}

Source File: SimpleCollisionStrategy.scala From spark-neighbors with MIT License

5 votes

package com.github.karlhigley.spark.neighbors.collision

import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature }


  /**
    * Keys every hash table entry by its table and the hash of its whole
    * signature, pairing the key with the entry's id and point.
    */
  def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = {
    val keyed = hashTables.map { entry =>
      // Arrays are mutable and can't be used in RDD keys, so the signature
      // is represented in the key by its hash (an Int) instead.
      val signatureHash = MurmurHash3.arrayHash(entry.sigElements)
      val key: Product  = (entry.table, signatureHash)
      (key, (entry.id, entry.point))
    }

    keyed
  }
}

Source File: BandingCollisionStrategy.scala From spark-neighbors with MIT License

5 votes

package com.github.karlhigley.spark.neighbors.collision

import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature }


  /**
    * Splits every signature into consecutive groups of `size / bands` elements
    * and emits one keyed entry per group, so entries sharing any one band of a
    * table receive the same key.
    */
  def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = {
    val bandEntries = hashTables.flatMap { entry =>
      val signature = entry.sigElements
      val bandWidth = signature.size / bands
      signature.grouped(bandWidth).zipWithIndex.map { case (bandSig, bandNum) =>
        // Arrays are mutable and can't be used in RDD keys, so each band
        // is represented in the key by its hash (an Int) instead.
        val key: Product = (entry.table, bandNum, MurmurHash3.arrayHash(bandSig))
        (key, (entry.id, entry.point))
      }
    }

    bandEntries
  }
}

Source File: Hasher.scala From sjson-new with Apache License 2.0

5 votes

package sjsonnew
package support.murmurhash

import scala.util.Try
import HashUtil.hashLong
import java.lang.{ Double => JDouble }
import scala.util.hashing.MurmurHash3

/** Builder that folds a JSON-like structure into a single `Int` hash. */
object Hasher extends SupportHasher[Int] {
  // fixed hashes for the three literal values
  private val nullHash = 0xc0
  private val falseHash = 0xc2
  private val trueHash = 0xc3

  implicit val facade: BuilderFacade[Int] = FacadeImpl

  private object FacadeImpl extends SimpleBuilderFacade[Int] {
    val jnull  = nullHash
    val jfalse = falseHash
    val jtrue  = trueHash

    // strings, and numbers supplied in textual form, hash as strings
    def jstring(s: String)         = MurmurHash3.stringHash(s)
    def jnumstring(s: String)      = jstring(s)
    def jintstring(s: String)      = jstring(s)
    def jbigdecimal(d: BigDecimal) = jstring(d.toString)

    // integral and floating point numbers hash through their 64-bit form
    def jlong(l: Long)     = hashLong(l)
    def jint(i: Int)       = hashLong(i.toLong)
    def jdouble(d: Double) = hashLong(JDouble.doubleToRawLongBits(d))

    // containers combine the hashes already computed for their children
    def jarray(vs: List[Int]): Int         = MurmurHash3.seqHash(vs)
    def jobject(vs: Map[String, Int]): Int = MurmurHash3.mapHash(vs)
  }
}

Source File: Hashing.scala From endpoints4s with MIT License

5 votes

package endpoints4s

import scala.util.hashing.MurmurHash3

private[endpoints4s] object Hashing {

  /**
    * Hashes the given values in order.
    *
    * The implementation has been copied and adapted from `MurmurHash3.productHash`.
    *
    * @param values the values to combine; must not be empty
    */
  def hash(values: Any*): Int = {
    require(values.nonEmpty)
    val mixed = values.foldLeft(MurmurHash3.productSeed) { (acc, value) =>
      MurmurHash3.mix(acc, value.##)
    }
    MurmurHash3.finalizeHash(mixed, values.size)
  }

}

Source File: Literal.scala From dagon with Apache License 2.0

5 votes

package com.stripe.dagon

import java.io.Serializable
import scala.util.hashing.MurmurHash3
import scala.util.control.TailCalls


  // Structural equality over pairs of literals, built with Memoize.function;
  // `rec` is the handle used to compare sub-literals.
  // NOTE(review): presumably Memoize caches the result per pair so shared
  // sub-graphs are compared once -- confirm against Memoize.
  private def eqFn[N[_]]: Function[RefPair[Literal[N, _], Literal[N, _]], Boolean] =
    Memoize.function[RefPair[Literal[N, _], Literal[N, _]], Boolean] {
      // shortcut when the pair itself reports its two items as equal
      case (pair, _) if pair.itemsEq => true
      // constants compare by value
      case (RefPair(Const(a), Const(b)), _) => a == b
      // same function and equal argument
      case (RefPair(Unary(left, fa), Unary(right, fb)), rec) =>
        (fa == fb) && rec(RefPair(left, right))
      // same function and equal arguments on both sides
      case (RefPair(Binary(lefta, righta, fa), Binary(leftb, rightb, fb)), rec) =>
        (fa == fb) && rec(RefPair(lefta, leftb)) && rec(RefPair(righta, rightb))
      case (RefPair(Variadic(argsa, fa), Variadic(argsb, fb)), rec) =>
        // walks both argument lists in lock-step; lists of different length are unequal
        @annotation.tailrec
        def loop(left: List[Literal[N, _]], right: List[Literal[N, _]]): Boolean =
          (left, right) match {
            case (lh :: ltail, rh :: rtail) =>
              rec(RefPair(lh, rh)) && loop(ltail, rtail)
            case (Nil, Nil) => true
            case _ => false
          }

        (fa == fb) && loop(argsa, argsb)
      // literals of different kinds are never equal
      case other => false
    }
}

Source File: DistributeAndMerge.scala From akka_streams_tutorial with MIT License

5 votes

package sample.graphdsl

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream._
import akka.stream.scaladsl._

import scala.concurrent.Future
import scala.util.hashing.MurmurHash3



    /**
      * Builds a flow that routes each element to one of `numBuckets` parallel
      * branches according to its hash, applies `fn` asynchronously on that
      * branch and merges the results.
      *
      * @param numBuckets  number of branches
      * @param parallelism `mapAsync` parallelism used on every branch
      * @param hash        function deciding the branch of an element
      * @param fn          asynchronous operation applied to each element
      */
    def hashingDistribution[A, B](numBuckets: Int,
                                  parallelism: Int,
                                  hash: A => Int,
                                  fn: A => Future[B]): Flow[A, B, NotUsed] = {
      Flow.fromGraph(GraphDSL.create() { implicit builder =>
        import GraphDSL.Implicits._
        val numPorts = numBuckets
        // Take the absolute value after the modulo: math.abs(Int.MinValue) is
        // still negative, so abs-then-modulo could return a negative port. For
        // every other hash the chosen port is the same as before.
        val partitioner =
          builder.add(Partition[A](outputPorts = numPorts, partitioner = a => math.abs(hash(a) % numPorts)))
        val merger = builder.add(Merge[B](inputPorts = numPorts, eagerComplete = false))

        // one asynchronous branch per port, all feeding the same merge stage
        Range(0, numPorts).foreach { eachPort =>
          partitioner.out(eachPort) ~> Flow[A].mapAsync(parallelism)(fn) ~> merger.in(eachPort)
        }

        FlowShape(partitioner.in, merger.out)
      })
    }

  // Demo: send the numbers 1 to 10 through three hash-selected branches,
  // print every result and terminate the actor system once the stream completes.
  Source(1 to 10)
    .via(
      hashingDistribution[Int, Int](
        numBuckets = 3,
        parallelism = 2,
        hash = element => MurmurHash3.stringHash(element.toString), // hash of the element's string form
        fn = sampleAsyncCall
      )
    )
    .runWith(Sink.foreach(each => println(s"Reached sink: $each")))
    .onComplete(_ => system.terminate())
}

Source File: Envelope.scala From seals with Apache License 2.0

5 votes

package dev.tauri.seals
package core

import scala.util.hashing.MurmurHash3

import cats.{ Eq, Show }
import cats.implicits._

/**
 * A value of type `A` carried together with the `Reified` instance for `A`.
 *
 * Equality and hashing look at `value` only; `reified` does not take part in either.
 */
sealed trait Envelope[A] extends Serializable {

  /** The wrapped value. */
  def value: A

  /** The `Reified` instance that accompanies `value`. */
  def reified: Reified[A]

  /** Shortcut for `reified.model`. */
  final def model: Model = reified.model

  /** Envelopes are equal exactly when their values are equal according to `==`. */
  final override def equals(that: Any): Boolean =
    that match {
      case other: Envelope[_] => value == other.value
      case _ => false
    }

  /** Hash of `value` mixed with the fixed `Envelope.hashSeed`, consistent with `equals`. */
  final override def hashCode: Int =
    MurmurHash3.finalizeHash(MurmurHash3.mixLast(Envelope.hashSeed, value.##), 1)

  /** Renders through `show`, using a `Show` instance derived from `A`'s `toString`. */
  final override def toString: String = show(Show.fromToString[A])

  /** Renders the envelope as `Envelope[<model>](<value>)` using the supplied `Show[A]`. */
  final def show(implicit A: Show[A]): String =
    sh"Envelope[${model}](${value})"
}

/** Factory and type class instances (`Eq`, `Show`, `Reified`) for [[Envelope]]. */
object Envelope {

  /** Representation an envelope is converted to and from by `refinement`: the model plus the value. */
  private final case class EnvelopeRepr[A](model: Model, value: A)

  /** Fixed seed mixed into every envelope hash code. */
  private[seals] final val hashSeed = 0x37dd86e4

  /** Wraps `a` together with the implicitly available `Reified[A]`. */
  def apply[A](a: A)(implicit r: Reified[A]): Envelope[A] =
    new Envelope[A] {
      override val value = a
      override val reified = r
    }

  /** Envelopes are `Eq`-equal exactly when their values are equal under `EqA`. */
  implicit def envelopeEquality[A](implicit EqA: Eq[A]): Eq[Envelope[A]] =
    Eq.by[Envelope[A], A](_.value)(EqA)

  /** `Show` instance delegating to `Envelope#show` with the given `Show[A]`. */
  implicit def envelopeShow[A](implicit A: Show[A]): Show[Envelope[A]] =
    Show.show(_.show(A))

  /**
   * Refinement between `EnvelopeRepr[A]` and `Envelope[A]`.
   *
   * Converting from a representation succeeds only if its model is `compatible` with the model
   * of the `Reified[A]` in scope; otherwise a `Left` with an explanatory message is returned.
   */
  private def refinement[A](implicit r: Reified[A]): Refinement.Aux[Envelope[A], EnvelopeRepr[A]] =
    new Refinement[Envelope[A]] {
      override type Repr = EnvelopeRepr[A]
      override val uuid = uuid"8e6e8b29-91e1-403c-9992-fd9cf8c82b06"
      override def repr = Refinement.ReprFormat.single("✉")

      override def from(repr: Repr) =
        if (repr.model.compatible(r.model)) Either.right(Envelope[A](repr.value)(r))
        else Either.left(sh"incompatible models: expected '${r.model}', got '${repr.model}'")

      override def to(env: Envelope[A]) = EnvelopeRepr[A](env.model, env.value)
    }

  /** `Reified` instance for envelopes, obtained by refining the one for `EnvelopeRepr[A]`. */
  implicit def reifiedForEnvelope[A](implicit r: Reified[A]): Reified[Envelope[A]] =
    Reified[EnvelopeRepr[A]].refined[Envelope[A]](refinement[A])
}

Source File: HashDerivation.scala From magnolify with Apache License 2.0

5 votes

package magnolify.cats.semiauto

import cats.Hash
import magnolia._
import magnolify.shims.MurmurHash3Compat

import scala.language.experimental.macros
import scala.util.hashing.MurmurHash3

/** Magnolia-based semi-automatic derivation of `cats.Hash` instances. */
object HashDerivation {
  type Typeclass[T] = Hash[T]

  /**
   * Derives a `Hash` for a case class.
   *
   * A case class without parameters hashes to the hash code of its short type name. Otherwise
   * the parameter hashes are mixed, in declaration order, into the seed returned by
   * `MurmurHash3Compat.seed` for that same type-name hash, and finalized with the parameter count.
   */
  def combine[T](caseClass: ReadOnlyCaseClass[Typeclass, T]): Typeclass[T] = {
    val fieldsEqual = EqMethods.combine(caseClass)

    new Hash[T] {
      override def hash(x: T): Int = {
        val params = caseClass.parameters
        val nameHash = caseClass.typeName.short.hashCode
        if (params.nonEmpty) {
          val mixed = params.foldLeft(MurmurHash3Compat.seed(nameHash)) { (acc, param) =>
            MurmurHash3.mix(acc, param.typeclass.hash(param.dereference(x)))
          }
          MurmurHash3.finalizeHash(mixed, params.size)
        } else {
          nameHash
        }
      }

      override def eqv(x: T, y: T): Boolean = fieldsEqual(x, y)
    }
  }

  /** Derives a `Hash` for a sealed trait by delegating to the instance of the matching subtype. */
  def dispatch[T](sealedTrait: SealedTrait[Typeclass, T]): Typeclass[T] = {
    val subtypesEqual = EqMethods.dispatch(sealedTrait)

    new Hash[T] {
      override def hash(x: T): Int =
        sealedTrait.dispatch(x)(subtype => subtype.typeclass.hash(subtype.cast(x)))

      override def eqv(x: T, y: T): Boolean = subtypesEqual(x, y)
    }
  }

  /** Macro entry point generating a `Hash[T]`. */
  implicit def apply[T]: Typeclass[T] = macro Magnolia.gen[T]
}

Source File: package.scala From magnolify with Apache License 2.0

5 votes

package magnolify

import scala.collection.generic.CanBuildFrom
import scala.collection.mutable
import scala.language.higherKinds
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

/** Compatibility shims for the collection API that uses `CanBuildFrom` and `TraversableOnce`. */
package object shims {

  /** Adapter exposing `mercator.Monadic`'s `flatMap`/`map` under the names `flatMapS`/`mapS`. */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn)
    override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn)
  }

  /** Serializable source of builders that turn elements of type `A` into a collection `C`. */
  trait FactoryCompat[-A, +C] extends Serializable {
    def newBuilder: mutable.Builder[A, C]

    /** Builds a `C` containing all elements of `xs`. */
    def build(xs: TraversableOnce[A]): C = (newBuilder ++= xs).result()
  }

  // Implicit instances are layered so that Array is preferred over List, and List over Vector.
  // Every implicit carries an explicit result type, so its signature is stated rather than inferred.
  object FactoryCompat extends LowPriorityFactoryCompat1 {
    private type FC[A, C] = FactoryCompat[A, C]

    /** Creates a `FactoryCompat` that obtains a fresh builder from `f` on every `newBuilder` call. */
    def apply[A, C](f: () => mutable.Builder[A, C]): FC[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f()
      }

    implicit def arrayFC[A: ClassTag]: FactoryCompat[A, Array[A]] =
      FactoryCompat(() => Array.newBuilder[A])
    // Deprecated in 2.13
    // implicit def traversableFC[A] = FactoryCompat(() => Traversable.newBuilder[A])
    // List <: Iterable
    // implicit def iterableFC[A] = FactoryCompat(() => Iterable.newBuilder[A])
    // List <: Seq
    // implicit def seqFC[A] = FactoryCompat(() => Seq.newBuilder[A])
    // Vector <: IndexedSeq
    // implicit def indexedSeqFC[A] = FactoryCompat(() => IndexedSeq.newBuilder[A])
  }

  trait LowPriorityFactoryCompat1 extends LowPriorityFactoryCompat2 {
    implicit def listFC[A]: FactoryCompat[A, List[A]] =
      FactoryCompat(() => List.newBuilder[A])
  }

  trait LowPriorityFactoryCompat2 {
    implicit def vectorFC[A]: FactoryCompat[A, Vector[A]] =
      FactoryCompat(() => Vector.newBuilder[A])
    // Deprecated in 2.13
    // implicit def streamFC[A] = FactoryCompat(() => Stream.newBuilder[A])
  }

  /** Serializable `CanBuildFrom` instances for common collection types. */
  object SerializableCanBuildFroms {
    // Both `apply` overloads ignore their input and ask `f` for a fresh builder.
    private def cbf[A, C](f: () => mutable.Builder[A, C]): CanBuildFrom[C, A, C] =
      new CanBuildFrom[C, A, C] with Serializable {
        override def apply(from: C): mutable.Builder[A, C] = f()
        override def apply(): mutable.Builder[A, C] = f()
      }

    implicit def arrayCBF[A: ClassTag]: CanBuildFrom[Array[A], A, Array[A]] =
      cbf(() => Array.newBuilder[A])
    implicit def traversableCBF[A]: CanBuildFrom[Traversable[A], A, Traversable[A]] =
      cbf(() => Traversable.newBuilder[A])
    implicit def iterableCBF[A]: CanBuildFrom[Iterable[A], A, Iterable[A]] =
      cbf(() => Iterable.newBuilder[A])
    implicit def seqCBF[A]: CanBuildFrom[Seq[A], A, Seq[A]] =
      cbf(() => Seq.newBuilder[A])
    implicit def indexedSeqCBF[A]: CanBuildFrom[IndexedSeq[A], A, IndexedSeq[A]] =
      cbf(() => IndexedSeq.newBuilder[A])
    implicit def listCBF[A]: CanBuildFrom[List[A], A, List[A]] =
      cbf(() => List.newBuilder[A])
    implicit def vectorCBF[A]: CanBuildFrom[Vector[A], A, Vector[A]] =
      cbf(() => Vector.newBuilder[A])
    implicit def streamCBF[A]: CanBuildFrom[Stream[A], A, Stream[A]] =
      cbf(() => Stream.newBuilder[A])
  }

  /** Alias for the Java/Scala collection converters available in this collection API. */
  val JavaConverters = scala.collection.JavaConverters

  object MurmurHash3Compat {

    /**
     * Seed for hashing a product. `data` is ignored here: the result is always
     * `MurmurHash3.productSeed`.
     */
    def seed(data: Int): Int = MurmurHash3.productSeed
  }
}

Source File: package.scala From magnolify with Apache License 2.0

5 votes

package magnolify

import scala.collection.{mutable, Factory}
import scala.util.hashing.MurmurHash3

/** Compatibility shims for the collection API that uses `Factory` and `IterableOnce`. */
package object shims {

  /** Adapter exposing `mercator.Monadic`'s `flatMap`/`map` under the names `flatMapS`/`mapS`. */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] =
      flatMapS(from)(fn)

    override def map[A, B](from: F[A])(fn: A => B): F[B] =
      mapS(from)(fn)
  }

  /** Serializable source of builders that turn elements of type `A` into a collection `C`. */
  trait FactoryCompat[-A, +C] extends Serializable {
    def newBuilder: mutable.Builder[A, C]

    /** Builds a `C` containing all elements of `xs`, using a fresh builder. */
    def build(xs: IterableOnce[A]): C = {
      val builder = newBuilder
      builder ++= xs
      builder.result()
    }
  }

  object FactoryCompat {

    /** Lifts any implicitly available `Factory[A, C]` into a `FactoryCompat[A, C]`. */
    implicit def fromFactory[A, C](implicit f: Factory[A, C]): FactoryCompat[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f.newBuilder
      }
  }

  /** Intentionally empty in this variant; the name is kept so references to it still resolve. */
  object SerializableCanBuildFroms

  /** Alias for the Java/Scala collection converters available in this collection API. */
  val JavaConverters = scala.jdk.CollectionConverters

  object MurmurHash3Compat {

    /** Seed for hashing a product: `data` mixed into `MurmurHash3.productSeed`. */
    def seed(data: Int): Int = {
      import MurmurHash3.{mix, productSeed}
      mix(productSeed, data)
    }
  }
}

Source File: Hashes.scala From incubator-s2graph with Apache License 2.0

5 votes

package org.apache.s2graph.counter.util

import org.apache.hadoop.hbase.util.Bytes

import scala.util.hashing.MurmurHash3

/** String hashing helpers. */
object Hashes {

  /** Returns the SHA-1 digest of the UTF-8 bytes of `s`, converted to a string by `Bytes.toHex`. */
  def sha1(s: String): String = {
    val digest = java.security.MessageDigest.getInstance("SHA-1").digest(s.getBytes("UTF-8"))
    Bytes.toHex(digest)
  }

  /**
   * Maps any `Int` to a non-negative one: non-negative input is returned unchanged, negative
   * input `h` becomes `-(h + 1)`.
   */
  private def positiveHash(h: Int): Int =
    // ~h equals -(h + 1) for every Int, so Int.MinValue maps to Int.MaxValue without overflow.
    if (h >= 0) h else ~h

  /** Non-negative hash of `s` based on `MurmurHash3.stringHash`. */
  def murmur3(s: String): Int =
    positiveHash(MurmurHash3.stringHash(s))
}

scala.util.hashing.MurmurHash3 Scala Examples