scala.util.hashing.MurmurHash3 Scala Examples

The following examples show how to use scala.util.hashing.MurmurHash3. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: EthereumTransactionRouter.scala    From Raphtory   with Apache License 2.0 5 votes vote down vote up
package com.raphtory.examples.blockchain.routers

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.EdgeAddWithProperties
import com.raphtory.core.model.communication.Properties
import com.raphtory.core.model.communication.StringProperty
import com.raphtory.core.model.communication.VertexAddWithProperties

import scala.util.hashing.MurmurHash3

class EthereumTransactionRouter(override val routerId: Int,override val workerID:Int, val initialManagerCount: Int) extends RouterWorker {

  /**
    * Converts one raw transaction record into graph updates.
    *
    * The record's string form has its first and last character removed and is split on commas:
    * field 0 is the sending wallet, field 1 the receiving wallet (may be empty), field 2 the
    * transferred value and field 3 the timestamp in seconds.
    *
    * Emits, in order: the source vertex, the destination vertex and the edge between them.
    * An empty receiver is modelled as a vertex labelled "null" (burnt cash).
    */
  override protected def parseTuple(value: Any): Unit = {
    val fields    = value.toString.drop(1).dropRight(1).split(",")
    val timestamp = fields(3).toLong * 1000 // seconds to milliseconds
    val sender    = fields(0)
    val senderId  = MurmurHash3.stringHash(sender) // vertex IDs are the hash of the wallet ID

    sendGraphUpdate(
            VertexAddWithProperties(timestamp, senderId, Properties(StringProperty("id", sender)))
    )

    // NOTE(review): the edge property is keyed "id" for a real receiver but "value" for burnt
    // cash, although both carry field 2 - confirm whether "id" is intentional.
    val (receiver, edgeKey) =
      if (fields(1).nonEmpty) (fields(1), "id")
      else ("null", "value")
    val receiverId = MurmurHash3.stringHash(receiver)

    sendGraphUpdate(
            VertexAddWithProperties(timestamp, receiverId, Properties(StringProperty("id", receiver)))
    )
    sendGraphUpdate(
            EdgeAddWithProperties(timestamp, senderId, receiverId, Properties(StringProperty(edgeKey, fields(2))))
    )
  }
}
Example 2
Source File: SignRandomProjectionLSH.scala    From lexrank-summarizer   with MIT License 5 votes vote down vote up
package io.github.karlhigley.lexrank

import scala.collection.immutable.BitSet
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.Logging

/**
  * Sign-random-projection LSH backed by a fixed pool of Gaussian values.
  *
  * @param poolSize number of Gaussian values generated once and shared by all projections
  */
class SignRandomProjectionLSH(poolSize: Int = 10000) extends Serializable with Logging {
  val pool = SignRandomProjectionLSH.generatePool(poolSize)

  /**
    * Computes the signature of `vector` as the set of bit positions (1-based, up to `length`)
    * whose projection is strictly positive.
    *
    * A vector with no active entries yields an empty signature: every projection sums to 0.
    *
    * @param vector sparse input vector
    * @param length number of signature bits to evaluate
    * @return the positions of the bits that are set
    */
  def computeSignature(vector: SparseVector, length: Int): BitSet = {
    val elements = vector.indices.zip(vector.values)

    val setBits = (1 to length).filter { bit =>
      val dotProduct = elements.map { case (index, weight) =>
        // The (bit, index) pair selects a pseudo-random pool entry; floorMod keeps the
        // index non-negative without the overflow risk of (h % n + n) % n.
        val hash      = MurmurHash3.productHash((bit, index))
        val poolIndex = Math.floorMod(hash, poolSize)
        weight * pool(poolIndex)
      }.sum // unlike reduce, sum is defined (0.0) for an empty vector
      dotProduct > 0
    }

    BitSet(setBits: _*)
  }

}

object SignRandomProjectionLSH {
  /** Enumerates every signature over bit positions `1..length`, i.e. all subsets of those bits. */
  def signatureSet(length: Int): Set[BitSet] = {
    val allBits = BitSet((1 to length): _*)
    allBits.subsets().toSet
  }

  /**
    * Estimates the cosine between two vectors from their signatures: the fraction of differing
    * bits is mapped onto an angle in `[0, Pi]` and its cosine is returned.
    */
  def estimateCosine(a: BitSet, b: BitSet, length: Int): Double = {
    val differingBits = (a ^ b).size
    val fraction      = differingBits.toDouble / length.toDouble
    math.cos(fraction * math.Pi)
  }

  /** Draws `size` standard-normal values from a freshly created generator. */
  private def generatePool(size: Int): Array[Double] = {
    val rand = new Random()
    Array.fill(size)(rand.nextGaussian())
  }
}
Example 3
Source File: ShardedSparkeyReader.scala    From scio   with Apache License 2.0 5 votes vote down vote up
package com.spotify.scio.extra.sparkey.instances

import java.util

import com.spotify.sparkey.{IndexHeader, LogHeader, SparkeyReader}

import scala.util.hashing.MurmurHash3
import scala.jdk.CollectionConverters._


/**
  * A [[SparkeyReader]] that dispatches lookups to one of several underlying readers,
  * selected by a MurmurHash3 hash of the key.
  *
  * @param sparkeys  underlying readers keyed by shard index; shards may be missing
  * @param numShards modulus applied to the key hash to obtain the shard index
  */
class ShardedSparkeyReader(val sparkeys: Map[Short, SparkeyReader], val numShards: Short)
    extends SparkeyReader {
  // NOTE(review): `%` yields a negative index for negative hashes; this must match whatever
  // scheme assigned keys to shards when they were written - confirm before changing.
  def hashKey(arr: Array[Byte]): Short = (MurmurHash3.bytesHash(arr, 1) % numShards).toShort

  def hashKey(str: String): Short = (MurmurHash3.stringHash(str, 1) % numShards).toShort

  /** Returns the value for `key`, or null when the key's shard is absent. */
  override def getAsString(key: String): String =
    sparkeys.get(hashKey(key)).map(_.getAsString(key)).orNull

  /** Returns the value for `key`, or null when the key's shard is absent. */
  override def getAsByteArray(key: Array[Byte]): Array[Byte] =
    sparkeys.get(hashKey(key)).map(_.getAsByteArray(key)).orNull

  /** Returns the entry for `key`, or null when the key's shard is absent. */
  override def getAsEntry(key: Array[Byte]): SparkeyReader.Entry =
    sparkeys.get(hashKey(key)).map(_.getAsEntry(key)).orNull

  override def getIndexHeader: IndexHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getIndexHeader.")

  override def getLogHeader: LogHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getLogHeader.")

  /** Duplicates every underlying reader and wraps the copies in a new sharded reader. */
  override def duplicate(): SparkeyReader =
    new ShardedSparkeyReader(sparkeys.map { case (k, v) => (k, v.duplicate) }, numShards)

  override def close(): Unit = sparkeys.values.foreach(_.close())

  /**
    * Concatenates the iterators of all shards. Folding from the empty iterator (instead of
    * `reduce`) makes a reader with no shards yield an empty iterator rather than throw.
    */
  override def iterator(): util.Iterator[SparkeyReader.Entry] = {
    val perShard = sparkeys.values.map(_.iterator.asScala)
    perShard.foldLeft(Iterator.empty: Iterator[SparkeyReader.Entry])(_ ++ _).asJava
  }
}
Example 4
Source File: HashedCategoricalDistribution.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.util.rand

import scala.util.hashing.MurmurHash3

object HashedCategoricalDistribution {
    /** `Int.MaxValue` as a `Float`; used as the divisor that scales a non-negative hash. */
    private val MaxVal = Int.MaxValue.toFloat

    /** Builds a distribution whose sampler is constructed from the given probabilities. */
    def apply(probabilities: Double*): HashedCategoricalDistribution = {
        val sampler = new IntAliasMethodSampler(probabilities)
        new HashedCategoricalDistribution(sampler)
    }
}

/**
  * Deterministic categorical sampling: the ordered hash of the input data replaces the random
  * draws handed to the alias-method sampler, so equal inputs always map to the same class.
  */
case class HashedCategoricalDistribution(sampler: IntAliasMethodSampler) extends (TraversableOnce[Any] => Int) {
    import HashedCategoricalDistribution.MaxVal

    /**
      * @param data values hashed, in order, to drive the sampler
      * @return the value produced by `sampler.sample` for the hash-derived index and fraction
      */
    def apply(data: TraversableOnce[Any]): Int = {
        val h = MurmurHash3.orderedHash(data)

        // A non-negative value is needed so that (hAbs % numClasses) is a valid index and the
        // fraction p is non-negative. math.abs(Int.MinValue) is still Int.MinValue, so that one
        // hash is mapped to 0 explicitly. Folding the sign lowers the entropy of the hash,
        // which is acceptable here.
        val hAbs = if (h == Int.MinValue) 0 else math.abs(h)
        val p = hAbs / MaxVal
        // NOTE(review): `numClasses` is not defined in the visible code - presumably the
        // sampler's class count; confirm where it comes from.
        val k = hAbs % numClasses
        sampler.sample(k, p)
    }
}
Example 5
Source File: SubString.scala    From typed-schema   with Apache License 2.0 5 votes vote down vote up
package ru.tinkoff.tschema.utils

import ru.tinkoff.tschema.utils.SubString.outOfBound

import scala.annotation.tailrec
import scala.util.hashing.MurmurHash3

/**
  * A `CharSequence` view over the half-open range `[from, to)` of a character array.
  * Sub-sequences share the backing array instead of copying it.
  */
final class SubString private[SubString] (private val arr: Array[Char], private val from: Int, private val to: Int)
    extends CharSequence {
  @inline private[this] def size = to - from

  def length(): Int = size

  def charAt(index: Int): Char = {
    if (index < 0 || index >= size) SubString.outOfBound(index)
    arr(from + index)
  }

  /** Returns a view over `[start, end)` of this sequence; the full range returns `this`. */
  def subSequence(start: Int, end: Int): CharSequence = {
    if (start < 0) SubString.outOfBound(start)
    if (end > size) SubString.outOfBound(end)
    val len = end - start
    if (len < 0) SubString.outOfBound(len)
    val coversEverything = start == 0 && end == size
    if (coversEverything) this else new SubString(arr, from + start, from + end)
  }

  /** Equal to any `CharSequence` with the same length and the same characters. */
  override def equals(obj: Any): Boolean = obj match {
    case other: CharSequence =>
      size == other.length() && (0 until size).forall(i => other.charAt(i) == charAt(i))
    case _ => false
  }

  /** Mixes the characters pairwise, mirroring `MurmurHash3.stringHash` with its default seed. */
  override def hashCode(): Int = {
    var acc = MurmurHash3.stringSeed
    var pos = from
    while (pos + 1 < to) {
      // two 16-bit chars are packed into one 32-bit word per mixing step
      val packed = (arr(pos) << 16) + arr(pos + 1)
      acc = MurmurHash3.mix(acc, packed)
      pos += 2
    }
    // an odd-length sequence leaves one trailing char
    if (pos < to) acc = MurmurHash3.mixLast(acc, arr(pos).toInt)
    MurmurHash3.finalizeHash(acc, size)
  }

  override def toString: String = new String(arr, from, size)

}

object SubString {
  @inline private def outOfBound(index: Int) = throw new StringIndexOutOfBoundsException(index)

  /** Wraps a copy of the characters of `s`. */
  def apply(s: String): SubString = new SubString(s.toCharArray, 0, s.length)
}
Example 6
Source File: RawJson.scala    From almond   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package almond.protocol

import java.nio.charset.StandardCharsets
import java.{util => ju}

import scala.util.hashing.MurmurHash3
import scala.util.Try

// adapted from https://github.com/plokhotnyuk/jsoniter-scala/blob/209d918a030b188f064ee55505a6c47257731b4b/jsoniter-scala-macros/src/test/scala/com/github/plokhotnyuk/jsoniter_scala/macros/JsonCodecMakerSpec.scala#L645-L666
/** Raw JSON bytes with equality and hashing based on byte content rather than array identity. */
final case class RawJson(value: Array[Byte]) {
  /** Hash over the array elements, computed once on first use. */
  override lazy val hashCode: Int = MurmurHash3.arrayHash(value)

  override def equals(obj: Any): Boolean =
    obj match {
      case other: RawJson => ju.Arrays.equals(other.value, value)
      case _              => false
    }

  /** The bytes decoded as UTF-8, falling back to the array's own `toString` if decoding throws. */
  override def toString: String = {
    val decoded = Try(new String(value, StandardCharsets.UTF_8))
    decoded.getOrElse(value.toString)
  }
}

object RawJson {

  import com.github.plokhotnyuk.jsoniter_scala.core._

  /** Codec that passes the raw bytes of a JSON value through without interpreting them. */
  implicit val codec: JsonValueCodec[RawJson] = new JsonValueCodec[RawJson] {
    val nullValue: RawJson = new RawJson(new Array[Byte](0))

    def decodeValue(in: JsonReader, default: RawJson): RawJson = {
      val bytes = in.readRawValAsBytes()
      new RawJson(bytes)
    }

    def encodeValue(x: RawJson, out: JsonWriter): Unit = {
      out.writeRawVal(x.value)
    }
  }

  /** The UTF-8 bytes of `{}`. */
  val emptyObj: RawJson = {
    val bytes = "{}".getBytes(StandardCharsets.UTF_8)
    RawJson(bytes)
  }
}
Example 7
Source File: XORShiftRandom.scala    From Mastering-Spark-for-Data-Science   with MIT License 5 votes vote down vote up
package io.gzet.story.linalg

import java.nio.ByteBuffer
import java.util.{Random => JavaRandom}

import scala.util.Random
import scala.util.hashing.MurmurHash3


  /**
    * Scrambles `seed` by hashing its big-endian bytes with MurmurHash3; the 32-bit hash is
    * widened to a `Long`.
    */
  private[this] def hashSeed(seed: Long): Long = {
    // NOTE(review): Long.SIZE is the width in bits (64), so the buffer is 64 bytes and the
    // trailing 56 zero bytes are hashed too. Shrinking it would change every derived seed.
    val buffer = ByteBuffer.allocate(java.lang.Long.SIZE)
    buffer.putLong(seed)
    MurmurHash3.bytesHash(buffer.array()).toLong
  }

  // we need to just override next - this will be called by nextInt, nextDouble,
  // nextGaussian, nextLong, etc.
  override protected def next(bits: Int): Int = {
    // three xor-shift steps (left 21, right 35, left 4) produce the next state
    val step1 = seed ^ (seed << 21)
    val step2 = step1 ^ (step1 >>> 35)
    val step3 = step2 ^ (step2 << 4)
    seed = step3
    // keep only the lowest `bits` bits of the new state
    val mask = (1L << bits) - 1
    (step3 & mask).toInt
  }
}

object XORShiftRandom {
  /** A shared generator created with the default (no-argument) constructor. */
  val random: Random = new Random()
}
Example 8
Source File: DatasetClient.scala    From elastiknn   with Apache License 2.0 5 votes vote down vote up
package com.klibisz.elastiknn.benchmarks

import java.util.zip.GZIPInputStream

import com.amazonaws.services.s3.AmazonS3
import com.klibisz.elastiknn.api.{ElasticsearchCodec, Vec}
import com.klibisz.elastiknn.benchmarks.Dataset._
import io.circe
import zio._
import zio.stream._

import scala.io.Source
import scala.util.Random
import scala.util.hashing.MurmurHash3

object DatasetClient {

  /** Source of benchmark vectors, split into a training stream and a test stream. */
  trait Service {
    def streamTrain(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
    def streamTest(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
  }

  /**
    * Layer providing a [[Service]] on top of an `AmazonS3` client.
    *
    * Random datasets are generated locally; every other dataset is read from
    * `keyPrefix/<dataset name>/<train|test>.json.gz` in `bucket`.
    *
    * @param bucket    bucket passed to `getObject`
    * @param keyPrefix prefix prepended to the object key
    */
  def s3(bucket: String, keyPrefix: String): ZLayer[Has[AmazonS3], Throwable, DatasetClient] = ZLayer.fromService[AmazonS3, Service] {
    client =>
      new Service {
        // `name` is either "train" or "test" (see the two overrides below).
        private def stream(dataset: Dataset, name: String, limit: Option[Int]): Stream[Throwable, Vec] =
          dataset match {
            case r: RandomSparseBool =>
              // Seeded from (dims, name), so the same dataset and split reuse the same seed.
              // `limit` is not applied on this branch.
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name)))
              Stream
                .range(0, if (name == "train") r.train else r.test)
                .map(_ => Vec.SparseBool.random(r.dims, r.bias))
            case r: RandomDenseFloat =>
              // Same seeding scheme as above; `limit` is not applied on this branch either.
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name)))
              Stream
                .range(0, if (name == "train") r.train else r.test)
                .map(_ => Vec.DenseFloat.random(r.dims))
            case _ =>
              // Parses one line as JSON and decodes it into a Vec.
              def parseDecode(s: String): Either[circe.Error, Vec] =
                ElasticsearchCodec.parse(s).flatMap(j => ElasticsearchCodec.decode[Vec](j.hcursor))
              val obj = client.getObject(bucket, s"$keyPrefix/${dataset.name}/${name}.json.gz")
              // The gzip-decompressed source is closed when the managed resource is released.
              val iterManaged = Managed.makeEffect(Source.fromInputStream(new GZIPInputStream(obj.getObjectContent)))(_.close())
              // At most `limit` lines are read when a limit is given.
              val lines = Stream.fromIteratorManaged(iterManaged.map(src => limit.map(n => src.getLines.take(n)).getOrElse(src.getLines())))
              // Everything before the first '{' on a line is discarded.
              val rawJson = lines.map(_.dropWhile(_ != '{'))
              rawJson.mapM(s => ZIO.fromEither(parseDecode(s)))
          }

        override def streamTrain(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] =
          stream(dataset, "train", limit)

        override def streamTest(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] =
          stream(dataset, "test", limit)
      }
  }

}
Example 9
Source File: versionspecific.scala    From kittens   with Apache License 2.0 5 votes vote down vote up
package cats.derived.util

import scala.util.hashing.MurmurHash3

/** Version-specific helpers; this variant aliases `Lazy` to shapeless' implementation. */
object VersionSpecific {
  type Lazy[+A] = shapeless.Lazy[A]
  // The argument is ignored here: the seed is always MurmurHash3.productSeed.
  private[derived] def productSeed(x: Product): Int = MurmurHash3.productSeed

  /** Holds either a primary value of type `A` or a fallback value of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    /** Applies `prim` to a primary value or `sec` to a fallback value. */
    def fold[C](prim: A => C, sec: B => C): C
    /** Collapses both sides into a common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold(identity, ev)
  }

  /** The primary case; the value is passed strictly. */
  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  /** The fallback case; the value is by-name and only evaluated when folded. */
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  // `primary` lives in the companion itself and `secondary` in its parent class -
  // presumably so that `primary` wins implicit resolution when both apply (TODO confirm).
  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): A OrElse B = new Primary(a)
  }

  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: Lazy[B]): A OrElse B = new Secondary(b.value)
  }
}
Example 10
Source File: versionspecific.scala    From kittens   with Apache License 2.0 5 votes vote down vote up
package cats.derived.util

import scala.annotation.implicitNotFound
import scala.util.hashing.MurmurHash3

/** Version-specific helpers; this variant defines its own `Lazy` on top of by-name implicits. */
object VersionSpecific {

  // The seed mixes the hash of the product's prefix into MurmurHash3.productSeed.
  private[derived] def productSeed(x: Product): Int =
    MurmurHash3.mix(MurmurHash3.productSeed, x.productPrefix.hashCode)

  /** A deferred value of type `A`, obtained by calling `value()`. */
  @implicitNotFound("could not find Lazy implicit value of type ${A}")
  abstract class Lazy[+A] extends Serializable {
    def value(): A
  }

  object Lazy {
    // The by-name implicit is only evaluated when `value()` is called.
    implicit def instance[A](implicit ev: => A): Lazy[A] = () => ev
  }

  /** Holds either a primary value of type `A` or a fallback value of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    /** Applies `prim` to a primary value or `sec` to a fallback value. */
    def fold[C](prim: A => C, sec: B => C): C
    /** Collapses both sides into a common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold(identity, ev)
  }

  /** The primary case; the value is passed strictly. */
  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  /** The fallback case; the value is by-name and only evaluated when folded. */
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  // `primary` lives in the companion itself and `secondary` in its parent class -
  // presumably so that `primary` wins implicit resolution when both apply (TODO confirm).
  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): A OrElse B = new Primary(a)
  }

  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: => B): A OrElse B = new Secondary(b)
  }
}
Example 11
Source File: Port.scala    From ip4s   with Apache License 2.0 5 votes vote down vote up
package com.comcast.ip4s

import scala.util.Try
import scala.util.hashing.MurmurHash3

import cats.{Order, Show}


/**
  * A port number. Instances are created through the companion, which validates the range;
  * the class implements `Product` by hand with `value` as its single element.
  */
final class Port private (val value: Int) extends Product with Serializable with Ordered[Port] {
  /** Re-validates the new value, so the result is optional. */
  def copy(value: Int): Option[Port] = Port(value)

  def compare(that: Port): Int = Integer.compare(value, that.value)

  override def productArity: Int = 1

  override def productElement(n: Int): Any = n match {
    case 0 => value
    case _ => throw new IndexOutOfBoundsException
  }

  override def canEqual(other: Any): Boolean = other.isInstanceOf[Port]

  override def equals(other: Any): Boolean = other match {
    case port: Port => port.value == value
    case _          => false
  }

  override def hashCode: Int = MurmurHash3.productHash(this, productPrefix.hashCode)

  override def toString: String = value.toString
}

object Port {
  val MinValue: Int = 0
  val MaxValue: Int = 65535

  /** Returns a port when `value` lies in `[MinValue, MaxValue]`, otherwise `None`. */
  def apply(value: Int): Option[Port] =
    if (value < MinValue || value > MaxValue) None
    else Some(new Port(value))

  /** Parses `value` as an integer and validates it; any parse failure yields `None`. */
  def fromString(value: String): Option[Port] =
    for {
      number <- Try(value.toInt).toOption
      port   <- apply(number)
    } yield port

  def unapply(p: Port): Option[Int] = Some(p.value)

  implicit val order: Order[Port] = Order.fromComparable[Port]
  implicit val show: Show[Port] = Show.fromToString[Port]
}
Example 12
Source File: Hostname.scala    From ip4s   with Apache License 2.0 5 votes vote down vote up
package com.comcast.ip4s

import scala.util.hashing.MurmurHash3

import cats.{Order, Show}


  /**
    * Builds a hostname from `value`.
    *
    * Returns `None` for an empty string, for a string longer than 253 characters, or for one
    * that does not match `Pattern`; otherwise the string is split on '.' into labels.
    */
  def apply(value: String): Option[Hostname] = {
    val len = value.size
    if (len == 0 || len > 253) None
    else
      value match {
        case Pattern(_*) =>
          val labels = value.split('.').iterator.map(new Label(_)).toList
          if (labels.nonEmpty) Option(new Hostname(labels, value)) else None
        case _ => None
      }
  }

  // Ordering derived from Hostname's own comparison.
  implicit val order: Order[Hostname] = Order.fromComparable[Hostname]
  // Rendering delegates to Hostname.toString.
  implicit val show: Show[Hostname] = Show.fromToString[Hostname]
} 
Example 13
Source File: Orientation.scala    From morpheus   with Apache License 2.0 5 votes vote down vote up
package org.opencypher.okapi.ir.api.pattern

import cats.Eq

import scala.util.hashing.MurmurHash3

/** Equality (via `Eq`) plus a seeded hash for a pair of endpoints of type `E`. */
sealed trait Orientation[E <: Endpoints] extends Eq[E] {
  /** Hashes `ends` starting from `seed`. */
  def hash(ends: E, seed: Int): Int
}

object Orientation {

  /** Source and target are distinguished: hashing is order-sensitive and equality is positional. */
  case object Directed extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.orderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean = {
      def sameSource = x.source == y.source
      def sameTarget = x.target == y.target
      sameSource && sameTarget
    }
  }

  /** Direction is ignored: hashing is order-insensitive and swapped endpoints are equal. */
  case object Undirected extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.unorderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean = {
      def aligned = x.source == y.source && x.target == y.target
      def swapped = x.source == y.target && x.target == y.source
      aligned || swapped
    }
  }

  /** Both ends are the same field, so only that field takes part in hashing and equality. */
  case object Cyclic extends Orientation[IdenticalEndpoints] {
    override def hash(ends: IdenticalEndpoints, seed: Int): Int = {
      val fieldHash = ends.field.hashCode()
      MurmurHash3.mix(seed, fieldHash)
    }

    override def eqv(x: IdenticalEndpoints, y: IdenticalEndpoints): Boolean =
      x.field == y.field
  }
}
Example 14
Source File: Rule.scala    From CM-Well   with Apache License 2.0 5 votes vote down vote up
package cmwell.rts

import scala.util.hashing.MurmurHash3
import cmwell.domain.{FieldValue, Infoton}
import cmwell.formats.FormatType


// message

/** How matching results are transmitted: pushed to a handler or pulled in a given format. */
sealed abstract class TransmitType
/** Push mode: carries the handler function that takes a sequence of strings. */
case class Push(handler: (Seq[String]) => Unit) extends TransmitType
/** Pull mode: carries the format type. */
case class Pull(format: FormatType) extends TransmitType

/**
  * A path filter.
  *
  * @param path      prefix that tested paths must start with
  * @param recursive whether paths nested more than one level below `path` also match
  */
class Path(path: String, val recursive: Boolean) extends Serializable {
  /** MurmurHash3 string hash of `path`, exposed through [[hash]]. */
  val hashValue: Int = MurmurHash3.stringHash(path)
  def length: Int = path.length
  def hash: Int = hashValue

  override def toString(): String = s"path [$path] recursive [$recursive]"

  /**
    * Tests whether `p` is covered by this filter.
    *
    * `p` must start with `path`. For a non-recursive filter, the part of `p` after the prefix
    * and one following character must not contain another '/'.
    *
    * The prefix is compared as a string rather than by hash, so two different prefixes that
    * happen to share a hash can no longer produce a false match.
    *
    * @param p the path to test
    * @return true when `p` matches
    */
  def check(p: String): Boolean =
    p.startsWith(path) && (recursive || !p.drop(length + 1).contains('/'))
}

/** A field filter: field names mapped to the set of accepted values for each name. */
class MatchMap(fields: Map[String, Set[FieldValue]]) extends Serializable {

  /**
    * Tests the fields `f` of a candidate against this filter.
    *
    * An empty filter matches everything. Otherwise at least one filtered field must be
    * present in `f`, and either the filter accepts any value for it (empty set) or the two
    * value sets share an element.
    */
  def check(f: Map[String, Set[FieldValue]]): Boolean =
    fields.isEmpty || fields.exists { case (name, accepted) =>
      f.get(name).exists(present => accepted.isEmpty || accepted.intersect(present).nonEmpty)
    }
}

/** A subscription rule: which filters, if any, are attached. */
sealed abstract class Rule
/** No filter attached. */
case object NoFilter extends Rule
/** Filter by path only. */
case class PathFilter(path: Path) extends Rule
/** Filter by field values only. */
case class MatchFilter(matchMap: MatchMap) extends Rule
/** Carries both a path filter and a field-value filter. */
case class PMFilter(path: Path, matchMap: MatchMap) extends Rule

/** Factory methods that pick the rule variant from the arguments supplied. */
object Rule {
  def apply(): NoFilter.type = NoFilter

  def apply(path: String, recursive: Boolean): PathFilter = {
    val pathFilter = new Path(path, recursive)
    PathFilter(pathFilter)
  }

  def apply(fields: Map[String, Set[FieldValue]]): MatchFilter = {
    val matchMap = new MatchMap(fields)
    MatchFilter(matchMap)
  }

  def apply(path: String, recursive: Boolean, fields: Map[String, Set[FieldValue]]): PMFilter = {
    val pathFilter = new Path(path, recursive)
    val matchMap   = new MatchMap(fields)
    PMFilter(pathFilter, matchMap)
  }
}
Example 15
Source File: CategoricalColHashBucket.scala    From BigDL   with Apache License 2.0 5 votes vote down vote up
package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3



/**
  * Maps delimited string features to hash buckets.
  *
  * @param hashBucketSize number of buckets; every output value lies in `[0, hashBucketSize)`
  * @param strDelimiter   separator passed to `String.split` for each input row
  * @param isSparse       whether the result is returned sparse or converted to dense
  */
class CategoricalColHashBucket[T: ClassTag](
  val hashBucketSize: Int,
  val strDelimiter: String = ",",
  val isSparse: Boolean = true
  )(implicit ev: TensorNumeric[T])
  extends Operation[Tensor[String], Tensor[Int], T] {

  output = Tensor[Int]()

  /**
    * Splits the first column of every row on `strDelimiter` and hashes each piece into a
    * bucket. The result has shape (rows, longest feature list); the entry at
    * (row, position) is the bucket of that feature.
    */
  override def updateOutput(input: Tensor[String]): Tensor[Int] = {
    val rows       = input.size(dim = 1)
    val rowIndices = new ArrayBuffer[Int]()
    val colIndices = new ArrayBuffer[Int]()
    val buckets    = new ArrayBuffer[Int]()
    var widest     = 0

    // Tensor rows are addressed from 1; the emitted row indices are 0-based.
    for (row <- 1 to rows) {
      val features = input.valueAt(row, 1).split(strDelimiter)
      widest = math.max(widest, features.length)
      for (col <- features.indices) {
        val remainder = MurmurHash3.stringHash(features(col)) % hashBucketSize
        // shift a negative remainder into the valid bucket range
        val bucket = if (remainder < 0) remainder + hashBucketSize else remainder
        rowIndices += row - 1
        colIndices += col
        buckets += bucket
      }
    }

    val indices = Array(rowIndices.toArray, colIndices.toArray)
    val shape   = Array(rows, widest)
    val sparse  = Tensor.sparse(indices, buckets.toArray, shape)
    output = if (isSparse) sparse else Tensor.dense(sparse)
    output
  }
}

object CategoricalColHashBucket{
  /** Factory mirroring the class constructor, with the same defaults. */
  def apply[T: ClassTag](
      hashBucketSize: Int,
      strDelimiter: String = ",",
      isSparse: Boolean = true)
      (implicit ev: TensorNumeric[T])
  : CategoricalColHashBucket[T] =
    new CategoricalColHashBucket[T](hashBucketSize, strDelimiter, isSparse)
}
Example 16
Source File: Instances.scala    From radixtree   with Apache License 2.0 5 votes vote down vote up
package com.rklaehn.radixtree

import algebra.Eq
import cats.kernel.Hash

import scala.util.hashing.MurmurHash3

object Instances {

  /** Element-wise `Hash` for arrays, built from the `Hash` of the element type. */
  implicit def ArrayHash[@specialized A](implicit aHash: Hash[A]): Hash[Array[A]] = new Hash[Array[A]] {
    /** Arrays are equal when they have the same length and all elements are pairwise equal. */
    def eqv(x: Array[A], y: Array[A]): Boolean = {
      val n = x.length
      if (n != y.length) false
      else {
        // advance while elements agree; reaching the end means no mismatch was found
        var i = 0
        while (i < n && aHash.eqv(x(i), y(i))) i += 1
        i == n
      }
    }

    /** Mixes the element hashes, in order, into the array seed. */
    override def hash(a: Array[A]): Int = {
      var acc = MurmurHash3.arraySeed
      var idx = 0
      val n   = a.length
      while (idx < n) {
        acc = MurmurHash3.mix(acc, aHash.hash(a(idx)))
        idx += 1
      }
      acc
    }
  }
}
Example 17
Source File: package.scala    From radixtree   with Apache License 2.0 5 votes vote down vote up
package com.rklaehn

import algebra.Eq
import cats.kernel.Hash

import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

// scalastyle:off return
package object radixtree {

  /** True when both arrays have the same length and all elements are pairwise equal under `Eq`. */
  private[radixtree] def arrayEqv[A: Eq](x: Array[A], y: Array[A]): Boolean = {
    val n = x.length
    if (n != y.length) false
    else {
      // advance while elements agree; reaching the end means no mismatch was found
      var i = 0
      while (i < n && Eq.eqv(x(i), y(i))) i += 1
      i == n
    }
  }

  /** Mixes the element hashes, in order, into the array seed. */
  private[radixtree] def arrayHash[A: Hash](a: Array[A]): Int = {
    var acc = MurmurHash3.arraySeed
    var idx = 0
    val n   = a.length
    while (idx < n) {
      acc = MurmurHash3.mix(acc, Hash.hash(a(idx)))
      idx += 1
    }
    acc
  }

  /** Copying update helpers; none of them modifies the wrapped array. */
  private[radixtree] implicit class ArrayOps[T](private val underlying: Array[T]) extends AnyVal {

    /** Returns a copy with the element at `index` replaced by `value`. */
    def updated(index: Int, value: T): Array[T] = {
      val copy = underlying.clone()
      copy(index) = value
      copy
    }

    /** Returns a copy one element longer, with `value` inserted at `index`. */
    def patched(index: Int, value: T)(implicit c: ClassTag[T]): Array[T] = {
      val oldLength = underlying.length
      val grown     = new Array[T](oldLength + 1)
      System.arraycopy(underlying, 0, grown, 0, index)
      grown(index) = value
      // shift the tail right by one, if there is a tail
      if (index < oldLength)
        System.arraycopy(underlying, index, grown, index + 1, oldLength - index)
      grown
    }

    /**
      * Returns the array itself when it already has length `n`; otherwise a new array of
      * length `n` holding as many leading elements as fit.
      */
    def resizeInPlace(n: Int)(implicit c: ClassTag[T]): Array[T] =
      if (underlying.length != n) {
        val resized = c.newArray(n)
        System.arraycopy(underlying, 0, resized, 0, math.min(n, underlying.length))
        resized
      } else underlying
  }
}
Example 18
Source File: SimpleCollisionStrategy.scala    From spark-neighbors   with MIT License 5 votes vote down vote up
package com.github.karlhigley.spark.neighbors.collision

import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature }


  /**
    * Keys every hash table entry by (table, hash of its signature elements) and pairs the
    * key with the entry's (id, point).
    */
  def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = {
    val entries = hashTables.map { entry =>
      // Arrays are mutable and unsuitable as RDD keys, so an Int hash stands in for the signature.
      val signatureHash = MurmurHash3.arrayHash(entry.sigElements)
      val key: Product  = (entry.table, signatureHash)
      (key, (entry.id, entry.point))
    }

    entries
  }
} 
Example 19
Source File: BandingCollisionStrategy.scala    From spark-neighbors   with MIT License 5 votes vote down vote up
package com.github.karlhigley.spark.neighbors.collision

import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature }


  /**
    * Splits each entry's signature into consecutive bands and emits one keyed record per
    * band: the key is (table, band number, hash of the band), the value is (id, point).
    */
  def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = {
    val bandEntries = hashTables.flatMap { entry =>
      val signature = entry.sigElements
      // NOTE(review): the band width is 0 when `bands` exceeds the signature size, and
      // `grouped(0)` throws - confirm callers guarantee bands <= signature size.
      val bandWidth = signature.size / bands
      signature.grouped(bandWidth).zipWithIndex.map { case (bandSig, bandNum) =>
        // Arrays are mutable and unsuitable as RDD keys, so an Int hash stands in for the band.
        val bandSigHash  = MurmurHash3.arrayHash(bandSig)
        val key: Product = (entry.table, bandNum, bandSigHash)
        (key, (entry.id, entry.point))
      }
    }

    bandEntries
  }
} 
Example 20
Source File: Hasher.scala    From sjson-new   with Apache License 2.0 5 votes vote down vote up
package sjsonnew
package support.murmurhash

import scala.util.Try
import HashUtil.hashLong
import java.lang.{ Double => JDouble }
import scala.util.hashing.MurmurHash3

/** Hashes a JSON structure to an `Int`: every node is reduced to a hash as it is built. */
object Hasher extends SupportHasher[Int] {
  // Fixed hashes for the three JSON literals.
  private val nullHash  = 0xc0
  private val falseHash = 0xc2
  private val trueHash  = 0xc3

  implicit val facade: BuilderFacade[Int] = FacadeImpl

  private object FacadeImpl extends SimpleBuilderFacade[Int] {
    val jnull: Int  = nullHash
    val jfalse: Int = falseHash
    val jtrue: Int  = trueHash

    // Numbers supplied as text, and big decimals, are hashed through their string form.
    def jnumstring(s: String): Int      = jstring(s)
    def jintstring(s: String): Int      = jstring(s)
    def jbigdecimal(d: BigDecimal): Int = jstring(d.toString)

    // Ints are widened to Long and doubles use their raw bit pattern.
    def jint(i: Int): Int       = hashLong(i.toLong)
    def jlong(l: Long): Int     = hashLong(l)
    def jdouble(d: Double): Int = hashLong(JDouble.doubleToRawLongBits(d))

    def jstring(s: String): Int            = MurmurHash3.stringHash(s)
    def jarray(vs: List[Int]): Int         = MurmurHash3.seqHash(vs)
    def jobject(vs: Map[String, Int]): Int = MurmurHash3.mapHash(vs)
  }
}
Example 21
Source File: Hashing.scala    From endpoints4s   with MIT License 5 votes vote down vote up
package endpoints4s

import scala.util.hashing.MurmurHash3

private[endpoints4s] object Hashing {

  /**
    * Combines the hashes (`##`) of `values`, in order, into a single hash.
    * Adapted from `MurmurHash3.productHash`.
    *
    * @param values the values to hash; must not be empty
    * @throws IllegalArgumentException when no value is given
    */
  def hash(values: Any*): Int = {
    require(values.nonEmpty)
    val mixed = values.foldLeft(MurmurHash3.productSeed) { (acc, value) =>
      MurmurHash3.mix(acc, value.##)
    }
    MurmurHash3.finalizeHash(mixed, values.size)
  }

}
Example 22
Source File: Literal.scala    From dagon   with Apache License 2.0 5 votes vote down vote up
package com.stripe.dagon

import java.io.Serializable
import scala.util.hashing.MurmurHash3
import scala.util.control.TailCalls


  /**
    * Builds a structural-equality function over pairs of literals.
    *
    * Two literals are equal when the pair reports `itemsEq`, or when they have the same shape
    * (Const / Unary / Binary / Variadic), equal functions (or equal constants) and pairwise
    * equal children. Child comparisons go through `rec`, the function handed in by
    * `Memoize.function` - presumably so repeated pairs are only compared once (TODO confirm).
    */
  private def eqFn[N[_]]: Function[RefPair[Literal[N, _], Literal[N, _]], Boolean] =
    Memoize.function[RefPair[Literal[N, _], Literal[N, _]], Boolean] {
      // shortcut taken before any structural comparison
      case (pair, _) if pair.itemsEq => true
      case (RefPair(Const(a), Const(b)), _) => a == b
      case (RefPair(Unary(left, fa), Unary(right, fb)), rec) =>
        (fa == fb) && rec(RefPair(left, right))
      case (RefPair(Binary(lefta, righta, fa), Binary(leftb, rightb, fb)), rec) =>
        (fa == fb) && rec(RefPair(lefta, leftb)) && rec(RefPair(righta, rightb))
      case (RefPair(Variadic(argsa, fa), Variadic(argsb, fb)), rec) =>
        // walks both argument lists in lock-step; lists of different length are unequal
        @annotation.tailrec
        def loop(left: List[Literal[N, _]], right: List[Literal[N, _]]): Boolean =
          (left, right) match {
            case (lh :: ltail, rh :: rtail) =>
              rec(RefPair(lh, rh)) && loop(ltail, rtail)
            case (Nil, Nil) => true
            case _ => false
          }

        (fa == fb) && loop(argsa, argsb)
      // literals of different shapes are never equal
      case other => false
    }
} 
Example 23
Source File: DistributeAndMerge.scala    From akka_streams_tutorial   with MIT License 5 votes vote down vote up
package sample.graphdsl

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream._
import akka.stream.scaladsl._

import scala.concurrent.Future
import scala.util.hashing.MurmurHash3



    /**
      * Builds a flow that sends each element to one of `numBuckets` branches chosen
      * from its hash, applies `fn` on that branch via `mapAsync`, and merges the
      * branch outputs back into a single stream.
      *
      * Elements with the same hash are always routed to the same branch.
      *
      * @param numBuckets  number of partition outputs (and merge inputs)
      * @param parallelism parallelism passed to `mapAsync` on every branch
      * @param hash        hashing function used to pick the branch; any `Int` is accepted
      * @param fn          asynchronous function applied to each element
      * @return a flow from `A` to `B` wrapping the partition/merge graph
      */
    def hashingDistribution[A, B](numBuckets: Int,
                                  parallelism: Int,
                                  hash: A => Int,
                                  fn: A => Future[B]): Flow[A, B, NotUsed] = {
      Flow.fromGraph(GraphDSL.create() { implicit builder =>
        import GraphDSL.Implicits._
        val numPorts = numBuckets
        // Take the remainder BEFORE the absolute value. math.abs(Int.MinValue) is
        // still negative, so abs-then-modulo could produce a negative port index.
        // For every other hash this yields exactly the same bucket as before.
        val partitioner =
          builder.add(Partition[A](outputPorts = numPorts, partitioner = a => math.abs(hash(a) % numPorts)))
        val merger = builder.add(Merge[B](inputPorts = numPorts, eagerComplete = false))

        // Wire one asynchronous processing branch per port.
        Range(0, numPorts).foreach { eachPort =>
          partitioner.out(eachPort) ~> Flow[A].mapAsync(parallelism)(fn) ~> merger.in(eachPort)
        }

        FlowShape(partitioner.in, merger.out)
      })
    }

  // Demo pipeline: pushes the integers 1 to 10 through hashingDistribution with
  // 3 buckets and a per-bucket parallelism of 2, prints every element that reaches
  // the sink, and calls system.terminate() once the stream's future completes.
  // NOTE(review): `sampleAsyncCall` and `system` are defined outside this excerpt.
  Source(1 to 10)
    .via(
      hashingDistribution[Int, Int](
        numBuckets = 3,
        parallelism = 2,
        hash = element => MurmurHash3.stringHash(element.toString), // Hashing function (Int => Int): hashes the element's string form
        fn = sampleAsyncCall
      )
    )
    .runWith(Sink.foreach(each => println(s"Reached sink: $each")))
    .onComplete(_ => system.terminate())
} 
Example 24
Source File: Envelope.scala    From seals   with Apache License 2.0 5 votes vote down vote up
package dev.tauri.seals
package core

import scala.util.hashing.MurmurHash3

import cats.{ Eq, Show }
import cats.implicits._

/**
 * A value of type `A` carried together with the `Reified` instance for `A`.
 *
 * Equality and hashing consider only `value`; the string form shows both the
 * model and the value.
 */
sealed trait Envelope[A] extends Serializable {

  /** The wrapped value. */
  def value: A

  /** The `Reified` instance that accompanies the value. */
  def reified: Reified[A]

  /** The model obtained from `reified`. */
  final def model: Model = reified.model

  /** Two envelopes are equal when their values are equal (`==`). */
  final override def equals(that: Any): Boolean =
    that match {
      case other: Envelope[_] => value == other.value
      case _ => false
    }

  /** Hash of `value.##` mixed with `Envelope.hashSeed` and finalized with length 1. */
  final override def hashCode: Int =
    MurmurHash3.finalizeHash(MurmurHash3.mixLast(Envelope.hashSeed, value.##), 1)

  /** Same as `show`, using a `Show` built from the value's `toString`. */
  final override def toString: String = show(Show.fromToString[A])

  /** Renders the envelope as `Envelope[model](value)` using the given `Show[A]`. */
  final def show(implicit A: Show[A]): String =
    sh"Envelope[${model}](${value})"
}

/** Factory and type class instances for [[Envelope]]. */
object Envelope {

  /** Internal representation used by `refinement`: a model paired with the value. */
  private final case class EnvelopeRepr[A](model: Model, value: A)

  /** Seed mixed into the value's hash by `Envelope#hashCode`. */
  private[seals] final val hashSeed = 0x37dd86e4

  /** Wraps `a` in an envelope that carries the implicit `Reified[A]`. */
  def apply[A](a: A)(implicit r: Reified[A]): Envelope[A] = new Envelope[A] {
    override val value = a
    override val reified = r
  }

  /** `Eq` for envelopes: compares only the wrapped values, using `EqA`. */
  implicit def envelopeEquality[A](implicit EqA: Eq[A]): Eq[Envelope[A]] = new Eq[Envelope[A]] {
    override def eqv(x: Envelope[A], y: Envelope[A]): Boolean =
      EqA.eqv(x.value, y.value)
  }

  /** `Show` for envelopes: delegates to `Envelope#show` with the given `Show[A]`. */
  implicit def envelopeShow[A](implicit A: Show[A]): Show[Envelope[A]] =
    Show.show(env => env.show(A))

  /**
   * Refinement between `EnvelopeRepr[A]` and `Envelope[A]`.
   *
   * `from` accepts a representation only when its model is `compatible` with the
   * model of `r`; otherwise it returns a `Left` describing the mismatch.
   * `to` stores the envelope's model and value in an `EnvelopeRepr`.
   */
  private def refinement[A](implicit r: Reified[A]): Refinement.Aux[Envelope[A], EnvelopeRepr[A]] = {
    new Refinement[Envelope[A]] {
      override type Repr = EnvelopeRepr[A]
      override val uuid = uuid"8e6e8b29-91e1-403c-9992-fd9cf8c82b06"
      override def repr = Refinement.ReprFormat.single("✉")
      override def from(repr: Repr) = {
        // Rebuild the envelope with the locally available `r`, not with anything from `repr`.
        if (repr.model compatible r.model) Either.right(Envelope[A](repr.value)(r))
        else Either.left(sh"incompatible models: expected '${r.model}', got '${repr.model}'")
      }
      override def to(env: Envelope[A]) = {
        EnvelopeRepr[A](env.model, env.value)
      }
    }
  }

  /** `Reified` for envelopes, obtained by refining `Reified[EnvelopeRepr[A]]` with `refinement`. */
  implicit def reifiedForEnvelope[A](implicit r: Reified[A]): Reified[Envelope[A]] =
    Reified[EnvelopeRepr[A]].refined[Envelope[A]](refinement[A])
}
Example 25
Source File: HashDerivation.scala    From magnolify   with Apache License 2.0 5 votes vote down vote up
package magnolify.cats.semiauto

import cats.Hash
import magnolia._
import magnolify.shims.MurmurHash3Compat

import scala.language.experimental.macros
import scala.util.hashing.MurmurHash3

/** Derivation of cats `Hash` instances through Magnolia. */
object HashDerivation {
  type Typeclass[T] = Hash[T]

  /**
   * `Hash` for a case class.
   *
   * With no parameters the hash is the hash code of the short type name.
   * Otherwise the parameter hashes are mixed, in order, into a seed obtained from
   * `MurmurHash3Compat.seed` applied to that same name hash, and the result is
   * finalized with the parameter count. Equality comes from `EqMethods.combine`.
   */
  def combine[T](caseClass: ReadOnlyCaseClass[Typeclass, T]): Typeclass[T] = {
    val eqvImpl = EqMethods.combine(caseClass)

    new Hash[T] {
      override def eqv(x: T, y: T): Boolean = eqvImpl(x, y)

      override def hash(x: T): Int = {
        val params = caseClass.parameters
        val nameHash = caseClass.typeName.short.hashCode
        if (params.isEmpty) nameHash
        else {
          val mixed = params.foldLeft(MurmurHash3Compat.seed(nameHash)) { (acc, param) =>
            MurmurHash3.mix(acc, param.typeclass.hash(param.dereference(x)))
          }
          MurmurHash3.finalizeHash(mixed, params.size)
        }
      }
    }
  }

  /**
   * `Hash` for a sealed trait: hashes a value with the instance of the subtype
   * selected by `sealedTrait.dispatch`. Equality comes from `EqMethods.dispatch`.
   */
  def dispatch[T](sealedTrait: SealedTrait[Typeclass, T]): Typeclass[T] = {
    val eqvImpl = EqMethods.dispatch(sealedTrait)

    new Hash[T] {
      override def eqv(x: T, y: T): Boolean = eqvImpl(x, y)

      override def hash(x: T): Int =
        sealedTrait.dispatch(x)(subtype => subtype.typeclass.hash(subtype.cast(x)))
    }
  }

  /** Macro entry point generating a `Hash[T]` via `Magnolia.gen`. */
  implicit def apply[T]: Typeclass[T] = macro Magnolia.gen[T]
}
Example 26
Source File: package.scala    From magnolify   with Apache License 2.0 5 votes vote down vote up
package magnolify

import scala.collection.generic.CanBuildFrom
import scala.collection.mutable
import scala.language.higherKinds
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

/**
 * Compatibility shims: collection-building helpers, serializable `CanBuildFrom`
 * instances, a `JavaConverters` alias and a MurmurHash3 seed helper.
 */
package object shims {
  /**
   * `mercator.Monadic` whose `flatMap`/`map` are forwarded to the abstract
   * `flatMapS`/`mapS`, which implementors provide instead.
   */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn)
    override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn)
  }

  /** Serializable factory that builds a collection `C` from elements of type `A`. */
  trait FactoryCompat[-A, +C] extends Serializable {
    /** A fresh builder for `C`. */
    def newBuilder: mutable.Builder[A, C]
    /** Appends all of `xs` to a fresh builder and returns its result. */
    def build(xs: TraversableOnce[A]): C = (newBuilder ++= xs).result()
  }

  /**
   * Implicit `FactoryCompat` instances. `Array` is defined here, `List` in
   * `LowPriorityFactoryCompat1` and `Vector` in `LowPriorityFactoryCompat2`,
   * which ranks them by implicit priority in that order.
   */
  object FactoryCompat extends LowPriorityFactoryCompat1 {
    private type FC[A, C] = FactoryCompat[A, C]

    /** Creates a `FactoryCompat` whose `newBuilder` calls `f` each time. */
    def apply[A, C](f: () => mutable.Builder[A, C]): FC[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f()
      }

    implicit def arrayFC[A: ClassTag] = FactoryCompat(() => Array.newBuilder[A])
    // Deprecated in 2.13
    // implicit def traversableFC[A] = FactoryCompat(() => Traversable.newBuilder[A])
    // List <: Iterable
    // implicit def iterableFC[A] = FactoryCompat(() => Iterable.newBuilder[A])
    // List <: Seq
    // implicit def seqFC[A] = FactoryCompat(() => Seq.newBuilder[A])
    // Vector <: IndexedSeq
    // implicit def indexedSeqFC[A] = FactoryCompat(() => IndexedSeq.newBuilder[A])
  }

  /** Second priority level: `List`. */
  trait LowPriorityFactoryCompat1 extends LowPriorityFactoryCompat2 {
    implicit def listFC[A] = FactoryCompat(() => List.newBuilder[A])
  }

  /** Lowest priority level: `Vector`. */
  trait LowPriorityFactoryCompat2 {
    implicit def vectorFC[A] = FactoryCompat(() => Vector.newBuilder[A])
    // Deprecated in 2.13
    // implicit def streamFC[A] = FactoryCompat(() => Stream.newBuilder[A])
  }

  /** `CanBuildFrom` instances that also mix in `Serializable`. */
  object SerializableCanBuildFroms {
    // Both `apply` overloads call `f`; the `from` collection is not consulted.
    private def cbf[A, C](f: () => mutable.Builder[A, C]): CanBuildFrom[C, A, C] =
      new CanBuildFrom[C, A, C] with Serializable {
        override def apply(from: C): mutable.Builder[A, C] = f()
        override def apply(): mutable.Builder[A, C] = f()
      }

    implicit def arrayCBF[A: ClassTag] = cbf(() => Array.newBuilder[A])
    implicit def traversableCBF[A] = cbf(() => Traversable.newBuilder[A])
    implicit def iterableCBF[A] = cbf(() => Iterable.newBuilder[A])
    implicit def seqCBF[A] = cbf(() => Seq.newBuilder[A])
    implicit def indexedSeqCBF[A] = cbf(() => IndexedSeq.newBuilder[A])
    implicit def listCBF[A] = cbf(() => List.newBuilder[A])
    implicit def vectorCBF[A] = cbf(() => Vector.newBuilder[A])
    implicit def streamCBF[A] = cbf(() => Stream.newBuilder[A])
  }

  /** Alias for `scala.collection.JavaConverters`. */
  val JavaConverters = scala.collection.JavaConverters

  object MurmurHash3Compat {
    /**
     * Seed for hashing a product. `data` is ignored here and
     * `MurmurHash3.productSeed` is returned unchanged.
     * NOTE(review): presumably mirrors the product hashing of the Scala version
     * this shim targets -- confirm.
     */
    def seed(data: Int): Int = MurmurHash3.productSeed
  }
}
Example 27
Source File: package.scala    From magnolify   with Apache License 2.0 5 votes vote down vote up
package magnolify

import scala.collection.{mutable, Factory}
import scala.util.hashing.MurmurHash3

/**
 * Compatibility shims: a `Monadic` adapter, a serializable collection factory,
 * a `JavaConverters` alias and a MurmurHash3 seed helper.
 */
package object shims {

  /**
   * `mercator.Monadic` whose `map`/`flatMap` are forwarded to the abstract
   * `mapS`/`flatMapS`, which implementors provide instead.
   */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn)
    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn)
  }

  /** Serializable factory that builds a collection `C` from elements of type `A`. */
  trait FactoryCompat[-A, +C] extends Serializable {

    /** A fresh builder for `C`. */
    def newBuilder: mutable.Builder[A, C]

    /** Feeds all of `xs` into a fresh builder and returns its result. */
    def build(xs: IterableOnce[A]): C = {
      val builder = newBuilder
      builder ++= xs
      builder.result()
    }
  }

  object FactoryCompat {

    /** Derives a `FactoryCompat` from any implicit `Factory[A, C]`. */
    implicit def fromFactory[A, C](implicit f: Factory[A, C]): FactoryCompat[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f.newBuilder
      }
  }

  /** Intentionally empty in this variant. */
  object SerializableCanBuildFroms

  /** Alias for `scala.jdk.CollectionConverters`. */
  val JavaConverters = scala.jdk.CollectionConverters

  object MurmurHash3Compat {

    /** Seed for hashing a product: `data` mixed into `MurmurHash3.productSeed`. */
    def seed(data: Int): Int = {
      import MurmurHash3.{mix, productSeed}
      mix(productSeed, data)
    }
  }
}
Example 28
Source File: Hashes.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.counter.util

import org.apache.hadoop.hbase.util.Bytes

import scala.util.hashing.MurmurHash3

/** String hashing helpers. */
object Hashes {

  /** SHA-1 digest of the UTF-8 bytes of `s`, rendered with `Bytes.toHex`. */
  def sha1(s: String): String = {
    val sha1Digest = java.security.MessageDigest.getInstance("SHA-1")
    val utf8Bytes = s.getBytes("UTF-8")
    Bytes.toHex(sha1Digest.digest(utf8Bytes))
  }

  /**
   * Maps any `Int` to a non-negative one: non-negative input is returned as is,
   * negative `h` becomes `-(h + 1)` (so `Int.MinValue` maps to `Int.MaxValue`).
   */
  private def positiveHash(h: Int): Int =
    if (h >= 0) h else -(h + 1)

  /** Non-negative MurmurHash3 string hash of `s`. */
  def murmur3(s: String): Int =
    positiveHash(MurmurHash3.stringHash(s))
}