scala.util.hashing.MurmurHash3 Scala Examples
The following examples show how to use scala.util.hashing.MurmurHash3.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: EthereumTransactionRouter.scala From Raphtory with Apache License 2.0 | 5 votes |
package com.raphtory.examples.blockchain.routers

import com.raphtory.core.components.Router.RouterWorker
import com.raphtory.core.model.communication.EdgeAddWithProperties
import com.raphtory.core.model.communication.Properties
import com.raphtory.core.model.communication.StringProperty
import com.raphtory.core.model.communication.VertexAddWithProperties

import scala.util.hashing.MurmurHash3

/** Router worker that converts one transaction tuple into two vertex updates and one edge update.
  *
  * The tuple's textual form is stripped of its first and last character and split on commas;
  * field 0 is the sender, field 1 the recipient, field 2 the edge payload and field 3 a
  * timestamp in seconds.
  */
class EthereumTransactionRouter(override val routerId: Int, override val workerID: Int, val initialManagerCount: Int)
    extends RouterWorker {

  override protected def parseTuple(value: Any): Unit = {
    val fields = value.toString.drop(1).dropRight(1).split(",")
    val creationDate = fields(3).toLong * 1000 // seconds to milliseconds

    // Vertex IDs are the MurmurHash3 string hash of the wallet ID.
    val senderId   = fields(0)
    val sourceNode = MurmurHash3.stringHash(senderId)
    sendGraphUpdate(
      VertexAddWithProperties(creationDate, sourceNode, Properties(StringProperty("id", senderId)))
    )

    // An empty recipient means the money was burnt; it is routed to a vertex named "null".
    val hasRecipient = fields(1).nonEmpty
    val recipientId  = if (hasRecipient) fields(1) else "null"
    // NOTE(review): the edge payload is stored under "id" for real recipients but under
    // "value" for burnt cash. This mirrors the existing behaviour; confirm whether both
    // branches were meant to use "value".
    val edgeKey      = if (hasRecipient) "id" else "value"
    val targetNode   = MurmurHash3.stringHash(recipientId)

    sendGraphUpdate(
      VertexAddWithProperties(creationDate, targetNode, Properties(StringProperty("id", recipientId)))
    )
    sendGraphUpdate(
      EdgeAddWithProperties(creationDate, sourceNode, targetNode, Properties(StringProperty(edgeKey, fields(2))))
    )
  }
}
Example 2
Source File: SignRandomProjectionLSH.scala From lexrank-summarizer with MIT License | 5 votes |
package io.github.karlhigley.lexrank

import scala.collection.immutable.BitSet
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
import scala.util.hashing.MurmurHash3

import org.apache.spark.mllib.linalg.SparseVector
import org.apache.spark.Logging

/** Sign-random-projection LSH backed by a fixed pool of Gaussian samples.
  *
  * @param poolSize number of Gaussian values generated once and reused for every projection
  */
class SignRandomProjectionLSH(poolSize: Int = 10000) extends Serializable with Logging {
  val pool = SignRandomProjectionLSH.generatePool(poolSize)

  /** Computes the set of signature bits (1-based, in `1 to length`) whose projection is positive.
    *
    * For each bit, every stored (index, value) entry of the vector is multiplied by a pool value
    * selected by hashing `(bit, index)`; the bit is set when the sum of those products is > 0.
    * A vector with no stored entries yields an empty signature.
    */
  def computeSignature(vector: SparseVector, length: Int): BitSet = {
    val elements = vector.indices.zip(vector.values)
    val setBits = (1 to length).filter { bit =>
      val dotProduct = elements.map { case (index, value) =>
        val hash = MurmurHash3.productHash((bit, index))
        // Double modulo maps a possibly negative hash into [0, poolSize).
        val poolIndex = ((hash % poolSize) + poolSize) % poolSize
        value * pool(poolIndex)
      }.sum // `sum` is 0.0 for an empty vector, where `reduce` would throw
      dotProduct > 0
    }
    BitSet(setBits: _*)
  }
}

object SignRandomProjectionLSH {

  /** All subsets of the bit positions `1 to length`, i.e. every possible signature. */
  def signatureSet(length: Int): Set[BitSet] =
    BitSet(1 to length: _*).subsets.toSet

  /** Estimates cosine similarity from the Hamming distance between two signatures. */
  def estimateCosine(a: BitSet, b: BitSet, length: Int): Double = {
    val hammingDistance = (a ^ b).size
    math.cos(hammingDistance.toDouble / length.toDouble * math.Pi)
  }

  /** Draws `size` standard-Gaussian samples from a freshly created `Random`. */
  private def generatePool(size: Int): Array[Double] = {
    val rand = new Random()
    Array.fill(size)(rand.nextGaussian)
  }
}
Example 3
Source File: ShardedSparkeyReader.scala From scio with Apache License 2.0 | 5 votes |
package com.spotify.scio.extra.sparkey.instances

import java.util

import com.spotify.sparkey.{IndexHeader, LogHeader, SparkeyReader}

import scala.util.hashing.MurmurHash3
import scala.jdk.CollectionConverters._

/** A `SparkeyReader` that delegates each lookup to one of several readers chosen by key hash.
  *
  * @param sparkeys  readers keyed by shard ID
  * @param numShards modulus applied to the key hash to obtain the shard ID
  */
class ShardedSparkeyReader(val sparkeys: Map[Short, SparkeyReader], val numShards: Short) extends SparkeyReader {

  // The hash formula is left exactly as-is: shard IDs (which can be negative, because `%`
  // keeps the sign of the hash) have to match whatever produced the keys of `sparkeys`.
  def hashKey(arr: Array[Byte]): Short = (MurmurHash3.bytesHash(arr, 1) % numShards).toShort

  def hashKey(str: String): Short = (MurmurHash3.stringHash(str, 1) % numShards).toShort

  /** Looks the key up in its shard; returns `null` when that shard is not present. */
  override def getAsString(key: String): String =
    sparkeys.get(hashKey(key)).map(_.getAsString(key)).orNull

  /** Looks the key up in its shard; returns `null` when that shard is not present. */
  override def getAsByteArray(key: Array[Byte]): Array[Byte] =
    sparkeys.get(hashKey(key)).map(_.getAsByteArray(key)).orNull

  /** Looks the key up in its shard; returns `null` when that shard is not present. */
  override def getAsEntry(key: Array[Byte]): SparkeyReader.Entry =
    sparkeys.get(hashKey(key)).map(_.getAsEntry(key)).orNull

  override def getIndexHeader: IndexHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getIndexHeader.")

  override def getLogHeader: LogHeader =
    throw new NotImplementedError("ShardedSparkeyReader does not support getLogHeader.")

  override def duplicate(): SparkeyReader =
    new ShardedSparkeyReader(sparkeys.map { case (k, v) => (k, v.duplicate) }, numShards)

  override def close(): Unit = sparkeys.values.foreach(_.close())

  /** Concatenates the entries of every shard.
    *
    * `flatMap` yields an empty iterator when there are no shards (the previous `reduce`
    * threw on an empty map) and opens each shard's iterator only when it is reached.
    */
  override def iterator(): util.Iterator[SparkeyReader.Entry] =
    sparkeys.valuesIterator.flatMap(_.iterator.asScala).asJava
}
Example 4
Source File: HashedCategoricalDistribution.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.util.rand

import scala.util.hashing.MurmurHash3

object HashedCategoricalDistribution {
  private val MaxVal = Int.MaxValue.toFloat

  /** Builds a distribution whose sampler is an `IntAliasMethodSampler` over `probabilities`. */
  def apply(probabilities: Double*): HashedCategoricalDistribution =
    new HashedCategoricalDistribution(new IntAliasMethodSampler(probabilities))
}

/** Deterministically maps a sequence of values to a class index by hashing it and feeding the
  * hash-derived `(k, p)` pair to `sampler`.
  */
case class HashedCategoricalDistribution(sampler: IntAliasMethodSampler) extends (TraversableOnce[Any] => Int) {
  import HashedCategoricalDistribution.MaxVal

  def apply(data: TraversableOnce[Any]): Int = {
    val h = MurmurHash3.orderedHash(data)

    // Take the absolute value because we want (hAbs % numClasses) to give a non-negative index.
    // This also makes the computation of p easier. It lowers the entropy of the hash output,
    // which is acceptable here.
    // `math.abs(Int.MinValue)` is still Int.MinValue (negative), so that single value is
    // mapped to 0 explicitly to keep both k and p non-negative.
    val hAbs = if (h == Int.MinValue) 0 else math.abs(h)
    val p = hAbs / MaxVal
    // NOTE(review): `numClasses` is not defined in the visible snippet; presumably it is the
    // sampler's class count. Confirm against the full source file.
    val k = hAbs % numClasses
    sampler.sample(k, p)
  }
}
Example 5
Source File: SubString.scala From typed-schema with Apache License 2.0 | 5 votes |
package ru.tinkoff.tschema.utils

import ru.tinkoff.tschema.utils.SubString.outOfBound

import scala.annotation.tailrec
import scala.util.hashing.MurmurHash3

/** A `CharSequence` view over the half-open range `[from, to)` of a shared char array.
  *
  * Sub-sequences share the backing array instead of copying it. `hashCode` mixes chars in
  * pairs using the `MurmurHash3` string seed, and `equals` accepts any `CharSequence` with
  * the same characters.
  */
final class SubString private[SubString] (private val arr: Array[Char], private val from: Int, private val to: Int)
    extends CharSequence {
  @inline private[this] def size = to - from

  def length(): Int = size

  def charAt(index: Int): Char =
    if (index < 0 || index >= size) outOfBound(index)
    else arr(from + index)

  def subSequence(start: Int, end: Int): CharSequence = {
    if (start < 0) outOfBound(start)
    if (end > size) outOfBound(end)
    val len = end - start
    if (len < 0) outOfBound(len)
    // The full range is returned as-is; anything else is a new view over the same array.
    val coversEverything = start == 0 && end == size
    if (coversEverything) this else new SubString(arr, from + start, from + end)
  }

  override def equals(obj: Any): Boolean = obj match {
    case other: CharSequence =>
      size == other.length() && {
        // Advance while characters agree; equal iff the scan reaches the end.
        var i = 0
        while (i < size && other.charAt(i) == charAt(i)) i += 1
        i == size
      }
    case _ => false
  }

  override def hashCode(): Int = {
    var acc = MurmurHash3.stringSeed
    var pos = from
    // Two chars are packed into one Int per mixing step.
    while (pos + 1 < to) {
      acc = MurmurHash3.mix(acc, (arr(pos) << 16) + arr(pos + 1))
      pos += 2
    }
    // A trailing unpaired char is mixed on its own.
    if (pos < to) acc = MurmurHash3.mixLast(acc, arr(pos).toInt)
    MurmurHash3.finalizeHash(acc, size)
  }

  override def toString: String = String.valueOf(arr.slice(from, to))
}

object SubString {
  @inline private def outOfBound(index: Int) = throw new StringIndexOutOfBoundsException(index)

  /** Wraps a copy of the string's characters as a view over its whole length. */
  def apply(s: String): SubString = new SubString(s.toCharArray, 0, s.length)
}
Example 6
Source File: RawJson.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package almond.protocol

import java.nio.charset.StandardCharsets
import java.{util => ju}

import scala.util.hashing.MurmurHash3
import scala.util.Try

// adapted from https://github.com/plokhotnyuk/jsoniter-scala/blob/209d918a030b188f064ee55505a6c47257731b4b/jsoniter-scala-macros/src/test/scala/com/github/plokhotnyuk/jsoniter_scala/macros/JsonCodecMakerSpec.scala#L645-L666

/** Wrapper around raw JSON bytes with content-based equality and hashing.
  *
  * Arrays compare by reference by default, so `equals` and `hashCode` are overridden to look
  * at the byte contents instead.
  */
final case class RawJson(value: Array[Byte]) {

  override lazy val hashCode: Int =
    MurmurHash3.arrayHash(value)

  override def equals(obj: Any): Boolean =
    obj match {
      case RawJson(otherBytes) => ju.Arrays.equals(value, otherBytes)
      case _                   => false
    }

  /** The bytes decoded as UTF-8, falling back to the array's default `toString` on failure. */
  override def toString: String =
    Try(new String(value, StandardCharsets.UTF_8)).getOrElse(value.toString)
}

object RawJson {

  import com.github.plokhotnyuk.jsoniter_scala.core._

  /** Codec that reads and writes the wrapped bytes verbatim as a raw JSON value. */
  implicit val codec: JsonValueCodec[RawJson] = new JsonValueCodec[RawJson] {
    def decodeValue(in: JsonReader, default: RawJson): RawJson =
      new RawJson(in.readRawValAsBytes())

    def encodeValue(x: RawJson, out: JsonWriter): Unit =
      out.writeRawVal(x.value)

    val nullValue: RawJson =
      new RawJson(new Array[Byte](0))
  }

  /** The JSON document `{}`. */
  val emptyObj: RawJson =
    RawJson("{}".getBytes(StandardCharsets.UTF_8))
}
Example 7
Source File: XORShiftRandom.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.story.linalg import java.nio.ByteBuffer import java.util.{Random => JavaRandom} import scala.util.Random import scala.util.hashing.MurmurHash3 private[this] def hashSeed(seed: Long): Long = { val bytes = ByteBuffer.allocate(java.lang.Long.SIZE).putLong(seed).array() MurmurHash3.bytesHash(bytes) } // we need to just override next - this will be called by nextInt, nextDouble, // nextGaussian, nextLong, etc. override protected def next(bits: Int): Int = { var nextSeed = seed ^ (seed << 21) nextSeed ^= (nextSeed >>> 35) nextSeed ^= (nextSeed << 4) seed = nextSeed (nextSeed & ((1L << bits) - 1)).asInstanceOf[Int] } } object XORShiftRandom { val random = new Random() }
Example 8
Source File: DatasetClient.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.benchmarks

import java.util.zip.GZIPInputStream

import com.amazonaws.services.s3.AmazonS3
import com.klibisz.elastiknn.api.{ElasticsearchCodec, Vec}
import com.klibisz.elastiknn.benchmarks.Dataset._
import io.circe
import zio._
import zio.stream._

import scala.io.Source
import scala.util.Random
import scala.util.hashing.MurmurHash3

/** Provides streams of train/test vectors for a benchmark dataset. */
object DatasetClient {

  trait Service {
    /** Streams the "train" vectors of `dataset`. */
    def streamTrain(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
    /** Streams the "test" vectors of `dataset`. */
    def streamTest(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec]
  }

  /** Layer whose service generates random datasets locally and reads every other dataset from
    * the gzipped object `$keyPrefix/<dataset name>/<train|test>.json.gz` in `bucket`.
    */
  def s3(bucket: String, keyPrefix: String): ZLayer[Has[AmazonS3], Throwable, DatasetClient] =
    ZLayer.fromService[AmazonS3, Service] { client =>
      new Service {
        private def stream(dataset: Dataset, name: String, limit: Option[Int]): Stream[Throwable, Vec] =
          dataset match {
            // Random datasets: the RNG is seeded from a hash of (dims, name), so the same
            // dataset and split always use the same seed.
            // NOTE(review): `limit` is not applied in the two random branches.
            case r: RandomSparseBool =>
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name)))
              Stream
                .range(0, if (name == "train") r.train else r.test)
                .map(_ => Vec.SparseBool.random(r.dims, r.bias))
            case r: RandomDenseFloat =>
              implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name)))
              Stream
                .range(0, if (name == "train") r.train else r.test)
                .map(_ => Vec.DenseFloat.random(r.dims))
            case _ =>
              // Parses one line of JSON and decodes it into a Vec.
              def parseDecode(s: String): Either[circe.Error, Vec] =
                ElasticsearchCodec.parse(s).flatMap(j => ElasticsearchCodec.decode[Vec](j.hcursor))
              // The S3 object is requested here, when `stream` is called, not inside the managed resource.
              val obj = client.getObject(bucket, s"$keyPrefix/${dataset.name}/${name}.json.gz")
              // The Source over the gunzipped content is closed when the managed resource is released.
              val iterManaged = Managed.makeEffect(Source.fromInputStream(new GZIPInputStream(obj.getObjectContent)))(_.close())
              // With a limit only the first n lines are read; otherwise all lines.
              val lines = Stream.fromIteratorManaged(iterManaged.map(src => limit.map(n => src.getLines.take(n)).getOrElse(src.getLines())))
              // Drop everything before the first '{' on each line.
              val rawJson = lines.map(_.dropWhile(_ != '{'))
              rawJson.mapM(s => ZIO.fromEither(parseDecode(s)))
          }

        override def streamTrain(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "train", limit)

        override def streamTest(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "test", limit)
      }
    }
}
Example 9
Source File: versionspecific.scala From kittens with Apache License 2.0 | 5 votes |
package cats.derived.util

import scala.util.hashing.MurmurHash3

/** Definitions for the build variant in which `Lazy` is an alias for `shapeless.Lazy`. */
object VersionSpecific {
  type Lazy[+A] = shapeless.Lazy[A]

  /** Hash seed for a product; in this variant it is `MurmurHash3.productSeed` and `x` is unused. */
  private[derived] def productSeed(x: Product): Int = MurmurHash3.productSeed

  /** Either a primary value of type `A` or a secondary value of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    /** Applies `prim` to a primary value or `sec` to a secondary one. */
    def fold[C](prim: A => C, sec: B => C): C
    /** Collapses both sides to a common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold(identity, ev)
  }

  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  // `value` is by-name, so it is evaluated each time `fold` is called.
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  // `primary` is defined directly in the companion while `secondary` is inherited from
  // OrElse0, so `primary` takes precedence during implicit resolution.
  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): A OrElse B = new Primary(a)
  }

  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: Lazy[B]): A OrElse B = new Secondary(b.value)
  }
}
Example 10
Source File: versionspecific.scala From kittens with Apache License 2.0 | 5 votes |
package cats.derived.util

import scala.annotation.implicitNotFound
import scala.util.hashing.MurmurHash3

/** Definitions for the build variant in which `Lazy` is implemented with by-name implicits. */
object VersionSpecific {

  /** Hash seed for a product: `MurmurHash3.productSeed` mixed with the hash of `x.productPrefix`. */
  private[derived] def productSeed(x: Product): Int = MurmurHash3.mix(MurmurHash3.productSeed, x.productPrefix.hashCode)

  /** A deferred implicit value, obtained by calling `value()`. */
  @implicitNotFound("could not find Lazy implicit value of type ${A}")
  abstract class Lazy[+A] extends Serializable {
    def value(): A
  }

  object Lazy {
    // Wraps a by-name implicit; `ev` is evaluated on each `value()` call.
    implicit def instance[A](implicit ev: => A): Lazy[A] = () => ev
  }

  /** Either a primary value of type `A` or a secondary value of type `B`. */
  sealed trait OrElse[+A, +B] extends Serializable {
    /** Applies `prim` to a primary value or `sec` to a secondary one. */
    def fold[C](prim: A => C, sec: B => C): C
    /** Collapses both sides to a common supertype `C`. */
    def unify[C >: A](implicit ev: B <:< C): C = fold(identity, ev)
  }

  final class Primary[+A](value: A) extends OrElse[A, Nothing] {
    def fold[C](prim: A => C, sec: Nothing => C): C = prim(value)
  }

  // `value` is by-name, so it is evaluated each time `fold` is called.
  final class Secondary[+B](value: => B) extends OrElse[Nothing, B] {
    def fold[C](prim: Nothing => C, sec: B => C): C = sec(value)
  }

  // `primary` is defined directly in the companion while `secondary` is inherited from
  // OrElse0, so `primary` takes precedence during implicit resolution.
  object OrElse extends OrElse0 {
    implicit def primary[A, B](implicit a: A): A OrElse B = new Primary(a)
  }

  private[util] abstract class OrElse0 {
    implicit def secondary[A, B](implicit b: => B): A OrElse B = new Secondary(b)
  }
}
Example 11
Source File: Port.scala From ip4s with Apache License 2.0 | 5 votes |
package com.comcast.ip4s

import scala.util.Try
import scala.util.hashing.MurmurHash3

import cats.{Order, Show}

/** A port number, constructible only through the validating factories on the companion.
  *
  * Implemented as a hand-written product so that `copy` can re-validate and return an `Option`.
  */
final class Port private (val value: Int) extends Product with Serializable with Ordered[Port] {

  /** Re-validates the new value; `None` when it is out of range. */
  def copy(value: Int): Option[Port] = Port(value)

  def compare(that: Port): Int = Integer.compare(value, that.value)

  override def toString: String = String.valueOf(value)

  override def hashCode: Int = MurmurHash3.productHash(this, productPrefix.hashCode)

  override def equals(other: Any): Boolean =
    other match {
      case p: Port => p.value == value
      case _       => false
    }

  override def canEqual(other: Any): Boolean = other.isInstanceOf[Port]

  override def productArity: Int = 1

  override def productElement(n: Int): Any =
    n match {
      case 0 => value
      case _ => throw new IndexOutOfBoundsException
    }
}

object Port {
  val MinValue: Int = 0
  val MaxValue: Int = 65535

  /** Wraps `value` when it lies in `[MinValue, MaxValue]`, otherwise `None`. */
  def apply(value: Int): Option[Port] =
    Some(value).filter(v => MinValue <= v && v <= MaxValue).map(new Port(_))

  /** Parses a decimal integer and validates it; `None` when parsing or validation fails. */
  def fromString(value: String): Option[Port] =
    for {
      number <- Try(value.toInt).toOption
      port   <- apply(number)
    } yield port

  def unapply(p: Port): Option[Int] = Some(p.value)

  implicit val order: Order[Port] = Order.fromComparable[Port]
  implicit val show: Show[Port] = Show.fromToString[Port]
}
Example 12
Source File: Hostname.scala From ip4s with Apache License 2.0 | 5 votes |
package com.comcast.ip4s import scala.util.hashing.MurmurHash3 import cats.{Order, Show} def apply(value: String): Option[Hostname] = value.size match { case 0 => None case i if i > 253 => None case _ => value match { case Pattern(_*) => val labels = value .split('.') .iterator .map(new Label(_)) .toList if (labels.isEmpty) None else Option(new Hostname(labels, value)) case _ => None } } implicit val order: Order[Hostname] = Order.fromComparable[Hostname] implicit val show: Show[Hostname] = Show.fromToString[Hostname] }
Example 13
Source File: Orientation.scala From morpheus with Apache License 2.0 | 5 votes |
package org.opencypher.okapi.ir.api.pattern

import cats.Eq

import scala.util.hashing.MurmurHash3

/** Equality and hashing strategy for a pair of endpoints. */
sealed trait Orientation[E <: Endpoints] extends Eq[E] {
  /** Hashes `ends` starting from `seed`, consistently with this orientation's `eqv`. */
  def hash(ends: E, seed: Int): Int
}

object Orientation {

  /** Source and target are distinguished: order matters for both hashing and equality. */
  case object Directed extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.orderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean =
      if (x.source != y.source) false
      else x.target == y.target
  }

  /** Source and target are interchangeable: hashing is order-insensitive and equality also
    * accepts the swapped pair.
    */
  case object Undirected extends Orientation[DifferentEndpoints] {
    override def hash(ends: DifferentEndpoints, seed: Int): Int =
      MurmurHash3.unorderedHash(ends, seed)

    override def eqv(x: DifferentEndpoints, y: DifferentEndpoints): Boolean =
      if (x.source == y.source && x.target == y.target) true
      else x.source == y.target && x.target == y.source
  }

  /** Both ends are the same field, so only that field is hashed and compared. */
  case object Cyclic extends Orientation[IdenticalEndpoints] {
    override def hash(ends: IdenticalEndpoints, seed: Int): Int = {
      val fieldHash = ends.field.hashCode()
      MurmurHash3.mix(seed, fieldHash)
    }

    override def eqv(x: IdenticalEndpoints, y: IdenticalEndpoints): Boolean =
      x.field == y.field
  }
}
Example 14
Source File: Rule.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.rts

import scala.util.hashing.MurmurHash3

import cmwell.domain.{FieldValue, Infoton}
import cmwell.formats.FormatType

// message
sealed abstract class TransmitType
case class Push(handler: (Seq[String]) => Unit) extends TransmitType
case class Pull(format: FormatType) extends TransmitType

/** A path filter.
  *
  * @param path      the path prefix to match
  * @param recursive whether paths nested more than one level below `path` also match
  */
class Path(path: String, val recursive: Boolean) extends Serializable {
  val hashValue: Int = MurmurHash3.stringHash(path)

  def length = path.length

  def hash: Int = hashValue

  override def toString() = s"path [$path] recursive [$recursive]"

  /** Tests whether `p` is covered by this filter.
    *
    * `p` matches when it equals `path`, or when it starts with `path` and either the filter is
    * recursive or the remainder (after skipping one separator character) has no further '/'.
    *
    * The prefix is compared as a string; comparing only the MurmurHash3 values of the prefix
    * and the path, as was done before, reported a match for unequal strings whose hashes collide.
    */
  def check(p: String): Boolean =
    // NOTE(review): a prefix match does not require a '/' boundary after `path`, so "/a/b"
    // recursively matches "/a/bc/d". Kept as-is; confirm whether that is intended.
    p == path || (p.startsWith(path) && (recursive || !p.drop(length + 1).contains('/')))
}

/** A field filter: a map from field name to the set of accepted values. */
class MatchMap(fields: Map[String, Set[FieldValue]]) extends Serializable {

  /** Tests the fields `f` of a candidate against this filter.
    *
    * An empty filter matches everything. Otherwise at least one filter field must be present
    * in `f` and either accept any value (empty set) or share a value with the candidate.
    */
  def check(f: Map[String, Set[FieldValue]]): Boolean =
    fields.isEmpty || fields.exists { case (name, accepted) =>
      f.get(name).exists(found => accepted.isEmpty || accepted.exists(found.contains))
    }
}

sealed abstract class Rule
case object NoFilter extends Rule
case class PathFilter(path: Path) extends Rule
case class MatchFilter(matchMap: MatchMap) extends Rule
case class PMFilter(path: Path, matchMap: MatchMap) extends Rule

/** Factories that pick the rule variant from the arguments supplied. */
object Rule {
  def apply() = NoFilter
  def apply(path: String, recursive: Boolean) = PathFilter(new Path(path, recursive))
  def apply(fields: Map[String, Set[FieldValue]]) = MatchFilter(new MatchMap(fields))
  def apply(path: String, recursive: Boolean, fields: Map[String, Set[FieldValue]]) =
    PMFilter(new Path(path, recursive), new MatchMap(fields))
}
Example 15
Source File: CategoricalColHashBucket.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

/** Splits each input string on a delimiter and maps every token to a hash bucket.
  *
  * @param hashBucketSize number of buckets; every emitted value lies in `[0, hashBucketSize)`
  * @param strDelimiter   delimiter passed to `String.split`
  * @param isSparse       whether the output is a sparse tensor or its dense conversion
  */
class CategoricalColHashBucket[T: ClassTag](
  val hashBucketSize: Int,
  val strDelimiter: String = ",",
  val isSparse: Boolean = true
)(implicit ev: TensorNumeric[T])
  extends Operation[Tensor[String], Tensor[Int], T] {

  output = Tensor[Int]()

  override def updateOutput(input: Tensor[String]): Tensor[Int] = {
    val rowCount = input.size(dim = 1)
    val rowIdx = new ArrayBuffer[Int]()
    val colIdx = new ArrayBuffer[Int]()
    val buckets = new ArrayBuffer[Int]()
    var widestRow = 0

    // Tensor access is 1-based; the recorded row coordinate is 0-based.
    for (row <- 1 to rowCount) {
      val tokens = input.valueAt(row, 1).split(strDelimiter)
      widestRow = math.max(widestRow, tokens.length)
      for (col <- tokens.indices) {
        // `%` keeps the sign of the hash, so negative remainders are shifted into range.
        val remainder = MurmurHash3.stringHash(tokens(col)) % hashBucketSize
        rowIdx += row - 1
        colIdx += col
        buckets += (if (remainder < 0) remainder + hashBucketSize else remainder)
      }
    }

    val indices = Array(rowIdx.toArray, colIdx.toArray)
    val shape = Array(rowCount, widestRow)
    output =
      if (isSparse) Tensor.sparse(indices, buckets.toArray, shape)
      else Tensor.dense(Tensor.sparse(indices, buckets.toArray, shape))
    output
  }
}

object CategoricalColHashBucket {
  def apply[T: ClassTag](
    hashBucketSize: Int,
    strDelimiter: String = ",",
    isSparse: Boolean = true)
    (implicit ev: TensorNumeric[T]): CategoricalColHashBucket[T] =
    new CategoricalColHashBucket[T](
      hashBucketSize = hashBucketSize,
      strDelimiter = strDelimiter,
      isSparse = isSparse
    )
}
Example 16
Source File: Instances.scala From radixtree with Apache License 2.0 | 5 votes |
package com.rklaehn.radixtree

import algebra.Eq
import cats.kernel.Hash

import scala.util.hashing.MurmurHash3

object Instances {

  /** `Hash` for arrays, derived element-wise from the element `Hash`.
    *
    * Two arrays are equal when they have the same length and pairwise-equal elements. The hash
    * mixes each element hash into `MurmurHash3.arraySeed` and returns the result without a
    * finalization step.
    */
  implicit def ArrayHash[@specialized A](implicit aHash: Hash[A]): Hash[Array[A]] = new Hash[Array[A]] {

    def eqv(x: Array[A], y: Array[A]): Boolean = {
      val n = x.length
      n == y.length && {
        // Advance while elements agree; equal iff the scan reaches the end.
        var idx = 0
        while (idx < n && aHash.eqv(x(idx), y(idx))) idx += 1
        idx == n
      }
    }

    override def hash(a: Array[A]): Int = {
      var acc = MurmurHash3.arraySeed
      var idx = 0
      val n = a.length
      while (idx < n) {
        acc = MurmurHash3.mix(acc, aHash.hash(a(idx)))
        idx += 1
      }
      acc
    }
  }
}
Example 17
Source File: package.scala From radixtree with Apache License 2.0 | 5 votes |
package com.rklaehn

import algebra.Eq
import cats.kernel.Hash

import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

// scalastyle:off return
package object radixtree {

  /** Element-wise array equality: same length and pairwise-equal elements under `Eq[A]`. */
  private[radixtree] def arrayEqv[A: Eq](x: Array[A], y: Array[A]): Boolean = {
    val n = x.length
    n == y.length && {
      // Advance while elements agree; equal iff the scan reaches the end.
      var idx = 0
      while (idx < n && Eq.eqv(x(idx), y(idx))) idx += 1
      idx == n
    }
  }

  /** Mixes every element hash into `MurmurHash3.arraySeed`; no finalization step is applied. */
  private[radixtree] def arrayHash[A: Hash](a: Array[A]): Int = {
    var acc = MurmurHash3.arraySeed
    var idx = 0
    val n = a.length
    while (idx < n) {
      acc = MurmurHash3.mix(acc, Hash.hash(a(idx)))
      idx += 1
    }
    acc
  }

  /** Copy-returning helpers for arrays; the receiver itself is never modified. */
  private[radixtree] implicit class ArrayOps[T](private val underlying: Array[T]) extends AnyVal {

    /** A clone of the array with the element at `index` replaced by `value`. */
    def updated(index: Int, value: T): Array[T] = {
      val copy = underlying.clone
      copy(index) = value
      copy
    }

    /** A new array, one element longer, with `value` inserted at `index`. */
    def patched(index: Int, value: T)(implicit c: ClassTag[T]): Array[T] = {
      val oldLength = underlying.length
      val grown = new Array[T](oldLength + 1)
      System.arraycopy(underlying, 0, grown, 0, index)
      grown(index) = value
      // Shift the tail right by one; nothing to copy when inserting at the end.
      if (index < oldLength)
        System.arraycopy(underlying, index, grown, index + 1, oldLength - index)
      grown
    }

    /** The same array when it already has length `n`; otherwise a new array of length `n`
      * holding as many leading elements as fit.
      */
    def resizeInPlace(n: Int)(implicit c: ClassTag[T]): Array[T] = {
      val oldLength = underlying.length
      if (oldLength == n) underlying
      else {
        val resized = c.newArray(n)
        System.arraycopy(underlying, 0, resized, 0, n min oldLength)
        resized
      }
    }
  }
}
Example 18
Source File: SimpleCollisionStrategy.scala From spark-neighbors with MIT License | 5 votes |
package com.github.karlhigley.spark.neighbors.collision import scala.util.hashing.MurmurHash3 import org.apache.spark.mllib.linalg.SparseVector import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature } def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = { val entries = hashTables.map(entry => { // Arrays are mutable and can't be used in RDD keys // Use a hash value (i.e. an int) as a substitute val key = (entry.table, MurmurHash3.arrayHash(entry.sigElements)).asInstanceOf[Product] (key, (entry.id, entry.point)) }) entries } }
Example 19
Source File: BandingCollisionStrategy.scala From spark-neighbors with MIT License | 5 votes |
package com.github.karlhigley.spark.neighbors.collision import scala.util.hashing.MurmurHash3 import org.apache.spark.mllib.linalg.SparseVector import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import com.github.karlhigley.spark.neighbors.lsh.{ BitSignature, HashTableEntry, IntSignature } def apply(hashTables: RDD[_ <: HashTableEntry[_]]): RDD[(Product, Point)] = { val bandEntries = hashTables.flatMap(entry => { val elements = entry.sigElements val banded = elements.grouped(elements.size / bands).zipWithIndex banded.map { case (bandSig, bandNum) => { // Arrays are mutable and can't be used in RDD keys // Use a hash value (i.e. an int) as a substitute val bandSigHash = MurmurHash3.arrayHash(bandSig) val key = (entry.table, bandNum, bandSigHash).asInstanceOf[Product] (key, (entry.id, entry.point)) } } }) bandEntries } }
Example 20
Source File: Hasher.scala From sjson-new with Apache License 2.0 | 5 votes |
package sjsonnew
package support.murmurhash

import scala.util.Try
import HashUtil.hashLong
import java.lang.{ Double => JDouble }
import scala.util.hashing.MurmurHash3

/** Builds an `Int` hash of a JSON-like value instead of a document. */
object Hasher extends SupportHasher[Int] {
  // Fixed hash values for the three JSON constants.
  private val nullHash: Int = 0xc0
  private val falseHash: Int = 0xc2
  private val trueHash: Int = 0xc3

  implicit val facade: BuilderFacade[Int] = FacadeImpl

  private object FacadeImpl extends SimpleBuilderFacade[Int] {
    val jnull: Int = nullHash
    val jfalse: Int = falseHash
    val jtrue: Int = trueHash

    // Numbers supplied as text, and big decimals, are hashed through their string form.
    def jnumstring(s: String): Int = jstring(s)
    def jintstring(s: String): Int = jstring(s)
    def jbigdecimal(d: BigDecimal): Int = jstring(d.toString)

    // Integral and floating-point values all go through `hashLong`.
    def jint(i: Int): Int = hashLong(i.toLong)
    def jlong(l: Long): Int = hashLong(l)
    def jdouble(d: Double): Int = hashLong(JDouble.doubleToRawLongBits(d))

    def jstring(s: String): Int = MurmurHash3.stringHash(s)

    // Arrays hash with element order; objects hash as maps.
    def jarray(vs: List[Int]): Int = MurmurHash3.seqHash(vs)
    def jobject(vs: Map[String, Int]): Int = MurmurHash3.mapHash(vs)
  }
}
Example 21
Source File: Hashing.scala From endpoints4s with MIT License | 5 votes |
package endpoints4s

import scala.util.hashing.MurmurHash3

private[endpoints4s] object Hashing {

  /** Combines the hashes of `values`, in order, into a single hash code.
    *
    * The implementation has been copied and adapted from `MurmurHash3.productHash`: each
    * value's `##` is mixed into `MurmurHash3.productSeed` and the result is finalized with
    * the number of values.
    *
    * @throws IllegalArgumentException if `values` is empty
    */
  def hash(values: Any*): Int = {
    require(values.nonEmpty)
    val mixed = values.foldLeft(MurmurHash3.productSeed) { (acc, value) =>
      MurmurHash3.mix(acc, value.##)
    }
    MurmurHash3.finalizeHash(mixed, values.size)
  }
}
Example 22
Source File: Literal.scala From dagon with Apache License 2.0 | 5 votes |
package com.stripe.dagon import java.io.Serializable import scala.util.hashing.MurmurHash3 import scala.util.control.TailCalls private def eqFn[N[_]]: Function[RefPair[Literal[N, _], Literal[N, _]], Boolean] = Memoize.function[RefPair[Literal[N, _], Literal[N, _]], Boolean] { case (pair, _) if pair.itemsEq => true case (RefPair(Const(a), Const(b)), _) => a == b case (RefPair(Unary(left, fa), Unary(right, fb)), rec) => (fa == fb) && rec(RefPair(left, right)) case (RefPair(Binary(lefta, righta, fa), Binary(leftb, rightb, fb)), rec) => (fa == fb) && rec(RefPair(lefta, leftb)) && rec(RefPair(righta, rightb)) case (RefPair(Variadic(argsa, fa), Variadic(argsb, fb)), rec) => @annotation.tailrec def loop(left: List[Literal[N, _]], right: List[Literal[N, _]]): Boolean = (left, right) match { case (lh :: ltail, rh :: rtail) => rec(RefPair(lh, rh)) && loop(ltail, rtail) case (Nil, Nil) => true case _ => false } (fa == fb) && loop(argsa, argsb) case other => false } }
Example 23
Source File: DistributeAndMerge.scala From akka_streams_tutorial with MIT License | 5 votes |
package sample.graphdsl import akka.NotUsed import akka.actor.ActorSystem import akka.stream._ import akka.stream.scaladsl._ import scala.concurrent.Future import scala.util.hashing.MurmurHash3 def hashingDistribution[A, B](numBuckets: Int, parallelism: Int, hash: A => Int, fn: A => Future[B]): Flow[A, B, NotUsed] = { Flow.fromGraph(GraphDSL.create() { implicit builder => import GraphDSL.Implicits._ val numPorts = numBuckets val partitioner = builder.add(Partition[A](outputPorts = numPorts, partitioner = a => math.abs(hash(a)) % numPorts)) val merger = builder.add(Merge[B](inputPorts = numPorts, eagerComplete = false)) Range(0, numPorts).foreach { eachPort => partitioner.out(eachPort) ~> Flow[A].mapAsync(parallelism)(fn) ~> merger.in(eachPort) } FlowShape(partitioner.in, merger.out) }) } Source(1 to 10) .via( hashingDistribution[Int, Int]( numBuckets = 3, parallelism = 2, hash = element => MurmurHash3.stringHash(element.toString), //Hashing function: String => Int fn = sampleAsyncCall ) ) .runWith(Sink.foreach(each => println(s"Reached sink: $each"))) .onComplete(_ => system.terminate()) }
Example 24
Source File: Envelope.scala From seals with Apache License 2.0 | 5 votes |
package dev.tauri.seals
package core

import scala.util.hashing.MurmurHash3

import cats.{ Eq, Show }
import cats.implicits._

/** A value paired with the `Reified` instance (and therefore the `Model`) of its type.
  *
  * Equality and hashing look only at `value`; the model is not compared.
  */
sealed trait Envelope[A] extends Serializable {

  def value: A

  def reified: Reified[A]

  final def model: Model = reified.model

  // Any two envelopes with equal values are equal, regardless of their type parameter.
  final override def equals(that: Any): Boolean = that match {
    case that: Envelope[_] =>
      this.value == that.value
    case _ =>
      false
  }

  // Consistent with `equals`: derived from `value` and a fixed seed only.
  final override def hashCode: Int = {
    val s = MurmurHash3.mixLast(Envelope.hashSeed, value.##)
    MurmurHash3.finalizeHash(s, 1)
  }

  final override def toString: String = show(Show.fromToString[A])

  /** Renders the envelope as `Envelope[<model>](<value>)`, using `A` to show the value. */
  final def show(implicit A: Show[A]): String = sh"Envelope[${model}](${value})"
}

object Envelope {

  // Representation used when an envelope is converted through `refinement`.
  private final case class EnvelopeRepr[A](model: Model, value: A)

  private[seals] final val hashSeed = 0x37dd86e4

  /** Wraps `a` together with the implicit `Reified[A]` in scope. */
  def apply[A](a: A)(implicit r: Reified[A]): Envelope[A] = new Envelope[A] {
    override val value = a
    override val reified = r
  }

  /** Envelope equality that delegates to `Eq[A]` on the values. */
  implicit def envelopeEquality[A](implicit EqA: Eq[A]): Eq[Envelope[A]] = new Eq[Envelope[A]] {
    override def eqv(x: Envelope[A], y: Envelope[A]): Boolean = EqA.eqv(x.value, y.value)
  }

  implicit def envelopeShow[A](implicit A: Show[A]): Show[Envelope[A]] = Show.show(env => env.show(A))

  private def refinement[A](implicit r: Reified[A]): Refinement.Aux[Envelope[A], EnvelopeRepr[A]] = {
    new Refinement[Envelope[A]] {
      override type Repr = EnvelopeRepr[A]
      override val uuid = uuid"8e6e8b29-91e1-403c-9992-fd9cf8c82b06"
      override def repr = Refinement.ReprFormat.single("✉")
      // Accept the representation only when its model is compatible with the expected one.
      override def from(repr: Repr) = {
        if (repr.model compatible r.model) Either.right(Envelope[A](repr.value)(r))
        else Either.left(sh"incompatible models: expected '${r.model}', got '${repr.model}'")
      }
      override def to(env: Envelope[A]) = {
        EnvelopeRepr[A](env.model, env.value)
      }
    }
  }

  /** `Reified` for envelopes, obtained by refining the `Reified` of `EnvelopeRepr[A]`. */
  implicit def reifiedForEnvelope[A](implicit r: Reified[A]): Reified[Envelope[A]] =
    Reified[EnvelopeRepr[A]].refined[Envelope[A]](refinement[A])
}
Example 25
Source File: HashDerivation.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify.cats.semiauto

import cats.Hash
import magnolia._
import magnolify.shims.MurmurHash3Compat

import scala.language.experimental.macros
import scala.util.hashing.MurmurHash3

/** Magnolia-based derivation of `cats.Hash` instances. */
object HashDerivation {
  type Typeclass[T] = Hash[T]

  /** Builds the instance for a case class from the instances of its parameters. */
  def combine[T](caseClass: ReadOnlyCaseClass[Typeclass, T]): Typeclass[T] = {
    val eqvImpl = EqMethods.combine(caseClass)

    new Hash[T] {
      override def hash(x: T): Int =
        if (caseClass.parameters.isEmpty) {
          // No parameters: the hash is the hash of the short type name.
          caseClass.typeName.short.hashCode
        } else {
          // The seed is obtained from the short type name's hash; each parameter's hash is
          // then mixed in and the result finalized with the parameter count.
          val seed = MurmurHash3Compat.seed(caseClass.typeName.short.hashCode)
          val h = caseClass.parameters.foldLeft(seed) { (h, p) =>
            MurmurHash3.mix(h, p.typeclass.hash(p.dereference(x)))
          }
          MurmurHash3.finalizeHash(h, caseClass.parameters.size)
        }

      override def eqv(x: T, y: T): Boolean = eqvImpl(x, y)
    }
  }

  /** Builds the instance for a sealed trait by delegating to the matching subtype's instance. */
  def dispatch[T](sealedTrait: SealedTrait[Typeclass, T]): Typeclass[T] = {
    val eqvImpl = EqMethods.dispatch(sealedTrait)

    new Hash[T] {
      override def hash(x: T): Int = sealedTrait.dispatch(x) { sub =>
        sub.typeclass.hash(sub.cast(x))
      }

      override def eqv(x: T, y: T): Boolean = eqvImpl(x, y)
    }
  }

  /** Macro entry point that generates the instance for `T`. */
  implicit def apply[T]: Typeclass[T] = macro Magnolia.gen[T]
}
Example 26
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify

import scala.collection.generic.CanBuildFrom
import scala.collection.mutable
import scala.language.higherKinds
import scala.reflect.ClassTag
import scala.util.hashing.MurmurHash3

/**
 * Compatibility shims built on the `CanBuildFrom` / `TraversableOnce`
 * collections API.
 *
 * All public implicit definitions carry explicit result types (the same
 * types the compiler would otherwise infer) so that implicit resolution does
 * not depend on inference or definition order.
 */
package object shims {

  /** Adapts `mercator.Monadic` by forwarding `flatMap` / `map` to `flatMapS` / `mapS`. */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn)
    override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn)
  }

  /** A serializable source of builders for collections `C` with elements `A`. */
  trait FactoryCompat[-A, +C] extends Serializable {
    /** Returns a builder for `C`. */
    def newBuilder: mutable.Builder[A, C]

    /** Builds a `C` by adding every element of `xs` to a builder from `newBuilder`. */
    def build(xs: TraversableOnce[A]): C = (newBuilder ++= xs).result()
  }

  /**
   * Implicit `FactoryCompat` instances. Instances declared in the
   * `LowPriorityFactoryCompat*` parent traits have lower implicit priority
   * than those declared directly in this object.
   */
  object FactoryCompat extends LowPriorityFactoryCompat1 {
    private type FC[A, C] = FactoryCompat[A, C]

    /** Creates a `FactoryCompat` that invokes `f` for every `newBuilder` call. */
    def apply[A, C](f: () => mutable.Builder[A, C]): FC[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f()
      }

    implicit def arrayFC[A: ClassTag]: FactoryCompat[A, Array[A]] =
      FactoryCompat(() => Array.newBuilder[A])
    // Deprecated in 2.13
    // implicit def traversableFC[A] = FactoryCompat(() => Traversable.newBuilder[A])
    // List <: Iterable
    // implicit def iterableFC[A] = FactoryCompat(() => Iterable.newBuilder[A])
    // List <: Seq
    // implicit def seqFC[A] = FactoryCompat(() => Seq.newBuilder[A])
    // Vector <: IndexedSeq
    // implicit def indexedSeqFC[A] = FactoryCompat(() => IndexedSeq.newBuilder[A])
  }

  trait LowPriorityFactoryCompat1 extends LowPriorityFactoryCompat2 {
    implicit def listFC[A]: FactoryCompat[A, List[A]] =
      FactoryCompat(() => List.newBuilder[A])
  }

  trait LowPriorityFactoryCompat2 {
    implicit def vectorFC[A]: FactoryCompat[A, Vector[A]] =
      FactoryCompat(() => Vector.newBuilder[A])
    // Deprecated in 2.13
    // implicit def streamFC[A] = FactoryCompat(() => Stream.newBuilder[A])
  }

  /** `CanBuildFrom` instances that also extend `Serializable`. */
  object SerializableCanBuildFroms {
    /** Wraps a builder factory; both `apply` overloads ignore `from` and call `f`. */
    private def cbf[A, C](f: () => mutable.Builder[A, C]): CanBuildFrom[C, A, C] =
      new CanBuildFrom[C, A, C] with Serializable {
        override def apply(from: C): mutable.Builder[A, C] = f()
        override def apply(): mutable.Builder[A, C] = f()
      }

    implicit def arrayCBF[A: ClassTag]: CanBuildFrom[Array[A], A, Array[A]] =
      cbf(() => Array.newBuilder[A])
    implicit def traversableCBF[A]: CanBuildFrom[Traversable[A], A, Traversable[A]] =
      cbf(() => Traversable.newBuilder[A])
    implicit def iterableCBF[A]: CanBuildFrom[Iterable[A], A, Iterable[A]] =
      cbf(() => Iterable.newBuilder[A])
    implicit def seqCBF[A]: CanBuildFrom[Seq[A], A, Seq[A]] =
      cbf(() => Seq.newBuilder[A])
    implicit def indexedSeqCBF[A]: CanBuildFrom[IndexedSeq[A], A, IndexedSeq[A]] =
      cbf(() => IndexedSeq.newBuilder[A])
    implicit def listCBF[A]: CanBuildFrom[List[A], A, List[A]] =
      cbf(() => List.newBuilder[A])
    implicit def vectorCBF[A]: CanBuildFrom[Vector[A], A, Vector[A]] =
      cbf(() => Vector.newBuilder[A])
    implicit def streamCBF[A]: CanBuildFrom[Stream[A], A, Stream[A]] =
      cbf(() => Stream.newBuilder[A])
  }

  /** Alias for the standard library's Java collection converters. */
  val JavaConverters = scala.collection.JavaConverters

  object MurmurHash3Compat {
    /**
     * Returns `MurmurHash3.productSeed`.
     *
     * NOTE(review): `data` is not used by this implementation; presumably this
     * is deliberate for this Scala version - confirm before changing.
     */
    def seed(data: Int): Int = MurmurHash3.productSeed
  }
}
Example 27
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify

import scala.collection.{mutable, Factory}
import scala.util.hashing.MurmurHash3

/** Compatibility shims built on the `Factory` / `IterableOnce` collections API. */
package object shims {

  /** Adapts `mercator.Monadic` by forwarding `map` / `flatMap` to `mapS` / `flatMapS`. */
  trait Monadic[F[_]] extends mercator.Monadic[F] {
    def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B]
    def mapS[A, B](from: F[A])(fn: A => B): F[B]

    override def map[A, B](from: F[A])(fn: A => B): F[B] =
      mapS(from)(fn)

    override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] =
      flatMapS(from)(fn)
  }

  /** A serializable source of builders for collections `C` with elements `A`. */
  trait FactoryCompat[-A, +C] extends Serializable {
    /** Returns a builder for `C`. */
    def newBuilder: mutable.Builder[A, C]

    /** Builds a `C` by feeding every element of `xs` into a builder from `newBuilder`. */
    def build(xs: IterableOnce[A]): C = {
      val builder = newBuilder
      builder ++= xs
      builder.result()
    }
  }

  object FactoryCompat {
    /** Derives a `FactoryCompat` from any implicit `Factory` by delegating `newBuilder`. */
    implicit def fromFactory[A, C](implicit f: Factory[A, C]): FactoryCompat[A, C] =
      new FactoryCompat[A, C] {
        override def newBuilder: mutable.Builder[A, C] = f.newBuilder
      }
  }

  // Intentionally empty in the source; it declares no members.
  object SerializableCanBuildFroms

  /** Alias for the standard library's Java collection converters. */
  val JavaConverters = scala.jdk.CollectionConverters

  object MurmurHash3Compat {
    /** Mixes `data` into `MurmurHash3.productSeed`. */
    def seed(data: Int): Int = {
      import MurmurHash3.{mix, productSeed}
      mix(productSeed, data)
    }
  }
}
Example 28
Source File: Hashes.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.util

import org.apache.hadoop.hbase.util.Bytes

import scala.util.hashing.MurmurHash3

/** String hashing helpers. */
object Hashes {

  /** Returns the SHA-1 digest of the UTF-8 bytes of `s`, rendered with `Bytes.toHex`. */
  def sha1(s: String): String = {
    val digester = java.security.MessageDigest.getInstance("SHA-1")
    val digestBytes = digester.digest(s.getBytes("UTF-8"))
    Bytes.toHex(digestBytes)
  }

  /**
   * Maps any `Int` to a non-negative one: non-negative inputs are returned
   * unchanged, a negative `h` becomes `-(h + 1)`. Adding one before negating
   * keeps `Int.MinValue` from overflowing.
   */
  private def positiveHash(h: Int): Int =
    if (h >= 0) h else -(h + 1)

  /** Returns the MurmurHash3 string hash of `s`, folded into the non-negative range. */
  def murmur3(s: String): Int =
    positiveHash(MurmurHash3.stringHash(s))
}