scala.math.min Scala Examples
The following examples show how to use scala.math.min.
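Before the project-specific examples, here is a minimal standalone illustration of scala.math.min itself (the values are arbitrary):

import scala.math.min

val smallerInt    = min(3, 7)       // 3
val smallerDouble = min(2.5, -1.0)  // -1.0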
Example 1
Source File: MaxAbsScalerModel.scala From mleap with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}

@SparkCode(uri = "https://github.com/apache/spark/blob/v2.0.0/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala")
case class MaxAbsScalerModel(maxAbs: Vector) extends Model {
  def apply(vector: Vector): Vector = {
    val maxAbsUnzero = Vectors.dense(maxAbs.toArray.map(x => if (x == 0) 1 else x))

    vector match {
      case DenseVector(values) =>
        val vs = values.clone()
        val size = vs.length
        var i = 0
        while (i < size) {
          if (!values(i).isNaN) {
            val rescale = max(-1.0, min(1.0, values(i) / maxAbsUnzero(i)))
            vs(i) = rescale
          }
          i += 1
        }
        Vectors.dense(vs)
      case SparseVector(size, indices, values) =>
        val vs = values.clone()
        val nnz = vs.length
        var i = 0
        while (i < nnz) {
          val raw = max(-1.0, min(1.0, values(i) / maxAbsUnzero(indices(i))))
          vs(i) = raw
          i += 1
        }
        Vectors.sparse(size, indices, vs)
    }
  }

  override def inputSchema: StructType = StructType("input" -> TensorType.Double(maxAbs.size)).get

  override def outputSchema: StructType = StructType("output" -> TensorType.Double(maxAbs.size)).get
}
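For context, a minimal usage sketch of the model defined above; the vectors, values, and variable names are illustrative, and it assumes the mleap-core and Spark linalg dependencies are on the classpath:

import org.apache.spark.ml.linalg.Vectors
import ml.combust.mleap.core.feature.MaxAbsScalerModel

// Each component is divided by its max absolute value, then clamped to [-1, 1];
// a zero max-abs entry is treated as 1 to avoid division by zero.
val model = MaxAbsScalerModel(maxAbs = Vectors.dense(2.0, 4.0, 0.0))
val scaled = model(Vectors.dense(1.0, -8.0, 3.0))
// scaled == Vectors.dense(0.5, -1.0, 1.0)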
Example 2
Source File: MinMaxScalerModel.scala From mleap with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import org.apache.spark.ml.linalg.mleap.VectorUtil._
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}

// Note: this excerpt omits the enclosing case class declaration, which defines
// originalRange, minArray, minValue and maxValue used below.

  def apply(vector: Vector): Vector = {
    val scale = maxValue - minValue
    // 0 in sparse vector will probably be rescaled to non-zero
    val values = vector.copy.toArray
    val size = values.length
    var i = 0
    while (i < size) {
      if (!values(i).isNaN) {
        val raw = if (originalRange(i) != 0) (values(i) - minArray(i)) / originalRange(i) else 0.5
        values(i) = raw * scale + minValue
      }
      i += 1
    }
    Vectors.dense(values)
  }

  override def inputSchema: StructType = StructType("input" -> TensorType.Double(originalRange.length)).get

  override def outputSchema: StructType = StructType("output" -> TensorType.Double(originalRange.length)).get
}
Example 3
Source File: WeightedLevenshtein.scala From spark-nlp with Apache License 2.0
package com.johnsnowlabs.nlp.annotators.spell.context

import com.github.liblevenshtein.transducer.{Candidate, ITransducer}
import com.johnsnowlabs.nlp.annotators.spell.context.parser.RegexParser

import scala.collection.mutable
import scala.io.Codec
import scala.math.min

trait WeightedLevenshtein {

  def levenshteinDist(s11: String, s22: String)(cost: (String, String) => Float): Float = {
    // cope with start of string
    val s1 = s"^${s11}_"
    val s2 = s"^${s22}_"
    val s1_ = s"_^${s11}_"
    val s2_ = s"_^${s22}_"

    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) =>
      if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f
    }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) =
        if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
        else minimum(
          dist(j - 1)(i) + cost(s2_.substring(j - 1, j + 1), s1(i - 1) + "Ɛ"), // insert in s1
          dist(j)(i - 1) + cost(s2(j - 1) + "Ɛ", s1_.substring(i - 1, i + 1)), // insert in s2
          dist(j - 1)(i - 1) + cost(s2(j - 1).toString, s1(i - 1).toString))

    dist(s2.length)(s1.length)
  }

  def wLevenshteinDist(s1: String, s2: String, weights: Map[String, Map[String, Float]]) =
    levenshteinDist(s1, s2)(genCost(weights))

  def loadWeights(filename: String): Map[String, Map[String, Float]] = {
    // store word ids
    val vocabIdxs = mutable.HashMap[String, mutable.Map[String, Float]]()

    implicit val codec: Codec = Codec.UTF8
    scala.io.Source.fromFile(filename).getLines.foreach { case line =>
      val lineFields = line.split("\\|")
      val dist = vocabIdxs.getOrElse(lineFields(0), mutable.Map[String, Float]()).updated(lineFields(1), lineFields(2).toFloat)
      vocabIdxs.update(lineFields(0), dist)
    }
    vocabIdxs.toMap.mapValues(_.toMap)
  }

  private def genCost(weights: Map[String, Map[String, Float]])(a: String, b: String): Float = {
    if (weights.contains(a) && weights(a).contains(b)) weights(a)(b)
    else if (a == b) 0.0f
    else 1.0f
  }

  private def minimum(i1: Float, i2: Float, i3: Float) = min(min(i1, i2), i3)

  def learnDist(s1: String, s2: String): Seq[(String, String)] = {
    val acc: Seq[(String, String)] = Seq.empty

    val dist = Array.tabulate(s2.length + 1, s1.length + 1) { (j, i) =>
      if (j == 0) i * 1.0f else if (i == 0) j * 1.0f else 0.0f
    }

    for (j <- 1 to s2.length; i <- 1 to s1.length)
      dist(j)(i) =
        if (s2(j - 1) == s1(i - 1)) dist(j - 1)(i - 1)
        else minimum(
          dist(j - 1)(i) + 1.0f,
          dist(j)(i - 1) + 1.0f,
          dist(j - 1)(i - 1) + 1.0f)

    backTrack(dist, s2, s1, s2.length, s1.length, acc)
  }

  def backTrack(dist: Array[Array[Float]], s2: String, s1: String, j: Int, i: Int, acc: Seq[(String, String)]): Seq[(String, String)] = {
    if (s2(j - 1) == s1(i - 1)) {
      if (j == 1 && i == 1) acc
      else backTrack(dist, s2, s1, j - 1, i - 1, acc)
    } else {
      val pSteps = Map(
        dist(j - 1)(i) -> ("", s2(j - 1).toString, j - 1, i),
        dist(j)(i - 1) -> (s1(i - 1).toString, "", j, i - 1),
        dist(j - 1)(i - 1) -> (s1(i - 1).toString, s2(j - 1).toString, j - 1, i - 1))

      val best = pSteps.minBy(_._1)._2
      backTrack(dist, s2, s1, best._3, best._4, acc :+ (best._1, best._2))
    }
  }
}
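A minimal usage sketch of the trait above: with an empty weight map every edit costs 1.0f, so wLevenshteinDist reduces to the ordinary Levenshtein distance (the input strings and names here are illustrative):

// The trait has no abstract members, so an anonymous instance is enough for a quick check.
val lev = new WeightedLevenshtein {}
val d = lev.wLevenshteinDist("kitten", "sitting", Map.empty[String, Map[String, Float]])
// d == 3.0f  (substitute k->s, substitute e->i, insert g)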
Example 4
Source File: Statistics.scala From Clustering4Ever with Apache License 2.0
package org.clustering4ever.stats

// Note: this excerpt omits the enclosing object declaration and its other members;
// only the method below is shown.

  final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {
    val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)

    @annotation.tailrec
    def go(accum: Double, i: Int): Int = {
      if (accum < p) go(accum + distribution(i)._2, i + 1)
      else i
    }

    val cpt = go(0D, 0)
    if (cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
  }
}
Example 5
Source File: SkewReplication.scala From spark-skewjoin with Apache License 2.0
package com.tresata.spark.skewjoin

import scala.math.{ min, max }

import org.slf4j.LoggerFactory

trait SkewReplication extends Serializable {
  def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int)
}

case class DefaultSkewReplication(replicationFactor: Double = 1e-2) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = (
    max(min((rightCount * replicationFactor).toInt, numPartitions), 1),
    max(min((leftCount * replicationFactor).toInt, numPartitions), 1)
  )
}

private case class RightReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    (1, max(min(left * right, numPartitions), 1)) //(1, right)
  }
}

private case class LeftReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    (max(min(left * right, numPartitions), 1), 1) //(left, 1)
  }
}

private object LoggingSkewReplication {
  private val log = LoggerFactory.getLogger(getClass)
}

case class LoggingSkewReplication(skewReplication: SkewReplication) extends SkewReplication {
  import LoggingSkewReplication._

  private var maxLeftReplication = 0
  private var maxRightReplication = 0

  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    if (left > maxLeftReplication) {
      log.info("new max left replication {}", left)
      maxLeftReplication = left
    }
    if (right > maxRightReplication) {
      log.info("new max right replication {}", right)
      maxRightReplication = right
    }
    (left, right)
  }
}
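A small usage sketch of DefaultSkewReplication from the example above; the counts, partition number, and variable names are illustrative:

import com.tresata.spark.skewjoin.DefaultSkewReplication

val replication = DefaultSkewReplication(replicationFactor = 1e-2)
// With 1,000,000 left keys, 500 right keys, and 100 partitions:
//   left  = max(min((500 * 0.01).toInt, 100), 1)     = 5
//   right = max(min((1000000 * 0.01).toInt, 100), 1) = 100
val (leftReplication, rightReplication) = replication.getReplications(1000000L, 500L, 100)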
Example 6
Source File: Utils.scala From streamDM with Apache License 2.0
package org.apache.spark.streamdm.classifiers.trees

import scala.math.min

object Utils {

  def arraytoString[T](pre: Array[T], split: String = ",", head: String = "{", tail: String = "}"): String = {
    val sb = new StringBuffer(head)
    for (i <- 0 until pre.length) {
      sb.append(pre(i))
      if (i < pre.length - 1) sb.append(split)
    }
    sb.append(tail).toString()
  }
}
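A quick usage sketch of the arraytoString helper above (illustrative input):

import org.apache.spark.streamdm.classifiers.trees.Utils

// Joins the elements with the separator and wraps them in the head/tail strings.
val formatted = Utils.arraytoString(Array(1, 2, 3))
// formatted == "{1,2,3}"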
Example 7
Source File: TruncatableSeekableStream.scala From spark-bam with Apache License 2.0
package org.hammerlab.bam.check.seqdoop

import java.io.EOFException

import hammerlab.path._
import htsjdk.samtools.seekablestream.SeekableStream
import org.hammerlab.channel.SeekableByteChannel

import scala.math.min

case class TruncatableSeekableStream(channel: SeekableByteChannel,
                                     source: Path)
  extends SeekableStream {

  var limit = channel.size

  def clear(): Unit = limit = channel.size

  override def length(): Long = limit

  override def seek(position: Long): Unit =
    channel.seek(
      min(
        limit,
        position
      )
    )

  override def getSource: String = source.toString()

  override def position(): Long = channel.position()

  override def eof(): Boolean = channel.position() == length()

  def remaining: Long = length() - position()

  override def read(): Int =
    if (position() < length())
      channel.read()
    else
      -1

  override def read(b: Array[Byte], off: Int, len: Int): Int = {
    if (len > remaining) {
      channel.read(b, off, remaining.toInt)
      throw new EOFException(
        s"Attempting to read $len bytes from offset $off when channel is at ${position()} with length ${length()} (only $remaining bytes available)"
      )
    }
    channel.read(b, off, len)
  }

  override def close(): Unit = channel.close()
}