breeze.linalg.sum Scala Examples
The following examples show how to use breeze.linalg.sum.
The original project and source file are noted above each example.
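Before the project examples, here is a minimal sketch (not taken from any of the projects below) of the three call shapes of breeze.linalg.sum that recur throughout this page: summing a vector to a scalar, summing every element of a matrix, and reducing a matrix along an axis.

import breeze.linalg.{Axis, DenseMatrix, DenseVector, sum}

val v = DenseVector(1.0, 2.0, 3.0)
sum(v)           // 6.0: scalar sum of all vector elements

val m = DenseMatrix((1.0, 2.0), (3.0, 4.0))
sum(m)           // 10.0: sum over the whole matrix
sum(m, Axis._0)  // Transpose(DenseVector(4.0, 6.0)): column sums
sum(m, Axis._1)  // DenseVector(3.0, 7.0): row sums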
Example 1
Source File: AliasTable.scala From glintlda with MIT License
package glintlda.mh

import breeze.linalg.{Vector, sum}
import glintlda.util.FastRNG

// Excerpt: draw is a method of the alias-table class in this file;
// count, prob and alias are members of the enclosing class.
def draw(random: FastRNG): Int = {
  count += 1
  val i = random.nextPositiveInt() % alias.length
  if (random.nextDouble() < prob(i)) {
    i
  } else {
    alias(i)
  }
}
Example 2
Source File: GibbsSample.scala From glintlda with MIT License
package glintlda

import breeze.linalg.{DenseVector, SparseVector, sum}
import glintlda.util.FastRNG

// Excerpt: apply is defined on the GibbsSample companion object in this file.
def apply(sv: SparseVector[Int], random: FastRNG, topics: Int): GibbsSample = {
  val totalTokens = sum(sv)
  val sample = new GibbsSample(new Array[Int](totalTokens), new Array[Int](totalTokens))
  var i = 0
  var current = 0
  while (i < sv.activeSize) {
    val index = sv.indexAt(i)
    var value = sv.valueAt(i)
    while (value > 0) {
      sample.features(current) = index
      sample.topics(current) = random.nextPositiveInt() % topics
      current += 1
      value -= 1
    }
    i += 1
  }
  sample
}
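The call sum(sv) above totals the token counts of an integer SparseVector, which sizes the two sample arrays. A standalone sketch of that behaviour, with hypothetical counts:

import breeze.linalg.{SparseVector, sum}

val counts = SparseVector.zeros[Int](10)
counts(2) = 3
counts(7) = 1
assert(sum(counts) == 4) // total over active entries only; stored zeros contribute nothing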
Example 3
Source File: Perplexity.scala From scalda with MIT License
package com.nitro.scalda.evaluation.perplexity

import breeze.linalg.Axis
import breeze.numerics._
import com.nitro.scalda.Utils
import breeze.linalg.DenseMatrix
import breeze.linalg.sum
import com.nitro.scalda.models.{OnlineLdaParams, Document}

object Perplexity {

  def perplexity(
    mb: Seq[Document],
    mbGamma: DenseMatrix[Double],
    lambda: DenseMatrix[Double],
    params: OnlineLdaParams
  ): Double = {

    val eLogTheta = Utils.dirichletExpectation(mbGamma)
    val eLogBeta = Utils.dirichletExpectation(lambda)

    var perplexityScore = 0.0

    for ((doc, docId) <- mb.zipWithIndex) {
      val eLogThetaDoc = eLogTheta(docId, ::).t
      perplexityScore += sum(
        doc.wordIds.zip(doc.wordCts).map {
          case (wordId, wordCt) =>
            Utils.logSumExp(eLogThetaDoc + eLogBeta(::, wordId)) * wordCt.toDouble
        }
      )
    }

    perplexityScore += sum(mbGamma.map(el => params.alpha - el) :* eLogTheta)
    perplexityScore += sum(lgamma(mbGamma) - lgamma(params.alpha))
    perplexityScore += sum(lgamma(params.alpha * params.numTopics) - lgamma(sum(mbGamma, Axis._1)))
    perplexityScore *= params.totalDocs / mb.size.toDouble
    perplexityScore += sum(lambda.map(el => params.eta - el) :* eLogBeta)
    perplexityScore += sum(lgamma(lambda) - lgamma(params.eta))
    perplexityScore += sum(lgamma(params.eta * params.vocabulary.size) - lgamma(sum(lambda, Axis._1)))

    perplexityScore
  }
}
Example 4
Source File: PoolingSuite.scala From keystone with Apache License 2.0
package keystoneml.nodes.images

import breeze.linalg.{DenseVector, sum}
import keystoneml.nodes._
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ChannelMajorArrayVectorizedImage, ImageMetadata}

class PoolingSuite extends FunSuite with Logging {

  test("pooling") {
    val imgArr =
      (0 until 4).flatMap { x =>
        (0 until 4).flatMap { y =>
          (0 until 1).map { c =>
            (c + x * 1 + y * 4 * 1).toDouble
          }
        }
      }.toArray

    val image = new ChannelMajorArrayVectorizedImage(imgArr, ImageMetadata(4, 4, 1))
    val pooling = new Pooler(2, 2, x => x, x => x.max)
    val poolImage = pooling(image)

    assert(poolImage.get(0, 0, 0) === 5.0)
    assert(poolImage.get(0, 1, 0) === 7.0)
    assert(poolImage.get(1, 0, 0) === 13.0)
    assert(poolImage.get(1, 1, 0) === 15.0)
  }

  test("pooling odd") {
    val hogImgSize = 14
    val convSizes = List(1, 2, 3, 4, 6, 8)
    convSizes.foreach { convSize =>
      val convResSize = hogImgSize - convSize + 1

      val imgArr =
        (0 until convResSize).flatMap { x =>
          (0 until convResSize).flatMap { y =>
            (0 until 1000).map { c =>
              (c + x * 1 + y * 4 * 1).toDouble
            }
          }
        }.toArray

      val image = new ChannelMajorArrayVectorizedImage(
        imgArr, ImageMetadata(convResSize, convResSize, 1000))

      val poolSizeReqd = math.ceil(convResSize / 2.0).toInt

      // We want poolSize to be even !!
      val poolSize = (math.ceil(poolSizeReqd / 2.0) * 2).toInt

      // overlap as little as possible
      val poolStride = convResSize - poolSize

      println(s"VALUES: $convSize $convResSize $poolSizeReqd $poolSize $poolStride")

      def summ(x: DenseVector[Double]): Double = sum(x)

      val pooling = new Pooler(poolStride, poolSize, identity, summ)
      val poolImage = pooling(image)
    }
  }
}
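In the second test, sum is wired into Pooler as the patch aggregate, in place of the max used in the first test. The aggregate is just a DenseVector[Double] => Double function, so it can be checked in isolation; a sketch with a hypothetical flattened 2x2 patch:

import breeze.linalg.{DenseVector, sum}

val patch = DenseVector(0.0, 1.0, 4.0, 5.0) // one pooling region, flattened
val sumPooled = sum(patch)                  // 10.0: sum pooling
val maxPooled = patch.max                   // 5.0: the aggregate from the first test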
Example 5
Source File: LinearCombinationChiSquare.scala From seqspark with Apache License 2.0
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseVector, sum}
import org.dizhang.seqspark.stat.LinearCombinationChiSquare._

@SerialVersionUID(7778520001L)
trait LinearCombinationChiSquare extends Serializable {
  def lambda: DenseVector[Double]
  def nonCentrality: DenseVector[Double]
  def degreeOfFreedom: DenseVector[Double]
  def cdf(cutoff: Double): CDF
  val meanLambda: Double = sum(lambda)
  val size: Int = lambda.length
}

object LinearCombinationChiSquare {
  @SerialVersionUID(7778550101L)
  trait CDF extends Serializable {
    def pvalue: Double
    def ifault: Int
    def trace: Array[Double]
  }
}
Example 6
Source File: MixtureDistribution.scala From DynaML with Apache License 2.0
package io.github.mandar2812.dynaml.probability.distributions

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{ContinuousDistr, Moments, Multinomial}
import spire.algebra.VectorSpace

class MixtureWithConfBars[I, V](
  distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
  probabilities: Multinomial[DenseVector[Double], Int])(
  implicit vI: VectorSpace[I, Double])
  extends MixtureDistribution[I](distributions, probabilities)
  with HasErrorBars[I] {

  private val weightsArr = probabilities.params.toArray

  override def confidenceInterval(s: Double) =
    distributions.zip(weightsArr).map(c => {
      val (lower, upper) = c._1.confidenceInterval(s)
      (vI.timesr(lower, c._2), vI.timesr(upper, c._2))
    }).reduce((a, b) =>
      (vI.plus(a._1, b._1), vI.plus(a._2, b._2))
    )

  def mean =
    distributions.zip(weightsArr)
      .map(c => vI.timesr(c._1.mean, c._2))
      .reduce((a, b) => vI.plus(a, b))
}

object MixtureWithConfBars {

  def apply[I, V](
    distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
    weights: DenseVector[Double])(
    implicit vI: VectorSpace[I, Double]): MixtureWithConfBars[I, V] =
    new MixtureWithConfBars(distributions, new Multinomial[DenseVector[Double], Int](weights))
}
Example 7
Source File: normDist.scala From DynaML with Apache License 2.0
package io.github.mandar2812.dynaml.algebra

import breeze.generic.UFunc
import breeze.linalg.sum
import breeze.numerics.{abs, pow}

object normDist extends UFunc {
  implicit object implDV extends Impl2[SparkVector, Double, Double] {
    def apply(a: SparkVector, p: Double) = {
      assert(p >= 1.0, "For an L_p norm to be computed p >= 1.0")
      math.pow(a._vector.values.map(x => math.pow(math.abs(x), p)).sum(), 1.0 / p)
    }
  }
}

object normBDist extends UFunc {
  implicit object implBlockedDV extends Impl2[SparkBlockedVector, Double, Double] {
    def apply(a: SparkBlockedVector, p: Double) = {
      assert(p >= 1.0, "For an L_p norm to be computed p >= 1.0")
      math.pow(a._vector.values.map(x => sum(pow(abs(x), p))).sum(), 1.0 / p)
    }
  }

  implicit object implPartitionedDV extends Impl2[PartitionedVector, Double, Double] {
    def apply(a: PartitionedVector, p: Double) = {
      assert(p >= 1.0, "For an L_p norm to be computed p >= 1.0")
      math.pow(a._data.map(_._2).map(x => sum(pow(abs(x), p))).sum, 1.0 / p)
    }
  }
}
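All three implementations compute an L_p norm by summing |x_i|^p per partition and combining the partial sums. On a local breeze vector the same norm collapses to a single sum call; a minimal sketch (not part of DynaML):

import breeze.linalg.{DenseVector, sum}
import breeze.numerics.{abs, pow}

def lpNorm(x: DenseVector[Double], p: Double): Double = {
  require(p >= 1.0, "For an L_p norm to be computed p >= 1.0")
  math.pow(sum(pow(abs(x), p)), 1.0 / p)
}

lpNorm(DenseVector(3.0, -4.0), 2.0) // 5.0, the Euclidean norm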
Example 8
Source File: NeuralNetSpec.scala From DynaML with Apache License 2.0
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.DynaMLPipe
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import org.scalatest.{FlatSpec, Matchers}

class NeuralNetSpec extends FlatSpec with Matchers {

  "A feed-forward neural network" should "be able to learn non-linear functions " +
    "on a compact domain" in {
    val uni = new Uniform(0.0, 1.0)

    // Create a synthetic data set of x,y values:
    // x is sampled in the unit hypercube, y = w.x + noise
    val noise = new Gaussian(0.0, 0.002)
    val uniH = new Uniform(0.0, 1.0)

    val numPoints: Int = 5000

    val data = (1 to numPoints).map(_ => {
      val features = DenseVector.tabulate[Double](4)(_ => uniH.draw)
      val (x, y, u, v) = (features(0), features(1), features(2), features(3))
      val target = DenseVector(
        1.0 + x * x + y * y * y + v * u * v + v * u + noise.draw,
        1.0 + x * u + u * y * y + v * v * v + u * u * u + noise.draw)
      (features, target)
    })

    val (trainingData, testData) = (data.take(4000), data.takeRight(1000))
    val epsilon = 0.85

    val model = new FeedForwardNetwork[Stream[(DenseVector[Double], DenseVector[Double])]](
      trainingData.toStream,
      FFNeuralGraph(4, 2, 0, List("logsig", "linear"), List(10), biasFlag = true))(
      DynaMLPipe.identityPipe[Stream[(DenseVector[Double], DenseVector[Double])]])

    model.setLearningRate(1.0)
      .setRegParam(0.01)
      .setMomentum(0.8)
      .setMaxIterations(150)
      .learn()

    val res = model.test(testData.toStream)
    val metrics = new MultiRegressionMetrics(res.toList, res.length)
    //println(metrics.Rsq)
    assert(sum(metrics.corr) / metrics.Rsq.length >= epsilon)
  }
}
Example 9
Source File: AutoEncoderSpec.scala From DynaML with Apache License 2.0
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._
import org.scalatest.{FlatSpec, Matchers}

// Excerpt: this (currently ignored) test case lives inside the AutoEncoderSpec
// FlatSpec class defined in this file.
ignore should "be able to learn a continuous, " +
  "invertible identity map x = g(h(x))" in {

  val uni = new Uniform(-math.Pi, math.Pi)
  val theta = RandomVariable(new Uniform(-math.Pi, math.Pi))
  val circleTransform = DataPipe((t: Double) => (math.cos(t), math.sin(t)))
  val rvOnCircle = theta > circleTransform

  // Create a synthetic data set of x,y values
  val noise = new Gaussian(0.0, 0.02)

  val numPoints: Int = 4000
  val epsilon = 0.05

  val data = (1 to numPoints).map(_ => {
    val sample = rvOnCircle.draw
    val features = DenseVector(sample._1, sample._2)
    val augFeatures = DenseVector(
      math.pow(0.85 * features(1), 2) + noise.draw,
      math.pow(0.45 * features(0), 3) + noise.draw,
      math.pow(features(0) + 0.85 * features(1), 3) + noise.draw,
      math.pow(features(0) - 0.5 * features(1), 2) + noise.draw,
      math.pow(features(0) + features(1), 3) + noise.draw,
      math.pow(features(0) - features(1), 2) + noise.draw,
      math.pow(features(0) + 0.4 * features(1), 2) + noise.draw,
      math.pow(features(0) + 0.5 * features(1), 3) + noise.draw)
    augFeatures
  })

  val (trainingData, testData) = (data.take(3000), data.takeRight(1000))

  val enc = GenericAutoEncoder(List(8, 4, 4, 8), List(VectorTansig, VectorTansig, VectorTansig))

  //BackPropagation.rho = 0.5
  enc.optimizer.setRegParam(0.0001).setStepSize(0.1).setNumIterations(1000).momentum_(0.5)
  enc.learn(trainingData.toStream)

  val metrics = new MultiRegressionMetrics(
    testData.map(c => (enc.i(enc.f(c)), c)).toList,
    testData.length)

  println("Corr: " + metrics.corr)
  assert(sum(metrics.mae) / metrics.corr.length <= epsilon)
}
Example 10
Source File: Normalize.scala From ScalaNetwork with GNU General Public License v2.0
package kr.ac.kaist.ir.deep.layer

import breeze.linalg.sum
import breeze.numerics.pow
import kr.ac.kaist.ir.deep.fn._
import play.api.libs.json.{JsObject, Json}

// Excerpt: updateBy overrides the backpropagation step of the normalization
// layer defined in this file; X and dFdX are members of the enclosing trait.
abstract override def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = {
  val Xsq = pow(X, 2.0f)
  val lenSq = sum(Xsq)
  val len: Scalar = Math.sqrt(lenSq).toFloat

  // Note that length is a function of x_i.
  // Let z_i := x_i / len(x_i).
  // Then d z_i / d x_i = (len^2 - x_i^2) / len^3 = (1 - z_i^2) / len,
  //      d z_j / d x_i = - x_i * x_j / len^3 = - z_i * z_j / len
  val rows = dFdX.rows
  val dZdX = ScalarMatrix $0(rows, rows)
  var r = 0
  while (r < rows) { // dZ_r
    var c = 0
    while (c < rows) { // dX_c
      if (r == c) {
        dZdX.update(r, c, (1.0f - Xsq(r, 0) / lenSq) / len)
      } else {
        dZdX.update(r, c, (-X(r, 0) * X(c, 0)) / (len * lenSq))
      }
      c += 1
    }
    r += 1
  }

  // un-normalize the error
  super.updateBy(delta, dZdX * error)
}
Example 11
Source File: CVLogPerplexity.scala From spectrallda-tensorspark with Apache License 2.0
package edu.uci.eecs.spectralLDA

import breeze.linalg.sum
import org.apache.spark.{SparkConf, SparkContext}
import edu.uci.eecs.spectralLDA.algorithm._
import org.apache.spark.rdd._
import org.apache.spark.mllib.clustering._
import org.apache.spark.mllib.linalg._

object CVLogPerplexity {
  def main(args: Array[String]) = {
    val conf: SparkConf = new SparkConf().setAppName(s"Spectral LDA")
    val sc: SparkContext = new SparkContext(conf)

    val cv = args(0).toInt
    val documentsPath = args(1)
    val k = args(2).toInt
    val alpha0 = args(3).toDouble
    val maxIterations = args(4).toInt
    val tol = args(5).toDouble
    val minWords = args(6).toInt

    val docs = sc.objectFile[(Long, breeze.linalg.SparseVector[Double])](documentsPath)
      .filter { case (_, tc) => sum(tc) >= minWords }

    for (i <- 0 until cv) {
      val splits = docs.randomSplit(Array[Double](0.9, 0.1))
      computeLogLikelihood(splits, k, alpha0, maxIterations, tol)
    }

    sc.stop()
  }

  def computeLogLikelihood(splits: Array[RDD[(Long, breeze.linalg.SparseVector[Double])]],
                           k: Int,
                           alpha0: Double,
                           maxIterations: Int,
                           tol: Double): Unit = {
    val numTestTokens = splits(1)
      .map { case (_, tc) => breeze.linalg.sum(tc) }
      .reduce(_ + _)

    val tensorLDA = new TensorLDA(
      dimK = k,
      alpha0 = alpha0,
      maxIterations = maxIterations,
      tol = tol
    )
    val (beta, alpha, _, _, m1) = tensorLDA.fit(splits(0))

    val augBeta = breeze.linalg.DenseMatrix.zeros[Double](beta.rows, k + 1)
    val augAlpha = breeze.linalg.DenseVector.ones[Double](alpha.length + 1)
    augBeta(::, 0 until k) := beta
    val dummyTopic = m1 + 0.1 * breeze.linalg.DenseVector.ones[Double](beta.rows) / beta.rows.toDouble
    augBeta(::, k) := dummyTopic / sum(dummyTopic)
    augAlpha(0 until k) := alpha

    val tensorLDAModel = new TensorLDAModel(augBeta, augAlpha)
    val tensorLDALogL = tensorLDAModel.logLikelihood(splits(1), smoothing = 1e-6, maxIterations = 50)
    println(s"Tensor LDA log-perplexity no extra smoothing: ${-tensorLDALogL / numTestTokens}")

    val trainMapped: RDD[(Long, Vector)] = splits(0).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val testMapped: RDD[(Long, Vector)] = splits(1).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val ldaOptimizer = new OnlineLDAOptimizer()
      .setMiniBatchFraction(0.05)
    val lda = new LDA()
      .setOptimizer(ldaOptimizer)
      .setMaxIterations(80)
      .setK(k)
      .setDocConcentration(alpha0 / k.toDouble)
      .setBeta(1.0)
    val ldaModel: LDAModel = lda.run(trainMapped)
    val ldaLogL = ldaModel.asInstanceOf[LocalLDAModel].logLikelihood(testMapped)
    println(s"Variational Inference log-perplexity: ${-ldaLogL / numTestTokens}")
  }
}
Example 12
Source File: package.scala From hail with MIT License
package is.hail

import is.hail.stats._
import breeze.linalg.{Vector, DenseVector, max, sum}
import breeze.numerics._
import is.hail.utils._

package object experimental {

  def findMaxAC(af: Double, an: Int, ci: Double = .95): Int = {
    if (af == 0)
      0
    else {
      val quantile_limit = ci // ci for one-sided, 1-(1-ci)/2 for two-sided
      val max_ac = qpois(quantile_limit, an * af)
      max_ac
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double = .95, lower: Double = 1e-10,
    upper: Double = 2, tol: Double = 1e-7, precision: Double = 1e-6): Double = {
    if (ac <= 1 || an == 0) // FAF should not be calculated on singletons
      0.0
    else {
      var f = (af: Double) => ac.toDouble - 1 - qpois(ci, an.toDouble * af)
      val root = uniroot(f, lower, upper, tol)
      val rounder = 1d / (precision / 100d)
      var max_af = math.round(root.getOrElse(0.0) * rounder) / rounder
      while (findMaxAC(max_af, an, ci) < ac) {
        max_af += precision
      }
      max_af - precision
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double): Double =
    calcFilterAlleleFreq(ac, an, ci, lower = 1e-10, upper = 2, tol = 1e-7, precision = 1e-6)

  def haplotypeFreqEM(gtCounts: IndexedSeq[Int]): IndexedSeq[Double] = {
    assert(gtCounts.size == 9, "haplotypeFreqEM requires genotype counts for the 9 possible genotype combinations.")

    val _gtCounts = new DenseVector(gtCounts.toArray)
    val nSamples = sum(_gtCounts)

    // Needs some non-ref samples to compute
    if (_gtCounts(0) >= nSamples) {
      return FastIndexedSeq(_gtCounts(0), 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    }

    val nHaplotypes = 2.0 * nSamples.toDouble

    val const_counts = new DenseVector(Array[Double](
      2.0 * _gtCounts(0) + _gtCounts(1) + _gtCounts(3), // n.AB
      2.0 * _gtCounts(6) + _gtCounts(3) + _gtCounts(7), // n.Ab
      2.0 * _gtCounts(2) + _gtCounts(1) + _gtCounts(5), // n.aB
      2.0 * _gtCounts(8) + _gtCounts(5) + _gtCounts(7)  // n.ab
    ))

    // Initial estimate with AaBb contributing equally to each haplotype
    var p_next = (const_counts +:+ new DenseVector(Array.fill[Double](4)(_gtCounts(4) / 2.0))) /:/ nHaplotypes
    var p_cur = p_next +:+ 1.0

    // EM
    while (max(abs(p_next -:- p_cur)) > 1e-7) {
      p_cur = p_next
      p_next = (const_counts +:+
        (new DenseVector(Array[Double](
          p_cur(0) * p_cur(3), // n.AB
          p_cur(1) * p_cur(2), // n.Ab
          p_cur(1) * p_cur(2), // n.aB
          p_cur(0) * p_cur(3)  // n.ab
        )) * (_gtCounts(4) / ((p_cur(0) * p_cur(3)) + (p_cur(1) * p_cur(2)))))
        ) / nHaplotypes
    }

    (p_next *:* nHaplotypes).toArray.toFastIndexedSeq
  }
}
Example 13
Source File: Norms.scala From doddle-model with Apache License 2.0
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{Axis, max, sum}
import breeze.numerics.{abs, pow, sqrt}
import io.picnicml.doddlemodel.data.{Features, RealVector}

object Norms {

  sealed trait Norm {
    def apply(x: Features): RealVector
  }

  final case object L1Norm extends Norm {
    override def apply(x: Features): RealVector = sum(abs(x), Axis._1)
  }

  final case object L2Norm extends Norm {
    override def apply(x: Features): RealVector = sqrt(sum(pow(x, 2), Axis._1))
  }

  final case object MaxNorm extends Norm {
    override def apply(x: Features): RealVector = max(abs(x), Axis._1)
  }
}
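Each norm reduces along Axis._1, so it returns one value per row of the feature matrix. A hypothetical usage, assuming Features is a DenseMatrix[Float] as in doddle-model's data package:

import breeze.linalg.DenseMatrix

val x = DenseMatrix((3.0f, 4.0f), (1.0f, -1.0f))
Norms.L2Norm(x)  // DenseVector(5.0f, 1.4142135f): per-row Euclidean norms
Norms.L1Norm(x)  // DenseVector(7.0f, 2.0f): per-row sums of absolute values
Norms.MaxNorm(x) // DenseVector(4.0f, 1.0f): per-row maxima of absolute values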
Example 14
Source File: PoissonRegression.scala From doddle-model with Apache License 2.0
package io.picnicml.doddlemodel.linear

import breeze.linalg.{all, sum}
import breeze.numerics.{exp, floor, isFinite, log}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor

case class PoissonRegression private (lambda: Float, private val w: Option[RealVector]) {
  private var yPredMeanCache: Target = _
}

object PoissonRegression {

  def apply(lambda: Float = 0.0f): PoissonRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    PoissonRegression(lambda, none)
  }

  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearRegressor[PoissonRegression] = new LinearRegressor[PoissonRegression] {

    override protected def w(model: PoissonRegression): Option[RealVector] = model.w

    override protected def copy(model: PoissonRegression): PoissonRegression = model.copy()

    override protected def copy(model: PoissonRegression, w: RealVector): PoissonRegression =
      model.copy(w = w.some)

    override protected def targetVariableAppropriate(y: Target): Boolean =
      y == floor(y) && all(isFinite(y))

    override protected def predictStateless(model: PoissonRegression, w: RealVector, x: Features): Target =
      floor(this.predictMean(w, x))

    private def predictMean(w: RealVector, x: Features): Target = exp(x * w)

    override protected[linear] def lossStateless(model: PoissonRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredMeanCache = predictMean(w, x)
      sum(y * log(model.yPredMeanCache) - model.yPredMeanCache) / (-x.rows.toFloat) +
        .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: PoissonRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((model.yPredMeanCache - y).t * x).t / x.rows.toFloat
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
}
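lossStateless above is the negative mean Poisson log-likelihood (dropping the constant log(y!) term) plus the L2 penalty; the data-dependent part is a single sum over y_i * log(mu_i) - mu_i. A sketch of that reduction with hypothetical predictions, written with breeze's explicitly elementwise *:* operator:

import breeze.linalg.{DenseVector, sum}
import breeze.numerics.log

val y = DenseVector(2.0f, 0.0f, 1.0f)  // observed counts
val mu = DenseVector(1.5f, 0.5f, 1.0f) // predicted means, i.e. exp(x * w)
val nll = sum(y *:* log(mu) - mu) / (-y.length.toFloat)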
Example 15
Source File: LogisticRegression.scala From doddle-model with Apache License 2.0
package io.picnicml.doddlemodel.linear

import breeze.linalg.sum
import breeze.numerics.{log, sigmoid}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier

case class LogisticRegression private (lambda: Float, numClasses: Option[Int],
  private val w: Option[RealVector]) {
  private var yPredProbaCache: RealVector = _
}

object LogisticRegression {

  def apply(lambda: Float = 0.0f): LogisticRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    LogisticRegression(lambda, none, none)
  }

  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearClassifier[LogisticRegression] = new LinearClassifier[LogisticRegression] {

    override def numClasses(model: LogisticRegression): Option[Int] = model.numClasses

    override protected def w(model: LogisticRegression): Option[RealVector] = model.w

    override protected[doddlemodel] def copy(model: LogisticRegression, numClasses: Int): LogisticRegression =
      model.copy(numClasses = numClasses.some)

    override protected def copy(model: LogisticRegression, w: RealVector): LogisticRegression =
      model.copy(w = w.some)

    override protected def predictStateless(model: LogisticRegression, w: RealVector, x: Features): Target =
      (predictProbaStateless(model, w, x)(::, 0) >:> 0.5f).map(x => if (x) 1.0f else 0.0f)

    override protected def predictProbaStateless(model: LogisticRegression, w: RealVector, x: Features): Simplex =
      sigmoid(x * w).asDenseMatrix.t

    override protected[linear] def lossStateless(model: LogisticRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredProbaCache = predictProbaStateless(model, w, x)(::, 0)
      sum(y * log(model.yPredProbaCache) + (1.0f - y) * log(1.0f - model.yPredProbaCache)) /
        (-x.rows.toFloat) + .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: LogisticRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((y - model.yPredProbaCache).t * x).t / (-x.rows.toFloat)
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
}
Example 16
Source File: MostFrequentClassifierTest.scala From doddle-model with Apache License 2.0
package io.picnicml.doddlemodel.dummy.classification

import breeze.linalg.sum
import io.picnicml.doddlemodel.data.{loadBreastCancerDataset, loadIrisDataset}
import io.picnicml.doddlemodel.dummy.classification.MostFrequentClassifier.ev
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class MostFrequentClassifierTest extends AnyFlatSpec with Matchers with OptionValues {

  "Most frequent classifier" should "infer the most frequent class from the iris dataset" in {
    val (x, y, _) = loadIrisDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 0.0
    sum(ev.predict(trainedModel, x)) shouldBe 0.0
  }

  it should "infer the most frequent class from the breast cancer dataset" in {
    val (x, y, _) = loadBreastCancerDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 1.0
    sum(ev.predict(trainedModel, x)) shouldBe x.rows.toDouble
  }
}