breeze.linalg.sum Scala Examples

The following examples show how to use breeze.linalg.sum. Each example is taken from an open-source project; the source file, project, and license are noted above it.
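For orientation, here is a minimal standalone sketch (not taken from any of the projects below) of how sum behaves on Breeze vectors and matrices:

import breeze.linalg.{Axis, DenseMatrix, DenseVector, sum}

val v = DenseVector(1.0, 2.0, 3.0)
sum(v)           // 6.0 -- sum of all elements

val m = DenseMatrix((1.0, 2.0), (3.0, 4.0))
sum(m)           // 10.0 -- sum of all entries
sum(m, Axis._0)  // column sums, as a row vector: Transpose(DenseVector(4.0, 6.0))
sum(m, Axis._1)  // row sums: DenseVector(3.0, 7.0)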
Example 1
Source File: AliasTable.scala, from glintlda (MIT License)
package glintlda.mh

import breeze.linalg.{Vector, sum}
import glintlda.util.FastRNG

// NOTE: the AliasTable class header is elided on this page; the shape below is
// reconstructed from how draw() uses its fields and may differ from the source.
class AliasTable(prob: Vector[Double], alias: Vector[Int]) {

  var count: Long = 0

  // Draws from the discrete distribution encoded by the (prob, alias) tables.
  def draw(random: FastRNG): Int = {
    count += 1
    val i = random.nextPositiveInt() % alias.length
    if (random.nextDouble() < prob(i)) {
      i
    } else {
      alias(i)
    }
  }

}
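This draw is the sampling half of Walker's alias method: pick a bucket uniformly at random, then return either the bucket itself or its alias, based on a biased coin flip. The prob and alias tables are built elsewhere in glintlda; a hypothetical sketch of that construction (Vose's algorithm, not the project's actual code) looks like:

import breeze.linalg.{DenseVector, sum}
import scala.collection.mutable

// Hypothetical helper: builds (prob, alias) tables from weights that need not sum to 1.
def buildAliasTables(weights: DenseVector[Double]): (Array[Double], Array[Int]) = {
  val n = weights.length
  val scaled = (weights / sum(weights)) * n.toDouble
  val prob = new Array[Double](n)
  val alias = new Array[Int](n)
  val small = mutable.Stack[Int]()
  val large = mutable.Stack[Int]()
  (0 until n).foreach(i => if (scaled(i) < 1.0) small.push(i) else large.push(i))
  while (small.nonEmpty && large.nonEmpty) {
    val s = small.pop()
    val l = large.pop()
    prob(s) = scaled(s)          // bucket s keeps itself with probability scaled(s)
    alias(s) = l                 // ...and defers to l for the remainder
    scaled(l) += scaled(s) - 1.0 // l has donated (1 - scaled(s)) of its mass
    if (scaled(l) < 1.0) small.push(l) else large.push(l)
  }
  (small ++ large).foreach(i => prob(i) = 1.0) // leftovers keep themselves
  (prob, alias)
}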
Example 2
Source File: GibbsSample.scala, from glintlda (MIT License)
package glintlda

import breeze.linalg.{DenseVector, SparseVector, sum}
import glintlda.util.FastRNG

// NOTE: the GibbsSample class itself is elided on this page; the minimal shape below
// is reconstructed from the companion object's usage so the snippet is self-contained.
class GibbsSample(val features: Array[Int], val topics: Array[Int])

object GibbsSample {

  // Expands a sparse word-count vector into one entry per token, assigning each
  // token a uniformly random initial topic.
  def apply(sv: SparseVector[Int], random: FastRNG, topics: Int): GibbsSample = {
    val totalTokens = sum(sv)
    val sample = new GibbsSample(new Array[Int](totalTokens), new Array[Int](totalTokens))

    var i = 0
    var current = 0
    while (i < sv.activeSize) {
      val index = sv.indexAt(i)
      var value = sv.valueAt(i)
      while (value > 0) {
        sample.features(current) = index
        sample.topics(current) = random.nextPositiveInt() % topics
        current += 1
        value -= 1
      }
      i += 1
    }

    sample
  }

}
Example 3
Source File: Perplexity.scala, from scalda (MIT License)
package com.nitro.scalda.evaluation.perplexity

import breeze.linalg.{Axis, DenseMatrix, sum}
import breeze.numerics._
import com.nitro.scalda.Utils
import com.nitro.scalda.models.{ Document, OnlineLdaParams }

object Perplexity {

  
  /** Variational lower-bound score for a mini-batch, used to monitor perplexity. */
  def perplexity(
    mb: Seq[Document],
    mbGamma: DenseMatrix[Double],
    lambda: DenseMatrix[Double],
    params: OnlineLdaParams
  ): Double = {

    val eLogTheta = Utils.dirichletExpectation(mbGamma)
    val eLogBeta = Utils.dirichletExpectation(lambda)

    var perplexityScore = 0.0

    for ((doc, docId) <- mb.zipWithIndex) {

      val eLogThetaDoc = eLogTheta(docId, ::).t

      perplexityScore += sum(
        doc.wordIds.zip(doc.wordCts).map {

          case (wordId, wordCt) => Utils.logSumExp(eLogThetaDoc + eLogBeta(::, wordId)) * wordCt.toDouble
        }
      )

    }

    perplexityScore += sum(mbGamma.map(el => params.alpha - el) :* eLogTheta)
    perplexityScore += sum(lgamma(mbGamma) - lgamma(params.alpha))
    perplexityScore += sum(lgamma(params.alpha * params.numTopics) - lgamma(sum(mbGamma, Axis._1)))
    perplexityScore *= params.totalDocs / mb.size.toDouble
    perplexityScore += sum(lambda.map(el => params.eta - el) :* eLogBeta)
    perplexityScore += sum(lgamma(lambda) - lgamma(params.eta))
    perplexityScore += sum(lgamma(params.eta * params.vocabulary.size) - lgamma(sum(lambda, Axis._1)))

    perplexityScore
  }

} 
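Utils.dirichletExpectation is not shown on this page; in online variational LDA it is conventionally E[log theta] = digamma(gamma) - digamma(row sum of gamma), applied row-wise. A plausible Breeze sketch (an assumption, not necessarily scalda's exact code):

import breeze.linalg.{*, Axis, DenseMatrix, sum}
import breeze.numerics.digamma

// digamma of each entry, minus digamma of that entry's row sum
def dirichletExpectation(m: DenseMatrix[Double]): DenseMatrix[Double] =
  digamma(m)(::, *) - digamma(sum(m, Axis._1))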
Example 4
Source File: PoolingSuite.scala, from keystone (Apache License 2.0)
package keystoneml.nodes.images

import breeze.linalg.{DenseVector, sum}
import keystoneml.nodes._
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ChannelMajorArrayVectorizedImage, ImageMetadata}

class PoolingSuite extends FunSuite with Logging {

  test("pooling") {
    val imgArr =
      (0 until 4).flatMap { x =>
        (0 until 4).flatMap { y =>
          (0 until 1).map { c =>
            (c + x * 1 + y * 4 * 1).toDouble
          }
        }
      }.toArray

    val image = new ChannelMajorArrayVectorizedImage(imgArr, ImageMetadata(4, 4, 1))
    val pooling = new Pooler(2, 2, x => x, x => x.max)

    val poolImage = pooling(image)

    assert(poolImage.get(0, 0, 0) === 5.0)
    assert(poolImage.get(0, 1, 0) === 7.0)
    assert(poolImage.get(1, 0, 0) === 13.0)
    assert(poolImage.get(1, 1, 0) === 15.0)
  }

  test("pooling odd") {
    val hogImgSize = 14
    val convSizes = List(1, 2, 3, 4, 6, 8)
    convSizes.foreach { convSize =>
      val convResSize = hogImgSize - convSize + 1

      val imgArr =
        (0 until convResSize).flatMap { x =>
          (0 until convResSize).flatMap { y =>
            (0 until 1000).map { c =>
              (c + x * 1 + y * 4 * 1).toDouble
            }
          }
        }.toArray

      val image = new ChannelMajorArrayVectorizedImage(
        imgArr, ImageMetadata(convResSize, convResSize, 1000))

      val poolSizeReqd = math.ceil(convResSize / 2.0).toInt

      // We want poolSize to be even !!
      val poolSize = (math.ceil(poolSizeReqd / 2.0) * 2).toInt
      // overlap as little as possible
      val poolStride = convResSize - poolSize


      println(s"VALUES: $convSize $convResSize $poolSizeReqd $poolSize $poolStride")

      def summ(x: DenseVector[Double]): Double = sum(x)

      val pooling = new Pooler(poolStride, poolSize, identity, summ)
      val poolImage = pooling(image)
    }
  }
} 
Example 5
Source File: LinearCombinationChiSquare.scala, from seqspark (Apache License 2.0)
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseVector, sum}
import org.dizhang.seqspark.stat.LinearCombinationChiSquare._


@SerialVersionUID(7778520001L)
trait LinearCombinationChiSquare extends Serializable {
  def lambda: DenseVector[Double]
  def nonCentrality: DenseVector[Double]
  def degreeOfFreedom: DenseVector[Double]
  def cdf(cutoff: Double): CDF

  val meanLambda: Double = sum(lambda)
  val size: Int = lambda.length

}

object LinearCombinationChiSquare {
  @SerialVersionUID(7778550101L)
  trait CDF extends Serializable {
    def pvalue: Double
    def ifault: Int
    def trace: Array[Double]
  }
} 
Example 6
Source File: MixtureDistribution.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.probability.distributions

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{ContinuousDistr, Moments, Multinomial}
import spire.algebra.VectorSpace


class MixtureWithConfBars[I, V](
  distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
  probabilities: Multinomial[DenseVector[Double], Int])(
  implicit vI: VectorSpace[I, Double]) extends
  MixtureDistribution[I](distributions, probabilities) with
  HasErrorBars[I] {

  private val weightsArr = probabilities.params.toArray

  override def confidenceInterval(s: Double) =
    distributions.zip(weightsArr).map(c => {
      val (lower, upper) = c._1.confidenceInterval(s)

      (vI.timesr(lower, c._2), vI.timesr(upper, c._2))
    }).reduce((a,b) =>
      (vI.plus(a._1, b._1), vI.plus(a._2, b._2))
    )


  def mean = distributions.zip(weightsArr)
    .map(c => vI.timesr(c._1.mean, c._2))
    .reduce((a,b) => vI.plus(a,b))

}

object MixtureWithConfBars {

  def apply[I, V](
    distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
    weights: DenseVector[Double])(
    implicit vI: VectorSpace[I, Double]): MixtureWithConfBars[I, V] =
    new MixtureWithConfBars(distributions, new Multinomial[DenseVector[Double], Int](weights))
} 
Example 7
Source File: normDist.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.algebra

import breeze.generic.UFunc
import breeze.linalg.sum
import breeze.numerics.{abs, pow}


object normDist extends UFunc {

  implicit object implDV extends Impl2[SparkVector, Double, Double] {
    def apply(a: SparkVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._vector.values.map(x => math.pow(math.abs(x), p)).sum(), 1.0/p)
    }
  }
}

object normBDist extends UFunc {
  implicit object implBlockedDV extends Impl2[SparkBlockedVector, Double, Double] {
    def apply(a: SparkBlockedVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._vector.values.map(x => sum(pow(abs(x), p))).sum(), 1.0/p)
    }
  }

  implicit object implPartitionedDV extends Impl2[PartitionedVector, Double, Double] {
    def apply(a: PartitionedVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._data.map(_._2).map(x => sum(pow(abs(x), p))).sum, 1.0/p)
    }
  }


} 
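For comparison, the same L_p computation on a plain in-memory Breeze vector (a standalone sketch; SparkVector, SparkBlockedVector, and PartitionedVector are DynaML-specific distributed types):

import breeze.linalg.{DenseVector, sum}
import breeze.numerics.{abs, pow}

// ||v||_p = (sum_i |v_i|^p)^(1/p), defined for p >= 1
def lpNorm(v: DenseVector[Double], p: Double): Double = {
  require(p >= 1.0, "p must be at least 1.0 for an L_p norm")
  math.pow(sum(pow(abs(v), p)), 1.0 / p)
}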
Example 8
Source File: NeuralNetSpec.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.DynaMLPipe
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import org.scalatest.{FlatSpec, Matchers}


class NeuralNetSpec extends FlatSpec with Matchers {

  "A feed-forward neural network" should "be able to learn non-linear functions "+
    "on a compact domain" in {
    val uni = new Uniform(0.0, 1.0)
    //Create a synthetic data set of (x, y) values:
    //x is sampled in the unit hypercube, y is a nonlinear polynomial of x plus noise
    val noise = new Gaussian(0.0, 0.002)
    val uniH = new Uniform(0.0, 1.0)


    val numPoints:Int = 5000

    val data = (1 to numPoints).map(_ => {
      val features = DenseVector.tabulate[Double](4)(_ => uniH.draw)

      val (x,y,u,v) = (features(0), features(1), features(2), features(3))

      val target = DenseVector(
        1.0 + x*x + y*y*y + v*u*v + v*u + noise.draw,
        1.0 + x*u + u*y*y + v*v*v + u*u*u + noise.draw)

      (features, target)
    })

    val (trainingData, testData) = (data.take(4000), data.takeRight(1000))

    val epsilon = 0.85

    type Data = Stream[(DenseVector[Double], DenseVector[Double])]

    val model = new FeedForwardNetwork[Data](
      trainingData.toStream,
      FFNeuralGraph(4, 2, 0, List("logsig", "linear"), List(10), biasFlag = true))(
      DynaMLPipe.identityPipe[Data])

    model.setLearningRate(1.0)
      .setRegParam(0.01)
      .setMomentum(0.8)
      .setMaxIterations(150)
      .learn()

    val res = model.test(testData.toStream)

    val metrics = new MultiRegressionMetrics(res.toList, res.length)
    //println(metrics.Rsq)
    assert(sum(metrics.corr)/metrics.Rsq.length >= epsilon)
  }
} 
Example 9
Source File: AutoEncoderSpec.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._

import org.scalatest.{FlatSpec, Matchers}

class AutoEncoderSpec extends FlatSpec with Matchers {

  // Registered with `ignore` rather than `it`, so ScalaTest skips it by default.
  ignore should "be able to learn a continuous, "+
    "invertible identity map x = g(h(x))" in {

    val uni = new Uniform(-math.Pi, math.Pi)
    val theta = RandomVariable(new Uniform(-math.Pi, math.Pi))
    val circleTransform = DataPipe((t: Double) => (math.cos(t), math.sin(t)))
    val rvOnCircle = theta > circleTransform
    //Create synthetic data set of x,y values

    val noise = new Gaussian(0.0, 0.02)

    val numPoints:Int = 4000
    val epsilon = 0.05

    val data = (1 to numPoints).map(_ => {
      val sample = rvOnCircle.draw
      val features = DenseVector(sample._1, sample._2)
      val augFeatures = DenseVector(
        math.pow(0.85*features(1), 2) + noise.draw,
        math.pow(0.45*features(0), 3) + noise.draw,
        math.pow(features(0)+0.85*features(1), 3) + noise.draw,
        math.pow(features(0)-0.5*features(1), 2) + noise.draw,
        math.pow(features(0)+features(1), 3) + noise.draw,
        math.pow(features(0)-features(1), 2) + noise.draw,
        math.pow(features(0)+0.4*features(1), 2) + noise.draw,
        math.pow(features(0)+0.5*features(1), 3) + noise.draw)

      augFeatures
    })

    val (trainingData, testData) = (data.take(3000), data.takeRight(1000))

    val enc = GenericAutoEncoder(List(8, 4, 4, 8), List(VectorTansig, VectorTansig, VectorTansig))

    //BackPropagation.rho = 0.5

    enc.optimizer.setRegParam(0.0001).setStepSize(0.1).setNumIterations(1000).momentum_(0.5)

    enc.learn(trainingData.toStream)

    val metrics = new MultiRegressionMetrics(
      testData.map(c => (enc.i(enc.f(c)), c)).toList,
      testData.length)

    println("Corr: "+metrics.corr)
    assert(sum(metrics.mae)/metrics.corr.length <= epsilon)

  }

} 
Example 10
Source File: Normalize.scala, from ScalaNetwork (GNU GPL v2.0)
package kr.ac.kaist.ir.deep.layer

import breeze.linalg.sum
import breeze.numerics.pow
import kr.ac.kaist.ir.deep.fn._
import play.api.libs.json.{JsObject, Json}


// NOTE: the trait header is elided on this page; Normalize stacks on a base Layer
// (which supplies X, dFdX, ScalarMatrix, and Scalar), reconstructed so the snippet parses.
trait Normalize extends Layer {

  abstract override def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = {
    val Xsq = pow(X, 2.0f)
    val lenSq = sum(Xsq)
    val len: Scalar = Math.sqrt(lenSq).toFloat

    // Note that length is the function of x_i.
    // Let z_i := x_i / len(x_i).
    // Then d z_i / d x_i = (len^2 - x_i^2) / len^3 = (1 - z_i^2) / len,
    //      d z_j / d x_i = - x_i * x_j / len^3 = - z_i * z_j / len
    val rows = dFdX.rows
    val dZdX = ScalarMatrix $0(rows, rows)
    var r = 0
    while (r < rows) {
      //dZ_r
      var c = 0
      while (c < rows) {
        if (r == c) {
          //dX_c
          dZdX.update(r, c, (1.0f - Xsq(r, 0) / lenSq) / len)
        } else {
          dZdX.update(r, c, (-X(r, 0) * X(c, 0)) / (len * lenSq))
        }
        c += 1
      }
      r += 1
    }

    // un-normalize the error
    super.updateBy(delta, dZdX * error)
  }
} 
Example 11
Source File: CVLogPerplexity.scala, from spectrallda-tensorspark (Apache License 2.0)
package edu.uci.eecs.spectralLDA

import breeze.linalg.sum
import org.apache.spark.{SparkConf, SparkContext}
import edu.uci.eecs.spectralLDA.algorithm._
import org.apache.spark.rdd._
import org.apache.spark.mllib.clustering._
import org.apache.spark.mllib.linalg._

object CVLogPerplexity {
  def main(args: Array[String]) = {
    val conf: SparkConf = new SparkConf().setAppName(s"Spectral LDA")
    val sc: SparkContext = new SparkContext(conf)

    val cv = args(0).toInt
    val documentsPath = args(1)
    val k = args(2).toInt
    val alpha0 = args(3).toDouble
    val maxIterations = args(4).toInt
    val tol = args(5).toDouble
    val minWords = args(6).toInt

    val docs = sc.objectFile[(Long, breeze.linalg.SparseVector[Double])](documentsPath)
      .filter {
        case (_, tc) => sum(tc) >= minWords
      }

    for (i <- 0 until cv) {
      val splits = docs.randomSplit(Array[Double](0.9, 0.1))
      computeLogLikelihood(splits, k, alpha0, maxIterations, tol)
    }

    sc.stop()
  }

  def computeLogLikelihood(splits: Array[RDD[(Long, breeze.linalg.SparseVector[Double])]],
                           k: Int,
                           alpha0: Double,
                           maxIterations: Int,
                           tol: Double
                          ): Unit = {
    val numTestTokens = splits(1)
      .map {
        case (_, tc) => breeze.linalg.sum(tc)
      }
      .reduce(_ + _)

    val tensorLDA = new TensorLDA(
      dimK = k,
      alpha0 = alpha0,
      maxIterations = maxIterations,
      tol = tol
    )
    val (beta, alpha, _, _, m1) = tensorLDA.fit(splits(0))

    val augBeta = breeze.linalg.DenseMatrix.zeros[Double](beta.rows, k + 1)
    val augAlpha = breeze.linalg.DenseVector.ones[Double](alpha.length + 1)
    augBeta(::, 0 until k) := beta
    val dummyTopic = m1 + 0.1 * breeze.linalg.DenseVector.ones[Double](beta.rows) / beta.rows.toDouble
    augBeta(::, k) := dummyTopic / sum(dummyTopic)
    augAlpha(0 until k) := alpha

    val tensorLDAModel = new TensorLDAModel(augBeta, augAlpha)
    val tensorLDALogL = tensorLDAModel.logLikelihood(splits(1), smoothing = 1e-6, maxIterations = 50)
    println(s"Tensor LDA log-perplexity no extra smoothing: ${- tensorLDALogL / numTestTokens}")

    val trainMapped: RDD[(Long, Vector)] = splits(0).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val testMapped: RDD[(Long, Vector)] = splits(1).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val ldaOptimizer = new OnlineLDAOptimizer()
      .setMiniBatchFraction(0.05)
    val lda = new LDA()
      .setOptimizer(ldaOptimizer)
      .setMaxIterations(80)
      .setK(k)
      .setDocConcentration(alpha0 / k.toDouble)
      .setBeta(1.0)

    val ldaModel: LDAModel = lda.run(trainMapped)
    val ldaLogL = ldaModel.asInstanceOf[LocalLDAModel].logLikelihood(testMapped)

    println(s"Variational Inference log-perplexity: ${- ldaLogL / numTestTokens}")
  }
} 
Example 12
Source File: package.scala, from hail (MIT License)
package is.hail

import is.hail.stats._
import breeze.linalg.{Vector, DenseVector, max, sum}
import breeze.numerics._
import is.hail.utils._

package object experimental {

  def findMaxAC(af: Double, an: Int, ci: Double = .95): Int = {
    if (af == 0)
      0
    else {
      val quantile_limit = ci // ci for one-sided, 1 - (1 - ci)/2 for two-sided
      val max_ac = qpois(quantile_limit, an * af)
      max_ac
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double = .95, lower: Double = 1e-10, upper: Double = 2, tol: Double = 1e-7, precision: Double = 1e-6): Double = {
    if (ac <= 1 || an == 0) // FAF should not be calculated on singletons
      0.0
    else {
      val f = (af: Double) => ac.toDouble - 1 - qpois(ci, an.toDouble * af)
      val root = uniroot(f, lower, upper, tol)
      val rounder = 1d / (precision / 100d)
      var max_af = math.round(root.getOrElse(0.0) * rounder) / rounder
      while (findMaxAC(max_af, an, ci) < ac) {
        max_af += precision
      }
      max_af - precision
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double): Double = calcFilterAlleleFreq(ac, an, ci, lower = 1e-10, upper = 2, tol = 1e-7, precision = 1e-6)


  def haplotypeFreqEM(gtCounts : IndexedSeq[Int]) : IndexedSeq[Double] = {

    assert(gtCounts.size == 9, "haplotypeFreqEM requires genotype counts for the 9 possible genotype combinations.")

    val _gtCounts = new DenseVector(gtCounts.toArray)
    val nSamples = sum(_gtCounts)

    //Needs some non-ref samples to compute
    if(_gtCounts(0) >= nSamples){ return FastIndexedSeq(_gtCounts(0),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0)}

    val nHaplotypes = 2.0*nSamples.toDouble

    
    // Haplotype counts that can be read off unambiguous genotypes; the double
    // heterozygote AaBb (index 4) is apportioned between AB/ab and Ab/aB in the EM loop.
    val const_counts = new DenseVector(Array[Double](
      2.0*_gtCounts(0) + _gtCounts(1) + _gtCounts(3), //n.AB
      2.0*_gtCounts(6) + _gtCounts(3) + _gtCounts(7), //n.Ab
      2.0*_gtCounts(2) + _gtCounts(1) + _gtCounts(5), //n.aB
      2.0*_gtCounts(8) + _gtCounts(5) + _gtCounts(7)  //n.ab
    ))

    //Initial estimate with AaBb contributing equally to each haplotype
    var p_next = (const_counts +:+ new DenseVector(Array.fill[Double](4)(_gtCounts(4)/2.0))) /:/ nHaplotypes
    var p_cur = p_next +:+ 1.0

    //EM
    while(max(abs(p_next -:- p_cur)) > 1e-7){
      p_cur = p_next

      p_next = (const_counts +:+
        (new DenseVector(Array[Double](
          p_cur(0)*p_cur(3), //n.AB
          p_cur(1)*p_cur(2), //n.Ab
          p_cur(1)*p_cur(2), //n.aB
          p_cur(0)*p_cur(3)  //n.ab
        )) * (_gtCounts(4) / ((p_cur(0)*p_cur(3))+(p_cur(1)*p_cur(2)))))
        ) / nHaplotypes

    }

    return (p_next *:* nHaplotypes).toArray.toFastIndexedSeq
  }

} 
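A hypothetical call, with genotype counts ordered as the n.AB/n.Ab/n.aB/n.ab comments above imply (AABB, AaBB, aaBB, AABb, AaBb, aaBb, AAbb, Aabb, aabb):

// made-up counts for the nine two-locus genotypes
val estimated = haplotypeFreqEM(IndexedSeq(120, 40, 5, 35, 25, 6, 4, 8, 2))
// returns the four estimated haplotype counts (n.AB, n.Ab, n.aB, n.ab)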
Example 13
Source File: Norms.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{Axis, max, sum}
import breeze.numerics.{abs, pow, sqrt}
import io.picnicml.doddlemodel.data.{Features, RealVector}

object Norms {

  sealed trait Norm {
    def apply(x: Features): RealVector
  }

  final case object L1Norm extends Norm {
    override def apply(x: Features): RealVector = sum(abs(x), Axis._1)
  }

  final case object L2Norm extends Norm {
    override def apply(x: Features): RealVector = sqrt(sum(pow(x, 2), Axis._1))
  }

  final case object MaxNorm extends Norm {
    override def apply(x: Features): RealVector = max(abs(x), Axis._1)
  }
} 
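A quick usage sketch, assuming Features is a Float-valued Breeze matrix as in recent doddle-model versions (each norm returns one value per row):

import breeze.linalg.DenseMatrix

val x = DenseMatrix((1.0f, -2.0f), (3.0f, 4.0f))
L1Norm(x)   // DenseVector(3.0f, 7.0f)   -- per-row sum of absolute values
L2Norm(x)   // DenseVector(~2.24f, 5.0f) -- per-row Euclidean norm
MaxNorm(x)  // DenseVector(2.0f, 4.0f)   -- per-row maximum absolute value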
Example 14
Source File: PoissonRegression.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.linear

import breeze.linalg.{all, sum}
import breeze.numerics.{exp, floor, isFinite, log}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor


case class PoissonRegression private (lambda: Float, private val w: Option[RealVector]) {
  private var yPredMeanCache: Target = _
}

object PoissonRegression {

  def apply(lambda: Float = 0.0f): PoissonRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    PoissonRegression(lambda, none)
  }

  // Breeze slice selecting every weight except the intercept (a negative end index counts from the end)
  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearRegressor[PoissonRegression] = new LinearRegressor[PoissonRegression] {

    override protected def w(model: PoissonRegression): Option[RealVector] = model.w

    override protected def copy(model: PoissonRegression): PoissonRegression = model.copy()

    override protected def copy(model: PoissonRegression, w: RealVector): PoissonRegression =
      model.copy(w = w.some)

    override protected def targetVariableAppropriate(y: Target): Boolean =
      y == floor(y) && all(isFinite(y))

    override protected def predictStateless(model: PoissonRegression, w: RealVector, x: Features): Target =
      floor(this.predictMean(w, x))

    private def predictMean(w: RealVector, x: Features): Target = exp(x * w)

    override protected[linear] def lossStateless(model: PoissonRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredMeanCache = predictMean(w, x)
      sum(y * log(model.yPredMeanCache) - model.yPredMeanCache) / (-x.rows.toFloat) +
        .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: PoissonRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((model.yPredMeanCache - y).t * x).t / x.rows.toFloat
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
} 
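lossStateless above is the mean Poisson negative log-likelihood plus an L2 penalty on all weights except the intercept; the log(y!) term of the Poisson likelihood is dropped because it does not depend on w:

    L(w) = -(1/n) * sum_i [ y_i * log(mu_i) - mu_i ] + (lambda / 2) * w[1:].t * w[1:],  where mu_i = exp(x_i dot w)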
Example 15
Source File: LogisticRegression.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.linear

import breeze.linalg.sum
import breeze.numerics.{log, sigmoid}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier


case class LogisticRegression private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
  private var yPredProbaCache: RealVector = _
}

object LogisticRegression {

  def apply(lambda: Float = 0.0f): LogisticRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    LogisticRegression(lambda, none, none)
  }

  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearClassifier[LogisticRegression] = new LinearClassifier[LogisticRegression] {

    override def numClasses(model: LogisticRegression): Option[Int] = model.numClasses

    override protected def w(model: LogisticRegression): Option[RealVector] = model.w

    override protected[doddlemodel] def copy(model: LogisticRegression, numClasses: Int): LogisticRegression =
      model.copy(numClasses = numClasses.some)

    override protected def copy(model: LogisticRegression, w: RealVector): LogisticRegression =
      model.copy(w = w.some)

    override protected def predictStateless(model: LogisticRegression, w: RealVector, x: Features): Target =
      (predictProbaStateless(model, w, x)(::, 0) >:> 0.5f).map(x => if (x) 1.0f else 0.0f)

    override protected def predictProbaStateless(model: LogisticRegression, w: RealVector, x: Features): Simplex =
      sigmoid(x * w).asDenseMatrix.t

    override protected[linear] def lossStateless(model: LogisticRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredProbaCache = predictProbaStateless(model, w, x)(::, 0)
      sum(y * log(model.yPredProbaCache) + (1.0f - y) * log(1.0f - model.yPredProbaCache)) / (-x.rows.toFloat) +
        .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: LogisticRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((y - model.yPredProbaCache).t * x).t / (-x.rows.toFloat)
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
} 
Example 16
Source File: MostFrequentClassifierTest.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.dummy.classification

import breeze.linalg.sum
import io.picnicml.doddlemodel.data.{loadBreastCancerDataset, loadIrisDataset}
import io.picnicml.doddlemodel.dummy.classification.MostFrequentClassifier.ev
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class MostFrequentClassifierTest extends AnyFlatSpec with Matchers with OptionValues {

  "Most frequent classifier" should "infer the most frequent class from the iris dataset" in {
    val (x, y, _) = loadIrisDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 0.0
    sum(ev.predict(trainedModel, x)) shouldBe 0.0
  }

  it should "infer the most frequent class from the breast cancer dataset" in {
    val (x, y, _) = loadBreastCancerDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 1.0
    sum(ev.predict(trainedModel, x)) shouldBe x.rows.toDouble
  }
}