breeze.linalg.sum Scala Examples

The following examples show how to use breeze.linalg.sum. Each example is taken from an open-source project; the source file, project, and license are noted above it.
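For orientation, here is a minimal standalone sketch (not taken from any of the projects below) of how sum behaves on Breeze vectors and matrices:

import breeze.linalg.{Axis, DenseMatrix, DenseVector, sum}

val v = DenseVector(1.0, 2.0, 3.0)
sum(v)           // 6.0 -- sum of all elements

val m = DenseMatrix((1.0, 2.0), (3.0, 4.0))
sum(m)           // 10.0 -- sum of all entries
sum(m, Axis._0)  // column sums, as a row vector: Transpose(DenseVector(4.0, 6.0))
sum(m, Axis._1)  // row sums: DenseVector(3.0, 7.0)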
Example 1
Source File: AliasTable.scala, from glintlda (MIT License)
package glintlda.mh

import breeze.linalg.{Vector, sum}
import glintlda.util.FastRNG

// NOTE: the AliasTable class header is elided on this page; the shape below is
// reconstructed from how draw() uses its fields and may differ from the source.
class AliasTable(prob: Vector[Double], alias: Vector[Int]) {

  var count: Long = 0

  // Draws from the discrete distribution encoded by the (prob, alias) tables.
  def draw(random: FastRNG): Int = {
    count += 1
    val i = random.nextPositiveInt() % alias.length
    if (random.nextDouble() < prob(i)) {
      i
    } else {
      alias(i)
    }
  }

}
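This draw is the sampling half of Walker's alias method: pick a bucket uniformly at random, then return either the bucket itself or its alias, based on a biased coin flip. The prob and alias tables are built elsewhere in glintlda; a hypothetical sketch of that construction (Vose's algorithm, not the project's actual code) looks like:

import breeze.linalg.{DenseVector, sum}
import scala.collection.mutable

// Hypothetical helper: builds (prob, alias) tables from weights that need not sum to 1.
def buildAliasTables(weights: DenseVector[Double]): (Array[Double], Array[Int]) = {
  val n = weights.length
  val scaled = (weights / sum(weights)) * n.toDouble
  val prob = new Array[Double](n)
  val alias = new Array[Int](n)
  val small = mutable.Stack[Int]()
  val large = mutable.Stack[Int]()
  (0 until n).foreach(i => if (scaled(i) < 1.0) small.push(i) else large.push(i))
  while (small.nonEmpty && large.nonEmpty) {
    val s = small.pop()
    val l = large.pop()
    prob(s) = scaled(s)          // bucket s keeps itself with probability scaled(s)
    alias(s) = l                 // ...and defers to l for the remainder
    scaled(l) += scaled(s) - 1.0 // l has donated (1 - scaled(s)) of its mass
    if (scaled(l) < 1.0) small.push(l) else large.push(l)
  }
  (small ++ large).foreach(i => prob(i) = 1.0) // leftovers keep themselves
  (prob, alias)
}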
Example 2
Source File: GibbsSample.scala, from glintlda (MIT License)
package glintlda

import breeze.linalg.{DenseVector, SparseVector, sum}
import glintlda.util.FastRNG

// NOTE: the GibbsSample class itself is elided on this page; the minimal shape below
// is reconstructed from the companion object's usage so the snippet is self-contained.
class GibbsSample(val features: Array[Int], val topics: Array[Int])

object GibbsSample {

  // Expands a sparse word-count vector into one entry per token, assigning each
  // token a uniformly random initial topic.
  def apply(sv: SparseVector[Int], random: FastRNG, topics: Int): GibbsSample = {
    val totalTokens = sum(sv)
    val sample = new GibbsSample(new Array[Int](totalTokens), new Array[Int](totalTokens))

    var i = 0
    var current = 0
    while (i < sv.activeSize) {
      val index = sv.indexAt(i)
      var value = sv.valueAt(i)
      while (value > 0) {
        sample.features(current) = index
        sample.topics(current) = random.nextPositiveInt() % topics
        current += 1
        value -= 1
      }
      i += 1
    }

    sample
  }

}
Example 3
Source File: Perplexity.scala, from scalda (MIT License)
package com.nitro.scalda.evaluation.perplexity

import breeze.linalg.{Axis, DenseMatrix, sum}
import breeze.numerics._
import com.nitro.scalda.Utils
import com.nitro.scalda.models.{ Document, OnlineLdaParams }

object Perplexity {

  
  /** Variational lower-bound score for a mini-batch, used to monitor perplexity. */
  def perplexity(
    mb: Seq[Document],
    mbGamma: DenseMatrix[Double],
    lambda: DenseMatrix[Double],
    params: OnlineLdaParams
  ): Double = {

    val eLogTheta = Utils.dirichletExpectation(mbGamma)
    val eLogBeta = Utils.dirichletExpectation(lambda)

    var perplexityScore = 0.0

    for ((doc, docId) <- mb.zipWithIndex) {

      val eLogThetaDoc = eLogTheta(docId, ::).t

      perplexityScore += sum(
        doc.wordIds.zip(doc.wordCts).map {

          case (wordId, wordCt) => Utils.logSumExp(eLogThetaDoc + eLogBeta(::, wordId)) * wordCt.toDouble
        }
      )

    }

    perplexityScore += sum(mbGamma.map(el => params.alpha - el) :* eLogTheta)
    perplexityScore += sum(lgamma(mbGamma) - lgamma(params.alpha))
    perplexityScore += sum(lgamma(params.alpha * params.numTopics) - lgamma(sum(mbGamma, Axis._1)))
    perplexityScore *= params.totalDocs / mb.size.toDouble
    perplexityScore += sum(lambda.map(el => params.eta - el) :* eLogBeta)
    perplexityScore += sum(lgamma(lambda) - lgamma(params.eta))
    perplexityScore += sum(lgamma(params.eta * params.vocabulary.size) - lgamma(sum(lambda, Axis._1)))

    perplexityScore
  }

} 
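Utils.dirichletExpectation is not shown on this page; in online variational LDA it is conventionally E[log theta] = digamma(gamma) - digamma(row sum of gamma), applied row-wise. A plausible Breeze sketch (an assumption, not necessarily scalda's exact code):

import breeze.linalg.{*, Axis, DenseMatrix, sum}
import breeze.numerics.digamma

// digamma of each entry, minus digamma of that entry's row sum
def dirichletExpectation(m: DenseMatrix[Double]): DenseMatrix[Double] =
  digamma(m)(::, *) - digamma(sum(m, Axis._1))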
Example 4
Source File: PoolingSuite.scala, from keystone (Apache License 2.0)
package keystoneml.nodes.images

import breeze.linalg.{DenseVector, sum}
import keystoneml.nodes._
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ChannelMajorArrayVectorizedImage, ImageMetadata}

class PoolingSuite extends FunSuite with Logging {

  test("pooling") {
    val imgArr =
      (0 until 4).flatMap { x =>
        (0 until 4).flatMap { y =>
          (0 until 1).map { c =>
            (c + x * 1 + y * 4 * 1).toDouble
          }
        }
      }.toArray

    val image = new ChannelMajorArrayVectorizedImage(imgArr, ImageMetadata(4, 4, 1))
    val pooling = new Pooler(2, 2, x => x, x => x.max)

    val poolImage = pooling(image)

    assert(poolImage.get(0, 0, 0) === 5.0)
    assert(poolImage.get(0, 1, 0) === 7.0)
    assert(poolImage.get(1, 0, 0) === 13.0)
    assert(poolImage.get(1, 1, 0) === 15.0)
  }

  test("pooling odd") {
    val hogImgSize = 14
    val convSizes = List(1, 2, 3, 4, 6, 8)
    convSizes.foreach { convSize =>
      val convResSize = hogImgSize - convSize + 1

      val imgArr =
        (0 until convResSize).flatMap { x =>
          (0 until convResSize).flatMap { y =>
            (0 until 1000).map { c =>
              (c + x * 1 + y * 4 * 1).toDouble
            }
          }
        }.toArray

      val image = new ChannelMajorArrayVectorizedImage(
        imgArr, ImageMetadata(convResSize, convResSize, 1000))

      val poolSizeReqd = math.ceil(convResSize / 2.0).toInt

      // We want poolSize to be even !!
      val poolSize = (math.ceil(poolSizeReqd / 2.0) * 2).toInt
      // overlap as little as possible
      val poolStride = convResSize - poolSize


      println(s"VALUES: $convSize $convResSize $poolSizeReqd $poolSize $poolStride")

      def summ(x: DenseVector[Double]): Double = sum(x)

      val pooling = new Pooler(poolStride, poolSize, identity, summ)
      val poolImage = pooling(image)
    }
  }
} 
Example 5
Source File: LinearCombinationChiSquare.scala, from seqspark (Apache License 2.0)
package org.dizhang.seqspark.stat

import breeze.linalg.{DenseVector, sum}
import org.dizhang.seqspark.stat.LinearCombinationChiSquare._


@SerialVersionUID(7778520001L)
trait LinearCombinationChiSquare extends Serializable {
  def lambda: DenseVector[Double]
  def nonCentrality: DenseVector[Double]
  def degreeOfFreedom: DenseVector[Double]
  def cdf(cutoff: Double): CDF

  val meanLambda: Double = sum(lambda)
  val size: Int = lambda.length

}

object LinearCombinationChiSquare {
  @SerialVersionUID(7778550101L)
  trait CDF extends Serializable {
    def pvalue: Double
    def ifault: Int
    def trace: Array[Double]
  }
} 
Example 6
Source File: MixtureDistribution.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.probability.distributions

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{ContinuousDistr, Moments, Multinomial}
import spire.algebra.VectorSpace


class MixtureWithConfBars[I, V](
  distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
  probabilities: Multinomial[DenseVector[Double], Int])(
  implicit vI: VectorSpace[I, Double]) extends
  MixtureDistribution[I](distributions, probabilities) with
  HasErrorBars[I] {

  private val weightsArr = probabilities.params.toArray

  override def confidenceInterval(s: Double) =
    distributions.zip(weightsArr).map(c => {
      val (lower, upper) = c._1.confidenceInterval(s)

      (vI.timesr(lower, c._2), vI.timesr(upper, c._2))
    }).reduce((a,b) =>
      (vI.plus(a._1, b._1), vI.plus(a._2, b._2))
    )


  def mean = distributions.zip(weightsArr)
    .map(c => vI.timesr(c._1.mean, c._2))
    .reduce((a,b) => vI.plus(a,b))

}

object MixtureWithConfBars {

  def apply[I, V](
    distributions: Seq[ContinuousDistr[I] with Moments[I, V] with HasErrorBars[I]],
    weights: DenseVector[Double])(
    implicit vI: VectorSpace[I, Double]): MixtureWithConfBars[I, V] =
    new MixtureWithConfBars(distributions, new Multinomial[DenseVector[Double], Int](weights))
} 
Example 7
Source File: normDist.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.algebra

import breeze.generic.UFunc
import breeze.linalg.sum
import breeze.numerics.{abs, pow}


object normDist extends UFunc {

  implicit object implDV extends Impl2[SparkVector, Double, Double] {
    def apply(a: SparkVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._vector.values.map(x => math.pow(math.abs(x), p)).sum(), 1.0/p)
    }
  }
}

object normBDist extends UFunc {
  implicit object implBlockedDV extends Impl2[SparkBlockedVector, Double, Double] {
    def apply(a: SparkBlockedVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._vector.values.map(x => sum(pow(abs(x), p))).sum(), 1.0/p)
    }
  }

  implicit object implPartitionedDV extends Impl2[PartitionedVector, Double, Double] {
    def apply(a: PartitionedVector, p: Double) = {
      assert(p >= 1.0, "p must be at least 1.0 for an L_p norm")
      math.pow(a._data.map(_._2).map(x => sum(pow(abs(x), p))).sum, 1.0/p)
    }
  }


} 
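For comparison, the same L_p computation on a plain in-memory Breeze vector (a standalone sketch; SparkVector, SparkBlockedVector, and PartitionedVector are DynaML-specific distributed types):

import breeze.linalg.{DenseVector, sum}
import breeze.numerics.{abs, pow}

// ||v||_p = (sum_i |v_i|^p)^(1/p), defined for p >= 1
def lpNorm(v: DenseVector[Double], p: Double): Double = {
  require(p >= 1.0, "p must be at least 1.0 for an L_p norm")
  math.pow(sum(pow(abs(v), p)), 1.0 / p)
}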
Example 8
Source File: NeuralNetSpec.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.DynaMLPipe
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import org.scalatest.{FlatSpec, Matchers}


class NeuralNetSpec extends FlatSpec with Matchers {

  "A feed-forward neural network" should "be able to learn non-linear functions "+
    "on a compact domain" in {
    val uni = new Uniform(0.0, 1.0)
    //Create a synthetic data set of (x, y) values:
    //x is sampled in the unit hypercube, y is a nonlinear polynomial of x plus noise
    val noise = new Gaussian(0.0, 0.002)
    val uniH = new Uniform(0.0, 1.0)


    val numPoints:Int = 5000

    val data = (1 to numPoints).map(_ => {
      val features = DenseVector.tabulate[Double](4)(_ => uniH.draw)

      val (x,y,u,v) = (features(0), features(1), features(2), features(3))

      val target = DenseVector(
        1.0 + x*x + y*y*y + v*u*v + v*u + noise.draw,
        1.0 + x*u + u*y*y + v*v*v + u*u*u + noise.draw)

      (features, target)
    })

    val (trainingData, testData) = (data.take(4000), data.takeRight(1000))

    val epsilon = 0.85

    type Data = Stream[(DenseVector[Double], DenseVector[Double])]

    val model = new FeedForwardNetwork[Data](
      trainingData.toStream,
      FFNeuralGraph(4, 2, 0, List("logsig", "linear"), List(10), biasFlag = true))(
      DynaMLPipe.identityPipe[Data])

    model.setLearningRate(1.0)
      .setRegParam(0.01)
      .setMomentum(0.8)
      .setMaxIterations(150)
      .learn()

    val res = model.test(testData.toStream)

    val metrics = new MultiRegressionMetrics(res.toList, res.length)
    //println(metrics.Rsq)
    assert(sum(metrics.corr)/metrics.Rsq.length >= epsilon)
  }
} 
Example 9
Source File: AutoEncoderSpec.scala, from DynaML (Apache License 2.0)
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._

import org.scalatest.{FlatSpec, Matchers}

class AutoEncoderSpec extends FlatSpec with Matchers {

  // Registered with `ignore` rather than `it`, so ScalaTest skips it by default.
  ignore should "be able to learn a continuous, "+
    "invertible identity map x = g(h(x))" in {

    val uni = new Uniform(-math.Pi, math.Pi)
    val theta = RandomVariable(new Uniform(-math.Pi, math.Pi))
    val circleTransform = DataPipe((t: Double) => (math.cos(t), math.sin(t)))
    val rvOnCircle = theta > circleTransform
    //Create synthetic data set of x,y values

    val noise = new Gaussian(0.0, 0.02)

    val numPoints:Int = 4000
    val epsilon = 0.05

    val data = (1 to numPoints).map(_ => {
      val sample = rvOnCircle.draw
      val features = DenseVector(sample._1, sample._2)
      val augFeatures = DenseVector(
        math.pow(0.85*features(1), 2) + noise.draw,
        math.pow(0.45*features(0), 3) + noise.draw,
        math.pow(features(0)+0.85*features(1), 3) + noise.draw,
        math.pow(features(0)-0.5*features(1), 2) + noise.draw,
        math.pow(features(0)+features(1), 3) + noise.draw,
        math.pow(features(0)-features(1), 2) + noise.draw,
        math.pow(features(0)+0.4*features(1), 2) + noise.draw,
        math.pow(features(0)+0.5*features(1), 3) + noise.draw)

      augFeatures
    })

    val (trainingData, testData) = (data.take(3000), data.takeRight(1000))

    val enc = GenericAutoEncoder(List(8, 4, 4, 8), List(VectorTansig, VectorTansig, VectorTansig))

    //BackPropagation.rho = 0.5

    enc.optimizer.setRegParam(0.0001).setStepSize(0.1).setNumIterations(1000).momentum_(0.5)

    enc.learn(trainingData.toStream)

    val metrics = new MultiRegressionMetrics(
      testData.map(c => (enc.i(enc.f(c)), c)).toList,
      testData.length)

    println("Corr: "+metrics.corr)
    assert(sum(metrics.mae)/metrics.corr.length <= epsilon)

  }

} 
Example 10
Source File: Normalize.scala, from ScalaNetwork (GNU GPL v2.0)
package kr.ac.kaist.ir.deep.layer

import breeze.linalg.sum
import breeze.numerics.pow
import kr.ac.kaist.ir.deep.fn._
import play.api.libs.json.{JsObject, Json}


// NOTE: the trait header is elided on this page; Normalize stacks on a base Layer
// (which supplies X, dFdX, ScalarMatrix, and Scalar), reconstructed so the snippet parses.
trait Normalize extends Layer {

  abstract override def updateBy(delta: Iterator[ScalarMatrix], error: ScalarMatrix): ScalarMatrix = {
    val Xsq = pow(X, 2.0f)
    val lenSq = sum(Xsq)
    val len: Scalar = Math.sqrt(lenSq).toFloat

    // Note that length is the function of x_i.
    // Let z_i := x_i / len(x_i).
    // Then d z_i / d x_i = (len^2 - x_i^2) / len^3 = (1 - z_i^2) / len,
    //      d z_j / d x_i = - x_i * x_j / len^3 = - z_i * z_j / len
    val rows = dFdX.rows
    val dZdX = ScalarMatrix $0(rows, rows)
    var r = 0
    while (r < rows) {
      //dZ_r
      var c = 0
      while (c < rows) {
        if (r == c) {
          //dX_c
          dZdX.update(r, c, (1.0f - Xsq(r, 0) / lenSq) / len)
        } else {
          dZdX.update(r, c, (-X(r, 0) * X(c, 0)) / (len * lenSq))
        }
        c += 1
      }
      r += 1
    }

    // un-normalize the error
    super.updateBy(delta, dZdX * error)
  }
} 
Example 11
Source File: CVLogPerplexity.scala, from spectrallda-tensorspark (Apache License 2.0)
package edu.uci.eecs.spectralLDA

import breeze.linalg.sum
import org.apache.spark.{SparkConf, SparkContext}
import edu.uci.eecs.spectralLDA.algorithm._
import org.apache.spark.rdd._
import org.apache.spark.mllib.clustering._
import org.apache.spark.mllib.linalg._

object CVLogPerplexity {
  def main(args: Array[String]) = {
    val conf: SparkConf = new SparkConf().setAppName(s"Spectral LDA")
    val sc: SparkContext = new SparkContext(conf)

    val cv = args(0).toInt
    val documentsPath = args(1)
    val k = args(2).toInt
    val alpha0 = args(3).toDouble
    val maxIterations = args(4).toInt
    val tol = args(5).toDouble
    val minWords = args(6).toInt

    val docs = sc.objectFile[(Long, breeze.linalg.SparseVector[Double])](documentsPath)
      .filter {
        case (_, tc) => sum(tc) >= minWords
      }

    for (i <- 0 until cv) {
      val splits = docs.randomSplit(Array[Double](0.9, 0.1))
      computeLogLikelihood(splits, k, alpha0, maxIterations, tol)
    }

    sc.stop()
  }

  def computeLogLikelihood(splits: Array[RDD[(Long, breeze.linalg.SparseVector[Double])]],
                           k: Int,
                           alpha0: Double,
                           maxIterations: Int,
                           tol: Double
                          ): Unit = {
    val numTestTokens = splits(1)
      .map {
        case (_, tc) => breeze.linalg.sum(tc)
      }
      .reduce(_ + _)

    val tensorLDA = new TensorLDA(
      dimK = k,
      alpha0 = alpha0,
      maxIterations = maxIterations,
      tol = tol
    )
    val (beta, alpha, _, _, m1) = tensorLDA.fit(splits(0))

    val augBeta = breeze.linalg.DenseMatrix.zeros[Double](beta.rows, k + 1)
    val augAlpha = breeze.linalg.DenseVector.ones[Double](alpha.length + 1)
    augBeta(::, 0 until k) := beta
    val dummyTopic = m1 + 0.1 * breeze.linalg.DenseVector.ones[Double](beta.rows) / beta.rows.toDouble
    augBeta(::, k) := dummyTopic / sum(dummyTopic)
    augAlpha(0 until k) := alpha

    val tensorLDAModel = new TensorLDAModel(augBeta, augAlpha)
    val tensorLDALogL = tensorLDAModel.logLikelihood(splits(1), smoothing = 1e-6, maxIterations = 50)
    println(s"Tensor LDA log-perplexity no extra smoothing: ${- tensorLDALogL / numTestTokens}")

    val trainMapped: RDD[(Long, Vector)] = splits(0).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val testMapped: RDD[(Long, Vector)] = splits(1).map {
      case (id, tc) =>
        val (idx, v) = tc.activeIterator.toArray.unzip
        (id, new SparseVector(tc.length, idx, v))
    }

    val ldaOptimizer = new OnlineLDAOptimizer()
      .setMiniBatchFraction(0.05)
    val lda = new LDA()
      .setOptimizer(ldaOptimizer)
      .setMaxIterations(80)
      .setK(k)
      .setDocConcentration(alpha0 / k.toDouble)
      .setBeta(1.0)

    val ldaModel: LDAModel = lda.run(trainMapped)
    val ldaLogL = ldaModel.asInstanceOf[LocalLDAModel].logLikelihood(testMapped)

    println(s"Variational Inference log-perplexity: ${- ldaLogL / numTestTokens}")
  }
} 
Example 12
Source File: package.scala, from hail (MIT License)
package is.hail

import is.hail.stats._
import breeze.linalg.{Vector, DenseVector, max, sum}
import breeze.numerics._
import is.hail.utils._

package object experimental {

  def findMaxAC(af: Double, an: Int, ci: Double = .95): Int = {
    if (af == 0)
      0
    else {
      val quantile_limit = ci // ci for one-sided, 1 - (1 - ci)/2 for two-sided
      val max_ac = qpois(quantile_limit, an * af)
      max_ac
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double = .95, lower: Double = 1e-10, upper: Double = 2, tol: Double = 1e-7, precision: Double = 1e-6): Double = {
    if (ac <= 1 || an == 0) // FAF should not be calculated on singletons
      0.0
    else {
      val f = (af: Double) => ac.toDouble - 1 - qpois(ci, an.toDouble * af)
      val root = uniroot(f, lower, upper, tol)
      val rounder = 1d / (precision / 100d)
      var max_af = math.round(root.getOrElse(0.0) * rounder) / rounder
      while (findMaxAC(max_af, an, ci) < ac) {
        max_af += precision
      }
      max_af - precision
    }
  }

  def calcFilterAlleleFreq(ac: Int, an: Int, ci: Double): Double = calcFilterAlleleFreq(ac, an, ci, lower = 1e-10, upper = 2, tol = 1e-7, precision = 1e-6)


  def haplotypeFreqEM(gtCounts : IndexedSeq[Int]) : IndexedSeq[Double] = {

    assert(gtCounts.size == 9, "haplotypeFreqEM requires genotype counts for the 9 possible genotype combinations.")

    val _gtCounts = new DenseVector(gtCounts.toArray)
    val nSamples = sum(_gtCounts)

    //Needs some non-ref samples to compute
    if(_gtCounts(0) >= nSamples){ return FastIndexedSeq(_gtCounts(0),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0)}

    val nHaplotypes = 2.0*nSamples.toDouble

    
    // Haplotype counts that can be read off unambiguous genotypes; the double
    // heterozygote AaBb (index 4) is apportioned between AB/ab and Ab/aB in the EM loop.
    val const_counts = new DenseVector(Array[Double](
      2.0*_gtCounts(0) + _gtCounts(1) + _gtCounts(3), //n.AB
      2.0*_gtCounts(6) + _gtCounts(3) + _gtCounts(7), //n.Ab
      2.0*_gtCounts(2) + _gtCounts(1) + _gtCounts(5), //n.aB
      2.0*_gtCounts(8) + _gtCounts(5) + _gtCounts(7)  //n.ab
    ))

    //Initial estimate with AaBb contributing equally to each haplotype
    var p_next = (const_counts +:+ new DenseVector(Array.fill[Double](4)(_gtCounts(4)/2.0))) /:/ nHaplotypes
    var p_cur = p_next +:+ 1.0

    //EM
    while(max(abs(p_next -:- p_cur)) > 1e-7){
      p_cur = p_next

      p_next = (const_counts +:+
        (new DenseVector(Array[Double](
          p_cur(0)*p_cur(3), //n.AB
          p_cur(1)*p_cur(2), //n.Ab
          p_cur(1)*p_cur(2), //n.aB
          p_cur(0)*p_cur(3)  //n.ab
        )) * (_gtCounts(4) / ((p_cur(0)*p_cur(3))+(p_cur(1)*p_cur(2)))))
        ) / nHaplotypes

    }

    return (p_next *:* nHaplotypes).toArray.toFastIndexedSeq
  }

} 
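A hypothetical call, with genotype counts ordered as the n.AB/n.Ab/n.aB/n.ab comments above imply (AABB, AaBB, aaBB, AABb, AaBb, aaBb, AAbb, Aabb, aabb):

// made-up counts for the nine two-locus genotypes
val estimated = haplotypeFreqEM(IndexedSeq(120, 40, 5, 35, 25, 6, 4, 8, 2))
// returns the four estimated haplotype counts (n.AB, n.Ab, n.aB, n.ab)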
Example 13
Source File: Norms.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.preprocessing

import breeze.linalg.{Axis, max, sum}
import breeze.numerics.{abs, pow, sqrt}
import io.picnicml.doddlemodel.data.{Features, RealVector}

object Norms {

  sealed trait Norm {
    def apply(x: Features): RealVector
  }

  final case object L1Norm extends Norm {
    override def apply(x: Features): RealVector = sum(abs(x), Axis._1)
  }

  final case object L2Norm extends Norm {
    override def apply(x: Features): RealVector = sqrt(sum(pow(x, 2), Axis._1))
  }

  final case object MaxNorm extends Norm {
    override def apply(x: Features): RealVector = max(abs(x), Axis._1)
  }
} 
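A quick usage sketch, assuming Features is a Float-valued Breeze matrix as in recent doddle-model versions (each norm returns one value per row):

import breeze.linalg.DenseMatrix

val x = DenseMatrix((1.0f, -2.0f), (3.0f, 4.0f))
L1Norm(x)   // DenseVector(3.0f, 7.0f)   -- per-row sum of absolute values
L2Norm(x)   // DenseVector(~2.24f, 5.0f) -- per-row Euclidean norm
MaxNorm(x)  // DenseVector(2.0f, 4.0f)   -- per-row maximum absolute value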
Example 14
Source File: PoissonRegression.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.linear

import breeze.linalg.{all, sum}
import breeze.numerics.{exp, floor, isFinite, log}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearRegressor


case class PoissonRegression private (lambda: Float, private val w: Option[RealVector]) {
  private var yPredMeanCache: Target = _
}

object PoissonRegression {

  def apply(lambda: Float = 0.0f): PoissonRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    PoissonRegression(lambda, none)
  }

  // Breeze slice selecting every weight except the intercept (a negative end index counts from the end)
  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearRegressor[PoissonRegression] = new LinearRegressor[PoissonRegression] {

    override protected def w(model: PoissonRegression): Option[RealVector] = model.w

    override protected def copy(model: PoissonRegression): PoissonRegression = model.copy()

    override protected def copy(model: PoissonRegression, w: RealVector): PoissonRegression =
      model.copy(w = w.some)

    override protected def targetVariableAppropriate(y: Target): Boolean =
      y == floor(y) && all(isFinite(y))

    override protected def predictStateless(model: PoissonRegression, w: RealVector, x: Features): Target =
      floor(this.predictMean(w, x))

    private def predictMean(w: RealVector, x: Features): Target = exp(x * w)

    override protected[linear] def lossStateless(model: PoissonRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredMeanCache = predictMean(w, x)
      sum(y * log(model.yPredMeanCache) - model.yPredMeanCache) / (-x.rows.toFloat) +
        .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: PoissonRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((model.yPredMeanCache - y).t * x).t / x.rows.toFloat
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
} 
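lossStateless above is the mean Poisson negative log-likelihood plus an L2 penalty on all weights except the intercept; the log(y!) term of the Poisson likelihood is dropped because it does not depend on w:

    L(w) = -(1/n) * sum_i [ y_i * log(mu_i) - mu_i ] + (lambda / 2) * w[1:].t * w[1:],  where mu_i = exp(x_i dot w)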
Example 15
Source File: LogisticRegression.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.linear

import breeze.linalg.sum
import breeze.numerics.{log, sigmoid}
import cats.syntax.option._
import io.picnicml.doddlemodel.data.{Features, RealVector, Simplex, Target}
import io.picnicml.doddlemodel.linear.typeclasses.LinearClassifier


case class LogisticRegression private (lambda: Float, numClasses: Option[Int], private val w: Option[RealVector]) {
  private var yPredProbaCache: RealVector = _
}

object LogisticRegression {

  def apply(lambda: Float = 0.0f): LogisticRegression = {
    require(lambda >= 0.0f, "L2 regularization strength must be non-negative")
    LogisticRegression(lambda, none, none)
  }

  private val wSlice: Range.Inclusive = 1 to -1

  @SerialVersionUID(0L)
  implicit lazy val ev: LinearClassifier[LogisticRegression] = new LinearClassifier[LogisticRegression] {

    override def numClasses(model: LogisticRegression): Option[Int] = model.numClasses

    override protected def w(model: LogisticRegression): Option[RealVector] = model.w

    override protected[doddlemodel] def copy(model: LogisticRegression, numClasses: Int): LogisticRegression =
      model.copy(numClasses = numClasses.some)

    override protected def copy(model: LogisticRegression, w: RealVector): LogisticRegression =
      model.copy(w = w.some)

    override protected def predictStateless(model: LogisticRegression, w: RealVector, x: Features): Target =
      (predictProbaStateless(model, w, x)(::, 0) >:> 0.5f).map(x => if (x) 1.0f else 0.0f)

    override protected def predictProbaStateless(model: LogisticRegression, w: RealVector, x: Features): Simplex =
      sigmoid(x * w).asDenseMatrix.t

    override protected[linear] def lossStateless(model: LogisticRegression,
                                                 w: RealVector, x: Features, y: Target): Float = {
      model.yPredProbaCache = predictProbaStateless(model, w, x)(::, 0)
      sum(y * log(model.yPredProbaCache) + (1.0f - y) * log(1.0f - model.yPredProbaCache)) / (-x.rows.toFloat) +
        .5f * model.lambda * (w(wSlice).t * w(wSlice))
    }

    override protected[linear] def lossGradStateless(model: LogisticRegression,
                                                     w: RealVector, x: Features, y: Target): RealVector = {
      val grad = ((y - model.yPredProbaCache).t * x).t / (-x.rows.toFloat)
      grad(wSlice) += model.lambda * w(wSlice)
      grad
    }
  }
} 
Example 16
Source File: MostFrequentClassifierTest.scala, from doddle-model (Apache License 2.0)
package io.picnicml.doddlemodel.dummy.classification

import breeze.linalg.sum
import io.picnicml.doddlemodel.data.{loadBreastCancerDataset, loadIrisDataset}
import io.picnicml.doddlemodel.dummy.classification.MostFrequentClassifier.ev
import org.scalatest.OptionValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class MostFrequentClassifierTest extends AnyFlatSpec with Matchers with OptionValues {

  "Most frequent classifier" should "infer the most frequent class from the iris dataset" in {
    val (x, y, _) = loadIrisDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 0.0
    sum(ev.predict(trainedModel, x)) shouldBe 0.0
  }

  it should "infer the most frequent class from the breast cancer dataset" in {
    val (x, y, _) = loadBreastCancerDataset
    val model = MostFrequentClassifier()
    val trainedModel = ev.fit(model, x, y)
    trainedModel.mostFrequentClass.value shouldBe 1.0
    sum(ev.predict(trainedModel, x)) shouldBe x.rows.toDouble
  }
}