breeze.stats.distributions.Gaussian Scala Examples

The following examples show how to use breeze.stats.distributions.Gaussian. Each example notes the source file, project, and license it was taken from.
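Before diving in, here is a minimal sketch of the core API the examples rely on. Note that Gaussian is parameterised by mean and standard deviation (not variance), and that, depending on the Breeze version, an implicit RandBasis may need to be in scope:

import breeze.stats.distributions.Gaussian

object GaussianBasics extends App {
  // Parameters are the mean and *standard deviation*, not the variance.
  val g = Gaussian(mu = 0.0, sigma = 1.0)

  val x: Double = g.draw()                  // a single sample
  val xs: IndexedSeq[Double] = g.sample(5)  // five samples

  println(g.pdf(0.0))   // density at 0, about 0.3989
  println(g.cdf(1.96))  // about 0.975
}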
Example 1
Source File: generalDLM.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.models.lm

import java.io.{File, PrintWriter}
import java.lang.Math.sqrt

import breeze.stats.distributions.Gaussian

import scalaz.Scalaz._


object generalDLM {
  type Loglikelihood = Double
  type Observation = Double
  type State = Double
  type Time = Double

  case class Data(time: Time, observation: Observation, state: Option[State]) {
    override def toString = state match {
      case Some(x) => s"$time, $observation, $x"
      case None => s"$time, $observation"
    }
  }
  case class Parameters(a: Double, b: Double, l1: Double, l2: Double) {
    override def toString = s"$a, $b, $l1, $l2" 
  }

  def simulate(p: Parameters): Stream[Data] = {
    val stateSpace = unfold(Gaussian(p.l1, sqrt(p.l2)).draw)(x =>
      Some(x, x + Gaussian(0, sqrt(p.b)).draw)
    )
    stateSpace.zipWithIndex map { case (x, t) => 
      Data(t, x + Gaussian(0, sqrt(p.a)).draw, Some(x)) }
  }

  val runSimulateFirstOrder = {
    val p = Parameters(3.0, 0.5, 0.0, 10.0)
    // simulate 16 different realisations of 100 observations, representing 16 stations
    val observations = (1 to 16) map (id => (id, simulate(p).take(100).toVector))

    val pw = new PrintWriter(new File("data/firstOrderdlmRes.csv"))
    pw.write(
      observations.
        flatMap{ case (id, data) =>
          data map (x => id + ", " + x.toString)}.
        mkString("\n"))
    pw.close()
  }
} 
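The unfold used in simulate comes from the Scalaz import. A Scalaz-free sketch of the same state stream, using only the standard library and reusing the snippet's Parameters:

import java.lang.Math.sqrt
import breeze.stats.distributions.Gaussian

// Emits x0, x0 + n1, x0 + n1 + n2, ... exactly like the unfold above.
def stateSpaceStdlib(p: Parameters): Stream[Double] =
  Stream.iterate(Gaussian(p.l1, sqrt(p.l2)).draw)(x => x + Gaussian(0, sqrt(p.b)).draw)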
Example 2
Source File: StatisticalVolumeModelTests.scala    From scalismo   with Apache License 2.0
package scalismo.statisticalmodel.experimental

import java.io.File
import java.net.URLDecoder

import breeze.linalg.DenseVector
import breeze.stats.distributions.Gaussian
import scalismo.ScalismoTestSuite
import scalismo.geometry.{_3D, Point}
import scalismo.io.StatismoIO
import scalismo.registration.{RigidTransformation, RigidTransformationSpace}
import scalismo.utils.Random

class StatisticalVolumeModelTests extends ScalismoTestSuite {

  implicit val random = Random(42)

  implicit def doubleToFloat(d: Double): Float = d.toFloat

  describe("A statistical Volume mesh model") {

    def compareModels(oldModel: StatisticalVolumeMeshModel, newModel: StatisticalVolumeMeshModel): Unit = {

      for (i <- 0 until 10) {
        val standardNormal = Gaussian(0, 1)(random.breezeRandBasis)
        val coeffsData = standardNormal.sample(oldModel.rank)
        val coeffs = DenseVector(coeffsData.toArray)
        val inst = oldModel.instance(coeffs)
        val instNew = newModel.instance(coeffs)
        inst.pointSet.points
          .zip(instNew.pointSet.points)
          .foreach {
            case (pt1, pt2) =>
              (pt1.toVector - pt2.toVector).norm should be(0.0 +- (0.1))
          }
      }
    }

    it("can be transformed forth and back and yield the same deformations") {
      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get

      val parameterVector = DenseVector[Double](1.5, 1.0, 3.5, Math.PI, -Math.PI / 2.0, -Math.PI)
      val rigidTransform = RigidTransformationSpace[_3D]().transformForParameters(parameterVector)
      val inverseTransform = rigidTransform.inverse.asInstanceOf[RigidTransformation[_3D]]
      val transformedModel = model.transform(rigidTransform)
      val newModel = transformedModel.transform(inverseTransform)
      compareModels(model, newModel)
    }

    it("can change the mean shape and still yield the same shape space") {

      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get

      val newMesh = model.sample

      def t(pt: Point[_3D]): Point[_3D] = {
        val ptId = model.referenceVolumeMesh.pointSet.findClosestPoint(pt).id
        newMesh.pointSet.point(ptId)
      }

      val newModel = model.changeReference(t)

      compareModels(model, newModel)
    }

  }
} 
Example 3
Source File: RandomTests.scala    From scalismo   with Apache License 2.0
package scalismo.utils

import breeze.stats.distributions.{Gaussian, Uniform}
import scalismo.ScalismoTestSuite

class RandomTests extends ScalismoTestSuite {

  describe("A random source") {

    it("should yield a deterministic sequence when correctly seeded") {

      def randomNumbersSeeded() = {
        val r = Random(42)
        val standardNormal = Gaussian(0, 1)(r.breezeRandBasis)
        val uniform = Uniform(0, 1)(r.breezeRandBasis)
        (r.scalaRandom.nextInt, standardNormal.draw(), uniform.draw())
      }
      randomNumbersSeeded() should equal(randomNumbersSeeded())

    }

    it("should yield a random sequence when the predefined implicit is imported") {

      def randomNumbersNotSeeded() = {
        import scalismo.utils.Random.implicits._
        val r = implicitly[Random]
        val standardNormal = Gaussian(0, 1)(r.breezeRandBasis)
        val uniform = Uniform(0, 1)(r.breezeRandBasis)
        (r.scalaRandom.nextInt, standardNormal.draw(), uniform.draw())
      }
      randomNumbersNotSeeded() should not equal (randomNumbersNotSeeded())
    }
  }
} 
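The Random(42) wrapper above is scalismo-specific. With plain Breeze, the same determinism comes from passing an explicitly seeded RandBasis, the pattern Examples 6, 11 and 20 below also use:

import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator}
import org.apache.commons.math3.random.MersenneTwister

val rb: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(42)))
val g = Gaussian(0.0, 1.0)(rb)  // deterministic draws for a fixed seed
println(g.sample(3))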
Example 4
Source File: ExpectedImprovement.scala    From pravda-ml   with Apache License 2.0
package com.linkedin.photon.ml.hyperparameter.criteria

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import breeze.stats.distributions.Gaussian
import com.linkedin.photon.ml.hyperparameter.estimators.PredictionTransformation


// Note: this excerpt omits the enclosing class; in photon-ml the method
// belongs to an expected-improvement transformation carrying the incumbent
// best evaluation and a standard normal, both reconstructed here.
class ExpectedImprovement(bestEvaluation: Double) extends PredictionTransformation {

  private val standardNormal = Gaussian(0, 1)

  def apply(
      predictiveMeans: DenseVector[Double],
      predictiveVariances: DenseVector[Double]): DenseVector[Double] = {

    val std = sqrt(predictiveVariances)

    // PBO Eq. 1
    val gamma = - (predictiveMeans - bestEvaluation) / std

    // Eq. 2
    std :* ((gamma :* gamma.map(standardNormal.cdf)) + gamma.map(standardNormal.pdf))
  }
} 
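For a single point, the vectorised transformation above reduces to the usual expected-improvement formula; a scalar sketch (best is the incumbent minimum):

import breeze.stats.distributions.Gaussian

def expectedImprovement(mean: Double, variance: Double, best: Double): Double = {
  val std = math.sqrt(variance)
  val gamma = (best - mean) / std          // same sign convention as above
  val n = Gaussian(0, 1)
  std * (gamma * n.cdf(gamma) + n.pdf(gamma))
}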
Example 5
Source File: Arms.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.arm

import breeze.stats.distributions.{Bernoulli, Gaussian}

trait Arms {

  def BernoulliArm(p: Double): Arm[Double] = Bernoulli.distribution(p).map {
    case true  => 1.0d
    case false => 0.0d
  }

  def NormalArm(μ: Double, σ: Double): Arm[Double]  = Gaussian.distribution(μ -> σ)

  def AdversarialArm(start: Int, activeStart: Int, activeEnd: Int) = new Arm[Double] {
    @volatile var t = start
    override def draw(): Double = {
      t += 1
      t match {
        case _t if _t < activeStart => 0.0d
        case _t if activeStart <= _t && _t <= activeEnd => 1.0
        case _t => 0.0d
      }
    }
  }
} 
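A quick usage sketch of these arms, assuming (as in the banditsbook repo) that Arm[A] is an alias for Breeze's Rand[A]:

object ArmsDemo extends Arms {
  def main(args: Array[String]): Unit = {
    val arm = NormalArm(0.0, 1.0)
    println(Seq.fill(5)(arm.draw()))  // five pulls from the N(0, 1) arm
  }
}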
Example 6
Source File: RandNLATest.scala    From spectrallda-tensorspark   with Apache License 2.0
package edu.uci.eecs.spectralLDA.utils

import breeze.linalg._
import breeze.linalg.qr.QR
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Uniform}
import edu.uci.eecs.spectralLDA.testharness.Context
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext
import org.scalatest._


class RandNLATest extends FlatSpec with Matchers {

  private val sc: SparkContext = Context.getSparkContext

  "M2 sketching" should "be correct" in {
    val a1 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a2 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a3 = SparseVector(DenseVector.rand[Double](100).toArray)

    val docs = Seq((1000L, a1), (1001L, a2), (1002L, a3))
    val docsRDD = sc.parallelize(docs)

    // Random Gaussian matrix
    val g = DenseMatrix.rand[Double](100, 50, Gaussian(mu = 0.0, sigma = 1.0))

    val result = DenseMatrix.zeros[Double](100, 50)
    docsRDD
      .flatMap {
        case (id: Long, w: SparseVector[Double]) => RandNLA.accumulate_M_mul_S(g, w, sum(w))
      }
      .reduceByKey(_ + _)
      .collect
      .foreach {
        case (r: Int, a: DenseVector[Double]) => result(r, ::) := a.t
      }

    val m2 = docsRDD
      .map {
        case (id: Long, w: SparseVector[Double]) =>
          val l = sum(w)
          (w * w.t - diag(w)) / (l * (l - 1.0))
      }
      .reduce(_ + _)
    val expectedResult = m2 * g

    val diff: DenseMatrix[Double] = result - expectedResult
    val normDiff: Double = norm(norm(diff(::, *)).toDenseVector)
    normDiff should be <= 1e-8
  }

  "Randomised Power Iteration method" should "be approximately correct" in {
    implicit val randBasis: RandBasis =
      new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(234787)))

    val n = 100
    val k = 5

    val alpha: DenseVector[Double] = DenseVector[Double](25.0, 20.0, 15.0, 10.0, 5.0)
    val beta: DenseMatrix[Double] = DenseMatrix.rand(n, k, Uniform(0.0, 1.0))

    val norms = norm(beta(::, *)).toDenseVector
    for (j <- 0 until k) {
      beta(::, j) /= norms(j)
    }

    val a: DenseMatrix[Double] = beta * diag(alpha) * beta.t
    val sigma: DenseMatrix[Double] = DenseMatrix.rand(n, k, Gaussian(mu = 0.0, sigma = 1.0))
    val y = a * sigma
    val QR(q: DenseMatrix[Double], _) = qr.reduced(y)

    val (s: DenseVector[Double], u: DenseMatrix[Double]) = RandNLA.decomp2(a * q, q)

    val diff_a = u * diag(s) * u.t - a
    val norm_diff_a = norm(norm(diff_a(::, *)).toDenseVector)

    norm_diff_a should be <= 1e-8
  }
} 
Example 7
Source File: sbt-test-test.scala    From scala-course   with GNU General Public License v3.0
import org.scalatest.funsuite.AnyFunSuite

// Using the FunSuite style here, though other styles are possible.

class SetSuite extends AnyFunSuite {

  test("An empty Set should have size 0") {
    assert(Set.empty.size == 0)
  }

  test("A Gaussian sample of length 10 should have length 10") {
    import breeze.stats.distributions.Gaussian
    val x = Gaussian(2.0,4.0).sample(10)
    assert(x.length === 10)
  }

  test("Cats map merge") {
    import cats.instances.all._
    import cats.syntax.semigroup._
    val m1 = Map("a"->1,"b"->2)
    val m2 = Map("b"->2,"c"->1)
    val m3 = m1 |+| m2
    val m4 = Map("b" -> 4, "c" -> 1, "a" -> 1)
    assert(m3 === m4)
  }

}


// eof 
Example 8
Source File: AutoEncoderSpec.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._

import org.scalatest.{FlatSpec, Matchers}

class AutoEncoderSpec extends FlatSpec with Matchers {

  // Note: the enclosing spec class was missing from this excerpt and has been
  // reconstructed; the test is marked `ignore` in the original.
  ignore should "be able to learn a continuous, " +
    "invertible identity map x = g(h(x))" in {

    val uni = new Uniform(-math.Pi, math.Pi)
    val theta = RandomVariable(new Uniform(-math.Pi, math.Pi))
    val circleTransform = DataPipe((t: Double) => (math.cos(t), math.sin(t)))
    val rvOnCircle = theta > circleTransform
    //Create synthetic data set of x,y values

    val noise = new Gaussian(0.0, 0.02)

    val numPoints:Int = 4000
    val epsilon = 0.05

    val data = (1 to numPoints).map(_ => {
      val sample = rvOnCircle.draw
      val features = DenseVector(sample._1, sample._2)
      val augFeatures = DenseVector(
        math.pow(0.85*features(1), 2) + noise.draw,
        math.pow(0.45*features(0), 3) + noise.draw,
        math.pow(features(0)+0.85*features(1), 3) + noise.draw,
        math.pow(features(0)-0.5*features(1), 2) + noise.draw,
        math.pow(features(0)+features(1), 3) + noise.draw,
        math.pow(features(0)-features(1), 2) + noise.draw,
        math.pow(features(0)+0.4*features(1), 2) + noise.draw,
        math.pow(features(0)+0.5*features(1), 3) + noise.draw)

      augFeatures
    })

    val (trainingData, testData) = (data.take(3000), data.takeRight(1000))

    val enc = GenericAutoEncoder(List(8, 4, 4, 8), List(VectorTansig, VectorTansig, VectorTansig))

    //BackPropagation.rho = 0.5

    enc.optimizer.setRegParam(0.0001).setStepSize(0.1).setNumIterations(1000).momentum_(0.5)

    enc.learn(trainingData.toStream)

    val metrics = new MultiRegressionMetrics(
      testData.map(c => (enc.i(enc.f(c)), c)).toList,
      testData.length)

    println("Corr: "+metrics.corr)
    assert(sum(metrics.mae)/metrics.corr.length <= epsilon)

  }

} 
Example 9
Source File: NeuralNetSpec.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.DynaMLPipe
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import org.scalatest.{FlatSpec, Matchers}


class NeuralNetSpec extends FlatSpec with Matchers {

  "A feed-forward neural network" should "be able to learn non-linear functions "+
    "on a compact domain" in {
    val uni = new Uniform(0.0, 1.0)
    //Create synthetic data set of x,y values
    //x is sampled in unit hypercube, y = w.x + noise
    val noise = new Gaussian(0.0, 0.002)
    val uniH = new Uniform(0.0, 1.0)


    val numPoints:Int = 5000

    val data = (1 to numPoints).map(_ => {
      val features = DenseVector.tabulate[Double](4)(_ => uniH.draw)

      val (x,y,u,v) = (features(0), features(1), features(2), features(3))

      val target = DenseVector(
        1.0 + x*x + y*y*y + v*u*v + v*u + noise.draw,
        1.0 + x*u + u*y*y + v*v*v + u*u*u + noise.draw)

      (features, target)
    })

    val (trainingData, testData) = (data.take(4000), data.takeRight(1000))

    val epsilon = 0.85

    val model = new FeedForwardNetwork[
      Stream[(DenseVector[Double], DenseVector[Double])]](
      trainingData.toStream,
      FFNeuralGraph(4, 2, 0, List("logsig", "linear"), List(10), biasFlag = true))(
      DynaMLPipe.identityPipe[Stream[(DenseVector[Double], DenseVector[Double])]])

    model.setLearningRate(1.0)
      .setRegParam(0.01)
      .setMomentum(0.8)
      .setMaxIterations(150)
      .learn()

    val res = model.test(testData.toStream)

    val metrics = new MultiRegressionMetrics(res.toList, res.length)
    //println(metrics.Rsq)
    assert(sum(metrics.corr)/metrics.Rsq.length >= epsilon)
  }
} 
Example 10
Source File: mcmc.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.optimization

import java.io.{File, PrintWriter}
import breeze.numerics.log
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.models.lm.KFilter
import io.github.mandar2812.dynaml.models.lm.generalDLM._
import KFilter._
import scalaz.Scalaz._

object mcmc {
  case class MetropolisState(params: Parameters, accepted: Int, ll: Loglikelihood)

    
  def metropolisIters(
    initParams: Parameters,
    likelihood: Parameters => Loglikelihood,
    perturb: Parameters => Parameters): Stream[MetropolisState] = {

    val initState = MetropolisState(initParams, 0, likelihood(initParams))
    // `metropolisStep` performs one accept/reject move; it is not part of this
    // excerpt (see the sketch after this example).
    unfold(initState)(metropolisStep(likelihood, perturb))
  }

  def main(args: Array[String]): Unit = {
    val n = 10000
    val p = Parameters(3.0, 0.5, 0.0, 10.0)
    val observations = simulate(p).take(100).toVector

    val iters = metropolisIters(p, filterll(observations), perturb(0.1)).take(n)
    println(s"Accepted: ${iters.last.accepted.toDouble/n}")

    // write the parameters to file
    val pw = new PrintWriter(new File("data/mcmcOutRes.csv"))
    pw.write(iters.map(_.params).mkString("\n"))
    pw.close()
  }
} 
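metropolisStep itself is not part of the excerpt. A minimal sketch of one accept/reject move with the shape unfold expects, assuming log-likelihoods and a symmetric proposal perturb:

import breeze.numerics.log
import breeze.stats.distributions.Uniform

// Hypothetical reconstruction: propose, accept with probability exp(ll' - ll),
// and emit (current, next) so unfold can continue the chain.
def metropolisStep(
  likelihood: Parameters => Loglikelihood,
  perturb: Parameters => Parameters)(
  s: MetropolisState): Option[(MetropolisState, MetropolisState)] = {

  val proposed = perturb(s.params)
  val llProposed = likelihood(proposed)
  val next =
    if (log(Uniform(0, 1).draw) < llProposed - s.ll)
      MetropolisState(proposed, s.accepted + 1, llProposed)
    else s
  Some((next, next))
}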
Example 11
Source File: ApproximatePCA.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.learning

import breeze.linalg._
import breeze.numerics._
import breeze.stats._
import breeze.stats.distributions.{Gaussian, ThreadLocalRandomGenerator, RandBasis}
import com.github.fommil.netlib.LAPACK._
import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.rdd.RDD
import org.netlib.util.intW
import keystoneml.pipelines.Logging
import keystoneml.workflow.Estimator


// Note: only the randomised range finder from the original ApproximatePCA
// estimator is shown; it is wrapped in an object here so the excerpt compiles.
object ApproximatePCA {

  def approximateQ(A: DenseMatrix[Double], l: Int, q: Int, seed: Int = 0): DenseMatrix[Double] = {
    val d = A.cols

    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    val omega = DenseMatrix.rand(d, l, Gaussian(0,1)(randBasis)) //cpu: d*l, mem: d*l
    val y0 = A*omega //cpu: n*d*l, mem: n*l

    var Q = QRUtils.qrQR(y0)._1 //cpu: n*l^2

    for (i <- 1 to q) {
      val YHat = Q.t * A //cpu: l*n*d, mem: l*d
      val Qh = QRUtils.qrQR(YHat.t)._1 //cpu: d*l^2, mem: d*l

      val Yj = A * Qh //cpu: n*d*l, mem: n*l
      Q = QRUtils.qrQR(Yj)._1 //cpu:  n*l^2, mem: n*l
    }

    Q
  }
} 
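A quick sanity check for approximateQ (hypothetical usage): the returned Q should have orthonormal columns, so Q.t * Q is close to the identity:

import breeze.linalg._

val A = DenseMatrix.rand[Double](200, 50)
val Q = ApproximatePCA.approximateQ(A, l = 10, q = 2)
println(norm((Q.t * Q - DenseMatrix.eye[Double](10)).toDenseVector))  // ~0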
Example 12
Source File: LogisticGLM.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.models.lm

import breeze.linalg.DenseVector
import breeze.numerics._
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization._


class ProbitGLM(data: Stream[(DenseVector[Double], Double)],
                numPoints: Int,
                map: (DenseVector[Double]) => DenseVector[Double] =
                identity[DenseVector[Double]])
  extends LogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h = (x: Double) =>
    standardGaussian.cdf(x)

  override protected val optimizer =
    new GradientDescent(
      new ProbitGradient,
      new SquaredL2Updater)

} 
Example 13
Source File: SparkLogisticGLM.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.models.lm

//Breeze Imports
import breeze.linalg.DenseVector
import breeze.numerics.sigmoid
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization.ProbitGradient
import org.apache.spark.mllib.linalg.Vectors
//DynaML Imports
import io.github.mandar2812.dynaml.optimization.{
  GradientDescentSpark, LogisticGradient,
  RegularizedOptimizer, SquaredL2Updater}
//Spark Imports
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD


class SparkProbitGLM(
  data: RDD[(DenseVector[Double], Double)], numPoints: Long,
  map: (DenseVector[Double]) => DenseVector[Double] =
  identity[DenseVector[Double]]) extends SparkLogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h: (Double) => Double = (x: Double) => standardGaussian.cdf(x)

  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double], DenseVector[Double],
    Double, RDD[LabeledPoint]] = new GradientDescentSpark(new ProbitGradient, new SquaredL2Updater)

} 
Example 14
Source File: VectorIIDProbit.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.probability

import breeze.linalg.{DenseMatrix, DenseVector, diag}
import breeze.stats.distributions.Gaussian


// Note: this excerpt omits DynaML's enclosing VectorIIDProbit class. The
// declaration and the members used below (`standardGaussian`, the probit link
// `h`) are restored here; the parent trait is elided, so `override` is
// dropped from the method signatures.
class VectorIIDProbit {

  private val standardGaussian = new Gaussian(0, 1.0)

  // Probit link: the standard Gaussian CDF.
  val h: (Double) => Double = (x: Double) => standardGaussian.cdf(x)

  def hessian(y: DenseVector[Double],
                       f: DenseVector[Double]): DenseMatrix[Double] = {
    diag(DenseVector((y.toArray zip f.toArray).map((couple) => {
      val n = standardGaussian.pdf(couple._2)
      val product = couple._1*couple._2
      val l = h(product)
      -1.0*(n*n)/(l*l) - product*n/l
    })))
  }

  def gaussianExpectation(normalDistParams: (DenseVector[Double],
    DenseVector[Double])): DenseVector[Double] = {
    DenseVector((normalDistParams._1.toArray zip normalDistParams._2.toArray).map((couple) => {
      val gamma = math.sqrt(1.0 + couple._2)
      standardGaussian.pdf(couple._1/gamma)
    }))
  }
} 
Example 15
Source File: Burden.scala    From seqspark   with Apache License 2.0
package org.dizhang.seqspark.assoc

import breeze.linalg.DenseVector
import breeze.stats.distributions.{Gaussian, StudentsT}
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._

import scala.language.existentials


@SerialVersionUID(7727280001L)
trait Burden extends AssocMethod {
  def nullModel: NM
  def x: Encode.Fixed
  def result: AssocMethod.Result
}

object Burden {

  def apply(nullModel: NM,
            x: Encode.Coding): Burden with AssocMethod.AnalyticTest = {
    nullModel match {
      case nm: NM.Fitted =>
        AnalyticScoreTest(nm, x.asInstanceOf[Encode.Fixed])
      case _ =>
        AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Fixed])
    }
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Fixed])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    val st = ScoreTest(nm, x.asInstanceOf[Encode.Fixed].coding)
    st.score(0)/st.variance(0,0).sqrt
  }

  def getStatistic(nm: NM, x: DenseVector[Double]): Double = {
    val wt = WaldTest(nm, x)
    (wt.beta /:/ wt.std).apply(1)
  }

  @SerialVersionUID(7727280101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.Fixed)
    extends Burden with AssocMethod.AnalyticTest
  {
    def geno = x.coding
    //val scoreTest = ScoreTest(nullModel, geno)
    val statistic = getStatistic(nullModel, x)
    val pValue = {
      val dis = new Gaussian(0.0, 1.0)
      Some(1.0 - dis.cdf(statistic))
    }

    def result: AssocMethod.BurdenAnalytic = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
    }

  }
  case class AnalyticWaldTest(nullModel: NM,
                              x: Encode.Fixed) extends Burden with AssocMethod.AnalyticTest {
    def geno = x.coding
    private val wt = WaldTest(nullModel, x.coding)
    val statistic = getStatistic(nullModel, geno)
    val pValue = {
      val dis = new StudentsT(nullModel.dof - 1)
      Some(1.0 - dis.cdf(statistic))
    }
    def result = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
    }
  }

  @SerialVersionUID(7727280201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.Fixed)
    extends Burden with AssocMethod.ResamplingTest
  {
    def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    def result: AssocMethod.BurdenResampling = {
      AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
    }
  }
} 
Example 16
Source File: SNV.scala    From seqspark   with Apache License 2.0
package org.dizhang.seqspark.assoc

import breeze.stats.distributions.Gaussian
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._

import scala.language.existentials


trait SNV extends AssocMethod {
  def nullModel: NM
  def x: Encode.Common
  def result: AssocMethod.Result
}

object SNV {
  def apply(nullModel: NM,
            x: Encode.Coding): SNV with AssocMethod.AnalyticTest = {
    nullModel match {
      case nm: NM.Fitted =>
        AnalyticScoreTest(nm, x.asInstanceOf[Encode.Common])
      case _ =>
        AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Common])
    }
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Common])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    val st = ScoreTest(nm, x.asInstanceOf[Encode.Common].coding)
    math.abs(st.score(0)/st.variance(0,0).sqrt)
  }

  @SerialVersionUID(7727280101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.Common)
    extends SNV with AssocMethod.AnalyticTest
  {
    //val scoreTest = ScoreTest(nullModel, x.coding)
    val statistic = getStatistic(nullModel, x)
    val pValue = {
      val dis = new Gaussian(0.0, 1.0)
      Some((1.0 - dis.cdf(statistic)) * 2)
    }

    def result: AssocMethod.BurdenAnalytic = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
    }
  }

  case class AnalyticWaldTest(nullModel: NM,
                              x: Encode.Common)
    extends SNV with AssocMethod.AnalyticTest
  {
    private val wt = WaldTest(nullModel, x.coding.toDenseVector)
    val statistic = wt.beta(1) / wt.std(1)
    val pValue = Some(wt.pValue(oneSided = false).apply(1))
    def result = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
    }
  }

  @SerialVersionUID(7727280201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.Common)
    extends SNV with AssocMethod.ResamplingTest
  {
    def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    def result: AssocMethod.BurdenResampling = {
      AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
    }
  }
} 
Example 17
Source File: IntegratorTest.scala    From spark-gp   with Apache License 2.0
package org.apache.spark.ml.commons.util

import breeze.numerics.{abs, sigmoid, sqrt}
import breeze.stats.distributions.{Gaussian, RandBasis}
import org.apache.commons.math3.random.MersenneTwister
import org.scalatest.FunSuite


class IntegratorTest extends FunSuite {

  test("testExpectedOfFunctionOfNormal") {
    val f = (x: Double) => sigmoid(x)
    val integrator = new Integrator(100)
    val mean = 0.5
    val variance = 3
    val sd = sqrt(variance)

    val testResult = integrator.expectedOfFunctionOfNormal(mean, variance, f)

    val gg = new Gaussian(mean, sd)(new RandBasis(new MersenneTwister()))
    val mcIters = 100000
    val values = gg.sample(mcIters).map(f)
    val mcResult = values.sum / mcIters
    val mcSD = sqrt(values.map(_ - mcResult).map(x => x * x).sum / mcIters) / sqrt(mcIters)
    assert(abs(mcResult - testResult) < 3 * mcSD)
  }

} 
Example 18
Source File: LinearDiscriminantAnalysisSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.learning

import breeze.linalg._
import breeze.stats.distributions.{Multinomial, Uniform, Gaussian}
import keystoneml.nodes.stats.StandardScaler
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{TestUtils, MatrixUtils, Stats}
import keystoneml.workflow.PipelineContext

class LinearDiscriminantAnalysisSuite extends FunSuite with PipelineContext with Logging {
  test("Solve Linear Discriminant Analysis on the Iris Dataset") {
    sc = new SparkContext("local", "test")

    // Uses the Iris flower dataset
    val irisData = sc.parallelize(TestUtils.loadFile("iris.data"))
    val trainData = irisData.map(_.split(",").dropRight(1).map(_.toDouble)).map(new DenseVector(_))
    val features = new StandardScaler().fit(trainData).apply(trainData)
    val labels = irisData.map(_ match {
      case x if x.endsWith("Iris-setosa") => 1
      case x if x.endsWith("Iris-versicolor") => 2
      case x if x.endsWith("Iris-virginica") => 3
    })

    val lda = new LinearDiscriminantAnalysis(2)
    val out = lda.fit(features, labels)

    // Correct output taken from http://sebastianraschka.com/Articles/2014_python_lda.html#introduction
    logInfo(s"\n${out.x}")
    val majorVector = DenseVector(-0.1498, -0.1482, 0.8511, 0.4808)
    val minorVector = DenseVector(0.0095, 0.3272, -0.5748, 0.75)

    // Note that because eigenvectors can be reversed and still valid, we allow either direction
    assert(Stats.aboutEq(out.x(::, 0), majorVector, 1E-4) || Stats.aboutEq(out.x(::, 0), majorVector * -1.0, 1E-4))
    assert(Stats.aboutEq(out.x(::, 1), minorVector, 1E-4) || Stats.aboutEq(out.x(::, 1), minorVector * -1.0, 1E-4))
  }

  test("Check LDA output for a diagonal covariance") {
    sc = new SparkContext("local", "test")

    val matRows = 1000
    val matCols = 10
    val dimRed = 5

    // Generate a random Gaussian matrix.
    val gau = new Gaussian(0.0, 1.0)
    val randMatrix = new DenseMatrix(matRows, matCols, gau.sample(matRows*matCols).toArray)

    // Parallelize and estimate the LDA.
    val data = sc.parallelize(MatrixUtils.matrixToRowArray(randMatrix))
    val labels = data.map(x => Multinomial(DenseVector(0.2, 0.2, 0.2, 0.2, 0.2)).draw(): Int)
    val lda = new LinearDiscriminantAnalysis(dimRed).fit(data, labels)

    // Apply LDA to the input data.
    val redData = lda(data)
    val redMat = MatrixUtils.rowsToMatrix(redData.collect)

    // Compute its covariance.
    val redCov = cov(redMat)
    log.info(s"Covar\n$redCov")

    // The covariance of the dimensionality reduced matrix should be diagonal.
    for (
      x <- 0 until dimRed;
      y <- 0 until dimRed if x != y
    ) {
      assert(Stats.aboutEq(redCov(x,y), 0.0, 1e-6), s"LDA Matrix should be 0 off-diagonal. $x,$y = ${redCov(x,y)}")
    }
  }

} 
Example 19
Source File: EncEvalSuite.scala    From keystone   with Apache License 2.0
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.stats.distributions.Gaussian
import keystoneml.nodes.learning.GaussianMixtureModel
import keystoneml.nodes.learning.external.GaussianMixtureModelEstimator
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{Stats, TestUtils}

class EncEvalSuite extends FunSuite with Logging {

  test("Load SIFT Descriptors and compute Fisher Vector Features") {

    val siftDescriptor = csvread(new File(TestUtils.getTestResourceFileName("images/feats.csv")))

    val gmmMeans = TestUtils.getTestResourceFileName("images/voc_codebook/means.csv")
    val gmmVars = TestUtils.getTestResourceFileName("images/voc_codebook/variances.csv")
    val gmmWeights = TestUtils.getTestResourceFileName("images/voc_codebook/priors")

    val gmm = GaussianMixtureModel.load(gmmMeans, gmmVars, gmmWeights)

    val nCenters = gmm.means.cols
    val nDim = gmm.means.rows

    val extLib = new EncEval

    val fisherVector = extLib.calcAndGetFVs(
      gmm.means.toArray.map(_.toFloat),
      nCenters,
      nDim,
      gmm.variances.toArray.map(_.toFloat),
      gmm.weights.toArray.map(_.toFloat),
      siftDescriptor.toArray.map(_.toFloat))

    log.info(s"Fisher Vector is ${fisherVector.sum}")
    assert(Stats.aboutEq(fisherVector.sum, 40.109097, 1e-4), "SUM of Fisher Vectors must match expected sum.")

  }

  test("Compute a GMM from scala") {
    val nsamps = 10000

    // Generate two gaussians.
    val x = Gaussian(-1.0, 0.5).samples.take(nsamps).toArray
    val y = Gaussian(5.0, 1.0).samples.take(nsamps).toArray

    val z = shuffle(x ++ y).map(x => DenseVector(x))

    // Compute a 1-d GMM.
    val extLib = new EncEval
    val gmm = new GaussianMixtureModelEstimator(2).fit(z)

    logInfo(s"GMM means: ${gmm.means.toArray.mkString(",")}")
    logInfo(s"GMM vars: ${gmm.variances.toArray.mkString(",")}")
    logInfo(s"GMM weights: ${gmm.weights.toArray.mkString(",")}")

    // The results should be close to the distribution we set up.
    assert(Stats.aboutEq(min(gmm.means), -1.0, 1e-1), "Smallest mean should be close to -1.0")
    assert(Stats.aboutEq(max(gmm.means), 5.0, 1e-1), "Largest mean should be close to 5.0")
    assert(Stats.aboutEq(math.sqrt(min(gmm.variances)), 0.5, 1e-1), "Smallest SD should be close to 0.5")
    assert(Stats.aboutEq(math.sqrt(max(gmm.variances)), 1.0, 1e-1), "Largest SD should be close to 1.0")
  }
} 
Example 20
Source File: TestUtils.scala    From keystone   with Apache License 2.0
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


// Note: the original `object TestUtils` wrapper and its `genData` helper are
// not part of this excerpt; a minimal reconstruction (with a hypothetical
// helper body) is sketched here so the generators below compile.
object TestUtils {

  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Rand.uniform.draw())

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
}