breeze.stats.distributions.Gaussian Scala Examples
The following examples show how to use breeze.stats.distributions.Gaussian.
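Before the full examples, here is a minimal sketch of the basic API as it is used throughout this page: a Gaussian is constructed from a mean and a standard deviation (not a variance) and supports draw, sample, pdf and cdf. The explicitly seeded RandBasis mirrors the pattern in Examples 2, 3, 6, 11, 17 and 20; the value names are illustrative only.

import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator}
import org.apache.commons.math3.random.MersenneTwister

object GaussianBasics extends App {
  // Gaussian(mu, sigma): sigma is the standard deviation, not the variance
  val g = Gaussian(0.0, 1.0)
  val one: Double = g.draw()                 // a single random draw
  val ten: IndexedSeq[Double] = g.sample(10) // ten draws
  val density = g.pdf(0.0)                   // density at 0.0
  val upperTail = 1.0 - g.cdf(1.96)          // upper-tail probability

  // For reproducible draws, pass an explicitly seeded RandBasis
  val basis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(42)))
  val seeded = Gaussian(0.0, 1.0)(basis)
  println(seeded.sample(3))
}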
Example 1
Source File: generalDLM.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.models.lm

import java.io.{File, PrintWriter}
import java.lang.Math.sqrt
import breeze.stats.distributions.Gaussian
import scalaz.Scalaz._

object generalDLM {
  type Loglikelihood = Double
  type Observation = Double
  type State = Double
  type Time = Double

  case class Data(time: Time, observation: Observation, state: Option[State]) {
    override def toString = state match {
      case Some(x) => s"$time, $observation, $x"
      case None => s"$time, $observation"
    }
  }

  case class Parameters(a: Double, b: Double, l1: Double, l2: Double) {
    override def toString = s"$a, $b, $l1, $l2"
  }

  def simulate(p: Parameters): Stream[Data] = {
    val stateSpace = unfold(Gaussian(p.l1, sqrt(p.l2)).draw)(x =>
      Some(x, x + Gaussian(0, sqrt(p.b)).draw)
    )
    stateSpace.zipWithIndex map { case (x, t) =>
      Data(t, x + Gaussian(0, sqrt(p.a)).draw, Some(x))
    }
  }

  val runSimulateFirstOrder = {
    val p = Parameters(3.0, 0.5, 0.0, 10.0)
    // simulate 16 different realisations of 100 observations, representing 16 stations
    val observations = (1 to 16) map (id => (id, simulate(p).take(100).toVector))

    val pw = new PrintWriter(new File("data/firstOrderdlmRes.csv"))
    pw.write(
      observations.
        flatMap { case (id, data) => data map (x => id + ", " + x.toString) }.
        mkString("\n"))
    pw.close()
  }
}
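The simulation above is a first-order dynamic linear model: the initial state is x0 ~ N(l1, l2), states evolve as x_t = x_{t-1} + w_t with w_t ~ N(0, b), and observations are y_t = x_t + v_t with v_t ~ N(0, a). Because breeze's Gaussian takes a standard deviation rather than a variance, each variance parameter is wrapped in sqrt before being passed.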
Example 2
Source File: StatisticalVolumeModelTests.scala From scalismo with Apache License 2.0 | 5 votes |
package scalismo.statisticalmodel.experimental

import java.io.File
import java.net.URLDecoder

import breeze.linalg.DenseVector
import breeze.stats.distributions.Gaussian
import scalismo.ScalismoTestSuite
import scalismo.geometry.{_3D, Point}
import scalismo.io.StatismoIO
import scalismo.registration.{RigidTransformation, RigidTransformationSpace}
import scalismo.utils.Random

class StatisticalVolumeModelTests extends ScalismoTestSuite {

  implicit val random = Random(42)

  implicit def doubleToFloat(d: Double): Float = d.toFloat

  describe("A statistical Volume mesh model") {

    def compareModels(oldModel: StatisticalVolumeMeshModel, newModel: StatisticalVolumeMeshModel) {
      for (i <- 0 until 10) {
        val standardNormal = Gaussian(0, 1)(random.breezeRandBasis)
        val coeffsData = standardNormal.sample(oldModel.rank)
        val coeffs = DenseVector(coeffsData.toArray)
        val inst = oldModel.instance(coeffs)
        val instNew = newModel.instance(coeffs)
        inst.pointSet.points
          .zip(instNew.pointSet.points)
          .foreach { case (pt1, pt2) =>
            (pt1.toVector - pt2.toVector).norm should be(0.0 +- (0.1))
          }
      }
    }

    it("can be transformed forth and back and yield the same deformations") {
      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get
      val parameterVector = DenseVector[Double](1.5, 1.0, 3.5, Math.PI, -Math.PI / 2.0, -Math.PI)
      val rigidTransform = RigidTransformationSpace[_3D]().transformForParameters(parameterVector)
      val inverseTransform = rigidTransform.inverse.asInstanceOf[RigidTransformation[_3D]]
      val transformedModel = model.transform(rigidTransform)
      val newModel = transformedModel.transform(inverseTransform)
      compareModels(model, newModel)
    }

    it("can change the mean shape and still yield the same shape space") {
      val path = getClass.getResource("/TetraMeshModel2.h5").getPath
      val model = StatismoIO.readStatismoVolumeMeshModel(new File(URLDecoder.decode(path))).get
      val newMesh = model.sample

      def t(pt: Point[_3D]): Point[_3D] = {
        val ptId = model.referenceVolumeMesh.pointSet.findClosestPoint(pt).id
        newMesh.pointSet.point(ptId)
      }

      val newModel = model.changeReference(t)
      compareModels(model, newModel)
    }
  }
}
Example 3
Source File: RandomTests.scala From scalismo with Apache License 2.0 | 5 votes |
package scalismo.utils

import breeze.stats.distributions.{Gaussian, Uniform}
import scalismo.ScalismoTestSuite

class RandomTests extends ScalismoTestSuite {

  describe("A random source") {

    it("should yield a deterministic sequence when correctly seeded") {
      def randomNumbersSeeded() = {
        val r = Random(42)
        val standardNormal = Gaussian(0, 1)(r.breezeRandBasis)
        val uniform = Uniform(0, 1)(r.breezeRandBasis)
        (r.scalaRandom.nextInt, standardNormal.draw(), uniform.draw())
      }
      randomNumbersSeeded() should equal(randomNumbersSeeded())
    }

    it("should yield a random sequence when the predefined implicit is imported") {
      def randomNumbersNotSeeded() = {
        import scalismo.utils.Random.implicits._
        val r = implicitly[Random]
        val standardNormal = Gaussian(0, 1)(r.breezeRandBasis)
        val uniform = Uniform(0, 1)(r.breezeRandBasis)
        (r.scalaRandom.nextInt, standardNormal.draw(), uniform.draw())
      }
      randomNumbersNotSeeded() should not equal (randomNumbersNotSeeded())
    }
  }
}
Example 4
Source File: ExpectedImprovement.scala From pravda-ml with Apache License 2.0 | 5 votes |
package com.linkedin.photon.ml.hyperparameter.criteria

import breeze.linalg.DenseVector
import breeze.numerics.sqrt
import breeze.stats.distributions.Gaussian
import com.linkedin.photon.ml.hyperparameter.estimators.PredictionTransformation

// The class declaration is elided in the original snippet; a minimal wrapper is
// reconstructed here, with the bestEvaluation field and the standard normal
// member the apply method below relies on.
class ExpectedImprovement(bestEvaluation: Double) extends PredictionTransformation {

  private val standardNormal = new Gaussian(0, 1)

  def apply(
      predictiveMeans: DenseVector[Double],
      predictiveVariances: DenseVector[Double]): DenseVector[Double] = {

    val std = sqrt(predictiveVariances)

    // PBO Eq. 1
    val gamma = - (predictiveMeans - bestEvaluation) / std

    // Eq. 2
    std :* ((gamma :* gamma.map(standardNormal.cdf)) + gamma.map(standardNormal.pdf))
  }
}
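In the usual notation this computes the expected-improvement acquisition function: with predictive mean mu, predictive standard deviation sigma and best evaluation so far y*, the code forms gamma = (y* - mu) / sigma and EI = sigma * (gamma * Phi(gamma) + phi(gamma)), where Phi and phi are the CDF and PDF of standardNormal, matching the "PBO Eq. 1" and "Eq. 2" comments.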
Example 5
Source File: Arms.scala From banditsbook-scala with MIT License | 5 votes |
package com.github.everpeace.banditsbook.arm

import breeze.stats.distributions.{Bernoulli, Gaussian}

trait Arms {

  def BernoulliArm(p: Double): Arm[Double] = Bernoulli.distribution(p).map {
    case true => 1.0d
    case false => 0.0d
  }

  def NormalArm(μ: Double, σ: Double): Arm[Double] = Gaussian.distribution(μ -> σ)

  def AdversarialArm(start: Int, activeStart: Int, activeEnd: Int) = new Arm[Double] {
    @volatile var t = start
    override def draw(): Double = {
      t += 1
      t match {
        case _t if _t < activeStart => 0.0d
        case _t if activeStart <= _t && _t <= activeEnd => 1.0
        case _t => 0.0d
      }
    }
  }
}
Example 6
Source File: RandNLATest.scala From spectrallda-tensorspark with Apache License 2.0 | 5 votes |
package edu.uci.eecs.spectralLDA.utils

import breeze.linalg._
import breeze.linalg.qr.QR
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Uniform}
import edu.uci.eecs.spectralLDA.testharness.Context
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext
import org.scalatest._

class RandNLATest extends FlatSpec with Matchers {

  private val sc: SparkContext = Context.getSparkContext

  "M2 sketching" should "be correct" in {
    val a1 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a2 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a3 = SparseVector(DenseVector.rand[Double](100).toArray)

    val docs = Seq((1000L, a1), (1001L, a2), (1002L, a3))
    val docsRDD = sc.parallelize(docs)

    // Random Gaussian matrix
    val g = DenseMatrix.rand[Double](100, 50, Gaussian(mu = 0.0, sigma = 1.0))

    val result = DenseMatrix.zeros[Double](100, 50)
    docsRDD
      .flatMap {
        case (id: Long, w: SparseVector[Double]) =>
          RandNLA.accumulate_M_mul_S(g, w, sum(w))
      }
      .reduceByKey(_ + _)
      .collect
      .foreach {
        case (r: Int, a: DenseVector[Double]) =>
          result(r, ::) := a.t
      }

    val m2 = docsRDD
      .map {
        case (id: Long, w: SparseVector[Double]) =>
          val l = sum(w)
          (w * w.t - diag(w)) / (l * (l - 1.0))
      }
      .reduce(_ + _)
    val expectedResult = m2 * g

    val diff: DenseMatrix[Double] = result - expectedResult
    val normDiff: Double = norm(norm(diff(::, *)).toDenseVector)
    normDiff should be <= 1e-8
  }

  "Randomised Power Iteration method" should "be approximately correct" in {
    implicit val randBasis: RandBasis =
      new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(234787)))

    val n = 100
    val k = 5

    val alpha: DenseVector[Double] = DenseVector[Double](25.0, 20.0, 15.0, 10.0, 5.0)
    val beta: DenseMatrix[Double] = DenseMatrix.rand(n, k, Uniform(0.0, 1.0))
    val norms = norm(beta(::, *)).toDenseVector
    for (j <- 0 until k) {
      beta(::, j) /= norms(j)
    }

    val a: DenseMatrix[Double] = beta * diag(alpha) * beta.t
    val sigma: DenseMatrix[Double] = DenseMatrix.rand(n, k, Gaussian(mu = 0.0, sigma = 1.0))
    val y = a * sigma
    val QR(q: DenseMatrix[Double], _) = qr.reduced(y)

    val (s: DenseVector[Double], u: DenseMatrix[Double]) = RandNLA.decomp2(a * q, q)

    val diff_a = u * diag(s) * u.t - a
    val norm_diff_a = norm(norm(diff_a(::, *)).toDenseVector)
    norm_diff_a should be <= 1e-8
  }
}
Example 7
Source File: sbt-test-test.scala From scala-course with GNU General Public License v3.0 | 5 votes |
import org.scalatest.funsuite.AnyFunSuite

// Here using FunSuite style - but other possibilities...

class SetSuite extends AnyFunSuite {

  test("An empty Set should have size 0") {
    assert(Set.empty.size == 0)
  }

  test("A Gaussian sample of length 10 should have length 10") {
    import breeze.stats.distributions.Gaussian
    val x = Gaussian(2.0, 4.0).sample(10)
    assert(x.length === 10)
  }

  test("Cats map merge") {
    import cats.instances.all._
    import cats.syntax.semigroup._
    val m1 = Map("a" -> 1, "b" -> 2)
    val m2 = Map("b" -> 2, "c" -> 1)
    val m3 = m1 |+| m2
    val m4 = Map("b" -> 4, "c" -> 1, "a" -> 1)
    assert(m3 === m4)
  }
}
// eof
Example 8
Source File: AutoEncoderSpec.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.pipes.DataPipe
import io.github.mandar2812.dynaml.probability.RandomVariable
import spire.implicits._
import org.scalatest.{FlatSpec, Matchers}

class AutoEncoderSpec extends FlatSpec with Matchers {

  // The class declaration and test subject are elided in the original snippet;
  // they are reconstructed here so the ignored test below compiles.
  behavior of "An auto-encoder network"

  ignore should "be able to learn a continuous, " +
    "invertible identity map x = g(h(x))" in {

    val uni = new Uniform(-math.Pi, math.Pi)
    val theta = RandomVariable(new Uniform(-math.Pi, math.Pi))
    val circleTransform = DataPipe((t: Double) => (math.cos(t), math.sin(t)))
    val rvOnCircle = theta > circleTransform
    //Create synthetic data set of x,y values
    val noise = new Gaussian(0.0, 0.02)

    val numPoints: Int = 4000
    val epsilon = 0.05

    val data = (1 to numPoints).map(_ => {
      val sample = rvOnCircle.draw
      val features = DenseVector(sample._1, sample._2)
      val augFeatures = DenseVector(
        math.pow(0.85*features(1), 2) + noise.draw,
        math.pow(0.45*features(0), 3) + noise.draw,
        math.pow(features(0)+0.85*features(1), 3) + noise.draw,
        math.pow(features(0)-0.5*features(1), 2) + noise.draw,
        math.pow(features(0)+features(1), 3) + noise.draw,
        math.pow(features(0)-features(1), 2) + noise.draw,
        math.pow(features(0)+0.4*features(1), 2) + noise.draw,
        math.pow(features(0)+0.5*features(1), 3) + noise.draw)
      augFeatures
    })

    val (trainingData, testData) = (data.take(3000), data.takeRight(1000))

    val enc = GenericAutoEncoder(List(8, 4, 4, 8), List(VectorTansig, VectorTansig, VectorTansig))

    //BackPropagation.rho = 0.5

    enc.optimizer.setRegParam(0.0001).setStepSize(0.1).setNumIterations(1000).momentum_(0.5)

    enc.learn(trainingData.toStream)

    val metrics = new MultiRegressionMetrics(
      testData.map(c => (enc.i(enc.f(c)), c)).toList,
      testData.length)

    println("Corr: " + metrics.corr)

    assert(sum(metrics.mae)/metrics.corr.length <= epsilon)
  }
}
Example 9
Source File: NeuralNetSpec.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.models.neuralnets

import breeze.linalg.{DenseVector, sum}
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.DynaMLPipe
import io.github.mandar2812.dynaml.evaluation.MultiRegressionMetrics
import io.github.mandar2812.dynaml.graph.FFNeuralGraph
import org.scalatest.{FlatSpec, Matchers}

class NeuralNetSpec extends FlatSpec with Matchers {

  "A feed-forward neural network" should "be able to learn non-linear functions " +
    "on a compact domain" in {
    val uni = new Uniform(0.0, 1.0)
    //Create synthetic data set of x,y values
    //x is sampled in unit hypercube, y = w.x + noise
    val noise = new Gaussian(0.0, 0.002)
    val uniH = new Uniform(0.0, 1.0)

    val numPoints: Int = 5000

    val data = (1 to numPoints).map(_ => {
      val features = DenseVector.tabulate[Double](4)(_ => uniH.draw)

      val (x, y, u, v) = (features(0), features(1), features(2), features(3))
      val target = DenseVector(
        1.0 + x*x + y*y*y + v*u*v + v*u + noise.draw,
        1.0 + x*u + u*y*y + v*v*v + u*u*u + noise.draw)
      (features, target)
    })

    val (trainingData, testData) = (data.take(4000), data.takeRight(1000))

    val epsilon = 0.85

    val model = new FeedForwardNetwork[Stream[(DenseVector[Double], DenseVector[Double])]](
      trainingData.toStream,
      FFNeuralGraph(4, 2, 0, List("logsig", "linear"), List(10), biasFlag = true))(
      DynaMLPipe.identityPipe[Stream[(DenseVector[Double], DenseVector[Double])]])

    model.setLearningRate(1.0)
      .setRegParam(0.01)
      .setMomentum(0.8)
      .setMaxIterations(150)
      .learn()

    val res = model.test(testData.toStream)
    val metrics = new MultiRegressionMetrics(res.toList, res.length)
    //println(metrics.Rsq)
    assert(sum(metrics.corr)/metrics.Rsq.length >= epsilon)
  }
}
Example 10
Source File: mcmc.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.optimization

import java.io.{File, PrintWriter}

import breeze.numerics.log
import breeze.stats.distributions.{Gaussian, Uniform}
import io.github.mandar2812.dynaml.models.lm.KFilter
import io.github.mandar2812.dynaml.models.lm.generalDLM._
import KFilter._
import scalaz.Scalaz._

object mcmc {

  case class MetropolisState(params: Parameters, accepted: Int, ll: Loglikelihood)

  // metropolisStep, filterll and perturb are not shown in this excerpt; they come
  // from the imported KFilter/generalDLM utilities or elided parts of the file.
  def metropolisIters(
      initParams: Parameters,
      likelihood: Parameters => Loglikelihood,
      perturb: Parameters => Parameters): Stream[MetropolisState] = {

    val initState = MetropolisState(initParams, 0, likelihood(initParams))
    unfold(initState)(metropolisStep(likelihood, perturb))
  }

  def main(args: Array[String]): Unit = {
    val n = 10000
    val p = Parameters(3.0, 0.5, 0.0, 10.0)
    val observations = simulate(p).take(100).toVector

    val iters = metropolisIters(p, filterll(observations), perturb(0.1)).take(n)
    println(s"Accepted: ${iters.last.accepted.toDouble/n}")

    // write the parameters to file
    val pw = new PrintWriter(new File("data/mcmcOutRes.csv"))
    pw.write(iters.map(_.params).mkString("\n"))
    pw.close()
  }
}
Example 11
Source File: ApproximatePCA.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.nodes.learning

import breeze.linalg._
import breeze.numerics._
import breeze.stats._
import breeze.stats.distributions.{Gaussian, ThreadLocalRandomGenerator, RandBasis}
import com.github.fommil.netlib.LAPACK._
import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.rdd.RDD
import org.netlib.util.intW
import keystoneml.pipelines.Logging
import keystoneml.workflow.Estimator

// The enclosing declaration is elided in the original snippet; an object named
// after the file is assumed here so the method below compiles.
object ApproximatePCA {

  def approximateQ(A: DenseMatrix[Double], l: Int, q: Int, seed: Int = 0): DenseMatrix[Double] = {
    val d = A.cols

    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    val omega = DenseMatrix.rand(d, l, Gaussian(0, 1)(randBasis)) //cpu: d*l, mem: d*l
    val y0 = A*omega //cpu: n*d*l, mem: n*l

    var Q = QRUtils.qrQR(y0)._1 //cpu: n*l**2

    for (i <- 1 to q) {
      val YHat = Q.t * A //cpu: l*n*d, mem: l*d
      val Qh = QRUtils.qrQR(YHat.t)._1 //cpu: d*l^2, mem: d*l

      val Yj = A * Qh //cpu: n*d*l, mem: n*l
      Q = QRUtils.qrQR(Yj)._1 //cpu: n*l^2, mem: n*l
    }

    Q
  }
}
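approximateQ above is the randomized range finder with power iterations: draw a Gaussian test matrix, orthonormalize its image under A via QR, and refine q times to sharpen the dominant subspace. A minimal hedged usage sketch follows; the enclosing object name ApproximatePCA is inferred from the file name and not shown in the original snippet.

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Gaussian

// Hypothetical call site: a rank-20 orthonormal basis for the range of a 500 x 100 matrix
val a = DenseMatrix.rand[Double](500, 100, Gaussian(0.0, 1.0))
val basis = ApproximatePCA.approximateQ(a, l = 20, q = 2)
// basis has orthonormal columns, and basis * (basis.t * a) approximates a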
Example 12
Source File: LogisticGLM.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.models.lm

import breeze.linalg.DenseVector
import breeze.numerics._
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization._

class ProbitGLM(
    data: Stream[(DenseVector[Double], Double)],
    numPoints: Int,
    map: (DenseVector[Double]) => DenseVector[Double] = identity[DenseVector[Double]])
  extends LogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h = (x: Double) => standardGaussian.cdf(x)

  override protected val optimizer = new GradientDescent(
    new ProbitGradient,
    new SquaredL2Updater)
}
Example 13
Source File: SparkLogisticGLM.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.models.lm

//Breeze Imports
import breeze.linalg.DenseVector
import breeze.numerics.sigmoid
import breeze.stats.distributions.Gaussian
import io.github.mandar2812.dynaml.optimization.ProbitGradient
import org.apache.spark.mllib.linalg.Vectors

//DynaML Imports
import io.github.mandar2812.dynaml.optimization.{
  GradientDescentSpark, LogisticGradient,
  RegularizedOptimizer, SquaredL2Updater}

//Spark Imports
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

class SparkProbitGLM(
    data: RDD[(DenseVector[Double], Double)],
    numPoints: Long,
    map: (DenseVector[Double]) => DenseVector[Double] = identity[DenseVector[Double]])
  extends SparkLogisticGLM(data, numPoints, map) {

  private val standardGaussian = new Gaussian(0, 1.0)

  override val h: (Double) => Double = (x: Double) => standardGaussian.cdf(x)

  override protected val optimizer: RegularizedOptimizer[
    DenseVector[Double], DenseVector[Double],
    Double, RDD[LabeledPoint]] =
    new GradientDescentSpark(new ProbitGradient, new SquaredL2Updater)
}
Example 14
Source File: VectorIIDProbit.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml.probability

import breeze.linalg.{DenseMatrix, DenseVector, diag}
import breeze.stats.distributions.Gaussian

// The class declaration is elided in the original snippet; a minimal wrapper is
// reconstructed here, with the standard Gaussian and the probit link h that the
// two methods below rely on. The originals override a likelihood trait that is
// not shown, so the override modifiers are dropped.
class VectorIIDProbit {

  private val standardGaussian = new Gaussian(0.0, 1.0)

  val h: (Double) => Double = (x: Double) => standardGaussian.cdf(x)

  def hessian(y: DenseVector[Double], f: DenseVector[Double]): DenseMatrix[Double] = {
    diag(DenseVector((y.toArray zip f.toArray).map((couple) => {
      val n = standardGaussian.pdf(couple._2)
      val product = couple._1*couple._2
      val l = h(product)
      -1.0*(n*n)/(l*l) - product*n/l
    })))
  }

  def gaussianExpectation(normalDistParams: (DenseVector[Double], DenseVector[Double])): DenseVector[Double] = {
    DenseVector((normalDistParams._1.toArray zip normalDistParams._2.toArray).map((couple) => {
      val gamma = math.sqrt(1.0 + couple._2)
      standardGaussian.pdf(couple._1/gamma)
    }))
  }
}
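Derivation note for the snippet above: with labels y in {-1, 1}, the probit log-likelihood is log Phi(y * f), and its second derivative in f is -phi(z)^2 / Phi(z)^2 - z * phi(z) / Phi(z) at z = y * f (using phi'(z) = -z * phi(z) and y^2 = 1). This is exactly the per-component value assembled in hessian, where phi(f) stands in for phi(y * f) because phi is even.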
Example 15
Source File: Burden.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.assoc

import breeze.linalg.DenseVector
import breeze.stats.distributions.{Gaussian, StudentsT}
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._

import scala.language.existentials

@SerialVersionUID(7727280001L)
trait Burden extends AssocMethod {
  def nullModel: NM
  def x: Encode.Fixed
  def result: AssocMethod.Result
}

object Burden {

  def apply(nullModel: NM, x: Encode.Coding): Burden with AssocMethod.AnalyticTest = {
    nullModel match {
      case nm: NM.Fitted =>
        AnalyticScoreTest(nm, x.asInstanceOf[Encode.Fixed])
      case _ =>
        AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Fixed])
    }
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Fixed])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    val st = ScoreTest(nm, x.asInstanceOf[Encode.Fixed].coding)
    st.score(0)/st.variance(0, 0).sqrt
  }

  def getStatistic(nm: NM, x: DenseVector[Double]): Double = {
    val wt = WaldTest(nm, x)
    (wt.beta /:/ wt.std).apply(1)
  }

  @SerialVersionUID(7727280101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.Fixed)
    extends Burden with AssocMethod.AnalyticTest {

    def geno = x.coding
    //val scoreTest = ScoreTest(nullModel, geno)
    val statistic = getStatistic(nullModel, x)
    val pValue = {
      val dis = new Gaussian(0.0, 1.0)
      Some(1.0 - dis.cdf(statistic))
    }

    def result: AssocMethod.BurdenAnalytic = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
    }
  }

  case class AnalyticWaldTest(nullModel: NM,
                              x: Encode.Fixed)
    extends Burden with AssocMethod.AnalyticTest {

    def geno = x.coding
    private val wt = WaldTest(nullModel, x.coding)
    val statistic = getStatistic(nullModel, geno)
    val pValue = {
      val dis = new StudentsT(nullModel.dof - 1)
      Some(1.0 - dis.cdf(statistic))
    }

    def result = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue,
        s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
    }
  }

  @SerialVersionUID(7727280201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.Fixed)
    extends Burden with AssocMethod.ResamplingTest {

    def pCount = {
      Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    }

    def result: AssocMethod.BurdenResampling = {
      AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
    }
  }
}
Example 16
Source File: SNV.scala From seqspark with Apache License 2.0

package org.dizhang.seqspark.assoc

import breeze.stats.distributions.Gaussian
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._

import scala.language.existentials

trait SNV extends AssocMethod {
  def nullModel: NM
  def x: Encode.Common
  def result: AssocMethod.Result
}

object SNV {

  def apply(nullModel: NM, x: Encode.Coding): SNV with AssocMethod.AnalyticTest = {
    nullModel match {
      case nm: NM.Fitted =>
        AnalyticScoreTest(nm, x.asInstanceOf[Encode.Common])
      case _ =>
        AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Common])
    }
  }

  def apply(ref: Double, min: Int, max: Int,
            nullModel: NM.Fitted,
            x: Encode.Coding): ResamplingTest = {
    ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Common])
  }

  def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
    val st = ScoreTest(nm, x.asInstanceOf[Encode.Common].coding)
    math.abs(st.score(0)/st.variance(0, 0).sqrt)
  }

  @SerialVersionUID(7727280101L)
  final case class AnalyticScoreTest(nullModel: NM.Fitted,
                                     x: Encode.Common)
    extends SNV with AssocMethod.AnalyticTest {
    //val scoreTest = ScoreTest(nullModel, x.coding)
    val statistic = getStatistic(nullModel, x)
    val pValue = {
      val dis = new Gaussian(0.0, 1.0)
      Some((1.0 - dis.cdf(statistic)) * 2)
    }

    def result: AssocMethod.BurdenAnalytic = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
    }
  }

  case class AnalyticWaldTest(nullModel: NM,
                              x: Encode.Common)
    extends SNV with AssocMethod.AnalyticTest {
    private val wt = WaldTest(nullModel, x.coding.toDenseVector)
    val statistic = wt.beta(1) / wt.std(1)
    val pValue = Some(wt.pValue(oneSided = false).apply(1))

    def result = {
      AssocMethod.BurdenAnalytic(x.vars, statistic, pValue,
        s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
    }
  }

  @SerialVersionUID(7727280201L)
  final case class ResamplingTest(refStatistic: Double,
                                  min: Int,
                                  max: Int,
                                  nullModel: NM.Fitted,
                                  x: Encode.Common)
    extends SNV with AssocMethod.ResamplingTest {

    def pCount = {
      Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
    }

    def result: AssocMethod.BurdenResampling = {
      AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
    }
  }
}
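Note the contrast with Example 15: here the score statistic is taken in absolute value and the Gaussian tail probability is doubled, giving the two-sided p-value 2 * (1 - Phi(|z|)), whereas Burden's score test reports the one-sided tail 1 - Phi(z).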
Example 17
Source File: IntegratorTest.scala From spark-gp with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.commons.util

import breeze.numerics.{abs, sigmoid, sqrt}
import breeze.stats.distributions.{Gaussian, RandBasis}
import org.apache.commons.math3.random.MersenneTwister
import org.scalatest.FunSuite

class IntegratorTest extends FunSuite {

  test("testExpectedOfFunctionOfNormal") {
    val f = (x: Double) => sigmoid(x)
    val integrator = new Integrator(100)
    val mean = 0.5
    val variance = 3
    val sd = sqrt(variance)

    val testResult = integrator.expectedOfFunctionOfNormal(mean, variance, f)

    val gg = new Gaussian(mean, sd)(new RandBasis(new MersenneTwister()))
    val mcIters = 100000
    val values = gg.sample(mcIters).map(f)
    val mcResult = values.sum / mcIters
    val mcSD = sqrt(values.map(_ - mcResult).map(x => x * x).sum / mcIters) / sqrt(mcIters)

    assert(abs(mcResult - testResult) < 3 * mcSD)
  }
}
Example 18
Source File: LinearDiscriminantAnalysisSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.nodes.learning

import breeze.linalg._
import breeze.stats.distributions.{Multinomial, Uniform, Gaussian}
import keystoneml.nodes.stats.StandardScaler
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{TestUtils, MatrixUtils, Stats}
import keystoneml.workflow.PipelineContext

class LinearDiscriminantAnalysisSuite extends FunSuite with PipelineContext with Logging {

  test("Solve Linear Discriminant Analysis on the Iris Dataset") {
    sc = new SparkContext("local", "test")

    // Uses the Iris flower dataset
    val irisData = sc.parallelize(TestUtils.loadFile("iris.data"))
    val trainData = irisData.map(_.split(",").dropRight(1).map(_.toDouble)).map(new DenseVector(_))
    val features = new StandardScaler().fit(trainData).apply(trainData)
    val labels = irisData.map(_ match {
      case x if x.endsWith("Iris-setosa") => 1
      case x if x.endsWith("Iris-versicolor") => 2
      case x if x.endsWith("Iris-virginica") => 3
    })

    val lda = new LinearDiscriminantAnalysis(2)
    val out = lda.fit(features, labels)

    // Correct output taken from http://sebastianraschka.com/Articles/2014_python_lda.html#introduction
    logInfo(s"\n${out.x}")
    val majorVector = DenseVector(-0.1498, -0.1482, 0.8511, 0.4808)
    val minorVector = DenseVector(0.0095, 0.3272, -0.5748, 0.75)

    // Note that because eigenvectors can be reversed and still valid, we allow either direction
    assert(Stats.aboutEq(out.x(::, 0), majorVector, 1E-4) ||
      Stats.aboutEq(out.x(::, 0), majorVector * -1.0, 1E-4))
    assert(Stats.aboutEq(out.x(::, 1), minorVector, 1E-4) ||
      Stats.aboutEq(out.x(::, 1), minorVector * -1.0, 1E-4))
  }

  test("Check LDA output for a diagonal covariance") {
    sc = new SparkContext("local", "test")

    val matRows = 1000
    val matCols = 10
    val dimRed = 5

    // Generate a random Gaussian matrix.
    val gau = new Gaussian(0.0, 1.0)
    val randMatrix = new DenseMatrix(matRows, matCols, gau.sample(matRows*matCols).toArray)

    // Parallelize and estimate the LDA.
    val data = sc.parallelize(MatrixUtils.matrixToRowArray(randMatrix))
    val labels = data.map(x => Multinomial(DenseVector(0.2, 0.2, 0.2, 0.2, 0.2)).draw(): Int)
    val lda = new LinearDiscriminantAnalysis(dimRed).fit(data, labels)

    // Apply LDA to the input data.
    val redData = lda(data)
    val redMat = MatrixUtils.rowsToMatrix(redData.collect)

    // Compute its covariance.
    val redCov = cov(redMat)
    log.info(s"Covar\n$redCov")

    // The covariance of the dimensionality reduced matrix should be diagonal.
    for (
      x <- 0 until dimRed;
      y <- 0 until dimRed if x != y
    ) {
      assert(Stats.aboutEq(redCov(x, y), 0.0, 1e-6),
        s"LDA Matrix should be 0 off-diagonal. $x,$y = ${redCov(x, y)}")
    }
  }
}
Example 19
Source File: EncEvalSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.stats.distributions.Gaussian
import keystoneml.nodes.learning.GaussianMixtureModel
import keystoneml.nodes.learning.external.GaussianMixtureModelEstimator
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{Stats, TestUtils}

class EncEvalSuite extends FunSuite with Logging {

  test("Load SIFT Descriptors and compute Fisher Vector Features") {

    val siftDescriptor = csvread(new File(TestUtils.getTestResourceFileName("images/feats.csv")))

    val gmmMeans = TestUtils.getTestResourceFileName("images/voc_codebook/means.csv")
    val gmmVars = TestUtils.getTestResourceFileName("images/voc_codebook/variances.csv")
    val gmmWeights = TestUtils.getTestResourceFileName("images/voc_codebook/priors")

    val gmm = GaussianMixtureModel.load(gmmMeans, gmmVars, gmmWeights)

    val nCenters = gmm.means.cols
    val nDim = gmm.means.rows

    val extLib = new EncEval

    val fisherVector = extLib.calcAndGetFVs(
      gmm.means.toArray.map(_.toFloat),
      nCenters,
      nDim,
      gmm.variances.toArray.map(_.toFloat),
      gmm.weights.toArray.map(_.toFloat),
      siftDescriptor.toArray.map(_.toFloat))

    log.info(s"Fisher Vector is ${fisherVector.sum}")
    assert(Stats.aboutEq(fisherVector.sum, 40.109097, 1e-4),
      "SUM of Fisher Vectors must match expected sum.")
  }

  test("Compute a GMM from scala") {
    val nsamps = 10000

    // Generate two gaussians.
    val x = Gaussian(-1.0, 0.5).samples.take(nsamps).toArray
    val y = Gaussian(5.0, 1.0).samples.take(nsamps).toArray

    val z = shuffle(x ++ y).map(x => DenseVector(x))

    // Compute a 1-d GMM.
    val extLib = new EncEval
    val gmm = new GaussianMixtureModelEstimator(2).fit(z)

    logInfo(s"GMM means: ${gmm.means.toArray.mkString(",")}")
    logInfo(s"GMM vars: ${gmm.variances.toArray.mkString(",")}")
    logInfo(s"GMM weights: ${gmm.weights.toArray.mkString(",")}")

    // The results should be close to the distribution we set up.
    assert(Stats.aboutEq(min(gmm.means), -1.0, 1e-1), "Smallest mean should be close to -1.0")
    assert(Stats.aboutEq(max(gmm.means), 5.0, 1e-1), "Largest mean should be close to 5.0")
    assert(Stats.aboutEq(math.sqrt(min(gmm.variances)), 0.5, 1e-1), "Smallest SD should be close to 0.5")
    assert(Stats.aboutEq(math.sqrt(max(gmm.variances)), 1.0, 1e-1), "Largest SD should be close to 1.0")
  }
}
Example 20
Source File: TestUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random

// The enclosing object declaration is elided in the original snippet and is
// reconstructed here; genData and the image types referenced below are defined
// elsewhere in the original TestUtils.scala.
object TestUtils {

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x, y, z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x, y, z).map(_.toByte), ImageMetadata(x, y, z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed + index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
}