breeze.stats.distributions.Rand Scala Examples
The following examples show how to use breeze.stats.distributions.Rand.
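Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of the `Rand` API itself: a `Rand[T]` is a lazy generator whose `draw()` produces a single sample, and `map`/`flatMap` compose generators into new ones.

```scala
import breeze.stats.distributions.Rand

object RandBasics extends App {
  // Primitive generators: each is a Rand[T]; draw() pulls one sample.
  val u: Double = Rand.uniform.draw()                    // uniform on [0, 1)
  val i: Int = Rand.randInt(10).draw()                   // uniform integer in [0, 10)
  val perm: IndexedSeq[Int] = Rand.permutation(5).draw() // random permutation of 0 until 5

  // Rand is monadic: map/flatMap build derived generators without sampling yet.
  val die: Rand[Int] = Rand.randInt(6).map(_ + 1)
  val twoDice: Rand[Int] = for { a <- die; b <- die } yield a + b

  println(twoDice.sample(3)) // three draws, e.g. Vector(7, 4, 10)
}
```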
Example 1
Source File: Hedge.scala From banditsbook-scala with MIT License
```scala
package com.github.everpeace.banditsbook.algorithm.hedge

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag

object Hedge {

  case class State(η: Double, counts: Vector[Int], gains: Vector[Double])

  def Algorithm(η: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand) = {
    require(η > 0, "η must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State =
        State(η, zeros(arms.size), zeros(arms.size))

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val gains = state.gains
        val η = state.η
        val p = exp(gains / η) / sum(exp(gains / η))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val gains = state.gains

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = gains(chosen) + reward
        gains.update(chosen, expectation)

        state.copy(counts = counts, gains = gains)
      }
    }
  }
}
```
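A pattern worth noting here (it recurs in the other banditsbook examples below): the factory takes an implicit `rand: RandBasis = Rand`, so the global, unseeded `Rand` basis is used unless the caller supplies one. A hypothetical usage sketch, assuming a Breeze version that provides `RandBasis.withSeed`:

```scala
import breeze.stats.distributions.RandBasis
import com.github.everpeace.banditsbook.algorithm.hedge.Hedge

object SeededHedge extends App {
  // Supplying an explicit RandBasis overrides the default `Rand` in the
  // implicit parameter list, making the algorithm's draws reproducible.
  implicit val seeded: RandBasis = RandBasis.withSeed(42)
  val hedge = Hedge.Algorithm(η = 1.5) // resolves `seeded` instead of the global Rand
}
```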
Example 2
Source File: BreezeSpec.scala From scio with Apache License 2.0
```scala
package com.spotify.scio.extra

import breeze.linalg.{DenseMatrix, DenseVector, SparseVector}
import breeze.stats.distributions.Rand
import com.spotify.scio.extra.Breeze._
import com.twitter.algebird.Semigroup
import org.scalacheck._

trait BreezeSpec[M[_], T] extends PropertySpec {
  val dimension = 10
  val rows = 20
  val cols = 10
  val fRand = Rand.uniform.map(_.toFloat)
  val m: Gen[M[T]]
  def ms: Gen[List[M[T]]] = Gen.listOf[M[T]](m)
  def plus(x: M[T], y: M[T])(implicit sg: Semigroup[M[T]]): M[T] = sg.plus(x, y)
  def sumOption(xs: Iterable[M[T]])(implicit sg: Semigroup[M[T]]): Option[M[T]] =
    sg.sumOption(xs)
}

class FloatDenseVectorSpec extends BreezeSpec[DenseVector, Float] {
  val m = Gen.const(dimension).map(DenseVector.rand[Float](_, fRand))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseVectorSpec extends BreezeSpec[DenseVector, Double] {
  val m = Gen.const(dimension).map(DenseVector.rand[Double](_))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatDenseMatrixSpec extends BreezeSpec[DenseMatrix, Float] {
  val m = Gen.const((rows, cols)).map { case (r, c) => DenseMatrix.rand[Float](r, c, fRand) }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseMatrixSpec extends BreezeSpec[DenseMatrix, Double] {
  val m = Gen.const((rows, cols)).map { case (r, c) => DenseMatrix.rand[Double](r, c) }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatSparseVectorSpec extends BreezeSpec[SparseVector, Float] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Float](d, fRand).data))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleSparseVectorSpec extends BreezeSpec[SparseVector, Double] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Double](d).data))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}
```
Example 3
Source File: nmfSuite.scala From SparkAndMPIFactorizations with MIT License
```scala
package org.apache.spark.mllib

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.scalatest._
import breeze.linalg._
import breeze.stats.distributions.Rand
import org.apache.spark.mllib.util.TestingUtils._

class nmfSuite extends FunSuite with BeforeAndAfterAll {
  private var sc: SparkContext = null
  private val conf = new SparkConf().setAppName("NMF Test Suite").setMaster("local")

  override protected def beforeAll(): Unit = {
    sc = new SparkContext(conf)
  }

  override protected def afterAll(): Unit = {
    sc.stop()
  }

  test("NMF works locally on a separable matrix") {
    // about 800MB in doubles
    val m = 500000
    val k = 7
    val n = 200
    val rowsPerPartition = Array.fill[Int](1000)(500)

    var basisIndices = Rand.permutation(m).draw.slice(0, k)
    val W = DenseMatrix.zeros[Double](m, k)
    (0 until k) foreach { idx => W(basisIndices(idx), idx) = 1.0 }

    val reorder = Rand.permutation(n).draw
    basisIndices = (0 until k).map(reorder.indexOf(_))
    val H = DenseMatrix.horzcat(
      DenseMatrix.eye[Double](k),
      DenseMatrix.rand(k, n - k)).apply(::, reorder).toDenseMatrix
    val A = W * H

    // use exclusive ranges so each of the 1000 chunks has exactly 500 rows
    // and the last chunk stays in bounds
    val chunksRDD = sc.parallelize((0 until 1000).map(chunkIdx =>
      A(chunkIdx * 500 until (chunkIdx + 1) * 500, ::).toDenseMatrix))
    val rowPartitionedA = RowPartitionedMatrix.fromMatrix(chunksRDD)

    // the solved factors are renamed so they don't collide with the vals W and H above
    val (solvedIndices, solvedW, solvedH) = nmf.fullNMF(rowPartitionedA, k)

    val overlap = basisIndices.toSet.intersect(solvedIndices.toSet)
    info(s"original basis column indices: ${basisIndices.mkString(" ")}")
    info(s"returned basis column indices: ${solvedIndices.mkString(" ")}")
    info(s"overlapped ${overlap.size} of $k basis indices out of $n choices")

    val relerr = norm((A - solvedW * solvedH).flatten()) / norm(A.flatten())
    info(s"relative frob norm err: $relerr")
    assert(relerr < 1)
  }
}
```
Example 4
Source File: xraySuite.scala From SparkAndMPIFactorizations with MIT License
```scala
package org.apache.spark.mllib

import org.scalatest._
import breeze.stats.distributions.Rand
import breeze.linalg._
import org.apache.spark.mllib.util.TestingUtils._

class xRaySuite extends FunSuite {

  test("solver for H is working") {
    val n = 50
    val k = 7
    val ell = 20
    val basis = DenseMatrix.eye[Double](n).apply(::, 0 until k)
    val H = DenseMatrix.horzcat(
      DenseMatrix.eye[Double](k),
      DenseMatrix.rand(k, ell - k))
    val A = basis * H
    val solvedH = xray.findWeights(A, basis)
    assert(norm((solvedH - H).toDenseVector) ~== 0 absTol 1e-10)
  }

  test("returns reasonable column indices") {
    val n = 50
    val k = 7
    val ell = 20
    var basisIndices = Rand.permutation(n).draw.slice(0, k)
    val basis = DenseMatrix.eye[Double](n).apply(::, basisIndices)
    val lincombs = basis * DenseMatrix.rand(k, ell - k)
    val reorder = Rand.permutation(ell).draw
    val A = DenseMatrix.horzcat(basis, lincombs).apply(::, reorder).toDenseMatrix
    basisIndices = (0 until k).map(reorder.indexOf(_))

    val (xrayIndices, h) = xray.computeXray(A, k)
    assert(xrayIndices.length == k)

    val overlap = basisIndices.toSet.intersect(xrayIndices.toSet)
    info(s"Actual columns: ${basisIndices.mkString(" ")}")
    info(s"Returned columns: ${xrayIndices.mkString(" ")}")
    info(s"Recovered ${overlap.size} of $k basis columns out of $ell overall columns")
  }
}
```
Example 5
Source File: PoissonRegressionTest.scala From doddle-model with Apache License 2.0
```scala
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector, convert}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.PoissonRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class PoissonRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-2f)

  "Poisson regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(3.0f, 4.0f)

    val model = PoissonRegression(lambda = 1.0f)
    ev.lossStateless(model, w, x, y) shouldEqual 29926.429998513137f
  }

  it should "calculate the gradient of the loss function wrt. model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = convert(DenseVector.rand(10, rand = Rand.randInt(20)), Float)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = PoissonRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an[IllegalArgumentException] shouldBe thrownBy(PoissonRegression(lambda = -0.5f))
  }

  it should "throw an exception if fitting a model on a dataset that is not count data" in {
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(3.0f, 1.0f, 2.0f)
    )
    val y = DenseVector.rand[Float](3, rand = randomUniform)
    val model = PoissonRegression()

    an[IllegalArgumentException] shouldBe thrownBy(ev.fit(model, x, y))
  }
}
```
Example 6
Source File: TestingUtils.scala From doddle-model with Apache License 2.0
```scala
package io.picnicml.doddlemodel

import breeze.linalg.{DenseMatrix, DenseVector, convert, zipValues}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.data.{Dataset, RealVector}
import org.scalactic.Equality

trait TestingUtils {

  implicit lazy val randomUniform: Rand[Float] = new Rand[Float] {
    override def draw(): Float = Rand.uniform.draw().toFloat
  }

  def breezeEqual(x0: DenseMatrix[Float], x1: DenseMatrix[Float])(implicit tol: Equality[Float]): Boolean =
    breezeEqual(x0.toDenseVector, x1.toDenseVector)

  def breezeEqual(x0: RealVector, x1: RealVector)(implicit tol: Equality[Float]): Boolean =
    zipValues(x0, x1).forall((v0, v1) => (v0.isNaN && v1.isNaN) || tol.areEquivalent(v0, v1))

  def gradApprox(func: RealVector => Float, x: RealVector, h: Double = 1e-3): RealVector = {
    // two-sided finite differences
    val grad = DenseVector.zeros[Double](x.length)
    for ((i, _) <- x.activeIterator) {
      val xPlusH = convert(x.copy, Double)
      xPlusH(i) += h
      val xMinusH = convert(x.copy, Double)
      xMinusH(i) -= h
      grad(i) = (func(convert(xPlusH, Float)) - func(convert(xMinusH, Float)).toDouble) / (2.0 * h)
    }
    convert(grad, Float)
  }

  def dummyData(nRows: Int, nCols: Int = 1): Dataset =
    (DenseMatrix.zeros[Float](nRows, nCols), convert(DenseVector((0 until nRows).toArray), Float))
}
```
Example 7
Source File: Standard.scala From banditsbook-scala with MIT License
```scala
package com.github.everpeace.banditsbook.algorithm.softmax

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag

object Standard {

  case class State(τ: Double, counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(τ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand) = {
    require(τ > 0, "τ must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State =
        State(τ, zeros(arms.size), zeros(arms.size))

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val expectations = state.expectations
        val τ = state.τ
        val p = exp(expectations / τ) / sum(exp(expectations / τ))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
}
```
Example 8
Source File: Exp3.scala From banditsbook-scala with MIT License
```scala
package com.github.everpeace.banditsbook.algorithm.exp3

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag

object Exp3 {

  case class State(γ: Double, weights: Vector[Double], counts: Vector[Int])

  def Algorithm(γ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand) = {
    require(0 < γ && γ <= 1, "γ must be in (0,1]")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State =
        State(γ, fill(arms.size)(1.0d), zeros[Int](arms.size))

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
        CategoricalDistribution(probs(state.γ, state.weights)).draw()

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val weights = state.weights

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val K = weights.size
        val p = probs(state.γ, weights)
        val x = zeros[Double](K)
        x.update(chosen, reward / p(chosen))
        weights *= exp((state.γ * x) / K.toDouble)

        state.copy(weights = weights, counts = counts)
      }

      private def probs(γ: Double, weights: Vector[Double]): Vector[Double] = {
        val K = weights.size // #arms
        ((1 - γ) * (weights / sum(weights))) + (γ / K)
      }
    }
  }
}
```
Example 9
Source File: UCB1.scala From banditsbook-scala with MIT License
```scala
package com.github.everpeace.banditsbook.algorithm.ucb

import breeze.numerics.sqrt
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag

object UCB1 {

  import breeze.linalg._
  import Vector._

  case class State(counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand) = {
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State =
        State(zeros(arms.size), zeros(arms.size))

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val counts = state.counts
        val expectations = state.expectations
        val step = sum(counts)
        val factor = fill(counts.size)(2 * scala.math.log(step))
        val bonus = sqrt(factor / counts.map(_.toDouble))
        val score = expectations + bonus
        argmax(score)
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
}
```
Example 10
Source File: Standard.scala From banditsbook-scala with MIT License
```scala
package com.github.everpeace.banditsbook.algorithm.epsilon_greedy

import breeze.linalg.argmax
import breeze.stats.distributions.{Bernoulli, Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag

object Standard {

  import breeze.linalg.Vector
  import Vector._

  case class State(ε: Double, counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(ε: Double)(implicit zeroDouble: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand) =
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State =
        State(ε, zeros[Int](arms.size), zeros[Double](arms.size))

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
        Bernoulli.distribution(state.ε).draw() match {
          case true =>
            // Exploit
            argmax(state.expectations)
          case false =>
            // Explore
            Rand.randInt(state.expectations.size).draw()
        }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
}
```
Example 11
Source File: BHTSNE.scala From spark-tsne with Apache License 2.0
```scala
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.tree.SPTree
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object BHTSNE {
  private def logger = LoggerFactory.getLogger(BHTSNE.getClass)

  def tsne(
      input: RowMatrix,
      noDims: Int = 2,
      maxIterations: Int = 1000,
      perplexity: Double = 30,
      theta: Double = 0.5,
      reportLoss: Int => Boolean = { i => i % 10 == 0 },
      callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = { case _ => },
      seed: Long = Random.nextLong()): DenseMatrix[Double] = {
    if (input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)
    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1)) :/ 1e4
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom()
      .map(rows => rows.map {
        case (i, data) =>
          (i, data.map(_._1).toSeq, DenseVector(data.map(_._2 * exaggeration_factor).toArray))
      })
      .cache()

    var iteration = 1
    while (iteration <= maxIterations) {
      val bcY = P.context.broadcast(Y)
      val bcTree = P.context.broadcast(SPTree(Y))

      val initialValue = (DenseMatrix.zeros[Double](n, noDims), DenseMatrix.zeros[Double](n, noDims), 0.0)
      val (posF, negF, sumQ) = P.treeAggregate(initialValue)(
        seqOp = (c, v) => {
          // c: (pos, neg, sumQ), v: Array[(i, Seq(j), vec(Distance))]
          TSNEGradient.computeEdgeForces(v, bcY.value, c._1)
          val q = TSNEGradient.computeNonEdgeForces(bcTree.value, bcY.value, theta, c._2, v.map(_._1): _*)
          (c._1, c._2, c._3 + q)
        },
        combOp = (c1, c2) => {
          // c: (grad, loss)
          (c1._1 + c2._1, c1._2 + c2._2, c1._3 + c2._3)
        })

      val dY: DenseMatrix[Double] = posF :- (negF :/ sumQ)

      TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

      if (reportLoss(iteration)) {
        val loss = P.treeAggregate(0.0)(
          seqOp = (c, v) => {
            TSNEGradient.computeLoss(v, bcY.value, sumQ)
          },
          combOp = _ + _
        )
        logger.debug(s"Iteration $iteration finished with $loss")
        callback(iteration, Y.copy, Some(loss))
      } else {
        logger.debug(s"Iteration $iteration finished")
        callback(iteration, Y.copy, None)
      }

      bcY.destroy()
      bcTree.destroy()

      // undo early exaggeration
      if (iteration == early_exaggeration) {
        P.foreach { rows =>
          rows.foreach {
            case (_, _, vec) =>
              vec.foreachPair {
                case (i, v) => vec.update(i, v / exaggeration_factor)
              }
          }
        }
      }

      iteration += 1
    }
    Y
  }
}
```
Example 12
Source File: BlockedMultiVariateGaussian.scala From DynaML with Apache License 2.0
```scala
package io.github.mandar2812.dynaml.probability.distributions

import breeze.numerics._
import math.{Pi, log1p}
import breeze.stats.distributions.{Moments, Rand, RandBasis}
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.probability.GaussianRV

import scala.runtime.ScalaRunTime

case class BlockedMultiVariateGaussian(
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(implicit rand: RandBasis = Rand)
  extends AbstractContinuousDistr[PartitionedVector]
    with Moments[PartitionedVector, PartitionedPSDMatrix]
    with HasErrorBars[PartitionedVector] {

  def draw() = {
    val nE: Int = if (mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val z: PartitionedVector = PartitionedVector.rand(mean.rows, nE, GaussianRV(0.0, 1.0))
    val m: PartitionedVector = root * z
    m + mean
  }

  private lazy val root: LowerTriPartitionedMatrix = bcholesky(covariance)

  override def toString() = ScalaRunTime._toString(this)

  override def unnormalizedLogPdf(t: PartitionedVector) = {
    val centered: PartitionedVector = t - mean
    val z: PartitionedVector = root \ centered
    val slv: PartitionedVector = root.t \ z

    val d: Double = slv dot centered
    -1.0 * d / 2.0
  }

  override lazy val logNormalizer = {
    // determinant of the cholesky decomp is the sqrt of the determinant of the cov matrix
    // this is the log det of the cholesky decomp
    val det = bsum(blog(bdiag(root)))
    mean.rows.toDouble / 2 * log(2 * Pi) + 0.5 * det
  }

  def variance = covariance

  def mode = mean

  lazy val entropy = {
    mean.rows.toDouble * log1p(2 * Pi) + bsum(blog(bdiag(root)))
  }

  override def confidenceInterval(s: Double) = {
    val signFlag = if (s < 0) -1.0 else 1.0
    val nE: Int = if (mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val ones = PartitionedVector.ones(mean.rows, nE)
    val multiplier = signFlag * s
    val bar: PartitionedVector = root * (ones * multiplier)
    (mean - bar, mean + bar)
  }
}
```
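A short justification for the `logNormalizer` above, since the source comment compresses it: with the Cholesky factorization Σ = L Lᵀ, det Σ = (det L)² = (∏ᵢ Lᵢᵢ)², so ½ log det Σ = Σᵢ log Lᵢᵢ, which is exactly the `bsum(blog(bdiag(root)))` term; the determinant itself is never formed.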
Example 13
Source File: RejectionSamplingScheme.scala From DynaML with Apache License 2.0
```scala
package io.github.mandar2812.dynaml.probability

import breeze.stats.distributions.{Density, Rand}
import io.github.mandar2812.dynaml.pipes._
import io.github.mandar2812.dynaml.probability.distributions.GenericDistribution

import scala.util.Random
import org.apache.log4j.Logger

abstract class RejectionSamplingScheme[
  ConditioningSet, Domain,
  Dist <: Density[ConditioningSet] with Rand[ConditioningSet],
  DistL <: Density[Domain] with Rand[Domain],
  JointDist <: Density[(ConditioningSet, Domain)] with Rand[(ConditioningSet, Domain)],
  Likelihood <: RandomVarWithDistr[Domain, DistL]](
  p: RandomVarWithDistr[ConditioningSet, Dist],
  c: DataPipe[ConditioningSet, Likelihood])
  extends RandomVarWithDistr[(ConditioningSet, Domain), JointDist]
    with BayesJointProbabilityScheme[
      ConditioningSet, Domain,
      RandomVarWithDistr[ConditioningSet, Dist],
      Likelihood] { self =>

  override val prior: RandomVarWithDistr[ConditioningSet, Dist] = p

  override val likelihood: DataPipe[ConditioningSet, Likelihood] = c

  var Max_Candidates: Int = 1000

  var Max_Estimations: Int = 10000

  override val sample = prior.sample >
    BifurcationPipe[ConditioningSet, ConditioningSet, Domain](
      (c: ConditioningSet) => (c, likelihood(c).draw)
    )

  override val posterior: DataPipe[Domain, RandomVariable[ConditioningSet]] =
    DataPipe((data: Domain) => {

      val sampl = this.prior.sample
      val q = this.prior.underlyingDist

      val M = (1 to Max_Estimations).map(i => {
        likelihood(sampl()).underlyingDist(data)
      }).sum / Max_Estimations.toDouble

      new RandomVarWithDistr[ConditioningSet, GenericDistribution[ConditioningSet]] { innerself =>

        val logger = Logger.getLogger(this.getClass)

        override val sample: DataPipe[Unit, ConditioningSet] = DataPipe(() => {
          // `iterations` must be a var and advance on every proposal,
          // otherwise the Max_Candidates cap is never enforced
          var iterations = 0
          var accepted = false
          var accepted_sample: ConditioningSet = sampl()

          while (!accepted && iterations < Max_Candidates) {
            // generate a candidate
            val candidate = sampl()
            val a = underlyingDist(candidate) / (M * q(candidate))
            if (Random.nextDouble() <= a) {
              logger.info("... Sample Accepted ...")
              accepted = true
              accepted_sample = candidate
            }
            iterations += 1
          }

          accepted_sample
        })

        override val underlyingDist: GenericDistribution[ConditioningSet] =
          new GenericDistribution[ConditioningSet] {
            override def apply(x: ConditioningSet): Double =
              prior.underlyingDist(x) * likelihood(x).underlyingDist(data)

            override def draw() = innerself.sample()
          }
      }
    })
}
```
Example 14
Source File: CosineRandomFeaturesSuite.scala From keystone with Apache License 2.0
```scala
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics.cos
import breeze.stats._
import breeze.stats.distributions.{CauchyDistribution, Rand}
import org.scalatest.FunSuite
import keystoneml.utils.Stats

class CosineRandomFeaturesSuite extends FunSuite {
  val gamma = 1.34
  val numInputFeatures = 400
  val numOutputFeatures = 1000

  test("Gaussian cosine random features") {
    val rf = CosineRandomFeatures(numInputFeatures, numOutputFeatures, gamma)

    // Check that b is uniform
    assert(max(rf.b) <= 2 * math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is gaussian
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(mean(rf.W), 0, 10e-3 * gamma))
    assert(Stats.aboutEq(variance(rf.W), gamma * gamma, 10e-3 * gamma * gamma))

    // Check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }

  test("Cauchy cosine random features") {
    val rf = CosineRandomFeatures(
      numInputFeatures,
      numOutputFeatures,
      gamma,
      new CauchyDistribution(0, 1))

    // Check that b is uniform
    assert(max(rf.b) <= 2 * math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is cauchy
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(median(rf.W), 0, 10e-3 * gamma))

    // Check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }
}
```
Example 15
Source File: LinearRectifierSuite.scala From keystone with Apache License 2.0
```scala
package keystoneml.nodes.stats

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Rand
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.pipelines._
import keystoneml.utils.{TestUtils, MatrixUtils}
import keystoneml.workflow.PipelineContext

class LinearRectifierSuite extends FunSuite with PipelineContext with Logging {

  test("Test MaxVal") {
    sc = new SparkContext("local", "test")
    val matrixParts = TestUtils.createRandomMatrix(sc, 128, 16, 4).rdd.map(_.mat)

    val x = matrixParts.flatMap(y => MatrixUtils.matrixToRowArray(y))
    val y = x.map(r => r.forall(_ >= 0.0))

    val valmaxNode = LinearRectifier()
    val maxy = valmaxNode.apply(x).map(r => r.forall(_ >= 0.0))

    // The random matrix should *not* all be >= 0
    assert(!y.reduce { (a, b) => a | b })

    // The valmax'ed random matrix *should* all be >= 0,
    // so combine the per-row checks with & rather than |
    assert(maxy.reduce { (a, b) => a & b })
  }
}
```
Example 16
Source File: BlockLinearMapperSuite.scala From keystone with Apache License 2.0
```scala
package keystoneml.nodes.learning

import breeze.linalg.{DenseVector, DenseMatrix}
import breeze.stats.distributions.Rand
import keystoneml.workflow.PipelineContext

import scala.collection.mutable.ArrayBuffer

import org.scalatest.FunSuite
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import keystoneml.pipelines._
import keystoneml.utils.Stats

class BlockLinearMapperSuite extends FunSuite with PipelineContext with Logging {

  test("BlockLinearMapper transformation") {
    sc = new SparkContext("local", "test")

    val inDims = 1000
    val outDims = 100
    val numChunks = 5
    val numPerChunk = inDims / numChunks

    val mat = DenseMatrix.rand(inDims, outDims, Rand.gaussian)
    val vec = DenseVector.rand(inDims, Rand.gaussian)
    val intercept = DenseVector.rand(outDims, Rand.gaussian)

    val splitVec = (0 until numChunks).map(i => vec((numPerChunk * i) until (numPerChunk * i + numPerChunk)))
    val splitMat = (0 until numChunks).map(i => mat((numPerChunk * i) until (numPerChunk * i + numPerChunk), ::))

    val linearMapper = new LinearMapper[DenseVector[Double]](mat, Some(intercept))
    val blockLinearMapper = new BlockLinearMapper(splitMat, numPerChunk, Some(intercept))

    val linearOut = linearMapper(vec)

    // Test with intercept
    assert(Stats.aboutEq(blockLinearMapper(vec), linearOut, 1e-4))

    // Test the apply and evaluate call
    val blmOuts = new ArrayBuffer[RDD[DenseVector[Double]]]
    val splitVecRDDs = splitVec.map { vec =>
      sc.parallelize(Seq(vec), 1)
    }
    blockLinearMapper.applyAndEvaluate(splitVecRDDs,
      (predictedValues: RDD[DenseVector[Double]]) => {
        blmOuts += predictedValues
        ()
      }
    )

    // The last blmOut should match the linear mapper's output
    assert(Stats.aboutEq(blmOuts.last.collect()(0), linearOut, 1e-4))
  }
}
```
Example 17
Source File: TestUtils.scala From keystone with Apache License 2.0
```scala
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random

// Excerpt: the original listing elides the enclosing object's declaration and
// its genData helper; the object wrapper is restored here so the methods parse.
object TestUtils {

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x, y, z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x, y, z).map(_.toByte), ImageMetadata(x, y, z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      // seed + index gives each partition a distinct but reproducible stream
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed + index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
}
```
Example 18
Source File: CosineRandomFeatures.scala From keystone with Apache License 2.0
```scala
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand
import org.apache.spark.rdd.RDD
import keystoneml.pipelines._
import keystoneml.utils.MatrixUtils
import keystoneml.workflow.Transformer

// Excerpt: the CosineRandomFeatures transformer class itself is elided in the
// original listing; only the companion object's factory method survives.
object CosineRandomFeatures {

  def apply(
      numInputFeatures: Int,
      numOutputFeatures: Int,
      gamma: Double,
      wDist: Rand[Double] = Rand.gaussian,
      bDist: Rand[Double] = Rand.uniform) = {
    val W = DenseMatrix.rand(numOutputFeatures, numInputFeatures, wDist) :* gamma
    val b = DenseVector.rand(numOutputFeatures, bDist) :* (2 * math.Pi)
    new CosineRandomFeatures(W, b)
  }
}
```
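For context (this is not stated in the file itself): the construction above is the random Fourier feature map of Rahimi and Recht, x ↦ cos(Wx + b). With Gaussian-distributed `W` it approximates an RBF kernel, and with a Cauchy-distributed `W` (as exercised in the CosineRandomFeaturesSuite above) it approximates a Laplacian kernel.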
Example 19
Source File: SimpleTSNE.scala From spark-tsne with Apache License 2.0
```scala
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object SimpleTSNE {
  private def logger = LoggerFactory.getLogger(SimpleTSNE.getClass)

  def tsne(
      input: RowMatrix,
      noDims: Int = 2,
      maxIterations: Int = 1000,
      perplexity: Double = 30,
      callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = { case _ => },
      seed: Long = Random.nextLong()): DenseMatrix[Double] = {
    if (input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)
    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1))
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom().cache()

    var iteration = 1
    while (iteration <= maxIterations) {
      val bcY = P.context.broadcast(Y)

      val numerator = P.map { arr => TSNEGradient.computeNumerator(bcY.value, arr.map(_._1): _*) }.cache()
      val bcNumerator = P.context.broadcast({
        numerator.treeAggregate(0.0)(seqOp = (x, v) => x + sum(v), combOp = _ + _)
      })

      val (dY, loss) = P.zip(numerator).treeAggregate((DenseMatrix.zeros[Double](n, noDims), 0.0))(
        seqOp = (c, v) => {
          // c: (grad, loss), v: (Array[(i, Iterable(j, Distance))], numerator)
          val l = TSNEGradient.compute(v._1, bcY.value, v._2, bcNumerator.value, c._1, iteration <= early_exaggeration)
          (c._1, c._2 + l)
        },
        combOp = (c1, c2) => {
          // c: (grad, loss)
          (c1._1 + c2._1, c1._2 + c2._2)
        })

      bcY.destroy()
      bcNumerator.destroy()
      numerator.unpersist()

      TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

      logger.debug(s"Iteration $iteration finished with $loss")
      callback(iteration, Y.copy, Some(loss))
      iteration += 1
    }
    Y
  }
}
```