breeze.stats.distributions.Rand Scala Examples

The following examples show how to use breeze.stats.distributions.Rand. They are taken from open-source Scala projects; the source file, project, and license are noted above each example.
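For orientation before diving in: Rand is Breeze's monadic random-variable type. Below is a minimal sketch (not from any of the projects above, assumes only Breeze on the classpath) of the parts of its API these examples rely on:

import breeze.stats.distributions.Rand

object RandPrimer extends App {
  Rand.generator.setSeed(42L)                          // seed the shared generator, as Example 11 does

  val u: Double = Rand.uniform.draw()                  // one draw from U(0, 1)
  val g: Double = Rand.gaussian.draw()                 // one draw from N(0, 1)
  val f: Float  = Rand.uniform.map(_.toFloat).draw()   // map/flatMap compose distributions

  val ints = Rand.randInt(10).sample(5)                // five draws from {0, ..., 9}
  println(s"$u $g $f $ints")
}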
Example 1
Source File: Hedge.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.hedge

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Hedge {

  case class State(η: Double, counts: Vector[Int], gains: Vector[Double])

  def Algorithm(η: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(η > 0, "η must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        η, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val gains = state.gains
        val η = state.η
        val p = exp(gains / η) / sum(exp(gains / η))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val gains = state.gains

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = gains(chosen) + reward
        gains.update(chosen, expectation)

        state.copy(counts = counts, gains = gains)
      }
    }
  }
} 
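CategoricalDistribution above is defined in the banditsbook project itself. With plain Breeze, Multinomial plays the same role for the softmax weighting in selectArm; a minimal standalone sketch (not part of the original project):

import breeze.linalg.{sum, DenseVector}
import breeze.numerics.exp
import breeze.stats.distributions.Multinomial

object SoftmaxDraw extends App {
  val gains = DenseVector(1.0, 2.0, 3.0)
  val eta = 1.0
  val p = exp(gains / eta) / sum(exp(gains / eta))   // softmax weights, as in selectArm
  println(Multinomial(p).draw())                     // arm index i sampled with probability p(i)
}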
Example 2
Source File: BreezeSpec.scala    From scio   with Apache License 2.0
package com.spotify.scio.extra

import breeze.linalg.{DenseMatrix, DenseVector, SparseVector}
import breeze.stats.distributions.Rand
import com.spotify.scio.extra.Breeze._
import com.twitter.algebird.Semigroup
import org.scalacheck._

trait BreezeSpec[M[_], T] extends PropertySpec {
  val dimension = 10
  val rows = 20
  val cols = 10
  val fRand = Rand.uniform.map(_.toFloat)
  val m: Gen[M[T]]
  def ms: Gen[List[M[T]]] = Gen.listOf[M[T]](m)
  def plus(x: M[T], y: M[T])(implicit sg: Semigroup[M[T]]): M[T] = sg.plus(x, y)
  def sumOption(xs: Iterable[M[T]])(implicit sg: Semigroup[M[T]]): Option[M[T]] = sg.sumOption(xs)
}

class FloatDenseVectorSpec extends BreezeSpec[DenseVector, Float] {
  val m = Gen.const(dimension).map(DenseVector.rand[Float](_, fRand))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseVectorSpec extends BreezeSpec[DenseVector, Double] {
  val m = Gen.const(dimension).map(DenseVector.rand[Double](_))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatDenseMatrixSpec extends BreezeSpec[DenseMatrix, Float] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Float](r, c, fRand)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseMatrixSpec extends BreezeSpec[DenseMatrix, Double] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Double](r, c)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatSparseVectorSpec extends BreezeSpec[SparseVector, Float] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Float](d, fRand).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleSparseVectorSpec extends BreezeSpec[SparseVector, Double] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Double](d).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
} 
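The fRand trick above is worth isolating: DenseVector.rand and DenseMatrix.rand default to a Rand[Double], so a Rand[Float] is derived with map. A standalone sketch of the same pattern:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.Rand

object RandomContainers extends App {
  val fRand = Rand.uniform.map(_.toFloat)            // Rand[Float] derived from Rand[Double]
  val v = DenseVector.rand[Float](10, fRand)         // 10 uniform floats
  val m = DenseMatrix.rand[Float](20, 10, fRand)     // a 20x10 matrix of uniform floats
  println(s"${v.length}, ${m.rows} x ${m.cols}")
}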
Example 3
Source File: nmfSuite.scala    From SparkAndMPIFactorizations   with MIT License
package org.apache.spark.mllib

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

import edu.berkeley.cs.amplab.mlmatrix.{RowPartitionedMatrix}

import org.scalatest._
import breeze.linalg._
import breeze.stats.distributions.Rand
import org.apache.spark.mllib.util.TestingUtils._

class nmfSuite extends FunSuite with BeforeAndAfterAll {

	private var sc: SparkContext = null
	private val conf = new SparkConf().setAppName("NMF Test Suite").setMaster("local")

	override protected def beforeAll(): Unit = {
		sc = new SparkContext(conf) 
	}

	override protected def afterAll(): Unit = {
		sc.stop()
	}	

	test("NMF works locally on a separable matrix") {
		// about 800MB in doubles
		val m = 500000
		val k = 7
		val n = 200
		val rowsPerPartition = Array.fill[Int](1000)(500)

		var basisIndices = Rand.permutation(m).draw.slice(0, k)
		val W = DenseMatrix.zeros[Double](m, k)
		(0 until k) foreach { idx => W(basisIndices(idx), idx) = 1.0 }

		val reorder = Rand.permutation(n).draw	
		basisIndices = (0 until k).map( reorder.indexOf(_) )
		val H = DenseMatrix.horzcat( DenseMatrix.eye[Double](k), DenseMatrix.rand(k, n-k)).apply(::, reorder).toDenseMatrix
		val A = W * H

		// 1000 chunks of 500 rows each; `until` avoids the overlapping slices and
		// out-of-bounds final chunk that the original `to` bounds would produce
		val chunksRDD = sc.parallelize( (0 until 1000).map(chunkIdx =>
			A(chunkIdx*500 until (chunkIdx+1)*500, ::).toDenseMatrix ))
		val rowPartitionedA = RowPartitionedMatrix.fromMatrix(chunksRDD)
		// fresh names: W and H are already bound above, so re-binding them here would not compile
		val (solvedIndices, solvedW, solvedH) = nmf.fullNMF(rowPartitionedA, k)
		
		val overlap = basisIndices.toSet.intersect(solvedIndices.toSet)
		info(s"original basis column indices: ${basisIndices.mkString(" ")}")
		info(s"returned basis column indices: ${solvedIndices.mkString(" ")}")
		info(s"overlapped ${overlap.size} of $k basis indices out of $n choices")

		val relerr = norm((A - solvedW*solvedH).flatten())/norm(A.flatten())
		info(s"relative frob norm err: $relerr")
		assert(relerr < 1)
	}
} 
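Both this suite and the next rely on Rand.permutation(n), which yields a random permutation of 0 until n; taking a prefix of it gives k distinct indices, i.e. sampling without replacement. In isolation:

import breeze.stats.distributions.Rand

object PermutationDemo extends App {
  val perm = Rand.permutation(10).draw()   // a random ordering of 0 until 10
  println(perm)
  println(perm.slice(0, 3))                // 3 distinct indices from 0 until 10
}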
Example 4
Source File: xraySuite.scala    From SparkAndMPIFactorizations   with MIT License
package org.apache.spark.mllib

import org.scalatest._
import breeze.stats.distributions.Rand
import breeze.linalg._
import org.apache.spark.mllib.util.TestingUtils._

class xRaySuite extends FunSuite {
	test("solver for H is working") {
		val n = 50
		val k = 7
		val ell = 20

		val basis = DenseMatrix.eye[Double](n).apply(::, 0 until k)
		val H = DenseMatrix.horzcat( DenseMatrix.eye[Double](k), DenseMatrix.rand(k, ell-k))
		val A = basis * H
		val solvedH = xray.findWeights(A, basis) 

		assert(norm((solvedH - H).toDenseVector) ~== 0 absTol 1e-10)
	}

	test("returns reasonable column indices") {
		val n = 50
		val k = 7
		val ell = 20

		var basisIndices = Rand.permutation(n).draw.slice(0, k)
		val basis = DenseMatrix.eye[Double](n).apply(::, basisIndices)
		val lincombs = basis * DenseMatrix.rand(k, ell-k)
		val reorder = Rand.permutation(ell).draw
		val A = DenseMatrix.horzcat(basis, lincombs).apply(::, reorder).toDenseMatrix
		basisIndices = (0 until k).map(reorder.indexOf(_))

		val (xrayIndices, h) = xray.computeXray(A, k)
		assert(xrayIndices.length == k)

		val overlap = basisIndices.toSet.intersect(xrayIndices.toSet) 
		info(s"Actual columns: ${basisIndices.mkString(" ")}")
		info(s"Returned columns: ${xrayIndices.mkString(" ")}")
		info(s"Recovered ${overlap.size} of $k basis columns out of $ell overall columns")		
	}	
} 
Example 5
Source File: PoissonRegressionTest.scala    From doddle-model   with Apache License 2.0
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector, convert}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.PoissonRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class PoissonRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-2f)

  "Poisson regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(3.0f, 4.0f)

    val model = PoissonRegression(lambda = 1.0f)
    ev.lossStateless(model, w, x, y) shouldEqual 29926.429998513137f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = convert(DenseVector.rand(10, rand = Rand.randInt(20)), Float)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = PoissonRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(PoissonRegression(lambda = -0.5f))
  }

  it should "throw an exception if fitting a model on a dataset that is not count data" in {
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(3.0f, 1.0f, 2.0f)
    )
    val y = DenseVector.rand[Float](3, rand = randomUniform)
    val model = PoissonRegression()

    an [IllegalArgumentException] shouldBe thrownBy(ev.fit(model, x, y))
  }
} 
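randomUniform here is the custom Rand[Float] defined in the project's TestingUtils trait (Example 6 below), while Rand.randInt(20) supplies the integer count targets. The latter on its own:

import breeze.linalg.{convert, DenseVector}
import breeze.stats.distributions.Rand

object CountTargets extends App {
  // ten integer targets in {0, ..., 19}, converted to Float as the test does
  val y = convert(DenseVector.rand(10, Rand.randInt(20)), Float)
  println(y)
}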
Example 6
Source File: TestingUtils.scala    From doddle-model   with Apache License 2.0
package io.picnicml.doddlemodel

import breeze.linalg.{DenseMatrix, DenseVector, convert, zipValues}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.data.{Dataset, RealVector}
import org.scalactic.Equality

trait TestingUtils {

  implicit lazy val randomUniform: Rand[Float] = new Rand[Float] {
    override def draw(): Float = Rand.uniform.draw().toFloat
  }

  def breezeEqual(x0: DenseMatrix[Float], x1: DenseMatrix[Float])(implicit tol: Equality[Float]): Boolean =
    breezeEqual(x0.toDenseVector, x1.toDenseVector)

  def breezeEqual(x0: RealVector, x1: RealVector)(implicit tol: Equality[Float]): Boolean =
    zipValues(x0, x1).forall((v0, v1) => (v0.isNaN && v1.isNaN) || tol.areEquivalent(v0, v1))

  def gradApprox(func: RealVector => Float, x: RealVector, h: Double = 1e-3): RealVector = {
    // two-sided finite differences
    val grad = DenseVector.zeros[Double](x.length)
    for ((i, _) <- x.activeIterator) {
      val xPlusH = convert(x.copy, Double)
      xPlusH(i) += h
      val xMinusH = convert(x.copy, Double)
      xMinusH(i) -= h
      grad(i) = (func(convert(xPlusH, Float)) - func(convert(xMinusH, Float)).toDouble) / (2.0 * h)
    }
    convert(grad, Float)
  }

  def dummyData(nRows: Int, nCols: Int = 1): Dataset =
    (DenseMatrix.zeros[Float](nRows, nCols), convert(DenseVector((0 until nRows).toArray), Float))
} 
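A hedged usage sketch of gradApprox (assuming doddle-model is on the classpath): the two-sided finite difference of f(v) = v . v should be close to the analytic gradient 2v.

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.TestingUtils

object GradApproxDemo extends App with TestingUtils {
  val x = DenseVector(1.0f, -2.0f, 3.0f)
  println(gradApprox(v => v dot v, x))   // analytically 2x = (2, -4, 6)
}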
Example 7
Source File: Standard.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.softmax

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  case class State(τ: Double, counts: Vector[Int], expectations: Vector[Double])


  def Algorithm(τ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(τ > 0, "τ must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        τ, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val expectations = state.expectations
        val τ = state.τ
        val p = exp(expectations / τ) / sum(exp(expectations / τ))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
} 
Example 8
Source File: Exp3.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.exp3

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Exp3 {

  case class State(γ: Double, weights: Vector[Double], counts: Vector[Int])

  def Algorithm(γ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(0 < γ && γ <= 1, "γ must be in (0,1]")

    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        γ, fill(arms.size)(1.0d), zeros[Int](arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
        CategoricalDistribution(probs(state.γ, state.weights)).draw()

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val weights = state.weights

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val K = weights.size
        val p = probs(state.γ, weights)
        val x = zeros[Double](K)
        x.update(chosen, reward/p(chosen))
        weights *= exp((state.γ * x) / K.toDouble)

        state.copy(weights = weights, counts = counts)
      }

      private def probs(γ: Double, weights: Vector[Double]): Vector[Double] = {
        val K = weights.size  // #arms
        ((1 - γ) * (weights / sum(weights))) + (γ / K)
      }
    }
  }
} 
Example 9
Source File: UCB1.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.ucb

import breeze.numerics.sqrt
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object UCB1 {

  import breeze.linalg._
  import Vector._

  case class State(counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val counts = state.counts
        val expectations = state.expectations
        val step = sum(counts)
        val factor = fill(counts.size)(2 * scala.math.log(step))
        val bonus = sqrt(factor / counts.map(_.toDouble))
        val score = expectations + bonus
        argmax(score)
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations
        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
} 
Example 10
Source File: Standard.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.epsilon_greedy

import breeze.linalg.argmax
import breeze.stats.distributions.{Bernoulli, Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  import breeze.linalg.Vector
  import Vector._

  case class State(ε: Double, counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(ε: Double)(implicit zeroDouble: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = new Algorithm[Double, State] {

    override def initialState(arms: Seq[Arm[Double]]): State =
      State(ε, zeros[Int](arms.size), zeros[Double](arms.size))

    override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
      Bernoulli.distribution(state.ε).draw() match {
        case true =>
          // Exploit
          argmax(state.expectations)
        case false =>
          // Explore
          Rand.randInt(state.expectations.size).draw()
      }

    override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
      val counts = state.counts
      val expectations = state.expectations

      val count = counts(chosen) + 1
      counts.update(chosen, count)

      val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
      expectations.update(chosen, expectation)
      state.copy(counts = counts, expectations = expectations)
    }
  }
} 
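The heart of the selection rule above is a Bernoulli coin flip plus a uniform arm choice via Rand.randInt. In isolation:

import breeze.stats.distributions.{Bernoulli, Rand}

object CoinFlipSelect extends App {
  val eps = 0.1
  val best = 2                                     // index of the currently best arm
  val arm =
    if (Bernoulli.distribution(eps).draw()) best   // branch taken with probability eps
    else Rand.randInt(5).draw()                    // any of 5 arms, uniformly
  println(arm)
}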
Example 11
Source File: BHTSNE.scala    From spark-tsne   with Apache License 2.0
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.tree.SPTree
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object BHTSNE {
  private def logger = LoggerFactory.getLogger(BHTSNE.getClass)

  def tsne(
            input: RowMatrix,
            noDims: Int = 2,
            maxIterations: Int = 1000,
            perplexity: Double = 30,
            theta: Double = 0.5,
            reportLoss: Int => Boolean = {i => i % 10 == 0},
            callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = {case _ => },
            seed: Long = Random.nextLong()
            ): DenseMatrix[Double] = {
    if(input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)

    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1)) :/ 1e4
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom()
      .map(rows => rows.map {
      case (i, data) =>
        (i, data.map(_._1).toSeq, DenseVector(data.map(_._2 * exaggeration_factor).toArray))
    })
      .cache()

      var iteration = 1
      while(iteration <= maxIterations) {
        val bcY = P.context.broadcast(Y)
        val bcTree = P.context.broadcast(SPTree(Y))

        val initialValue = (DenseMatrix.zeros[Double](n, noDims), DenseMatrix.zeros[Double](n, noDims), 0.0)
        val (posF, negF, sumQ) = P.treeAggregate(initialValue)(
          seqOp = (c, v) => {
            // c: (pos, neg, sumQ), v: Array[(i, Seq(j), vec(Distance))]
            TSNEGradient.computeEdgeForces(v, bcY.value, c._1)
            val q = TSNEGradient.computeNonEdgeForces(bcTree.value, bcY.value, theta, c._2, v.map(_._1): _*)
            (c._1, c._2, c._3 + q)
          },
          combOp = (c1, c2) => {
            // c: (grad, loss)
            (c1._1 + c2._1, c1._2 + c2._2, c1._3 + c2._3)
          })
        val dY: DenseMatrix[Double] = posF :- (negF :/ sumQ)

        TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

        if(reportLoss(iteration)) {
          val loss = P.treeAggregate(0.0)(
            seqOp = (c, v) => {
              TSNEGradient.computeLoss(v, bcY.value, sumQ)
            },
            combOp = _ + _
          )
          logger.debug(s"Iteration $iteration finished with $loss")
          callback(iteration, Y.copy, Some(loss))
        } else {
          logger.debug(s"Iteration $iteration finished")
          callback(iteration, Y.copy, None)
        }

        bcY.destroy()
        bcTree.destroy()

        //undo early exaggeration
        if(iteration == early_exaggeration) {
          P.foreach {
            rows => rows.foreach {
              case (_, _, vec) => vec.foreachPair { case (i, v) => vec.update(i, v / exaggeration_factor) }
            }
          }
        }

        iteration += 1
      }

    Y
  }
} 
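The embedding initialization above combines global seeding with a parameterized gaussian: Rand.generator.setSeed fixes the shared generator, and Rand.gaussian(mu, sigma) feeds DenseMatrix.rand. A minimal sketch of the same idiom:

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Rand

object SeededInit extends App {
  Rand.generator.setSeed(123L)                            // reproducible draws from the global basis
  val y0 = DenseMatrix.rand(5, 2, Rand.gaussian(0, 1)) * 1e-4   // matches the :/ 1e4 scaling above
  println(y0)
}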
Example 12
Source File: BlockedMultiVariateGaussian.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.probability.distributions

import breeze.numerics._

import math.{Pi, log1p}
import breeze.stats.distributions.{Moments, Rand, RandBasis}
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.probability.GaussianRV
import scala.runtime.ScalaRunTime


case class BlockedMultiVariateGaussian(
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(implicit rand: RandBasis = Rand) extends
  AbstractContinuousDistr[PartitionedVector] with
  Moments[PartitionedVector, PartitionedPSDMatrix] with
  HasErrorBars[PartitionedVector] {

  def draw() = {
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val z: PartitionedVector = PartitionedVector.rand(mean.rows, nE, GaussianRV(0.0, 1.0))
    val m: PartitionedVector = root * z
    m + mean
  }

  private lazy val root: LowerTriPartitionedMatrix = bcholesky(covariance)

  override def toString() =  ScalaRunTime._toString(this)

  override def unnormalizedLogPdf(t: PartitionedVector) = {
    val centered: PartitionedVector = t - mean
    val z: PartitionedVector = root \ centered
    val slv: PartitionedVector = root.t \ z
    val d: Double = slv dot centered
    -1.0*d/2.0

  }

  override lazy val logNormalizer = {
    // determinant of the cholesky decomp is the sqrt of the determinant of the cov matrix
    // this is the log det of the cholesky decomp
    val det = bsum(blog(bdiag(root)))
    mean.rows.toDouble/2 *  log(2 * Pi) + 0.5*det
  }

  def variance = covariance
  def mode = mean
  lazy val entropy = {
    mean.rows.toDouble * log1p(2 * Pi) + bsum(blog(bdiag(root)))
  }

  override def confidenceInterval(s: Double) = {
    val signFlag = if(s < 0) -1.0 else 1.0
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt

    val ones = PartitionedVector.ones(mean.rows, nE)
    val multiplier = signFlag*s

    val bar: PartitionedVector = root*(ones*multiplier)

    (mean - bar, mean + bar)
  }
} 
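draw() above is the standard recipe for sampling a multivariate gaussian: draw z ~ N(0, I) and return mean + L z, where L is the (here blocked) Cholesky factor of the covariance. For data that fits in memory, Breeze's own MultivariateGaussian implements the same recipe; a minimal sketch:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.MultivariateGaussian

object MvnDraw extends App {
  val mean = DenseVector(0.0, 1.0)
  val cov  = DenseMatrix((2.0, 0.3), (0.3, 1.0))   // symmetric positive definite
  println(MultivariateGaussian(mean, cov).draw())  // one sample, a DenseVector of length 2
}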
Example 13
Source File: RejectionSamplingScheme.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.probability

import breeze.stats.distributions.{Density, Rand}
import io.github.mandar2812.dynaml.pipes._
import io.github.mandar2812.dynaml.probability.distributions.GenericDistribution

import scala.util.Random
import org.apache.log4j.Logger




abstract class RejectionSamplingScheme[
ConditioningSet, Domain,
Dist <: Density[ConditioningSet] with Rand[ConditioningSet],
DistL <: Density[Domain] with Rand[Domain],
JointDist <: Density[(ConditioningSet, Domain)] with Rand[(ConditioningSet, Domain)],
Likelihood <: RandomVarWithDistr[Domain, DistL]](
  p: RandomVarWithDistr[ConditioningSet, Dist],
  c: DataPipe[ConditioningSet, Likelihood])
  extends RandomVarWithDistr[(ConditioningSet, Domain), JointDist]
    with BayesJointProbabilityScheme[
    ConditioningSet, Domain,
    RandomVarWithDistr[ConditioningSet, Dist],
    Likelihood] { self =>

  override val prior: RandomVarWithDistr[ConditioningSet, Dist] = p

  override val likelihood: DataPipe[ConditioningSet, Likelihood] = c

  var Max_Candidates: Int = 1000

  var Max_Estimations: Int = 10000

  override val sample = prior.sample >
  BifurcationPipe[ConditioningSet, ConditioningSet, Domain](
      (c: ConditioningSet) => (c, likelihood(c).draw)
    )

  override val posterior: DataPipe[Domain, RandomVariable[ConditioningSet]] =
    DataPipe((data: Domain) => {

    val sampl = this.prior.sample
    val q = this.prior.underlyingDist

    val M = (1 to Max_Estimations).map(i => {
      likelihood(sampl()).underlyingDist(data)
    }).sum/Max_Estimations.toDouble

    new RandomVarWithDistr[ConditioningSet, GenericDistribution[ConditioningSet]] { innerself =>

      val logger = Logger.getLogger(this.getClass)

      override val sample: DataPipe[Unit, ConditioningSet] = DataPipe(() => {
        var iterations = 0
        var accepted = false
        var accepted_sample: ConditioningSet = sampl()

        while(!accepted && iterations < Max_Candidates) {
          iterations += 1   // advance the counter, otherwise the candidate bound is never reached
          // generate a candidate
          val candidate = sampl()
          val a = underlyingDist(candidate)/(M*q(candidate))
          if(Random.nextDouble() <= a) {
            logger.info("... Sample Accepted ...")
            accepted = true
            accepted_sample = candidate
          }
        }

        accepted_sample
      })

      override val underlyingDist: GenericDistribution[ConditioningSet] = new GenericDistribution[ConditioningSet] {
        override def apply(x: ConditioningSet): Double =
          prior.underlyingDist(x)*likelihood(x).underlyingDist(data)

        override def draw() = innerself.sample()
      }

    }
  })

} 
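Stripped of the pipes and the M estimation above, the accept/reject core is small. A standalone sketch (not the scheme above) that samples N(0, 1) truncated to the nonnegative half-line by rejecting negative proposals:

import breeze.stats.distributions.Gaussian

object RejectDemo extends App {
  val proposal = Gaussian(0.0, 1.0)
  def draw(): Double = {
    val x = proposal.draw()
    if (x >= 0) x else draw()   // reject and retry; acceptance probability is 1/2
  }
  println(Seq.fill(5)(draw()))
}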
Example 14
Source File: CosineRandomFeaturesSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics.cos
import breeze.stats._
import breeze.stats.distributions.{CauchyDistribution, Rand}
import org.scalatest.FunSuite
import keystoneml.utils.Stats


class CosineRandomFeaturesSuite extends FunSuite {
  val gamma = 1.34
  val numInputFeatures = 400
  val numOutputFeatures = 1000

  test("Guassian cosine random features") {
    val rf = CosineRandomFeatures(numInputFeatures, numOutputFeatures, gamma)

    // Check that b is uniform
    assert(max(rf.b) <= 2*math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is gaussian
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(mean(rf.W),0, 10e-3 * gamma))
    assert(Stats.aboutEq(variance(rf.W), gamma * gamma, 10e-3 * gamma * gamma))

    //check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }

  test("Cauchy cosine random features") {
    val rf = CosineRandomFeatures(
      numInputFeatures,
      numOutputFeatures,
      gamma,
      new CauchyDistribution(0, 1))

    // Check that b is uniform
    assert(max(rf.b) <= 2*math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is cauchy
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(median(rf.W),0,10e-3 * gamma))

    //check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }
} 
Example 15
Source File: LinearRectifierSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Rand
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.pipelines._
import keystoneml.utils.{TestUtils, MatrixUtils}
import keystoneml.workflow.PipelineContext

class LinearRectifierSuite extends FunSuite with PipelineContext with Logging {

  test("Test MaxVal") {
    sc = new SparkContext("local", "test")
    val matrixParts = TestUtils.createRandomMatrix(sc, 128, 16, 4).rdd.map(_.mat)

    val x = matrixParts.flatMap(y => MatrixUtils.matrixToRowArray(y))
    val y = x.map(r => r.forall(_ >= 0.0))

    val valmaxNode = LinearRectifier()
    val maxy = valmaxNode.apply(x).map(r => r.forall(_ >= 0.0))

    //The random matrix should *not* all be >= 0
    assert(!y.reduce {(a,b) => a | b})

    //The valmax'ed random matrix *should* all be >= 0.
    assert(maxy.reduce {(a,b) => a | b})
  }
} 
Example 16
Source File: BlockLinearMapperSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.learning

import breeze.linalg.{DenseVector, DenseMatrix}
import breeze.stats.distributions.Rand
import keystoneml.workflow.PipelineContext
import scala.collection.mutable.ArrayBuffer

import org.scalatest.FunSuite

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import keystoneml.pipelines._
import keystoneml.utils.Stats

class BlockLinearMapperSuite extends FunSuite with PipelineContext with Logging {

  test("BlockLinearMapper transformation") {
    sc = new SparkContext("local", "test")

    val inDims = 1000
    val outDims = 100
    val numChunks = 5
    val numPerChunk = inDims/numChunks

    val mat = DenseMatrix.rand(inDims, outDims, Rand.gaussian)
    val vec = DenseVector.rand(inDims, Rand.gaussian)
    val intercept = DenseVector.rand(outDims, Rand.gaussian)

    val splitVec = (0 until numChunks).map(i => vec((numPerChunk*i) until (numPerChunk*i + numPerChunk)))
    val splitMat = (0 until numChunks).map(i => mat((numPerChunk*i) until (numPerChunk*i + numPerChunk), ::))

    val linearMapper = new LinearMapper[DenseVector[Double]](mat, Some(intercept))
    val blockLinearMapper = new BlockLinearMapper(splitMat, numPerChunk, Some(intercept))

    val linearOut = linearMapper(vec)

    // Test with intercept
    assert(Stats.aboutEq(blockLinearMapper(vec), linearOut, 1e-4))

    // Test the apply and evaluate call
    val blmOuts = new ArrayBuffer[RDD[DenseVector[Double]]]
    val splitVecRDDs = splitVec.map { vec =>
      sc.parallelize(Seq(vec), 1)
    }
    blockLinearMapper.applyAndEvaluate(splitVecRDDs,
      (predictedValues: RDD[DenseVector[Double]]) => {
        blmOuts += predictedValues
        ()
      }
    )

    // The last blmOut should match the linear mapper's output
    assert(Stats.aboutEq(blmOuts.last.collect()(0), linearOut, 1e-4))
  }
} 
Example 17
Source File: TestUtils.scala    From keystone   with Apache License 2.0
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


object TestUtils {

  // NOTE: `genData` and several other helpers are elided in this listing.
  // A minimal stand-in (not in the original file) so the methods below
  // compile: fill an x*y*z array with uniform draws.
  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Rand.uniform.draw())

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
} 
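The seed + partition-index pattern above is the standard way to get random streams that differ per Spark partition yet stay reproducible end to end. The explicit-RandBasis idiom on its own:

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator}
import org.apache.commons.math3.random.MersenneTwister

object SeededBasis extends App {
  // an explicit RandBasis: reproducible draws without touching the global Rand
  val basis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(42)))
  println(DenseMatrix.rand(2, 2, Gaussian(0.0, 1.0)(basis)))
}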
Example 18
Source File: CosineRandomFeatures.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand
import org.apache.spark.rdd.RDD
import keystoneml.pipelines._
import keystoneml.utils.MatrixUtils
import keystoneml.workflow.Transformer


// The CosineRandomFeatures Transformer class itself is elided in this listing;
// Example 14 shows its behaviour: rf(in) == cos((in.t * W.t).t + b).
object CosineRandomFeatures {

  def apply(
      numInputFeatures: Int,
      numOutputFeatures: Int,
      gamma: Double,
      wDist: Rand[Double] = Rand.gaussian,
      bDist: Rand[Double] = Rand.uniform) = {
    val W = DenseMatrix.rand(numOutputFeatures, numInputFeatures, wDist) :* gamma
    val b = DenseVector.rand(numOutputFeatures, bDist) :* (2*math.Pi)
    new CosineRandomFeatures(W, b)
  }
} 
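A hedged usage sketch (assuming KeystoneML on the classpath), mirroring the test in Example 14; passing a CauchyDistribution(0, 1) as wDist instead approximates a Laplacian rather than an RBF kernel:

import breeze.linalg.DenseVector
import breeze.stats.distributions.Rand
import keystoneml.nodes.stats.CosineRandomFeatures

object RffDemo extends App {
  val rf = CosineRandomFeatures(400, 1000, 1.34)   // gaussian W, uniform b by default
  val x  = DenseVector.rand(400, Rand.uniform)
  println(rf(x).length)                            // 1000 cosine features
}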
Example 19
Source File: SimpleTSNE.scala    From spark-tsne   with Apache License 2.0
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object SimpleTSNE {
  private def logger = LoggerFactory.getLogger(SimpleTSNE.getClass)

  def tsne(
            input: RowMatrix,
            noDims: Int = 2,
            maxIterations: Int = 1000,
            perplexity: Double = 30,
            callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = {case _ => },
            seed: Long = Random.nextLong()): DenseMatrix[Double] = {
    if(input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)

    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1))
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom().cache()

      var iteration = 1
      while(iteration <= maxIterations) {
        val bcY = P.context.broadcast(Y)

        val numerator = P.map{ arr => TSNEGradient.computeNumerator(bcY.value, arr.map(_._1): _*) }.cache()
        val bcNumerator = P.context.broadcast({
          numerator.treeAggregate(0.0)(seqOp = (x, v) => x + sum(v), combOp = _ + _)
        })

        val (dY, loss) = P.zip(numerator).treeAggregate((DenseMatrix.zeros[Double](n, noDims), 0.0))(
          seqOp = (c, v) => {
            // c: (grad, loss), v: (Array[(i, Iterable(j, Distance))], numerator)
            val l = TSNEGradient.compute(v._1, bcY.value, v._2, bcNumerator.value, c._1, iteration <= early_exaggeration)
            (c._1, c._2 + l)
          },
          combOp = (c1, c2) => {
            // c: (grad, loss)
            (c1._1 + c2._1, c1._2 + c2._2)
          })

        bcY.destroy()
        bcNumerator.destroy()
        numerator.unpersist()

        TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

        logger.debug(s"Iteration $iteration finished with $loss")
        callback(iteration, Y.copy, Some(loss))
        iteration += 1
      }
      Y
  }
}