breeze.stats.distributions.Rand Scala Examples

The following examples show how to use breeze.stats.distributions.Rand. They are taken from open-source Scala projects; the source file, project, and license are noted above each example.
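For orientation before diving in: Rand is Breeze's monadic random-variable type. Below is a minimal sketch (not from any of the projects above, assumes only Breeze on the classpath) of the parts of its API these examples rely on:

import breeze.stats.distributions.Rand

object RandPrimer extends App {
  Rand.generator.setSeed(42L)                          // seed the shared generator, as Example 11 does

  val u: Double = Rand.uniform.draw()                  // one draw from U(0, 1)
  val g: Double = Rand.gaussian.draw()                 // one draw from N(0, 1)
  val f: Float  = Rand.uniform.map(_.toFloat).draw()   // map/flatMap compose distributions

  val ints = Rand.randInt(10).sample(5)                // five draws from {0, ..., 9}
  println(s"$u $g $f $ints")
}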
Example 1
Source File: Hedge.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.hedge

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Hedge {

  case class State(η: Double, counts: Vector[Int], gains: Vector[Double])

  def Algorithm(η: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(η > 0, "η must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        η, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val gains = state.gains
        val η = state.η
        val p = exp(gains / η) / sum(exp(gains / η))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val gains = state.gains

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = gains(chosen) + reward
        gains.update(chosen, expectation)

        state.copy(counts = counts, gains = gains)
      }
    }
  }
} 
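CategoricalDistribution above is defined in the banditsbook project itself. With plain Breeze, Multinomial plays the same role for the softmax weighting in selectArm; a minimal standalone sketch (not part of the original project):

import breeze.linalg.{sum, DenseVector}
import breeze.numerics.exp
import breeze.stats.distributions.Multinomial

object SoftmaxDraw extends App {
  val gains = DenseVector(1.0, 2.0, 3.0)
  val eta = 1.0
  val p = exp(gains / eta) / sum(exp(gains / eta))   // softmax weights, as in selectArm
  println(Multinomial(p).draw())                     // arm index i sampled with probability p(i)
}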
Example 2
Source File: BreezeSpec.scala    From scio   with Apache License 2.0
package com.spotify.scio.extra

import breeze.linalg.{DenseMatrix, DenseVector, SparseVector}
import breeze.stats.distributions.Rand
import com.spotify.scio.extra.Breeze._
import com.twitter.algebird.Semigroup
import org.scalacheck._

trait BreezeSpec[M[_], T] extends PropertySpec {
  val dimension = 10
  val rows = 20
  val cols = 10
  val fRand = Rand.uniform.map(_.toFloat)
  val m: Gen[M[T]]
  def ms: Gen[List[M[T]]] = Gen.listOf[M[T]](m)
  def plus(x: M[T], y: M[T])(implicit sg: Semigroup[M[T]]): M[T] = sg.plus(x, y)
  def sumOption(xs: Iterable[M[T]])(implicit sg: Semigroup[M[T]]): Option[M[T]] = sg.sumOption(xs)
}

class FloatDenseVectorSpec extends BreezeSpec[DenseVector, Float] {
  val m = Gen.const(dimension).map(DenseVector.rand[Float](_, fRand))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseVectorSpec extends BreezeSpec[DenseVector, Double] {
  val m = Gen.const(dimension).map(DenseVector.rand[Double](_))
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatDenseMatrixSpec extends BreezeSpec[DenseMatrix, Float] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Float](r, c, fRand)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleDenseMatrixSpec extends BreezeSpec[DenseMatrix, Double] {
  val m = Gen.const((rows, cols)).map {
    case (r, c) => DenseMatrix.rand[Double](r, c)
  }
  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class FloatSparseVectorSpec extends BreezeSpec[SparseVector, Float] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Float](d, fRand).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
}

class DoubleSparseVectorSpec extends BreezeSpec[SparseVector, Double] {
  val m = Gen
    .const(dimension)
    .map(d => SparseVector(DenseVector.rand[Double](d).data))

  property("plus") {
    forAll(m, m)((x, y) => plus(x, y) == x + y)
  }
  property("sumOption") {
    forAll(ms)(xs => sumOption(xs) == xs.reduceLeftOption(_ + _))
  }
} 
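The fRand trick above is worth isolating: DenseVector.rand and DenseMatrix.rand default to a Rand[Double], so a Rand[Float] is derived with map. A standalone sketch of the same pattern:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.Rand

object RandomContainers extends App {
  val fRand = Rand.uniform.map(_.toFloat)            // Rand[Float] derived from Rand[Double]
  val v = DenseVector.rand[Float](10, fRand)         // 10 uniform floats
  val m = DenseMatrix.rand[Float](20, 10, fRand)     // a 20x10 matrix of uniform floats
  println(s"${v.length}, ${m.rows} x ${m.cols}")
}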
Example 3
Source File: nmfSuite.scala    From SparkAndMPIFactorizations   with MIT License
package org.apache.spark.mllib

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf

import edu.berkeley.cs.amplab.mlmatrix.{RowPartitionedMatrix}

import org.scalatest._
import breeze.linalg._
import breeze.stats.distributions.Rand
import org.apache.spark.mllib.util.TestingUtils._

class nmfSuite extends FunSuite with BeforeAndAfterAll {

	private var sc: SparkContext = null
	private val conf = new SparkConf().setAppName("NMF Test Suite").setMaster("local")

	override protected def beforeAll(): Unit = {
		sc = new SparkContext(conf) 
	}

	override protected def afterAll(): Unit = {
		sc.stop()
	}	

	test("NMF works locally on a separable matrix") {
		// about 800MB in doubles
		val m = 500000
		val k = 7
		val n = 200
		val rowsPerPartition = Array.fill[Int](1000)(500)

		var basisIndices = Rand.permutation(m).draw.slice(0, k)
		val W = DenseMatrix.zeros[Double](m, k)
		(0 until k) foreach { idx => W(basisIndices(idx), idx) = 1.0 }

		val reorder = Rand.permutation(n).draw	
		basisIndices = (0 until k).map( reorder.indexOf(_) )
		val H = DenseMatrix.horzcat( DenseMatrix.eye[Double](k), DenseMatrix.rand(k, n-k)).apply(::, reorder).toDenseMatrix
		val A = W * H

		// 1000 chunks of 500 rows each; `until` avoids the overlapping slices and
		// out-of-bounds final chunk that the original `to` bounds would produce
		val chunksRDD = sc.parallelize( (0 until 1000).map(chunkIdx =>
			A(chunkIdx*500 until (chunkIdx+1)*500, ::).toDenseMatrix ))
		val rowPartitionedA = RowPartitionedMatrix.fromMatrix(chunksRDD)
		// fresh names: W and H are already bound above, so re-binding them here would not compile
		val (solvedIndices, solvedW, solvedH) = nmf.fullNMF(rowPartitionedA, k)
		
		val overlap = basisIndices.toSet.intersect(solvedIndices.toSet)
		info(s"original basis column indices: ${basisIndices.mkString(" ")}")
		info(s"returned basis column indices: ${solvedIndices.mkString(" ")}")
		info(s"overlapped ${overlap.size} of $k basis indices out of $n choices")

		val relerr = norm((A - solvedW*solvedH).flatten())/norm(A.flatten())
		info(s"relative frob norm err: $relerr")
		assert(relerr < 1)
	}
} 
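Both this suite and the next rely on Rand.permutation(n), which yields a random permutation of 0 until n; taking a prefix of it gives k distinct indices, i.e. sampling without replacement. In isolation:

import breeze.stats.distributions.Rand

object PermutationDemo extends App {
  val perm = Rand.permutation(10).draw()   // a random ordering of 0 until 10
  println(perm)
  println(perm.slice(0, 3))                // 3 distinct indices from 0 until 10
}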
Example 4
Source File: xraySuite.scala    From SparkAndMPIFactorizations   with MIT License
package org.apache.spark.mllib

import org.scalatest._
import breeze.stats.distributions.Rand
import breeze.linalg._
import org.apache.spark.mllib.util.TestingUtils._

class xRaySuite extends FunSuite {
	test("solver for H is working") {
		val n = 50
		val k = 7
		val ell = 20

		val basis = DenseMatrix.eye[Double](n).apply(::, 0 until k)
		val H = DenseMatrix.horzcat( DenseMatrix.eye[Double](k), DenseMatrix.rand(k, ell-k))
		val A = basis * H
		val solvedH = xray.findWeights(A, basis) 

		assert(norm((solvedH - H).toDenseVector) ~== 0 absTol 1e-10)
	}

	test("returns reasonable column indices") {
		val n = 50
		val k = 7
		val ell = 20

		var basisIndices = Rand.permutation(n).draw.slice(0, k)
		val basis = DenseMatrix.eye[Double](n).apply(::, basisIndices)
		val lincombs = basis * DenseMatrix.rand(k, ell-k)
		val reorder = Rand.permutation(ell).draw
		val A = DenseMatrix.horzcat(basis, lincombs).apply(::, reorder).toDenseMatrix
		basisIndices = (0 until k).map(reorder.indexOf(_))

		val (xrayIndices, h) = xray.computeXray(A, k)
		assert(xrayIndices.length == k)

		val overlap = basisIndices.toSet.intersect(xrayIndices.toSet) 
		info(s"Actual columns: ${basisIndices.mkString(" ")}")
		info(s"Returned columns: ${xrayIndices.mkString(" ")}")
		info(s"Recovered ${overlap.size} of $k basis columns out of $ell overall columns")		
	}	
} 
Example 5
Source File: PoissonRegressionTest.scala    From doddle-model   with Apache License 2.0
package io.picnicml.doddlemodel.linear

import breeze.linalg.{DenseMatrix, DenseVector, convert}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.TestingUtils
import io.picnicml.doddlemodel.data.{Features, RealVector, Target}
import io.picnicml.doddlemodel.linear.PoissonRegression.ev
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class PoissonRegressionTest extends AnyFlatSpec with Matchers with TestingUtils {

  implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1e-2f)

  "Poisson regression" should "calculate the value of the loss function" in {
    val w = DenseVector(1.0f, 2.0f, 3.0f)
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f)
    )
    val y = DenseVector(3.0f, 4.0f)

    val model = PoissonRegression(lambda = 1.0f)
    ev.lossStateless(model, w, x, y) shouldEqual 29926.429998513137f
  }

  it should "calculate the gradient of the loss function wrt. to model parameters" in {
    for (_ <- 1 to 1000) {
      val w = DenseVector.rand[Float](5, rand = randomUniform)
      val x = DenseMatrix.rand[Float](10, 5, rand = randomUniform)
      val y = convert(DenseVector.rand(10, rand = Rand.randInt(20)), Float)
      testGrad(w, x, y)
    }

    def testGrad(w: RealVector, x: Features, y: Target) = {
      val model = PoissonRegression(lambda = 0.5f)
      breezeEqual(
        gradApprox(w => ev.lossStateless(model, w, x, y), w),
        ev.lossGradStateless(model, w, x, y)
      ) shouldEqual true
    }
  }

  it should "prevent the usage of negative L2 regularization strength" in {
    an [IllegalArgumentException] shouldBe thrownBy(PoissonRegression(lambda = -0.5f))
  }

  it should "throw an exception if fitting a model on a dataset that is not count data" in {
    val x = DenseMatrix(
      List(3.0f, 1.0f, 2.0f),
      List(-1.0f, -2.0f, 2.0f),
      List(3.0f, 1.0f, 2.0f)
    )
    val y = DenseVector.rand[Float](3, rand = randomUniform)
    val model = PoissonRegression()

    an [IllegalArgumentException] shouldBe thrownBy(ev.fit(model, x, y))
  }
} 
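randomUniform here is the custom Rand[Float] defined in the project's TestingUtils trait (Example 6 below), while Rand.randInt(20) supplies the integer count targets. The latter on its own:

import breeze.linalg.{convert, DenseVector}
import breeze.stats.distributions.Rand

object CountTargets extends App {
  // ten integer targets in {0, ..., 19}, converted to Float as the test does
  val y = convert(DenseVector.rand(10, Rand.randInt(20)), Float)
  println(y)
}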
Example 6
Source File: TestingUtils.scala    From doddle-model   with Apache License 2.0
package io.picnicml.doddlemodel

import breeze.linalg.{DenseMatrix, DenseVector, convert, zipValues}
import breeze.stats.distributions.Rand
import io.picnicml.doddlemodel.data.{Dataset, RealVector}
import org.scalactic.Equality

trait TestingUtils {

  implicit lazy val randomUniform: Rand[Float] = new Rand[Float] {
    override def draw(): Float = Rand.uniform.draw().toFloat
  }

  def breezeEqual(x0: DenseMatrix[Float], x1: DenseMatrix[Float])(implicit tol: Equality[Float]): Boolean =
    breezeEqual(x0.toDenseVector, x1.toDenseVector)

  def breezeEqual(x0: RealVector, x1: RealVector)(implicit tol: Equality[Float]): Boolean =
    zipValues(x0, x1).forall((v0, v1) => (v0.isNaN && v1.isNaN) || tol.areEquivalent(v0, v1))

  def gradApprox(func: RealVector => Float, x: RealVector, h: Double = 1e-3): RealVector = {
    // two-sided finite differences
    val grad = DenseVector.zeros[Double](x.length)
    for ((i, _) <- x.activeIterator) {
      val xPlusH = convert(x.copy, Double)
      xPlusH(i) += h
      val xMinusH = convert(x.copy, Double)
      xMinusH(i) -= h
      grad(i) = (func(convert(xPlusH, Float)) - func(convert(xMinusH, Float)).toDouble) / (2.0 * h)
    }
    convert(grad, Float)
  }

  def dummyData(nRows: Int, nCols: Int = 1): Dataset =
    (DenseMatrix.zeros[Float](nRows, nCols), convert(DenseVector((0 until nRows).toArray), Float))
} 
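A hedged usage sketch of gradApprox (assuming doddle-model is on the classpath): the two-sided finite difference of f(v) = v . v should be close to the analytic gradient 2v.

import breeze.linalg.DenseVector
import io.picnicml.doddlemodel.TestingUtils

object GradApproxDemo extends App with TestingUtils {
  val x = DenseVector(1.0f, -2.0f, 3.0f)
  println(gradApprox(v => v dot v, x))   // analytically 2x = (2, -4, 6)
}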
Example 7
Source File: Standard.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.softmax

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  case class State(τ: Double, counts: Vector[Int], expectations: Vector[Double])


  def Algorithm(τ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(τ > 0, "τ must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        τ, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val expectations = state.expectations
        val τ = state.τ
        val p = exp(expectations / τ) / sum(exp(expectations / τ))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
} 
Example 8
Source File: Exp3.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.exp3

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Exp3 {

  case class State(γ: Double, weights: Vector[Double], counts: Vector[Int])

  def Algorithm(γ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(0 < γ && γ <= 1, "γ must be in (0,1]")

    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        γ, fill(arms.size)(1.0d), zeros[Int](arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
        CategoricalDistribution(probs(state.γ, state.weights)).draw()

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val weights = state.weights

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val K = weights.size
        val p = probs(state.γ, weights)
        val x = zeros[Double](K)
        x.update(chosen, reward/p(chosen))
        weights *= exp((state.γ * x) / K.toDouble)

        state.copy(weights = weights, counts = counts)
      }

      private def probs(γ: Double, weights: Vector[Double]): Vector[Double] = {
        val K = weights.size  // #arms
        ((1 - γ) * (weights / sum(weights))) + (γ / K)
      }
    }
  }
} 
Example 9
Source File: UCB1.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.ucb

import breeze.numerics.sqrt
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object UCB1 {

  import breeze.linalg._
  import Vector._

  case class State(counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val counts = state.counts
        val expectations = state.expectations
        val step = sum(counts)
        val factor = fill(counts.size)(2 * scala.math.log(step))
        val bonus = sqrt(factor / counts.map(_.toDouble))
        val score = expectations + bonus
        argmax(score)
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations
        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
} 
Example 10
Source File: Standard.scala    From banditsbook-scala   with MIT License
package com.github.everpeace.banditsbook.algorithm.epsilon_greedy

import breeze.linalg.argmax
import breeze.stats.distributions.{Bernoulli, Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  import breeze.linalg.Vector
  import Vector._

  case class State(ε: Double, counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(ε: Double)(implicit zeroDouble: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = new Algorithm[Double, State] {

    override def initialState(arms: Seq[Arm[Double]]): State =
      State(ε, zeros[Int](arms.size), zeros[Double](arms.size))

    override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
      Bernoulli.distribution(state.ε).draw() match {
        case true =>
          // Exploit
          argmax(state.expectations)
        case false =>
          // Explore
          Rand.randInt(state.expectations.size).draw()
      }

    override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
      val counts = state.counts
      val expectations = state.expectations

      val count = counts(chosen) + 1
      counts.update(chosen, count)

      val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
      expectations.update(chosen, expectation)
      state.copy(counts = counts, expectations = expectations)
    }
  }
} 
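The heart of the selection rule above is a Bernoulli coin flip plus a uniform arm choice via Rand.randInt. In isolation:

import breeze.stats.distributions.{Bernoulli, Rand}

object CoinFlipSelect extends App {
  val eps = 0.1
  val best = 2                                     // index of the currently best arm
  val arm =
    if (Bernoulli.distribution(eps).draw()) best   // branch taken with probability eps
    else Rand.randInt(5).draw()                    // any of 5 arms, uniformly
  println(arm)
}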
Example 11
Source File: BHTSNE.scala    From spark-tsne   with Apache License 2.0
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.tree.SPTree
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object BHTSNE {
  private def logger = LoggerFactory.getLogger(BHTSNE.getClass)

  def tsne(
            input: RowMatrix,
            noDims: Int = 2,
            maxIterations: Int = 1000,
            perplexity: Double = 30,
            theta: Double = 0.5,
            reportLoss: Int => Boolean = {i => i % 10 == 0},
            callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = {case _ => },
            seed: Long = Random.nextLong()
            ): DenseMatrix[Double] = {
    if(input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)

    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1)) :/ 1e4
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom()
      .map(rows => rows.map {
      case (i, data) =>
        (i, data.map(_._1).toSeq, DenseVector(data.map(_._2 * exaggeration_factor).toArray))
    })
      .cache()

      var iteration = 1
      while(iteration <= maxIterations) {
        val bcY = P.context.broadcast(Y)
        val bcTree = P.context.broadcast(SPTree(Y))

        val initialValue = (DenseMatrix.zeros[Double](n, noDims), DenseMatrix.zeros[Double](n, noDims), 0.0)
        val (posF, negF, sumQ) = P.treeAggregate(initialValue)(
          seqOp = (c, v) => {
            // c: (pos, neg, sumQ), v: Array[(i, Seq(j), vec(Distance))]
            TSNEGradient.computeEdgeForces(v, bcY.value, c._1)
            val q = TSNEGradient.computeNonEdgeForces(bcTree.value, bcY.value, theta, c._2, v.map(_._1): _*)
            (c._1, c._2, c._3 + q)
          },
          combOp = (c1, c2) => {
            // c: (grad, loss)
            (c1._1 + c2._1, c1._2 + c2._2, c1._3 + c2._3)
          })
        val dY: DenseMatrix[Double] = posF :- (negF :/ sumQ)

        TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

        if(reportLoss(iteration)) {
          val loss = P.treeAggregate(0.0)(
            seqOp = (c, v) => {
              TSNEGradient.computeLoss(v, bcY.value, sumQ)
            },
            combOp = _ + _
          )
          logger.debug(s"Iteration $iteration finished with $loss")
          callback(iteration, Y.copy, Some(loss))
        } else {
          logger.debug(s"Iteration $iteration finished")
          callback(iteration, Y.copy, None)
        }

        bcY.destroy()
        bcTree.destroy()

        //undo early exaggeration
        if(iteration == early_exaggeration) {
          P.foreach {
            rows => rows.foreach {
              case (_, _, vec) => vec.foreachPair { case (i, v) => vec.update(i, v / exaggeration_factor) }
            }
          }
        }

        iteration += 1
      }

    Y
  }
} 
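The embedding initialization above combines global seeding with a parameterized gaussian: Rand.generator.setSeed fixes the shared generator, and Rand.gaussian(mu, sigma) feeds DenseMatrix.rand. A minimal sketch of the same idiom:

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Rand

object SeededInit extends App {
  Rand.generator.setSeed(123L)                            // reproducible draws from the global basis
  val y0 = DenseMatrix.rand(5, 2, Rand.gaussian(0, 1)) * 1e-4   // matches the :/ 1e4 scaling above
  println(y0)
}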
Example 12
Source File: BlockedMultiVariateGaussian.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.probability.distributions

import breeze.numerics._

import math.{Pi, log1p}
import breeze.stats.distributions.{Moments, Rand, RandBasis}
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.probability.GaussianRV
import scala.runtime.ScalaRunTime


case class BlockedMultiVariateGaussian(
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(implicit rand: RandBasis = Rand) extends
  AbstractContinuousDistr[PartitionedVector] with
  Moments[PartitionedVector, PartitionedPSDMatrix] with
  HasErrorBars[PartitionedVector] {

  def draw() = {
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val z: PartitionedVector = PartitionedVector.rand(mean.rows, nE, GaussianRV(0.0, 1.0))
    val m: PartitionedVector = root * z
    m + mean
  }

  private lazy val root: LowerTriPartitionedMatrix = bcholesky(covariance)

  override def toString() =  ScalaRunTime._toString(this)

  override def unnormalizedLogPdf(t: PartitionedVector) = {
    val centered: PartitionedVector = t - mean
    val z: PartitionedVector = root \ centered
    val slv: PartitionedVector = root.t \ z
    val d: Double = slv dot centered
    -1.0*d/2.0

  }

  override lazy val logNormalizer = {
    // determinant of the cholesky decomp is the sqrt of the determinant of the cov matrix
    // this is the log det of the cholesky decomp
    val det = bsum(blog(bdiag(root)))
    mean.rows.toDouble/2 *  log(2 * Pi) + 0.5*det
  }

  def variance = covariance
  def mode = mean
  lazy val entropy = {
    mean.rows.toDouble * log1p(2 * Pi) + bsum(blog(bdiag(root)))
  }

  override def confidenceInterval(s: Double) = {
    val signFlag = if(s < 0) -1.0 else 1.0
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt

    val ones = PartitionedVector.ones(mean.rows, nE)
    val multiplier = signFlag*s

    val bar: PartitionedVector = root*(ones*multiplier)

    (mean - bar, mean + bar)
  }
} 
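draw() above is the standard recipe for sampling a multivariate gaussian: draw z ~ N(0, I) and return mean + L z, where L is the (here blocked) Cholesky factor of the covariance. For data that fits in memory, Breeze's own MultivariateGaussian implements the same recipe; a minimal sketch:

import breeze.linalg.{DenseMatrix, DenseVector}
import breeze.stats.distributions.MultivariateGaussian

object MvnDraw extends App {
  val mean = DenseVector(0.0, 1.0)
  val cov  = DenseMatrix((2.0, 0.3), (0.3, 1.0))   // symmetric positive definite
  println(MultivariateGaussian(mean, cov).draw())  // one sample, a DenseVector of length 2
}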
Example 13
Source File: RejectionSamplingScheme.scala    From DynaML   with Apache License 2.0
package io.github.mandar2812.dynaml.probability

import breeze.stats.distributions.{Density, Rand}
import io.github.mandar2812.dynaml.pipes._
import io.github.mandar2812.dynaml.probability.distributions.GenericDistribution

import scala.util.Random
import org.apache.log4j.Logger




abstract class RejectionSamplingScheme[
ConditioningSet, Domain,
Dist <: Density[ConditioningSet] with Rand[ConditioningSet],
DistL <: Density[Domain] with Rand[Domain],
JointDist <: Density[(ConditioningSet, Domain)] with Rand[(ConditioningSet, Domain)],
Likelihood <: RandomVarWithDistr[Domain, DistL]](
  p: RandomVarWithDistr[ConditioningSet, Dist],
  c: DataPipe[ConditioningSet, Likelihood])
  extends RandomVarWithDistr[(ConditioningSet, Domain), JointDist]
    with BayesJointProbabilityScheme[
    ConditioningSet, Domain,
    RandomVarWithDistr[ConditioningSet, Dist],
    Likelihood] { self =>

  override val prior: RandomVarWithDistr[ConditioningSet, Dist] = p

  override val likelihood: DataPipe[ConditioningSet, Likelihood] = c

  var Max_Candidates: Int = 1000

  var Max_Estimations: Int = 10000

  override val sample = prior.sample >
  BifurcationPipe[ConditioningSet, ConditioningSet, Domain](
      (c: ConditioningSet) => (c, likelihood(c).draw)
    )

  override val posterior: DataPipe[Domain, RandomVariable[ConditioningSet]] =
    DataPipe((data: Domain) => {

    val sampl = this.prior.sample
    val q = this.prior.underlyingDist

    val M = (1 to Max_Estimations).map(i => {
      likelihood(sampl()).underlyingDist(data)
    }).sum/Max_Estimations.toDouble

    new RandomVarWithDistr[ConditioningSet, GenericDistribution[ConditioningSet]] { innerself =>

      val logger = Logger.getLogger(this.getClass)

      override val sample: DataPipe[Unit, ConditioningSet] = DataPipe(() => {
        var iterations = 0
        var accepted = false
        var accepted_sample: ConditioningSet = sampl()

        while(!accepted && iterations < Max_Candidates) {
          iterations += 1   // advance the counter, otherwise the candidate bound is never reached
          // generate a candidate
          val candidate = sampl()
          val a = underlyingDist(candidate)/(M*q(candidate))
          if(Random.nextDouble() <= a) {
            logger.info("... Sample Accepted ...")
            accepted = true
            accepted_sample = candidate
          }
        }

        accepted_sample
      })

      override val underlyingDist: GenericDistribution[ConditioningSet] = new GenericDistribution[ConditioningSet] {
        override def apply(x: ConditioningSet): Double =
          prior.underlyingDist(x)*likelihood(x).underlyingDist(data)

        override def draw() = innerself.sample()
      }

    }
  })

} 
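Stripped of the pipes and the M estimation above, the accept/reject core is small. A standalone sketch (not the scheme above) that samples N(0, 1) truncated to the nonnegative half-line by rejecting negative proposals:

import breeze.stats.distributions.Gaussian

object RejectDemo extends App {
  val proposal = Gaussian(0.0, 1.0)
  def draw(): Double = {
    val x = proposal.draw()
    if (x >= 0) x else draw()   // reject and retry; acceptance probability is 1/2
  }
  println(Seq.fill(5)(draw()))
}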
Example 14
Source File: CosineRandomFeaturesSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics.cos
import breeze.stats._
import breeze.stats.distributions.{CauchyDistribution, Rand}
import org.scalatest.FunSuite
import keystoneml.utils.Stats


class CosineRandomFeaturesSuite extends FunSuite {
  val gamma = 1.34
  val numInputFeatures = 400
  val numOutputFeatures = 1000

  test("Guassian cosine random features") {
    val rf = CosineRandomFeatures(numInputFeatures, numOutputFeatures, gamma)

    // Check that b is uniform
    assert(max(rf.b) <= 2*math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is gaussian
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(mean(rf.W),0, 10e-3 * gamma))
    assert(Stats.aboutEq(variance(rf.W), gamma * gamma, 10e-3 * gamma * gamma))

    //check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }

  test("Cauchy cosine random features") {
    val rf = CosineRandomFeatures(
      numInputFeatures,
      numOutputFeatures,
      gamma,
      new CauchyDistribution(0, 1))

    // Check that b is uniform
    assert(max(rf.b) <= 2*math.Pi)
    assert(min(rf.b) >= 0)
    assert(rf.b.size == numOutputFeatures)

    // Check that W is cauchy
    assert(rf.W.rows == numOutputFeatures)
    assert(rf.W.cols == numInputFeatures)
    assert(Stats.aboutEq(median(rf.W),0,10e-3 * gamma))

    //check the mapping
    val in = DenseVector.rand(numInputFeatures, Rand.uniform)
    val out = cos((in.t * rf.W.t).t + rf.b)
    assert(Stats.aboutEq(rf(in), out, 10e-3))
  }
} 
Example 15
Source File: LinearRectifierSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.Rand
import org.apache.spark.SparkContext
import org.scalatest.FunSuite
import keystoneml.pipelines._
import keystoneml.utils.{TestUtils, MatrixUtils}
import keystoneml.workflow.PipelineContext

class LinearRectifierSuite extends FunSuite with PipelineContext with Logging {

  test("Test MaxVal") {
    sc = new SparkContext("local", "test")
    val matrixParts = TestUtils.createRandomMatrix(sc, 128, 16, 4).rdd.map(_.mat)

    val x = matrixParts.flatMap(y => MatrixUtils.matrixToRowArray(y))
    val y = x.map(r => r.forall(_ >= 0.0))

    val valmaxNode = LinearRectifier()
    val maxy = valmaxNode.apply(x).map(r => r.forall(_ >= 0.0))

    //The random matrix should *not* all be >= 0
    assert(!y.reduce {(a,b) => a | b})

    //The valmax'ed random matrix *should* all be >= 0.
    assert(maxy.reduce {(a,b) => a | b})
  }
} 
Example 16
Source File: BlockLinearMapperSuite.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.learning

import breeze.linalg.{DenseVector, DenseMatrix}
import breeze.stats.distributions.Rand
import keystoneml.workflow.PipelineContext
import scala.collection.mutable.ArrayBuffer

import org.scalatest.FunSuite

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import keystoneml.pipelines._
import keystoneml.utils.Stats

class BlockLinearMapperSuite extends FunSuite with PipelineContext with Logging {

  test("BlockLinearMapper transformation") {
    sc = new SparkContext("local", "test")

    val inDims = 1000
    val outDims = 100
    val numChunks = 5
    val numPerChunk = inDims/numChunks

    val mat = DenseMatrix.rand(inDims, outDims, Rand.gaussian)
    val vec = DenseVector.rand(inDims, Rand.gaussian)
    val intercept = DenseVector.rand(outDims, Rand.gaussian)

    val splitVec = (0 until numChunks).map(i => vec((numPerChunk*i) until (numPerChunk*i + numPerChunk)))
    val splitMat = (0 until numChunks).map(i => mat((numPerChunk*i) until (numPerChunk*i + numPerChunk), ::))

    val linearMapper = new LinearMapper[DenseVector[Double]](mat, Some(intercept))
    val blockLinearMapper = new BlockLinearMapper(splitMat, numPerChunk, Some(intercept))

    val linearOut = linearMapper(vec)

    // Test with intercept
    assert(Stats.aboutEq(blockLinearMapper(vec), linearOut, 1e-4))

    // Test the apply and evaluate call
    val blmOuts = new ArrayBuffer[RDD[DenseVector[Double]]]
    val splitVecRDDs = splitVec.map { vec =>
      sc.parallelize(Seq(vec), 1)
    }
    blockLinearMapper.applyAndEvaluate(splitVecRDDs,
      (predictedValues: RDD[DenseVector[Double]]) => {
        blmOuts += predictedValues
        ()
      }
    )

    // The last blmOut should match the linear mapper's output
    assert(Stats.aboutEq(blmOuts.last.collect()(0), linearOut, 1e-4))
  }
} 
Example 17
Source File: TestUtils.scala    From keystone   with Apache License 2.0
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


object TestUtils {

  // NOTE: `genData` and several other helpers are elided in this listing.
  // A minimal stand-in (not in the original file) so the methods below
  // compile: fill an x*y*z array with uniform draws.
  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Rand.uniform.draw())

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
} 
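The seed + partition-index pattern above is the standard way to get random streams that differ per Spark partition yet stay reproducible end to end. The explicit-RandBasis idiom on its own:

import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator}
import org.apache.commons.math3.random.MersenneTwister

object SeededBasis extends App {
  // an explicit RandBasis: reproducible draws without touching the global Rand
  val basis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(42)))
  println(DenseMatrix.rand(2, 2, Gaussian(0.0, 1.0)(basis)))
}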
Example 18
Source File: CosineRandomFeatures.scala    From keystone   with Apache License 2.0
package keystoneml.nodes.stats

import breeze.linalg._
import breeze.numerics._
import breeze.stats.distributions.Rand
import org.apache.spark.rdd.RDD
import keystoneml.pipelines._
import keystoneml.utils.MatrixUtils
import keystoneml.workflow.Transformer


// The CosineRandomFeatures Transformer class itself is elided in this listing;
// Example 14 shows its behaviour: rf(in) == cos((in.t * W.t).t + b).
object CosineRandomFeatures {

  def apply(
      numInputFeatures: Int,
      numOutputFeatures: Int,
      gamma: Double,
      wDist: Rand[Double] = Rand.gaussian,
      bDist: Rand[Double] = Rand.uniform) = {
    val W = DenseMatrix.rand(numOutputFeatures, numInputFeatures, wDist) :* gamma
    val b = DenseVector.rand(numOutputFeatures, bDist) :* (2*math.Pi)
    new CosineRandomFeatures(W, b)
  }
} 
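A hedged usage sketch (assuming KeystoneML on the classpath), mirroring the test in Example 14; passing a CauchyDistribution(0, 1) as wDist instead approximates a Laplacian rather than an RBF kernel:

import breeze.linalg.DenseVector
import breeze.stats.distributions.Rand
import keystoneml.nodes.stats.CosineRandomFeatures

object RffDemo extends App {
  val rf = CosineRandomFeatures(400, 1000, 1.34)   // gaussian W, uniform b by default
  val x  = DenseVector.rand(400, Rand.uniform)
  println(rf(x).length)                            // 1000 cosine features
}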
Example 19
Source File: SimpleTSNE.scala    From spark-tsne   with Apache License 2.0
package com.github.saurfang.spark.tsne.impl

import breeze.linalg._
import breeze.stats.distributions.Rand
import com.github.saurfang.spark.tsne.{TSNEGradient, TSNEHelper, TSNEParam, X2P}
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.storage.StorageLevel
import org.slf4j.LoggerFactory

import scala.util.Random

object SimpleTSNE {
  private def logger = LoggerFactory.getLogger(SimpleTSNE.getClass)

  def tsne(
            input: RowMatrix,
            noDims: Int = 2,
            maxIterations: Int = 1000,
            perplexity: Double = 30,
            callback: (Int, DenseMatrix[Double], Option[Double]) => Unit = {case _ => },
            seed: Long = Random.nextLong()): DenseMatrix[Double] = {
    if(input.rows.getStorageLevel == StorageLevel.NONE) {
      logger.warn("Input is not persisted and performance could be bad")
    }

    Rand.generator.setSeed(seed)

    val tsneParam = TSNEParam()
    import tsneParam._

    val n = input.numRows().toInt
    val Y: DenseMatrix[Double] = DenseMatrix.rand(n, noDims, Rand.gaussian(0, 1))
    val iY = DenseMatrix.zeros[Double](n, noDims)
    val gains = DenseMatrix.ones[Double](n, noDims)

    // approximate p_{j|i}
    val p_ji = X2P(input, 1e-5, perplexity)
    val P = TSNEHelper.computeP(p_ji, n).glom().cache()

      var iteration = 1
      while(iteration <= maxIterations) {
        val bcY = P.context.broadcast(Y)

        val numerator = P.map{ arr => TSNEGradient.computeNumerator(bcY.value, arr.map(_._1): _*) }.cache()
        val bcNumerator = P.context.broadcast({
          numerator.treeAggregate(0.0)(seqOp = (x, v) => x + sum(v), combOp = _ + _)
        })

        val (dY, loss) = P.zip(numerator).treeAggregate((DenseMatrix.zeros[Double](n, noDims), 0.0))(
          seqOp = (c, v) => {
            // c: (grad, loss), v: (Array[(i, Iterable(j, Distance))], numerator)
            val l = TSNEGradient.compute(v._1, bcY.value, v._2, bcNumerator.value, c._1, iteration <= early_exaggeration)
            (c._1, c._2 + l)
          },
          combOp = (c1, c2) => {
            // c: (grad, loss)
            (c1._1 + c2._1, c1._2 + c2._2)
          })

        bcY.destroy()
        bcNumerator.destroy()
        numerator.unpersist()

        TSNEHelper.update(Y, dY, iY, gains, iteration, tsneParam)

        logger.debug(s"Iteration $iteration finished with $loss")
        callback(iteration, Y.copy, Some(loss))
        iteration += 1
      }
      Y
  }
}