breeze.stats.distributions.RandBasis Scala Example

Source File: ApproximatePCA.scala From keystone with Apache License 2.0

5 votes

package keystoneml.nodes.learning

import breeze.linalg._
import breeze.numerics._
import breeze.stats._
import breeze.stats.distributions.{Gaussian, ThreadLocalRandomGenerator, RandBasis}
import com.github.fommil.netlib.LAPACK._
import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.rdd.RDD
import org.netlib.util.intW
import keystoneml.pipelines.Logging
import keystoneml.workflow.Estimator


  def approximateQ(A: DenseMatrix[Double], l: Int, q: Int, seed: Int = 0): DenseMatrix[Double] = {
    val d = A.cols

    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    val omega = DenseMatrix.rand(d, l, Gaussian(0,1)(randBasis)) //cpu: d*l, mem: d*l
    val y0 = A*omega //cpu: n*d*l, mem: n*l

    var Q = QRUtils.qrQR(y0)._1 //cpu: n*l**2

    for (i <- 1 to q) {
      val YHat = Q.t * A //cpu: l*n*d, mem: l*d
      val Qh = QRUtils.qrQR(YHat.t)._1 //cpu: d*l^2, mem: d*l

      val Yj = A * Qh //cpu: n*d*l, mem: n*l
      Q = QRUtils.qrQR(Yj)._1 //cpu:  n*l^2, mem: n*l
    }

    Q
  }
}

Source File: TestUtils.scala From keystone with Apache License 2.0

5 votes

package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
}

Source File: IntegratorTest.scala From spark-gp with Apache License 2.0

5 votes

package org.apache.spark.ml.commons.util

import breeze.numerics.{abs, sigmoid, sqrt}
import breeze.stats.distributions.{Gaussian, RandBasis}
import org.apache.commons.math3.random.MersenneTwister
import org.scalatest.FunSuite


class IntegratorTest extends FunSuite {

  test("testExpectedOfFunctionOfNormal") {
    val f = (x: Double) => sigmoid(x)
    val integrator = new Integrator(100)
    val mean = 0.5
    val variance = 3
    val sd = sqrt(variance)

    val testResult = integrator.expectedOfFunctionOfNormal(mean, variance, f)

    val gg = new Gaussian(mean, sd)(new RandBasis(new MersenneTwister()))
    val mcIters = 100000
    val values = gg.sample(mcIters).map(f)
    val mcResult = values.sum / mcIters
    val mcSD = sqrt(values.map(_ - mcResult).map(x => x * x).sum / mcIters) / sqrt(mcIters)
    assert(abs(mcResult - testResult) < 3 * mcSD)
  }

}

Source File: BlockedMultiVariateGaussian.scala From DynaML with Apache License 2.0

5 votes

package io.github.mandar2812.dynaml.probability.distributions

import breeze.numerics._

import math.{Pi, log1p}
import breeze.stats.distributions.{Moments, Rand, RandBasis}
import io.github.mandar2812.dynaml.algebra._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixOps._
import io.github.mandar2812.dynaml.algebra.PartitionedMatrixSolvers._
import io.github.mandar2812.dynaml.probability.GaussianRV
import scala.runtime.ScalaRunTime


case class BlockedMultiVariateGaussian(
  mean: PartitionedVector,
  covariance: PartitionedPSDMatrix)(implicit rand: RandBasis = Rand) extends
  AbstractContinuousDistr[PartitionedVector] with
  Moments[PartitionedVector, PartitionedPSDMatrix] with
  HasErrorBars[PartitionedVector] {

  def draw() = {
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt
    val z: PartitionedVector = PartitionedVector.rand(mean.rows, nE, GaussianRV(0.0, 1.0))
    val m: PartitionedVector = root * z
    m + mean
  }

  private lazy val root: LowerTriPartitionedMatrix = bcholesky(covariance)

  override def toString() =  ScalaRunTime._toString(this)

  override def unnormalizedLogPdf(t: PartitionedVector) = {
    val centered: PartitionedVector = t - mean
    val z: PartitionedVector = root \ centered
    val slv: PartitionedVector = root.t \ z
    val d: Double = slv dot centered
    -1.0*d/2.0

  }

  override lazy val logNormalizer = {
    // determinant of the cholesky decomp is the sqrt of the determinant of the cov matrix
    // this is the log det of the cholesky decomp
    val det = bsum(blog(bdiag(root)))
    mean.rows.toDouble/2 *  log(2 * Pi) + 0.5*det
  }

  def variance = covariance
  def mode = mean
  lazy val entropy = {
    mean.rows.toDouble * log1p(2 * Pi) + bsum(blog(bdiag(root)))
  }

  override def confidenceInterval(s: Double) = {
    val signFlag = if(s < 0) -1.0 else 1.0
    val nE: Int = if(mean.rowBlocks > 1L) mean(0L to 0L)._data.head._2.length else mean.rows.toInt

    val ones = PartitionedVector.ones(mean.rows, nE)
    val multiplier = signFlag*s

    val bar: PartitionedVector = root*(ones*multiplier)

    (mean - bar, mean + bar)
  }
}

Source File: RandNLATest.scala From spectrallda-tensorspark with Apache License 2.0

5 votes

package edu.uci.eecs.spectralLDA.utils

import breeze.linalg._
import breeze.linalg.qr.QR
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Uniform}
import edu.uci.eecs.spectralLDA.testharness.Context
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext
import org.scalatest._


class RandNLATest extends FlatSpec with Matchers {

  private val sc: SparkContext = Context.getSparkContext

  "M2 sketching" should "be correct" in {
    val a1 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a2 = SparseVector(DenseVector.rand[Double](100).toArray)
    val a3 = SparseVector(DenseVector.rand[Double](100).toArray)

    val docs = Seq((1000L, a1), (1001L, a2), (1002L, a3))
    val docsRDD = sc.parallelize(docs)

    // Random Gaussian matrix
    val g = DenseMatrix.rand[Double](100, 50, Gaussian(mu = 0.0, sigma = 1.0))

    val result = DenseMatrix.zeros[Double](100, 50)
    docsRDD
      .flatMap {
        case (id: Long, w: SparseVector[Double]) => RandNLA.accumulate_M_mul_S(g, w, sum(w))
      }
      .reduceByKey(_ + _)
      .collect
      .foreach {
        case (r: Int, a: DenseVector[Double]) => result(r, ::) := a.t
      }

    val m2 = docsRDD
      .map {
        case (id: Long, w: SparseVector[Double]) =>
          val l = sum(w)
          (w * w.t - diag(w)) / (l * (l - 1.0))
      }
      .reduce(_ + _)
    val expectedResult = m2 * g

    val diff: DenseMatrix[Double] = result - expectedResult
    val normDiff: Double = norm(norm(diff(::, *)).toDenseVector)
    normDiff should be <= 1e-8
  }

  "Randomised Power Iteration method" should "be approximately correct" in {
    implicit val randBasis: RandBasis =
      new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(234787)))

    val n = 100
    val k = 5

    val alpha: DenseVector[Double] = DenseVector[Double](25.0, 20.0, 15.0, 10.0, 5.0)
    val beta: DenseMatrix[Double] = DenseMatrix.rand(n, k, Uniform(0.0, 1.0))

    val norms = norm(beta(::, *)).toDenseVector
    for (j <- 0 until k) {
      beta(::, j) /= norms(j)
    }

    val a: DenseMatrix[Double] = beta * diag(alpha) * beta.t
    val sigma: DenseMatrix[Double] = DenseMatrix.rand(n, k, Gaussian(mu = 0.0, sigma = 1.0))
    val y = a * sigma
    val QR(q: DenseMatrix[Double], _) = qr.reduced(y)

    val (s: DenseVector[Double], u: DenseMatrix[Double]) = RandNLA.decomp2(a * q, q)

    val diff_a = u * diag(s) * u.t - a
    val norm_diff_a = norm(norm(diff_a(::, *)).toDenseVector)

    norm_diff_a should be <= 1e-8
  }
}

Source File: DatasetConstraints.scala From mmlspark with MIT License

5 votes

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.test.datagen

import breeze.stats.distributions.{RandBasis, Uniform}
import org.apache.commons.math3.random.MersenneTwister

import scala.util.Random


  def generateConstraints(random: Random): Unit = {
    val rand = new RandBasis(new MersenneTwister(random.nextInt()))
    val distributionRows = new Uniform(minRows.toDouble, maxRows.toDouble)(rand)
    val distributionCols = new Uniform(minCols.toDouble, maxCols.toDouble)(rand)
    val distributionSlots = new Uniform(minCols.toDouble, maxCols.toDouble)(rand)
    numRows = distributionRows.draw().toInt
    numCols = distributionCols.draw().toInt
    numSlotsPerCol = (1 to numCols).map(col => distributionSlots.draw().toInt).toArray
  }

}

Source File: Hedge.scala From banditsbook-scala with MIT License

5 votes

package com.github.everpeace.banditsbook.algorithm.hedge

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Hedge {

  case class State(η: Double, counts: Vector[Int], gains: Vector[Double])

  def Algorithm(η: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(η > 0, "η must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        η, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val gains = state.gains
        val η = state.η
        val p = exp(gains / η) / sum(exp(gains / η))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val gains = state.gains

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = gains(chosen) + reward
        gains.update(chosen, expectation)

        state.copy(counts = counts, gains = gains)
      }
    }
  }
}

Source File: Standard.scala From banditsbook-scala with MIT License

5 votes

package com.github.everpeace.banditsbook.algorithm.epsilon_greedy

import breeze.linalg.argmax
import breeze.stats.distributions.{Bernoulli, Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  import breeze.linalg.Vector
  import Vector._

  case class State(ε: Double, counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(ε: Double)(implicit zeroDouble: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = new Algorithm[Double, State] {

    override def initialState(arms: Seq[Arm[Double]]): State =
      State(ε, zeros[Int](arms.size), zeros[Double](arms.size))

    override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
      Bernoulli.distribution(state.ε).draw() match {
        case true =>
          // Exploit
          argmax(state.expectations)
        case false =>
          // Explore
          Rand.randInt(state.expectations.size).draw()
      }

    override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
      val counts = state.counts
      val expectations = state.expectations

      val count = counts(chosen) + 1
      counts.update(chosen, count)

      val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
      expectations.update(chosen, expectation)
      state.copy(counts = counts, expectations = expectations)
    }
  }
}

Source File: UCB1.scala From banditsbook-scala with MIT License

5 votes

package com.github.everpeace.banditsbook.algorithm.ucb

import breeze.numerics.sqrt
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm.Algorithm
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object UCB1 {

  import breeze.linalg._
  import Vector._

  case class State(counts: Vector[Int], expectations: Vector[Double])

  def Algorithm(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val counts = state.counts
        val expectations = state.expectations
        val step = sum(counts)
        val factor = fill(counts.size)(2 * scala.math.log(step))
        val bonus = sqrt(factor / counts.map(_.toDouble))
        val score = expectations + bonus
        argmax(score)
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations
        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
}

Source File: Exp3.scala From banditsbook-scala with MIT License

5 votes

package com.github.everpeace.banditsbook.algorithm.exp3

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Exp3 {

  case class State(γ: Double, weights: Vector[Double], counts: Vector[Int])

  def Algorithm(γ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(0< γ && γ <= 1, "γ must be in (0,1]")

    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        γ, fill(arms.size)(1.0d), zeros[Int](arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int =
        CategoricalDistribution(probs(state.γ, state.weights)).draw()

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val weights = state.weights

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val K = weights.size
        val p = probs(state.γ, weights)
        val x = zeros[Double](K)
        x.update(chosen, reward/p(chosen))
        weights *= exp((state.γ * x) / K.toDouble)

        state.copy(weights = weights, counts = counts)
      }

      private def probs(γ: Double, weights: Vector[Double]): Vector[Double] = {
        val K = weights.size  // #arms
        ((1 - γ) * (weights / sum(weights))) + (γ / K)
      }
    }
  }
}

Source File: Standard.scala From banditsbook-scala with MIT License

5 votes

package com.github.everpeace.banditsbook.algorithm.softmax

import breeze.linalg.Vector._
import breeze.linalg._
import breeze.numerics.exp
import breeze.stats.distributions.{Rand, RandBasis}
import breeze.storage.Zero
import com.github.everpeace.banditsbook.algorithm._
import com.github.everpeace.banditsbook.arm.Arm

import scala.collection.immutable.Seq
import scala.reflect.ClassTag


object Standard {

  case class State(τ: Double, counts: Vector[Int], expectations: Vector[Double])


  def Algorithm(τ: Double)(implicit zeroReward: Zero[Double], zeroInt: Zero[Int], tag: ClassTag[Double], rand: RandBasis = Rand)
  = {
    require(τ > 0, "τ must be positive.")
    new Algorithm[Double, State] {

      override def initialState(arms: Seq[Arm[Double]]): State = State(
        τ, zeros(arms.size), zeros(arms.size)
      )

      override def selectArm(arms: Seq[Arm[Double]], state: State): Int = {
        val expectations = state.expectations
        val τ = state.τ
        val p = exp(expectations / τ) / sum(exp(expectations / τ))
        CategoricalDistribution(p).draw
      }

      override def updateState(arms: Seq[Arm[Double]], state: State, chosen: Int, reward: Double): State = {
        val counts = state.counts
        val expectations = state.expectations

        val count = counts(chosen) + 1
        counts.update(chosen, count)

        val expectation = (((count - 1) / count.toDouble) * expectations(chosen)) + ((1 / count.toDouble) * reward)
        expectations.update(chosen, expectation)

        state.copy(counts = counts, expectations = expectations)
      }
    }
  }
}

breeze.stats.distributions.RandBasis Scala Examples