org.apache.spark.util.random.XORShiftRandom Scala Examples
The following examples show how to use org.apache.spark.util.random.XORShiftRandom.
Each example notes the source file and the project it comes from.
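Before the project examples, here is a minimal self-contained sketch of the XORShiftRandom API itself. It is not taken from any of the projects below; the object name XORShiftRandomDemo and the seed values are made up for illustration. XORShiftRandom extends java.util.Random, and because the class is package-private to Spark, code that uses it directly must live under an org.apache.spark package, as all of the examples below do.

package org.apache.spark.util.random

// Hypothetical demo object; not part of Spark or of any project listed below.
object XORShiftRandomDemo {
  def main(args: Array[String]): Unit = {
    val rng = new XORShiftRandom(42L)   // fixed seed for reproducibility
    val u = rng.nextDouble()            // uniform draw in [0, 1)
    val g = rng.nextGaussian()          // draw from the standard normal distribution
    rng.setSeed(1234L)                  // reseed, as BaggedPoint does once per partition
    println(s"uniform=$u gaussian=$g int=${rng.nextInt(10)}")
  }
}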
Example 1
Source File: BaggedPoint.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.ml.tree.impl

import org.apache.commons.math3.distribution.PoissonDistribution

import org.apache.spark.rdd.RDD
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

// The BaggedPoint class declaration (fields: datum, subsampleWeights) is omitted from
// this excerpt; the methods below belong to its companion object.
private[spark] object BaggedPoint {

  def convertToBaggedRDD[Datum] (
      input: RDD[Datum],
      subsamplingRate: Double,
      numSubsamples: Int,
      withReplacement: Boolean,
      seed: Long = Utils.random.nextLong()): RDD[BaggedPoint[Datum]] = {
    if (withReplacement) {
      convertToBaggedRDDSamplingWithReplacement(input, subsamplingRate, numSubsamples, seed)
    } else {
      if (numSubsamples == 1 && subsamplingRate == 1.0) {
        convertToBaggedRDDWithoutSampling(input)
      } else {
        convertToBaggedRDDSamplingWithoutReplacement(input, subsamplingRate, numSubsamples, seed)
      }
    }
  }

  private def convertToBaggedRDDSamplingWithoutReplacement[Datum] (
      input: RDD[Datum],
      subsamplingRate: Double,
      numSubsamples: Int,
      seed: Long): RDD[BaggedPoint[Datum]] = {
    input.mapPartitionsWithIndex { (partitionIndex, instances) =>
      // Use random seed = seed + partitionIndex + 1 to make generation reproducible.
      val rng = new XORShiftRandom
      rng.setSeed(seed + partitionIndex + 1)
      instances.map { instance =>
        val subsampleWeights = new Array[Double](numSubsamples)
        var subsampleIndex = 0
        while (subsampleIndex < numSubsamples) {
          val x = rng.nextDouble()
          subsampleWeights(subsampleIndex) = {
            if (x < subsamplingRate) 1.0 else 0.0
          }
          subsampleIndex += 1
        }
        new BaggedPoint(instance, subsampleWeights)
      }
    }
  }

  private def convertToBaggedRDDSamplingWithReplacement[Datum] (
      input: RDD[Datum],
      subsample: Double,
      numSubsamples: Int,
      seed: Long): RDD[BaggedPoint[Datum]] = {
    input.mapPartitionsWithIndex { (partitionIndex, instances) =>
      // Use random seed = seed + partitionIndex + 1 to make generation reproducible.
      val poisson = new PoissonDistribution(subsample)
      poisson.reseedRandomGenerator(seed + partitionIndex + 1)
      instances.map { instance =>
        val subsampleWeights = new Array[Double](numSubsamples)
        var subsampleIndex = 0
        while (subsampleIndex < numSubsamples) {
          subsampleWeights(subsampleIndex) = poisson.sample()
          subsampleIndex += 1
        }
        new BaggedPoint(instance, subsampleWeights)
      }
    }
  }

  private def convertToBaggedRDDWithoutSampling[Datum] (
      input: RDD[Datum]): RDD[BaggedPoint[Datum]] = {
    input.map(datum => new BaggedPoint(datum, Array(1.0)))
  }
}
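For context, a hypothetical driver for the method above might look like the following. It is not part of drizzle-spark; the object name, the SparkSession settings, and the parameter values are illustrative, and it assumes compilation inside the org.apache.spark.ml.tree.impl package, since BaggedPoint is package-private. The datum and subsampleWeights fields come from the BaggedPoint class declaration that the excerpt omits.

package org.apache.spark.ml.tree.impl

import org.apache.spark.sql.SparkSession

// Hypothetical driver, for illustration only.
object BaggedPointDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("bagging").getOrCreate()
    val data = spark.sparkContext.parallelize(1 to 100)

    // Three bootstrap subsamples drawn with replacement: each weight is a Poisson(1.0) draw.
    val bagged = BaggedPoint.convertToBaggedRDD(
      data, subsamplingRate = 1.0, numSubsamples = 3, withReplacement = true, seed = 42L)

    bagged.take(5).foreach(p => println(p.datum + " -> " + p.subsampleWeights.mkString(", ")))
    spark.stop()
  }
}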
Example 2
Source File: randomExpressions.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

@ExpressionDescription(
  usage = "_FUNC_(a) - Returns a random column with i.i.d. gaussian random distribution.")
case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to randn must be an integer literal.")
  })

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.copy(code = s"""
      final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""", isNull = "false")
  }
}
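The codegen above is what backs the SQL randn function: each partition gets its own XORShiftRandom seeded with seed + partitionId, so the column is reproducible for a fixed seed and partitioning. A minimal sketch of reaching this expression through the public API follows (standard Spark SQL, nothing specific to drizzle-spark; the object name is made up):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.randn

// Hypothetical demo; uses only the public DataFrame/SQL API.
object RandnDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("randn").getOrCreate()

    // DataFrame API: a gaussian column generated by the Randn expression above.
    spark.range(0, 4).select(randn(7L).as("g")).show()

    // SQL: the same expression, reached through the randn(seed) function.
    spark.range(0, 4).createOrReplaceTempView("t")
    spark.sql("SELECT randn(7) AS g FROM t").show()

    spark.stop()
  }
}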
Example 3
Source File: TestLPSolver.scala From spark-lp with Apache License 2.0
// Package and import statements are omitted from this excerpt.
object TestLPSolver {
  def main(args: Array[String]) {
    val rnd = new Random(12345)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLPSolver")
    val sc = new SparkContext(sparkConf)

    val n = 1000          // Transpose constraint matrix row count.
    val m = 100           // Transpose constraint matrix column count.
    val numPartitions = 2

    // Generate the starting vector from uniform distribution U(3.0, 5.0).
    println("generate x")
    val x0 = RandomRDDs.uniformRDD(sc, n, numPartitions)
      .map(v => 3.0 + 2.0 * v).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'B' using sparse uniformly generated values.
    println("generate B")
    val B = new RandomVectorRDD(sc, n, m, numPartitions,
      new SparseStandardNormalGenerator(0.1), rnd.nextLong)

    // Generate the cost vector 'c' using uniformly generated values.
    println("generate c")
    val c = RandomRDDs.uniformRDD(sc, n, numPartitions, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    println("generate b")
    val b = (new LinopMatrixAdjoint(B))(x0)

    // Solve the linear program using LP.solve, finding the optimal x vector 'optimalX'.
    println("Start solving ...")
    val (optimalVal, _) = LP.solve(c, B, b, sc = sc)
    println("optimalVal: " + optimalVal)
    //println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
}
Example 4
Source File: TestLinearProgram.scala From spark-tfocs with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.SolverSLP
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.{ RandomDataGenerator, RandomRDDs }
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.util.random.XORShiftRandom

object TestLinearProgram {
  def main(args: Array[String]) {
    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLinearProgram")
    val sc = new SparkContext(sparkConf)

    val n = 5000    // Transpose constraint matrix row count.
    val m = n / 2   // Transpose constraint matrix column count.

    // Generate a starting 'x' vector, using normally generated values.
    val x = RandomRDDs.normalRDD(sc, n).map(_ + 10).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'A' using sparse normally generated values.
    val A = new RandomVectorRDD(sc, n, m, sc.defaultMinPartitions,
      new SparseStandardNormalGenerator(0.01), rnd.nextLong)

    // Generate the cost vector 'c' using normally generated values.
    val c = RandomRDDs.normalRDD(sc, n, 0, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    val b = new LinopMatrixAdjoint(A)(x)

    val mu = 1e-2

    // Solve the linear program using SolverSLP, finding the optimal x vector 'optimalX'.
    val (optimalX, _) = SolverSLP.run(c, A, b, mu)
    println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
}
Example 5
Source File: randomExpressions.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral}
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

// scalastyle:off line.size.limit
@ExpressionDescription(
  usage = """_FUNC_([seed]) - Returns a random value with independent and identically distributed (i.i.d.) values drawn from the standard normal distribution.""",
  examples = """
    Examples:
      > SELECT _FUNC_();
       -0.3254147983080288
      > SELECT _FUNC_(0);
       1.1164209726833079
      > SELECT _FUNC_(null);
       1.1164209726833079
  """,
  note = "The function is non-deterministic in general case.")
// scalastyle:on line.size.limit
case class Randn(child: Expression) extends RDG with ExpressionWithRandomSeed {

  def this() = this(Literal(Utils.random.nextLong(), LongType))

  override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType))

  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    val className = classOf[XORShiftRandom].getName
    val rngTerm = ctx.addMutableState(className, "rng")
    ctx.addPartitionInitializationStatement(
      s"$rngTerm = new $className(${seed}L + partitionIndex);")
    ev.copy(code = code"""
      final ${CodeGenerator.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();""",
      isNull = FalseLiteral)
  }

  override def freshCopy(): Randn = Randn(child)
}

object Randn {
  def apply(seed: Long): Randn = Randn(Literal(seed, LongType))
}
Example 6
Source File: Vector.scala From SparkCore with Apache License 2.0
package org.apache.spark.util

import scala.language.implicitConversions
import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
  def length: Int = elements.length

  def apply(index: Int): Double = elements(index)

  def + (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) + other(i))
  }

  def add(other: Vector): Vector = this + other

  def - (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) - other(i))
  }

  def subtract(other: Vector): Vector = this - other

  def dot(other: Vector): Double = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    var ans = 0.0
    var i = 0
    while (i < length) {
      ans += this(i) * other(i)
      i += 1
    }
    ans
  }

  def random(length: Int, random: Random = new XORShiftRandom()): Vector =
    Vector(length, _ => random.nextDouble())

  class Multiplier(num: Double) {
    def * (vec: Vector): Vector = vec * num
  }

  implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num)

  implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
    def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2
    def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length)
  }
}
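A small usage sketch for the class above (hypothetical, not from SparkCore). Note that in the full Spark source the helpers at the end of the excerpt (random, the Multiplier implicits, the accumulator param) live on a companion object Vector, together with the apply and zeros factories the code refers to; this excerpt truncates that object. Assuming the full file, and a made-up package under org.apache.spark so that the package-private XORShiftRandom is accessible:

package org.apache.spark.examples.sketch  // hypothetical package, chosen to sit under org.apache.spark

import org.apache.spark.util.Vector
import org.apache.spark.util.random.XORShiftRandom

object VectorDemo {
  def main(args: Array[String]): Unit = {
    val rng = new XORShiftRandom(42L)
    val a = Vector.random(5, rng)     // five uniform draws from the XORShiftRandom
    val b = Vector.random(5, rng)
    println("a dot b = " + a.dot(b))  // inner product, as defined in the excerpt
    println("a + b   = " + (a + b))   // element-wise sum
  }
}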
Example 7
Source File: Vector.scala From iolap with Apache License 2.0
package org.apache.spark.util

import scala.language.implicitConversions
import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
  def length: Int = elements.length

  def apply(index: Int): Double = elements(index)

  def + (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) + other(i))
  }

  def add(other: Vector): Vector = this + other

  def - (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) - other(i))
  }

  def subtract(other: Vector): Vector = this - other

  def dot(other: Vector): Double = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    var ans = 0.0
    var i = 0
    while (i < length) {
      ans += this(i) * other(i)
      i += 1
    }
    ans
  }

  def random(length: Int, random: Random = new XORShiftRandom()): Vector =
    Vector(length, _ => random.nextDouble())

  class Multiplier(num: Double) {
    def * (vec: Vector): Vector = vec * num
  }

  implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num)

  implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
    def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2
    def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length)
  }
}
Example 8
Source File: randomExpressions.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to rand must be an integer literal.")
  })

  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.isNull = "false"
    s"""
      final ${ctx.javaType(dataType)} ${ev.primitive} = $rngTerm.nextGaussian();
    """
  }
}
Example 9
Source File: Vector.scala From spark1.52 with Apache License 2.0
package org.apache.spark.util

import scala.language.implicitConversions
import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
  def length: Int = elements.length

  def apply(index: Int): Double = elements(index)

  def + (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) + other(i))
  }

  def add(other: Vector): Vector = this + other

  def - (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) - other(i))
  }

  def subtract(other: Vector): Vector = this - other

  def dot(other: Vector): Double = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    var ans = 0.0
    var i = 0
    while (i < length) {
      ans += this(i) * other(i)
      i += 1
    }
    ans
  }

  def random(length: Int, random: Random = new XORShiftRandom()): Vector =
    Vector(length, _ => random.nextDouble())

  class Multiplier(num: Double) {
    def * (vec: Vector): Vector = vec * num
  }

  implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num)

  implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
    def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2
    def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length)
  }
}
Example 10
Source File: randomExpressions.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.TaskContext
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
import org.apache.spark.sql.types.{DataType, DoubleType}
import org.apache.spark.util.Utils
import org.apache.spark.util.random.XORShiftRandom

case class Randn(seed: Long) extends RDG {
  override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian()

  def this() = this(Utils.random.nextLong())

  def this(seed: Expression) = this(seed match {
    case IntegerLiteral(s) => s
    case _ => throw new AnalysisException("Input argument to rand must be an integer literal.")
  })

  override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = {
    val rngTerm = ctx.freshName("rng")
    val className = classOf[XORShiftRandom].getName
    ctx.addMutableState(className, rngTerm,
      s"$rngTerm = new $className(${seed}L + org.apache.spark.TaskContext.getPartitionId());")
    ev.isNull = "false"
    s"""
      final ${ctx.javaType(dataType)} ${ev.value} = $rngTerm.nextGaussian();
    """
  }
}
Example 11
Source File: Vector.scala From BigDatalog with Apache License 2.0
package org.apache.spark.util

import scala.language.implicitConversions
import scala.util.Random

import org.apache.spark.util.random.XORShiftRandom

@deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0")
class Vector(val elements: Array[Double]) extends Serializable {
  def length: Int = elements.length

  def apply(index: Int): Double = elements(index)

  def + (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) + other(i))
  }

  def add(other: Vector): Vector = this + other

  def - (other: Vector): Vector = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    Vector(length, i => this(i) - other(i))
  }

  def subtract(other: Vector): Vector = this - other

  def dot(other: Vector): Double = {
    if (length != other.length) {
      throw new IllegalArgumentException("Vectors of different length")
    }
    var ans = 0.0
    var i = 0
    while (i < length) {
      ans += this(i) * other(i)
      i += 1
    }
    ans
  }

  def random(length: Int, random: Random = new XORShiftRandom()): Vector =
    Vector(length, _ => random.nextDouble())

  class Multiplier(num: Double) {
    def * (vec: Vector): Vector = vec * num
  }

  implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num)

  implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] {
    def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2
    def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length)
  }
}