org.apache.spark.mllib.random.RandomDataGenerator Scala Examples
The following examples show how to use org.apache.spark.mllib.random.RandomDataGenerator.
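Each example below either implements or feeds a RandomDataGenerator[T], which carries three obligations: nextValue() returns the next random value, setSeed(seed) (inherited from Pseudorandom) makes the stream reproducible, and copy() returns an independent instance so each RDD partition can draw from its own generator. A minimal sketch of the pattern, using a hypothetical unit-exponential generator (the class name is illustrative and not from any of the projects below):

import org.apache.spark.mllib.random.RandomDataGenerator

// Minimal RandomDataGenerator sketch: Exp(1) values via inverse-transform
// sampling. Illustrative only; MLlib ships its own ExponentialGenerator.
class UnitExponentialGenerator extends RandomDataGenerator[Double] {
  private val rng = new java.util.Random()

  // Inverse CDF of Exp(1): -ln(1 - u) for u ~ U(0, 1).
  override def nextValue(): Double = -math.log1p(-rng.nextDouble())

  override def setSeed(seed: Long): Unit = rng.setSeed(seed)

  // Must return a fresh instance: RandomRDDs seeds one copy per partition.
  override def copy(): UnitExponentialGenerator = new UnitExponentialGenerator
}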
Example 1
Source File: TestLPSolver.scala From spark-lp with Apache License 2.0
// Imports are not part of the original snippet; they are reconstructed here
// from the spark-lp project layout, so the exact package paths are assumptions.
import scala.util.Random

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.lp.LP
import org.apache.spark.mllib.optimization.lp.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.rdd.RandomVectorRDD
// SparseStandardNormalGenerator is defined alongside the example; see the
// reconstruction sketch after this listing.

object TestLPSolver {
  def main(args: Array[String]) {
    val rnd = new Random(12345)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLPSolver")
    val sc = new SparkContext(sparkConf)

    val n = 1000 // Transpose constraint matrix row count.
    val m = 100 // Transpose constraint matrix column count.
    val numPartitions = 2

    // Generate the starting vector from the uniform distribution U(3.0, 5.0).
    println("generate x")
    val x0 = RandomRDDs.uniformRDD(sc, n, numPartitions)
      .map(v => 3.0 + 2.0 * v)
      .glom
      .map(new DenseVector(_))

    // Generate the transpose constraint matrix 'B' using sparse standard normal values.
    println("generate B")
    val B = new RandomVectorRDD(sc, n, m, numPartitions,
      new SparseStandardNormalGenerator(0.1), rnd.nextLong)

    // Generate the cost vector 'c' using uniformly generated values.
    println("generate c")
    val c = RandomRDDs.uniformRDD(sc, n, numPartitions, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' from the starting 'x' vector, so the constraints are feasible.
    println("generate b")
    val b = (new LinopMatrixAdjoint(B))(x0)

    // Solve the linear program using LP.solve, obtaining the optimal objective
    // value (the optimal x vector is discarded here).
    println("Start solving ...")
    val (optimalVal, _) = LP.solve(c, B, b, sc = sc)
    println("optimalVal: " + optimalVal)
    //println("optimalX: " + optimalX.collectElements.mkString(", "))
    sc.stop()
  }
}
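Both LP examples build their sparse constraint matrix from SparseStandardNormalGenerator, which is not an MLlib class; in spark-tfocs it is defined in the same source file as TestLinearProgram, which is why RandomDataGenerator and XORShiftRandom appear in Example 2's imports. A plausible reconstruction, assuming the density-thresholding behavior the name implies and that the code lives under the org.apache.spark namespace (XORShiftRandom is package-private to Spark):

import org.apache.spark.mllib.random.RandomDataGenerator
import org.apache.spark.util.random.XORShiftRandom

// Emits a standard normal value with probability `density` and 0.0 otherwise,
// producing a sparse N(0, 1) stream. Reconstructed sketch; the projects' own
// definitions may differ in detail.
class SparseStandardNormalGenerator(density: Double) extends RandomDataGenerator[Double] {

  private val random = new XORShiftRandom()

  override def nextValue(): Double =
    if (random.nextDouble() < density) random.nextGaussian() else 0.0

  override def setSeed(seed: Long): Unit = random.setSeed(seed)

  override def copy(): SparseStandardNormalGenerator =
    new SparseStandardNormalGenerator(density)
}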
Example 2
Source File: TestLinearProgram.scala From spark-tfocs with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.SolverSLP
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.{ RandomDataGenerator, RandomRDDs }
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.util.random.XORShiftRandom

object TestLinearProgram {
  def main(args: Array[String]) {
    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLinearProgram")
    val sc = new SparkContext(sparkConf)

    val n = 5000 // Transpose constraint matrix row count.
    val m = n / 2 // Transpose constraint matrix column count.

    // Generate a starting 'x' vector, using normally generated values.
    val x = RandomRDDs.normalRDD(sc, n).map(_ + 10).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'A' using sparse normally generated values.
    val A = new RandomVectorRDD(sc, n, m, sc.defaultMinPartitions,
      new SparseStandardNormalGenerator(0.01), rnd.nextLong)

    // Generate the cost vector 'c' using normally generated values.
    val c = RandomRDDs.normalRDD(sc, n, 0, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    val b = new LinopMatrixAdjoint(A)(x)

    val mu = 1e-2

    // Solve the linear program using SolverSLP, finding the optimal x vector 'optimalX'.
    val (optimalX, _) = SolverSLP.run(c, A, b, mu)
    println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
}
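A note on why 'b' is computed from the starting vector in both LP examples rather than generated independently: it makes the equality constraints consistent by construction. Assuming the standard-form problem that the transposed data layout suggests (the solvers' exact smoothed or regularized formulations may differ):

\min_{x \ge 0} \; c^{\top} x
\quad \text{subject to} \quad A^{\top} x = b,
\qquad \text{with } b := A^{\top} x_0

With this choice the starting point x0 (entrywise positive in Example 1, and almost surely positive in Example 2 after the +10 shift) satisfies the constraints, so the solver never starts from an infeasible program.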
Example 3
Source File: RatingGenerator.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import scala.collection.mutable

import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.mllib.random.RandomDataGenerator

class RatingGenerator(
    private val numUsers: Int,
    private val numProducts: Int,
    private val implicitPrefs: Boolean) extends RandomDataGenerator[Rating[Int]] {

  private val rng = new java.util.Random()

  // Tracks (user, product) pairs already emitted, so each pair is rated at most once.
  private val observed = new mutable.HashMap[(Int, Int), Boolean]()

  override def nextValue(): Rating[Int] = {
    // Rejection-sample until an unseen (user, product) pair is found.
    var tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    while (observed.getOrElse(tuple, false)) {
      tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    }
    observed += (tuple -> true)

    // Implicit feedback is 0/1; explicit ratings are uniform in [0, 5).
    val rating = if (implicitPrefs) rng.nextInt(2) * 1.0 else rng.nextDouble() * 5

    new Rating(tuple._1, tuple._2, rating.toFloat)
  }

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
  }

  override def copy(): RatingGenerator = new RatingGenerator(numUsers, numProducts, implicitPrefs)
}
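A generator like RatingGenerator plugs straight into RandomRDDs.randomRDD, which copies and seeds it once per partition. A hypothetical wiring (sizes and names are illustrative):

import org.apache.spark.SparkContext
import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.rdd.RDD

// Build 10,000 synthetic explicit ratings over 100 users x 50 products.
def syntheticRatings(sc: SparkContext): RDD[Rating[Int]] = {
  val gen = new RatingGenerator(numUsers = 100, numProducts = 50, implicitPrefs = false)
  RandomRDDs.randomRDD(sc, gen, size = 10000L, numPartitions = 4, seed = 42L)
}

One caveat follows from copy(): each partition starts with an empty observed map, so (user, product) pairs are deduplicated only within a partition, not across the whole RDD.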
Example 4
Source File: ItemSetGenerator.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.mllib.random.{PoissonGenerator, RandomDataGenerator}

class ItemSetGenerator(
    val numItems: Int,
    val avgItemSetSize: Int) extends RandomDataGenerator[Array[String]] {

  assert(avgItemSetSize > 2)
  assert(numItems > 2)

  private val rng = new java.util.Random()
  private val itemSetSizeRng = new PoissonGenerator(avgItemSetSize - 2)
  private val itemRng = new PoissonGenerator(numItems / 2.0)

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
    itemSetSizeRng.setSeed(seed)
    itemRng.setSeed(seed)
  }

  override def nextValue(): Array[String] = {
    // 1. Generate the size of the itemset.
    val size = DataGenUtil.nextPoisson(itemSetSizeRng, v => v >= 1 && v <= numItems).toInt
    val arrayBuff = new ArrayBuffer[Int](size + 2)

    // 2. Generate the items in the itemset.
    var i = 0
    while (i < size) {
      val nextVal = DataGenUtil.nextPoisson(itemRng, (item: Double) => {
        item >= 0 && item < numItems && !arrayBuff.contains(item)
      }).toInt
      arrayBuff.append(nextVal)
      i += 1
    }

    // 3. Generate association rules by adding two computed items.
    // 3.1. Add a new item = (firstItem + numItems / 2) % numItems.
    val newItem1 = (arrayBuff(0) + numItems / 2) % numItems
    if (!arrayBuff.contains(newItem1)) {
      arrayBuff.append(newItem1)
    }

    // 3.2. Add a new item = (firstItem + secondItem) % numItems.
    if (arrayBuff.size >= 2) {
      val newItem2 = (arrayBuff(0) + arrayBuff(1)) % numItems
      if (!arrayBuff.contains(newItem2)) {
        arrayBuff.append(newItem2)
      }
    }

    arrayBuff.map(_.toString).toArray
  }

  override def copy(): ItemSetGenerator = new ItemSetGenerator(numItems, avgItemSetSize)
}
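ItemSetGenerator delegates its truncated Poisson draws to DataGenUtil.nextPoisson, a helper from the same spark-sql-perf package that the snippet does not show. Its observable contract is rejection sampling: keep drawing until the predicate accepts a value. A plausible sketch (the real implementation may differ):

import org.apache.spark.mllib.random.PoissonGenerator

object DataGenUtil {
  // Draw from `gen` until `condition` holds; assumes the predicate accepts
  // some reachable value, otherwise this loops forever.
  def nextPoisson(gen: PoissonGenerator, condition: Double => Boolean): Double = {
    var value = gen.nextValue()
    while (!condition(value)) {
      value = gen.nextValue()
    }
    value
  }
}

Because the item-level predicate also rejects values already in arrayBuff, draws get slower as the itemset fills up; the assertions on numItems and avgItemSetSize keep the acceptance probability reasonable.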