org.apache.spark.mllib.random.RandomDataGenerator Scala Examples
The following examples show how to use org.apache.spark.mllib.random.RandomDataGenerator.
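Each example below either implements or feeds a RandomDataGenerator[T], which carries three obligations: nextValue() returns the next random value, setSeed(seed) (inherited from Pseudorandom) makes the stream reproducible, and copy() returns an independent instance so each RDD partition can draw from its own generator. A minimal sketch of the pattern, using a hypothetical unit-exponential generator (the class name is illustrative and not from any of the projects below):

import org.apache.spark.mllib.random.RandomDataGenerator

// Minimal RandomDataGenerator sketch: Exp(1) values via inverse-transform
// sampling. Illustrative only; MLlib ships its own ExponentialGenerator.
class UnitExponentialGenerator extends RandomDataGenerator[Double] {
  private val rng = new java.util.Random()

  // Inverse CDF of Exp(1): -ln(1 - u) for u ~ U(0, 1).
  override def nextValue(): Double = -math.log1p(-rng.nextDouble())

  override def setSeed(seed: Long): Unit = rng.setSeed(seed)

  // Must return a fresh instance: RandomRDDs seeds one copy per partition.
  override def copy(): UnitExponentialGenerator = new UnitExponentialGenerator
}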
Example 1
Source File: TestLPSolver.scala From spark-lp with Apache License 2.0
// Imports are not part of the original snippet; they are reconstructed here
// from the spark-lp project layout, so the exact package paths are assumptions.
import scala.util.Random

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.lp.LP
import org.apache.spark.mllib.optimization.lp.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.rdd.RandomVectorRDD
// SparseStandardNormalGenerator is defined alongside the example; see the
// reconstruction sketch after this listing.

object TestLPSolver {
  def main(args: Array[String]) {
    val rnd = new Random(12345)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLPSolver")
    val sc = new SparkContext(sparkConf)

    val n = 1000 // Transpose constraint matrix row count.
    val m = 100 // Transpose constraint matrix column count.
    val numPartitions = 2

    // Generate the starting vector from the uniform distribution U(3.0, 5.0).
    println("generate x")
    val x0 = RandomRDDs.uniformRDD(sc, n, numPartitions)
      .map(v => 3.0 + 2.0 * v)
      .glom
      .map(new DenseVector(_))

    // Generate the transpose constraint matrix 'B' using sparse standard normal values.
    println("generate B")
    val B = new RandomVectorRDD(sc, n, m, numPartitions,
      new SparseStandardNormalGenerator(0.1), rnd.nextLong)

    // Generate the cost vector 'c' using uniformly generated values.
    println("generate c")
    val c = RandomRDDs.uniformRDD(sc, n, numPartitions, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' from the starting 'x' vector, so the constraints are feasible.
    println("generate b")
    val b = (new LinopMatrixAdjoint(B))(x0)

    // Solve the linear program using LP.solve, obtaining the optimal objective
    // value (the optimal x vector is discarded here).
    println("Start solving ...")
    val (optimalVal, _) = LP.solve(c, B, b, sc = sc)
    println("optimalVal: " + optimalVal)
    //println("optimalX: " + optimalX.collectElements.mkString(", "))
    sc.stop()
  }
}
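Both LP examples build their sparse constraint matrix from SparseStandardNormalGenerator, which is not an MLlib class; in spark-tfocs it is defined in the same source file as TestLinearProgram, which is why RandomDataGenerator and XORShiftRandom appear in Example 2's imports. A plausible reconstruction, assuming the density-thresholding behavior the name implies and that the code lives under the org.apache.spark namespace (XORShiftRandom is package-private to Spark):

import org.apache.spark.mllib.random.RandomDataGenerator
import org.apache.spark.util.random.XORShiftRandom

// Emits a standard normal value with probability `density` and 0.0 otherwise,
// producing a sparse N(0, 1) stream. Reconstructed sketch; the projects' own
// definitions may differ in detail.
class SparseStandardNormalGenerator(density: Double) extends RandomDataGenerator[Double] {

  private val random = new XORShiftRandom()

  override def nextValue(): Double =
    if (random.nextDouble() < density) random.nextGaussian() else 0.0

  override def setSeed(seed: Long): Unit = random.setSeed(seed)

  override def copy(): SparseStandardNormalGenerator =
    new SparseStandardNormalGenerator(density)
}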
Example 2
Source File: TestLinearProgram.scala From spark-tfocs with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.DenseVector
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.SolverSLP
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.random.{ RandomDataGenerator, RandomRDDs }
import org.apache.spark.mllib.rdd.RandomVectorRDD
import org.apache.spark.{ SparkConf, SparkContext }
import org.apache.spark.util.random.XORShiftRandom

object TestLinearProgram {
  def main(args: Array[String]) {
    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLinearProgram")
    val sc = new SparkContext(sparkConf)

    val n = 5000 // Transpose constraint matrix row count.
    val m = n / 2 // Transpose constraint matrix column count.

    // Generate a starting 'x' vector, using normally generated values.
    val x = RandomRDDs.normalRDD(sc, n).map(_ + 10).glom.map(new DenseVector(_))

    // Generate the transpose constraint matrix 'A' using sparse normally generated values.
    val A = new RandomVectorRDD(sc, n, m, sc.defaultMinPartitions,
      new SparseStandardNormalGenerator(0.01), rnd.nextLong)

    // Generate the cost vector 'c' using normally generated values.
    val c = RandomRDDs.normalRDD(sc, n, 0, rnd.nextLong).glom.map(new DenseVector(_))

    // Compute 'b' using the starting 'x' vector.
    val b = new LinopMatrixAdjoint(A)(x)

    val mu = 1e-2

    // Solve the linear program using SolverSLP, finding the optimal x vector 'optimalX'.
    val (optimalX, _) = SolverSLP.run(c, A, b, mu)
    println("optimalX: " + optimalX.collectElements.mkString(", "))

    sc.stop()
  }
}
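A note on why 'b' is computed from the starting vector in both LP examples rather than generated independently: it makes the equality constraints consistent by construction. Assuming the standard-form problem that the transposed data layout suggests (the solvers' exact smoothed or regularized formulations may differ):

\min_{x \ge 0} \; c^{\top} x
\quad \text{subject to} \quad A^{\top} x = b,
\qquad \text{with } b := A^{\top} x_0

With this choice the starting point x0 (entrywise positive in Example 1, and almost surely positive in Example 2 after the +10 shift) satisfies the constraints, so the solver never starts from an infeasible program.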
Example 3
Source File: RatingGenerator.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import scala.collection.mutable

import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.mllib.random.RandomDataGenerator

class RatingGenerator(
    private val numUsers: Int,
    private val numProducts: Int,
    private val implicitPrefs: Boolean) extends RandomDataGenerator[Rating[Int]] {

  private val rng = new java.util.Random()

  // Tracks (user, product) pairs already emitted, so each pair is rated at most once.
  private val observed = new mutable.HashMap[(Int, Int), Boolean]()

  override def nextValue(): Rating[Int] = {
    // Rejection-sample until an unseen (user, product) pair is found.
    var tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    while (observed.getOrElse(tuple, false)) {
      tuple = (rng.nextInt(numUsers), rng.nextInt(numProducts))
    }
    observed += (tuple -> true)

    // Implicit feedback is 0/1; explicit ratings are uniform in [0, 5).
    val rating = if (implicitPrefs) rng.nextInt(2) * 1.0 else rng.nextDouble() * 5

    new Rating(tuple._1, tuple._2, rating.toFloat)
  }

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
  }

  override def copy(): RatingGenerator = new RatingGenerator(numUsers, numProducts, implicitPrefs)
}
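A generator like RatingGenerator plugs straight into RandomRDDs.randomRDD, which copies and seeds it once per partition. A hypothetical wiring (sizes and names are illustrative):

import org.apache.spark.SparkContext
import org.apache.spark.ml.recommendation.ALS.Rating
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.rdd.RDD

// Build 10,000 synthetic explicit ratings over 100 users x 50 products.
def syntheticRatings(sc: SparkContext): RDD[Rating[Int]] = {
  val gen = new RatingGenerator(numUsers = 100, numProducts = 50, implicitPrefs = false)
  RandomRDDs.randomRDD(sc, gen, size = 10000L, numPartitions = 4, seed = 42L)
}

One caveat follows from copy(): each partition starts with an empty observed map, so (user, product) pairs are deduplicated only within a partition, not across the whole RDD.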
Example 4
Source File: ItemSetGenerator.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.data

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.mllib.random.{PoissonGenerator, RandomDataGenerator}

class ItemSetGenerator(
    val numItems: Int,
    val avgItemSetSize: Int) extends RandomDataGenerator[Array[String]] {

  assert(avgItemSetSize > 2)
  assert(numItems > 2)

  private val rng = new java.util.Random()
  private val itemSetSizeRng = new PoissonGenerator(avgItemSetSize - 2)
  private val itemRng = new PoissonGenerator(numItems / 2.0)

  override def setSeed(seed: Long) {
    rng.setSeed(seed)
    itemSetSizeRng.setSeed(seed)
    itemRng.setSeed(seed)
  }

  override def nextValue(): Array[String] = {
    // 1. Generate the size of the itemset.
    val size = DataGenUtil.nextPoisson(itemSetSizeRng, v => v >= 1 && v <= numItems).toInt
    val arrayBuff = new ArrayBuffer[Int](size + 2)

    // 2. Generate the items in the itemset.
    var i = 0
    while (i < size) {
      val nextVal = DataGenUtil.nextPoisson(itemRng, (item: Double) => {
        item >= 0 && item < numItems && !arrayBuff.contains(item)
      }).toInt
      arrayBuff.append(nextVal)
      i += 1
    }

    // 3. Generate association rules by adding two computed items.
    // 3.1. Add a new item = (firstItem + numItems / 2) % numItems.
    val newItem1 = (arrayBuff(0) + numItems / 2) % numItems
    if (!arrayBuff.contains(newItem1)) {
      arrayBuff.append(newItem1)
    }

    // 3.2. Add a new item = (firstItem + secondItem) % numItems.
    if (arrayBuff.size >= 2) {
      val newItem2 = (arrayBuff(0) + arrayBuff(1)) % numItems
      if (!arrayBuff.contains(newItem2)) {
        arrayBuff.append(newItem2)
      }
    }

    arrayBuff.map(_.toString).toArray
  }

  override def copy(): ItemSetGenerator = new ItemSetGenerator(numItems, avgItemSetSize)
}
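ItemSetGenerator delegates its truncated Poisson draws to DataGenUtil.nextPoisson, a helper from the same spark-sql-perf package that the snippet does not show. Its observable contract is rejection sampling: keep drawing until the predicate accepts a value. A plausible sketch (the real implementation may differ):

import org.apache.spark.mllib.random.PoissonGenerator

object DataGenUtil {
  // Draw from `gen` until `condition` holds; assumes the predicate accepts
  // some reachable value, otherwise this loops forever.
  def nextPoisson(gen: PoissonGenerator, condition: Double => Boolean): Double = {
    var value = gen.nextValue()
    while (!condition(value)) {
      value = gen.nextValue()
    }
    value
  }
}

Because the item-level predicate also rejects values already in arrayBuff, draws get slower as the itemset fills up; the assertions on numItems and avgItemSetSize keep the acceptance probability reasonable.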