org.apache.spark.mllib.linalg.BLAS Scala Examples
The following examples show how to use org.apache.spark.mllib.linalg.BLAS.
These examples are taken from open source projects; the source file, originating project, and license are noted above each example. Note that BLAS is a package-private (private[spark]) object, which is why every example below lives in a package under org.apache.spark.
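Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the package and object names are made up for illustration) of the BLAS routines the examples rely on: scal, axpy, dot, and gemm. Like every example on this page, it sits under org.apache.spark so that it can see the package-private BLAS object.

// Illustrative sketch only: package and object names are hypothetical.
package org.apache.spark.mllib.example

import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Vectors}

object BlasBasics {
  def main(args: Array[String]): Unit = {
    val x = Vectors.dense(1.0, 2.0, 3.0).toDense
    val y = Vectors.dense(4.0, 5.0, 6.0).toDense

    BLAS.scal(2.0, x)       // x := 2 * x, in place
    BLAS.axpy(1.5, x, y)    // y := 1.5 * x + y, in place
    val d = BLAS.dot(x, y)  // d := x . y

    // C := alpha * A * B + beta * C
    val A = DenseMatrix.ones(2, 3)
    val B = DenseMatrix.ones(3, 2)
    val C = DenseMatrix.zeros(2, 2)
    BLAS.gemm(0.5, A, B, 1.0, C)

    println(s"dot = $d")
    println(s"C =\n$C")
  }
}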
Example 1
Source File: MFDataGenerator.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.mllib.util

import java.{util => ju}

import scala.util.Random

import org.apache.spark.SparkContext
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
import org.apache.spark.rdd.RDD

@DeveloperApi
@Since("0.8.0")
object MFDataGenerator {

  @Since("0.8.0")
  def main(args: Array[String]) {
    if (args.length < 2) {
      // scalastyle:off println
      println("Usage: MFDataGenerator " +
        "<master> <outputDir> [m] [n] [rank] [trainSampFact] [noise] [sigma] [test] [testSampFact]")
      // scalastyle:on println
      System.exit(1)
    }

    val sparkMaster: String = args(0)
    val outputPath: String = args(1)
    val m: Int = if (args.length > 2) args(2).toInt else 100
    val n: Int = if (args.length > 3) args(3).toInt else 100
    val rank: Int = if (args.length > 4) args(4).toInt else 10
    val trainSampFact: Double = if (args.length > 5) args(5).toDouble else 1.0
    val noise: Boolean = if (args.length > 6) args(6).toBoolean else false
    val sigma: Double = if (args.length > 7) args(7).toDouble else 0.1
    val test: Boolean = if (args.length > 8) args(8).toBoolean else false
    val testSampFact: Double = if (args.length > 9) args(9).toDouble else 0.1

    val sc = new SparkContext(sparkMaster, "MFDataGenerator")

    val random = new ju.Random(42L)

    // Build a low-rank matrix: fullData = z * A * B, computed with BLAS.gemm.
    val A = DenseMatrix.randn(m, rank, random)
    val B = DenseMatrix.randn(rank, n, random)
    val z = 1 / math.sqrt(rank)
    val fullData = DenseMatrix.zeros(m, n)
    BLAS.gemm(z, A, B, 1.0, fullData)

    val df = rank * (m + n - rank)
    val sampSize = math.min(math.round(trainSampFact * df), math.round(.99 * m * n)).toInt
    val rand = new Random()
    val mn = m * n
    val shuffled = rand.shuffle((0 until mn).toList)

    val omega = shuffled.slice(0, sampSize)
    val ordered = omega.sortWith(_ < _).toArray
    val trainData: RDD[(Int, Int, Double)] = sc.parallelize(ordered)
      .map(x => (x % m, x / m, fullData.values(x)))

    // Optionally add gaussian noise.
    val finalTrainData = if (noise) {
      trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
    } else {
      trainData
    }

    finalTrainData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)

    // Optionally generate test data, written to a separate directory so it does not
    // collide with the training output.
    if (test) {
      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
      val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
      val testOrdered = testOmega.sortWith(_ < _).toArray
      val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
        .map(x => (x % m, x / m, fullData.values(x)))
      testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath + "-test")
    }

    sc.stop()
  }
}
Example 2
Source File: package.scala From spark-lp with Apache License 2.0

implicit object DenseVectorSpace extends VectorSpace[DenseVector] {

  override def combine(alpha: Double, a: DenseVector, beta: Double, b: DenseVector): DenseVector = {
    // ret = alpha * a + beta * b
    val ret = a.copy
    BLAS.scal(alpha, ret)
    BLAS.axpy(beta, b, ret)
    ret
  }

  override def dot(a: DenseVector, b: DenseVector): Double = BLAS.dot(a, b)

  override def entrywiseProd(a: DenseVector, b: DenseVector): DenseVector = {
    val c = a.values.zip(b.values).map { case (i: Double, j: Double) => i * j }
    new DenseVector(c)
  }

  override def entrywiseNegDiv(a: DenseVector, b: DenseVector): DenseVector = {
    val c = a.values.zip(b.values).map {
      case (ai, bi) if bi < 0 => ai / Math.max(Math.abs(bi), 1e-15)
      case (_, bi) if bi >= 0 => Double.PositiveInfinity // +Infinity entries are ignored when taking the min.
    }
    new DenseVector(c)
  }

  override def sum(a: DenseVector): Double = a.values.sum

  override def max(a: DenseVector): Double = a.values.max

  override def min(a: DenseVector): Double = a.values.min
}
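A minimal usage sketch for the space above (assuming the implicit object is in scope): combine evaluates alpha * a + beta * b with one copy, a scal and an axpy.

// Usage sketch, assuming DenseVectorSpace from the excerpt above is in scope.
import org.apache.spark.mllib.linalg.Vectors

val a = Vectors.dense(1.0, 2.0, 3.0).toDense
val b = Vectors.dense(4.0, 5.0, 6.0).toDense

val c = DenseVectorSpace.combine(2.0, a, -1.0, b) // c = 2a - b = [-2.0, -1.0, 0.0]
val d = DenseVectorSpace.dot(a, b)                // d = 32.0
val e = DenseVectorSpace.entrywiseNegDiv(a, b)    // all +Infinity, since no entry of b is negative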
Example 3
Source File: package.scala From spark-lp with Apache License 2.0

implicit object DVectorSpace extends VectorSpace[DVector] {

  override def combine(alpha: Double, a: DVector, beta: Double, b: DVector): DVector =
    if (alpha == 1.0 && beta == 1.0) {
      a.zip(b).map { case (aPart, bPart) =>
        BLAS.axpy(1.0, aPart, bPart) // bPart += aPart
        bPart
      }
    } else {
      a.zip(b).map { case (aPart, bPart) =>
        // NOTE A DenseVector result is assumed here (not sparse safe).
        DenseVectorSpace.combine(alpha, aPart, beta, bPart).toDense
      }
    }

  override def dot(a: DVector, b: DVector): Double = a.dot(b)

  override def entrywiseProd(a: DVector, b: DVector): DVector = {
    a.zip(b).map { case (aPart, bPart) =>
      DenseVectorSpace.entrywiseProd(aPart, bPart).toDense
    }
  }

  override def entrywiseNegDiv(a: DVector, b: DVector): DVector = {
    a.zip(b).map { case (aPart, bPart) =>
      DenseVectorSpace.entrywiseNegDiv(aPart, bPart)
    }
  }

  override def sum(a: DVector): Double = a.aggregate(0.0)(
    seqOp = (acc: Double, v: DenseVector) => acc + v.values.sum,
    combOp = (acc1: Double, acc2: Double) => acc1 + acc2)

  override def min(a: DVector): Double = a.aggregate(Double.PositiveInfinity)(
    (mi, x) => Math.min(mi, x.values.min), Math.min)

  override def max(a: DVector): Double = a.aggregate(Double.NegativeInfinity)(
    (ma, x) => Math.max(ma, x.values.max), Math.max)

  override def cache(a: DVector): Unit =
    if (a.getStorageLevel == StorageLevel.NONE) {
      a.cache()
    }
}
Example 4
Source File: LinopMatrixAdjoint.scala From spark-lp with Apache License 2.0

override def apply(x: DVector): DenseVector = {
  val n = this.n
  matrix.zipPartitions(x)((matrixPartition, xPartition) =>
    Iterator.single(
      matrixPartition.checkedZip(xPartition.next.values.toIterator).aggregate(
        // NOTE A DenseVector result is assumed here (not sparse safe).
        Vectors.zeros(n).toDense)(
        seqop = (_, _) match {
          case (sum, (matrix_i, x_i)) => {
            // Multiply an element of x by its corresponding matrix row, and add to the
            // accumulation sum vector.
            BLAS.axpy(x_i, matrix_i, sum)
            sum
          }
        },
        combop = (sum1, sum2) => {
          // Add the intermediate sum vectors.
          BLAS.axpy(1.0, sum2, sum1)
          sum1
        }
      ))
  ).treeAggregate(Vectors.zeros(n).toDense)(
    seqOp = (sum1, sum2) => {
      // Add the intermediate sum vectors.
      BLAS.axpy(1.0, sum2, sum1)
      sum1
    },
    combOp = (sum1, sum2) => {
      // Add the intermediate sum vectors.
      BLAS.axpy(1.0, sum2, sum1)
      sum1
    },
    depth
  )
}
Example 5
Source File: SpLinopMatrix.scala From spark-lp with Apache License 2.0

override def apply(mat: DMatrix): DMatrix = {
  dvector.zipPartitions(mat)((vectorPartition, matPartition) =>
    vectorPartition.next().values.toIterator.checkedZip(matPartition.toIterator).map {
      case (a: Double, x: Vector) =>
        val xc = x.copy
        BLAS.scal(a, xc)
        xc
    }
  )
}
Example 6
Source File: TestLASSO.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.SolverL1RLS
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.{ SparkConf, SparkContext }

object TestLASSO {

  def main(args: Array[String]) {

    val rnd = new Random(34324)

    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLASSO")
    val sc = new SparkContext(sparkConf)

    val n = 1024     // Design matrix column count.
    val m = n / 2    // Design matrix row count.
    val k = m / 5    // Count of nonzero weights.

    // Generate the design matrix using random normal values, then normalize the columns.
    val unnormalizedA = RandomRDDs.normalVectorRDD(sc, m, n, 0, rnd.nextLong)
    val AColumnNormSq = unnormalizedA.treeAggregate(Vectors.zeros(n).toDense)(
      seqOp = (sum, rowA) => {
        val rowASq = Vectors.dense(rowA.toArray.map(rowA_i => rowA_i * rowA_i))
        BLAS.axpy(1.0, rowASq, sum)
        sum
      },
      combOp = (sum1, sum2) => {
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      })
    val A = unnormalizedA.map(rowA =>
      Vectors.dense(rowA.toArray.zip(AColumnNormSq.toArray).map {
        case (rowA_i, normsq_i) => rowA_i / math.sqrt(normsq_i)
      }))

    // Generate the actual 'x' vector, including 'k' nonzero values.
    val x = Vectors.zeros(n).toDense
    for (i <- rnd.shuffle(0 to n - 1).take(k)) {
      x.values(i) = rnd.nextGaussian
    }

    // Generate the 'b' vector using the design matrix and weights, adding gaussian noise.
    val bOriginal = new DenseVector(A.map(rowA => BLAS.dot(rowA, x)).collect)
    val snr = 30 // SNR in dB
    val sigma =
      math.pow(10, ((10 * math.log10(math.pow(Vectors.norm(bOriginal, 2), 2) / n) - snr) / 20))
    val b = sc.parallelize(bOriginal.values.map(_ + sigma * rnd.nextGaussian))
      .glom
      .map(new DenseVector(_))

    // Set 'lambda' using the noise standard deviation.
    val lambda = 2 * sigma * math.sqrt(2 * math.log(n))

    // Solve the lasso problem using SolverL1RLS, finding the estimated x vector 'estimatedX'.
    val (estimatedX, _) = SolverL1RLS.run(A, b, lambda)
    println("estimatedX: " + estimatedX.values.mkString(", "))

    sc.stop()
  }
}
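A small follow-up sketch (a hypothetical helper, not part of TestLASSO) for judging the recovered weights against the true x generated above, using the same BLAS routines; like the example itself, it must live under an org.apache.spark package because BLAS is package-private.

// Hypothetical helper: relative error = ||estimated - truth||_2 / ||truth||_2.
import org.apache.spark.mllib.linalg.{BLAS, DenseVector, Vectors}

def relativeError(estimated: DenseVector, truth: DenseVector): Double = {
  val diff = estimated.copy
  BLAS.axpy(-1.0, truth, diff) // diff := estimated - truth
  Vectors.norm(diff, 2) / Vectors.norm(truth, 2)
}

// e.g. println("relative error: " + relativeError(estimatedX, x))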
Example 7
Source File: SolverSLP.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.double._
import org.apache.spark.mllib.optimization.tfocs.fs.dvectordouble.vector._
import org.apache.spark.mllib.optimization.tfocs.fs.vector.double._
import org.apache.spark.mllib.optimization.tfocs.vs.dvector._

object SolverSLP {

  def run(
      c: DVector,
      A: DMatrix,
      b: DenseVector,
      mu: Double,
      x0: Option[DVector] = None,
      z0: Option[DenseVector] = None,
      numContinuations: Int = 10,
      tol: Double = 1e-4,
      initialTol: Double = 1e-3,
      dualTolCheckInterval: Int = 10): (DVector, Array[Double]) = {

    // minusB = -b
    val minusB = b.copy
    BLAS.scal(-1.0, minusB)
    TFOCS_SCD.optimize(new ProxShiftRPlus(c), new LinopMatrixAdjoint(A, minusB), new ProxZero(),
      mu, x0.getOrElse(c.mapElements(_ => 0.0)), z0.getOrElse(Vectors.zeros(b.size).toDense),
      numContinuations, tol, initialTol, dualTolCheckInterval)
  }
}
Example 8
Source File: package.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.vs

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.VectorSpace

package object vector {

  implicit object DenseVectorSpace extends VectorSpace[DenseVector] {

    override def combine(alpha: Double, a: DenseVector, beta: Double, b: DenseVector): DenseVector = {
      // ret = alpha * a + beta * b
      val ret = a.copy
      BLAS.scal(alpha, ret)
      BLAS.axpy(beta, b, ret)
      ret
    }

    override def dot(a: DenseVector, b: DenseVector): Double = BLAS.dot(a, b)
  }
}
Example 9
Source File: package.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.vs

import org.apache.spark.mllib.linalg.BLAS
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.VectorSpace
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.mllib.optimization.tfocs.vs.vector.DenseVectorSpace
import org.apache.spark.storage.StorageLevel

package object dvector {

  implicit object DVectorSpace extends VectorSpace[DVector] {

    override def combine(alpha: Double, a: DVector, beta: Double, b: DVector): DVector =
      if (alpha == 1.0 && beta == 0.0) {
        // When minimizing rather than maximizing, the TFOCS implementation frequently requests a
        // no-op linear combination where alpha == 1.0 and beta == 0.0. This case is specifically
        // optimized.
        a
      } else {
        a.zip(b).map { case (aPart, bPart) =>
          // NOTE A DenseVector result is assumed here (not sparse safe).
          DenseVectorSpace.combine(alpha, aPart, beta, bPart).toDense
        }
      }

    override def dot(a: DVector, b: DVector): Double = a.dot(b)

    override def cache(a: DVector): Unit =
      if (a.getStorageLevel == StorageLevel.NONE) {
        a.cache()
      }
  }
}
Example 10
Source File: LinopMatrixAdjoint.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.fs.dvectordouble.vector

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.{ LinopMatrixAdjoint => Delegate }
import org.apache.spark.mllib.optimization.tfocs.fs.vector.dvectordouble.LinopMatrix
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._

class LinopMatrixAdjoint(private val A: DMatrix, private val b: DenseVector)
  extends LinearOperator[(DVector, Double), DenseVector] {

  private val delegate = new Delegate(A)

  override def apply(x: (DVector, Double)): DenseVector = {
    val ret = delegate.apply(x._1)
    BLAS.axpy(1.0, b, ret)
    ret
  }

  override def t: LinearOperator[DenseVector, (DVector, Double)] = new LinopMatrix(A, b)
}
Example 11
Source File: LinopMatrix.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.fs.vector.dvector

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.storage.StorageLevel

class LinopMatrix(private val matrix: DMatrix) extends LinearOperator[DenseVector, DVector] {

  if (matrix.getStorageLevel == StorageLevel.NONE) {
    matrix.cache()
  }

  override def apply(x: DenseVector): DVector = {
    val bcX = matrix.context.broadcast(x)
    // Take the dot product of each matrix row with x.
    // NOTE A DenseVector result is assumed here (not sparse safe).
    matrix.mapPartitions(partitionRows =>
      Iterator.single(new DenseVector(partitionRows.map(row => BLAS.dot(row, bcX.value)).toArray)))
  }

  override def t: LinearOperator[DVector, DenseVector] = new LinopMatrixAdjoint(matrix)
}
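A minimal usage sketch (assuming a live SparkContext sc and the spark-tfocs type aliases DVector = RDD[DenseVector] and DMatrix = RDD[Vector]): multiply a small distributed row matrix by a local dense vector.

// Usage sketch; 'sc' and the sample data are assumptions for illustration.
import org.apache.spark.mllib.linalg.Vectors

val matrix = sc.parallelize(Seq(
  Vectors.dense(1.0, 0.0),
  Vectors.dense(0.0, 2.0),
  Vectors.dense(3.0, 4.0)))                     // three rows, two columns

val op = new LinopMatrix(matrix)
val y = op(Vectors.dense(1.0, 1.0).toDense)     // row-wise dot products: 1.0, 2.0, 7.0
y.flatMap(_.values).collect().foreach(println)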
Example 12
Source File: LinopMatrixAdjoint.scala From spark-tfocs with Apache License 2.0

package org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector

import org.apache.spark.mllib.linalg.BLAS
import org.apache.spark.mllib.linalg.{ DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.CheckedIteratorFunctions._
import org.apache.spark.mllib.optimization.tfocs.fs.vector.dvector.LinopMatrix
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.storage.StorageLevel

class LinopMatrixAdjoint(@transient private val matrix: DMatrix)
  extends LinearOperator[DVector, DenseVector] {

  if (matrix.getStorageLevel == StorageLevel.NONE) {
    matrix.cache()
  }

  private lazy val n = matrix.first().size

  override def apply(x: DVector): DenseVector = {
    val n = this.n
    matrix.zipPartitions(x)((matrixPartition, xPartition) =>
      Iterator.single(
        matrixPartition.checkedZip(xPartition.next.values.toIterator).aggregate(
          // NOTE A DenseVector result is assumed here (not sparse safe).
          Vectors.zeros(n).toDense)(
          seqop = (_, _) match {
            case (sum, (matrix_i, x_i)) => {
              // Multiply an element of x by its corresponding matrix row, and add to the
              // accumulation sum vector.
              BLAS.axpy(x_i, matrix_i, sum)
              sum
            }
          },
          combop = (sum1, sum2) => {
            // Add the intermediate sum vectors.
            BLAS.axpy(1.0, sum2, sum1)
            sum1
          }
        ))
    ).treeAggregate(Vectors.zeros(n).toDense)(
      seqOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      },
      combOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      }
    )
  }

  override def t: LinearOperator[DenseVector, DVector] = new LinopMatrix(matrix)
}
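A minimal usage sketch of the adjoint (same assumptions as above, plus matrix and x given matching partitioning so that zipPartitions and checkedZip line up): it computes A^T x as a local DenseVector.

// Usage sketch; 'sc', the data, and the single-partition layout are assumptions.
import org.apache.spark.mllib.linalg.Vectors

val matrix = sc.parallelize(Seq(
  Vectors.dense(1.0, 0.0),
  Vectors.dense(0.0, 2.0)), 1)                                   // 2 x 2 row matrix, one partition

val x = sc.parallelize(Seq(Vectors.dense(3.0, 4.0).toDense), 1)  // one DenseVector per partition

val adjoint = new LinopMatrixAdjoint(matrix)
val aTx = adjoint(x)   // 3.0 * [1.0, 0.0] + 4.0 * [0.0, 2.0] = [3.0, 8.0]
println(aTx)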
Example 13
Source File: VLBFGS1.scala From spark-vl-bfgs with Apache License 2.0

package org.apache.spark.ml.optim

import java.util.Random

import scala.language.implicitConversions

import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.optim.VectorFreeLBFGS.{Oracle, VectorSpace}
import org.apache.spark.ml.optim.VectorRDDFunctions._
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.storage.StorageLevel

object VLBFGS1 {
  // NOTE Only the members relevant to BLAS usage are shown here; the solve method
  // referenced in main is defined elsewhere in this file.

  private def gradient(data: RDD[Array[LabeledPoint]], dx: RDD[Vector]): RDD[Vector] = {
    data.cartesian(dx).map { case (points, x) =>
      val g = Vectors.zeros(x.size)
      points.foreach { case LabeledPoint(b, a) =>
        // Accumulate the least squares gradient: g += (a . x - b) * a.
        val err = BLAS.dot(a, x) - b
        BLAS.axpy(err, a, g)
      }
      g
    }.treeSum()
  }

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("VLBFGS").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.setCheckpointDir("/tmp/checkpoint")
    val n = 1000
    val p = 100
    val random = new Random(0L)
    val xExact = Vectors.dense(Array.fill(p)(random.nextDouble()))

    val data = RandomRDDs.normalVectorRDD(sc, n, p, 4, 11L).mapPartitionsWithIndex { (idx, part) =>
      val random = new Random(100 + idx)
      part.map { v =>
        val target = BLAS.dot(v, xExact) + 0.1 * random.nextGaussian()
        LabeledPoint(target, v)
      }
    }.glom()
      .cache()

    val x = solve(data).first()

    println(s"x_exact = $xExact")
    println(s"x_vlbfgs = $x")

    sc.stop()
  }
}
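For reference, a small local sketch (hypothetical, standalone) of the least squares gradient that the distributed gradient method above accumulates with BLAS.dot and BLAS.axpy: for f(x) = 1/2 * sum_i (a_i . x - b_i)^2, the gradient is sum_i (a_i . x - b_i) * a_i.

// Hypothetical local helper mirroring the per-partition work in gradient() above.
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint

def localGradient(points: Seq[LabeledPoint], x: Vector): Vector = {
  val g = Vectors.zeros(x.size)
  points.foreach { case LabeledPoint(b, a) =>
    val err = BLAS.dot(a, x) - b // residual for this point
    BLAS.axpy(err, a, g)         // g += err * a
  }
  g
}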
Example 14
Source File: HivemallUtils.scala From hivemall-spark with Apache License 2.0

package org.apache.spark.sql.hive

import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, DataFrame, Row, UserDefinedFunction}

object HivemallUtils {

  // # of maximum dimensions for feature vectors
  val maxDims = 100000000

  def funcVectorizer(dense: Boolean = false, dims: Int = maxDims): UserDefinedFunction = {
    udf(funcVectorizerImpl(dense, dims))
  }

  private def funcVectorizerImpl(dense: Boolean, dims: Int): Seq[String] => Vector = {
    if (dense) {
      // Dense features
      i: Seq[String] => {
        val features = new Array[Double](dims)
        i.map { ft =>
          val s = ft.split(":").ensuring(_.size == 2)
          features(s(0).toInt) = s(1).toDouble
        }
        Vectors.dense(features)
      }
    } else {
      // Sparse features
      i: Seq[String] => {
        val features = i.map { ft =>
          // val s = ft.split(":").ensuring(_.size == 2)
          val s = ft.split(":")
          (s(0).toInt, s(1).toDouble)
        }
        Vectors.sparse(dims, features)
      }
    }
  }
}
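A minimal usage sketch (assuming a SQLContext named sqlContext and a DataFrame with an array column of "index:value" strings; the column and variable names are made up): the returned UserDefinedFunction can be applied like any other Spark SQL function.

// Usage sketch; sqlContext, the sample rows, and the column name are assumptions.
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.HivemallUtils

val df = sqlContext.createDataFrame(Seq(
  Tuple1(Seq("0:1.0", "3:2.5")),
  Tuple1(Seq("1:0.5"))
)).toDF("features")

val toVector = HivemallUtils.funcVectorizer(dense = false, dims = 10)
df.select(toVector(col("features")).as("vec")).show(false)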