org.apache.spark.mllib.linalg.BLAS Scala Examples

The following examples show how to use org.apache.spark.mllib.linalg.BLAS. You can go to the original project or source file by following the links above each example.
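Note that BLAS is declared private[spark], so code that calls it directly has to live in a package under org.apache.spark; that is why every example below does. As a quick orientation, the sketch below (not taken from any of the listed projects; values are illustrative) summarizes the handful of BLAS calls the examples rely on: dot, axpy, scal and gemm.

// A minimal sketch of the BLAS operations used throughout the examples.
// It must be compiled inside an org.apache.spark.* package because BLAS is private[spark].
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix, Vectors}

val x = Vectors.dense(1.0, 2.0).toDense
val y = Vectors.dense(3.0, 4.0).toDense

BLAS.dot(x, y)        // x . y = 11.0
BLAS.axpy(2.0, x, y)  // y := 2.0 * x + y, so y becomes (5.0, 8.0)
BLAS.scal(0.5, y)     // y := 0.5 * y, so y becomes (2.5, 4.0)

val A = DenseMatrix.eye(2)
val B = new DenseMatrix(2, 2, Array(1.0, 2.0, 3.0, 4.0)) // column-major values
val C = DenseMatrix.zeros(2, 2)
BLAS.gemm(1.0, A, B, 0.0, C)  // C := 1.0 * A * B + 0.0 * C, so C now equals B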
Example 1
Source File: MFDataGenerator.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.mllib.util

import java.{util => ju}

import scala.util.Random

import org.apache.spark.SparkContext
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
import org.apache.spark.rdd.RDD


@DeveloperApi
@Since("0.8.0")
object MFDataGenerator {
  @Since("0.8.0")
  def main(args: Array[String]) {
    if (args.length < 2) {
      // scalastyle:off println
      println("Usage: MFDataGenerator " +
        "<master> <outputDir> [m] [n] [rank] [trainSampFact] [noise] [sigma] [test] [testSampFact]")
      // scalastyle:on println
      System.exit(1)
    }

    val sparkMaster: String = args(0)
    val outputPath: String = args(1)
    val m: Int = if (args.length > 2) args(2).toInt else 100
    val n: Int = if (args.length > 3) args(3).toInt else 100
    val rank: Int = if (args.length > 4) args(4).toInt else 10
    val trainSampFact: Double = if (args.length > 5) args(5).toDouble else 1.0
    val noise: Boolean = if (args.length > 6) args(6).toBoolean else false
    val sigma: Double = if (args.length > 7) args(7).toDouble else 0.1
    val test: Boolean = if (args.length > 8) args(8).toBoolean else false
    val testSampFact: Double = if (args.length > 9) args(9).toDouble else 0.1

    val sc = new SparkContext(sparkMaster, "MFDataGenerator")

    val random = new ju.Random(42L)

    val A = DenseMatrix.randn(m, rank, random)
    val B = DenseMatrix.randn(rank, n, random)
    val z = 1 / math.sqrt(rank)
    val fullData = DenseMatrix.zeros(m, n)
    BLAS.gemm(z, A, B, 1.0, fullData)

    val df = rank * (m + n - rank)
    val sampSize = math.min(math.round(trainSampFact * df), math.round(.99 * m * n)).toInt
    val rand = new Random()
    val mn = m * n
    val shuffled = rand.shuffle((0 until mn).toList)

    val omega = shuffled.slice(0, sampSize)
    val ordered = omega.sortWith(_ < _).toArray
    val trainData: RDD[(Int, Int, Double)] = sc.parallelize(ordered)
      .map(x => (x % m, x / m, fullData.values(x)))

    // optionally add gaussian noise (keep the transformed RDD; RDDs are immutable, so the
    // result of map must be used rather than discarded)
    val finalData = if (noise) {
      trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
    } else {
      trainData
    }

    finalData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)

    // optionally generate testing data
    if (test) {
      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
      val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
      val testOrdered = testOmega.sortWith(_ < _).toArray
      val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
        .map(x => (x % m, x / m, fullData.values(x)))
      // write the test split to its own directory so it does not collide with the training output
      testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath + "-test")
    }

    sc.stop()

  }
} 
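For reference, a hypothetical invocation of the generator above; the master, output path and sizes are illustrative and not from the original project:

// Generate a 500 x 500 dataset of rank 5 and write the sampled entries to /tmp/mf-data.
MFDataGenerator.main(Array("local[2]", "/tmp/mf-data", "500", "500", "5"))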
Example 2
Source File: package.scala    From spark-lp   with Apache License 2.0
  implicit object DenseVectorSpace extends VectorSpace[DenseVector] {

    override def combine(alpha: Double,
                         a: DenseVector,
                         beta: Double,
                         b: DenseVector): DenseVector = {
      val ret = a.copy
      BLAS.scal(alpha, ret)
      BLAS.axpy(beta, b, ret)
      ret
    }

    override def dot(a: DenseVector, b: DenseVector): Double = BLAS.dot(a, b)

    override def entrywiseProd(a: DenseVector, b: DenseVector): DenseVector = {
      val c = a.values.zip(b.values).map { case (i: Double, j: Double) => i * j }
      new DenseVector(c)
    }

    override def entrywiseNegDiv(a: DenseVector, b: DenseVector): DenseVector = {
      val c = a.values.zip(b.values).map {
        case (ai, bi) if bi < 0 => ai / Math.max(Math.abs(bi), 1e-15)
        case (_, bi) if bi >= 0 => Double.PositiveInfinity // infinity, so this entry is ignored when taking the min
      }
      new DenseVector(c)
    }

    override def sum(a: DenseVector): Double = a.values.sum

    override def max(a: DenseVector): Double = a.values.max

    override def min(a: DenseVector): Double = a.values.min

  }
} 
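As a quick check of the semantics: combine builds alpha * a + beta * b in a fresh vector without mutating its inputs. A minimal sketch with illustrative values:

val a = new DenseVector(Array(1.0, 2.0))
val b = new DenseVector(Array(3.0, 4.0))
DenseVectorSpace.combine(2.0, a, 3.0, b) // DenseVector(11.0, 16.0); a and b are unchanged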
Example 3
Source File: package.scala    From spark-lp   with Apache License 2.0
  implicit object DVectorSpace extends VectorSpace[DVector] {

    override def combine(alpha: Double, a: DVector, beta: Double, b: DVector): DVector =
      if (alpha == 1.0 && beta == 1.0) {
        a.zip(b).map {
          case (aPart, bPart) => {
            BLAS.axpy(1.0, aPart, bPart) // bPart += aPart
            bPart
          }
        }
      } else {
        a.zip(b).map {
          case (aPart, bPart) =>
            // NOTE A DenseVector result is assumed here (not sparse safe).
            DenseVectorSpace.combine(alpha, aPart, beta, bPart).toDense
        }
      }

    override def dot(a: DVector, b: DVector): Double = a.dot(b)

    override def entrywiseProd(a: DVector, b: DVector): DVector = {
      a.zip(b).map {
        case (aPart, bPart) =>
          DenseVectorSpace.entrywiseProd(aPart, bPart).toDense
      }
    }

    override def entrywiseNegDiv(a: DVector, b: DVector): DVector = {
      a.zip(b).map {
        case (aPart, bPart) =>
            DenseVectorSpace.entrywiseNegDiv(aPart, bPart)
      }
    }

    override def sum(a: DVector): Double = a.aggregate(0.0)(
      seqOp = (acc: Double, v: DenseVector) => acc + v.values.sum,
      combOp = (acc1: Double, acc2: Double) => acc1 + acc2
    )

    override def min(a: DVector): Double = a.aggregate(Double.PositiveInfinity)(
      (mi, x) => Math.min(mi, x.values.min), Math.min
    )

    override def max(a: DVector): Double = a.aggregate(Double.NegativeInfinity)(
      (ma, x) => Math.max(ma, x.values.max), Math.max
    )


    override def cache(a: DVector): Unit =
      if (a.getStorageLevel == StorageLevel.NONE) {
        a.cache()
      }
  }
} 
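The same operations lift to DVector, which (following the spark-tfocs design that spark-lp builds on) is assumed here to alias RDD[DenseVector], each element holding a chunk of the full vector. The inputs must be identically partitioned, since the implementation zips them. A minimal sketch under that assumption; the SparkContext sc and the values are illustrative:

val a: DVector = sc.parallelize(Seq(new DenseVector(Array(1.0, 2.0))), 1)
val b: DVector = sc.parallelize(Seq(new DenseVector(Array(3.0, 4.0))), 1)
val s = DVectorSpace.combine(1.0, a, 1.0, b) // a single partition holding DenseVector(4.0, 6.0)
DVectorSpace.sum(s)                          // 10.0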
Example 4
Source File: LinopMatrixAdjoint.scala    From spark-lp   with Apache License 2.0
  override def apply(x: DVector): DenseVector = {
    val n = this.n
    matrix.zipPartitions(x)((matrixPartition, xPartition) =>
      Iterator.single(
        matrixPartition.checkedZip(xPartition.next.values.toIterator).aggregate(
          // NOTE A DenseVector result is assumed here (not sparse safe).
          Vectors.zeros(n).toDense)(
            seqop = (_, _) match {
              case (sum, (matrix_i, x_i)) => {
                // Multiply an element of x by its corresponding matrix row, and add to the
                // accumulation sum vector.
                BLAS.axpy(x_i, matrix_i, sum)
                sum
              }
            },
            combop = (sum1, sum2) => {
              // Add the intermediate sum vectors.
              BLAS.axpy(1.0, sum2, sum1)
              sum1
            }
          ))
    ).treeAggregate(Vectors.zeros(n).toDense)(
      seqOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      },
      combOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      }
      , depth
    )
  }
} 
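Conceptually, this apply computes the adjoint product A^T x: within each partition it accumulates x_i times the i-th matrix row, and the partial sums are then combined by treeAggregate. A local, non-distributed sketch of the per-partition accumulation, with illustrative names and values:

val rows = Seq(Vectors.dense(1.0, 0.0), Vectors.dense(0.0, 2.0)) // rows of A
val xValues = Array(3.0, 4.0)                                    // entries of x
val acc = Vectors.zeros(2).toDense
rows.zip(xValues).foreach { case (row, xi) => BLAS.axpy(xi, row, acc) }
// acc is now A^T x = DenseVector(3.0, 8.0)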
Example 5
Source File: SpLinopMatrix.scala    From spark-lp   with Apache License 2.0
  override def apply(mat: DMatrix): DMatrix = {
    dvector.zipPartitions(mat)((vectorPartition, matPartition) =>
      vectorPartition.next().values.toIterator.checkedZip(matPartition.toIterator).map {
        case (a: Double, x: Vector) =>
          val xc = x.copy
          BLAS.scal(a, xc)
          xc
      }
    )
  }
} 
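This operator applies Diag(d) * A for a distributed vector d and a row-distributed matrix A: each matrix row is scaled by the matching entry of dvector. Per row, the work reduces to BLAS.scal on a copied row, as in this small sketch with illustrative values:

val row = Vectors.dense(1.0, 2.0, 3.0)
val scaled = row.copy
BLAS.scal(0.5, scaled) // scaled is (0.5, 1.0, 1.5); row is unchanged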
Example 6
Source File: TestLASSO.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.examples

import scala.util.Random

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.SolverL1RLS
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.{ SparkConf, SparkContext }


object TestLASSO {
  def main(args: Array[String]) {

    val rnd = new Random(34324)
    val sparkConf = new SparkConf().setMaster("local[2]").setAppName("TestLASSO")
    val sc = new SparkContext(sparkConf)

    val n = 1024 // Design matrix column count.
    val m = n / 2 // Design matrix row count.
    val k = m / 5 // Count of nonzero weights.

    // Generate the design matrix using random normal values, then normalize the columns.
    val unnormalizedA = RandomRDDs.normalVectorRDD(sc, m, n, 0, rnd.nextLong)
    val AColumnNormSq = unnormalizedA.treeAggregate(Vectors.zeros(n).toDense)(
      seqOp = (sum, rowA) => {
        val rowASq = Vectors.dense(rowA.toArray.map(rowA_i => rowA_i * rowA_i))
        BLAS.axpy(1.0, rowASq, sum)
        sum
      },
      combOp = (sum1, sum2) => {
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      })
    val A = unnormalizedA.map(rowA =>
      Vectors.dense(rowA.toArray.zip(AColumnNormSq.toArray).map {
        case (rowA_i, normsq_i) => rowA_i / math.sqrt(normsq_i)
      }))

    // Generate the actual 'x' vector, including 'k' nonzero values.
    val x = Vectors.zeros(n).toDense
    for (i <- rnd.shuffle(0 to n - 1).take(k)) {
      x.values(i) = rnd.nextGaussian
    }

    // Generate the 'b' vector using the design matrix and weights, adding gaussian noise.
    val bOriginal = new DenseVector(A.map(rowA => BLAS.dot(rowA, x)).collect)
    val snr = 30 // SNR in dB
    val sigma =
      math.pow(10, ((10 * math.log10(math.pow(Vectors.norm(bOriginal, 2), 2) / n) - snr) / 20))
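    // Equivalently, sigma is the root mean square of bOriginal scaled by 10^(-snr / 20),
    // which fixes the signal-to-noise ratio of the generated observations at 'snr' dB.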
    val b = sc.parallelize(bOriginal.values.map(_ + sigma * rnd.nextGaussian))
      .glom
      .map(new DenseVector(_))

    // Set 'lambda' using the noise standard deviation.
    val lambda = 2 * sigma * math.sqrt(2 * math.log(n))

    // Solve the lasso problem using SolverL1RLS, finding the estimated x vector 'estimatedX'.
    val (estimatedX, _) = SolverL1RLS.run(A, b, lambda)
    println("estimatedX: " + estimatedX.values.mkString(", "))

    sc.stop()
  }
} 
Example 7
Source File: SolverSLP.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.double._
import org.apache.spark.mllib.optimization.tfocs.fs.dvectordouble.vector._
import org.apache.spark.mllib.optimization.tfocs.fs.vector.double._
import org.apache.spark.mllib.optimization.tfocs.vs.dvector._

object SolverSLP {

  
  def run(
    c: DVector,
    A: DMatrix,
    b: DenseVector,
    mu: Double,
    x0: Option[DVector] = None,
    z0: Option[DenseVector] = None,
    numContinuations: Int = 10,
    tol: Double = 1e-4,
    initialTol: Double = 1e-3,
    dualTolCheckInterval: Int = 10): (DVector, Array[Double]) = {

    val minusB = b.copy
    BLAS.scal(-1.0, minusB)
    TFOCS_SCD.optimize(new ProxShiftRPlus(c),
      new LinopMatrixAdjoint(A, minusB),
      new ProxZero(),
      mu,
      x0.getOrElse(c.mapElements(_ => 0.0)),
      z0.getOrElse(Vectors.zeros(b.size).toDense),
      numContinuations,
      tol,
      initialTol,
      dualTolCheckInterval)
  }
} 
Example 8
Source File: package.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.vs

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.VectorSpace

package object vector {

  
  implicit object DenseVectorSpace extends VectorSpace[DenseVector] {

    override def combine(alpha: Double,
      a: DenseVector,
      beta: Double,
      b: DenseVector): DenseVector = {
      val ret = a.copy
      BLAS.scal(alpha, ret)
      BLAS.axpy(beta, b, ret)
      ret
    }

    override def dot(a: DenseVector, b: DenseVector): Double = BLAS.dot(a, b)
  }
} 
Example 9
Source File: package.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.vs

import org.apache.spark.mllib.linalg.BLAS
import org.apache.spark.mllib.optimization.tfocs.DVectorFunctions._
import org.apache.spark.mllib.optimization.tfocs.VectorSpace
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.mllib.optimization.tfocs.vs.vector.DenseVectorSpace
import org.apache.spark.storage.StorageLevel

package object dvector {

  
  implicit object DVectorSpace extends VectorSpace[DVector] {

    override def combine(alpha: Double, a: DVector, beta: Double, b: DVector): DVector =
      if (alpha == 1.0 && beta == 0.0) {
        // When minimizing rather than maximizing, the TFOCS implementation frequently requests a
        // no-op linear combination where alpha == 1.0 and beta == 0.0. This case is specifically
        // optimized.
        a
      } else {
        a.zip(b).map {
          case (aPart, bPart) =>
            // NOTE A DenseVector result is assumed here (not sparse safe).
            DenseVectorSpace.combine(alpha, aPart, beta, bPart).toDense
        }
      }

    override def dot(a: DVector, b: DVector): Double = a.dot(b)

    override def cache(a: DVector): Unit =
      if (a.getStorageLevel == StorageLevel.NONE) {
        a.cache()
      }
  }
} 
Example 10
Source File: LinopMatrixAdjoint.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.fs.dvectordouble.vector

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.{ LinopMatrixAdjoint => Delegate }
import org.apache.spark.mllib.optimization.tfocs.fs.vector.dvectordouble.LinopMatrix
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._


class LinopMatrixAdjoint(private val A: DMatrix, private val b: DenseVector)
    extends LinearOperator[(DVector, Double), DenseVector] {

  private val delegate = new Delegate(A)

  override def apply(x: (DVector, Double)): DenseVector = {
    val ret = delegate.apply(x._1)
    BLAS.axpy(1.0, b, ret)
    ret
  }

  override def t: LinearOperator[DenseVector, (DVector, Double)] = new LinopMatrix(A, b)
} 
Example 11
Source File: LinopMatrix.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.fs.vector.dvector

import org.apache.spark.mllib.linalg.{ BLAS, DenseVector }
import org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector.LinopMatrixAdjoint
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.storage.StorageLevel


class LinopMatrix(private val matrix: DMatrix) extends LinearOperator[DenseVector, DVector] {

  if (matrix.getStorageLevel == StorageLevel.NONE) {
    matrix.cache()
  }

  override def apply(x: DenseVector): DVector = {
    val bcX = matrix.context.broadcast(x)
    // Take the dot product of each matrix row with x.
    // NOTE A DenseVector result is assumed here (not sparse safe).
    matrix.mapPartitions(partitionRows =>
      Iterator.single(new DenseVector(partitionRows.map(row => BLAS.dot(row, bcX.value)).toArray)))
  }

  override def t: LinearOperator[DVector, DenseVector] = new LinopMatrixAdjoint(matrix)
} 
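The forward operator is the row-wise counterpart of the adjoint: every row of the distributed matrix is dotted with the broadcast vector x, so each partition of the result holds the entries of A x for the rows stored in that partition. A minimal local sketch with illustrative values, assuming the usual mllib.linalg imports:

val rows = Seq(Vectors.dense(1.0, 0.0), Vectors.dense(0.0, 2.0))
val xLocal = Vectors.dense(3.0, 4.0)
rows.map(row => BLAS.dot(row, xLocal)) // Seq(3.0, 8.0), the entries of A x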
Example 12
Source File: LinopMatrixAdjoint.scala    From spark-tfocs   with Apache License 2.0
package org.apache.spark.mllib.optimization.tfocs.fs.dvector.vector

import org.apache.spark.mllib.linalg.BLAS
import org.apache.spark.mllib.linalg.{ DenseVector, Vectors }
import org.apache.spark.mllib.optimization.tfocs.CheckedIteratorFunctions._
import org.apache.spark.mllib.optimization.tfocs.fs.vector.dvector.LinopMatrix
import org.apache.spark.mllib.optimization.tfocs.LinearOperator
import org.apache.spark.mllib.optimization.tfocs.VectorSpace._
import org.apache.spark.storage.StorageLevel


class LinopMatrixAdjoint(@transient private val matrix: DMatrix)
    extends LinearOperator[DVector, DenseVector] {

  if (matrix.getStorageLevel == StorageLevel.NONE) {
    matrix.cache()
  }

  private lazy val n = matrix.first().size

  override def apply(x: DVector): DenseVector = {
    val n = this.n
    matrix.zipPartitions(x)((matrixPartition, xPartition) =>
      Iterator.single(
        matrixPartition.checkedZip(xPartition.next.values.toIterator).aggregate(
          // NOTE A DenseVector result is assumed here (not sparse safe).
          Vectors.zeros(n).toDense)(
            seqop = (_, _) match {
              case (sum, (matrix_i, x_i)) => {
                // Multiply an element of x by its corresponding matrix row, and add to the
                // accumulation sum vector.
                BLAS.axpy(x_i, matrix_i, sum)
                sum
              }
            },
            combop = (sum1, sum2) => {
              // Add the intermediate sum vectors.
              BLAS.axpy(1.0, sum2, sum1)
              sum1
            }
          ))
    ).treeAggregate(Vectors.zeros(n).toDense)(
      seqOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      },
      combOp = (sum1, sum2) => {
        // Add the intermediate sum vectors.
        BLAS.axpy(1.0, sum2, sum1)
        sum1
      }
    )
  }

  override def t: LinearOperator[DenseVector, DVector] = new LinopMatrix(matrix)
} 
Example 13
Source File: VLBFGS1.scala    From spark-vl-bfgs   with Apache License 2.0
package org.apache.spark.ml.optim

import java.util.Random

import scala.language.implicitConversions

import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.ml.optim.VectorFreeLBFGS.{Oracle, VectorSpace}
import org.apache.spark.ml.optim.VectorRDDFunctions._
import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.mllib.random.RandomRDDs
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.storage.StorageLevel


  private def gradient(data: RDD[Array[LabeledPoint]], dx: RDD[Vector]): RDD[Vector] = {
    data.cartesian(dx).map { case (points, x) =>
      val g = Vectors.zeros(x.size)
      points.foreach { case LabeledPoint(b, a) =>
        val err = BLAS.dot(a, x) - b
        BLAS.axpy(err, a, g)
      }
      g
    }.treeSum()
  }
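  // The closure above accumulates g = sum_i (a_i . x - b_i) * a_i over the labeled points
  // in each block, i.e. the gradient of the least-squares objective
  // f(x) = 0.5 * sum_i (a_i . x - b_i)^2; treeSum() then adds the per-block gradients.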

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("VLBFGS").setMaster("local[*]")
    val sc = new SparkContext(conf)
    sc.setCheckpointDir("/tmp/checkpoint")
    val n = 1000
    val p = 100
    val random = new Random(0L)
    val xExact = Vectors.dense(Array.fill(p)(random.nextDouble()))
    val data = RandomRDDs.normalVectorRDD(sc, n, p, 4, 11L).mapPartitionsWithIndex { (idx, part) =>
      val random = new Random(100 + idx)
      part.map { v =>
        val target = BLAS.dot(v, xExact) + 0.1 * random.nextGaussian()
        LabeledPoint(target, v)
      }
    }.glom()
    .cache()

    val x = solve(data).first()

    println(s"x_exact = $xExact")
    println(s"x_vlbfgs = $x")

    sc.stop()
  }
} 
Example 14
Source File: HivemallUtils.scala    From hivemall-spark   with Apache License 2.0
package org.apache.spark.sql.hive

import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors}
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Column, DataFrame, Row, UserDefinedFunction}

object HivemallUtils {

  // # of maximum dimensions for feature vectors
  val maxDims = 100000000

  
  def funcVectorizer(dense: Boolean = false, dims: Int = maxDims)
    : UserDefinedFunction = {
    udf(funcVectorizerImpl(dense, dims))
  }

  private def funcVectorizerImpl(dense: Boolean, dims: Int)
    : Seq[String] => Vector = {
    if (dense) {
      // Dense features
      i: Seq[String] => {
        val features = new Array[Double](dims)
        i.map { ft =>
          val s = ft.split(":").ensuring(_.size == 2)
          features(s(0).toInt) = s(1).toDouble
        }
        Vectors.dense(features)
      }
    } else {
      // Sparse features
      i: Seq[String] => {
        val features = i.map { ft =>
          // val s = ft.split(":").ensuring(_.size == 2)
          val s = ft.split(":")
          (s(0).toInt, s(1).toDouble)
        }
        Vectors.sparse(dims, features)
      }
    }
  }
}
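A hedged usage sketch of funcVectorizer: assume df is a DataFrame with a column "features" of type Seq[String], each element formatted as "index:value" (the DataFrame and column names here are hypothetical, not from the original project):

import org.apache.spark.sql.functions.col
import org.apache.spark.sql.hive.HivemallUtils.funcVectorizer

// Adds a vector-typed column built from the sparse "index:value" feature strings.
val vectorized = df.withColumn("feature_vector", funcVectorizer(dense = false)(col("features")))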