org.apache.spark.ml.linalg.DenseMatrix Scala Examples

The following examples show how to use org.apache.spark.ml.linalg.DenseMatrix. Each example comes from an open-source project; follow the link above it to see the full source file in its original context.
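For orientation before the examples, here is a minimal sketch of constructing a DenseMatrix directly. The values array is read column-major unless the isTransposed flag is set; both the constructor and the factory methods below are part of the public org.apache.spark.ml.linalg API.

import org.apache.spark.ml.linalg.{DenseMatrix, Matrices}

// 2 rows x 3 columns; values are read column by column:
// | 1.0  3.0  5.0 |
// | 2.0  4.0  6.0 |
val m = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Equivalent factory returning the Matrix trait
val m2 = Matrices.dense(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

// Zero matrix via the companion object
val z = DenseMatrix.zeros(3, 3)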
Example 1
Source File: MLSerDeSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.ml.python

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, SparseMatrix, Vectors}

class MLSerDeSuite extends SparkFunSuite {

  MLSerDe.initialize()

  test("pickle vector") {
    val vectors = Seq(
      Vectors.dense(Array.empty[Double]),
      Vectors.dense(0.0),
      Vectors.dense(0.0, -2.0),
      Vectors.sparse(0, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(1, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(2, Array(1), Array(-2.0)))
    vectors.foreach { v =>
      val u = MLSerDe.loads(MLSerDe.dumps(v))
      assert(u.getClass === v.getClass)
      assert(u === v)
    }
  }

  test("pickle double") {
    for (x <- List(123.0, -10.0, 0.0, Double.MaxValue, Double.MinValue, Double.NaN)) {
      val deser = MLSerDe.loads(MLSerDe.dumps(x.asInstanceOf[AnyRef])).asInstanceOf[Double]
      // We use `equals` here for comparison because we cannot use `==` for NaN
      assert(x.equals(deser))
    }
  }

  test("pickle matrix") {
    val values = Array[Double](0, 1.2, 3, 4.56, 7, 8)
    val matrix = Matrices.dense(2, 3, values)
    val nm = MLSerDe.loads(MLSerDe.dumps(matrix)).asInstanceOf[DenseMatrix]
    assert(matrix === nm)

    // Test conversion for empty matrix
    val empty = Array.empty[Double]
    val emptyMatrix = Matrices.dense(0, 0, empty)
    val ne = MLSerDe.loads(MLSerDe.dumps(emptyMatrix)).asInstanceOf[DenseMatrix]
    assert(emptyMatrix == ne)

    val sm = new SparseMatrix(3, 2, Array(0, 1, 3), Array(1, 0, 2), Array(0.9, 1.2, 3.4))
    val nsm = MLSerDe.loads(MLSerDe.dumps(sm)).asInstanceOf[SparseMatrix]
    assert(sm.toArray === nsm.toArray)

    val smt = new SparseMatrix(
      3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9),
      isTransposed = true)
    val nsmt = MLSerDe.loads(MLSerDe.dumps(smt)).asInstanceOf[SparseMatrix]
    assert(smt.toArray === nsmt.toArray)
  }
} 
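The pickle round trip above relies on DenseMatrix's structural equality, and the values array follows Spark's column-major layout. A quick illustration of how the same values array maps to matrix entries, using only the public linalg API:

import org.apache.spark.ml.linalg.Matrices

val values = Array[Double](0, 1.2, 3, 4.56, 7, 8)
val matrix = Matrices.dense(2, 3, values)
// Column-major: column 0 is (0, 1.2), column 1 is (3, 4.56), column 2 is (7, 8)
assert(matrix(0, 1) == 3.0)
assert(matrix(1, 2) == 8.0)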
Example 2
Source File: LocalPCAModel.scala    From spark-ml-serving   with Apache License 2.0
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils._
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.feature.PCAModel
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, Matrices, Vectors}
import org.apache.spark.mllib.linalg.{DenseMatrix => OldDenseMatrix, Matrices => OldMatrices}

class LocalPCAModel(override val sparkTransformer: PCAModel) extends LocalTransformer[PCAModel] {
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val pc      = OldMatrices.fromML(sparkTransformer.pc).asInstanceOf[OldDenseMatrix]
        val newData = column.data.mapToMlLibVectors.map(pc.transpose.multiply).map(_.toList)
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

object LocalPCAModel extends SimpleModelLoader[PCAModel] with TypedTransformerConverter[PCAModel] {

  override def build(metadata: Metadata, data: LocalData): PCAModel = {
    val constructor = classOf[PCAModel].getDeclaredConstructor(
      classOf[String],
      classOf[DenseMatrix],
      classOf[DenseVector]
    )
    constructor.setAccessible(true)
    val pcMap = data.column("pc").get.data.head.asInstanceOf[Map[String, Any]]
    val pcMat = DataUtils.constructMatrix(pcMap).asInstanceOf[DenseMatrix]
    data.column("explainedVariance") match {
      case Some(ev) =>
        // NOTE: Spark >= 2
        val evParams = ev.data.head.asInstanceOf[Map[String, Any]]
        val explainedVariance = DataUtils.constructVector(evParams).toDense

        constructor
          .newInstance(metadata.uid, pcMat, explainedVariance)
          .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
          .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      case None =>
        // NOTE: Spark < 2
        constructor
          .newInstance(
            metadata.uid,
            pcMat,
            Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]
          )
          .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
          .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    }
  }

  override implicit def toLocal(transformer: PCAModel) =
    new LocalPCAModel(transformer)
} 
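The reflective construction above exists because PCAModel's constructor is not public; for contrast, the usual way to obtain one is to fit the public estimator. A sketch of that path (the data, column names, and k below are illustrative):

import org.apache.spark.ml.feature.PCA
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("pca-sketch").getOrCreate()
import spark.implicits._

val df = Seq(
  Tuple1(Vectors.dense(2.0, 1.0, 0.0)),
  Tuple1(Vectors.dense(0.0, 3.0, 1.0)),
  Tuple1(Vectors.dense(1.0, 0.0, 4.0))
).toDF("features")

val model = new PCA()
  .setInputCol("features")
  .setOutputCol("pcaFeatures")
  .setK(2)
  .fit(df)

model.pc  // principal components as a DenseMatrix, numFeatures x k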
Example 3
Source File: PcaOp.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.feature.PcaModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.feature.Pca
import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.linalg.DenseMatrix


class PcaOp extends MleapOp[Pca, PcaModel] {
  override val Model: OpModel[MleapContext, PcaModel] = new OpModel[MleapContext, PcaModel] {
    override val klazz: Class[PcaModel] = classOf[PcaModel]

    override def opName: String = Bundle.BuiltinOps.feature.pca

    override def store(model: Model, obj: PcaModel)
                      (implicit context: BundleContext[MleapContext]): Model = {
      model.withValue("principal_components", Value.tensor[Double](DenseTensor(obj.principalComponents.values,
        Seq(obj.principalComponents.numRows, obj.principalComponents.numCols))))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[MleapContext]): PcaModel = {
      val values = model.value("principal_components").getTensor[Double]
      PcaModel(new DenseMatrix(values.dimensions.head, values.dimensions(1), values.toArray))
    }
  }

  override def model(node: Pca): PcaModel = node.model
} 
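The store/load pair above flattens the matrix into a DenseTensor whose dimensions are Seq(numRows, numCols) and whose values stay in Spark's column-major order, so the matrix survives the round trip exactly. A standalone illustration using the same two classes:

import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.linalg.DenseMatrix

val pc = new DenseMatrix(3, 2, Array(1.0, -1.0, 2.0, 0.0, -3.0, 1.0))
val tensor = DenseTensor(pc.values, Seq(pc.numRows, pc.numCols))
val restored = new DenseMatrix(tensor.dimensions.head, tensor.dimensions(1), tensor.toArray)
assert(restored == pc)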
Example 4
Source File: PcaSpec.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.runtime.transformer.feature

import ml.combust.mleap.core.feature.PcaModel
import ml.combust.mleap.core.types._
import ml.combust.mleap.runtime.frame.{DefaultLeapFrame, Row}
import ml.combust.mleap.tensor.Tensor
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.scalatest.FunSpec


class PcaSpec extends FunSpec {
  val schema = StructType(Seq(StructField("test_vec", TensorType(BasicType.Double)))).get
  val dataset = Seq(Row(Tensor.denseVector(Array(2.0, 1.0, 0.0))))
  val frame = DefaultLeapFrame(schema, dataset)

  val pc = new DenseMatrix(3, 2, Array(1d, -1, 2,
    0, -3, 1))
  val input = Vectors.dense(Array(2d, 1, 0))
  val pca = Pca(
    shape = NodeShape.feature(inputCol = "test_vec", outputCol = "test_pca"),
    model = PcaModel(pc))

  describe("#transform") {
    it("extracts the principal components from the input column") {
      val frame2 = pca.transform(frame).get
      val data = frame2.dataset(0).getTensor[Double](1).toArray

      assert(data sameElements Array[Double](1, -3))
    }

    describe("with invalid input column") {
      val pca2 = pca.copy(shape = NodeShape.feature(inputCol = "bad_input"))

      it("returns a Failure") { assert(pca2.transform(frame).isFailure) }
    }
  }

  describe("input/output schema") {
    it("has the correct inputs and outputs") {
      assert(pca.schema.fields ==
        Seq(StructField("test_vec", TensorType.Double()),
          StructField("test_pca", TensorType.Double())))
    }
  }
} 
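The expected output follows from pcᵀ · x: the 3x2 matrix above has columns (1, -1, 2) and (0, -3, 1), so for x = (2, 1, 0) the projections are 1·2 + (-1)·1 + 2·0 = 1 and 0·2 + (-3)·1 + 1·0 = -3. The same product computed with the linalg API directly:

import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector}

val pc = new DenseMatrix(3, 2, Array(1.0, -1.0, 2.0, 0.0, -3.0, 1.0))
val x = new DenseVector(Array(2.0, 1.0, 0.0))
val projected = pc.transpose.multiply(x)  // DenseVector(1.0, -3.0)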
Example 5
Source File: VectorConverters.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.util

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
import ml.combust.mleap.tensor.{DenseTensor, SparseTensor, Tensor}
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, Matrices, Matrix, SparseMatrix, SparseVector, Vector, Vectors}

import scala.language.implicitConversions


trait VectorConverters {
  implicit def sparkVectorToMleapTensor(vector: Vector): Tensor[Double] = vector match {
    case vector: DenseVector => DenseTensor(vector.toArray, Seq(vector.size))
    case vector: SparseVector => SparseTensor(indices = vector.indices.map(i => Seq(i)),
      values = vector.values,
      dimensions = Seq(vector.size))
  }

  implicit def mleapTensorToSparkVector(tensor: Tensor[Double]): Vector = tensor match {
    case tensor: DenseTensor[_] =>
      Vectors.dense(tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      Vectors.sparse(tensor.dimensions.product,
        tensor.indices.map(_.head).toArray,
        tensor.values.asInstanceOf[Array[Double]])
  }

  implicit def sparkMatrixToMleapTensor(matrix: Matrix): Tensor[Double] = matrix match {
    case matrix: DenseMatrix =>
      DenseTensor(matrix.toArray, Seq(matrix.numRows, matrix.numCols))
    case matrix: SparseMatrix =>
      val indices = matrix.rowIndices.zip(matrix.colPtrs).map {
        case (r, c) => Seq(r, c)
      }.toSeq
      SparseTensor(indices = indices,
        values = matrix.values,
        dimensions = Seq(matrix.numRows, matrix.numCols))
  }

  implicit def mleapTensorToSparkMatrix(tensor: Tensor[Double]): Matrix = tensor match {
    case tensor: DenseTensor[_] =>
      Matrices.dense(tensor.dimensions.head,
        tensor.dimensions(1),
        tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      val (rows, cols) = tensor.indices.map(v => (v.head, v(1))).unzip
      Matrices.sparse(tensor.dimensions.head,
        tensor.dimensions(1),
        cols.toArray,
        rows.toArray,
        tensor.values.asInstanceOf[Array[Double]])
  }

  implicit def breezeVectorToMLeapTensor(vector: BV[Double]): Tensor[Double] = vector match {
    case vector : BDV[Double] => DenseTensor(vector.toArray, Seq(vector.size))
    case vector : BSV[Double] => SparseTensor(vector.index.map(i => Seq(i)), vector.data, Seq(vector.values.size))
  }


  implicit def mleapTensorToBreezeVector(tensor: Tensor[Double]): BV[Double] = tensor match {
    case tensor: DenseTensor[_] =>
      new BDV(tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      new BSV(tensor.indices.map(_.head).toArray,
        tensor.values.asInstanceOf[Array[Double]],
        tensor.dimensions.product)
  }
}
object VectorConverters extends VectorConverters 
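Since the conversions are implicit, importing the companion object lets Spark and MLeap types flow into one another without explicit calls. A sketch exercising the two vector conversions defined above:

import ml.combust.mleap.tensor.Tensor
import ml.combust.mleap.core.util.VectorConverters._
import org.apache.spark.ml.linalg.{Vector, Vectors}

val t: Tensor[Double] = Vectors.dense(1.0, 2.0, 3.0)  // sparkVectorToMleapTensor
val v: Vector = t                                     // mleapTensorToSparkVector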
Example 6
Source File: PcaModelSpec.scala    From mleap   with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.types.{StructField, TensorType}
import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, Vectors}
import org.scalatest.FunSpec


class PcaModelSpec extends FunSpec {
  describe("pca model") {
    val pc = new DenseMatrix(3, 2, Array[Double](1, -1, 2,
      0, -3, 1))
    val pca = PcaModel(pc)

    it("uses the principal components matrix to transform a vector to a lower-dimensional vector") {

      val input = Vectors.dense(Array[Double](2, 1, 0))

      assert(pca(input).toArray sameElements Array[Double](1, -3))
    }

    it("has the right input schema") {
      assert(pca.inputSchema.fields == Seq(StructField("input", TensorType.Double())))
    }

    it("has the right output schema") {
      assert(pca.outputSchema.fields == Seq(StructField("output", TensorType.Double())))
    }
  }
} 
Example 7
Source File: PcaOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.feature.PCAModel
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector}


class PcaOp extends SimpleSparkOp[PCAModel] {
  override val Model: OpModel[SparkBundleContext, PCAModel] = new OpModel[SparkBundleContext, PCAModel] {
    override val klazz: Class[PCAModel] = classOf[PCAModel]

    override def opName: String = Bundle.BuiltinOps.feature.pca

    override def store(model: Model, obj: PCAModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      model.withValue("principal_components", Value.tensor[Double](DenseTensor(obj.pc.values,
        Seq(obj.pc.numRows, obj.pc.numCols))))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): PCAModel = {
      val values = model.value("principal_components").getTensor[Double]
      new PCAModel(uid = "",
        pc = new DenseMatrix(values.dimensions.head, values.dimensions(1), values.toArray),
        explainedVariance = new DenseVector(Array()))
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: PCAModel): PCAModel = {
    new PCAModel(uid = uid, pc = model.pc, explainedVariance = model.explainedVariance)
  }

  override def sparkInputs(obj: PCAModel): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCol)
  }

  override def sparkOutputs(obj: PCAModel): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
} 
Example 8
Source File: MLUserDefinedType.scala    From spark-testing-base   with Apache License 2.0
package com.holdenkarau.spark.testing

import org.apache.spark.sql.types.DataType
import org.apache.spark.ml.linalg.SQLDataTypes.{MatrixType, VectorType}
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.scalacheck.{Arbitrary, Gen}


object MLUserDefinedType {
  def unapply(dataType: DataType): Option[Gen[Any]] =
    dataType match {
      case MatrixType => {
        val dense = for {
          rows <- Gen.choose(0, 20)
          cols <- Gen.choose(0, 20)
          values <- Gen.containerOfN[Array, Double](rows * cols, Arbitrary.arbitrary[Double])
        } yield new DenseMatrix(rows, cols, values)
        val sparse = dense.map(_.toSparse)
        Some(Gen.oneOf(dense, sparse))
      }
      case VectorType => {
        val dense = Arbitrary.arbitrary[Array[Double]].map(Vectors.dense)
        val sparse = for {
          indices <- Gen.nonEmptyContainerOf[Set, Int](Gen.choose(0, Int.MaxValue - 1))
          values <- Gen.listOfN(indices.size, Arbitrary.arbitrary[Double])
        } yield Vectors.sparse(indices.max + 1, indices.toSeq.zip(values))
        Some(Gen.oneOf(dense, sparse))
      }
      case _ => None
    }
} 
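A typical use is pulling a generator for a Spark SQL UDT field during property-based testing. A small sketch drawing samples straight from the Gen (ScalaCheck's Gen.sample returns an Option):

import com.holdenkarau.spark.testing.MLUserDefinedType
import org.apache.spark.ml.linalg.SQLDataTypes.MatrixType

val Some(gen) = MLUserDefinedType.unapply(MatrixType)
gen.sample.foreach(m => println(m.getClass.getSimpleName))  // DenseMatrix or SparseMatrix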
Example 9
Source File: NaiveBayes.scala    From spark-sql-perf   with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.classification

import org.apache.spark.ml
import org.apache.spark.ml.{ModelBuilderSSP, PipelineStage, Transformer}
import org.apache.spark.ml.evaluation.{Evaluator, MulticlassClassificationEvaluator}
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator


object NaiveBayes extends BenchmarkAlgorithm
  with TestFromTraining with TrainingSetFromTransformer with ScoringWithEvaluator {

  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    val rng = ctx.newGenerator()
    // Max possible arity of a feature in generated training/test data for NaiveBayes models
    val maxFeatureArity = 20
    // All features for Naive Bayes must be categorical, i.e. have arity >= 2
    val featureArity = 0.until(numFeatures).map(_ => 2 + rng.nextInt(maxFeatureArity - 2)).toArray
    DataGenerator.generateMixedFeatures(
      ctx.sqlContext,
      numExamples,
      ctx.seed(),
      numPartitions,
      featureArity)
  }

  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    import ctx.params._
    val rng = ctx.newGenerator()
    // pi = log of class priors, whose dimension is C (number of classes)
    // theta = log of class conditional probabilities, whose dimension is C (number of classes)
    // by D (number of features)
    val unnormalizedProbs = 0.until(numClasses).map(_ => rng.nextDouble() + 1e-5).toArray
    val logProbSum = math.log(unnormalizedProbs.sum)
    val piArray = unnormalizedProbs.map(prob => math.log(prob) - logProbSum)

    // For class i, set the class-conditional probability of feature i to 0.7, and split up the
    // remaining probability mass across the other features
    val currClassProb = 0.7
    val thetaArray = Array.tabulate(numClasses) { i: Int =>
      val baseProbMass = (1 - currClassProb) / (numFeatures - 1)
      val probs = Array.fill[Double](numFeatures)(baseProbMass)
      probs(i) = currClassProb
      probs
    }.map(_.map(math.log))

    // Initialize new Naive Bayes model
    val pi = Vectors.dense(piArray)
    val theta = new DenseMatrix(numClasses, numFeatures, thetaArray.flatten, true)
    ModelBuilderSSP.newNaiveBayesModel(pi, theta)
  }

  override def getPipelineStage(ctx: MLBenchContext): PipelineStage = {
    import ctx.params._
    new ml.classification.NaiveBayes()
      .setSmoothing(smoothing)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator =
    new MulticlassClassificationEvaluator()
} 
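The four-argument DenseMatrix constructor near the end passes isTransposed = true, which makes the values array be read row-major; that matches thetaArray.flatten, where each inner array is one class's row of log-probabilities. A small illustration of the flag (shape and values are illustrative):

import org.apache.spark.ml.linalg.DenseMatrix

// Same values array, read column-major vs row-major
val colMajor = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
val rowMajor = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0), isTransposed = true)
assert(colMajor(0, 1) == 3.0)  // columns are (1,2), (3,4), (5,6)
assert(rowMajor(0, 1) == 2.0)  // rows are (1,2,3), (4,5,6)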
Example 10
Source File: MLeapModelConverterTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.local

import com.salesforce.op.test.TestCommon
import ml.combust.mleap.core.feature._
import ml.combust.mleap.core.types.ScalarShape
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.junit.runner.RunWith
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class MLeapModelConverterTest extends PropSpec with PropertyChecks with TestCommon {

  val mleapModels = Table("mleapModels",
    BinarizerModel(0.0, ScalarShape()),
    BucketedRandomProjectionLSHModel(Seq(), 0.0, 0),
    BucketizerModel(Array.empty),
    ChiSqSelectorModel(Seq(), 0),
    CoalesceModel(Seq()),
    CountVectorizerModel(Array.empty, false, 0.0),
    DCTModel(false, 0),
    ElementwiseProductModel(Vectors.zeros(0)),
    FeatureHasherModel(0, Seq(), Seq(), Seq()),
    HashingTermFrequencyModel(),
    IDFModel(Vectors.zeros(0)),
    ImputerModel(0.0, 0.0, ""),
    InteractionModel(Array(), Seq()),
    MathBinaryModel(BinaryOperation.Add),
    MathUnaryModel(UnaryOperation.Log),
    MaxAbsScalerModel(Vectors.zeros(0)),
    MinHashLSHModel(Seq(), 0),
    MinMaxScalerModel(Vectors.zeros(0), Vectors.zeros(0)),
    NGramModel(0),
    NormalizerModel(0.0, 0),
    OneHotEncoderModel(Array()),
    PcaModel(DenseMatrix.zeros(0, 0)),
    PolynomialExpansionModel(0, 0),
    RegexIndexerModel(Seq(), None),
    RegexTokenizerModel(".*".r),
    ReverseStringIndexerModel(Seq()),
    StandardScalerModel(Some(Vectors.dense(Array(1.0))), Some(Vectors.dense(Array(1.0)))),
    StopWordsRemoverModel(Seq(), false),
    StringIndexerModel(Seq()),
    StringMapModel(Map()),
    TokenizerModel(),
    VectorAssemblerModel(Seq()),
    VectorIndexerModel(0, Map()),
    VectorSlicerModel(Array(), Array(), 0),
    WordLengthFilterModel(),
    WordToVectorModel(Map("a" -> 1), Array(1))
  )

  property("convert mleap models to functions") {
    forAll(mleapModels) { m =>
      val fn = MLeapModelConverter.modelToFunction(m)
      fn shouldBe a[Function[_, _]]
    }
  }

  property("error on unsupported models") {
    the[RuntimeException] thrownBy MLeapModelConverter.modelToFunction(model = "not a model") should have message
      "Unsupported MLeap model: java.lang.String"
  }

} 
Example 11
Source File: MatrixUtils.scala    From pravda-ml   with Apache License 2.0
package org.apache.spark.ml.odkl

import org.apache.spark.ml.linalg.{DenseMatrix, Matrix, VectorUDT}


object MatrixUtils {

  def vectorUDT = new VectorUDT()

  def transformDense(matrix: DenseMatrix, transformer: (Int, Int, Double) => Double): DenseMatrix = {
    matrix.foreachActive((i, j, v) => {
      matrix(i, j) = transformer(i, j, v)
    })
    matrix
  }

  def applyNonZeros(source: Matrix, target: DenseMatrix, transformer: (Int, Int, Double, Double) => Double): DenseMatrix = {
    source.foreachActive((i, j, v) => {
      val index = target.index(i, j)
      target.values(index) = transformer(i, j, v, target.values(index))
    })
    target
  }

  def applyAll(source: Matrix, target: DenseMatrix, transformer: (Int, Int, Double, Double) => Double): DenseMatrix = {
    for (j <- 0 until source.numCols; i <- 0 until source.numRows) {
      val index = target.index(i, j)
      target.values(index) = transformer(i, j, source(i, j), target.values(index))
    }
    target
  }
} 
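A quick sketch of transformDense in use, doubling every entry of a small matrix in place (values are illustrative):

import org.apache.spark.ml.linalg.DenseMatrix
import org.apache.spark.ml.odkl.MatrixUtils

val m = new DenseMatrix(2, 2, Array(1.0, 2.0, 3.0, 4.0))
val doubled = MatrixUtils.transformDense(m, (_, _, v) => v * 2)
assert(doubled(1, 1) == 8.0)  // transformDense mutates and returns the same matrix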