org.apache.spark.ml.linalg.DenseMatrix Scala Examples
The following examples show how to use org.apache.spark.ml.linalg.DenseMatrix.
Each example is taken from an open-source project; the source file, originating project, and license are noted above each listing so you can trace it back to its original context.
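A quick refresher before the examples: DenseMatrix stores its entries in a single column-major values array, so new DenseMatrix(numRows, numCols, values) reads the values one column at a time. A minimal sketch (the matrix contents here are illustrative only):

import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

// 2 rows x 3 columns; the values array is read column by column:
// [1.0  3.0  5.0]
// [2.0  4.0  6.0]
val m = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))

m.numRows                                               // 2
m.numCols                                               // 3
m(0, 2)                                                 // 5.0
m.transpose.multiply(Vectors.dense(1.0, 1.0).toDense)   // DenseVector(3.0, 7.0, 11.0)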
Example 1
Source File: MLSerDeSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.ml.python

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, SparseMatrix, Vectors}

class MLSerDeSuite extends SparkFunSuite {

  MLSerDe.initialize()

  test("pickle vector") {
    val vectors = Seq(
      Vectors.dense(Array.empty[Double]),
      Vectors.dense(0.0),
      Vectors.dense(0.0, -2.0),
      Vectors.sparse(0, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(1, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(2, Array(1), Array(-2.0)))
    vectors.foreach { v =>
      val u = MLSerDe.loads(MLSerDe.dumps(v))
      assert(u.getClass === v.getClass)
      assert(u === v)
    }
  }

  test("pickle double") {
    for (x <- List(123.0, -10.0, 0.0, Double.MaxValue, Double.MinValue, Double.NaN)) {
      val deser = MLSerDe.loads(MLSerDe.dumps(x.asInstanceOf[AnyRef])).asInstanceOf[Double]
      // We use `equals` here for comparison because we cannot use `==` for NaN
      assert(x.equals(deser))
    }
  }

  test("pickle matrix") {
    val values = Array[Double](0, 1.2, 3, 4.56, 7, 8)
    val matrix = Matrices.dense(2, 3, values)
    val nm = MLSerDe.loads(MLSerDe.dumps(matrix)).asInstanceOf[DenseMatrix]
    assert(matrix === nm)

    // Test conversion for empty matrix
    val empty = Array.empty[Double]
    val emptyMatrix = Matrices.dense(0, 0, empty)
    val ne = MLSerDe.loads(MLSerDe.dumps(emptyMatrix)).asInstanceOf[DenseMatrix]
    assert(emptyMatrix == ne)

    val sm = new SparseMatrix(3, 2, Array(0, 1, 3), Array(1, 0, 2), Array(0.9, 1.2, 3.4))
    val nsm = MLSerDe.loads(MLSerDe.dumps(sm)).asInstanceOf[SparseMatrix]
    assert(sm.toArray === nsm.toArray)

    val smt = new SparseMatrix(
      3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9),
      isTransposed = true)
    val nsmt = MLSerDe.loads(MLSerDe.dumps(smt)).asInstanceOf[SparseMatrix]
    assert(smt.toArray === nsmt.toArray)
  }
}
Example 2
Source File: LocalPCAModel.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils._
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.feature.PCAModel
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, Matrices, Vectors}
import org.apache.spark.mllib.linalg.{DenseMatrix => OldDenseMatrix, Matrices => OldMatrices}

class LocalPCAModel(override val sparkTransformer: PCAModel) extends LocalTransformer[PCAModel] {

  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val pc      = OldMatrices.fromML(sparkTransformer.pc).asInstanceOf[OldDenseMatrix]
        val newData = column.data.mapToMlLibVectors.map(pc.transpose.multiply).map(_.toList)
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

object LocalPCAModel extends SimpleModelLoader[PCAModel] with TypedTransformerConverter[PCAModel] {

  override def build(metadata: Metadata, data: LocalData): PCAModel = {
    val constructor = classOf[PCAModel].getDeclaredConstructor(
      classOf[String],
      classOf[DenseMatrix],
      classOf[DenseVector]
    )
    constructor.setAccessible(true)
    val pcMap = data.column("pc").get.data.head.asInstanceOf[Map[String, Any]]
    val pcMat = DataUtils.constructMatrix(pcMap).asInstanceOf[DenseMatrix]
    data.column("explainedVariance") match {
      case Some(ev) =>
        // NOTE: Spark >= 2
        val evParams          = ev.data.head.asInstanceOf[Map[String, Any]]
        val explainedVariance = DataUtils.constructVector(evParams).toDense
        constructor
          .newInstance(metadata.uid, pcMat, explainedVariance)
          .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
          .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      case None =>
        // NOTE: Spark < 2
        constructor
          .newInstance(
            metadata.uid,
            pcMat,
            Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]
          )
          .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
          .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
    }
  }

  override implicit def toLocal(transformer: PCAModel) = new LocalPCAModel(transformer)
}
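The local transform above converts the new ml.linalg principal-components matrix back to the legacy mllib type so it can reuse its multiply routines. A minimal sketch of that bridge (the matrix values here are illustrative, not taken from the project):

import org.apache.spark.ml.linalg.DenseMatrix
import org.apache.spark.mllib.linalg.{DenseMatrix => OldDenseMatrix, DenseVector => OldDenseVector, Matrices => OldMatrices}

// A 3 x 2 principal-components matrix in the new ml.linalg API (column-major values)
val pc = new DenseMatrix(3, 2, Array(1.0, -1.0, 2.0, 0.0, -3.0, 1.0))

// Convert to the legacy mllib type and project an input vector onto the components
val oldPc = OldMatrices.fromML(pc).asInstanceOf[OldDenseMatrix]
val projected = oldPc.transpose.multiply(new OldDenseVector(Array(2.0, 1.0, 0.0)))
// projected == DenseVector(1.0, -3.0)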
Example 3
Source File: PcaOp.scala From mleap with Apache License 2.0
package ml.combust.mleap.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.feature.PcaModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.feature.Pca
import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.linalg.DenseMatrix

class PcaOp extends MleapOp[Pca, PcaModel] {
  override val Model: OpModel[MleapContext, PcaModel] = new OpModel[MleapContext, PcaModel] {
    override val klazz: Class[PcaModel] = classOf[PcaModel]

    override def opName: String = Bundle.BuiltinOps.feature.pca

    override def store(model: Model, obj: PcaModel)
                      (implicit context: BundleContext[MleapContext]): Model = {
      model.withValue("principal_components",
        Value.tensor[Double](DenseTensor(obj.principalComponents.values,
          Seq(obj.principalComponents.numRows, obj.principalComponents.numCols))))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[MleapContext]): PcaModel = {
      val values = model.value("principal_components").getTensor[Double]
      PcaModel(new DenseMatrix(values.dimensions.head, values.dimensions(1), values.toArray))
    }
  }

  override def model(node: Pca): PcaModel = node.model
}
Example 4
Source File: PcaSpec.scala From mleap with Apache License 2.0
package ml.combust.mleap.runtime.transformer.feature

import ml.combust.mleap.core.feature.PcaModel
import ml.combust.mleap.core.types._
import ml.combust.mleap.runtime.frame.{DefaultLeapFrame, Row}
import ml.combust.mleap.tensor.Tensor
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.scalatest.FunSpec

class PcaSpec extends FunSpec {
  val schema = StructType(Seq(StructField("test_vec", TensorType(BasicType.Double)))).get
  val dataset = Seq(Row(Tensor.denseVector(Array(2.0, 1.0, 0.0))))
  val frame = DefaultLeapFrame(schema, dataset)
  val pc = new DenseMatrix(3, 2, Array(1d, -1, 2, 0, -3, 1))
  val input = Vectors.dense(Array(2d, 1, 0))
  val pca = Pca(
    shape = NodeShape.feature(inputCol = "test_vec", outputCol = "test_pca"),
    model = PcaModel(pc))

  describe("#transform") {
    it("extracts the principal components from the input column") {
      val frame2 = pca.transform(frame).get
      val data = frame2.dataset(0).getTensor[Double](1).toArray
      assert(data sameElements Array[Double](1, -3))
    }

    describe("with invalid input column") {
      val pca2 = pca.copy(shape = NodeShape.feature(inputCol = "bad_input"))
      it("returns a Failure") {
        assert(pca2.transform(frame).isFailure)
      }
    }
  }

  describe("input/output schema") {
    it("has the correct inputs and outputs") {
      assert(pca.schema.fields ==
        Seq(StructField("test_vec", TensorType.Double()),
          StructField("test_pca", TensorType.Double())))
    }
  }
}
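The expected output Array(1, -3) follows directly from DenseMatrix's column-major layout. A small sketch of the arithmetic this test relies on, using plain Spark linalg outside of MLeap:

import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

// Array(1, -1, 2, 0, -3, 1) read column by column gives
// pc = [ 1   0]
//      [-1  -3]
//      [ 2   1]
val pc = new DenseMatrix(3, 2, Array(1.0, -1.0, 2.0, 0.0, -3.0, 1.0))

// PCA projects the input onto the components, i.e. pc.transpose * input
val projected = pc.transpose.multiply(Vectors.dense(2.0, 1.0, 0.0).toDense)
// projected == DenseVector(1.0, -3.0):
//   component 1: 1*2 + (-1)*1 + 2*0 = 1
//   component 2: 0*2 + (-3)*1 + 1*0 = -3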
Example 5
Source File: VectorConverters.scala From mleap with Apache License 2.0
package ml.combust.mleap.core.util

import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
import ml.combust.mleap.tensor.{DenseTensor, SparseTensor, Tensor}
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector, Matrices, Matrix, SparseMatrix, SparseVector, Vector, Vectors}

import scala.language.implicitConversions

trait VectorConverters {

  implicit def sparkVectorToMleapTensor(vector: Vector): Tensor[Double] = vector match {
    case vector: DenseVector =>
      DenseTensor(vector.toArray, Seq(vector.size))
    case vector: SparseVector =>
      SparseTensor(indices = vector.indices.map(i => Seq(i)),
        values = vector.values,
        dimensions = Seq(vector.size))
  }

  implicit def mleapTensorToSparkVector(tensor: Tensor[Double]): Vector = tensor match {
    case tensor: DenseTensor[_] =>
      Vectors.dense(tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      Vectors.sparse(tensor.dimensions.product,
        tensor.indices.map(_.head).toArray,
        tensor.values.asInstanceOf[Array[Double]])
  }

  implicit def sparkMatrixToMleapTensor(matrix: Matrix): Tensor[Double] = matrix match {
    case matrix: DenseMatrix =>
      DenseTensor(matrix.toArray, Seq(matrix.numRows, matrix.numCols))
    case matrix: SparseMatrix =>
      val indices = matrix.rowIndices.zip(matrix.colPtrs).map {
        case (r, c) => Seq(r, c)
      }.toSeq
      SparseTensor(indices = indices,
        values = matrix.values,
        dimensions = Seq(matrix.numRows, matrix.numCols))
  }

  implicit def mleapTensorToSparkMatrix(tensor: Tensor[Double]): Matrix = tensor match {
    case tensor: DenseTensor[_] =>
      Matrices.dense(tensor.dimensions.head,
        tensor.dimensions(1),
        tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      val (rows, cols) = tensor.indices.map(v => (v.head, v(1))).unzip
      Matrices.sparse(tensor.dimensions.head,
        tensor.dimensions(1),
        cols.toArray,
        rows.toArray,
        tensor.values.asInstanceOf[Array[Double]])
  }

  implicit def breezeVectorToMLeapTensor(vector: BV[Double]): Tensor[Double] = vector match {
    case vector: BDV[Double] => DenseTensor(vector.toArray, Seq(vector.size))
    case vector: BSV[Double] => SparseTensor(vector.index.map(i => Seq(i)), vector.data, Seq(vector.values.size))
  }

  implicit def mleapTensorToBreezeVector(tensor: Tensor[Double]): BV[Double] = tensor match {
    case tensor: DenseTensor[_] =>
      new BDV(tensor.rawValues.asInstanceOf[Array[Double]])
    case tensor: SparseTensor[_] =>
      new BSV(tensor.indices.map(_.head).toArray,
        tensor.values.asInstanceOf[Array[Double]],
        tensor.dimensions.product)
  }
}

object VectorConverters extends VectorConverters
Example 6
Source File: PcaModelSpec.scala From mleap with Apache License 2.0
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.types.{StructField, TensorType}
import org.apache.spark.ml.linalg.{DenseMatrix, Matrices, Vectors}
import org.scalatest.FunSpec

class PcaModelSpec extends FunSpec {

  describe("pca model") {
    val pc = new DenseMatrix(3, 2, Array[Double](1, -1, 2, 0, -3, 1))
    val pca = PcaModel(pc)

    it("uses the principal components matrix to transform a vector to a lower-dimensional vector") {
      val input = Vectors.dense(Array[Double](2, 1, 0))
      assert(pca(input).toArray sameElements Array[Double](1, -3))
    }

    it("has the right input schema") {
      assert(pca.inputSchema.fields == Seq(StructField("input", TensorType.Double())))
    }

    it("has the right output schema") {
      assert(pca.outputSchema.fields == Seq(StructField("output", TensorType.Double())))
    }
  }
}
Example 7
Source File: PcaOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.feature.PCAModel
import org.apache.spark.ml.linalg.{DenseMatrix, DenseVector}

class PcaOp extends SimpleSparkOp[PCAModel] {
  override val Model: OpModel[SparkBundleContext, PCAModel] = new OpModel[SparkBundleContext, PCAModel] {
    override val klazz: Class[PCAModel] = classOf[PCAModel]

    override def opName: String = Bundle.BuiltinOps.feature.pca

    override def store(model: Model, obj: PCAModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      model.withValue("principal_components",
        Value.tensor[Double](DenseTensor(obj.pc.values, Seq(obj.pc.numRows, obj.pc.numCols))))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): PCAModel = {
      val values = model.value("principal_components").getTensor[Double]
      new PCAModel(uid = "",
        pc = new DenseMatrix(values.dimensions.head, values.dimensions(1), values.toArray),
        explainedVariance = new DenseVector(Array()))
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: PCAModel): PCAModel = {
    new PCAModel(uid = uid, pc = model.pc, explainedVariance = model.explainedVariance)
  }

  override def sparkInputs(obj: PCAModel): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCol)
  }

  override def sparkOutputs(obj: PCAModel): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
}
Example 8
Source File: MLUserDefinedType.scala From spark-testing-base with Apache License 2.0
package com.holdenkarau.spark.testing

import org.apache.spark.sql.types.DataType
import org.apache.spark.ml.linalg.SQLDataTypes.{MatrixType, VectorType}
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.scalacheck.{Arbitrary, Gen}

object MLUserDefinedType {
  def unapply(dataType: DataType): Option[Gen[Any]] =
    dataType match {
      case MatrixType => {
        val dense = for {
          rows <- Gen.choose(0, 20)
          cols <- Gen.choose(0, 20)
          values <- Gen.containerOfN[Array, Double](rows * cols, Arbitrary.arbitrary[Double])
        } yield new DenseMatrix(rows, cols, values)
        val sparse = dense.map(_.toSparse)
        Some(Gen.oneOf(dense, sparse))
      }
      case VectorType => {
        val dense = Arbitrary.arbitrary[Array[Double]].map(Vectors.dense)
        val sparse = for {
          indices <- Gen.nonEmptyContainerOf[Set, Int](Gen.choose(0, Int.MaxValue - 1))
          values <- Gen.listOfN(indices.size, Arbitrary.arbitrary[Double])
        } yield Vectors.sparse(indices.max + 1, indices.toSeq.zip(values))
        Some(Gen.oneOf(dense, sparse))
      }
      case _ => None
    }
}
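The generator above derives its sparse case from the dense one via DenseMatrix.toSparse. A minimal sketch of that conversion (the matrix values are illustrative):

import org.apache.spark.ml.linalg.DenseMatrix

// Column-major 2 x 2 matrix with two explicit zeros
val dense = new DenseMatrix(2, 2, Array(1.0, 0.0, 0.0, 4.0))

// toSparse keeps only the non-zero entries, stored in compressed sparse column (CSC) form
val sparse = dense.toSparse
// sparse.numActives == 2; sparse.values sameElements Array(1.0, 4.0)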
Example 9
Source File: NaiveBayes.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.classification

import org.apache.spark.ml
import org.apache.spark.ml.{ModelBuilderSSP, PipelineStage, Transformer}
import org.apache.spark.ml.evaluation.{Evaluator, MulticlassClassificationEvaluator}
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator

object NaiveBayes extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    val rng = ctx.newGenerator()
    // Max possible arity of a feature in generated training/test data for NaiveBayes models
    val maxFeatureArity = 20
    // All features for Naive Bayes must be categorical, i.e. have arity >= 2
    val featureArity = 0.until(numFeatures).map(_ => 2 + rng.nextInt(maxFeatureArity - 2)).toArray
    DataGenerator.generateMixedFeatures(ctx.sqlContext, numExamples, ctx.seed(), numPartitions,
      featureArity)
  }

  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    import ctx.params._
    val rng = ctx.newGenerator()
    // pi = log of class priors, whose dimension is C (number of classes)
    // theta = log of class conditional probabilities, whose dimension is C (number of classes)
    // by D (number of features)
    val unnormalizedProbs = 0.until(numClasses).map(_ => rng.nextDouble() + 1e-5).toArray
    val logProbSum = math.log(unnormalizedProbs.sum)
    val piArray = unnormalizedProbs.map(prob => math.log(prob) - logProbSum)

    // For class i, set the class-conditional probability of feature i to 0.7, and split up the
    // remaining probability mass across the other features
    val currClassProb = 0.7
    val thetaArray = Array.tabulate(numClasses) { i: Int =>
      val baseProbMass = (1 - currClassProb) / (numFeatures - 1)
      val probs = Array.fill[Double](numFeatures)(baseProbMass)
      probs(i) = currClassProb
      probs
    }.map(_.map(math.log))

    // Initialize new Naive Bayes model
    val pi = Vectors.dense(piArray)
    val theta = new DenseMatrix(numClasses, numFeatures, thetaArray.flatten, true)
    ModelBuilderSSP.newNaiveBayesModel(pi, theta)
  }

  override def getPipelineStage(ctx: MLBenchContext): PipelineStage = {
    import ctx.params._
    new ml.classification.NaiveBayes()
      .setSmoothing(smoothing)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator =
    new MulticlassClassificationEvaluator()
}
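The theta matrix above is built from an Array[Array[Double]] flattened row by row, so the code passes isTransposed = true to tell DenseMatrix that the values are laid out row-major rather than the default column-major order. A small sketch of the difference (the values are illustrative):

import org.apache.spark.ml.linalg.DenseMatrix

val rowMajor = Array(1.0, 2.0, 3.0,   // row 0
                     4.0, 5.0, 6.0)   // row 1

// Default (column-major) reading would scramble the rows:
val wrong = new DenseMatrix(2, 3, rowMajor)        // wrong(0, 1) == 3.0
// With isTransposed = true the same array is read row by row:
val right = new DenseMatrix(2, 3, rowMajor, true)  // right(0, 1) == 2.0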
Example 10
Source File: MLeapModelConverterTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.local

import com.salesforce.op.test.TestCommon
import ml.combust.mleap.core.feature._
import ml.combust.mleap.core.types.ScalarShape
import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}
import org.junit.runner.RunWith
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class MLeapModelConverterTest extends PropSpec with PropertyChecks with TestCommon {

  val mleapModels = Table("mleapModels",
    BinarizerModel(0.0, ScalarShape()),
    BucketedRandomProjectionLSHModel(Seq(), 0.0, 0),
    BucketizerModel(Array.empty),
    ChiSqSelectorModel(Seq(), 0),
    CoalesceModel(Seq()),
    CountVectorizerModel(Array.empty, false, 0.0),
    DCTModel(false, 0),
    ElementwiseProductModel(Vectors.zeros(0)),
    FeatureHasherModel(0, Seq(), Seq(), Seq()),
    HashingTermFrequencyModel(),
    IDFModel(Vectors.zeros(0)),
    ImputerModel(0.0, 0.0, ""),
    InteractionModel(Array(), Seq()),
    MathBinaryModel(BinaryOperation.Add),
    MathUnaryModel(UnaryOperation.Log),
    MaxAbsScalerModel(Vectors.zeros(0)),
    MinHashLSHModel(Seq(), 0),
    MinMaxScalerModel(Vectors.zeros(0), Vectors.zeros(0)),
    NGramModel(0),
    NormalizerModel(0.0, 0),
    OneHotEncoderModel(Array()),
    PcaModel(DenseMatrix.zeros(0, 0)),
    PolynomialExpansionModel(0, 0),
    RegexIndexerModel(Seq(), None),
    RegexTokenizerModel(".*".r),
    ReverseStringIndexerModel(Seq()),
    StandardScalerModel(Some(Vectors.dense(Array(1.0))), Some(Vectors.dense(Array(1.0)))),
    StopWordsRemoverModel(Seq(), false),
    StringIndexerModel(Seq()),
    StringMapModel(Map()),
    TokenizerModel(),
    VectorAssemblerModel(Seq()),
    VectorIndexerModel(0, Map()),
    VectorSlicerModel(Array(), Array(), 0),
    WordLengthFilterModel(),
    WordToVectorModel(Map("a" -> 1), Array(1))
  )

  property("convert mleap models to functions") {
    forAll(mleapModels) { m =>
      val fn = MLeapModelConverter.modelToFunction(m)
      fn shouldBe a[Function[_, _]]
    }
  }

  property("error on unsupported models") {
    the[RuntimeException] thrownBy MLeapModelConverter.modelToFunction(model = "not at model") should have message
      "Unsupported MLeap model: java.lang.String"
  }
}
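This test builds its empty principal-components matrix with DenseMatrix.zeros(0, 0). The companion object offers a few factory methods like this; a quick sketch of the common ones, with the resulting shapes noted in comments:

import org.apache.spark.ml.linalg.{DenseMatrix, Vectors}

DenseMatrix.zeros(2, 3)                    // 2 x 3 matrix of 0.0
DenseMatrix.ones(2, 3)                     // 2 x 3 matrix of 1.0
DenseMatrix.eye(3)                         // 3 x 3 identity matrix
DenseMatrix.diag(Vectors.dense(1.0, 2.0))  // 2 x 2 matrix with the vector on the diagonal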
Example 11
Source File: MatrixUtils.scala From pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl

import org.apache.spark.ml.linalg.{DenseMatrix, Matrix, VectorUDT}

object MatrixUtils {

  def vectorUDT = new VectorUDT()

  def transformDense(matrix: DenseMatrix, transformer: (Int, Int, Double) => Double): DenseMatrix = {
    matrix.foreachActive((i, j, v) => {
      matrix(i, j) = transformer(i, j, v)
    })
    matrix
  }

  def applyNonZeros(source: Matrix, target: DenseMatrix, transformer: (Int, Int, Double, Double) => Double): DenseMatrix = {
    source.foreachActive((i, j, v) => {
      val index = target.index(i, j)
      target.values(index) = transformer(i, j, v, target.values(index))
    })
    target
  }

  def applyAll(source: Matrix, target: DenseMatrix, transformer: (Int, Int, Double, Double) => Double): DenseMatrix = {
    for (j <- 0 until source.numCols; i <- 0 until source.numRows) {
      val index = target.index(i, j)
      target.values(index) = transformer(i, j, source(i, j), target.values(index))
    }
    target
  }
}
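MatrixUtils is declared inside the org.apache.spark.ml namespace, presumably so it can reach members such as foreachActive, index, and update that are not accessible outside Spark's own packages. The same in-place idea can be sketched using only the public values array, assuming a non-transposed (column-major) DenseMatrix where entry (i, j) sits at position i + j * numRows:

import org.apache.spark.ml.linalg.DenseMatrix

// Hypothetical helper: scale every entry of a column-major DenseMatrix in place
def scaleInPlace(m: DenseMatrix, factor: Double): DenseMatrix = {
  require(!m.isTransposed, "sketch assumes column-major storage")
  for (j <- 0 until m.numCols; i <- 0 until m.numRows) {
    m.values(i + j * m.numRows) *= factor
  }
  m
}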