org.apache.spark.mllib.linalg.DenseMatrix Scala Examples
The following examples show how to use org.apache.spark.mllib.linalg.DenseMatrix.
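Before the examples, it helps to know that DenseMatrix stores its values in a single column-major array. Below is a minimal illustrative sketch of constructing and reading one, using only the public mllib.linalg API:

import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices}

// A 2 x 3 matrix built from a column-major array:
//   1.0  3.0  5.0
//   2.0  4.0  6.0
val dm = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
println(dm(1, 2))      // 6.0 (row 1, column 2)
println(dm.transpose)  // a transposed view; no data is copied
val viaFactory = Matrices.dense(2, 3, dm.toArray)  // equivalent factory method

This column-major layout explains the index arithmetic that appears in several of the examples below.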
Example 1
Source File: MFDataGenerator.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.mllib.util

import java.{util => ju}

import scala.util.Random

import org.apache.spark.SparkContext
import org.apache.spark.annotation.{DeveloperApi, Since}
import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix}
import org.apache.spark.rdd.RDD

@DeveloperApi
@Since("0.8.0")
object MFDataGenerator {

  @Since("0.8.0")
  def main(args: Array[String]) {
    if (args.length < 2) {
      // scalastyle:off println
      println("Usage: MFDataGenerator " +
        "<master> <outputDir> [m] [n] [rank] [trainSampFact] [noise] [sigma] [test] [testSampFact]")
      // scalastyle:on println
      System.exit(1)
    }

    val sparkMaster: String = args(0)
    val outputPath: String = args(1)
    val m: Int = if (args.length > 2) args(2).toInt else 100
    val n: Int = if (args.length > 3) args(3).toInt else 100
    val rank: Int = if (args.length > 4) args(4).toInt else 10
    val trainSampFact: Double = if (args.length > 5) args(5).toDouble else 1.0
    val noise: Boolean = if (args.length > 6) args(6).toBoolean else false
    val sigma: Double = if (args.length > 7) args(7).toDouble else 0.1
    val test: Boolean = if (args.length > 8) args(8).toBoolean else false
    val testSampFact: Double = if (args.length > 9) args(9).toDouble else 0.1

    val sc = new SparkContext(sparkMaster, "MFDataGenerator")

    val random = new ju.Random(42L)

    // Low-rank factors A (m x rank) and B (rank x n); fullData = z * A * B
    val A = DenseMatrix.randn(m, rank, random)
    val B = DenseMatrix.randn(rank, n, random)
    val z = 1 / math.sqrt(rank)
    val fullData = DenseMatrix.zeros(m, n)
    BLAS.gemm(z, A, B, 1.0, fullData)

    val df = rank * (m + n - rank)
    val sampSize = math.min(math.round(trainSampFact * df),
      math.round(.99 * m * n)).toInt
    val rand = new Random()
    val mn = m * n
    val shuffled = rand.shuffle((0 until mn).toList)

    val omega = shuffled.slice(0, sampSize)
    val ordered = omega.sortWith(_ < _).toArray
    val trainData: RDD[(Int, Int, Double)] = sc.parallelize(ordered)
      .map(x => (x % m, x / m, fullData.values(x)))

    // Optionally add Gaussian noise. RDDs are immutable, so the result of `map`
    // must be captured; the original code discarded it, which was a bug.
    val noisyData = if (noise) {
      trainData.map(x => (x._1, x._2, x._3 + rand.nextGaussian * sigma))
    } else {
      trainData
    }

    noisyData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath)

    // optionally generate testing data
    if (test) {
      val testSampSize = math.min(math.round(sampSize * testSampFact).toInt, mn - sampSize)
      val testOmega = shuffled.slice(sampSize, sampSize + testSampSize)
      val testOrdered = testOmega.sortWith(_ < _).toArray
      val testData: RDD[(Int, Int, Double)] = sc.parallelize(testOrdered)
        .map(x => (x % m, x / m, fullData.values(x)))
      // Write to a separate directory: the original saved to outputPath a second
      // time, which fails because that directory already exists.
      testData.map(x => x._1 + "," + x._2 + "," + x._3).saveAsTextFile(outputPath + "/test")
    }

    sc.stop()
  }
}
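The (x % m, x / m) mapping above relies on the column-major layout: linear index x in fullData.values corresponds to row x % m and column x / m. A small illustrative check:

import org.apache.spark.mllib.linalg.DenseMatrix

val rows = 2
val fullData = new DenseMatrix(rows, 2, Array(10.0, 20.0, 30.0, 40.0))
val x = 2  // linear index into the column-major values array
assert(fullData.values(x) == fullData(x % rows, x / rows))  // both are 30.0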
Example 2
Source File: PythonMLLibAPISuite.scala From drizzle-spark with Apache License 2.0 (the same suite appears nearly verbatim in the sparkoscope, multi-tenancy-spark, iolap, spark1.52, Spark-2.3.1, and BigDatalog projects)
package org.apache.spark.mllib.api.python

import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, SparseMatrix, Vectors}
import org.apache.spark.mllib.recommendation.Rating
import org.apache.spark.mllib.regression.LabeledPoint

class PythonMLLibAPISuite extends SparkFunSuite {

  SerDe.initialize()

  test("pickle vector") {
    val vectors = Seq(
      Vectors.dense(Array.empty[Double]),
      Vectors.dense(0.0),
      Vectors.dense(0.0, -2.0),
      Vectors.sparse(0, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(1, Array.empty[Int], Array.empty[Double]),
      Vectors.sparse(2, Array(1), Array(-2.0)))
    vectors.foreach { v =>
      val u = SerDe.loads(SerDe.dumps(v))
      assert(u.getClass === v.getClass)
      assert(u === v)
    }
  }

  test("pickle labeled point") {
    val points = Seq(
      LabeledPoint(0.0, Vectors.dense(Array.empty[Double])),
      LabeledPoint(1.0, Vectors.dense(0.0)),
      LabeledPoint(-0.5, Vectors.dense(0.0, -2.0)),
      LabeledPoint(0.0, Vectors.sparse(0, Array.empty[Int], Array.empty[Double])),
      LabeledPoint(1.0, Vectors.sparse(1, Array.empty[Int], Array.empty[Double])),
      LabeledPoint(-0.5, Vectors.sparse(2, Array(1), Array(-2.0))))
    points.foreach { p =>
      val q = SerDe.loads(SerDe.dumps(p)).asInstanceOf[LabeledPoint]
      assert(q.label === p.label)
      assert(q.features.getClass === p.features.getClass)
      assert(q.features === p.features)
    }
  }

  test("pickle double") {
    for (x <- List(123.0, -10.0, 0.0, Double.MaxValue, Double.MinValue, Double.NaN)) {
      val deser = SerDe.loads(SerDe.dumps(x.asInstanceOf[AnyRef])).asInstanceOf[Double]
      // We use `equals` here for comparison because we cannot use `==` for NaN
      assert(x.equals(deser))
    }
  }

  test("pickle matrix") {
    val values = Array[Double](0, 1.2, 3, 4.56, 7, 8)
    val matrix = Matrices.dense(2, 3, values)
    val nm = SerDe.loads(SerDe.dumps(matrix)).asInstanceOf[DenseMatrix]
    assert(matrix === nm)

    // Test conversion for empty matrix
    val empty = Array.empty[Double]
    val emptyMatrix = Matrices.dense(0, 0, empty)
    val ne = SerDe.loads(SerDe.dumps(emptyMatrix)).asInstanceOf[DenseMatrix]
    assert(emptyMatrix == ne)

    val sm = new SparseMatrix(3, 2, Array(0, 1, 3), Array(1, 0, 2), Array(0.9, 1.2, 3.4))
    val nsm = SerDe.loads(SerDe.dumps(sm)).asInstanceOf[SparseMatrix]
    assert(sm.toArray === nsm.toArray)

    val smt = new SparseMatrix(
      3, 3, Array(0, 2, 3, 5), Array(0, 2, 1, 0, 2), Array(0.9, 1.2, 3.4, 5.7, 8.9),
      isTransposed = true)
    val nsmt = SerDe.loads(SerDe.dumps(smt)).asInstanceOf[SparseMatrix]
    assert(smt.toArray === nsmt.toArray)
  }

  test("pickle rating") {
    val rat = new Rating(1, 2, 3.0)
    val rat2 = SerDe.loads(SerDe.dumps(rat)).asInstanceOf[Rating]
    assert(rat == rat2)

    // Test that the class name occurs only once in the pickled bytes
    val rats = (1 to 10).map(x => new Rating(x, x + 1, x + 3.0)).toArray
    val bytes = SerDe.dumps(rats)
    assert(bytes.toString.split("Rating").length == 1)
    assert(bytes.length / 10 < 25) // 25 bytes per rating
  }
}
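In the `pickle matrix` test, Matrices.dense(2, 3, values) also reads its array column-major, so Array(0, 1.2, 3, 4.56, 7, 8) lays out as the rows (0, 3, 7) and (1.2, 4.56, 8). A quick illustrative check:

import org.apache.spark.mllib.linalg.Matrices

val matrix = Matrices.dense(2, 3, Array[Double](0, 1.2, 3, 4.56, 7, 8))
assert(matrix(0, 1) == 3.0)  // row 0, column 1
assert(matrix(1, 2) == 8.0)  // row 1, column 2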
Example 3
Source File: LDAModelOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.clustering

import ml.combust.mleap.tensor.DenseTensor
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.Model
import ml.combust.bundle.op.{OpModel, OpNode}
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.clustering.LocalLDAModel
import org.apache.spark.mllib.clustering.{LocalLDAModel => oldLocalLDAModel}
import ml.combust.bundle.dsl._
import org.apache.spark.mllib.linalg.DenseMatrix
import org.apache.spark.sql.SparkSession

class LDAModelOp extends SimpleSparkOp[LocalLDAModel] {

  override val Model: OpModel[SparkBundleContext, LocalLDAModel] =
    new OpModel[SparkBundleContext, LocalLDAModel] {
      override val klazz: Class[LocalLDAModel] = classOf[LocalLDAModel]

      override def opName: String = Bundle.BuiltinOps.clustering.lda

      override def store(model: Model, obj: LocalLDAModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        val topicMatrixArray: Array[Double] = obj.topicsMatrix.asBreeze.toDenseMatrix.toArray
        val topicMatrixRows = obj.topicsMatrix.numRows
        val topicMatrixCols = obj.topicsMatrix.numCols
        model.withValue("vocabSize", Value.int(obj.vocabSize)).
          withValue("docConcentration", Value.doubleList(obj.getEffectiveDocConcentration)).
          withValue("topicConcentration", Value.double(obj.getEffectiveTopicConcentration)).
          withValue("topicMatrix", Value.tensor[Double](
            DenseTensor[Double](topicMatrixArray, Seq(topicMatrixRows, topicMatrixCols))))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): LocalLDAModel = {
        val vocabSize = model.value("vocabSize").getInt
        val topicConcentration = model.value("topicConcentration").getDouble
        val docConcentration = model.value("docConcentration").getDoubleList.toArray
        val topicMatrix = model.value("topicMatrix").getTensor[Double]
        val rows = topicMatrix.dimensions.head
        val cols = topicMatrix.dimensions(1)

        new LocalLDAModel("",
          vocabSize,
          new oldLocalLDAModel(
            new DenseMatrix(rows, cols, topicMatrix.toArray, false),
            new org.apache.spark.mllib.linalg.DenseVector(docConcentration),
            topicConcentration),
          SparkSession.builder().getOrCreate())
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: LocalLDAModel): LocalLDAModel = {
    val field = classOf[LocalLDAModel].getDeclaredField("oldLocalModel")
    field.setAccessible(true)
    val oldLocalModel = field.get(model).asInstanceOf[oldLocalLDAModel]
    new LocalLDAModel(uid = uid,
      vocabSize = model.vocabSize,
      oldLocalModel = oldLocalModel,
      sparkSession = model.sparkSession)
  }

  override def sparkInputs(obj: LocalLDAModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: LocalLDAModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.topicDistributionCol)
  }
}
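The load method above rebuilds the topics matrix from a flat array using the column-major DenseMatrix constructor (isTransposed = false). A minimal sketch of that round trip, independent of the mleap bundle types:

import org.apache.spark.mllib.linalg.DenseMatrix

val original = new DenseMatrix(2, 3, Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0))
val flat = original.toArray                       // column-major copy of the values
val restored = new DenseMatrix(2, 3, flat, false) // same shape, same ordering
assert(original == restored)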
Example 4
Source File: RandomProjection.scala From spark-neighbors with MIT License
package com.github.karlhigley.spark.neighbors.linalg

import java.util.Random

import breeze.stats.distributions.CauchyDistribution
import org.apache.spark.mllib.linalg.{ DenseMatrix, Matrices }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector }

// NOTE: the excerpt omitted the enclosing declarations; the minimal wrapper class
// and companion object below are reconstructed so the factory methods compile.
// The full class body (e.g. its project helper) is elided here.
class RandomProjection(val matrix: DenseMatrix) extends Serializable

object RandomProjection {

  def generateGaussian(originalDim: Int, projectedDim: Int, random: Random): RandomProjection = {
    val localMatrix = DenseMatrix.randn(projectedDim, originalDim, random)
    new RandomProjection(localMatrix)
  }

  def generateCauchy(originalDim: Int, projectedDim: Int, random: Random): RandomProjection = {
    def randc(numRows: Int, numCols: Int): DenseMatrix = {
      require(
        numRows.toLong * numCols <= Int.MaxValue,
        s"$numRows x $numCols dense matrix is too large to allocate"
      )
      val cauchyDistribution = new CauchyDistribution(0, 1)
      new DenseMatrix(numRows, numCols, cauchyDistribution.drawMany(numRows * numCols))
    }

    val localMatrix = randc(projectedDim, originalDim)
    new RandomProjection(localMatrix)
  }
}
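A hypothetical usage sketch: the factories return a RandomProjection wrapping a projectedDim x originalDim matrix, so a vector can be projected with the matrix's built-in multiply. (The full source also has a project helper, elided from this excerpt, so the sketch goes through the matrix directly.)

import java.util.Random
import org.apache.spark.mllib.linalg.DenseVector

val rp = RandomProjection.generateGaussian(4, 2, new Random(42L))  // originalDim = 4, projectedDim = 2
val v = new DenseVector(Array(1.0, 0.0, -1.0, 2.0))
val projected = rp.matrix.multiply(v)  // a DenseVector of length 2
println(projected)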
Example 5
Source File: OpStatisticsPropertyTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.utils.stats

import com.salesforce.op.test.TestCommon
import org.apache.spark.mllib.linalg.DenseMatrix
import org.junit.runner.RunWith
import org.scalacheck.Gen
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class OpStatisticsPropertyTest extends PropSpec with TestCommon with PropertyChecks {

  val genInt = Gen.posNum[Int]

  private def genArray(n: Int) = Gen.containerOfN[Array, Int](n, genInt)

  val genMatrix = for {
    rowSize <- Gen.choose(1, 13)
    colSize <- Gen.choose(1, 13)
    size = rowSize * colSize
    array <- genArray(size)
  } yield new DenseMatrix(rowSize, colSize, array.map(_.toDouble))

  property("cramerV function should produce results in expected ranges") {
    forAll(genMatrix) { (matrix: DenseMatrix) =>
      val res = OpStatistics.chiSquaredTest(matrix).cramersV
      if (matrix.numRows > 1 && matrix.numCols > 1) {
        res >= 0 shouldBe true
        res <= 1 shouldBe true
      } else {
        res.isNaN shouldBe true
      }
    }
  }
}
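OpStatistics is internal to TransmogrifAI, but the quantity under test is standard: for a contingency table with n total observations, Cramér's V is sqrt(chi2 / (n * (min(rows, cols) - 1))). Below is a hedged sketch using plain mllib; the cramersV helper is hypothetical, not TransmogrifAI's implementation.

import org.apache.spark.mllib.linalg.{DenseMatrix, Matrix}
import org.apache.spark.mllib.stat.Statistics

// Hypothetical helper: Cramér's V from Pearson's chi-squared independence test.
def cramersV(contingency: Matrix): Double = {
  val chi2 = Statistics.chiSqTest(contingency).statistic
  val n = contingency.toArray.sum                          // total observation count
  val k = math.min(contingency.numRows, contingency.numCols)
  math.sqrt(chi2 / (n * (k - 1)))  // degenerate for 1 x c or r x 1 tables
}

val table = new DenseMatrix(2, 2, Array(10.0, 20.0, 30.0, 40.0))
println(cramersV(table))  // falls in [0, 1] for a 2 x 2 table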