org.apache.spark.ml.linalg.Vector Java Examples
The following examples show how to use
org.apache.spark.ml.linalg.Vector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testOutputDataFrameDMLVectorWithIDColumn() {
	System.out.println("MLContextTest - output DataFrame DML, vector with ID column");

	// Build a 2x2 matrix in DML and read it back as an (id, vector) DataFrame.
	Script script = dml("M = matrix('1 2 3 4', rows=2, cols=2);").out("M");
	MLResults results = ml.execute(script);
	Dataset<Row> dataFrame = results.getDataFrameVectorWithIDColumn("M");

	List<Row> rows = dataFrame.collectAsList();
	// Each row carries the 1-based row index followed by the row as a Vector.
	Row first = rows.get(0);
	Assert.assertEquals(1.0, first.getDouble(0), 0.0);
	Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, ((Vector) first.get(1)).toArray(), 0.0);
	Row second = rows.get(1);
	Assert.assertEquals(2.0, second.getDouble(0), 0.0);
	Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, ((Vector) second.get(1)).toArray(), 0.0);
}
Example #2
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with ID column");

	// Rows 1..3 of a 3x3 matrix (entries 1..9 sum to 45), keyed by a double row ID.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, Vector>> data = new ArrayList<>();
	for (int i = 0; i < values.length; i++) {
		data.add(new Tuple2<>((double) (i + 1), Vectors.dense(values[i])));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new DoubleVectorRow());

	// Schema: the standard ID column followed by a single vector column.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	// The format is declared explicitly via matrix metadata.
	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #3
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLMllibVectorWithIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, mllib vector with ID column");

	// Rows 1..3 of a 3x3 matrix as legacy mllib vectors, keyed by a double row ID.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, org.apache.spark.mllib.linalg.Vector>> data = new ArrayList<>();
	for (int i = 0; i < values.length; i++) {
		data.add(new Tuple2<>((double) (i + 1), org.apache.spark.mllib.linalg.Vectors.dense(values[i])));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new DoubleMllibVectorRow());

	// Schema: standard ID column plus one legacy mllib vector column.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C1", new org.apache.spark.mllib.linalg.VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #4
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumPYDMLVectorWithNoIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum PYDML, vector with no ID column, no format specified");

	// Three dense rows whose entries 1..9 sum to 45; no ID column.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Vector> data = new ArrayList<>();
	for (double[] v : values) {
		data.add(Vectors.dense(v));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new VectorRow());

	// Single vector column; no metadata, so the format must be inferred.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #5
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithNoIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with no ID column, no format specified");

	// Three dense rows whose entries 1..9 sum to 45; no ID column.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Vector> data = new ArrayList<>();
	for (double[] v : values) {
		data.add(Vectors.dense(v));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new VectorRow());

	// Single vector column; no metadata, so the format must be inferred.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #6
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumPYDMLVectorWithIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum PYDML, vector with ID column, no format specified");

	// Rows 1..3 of a 3x3 matrix, keyed by a double row ID; entries sum to 45.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, Vector>> data = new ArrayList<>();
	for (int i = 0; i < values.length; i++) {
		data.add(new Tuple2<>((double) (i + 1), Vectors.dense(values[i])));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new DoubleVectorRow());

	// Standard ID column plus a vector column; no metadata supplied.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	Script script = dml("print('sum: ' + sum(M))").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #7
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with ID column, no format specified");

	// Rows 1..3 of a 3x3 matrix, keyed by a double row ID; entries sum to 45.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, Vector>> data = new ArrayList<>();
	for (int i = 0; i < values.length; i++) {
		data.add(new Tuple2<>((double) (i + 1), Vectors.dense(values[i])));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new DoubleVectorRow());

	// Standard ID column plus a vector column; no metadata supplied.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #8
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testOutputDataFrameDMLVectorNoIDColumn() {
	System.out.println("MLContextTest - output DataFrame DML, vector no ID column");

	// Build a 2x2 matrix in DML and read it back as one Vector per row, no ID column.
	Script script = dml("M = matrix('1 2 3 4', rows=2, cols=2);").out("M");
	MLResults results = ml.execute(script);
	Dataset<Row> dataFrame = results.getDataFrameVectorNoIDColumn("M");

	List<Row> rows = dataFrame.collectAsList();
	Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, ((Vector) rows.get(0).get(0)).toArray(), 0.0);
	Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, ((Vector) rows.get(1).get(0)).toArray(), 0.0);
}
Example #9
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testOutputDataFrameDMLVectorWithIDColumn() {
	System.out.println("MLContextTest - output DataFrame DML, vector with ID column");

	// Create a 2x2 matrix in DML, then export it as an (id, vector) DataFrame.
	Script script = dml("M = matrix('1 2 3 4', rows=2, cols=2);").out("M");
	Dataset<Row> dataFrame = ml.execute(script).getDataFrameVectorWithIDColumn("M");

	List<Row> collected = dataFrame.collectAsList();
	// Row IDs are 1-based; each vector holds one matrix row.
	Row r1 = collected.get(0);
	Assert.assertEquals(1.0, r1.getDouble(0), 0.0);
	Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, ((Vector) r1.get(1)).toArray(), 0.0);
	Row r2 = collected.get(1);
	Assert.assertEquals(2.0, r2.getDouble(0), 0.0);
	Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, ((Vector) r2.get(1)).toArray(), 0.0);
}
Example #10
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithNoIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with no ID column");

	// Three dense rows whose entries 1..9 sum to 45.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Vector> data = new ArrayList<>();
	for (double[] v : values) {
		data.add(Vectors.dense(v));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new VectorRow());

	// Single vector column, no ID column.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	// Explicitly declare the vector-per-row format.
	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #11
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLMllibVectorWithNoIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, mllib vector with no ID column");

	// Three legacy mllib dense rows whose entries 1..9 sum to 45.
	double[][] values = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<org.apache.spark.mllib.linalg.Vector> data = new ArrayList<>();
	for (double[] v : values) {
		data.add(org.apache.spark.mllib.linalg.Vectors.dense(v));
	}
	JavaRDD<Row> rowRdd = sc.parallelize(data).map(new MllibVectorRow());

	// Single legacy mllib vector column, no ID column.
	List<StructField> fields = new ArrayList<>();
	fields.add(DataTypes.createStructField("C1", new org.apache.spark.mllib.linalg.VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rowRdd, DataTypes.createStructType(fields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #12
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testOutputDataFrameDMLVectorNoIDColumn() {
	System.out.println("MLContextTest - output DataFrame DML, vector no ID column");

	// Create a 2x2 matrix in DML and export it as one Vector per row (no ID).
	Script script = dml("M = matrix('1 2 3 4', rows=2, cols=2);").out("M");
	Dataset<Row> dataFrame = ml.execute(script).getDataFrameVectorNoIDColumn("M");

	List<Row> collected = dataFrame.collectAsList();
	Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, ((Vector) collected.get(0).get(0)).toArray(), 0.0);
	Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, ((Vector) collected.get(1).get(0)).toArray(), 0.0);
}
Example #13
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLMllibVectorWithNoIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, mllib vector with no ID column");

	// Legacy mllib vectors carrying the entries 1..9 (sum 45), one row each.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<org.apache.spark.mllib.linalg.Vector> vectors = new ArrayList<>();
	for (double[] r : rowValues) {
		vectors.add(org.apache.spark.mllib.linalg.Vectors.dense(r));
	}
	JavaRDD<Row> rows = sc.parallelize(vectors).map(new MllibVectorRow());

	// One legacy mllib vector column, no ID column.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField("C1", new org.apache.spark.mllib.linalg.VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #14
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithNoIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with no ID column");

	// One dense vector per matrix row; entries 1..9 sum to 45.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Vector> vectors = new ArrayList<>();
	for (double[] r : rowValues) {
		vectors.add(Vectors.dense(r));
	}
	JavaRDD<Row> rows = sc.parallelize(vectors).map(new VectorRow());

	// One vector column, no ID column.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #15
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLMllibVectorWithIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, mllib vector with ID column");

	// (id, legacy mllib vector) pairs; entries 1..9 sum to 45.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, org.apache.spark.mllib.linalg.Vector>> pairs = new ArrayList<>();
	for (int r = 0; r < rowValues.length; r++) {
		pairs.add(new Tuple2<>((double) (r + 1), org.apache.spark.mllib.linalg.Vectors.dense(rowValues[r])));
	}
	JavaRDD<Row> rows = sc.parallelize(pairs).map(new DoubleMllibVectorRow());

	// Standard ID column plus one legacy mllib vector column.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	schemaFields.add(DataTypes.createStructField("C1", new org.apache.spark.mllib.linalg.VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #16
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithIDColumn() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with ID column");

	// (id, vector) pairs covering the entries 1..9, which sum to 45.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, Vector>> pairs = new ArrayList<>();
	for (int r = 0; r < rowValues.length; r++) {
		pairs.add(new Tuple2<>((double) (r + 1), Vectors.dense(rowValues[r])));
	}
	JavaRDD<Row> rows = sc.parallelize(pairs).map(new DoubleVectorRow());

	// Standard ID column plus one vector column.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	schemaFields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_VECTOR_WITH_INDEX);
	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #17
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with ID column, no format specified");

	// (id, vector) pairs; entries 1..9 sum to 45. No metadata is supplied.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Tuple2<Double, Vector>> pairs = new ArrayList<>();
	for (int r = 0; r < rowValues.length; r++) {
		pairs.add(new Tuple2<>((double) (r + 1), Vectors.dense(rowValues[r])));
	}
	JavaRDD<Row> rows = sc.parallelize(pairs).map(new DoubleVectorRow());

	// Standard ID column plus one vector column.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
	schemaFields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #18
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithNoIDColumnNoFormatSpecified() {
	System.out.println("MLContextTest - DataFrame sum DML, vector with no ID column, no format specified");

	// Dense vectors carrying the entries 1..9 (sum 45); no ID column, no metadata.
	double[][] rowValues = { { 1.0, 2.0, 3.0 }, { 4.0, 5.0, 6.0 }, { 7.0, 8.0, 9.0 } };
	List<Vector> vectors = new ArrayList<>();
	for (double[] r : rowValues) {
		vectors.add(Vectors.dense(r));
	}
	JavaRDD<Row> rows = sc.parallelize(vectors).map(new VectorRow());

	// One vector column only.
	List<StructField> schemaFields = new ArrayList<>();
	schemaFields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
	Dataset<Row> dataFrame = spark.createDataFrame(rows, DataTypes.createStructType(schemaFields));

	Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame);
	setExpectedStdOut("sum: 45.0");
	ml.execute(script);
}
Example #19
Source File: KMeansModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
@Override
public ClusteringModel encodeModel(Schema schema){
	KMeansModel model = getTransformer();

	// One PMML Cluster per k-means centroid; the cluster id is the centroid index.
	List<Cluster> clusters = new ArrayList<>();
	Vector[] clusterCenters = model.clusterCenters();
	for(int i = 0; i < clusterCenters.length; i++){
		Cluster cluster = new Cluster(PMMLUtil.createRealArray(VectorUtil.toList(clusterCenters[i])))
			.setId(String.valueOf(i));

		clusters.add(cluster);
	}

	// Distance measure: squared Euclidean with absolute-difference compare function.
	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
		.setCompareFunction(CompareFunction.ABS_DIFF);

	// Center-based clustering model over the schema's features.
	return new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, clusters.size(), ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters);
}
Example #20
Source File: RDDConverterUtils.java From systemds with Apache License 2.0 | 5 votes |
private static Vector createVector(MatrixBlock row) { if( row.isEmptyBlock(false) ) //EMPTY SPARSE ROW return Vectors.sparse(row.getNumColumns(), new int[0], new double[0]); else if( row.isInSparseFormat() ) //SPARSE ROW return Vectors.sparse(row.getNumColumns(), row.getSparseBlock().indexes(0), row.getSparseBlock().values(0)); else // DENSE ROW return Vectors.dense(row.getDenseBlockValues()); }
Example #21
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testOutputDataFrameVectorsWithIDColumnFromMatrixDML() {
	System.out.println("MLContextTest - output DataFrame of vectors with ID column from matrix DML");

	// One 1x4 matrix row, exported as a single (id, vector) DataFrame row.
	Script script = dml("M = matrix('1 2 3 4', rows=1, cols=4);").out("M");
	Dataset<Row> df = ml.execute(script).getMatrix("M").toDFVectorWithIDColumn();

	Row row = df.collectAsList().get(0);
	Assert.assertEquals(1.0, row.getDouble(0), 0.0);
	Assert.assertArrayEquals(new double[] { 1.0, 2.0, 3.0, 4.0 }, ((Vector) row.get(1)).toArray(), 0.0);
}
Example #22
Source File: StandardScalerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
/**
 * Verifies that the exported transformer reproduces Spark's scaled output.
 * For every row: feed the input vector (column 0) through {@code transformer}
 * and compare its {@code "scaledOutput"} against Spark's result (column 1)
 * and the expected values, within a 0.01 tolerance.
 */
private void assertCorrectness(List<Row> sparkOutput, double[][] expected, Transformer transformer) {
	// Iterate over every expected row instead of a hard-coded count of 2,
	// so the helper also works for fixtures of any size.
	for (int i = 0; i < expected.length; i++) {
		double[] input = ((Vector) sparkOutput.get(i).get(0)).toArray();
		Map<String, Object> data = new HashMap<String, Object>();
		data.put("features", input);
		transformer.transform(data);
		double[] transformedOp = (double[]) data.get("scaledOutput");

		double[] sparkOp = ((Vector) sparkOutput.get(i).get(1)).toArray();
		assertArrayEquals(transformedOp, sparkOp, 0.01);
		assertArrayEquals(transformedOp, expected[i], 0.01);
	}
}
Example #23
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testOutputDataFrameVectorsNoIDColumnFromMatrixDML() {
	System.out.println("MLContextTest - output DataFrame of vectors with no ID column from matrix DML");

	// One 1x4 matrix row, exported as a single vector-only DataFrame row.
	Script script = dml("M = matrix('1 2 3 4', rows=1, cols=4);").out("M");
	Dataset<Row> df = ml.execute(script).getMatrix("M").toDFVectorNoIDColumn();

	Row row = df.collectAsList().get(0);
	Assert.assertArrayEquals(new double[] { 1.0, 2.0, 3.0, 4.0 }, ((Vector) row.get(0)).toArray(), 0.0);
}
Example #24
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testOutputDataFrameOfVectorsDML() {
	System.out.println("MLContextTest - output DataFrame of vectors DML");

	// Produce a 2x2 matrix in DML and fetch it as a DataFrame of (id, vector) rows.
	String s = "m=matrix('1 2 3 4',rows=2,cols=2);";
	Script script = dml(s).out("m");
	MLResults results = ml.execute(script);
	Dataset<Row> df = results.getDataFrame("m", true);
	// Row order is not guaranteed, so sort by the generated ID column first.
	Dataset<Row> sortedDF = df.sort(RDDConverterUtils.DF_ID_COLUMN);

	// verify column types: a double ID column followed by a vector column
	StructType schema = sortedDF.schema();
	StructField[] fields = schema.fields();
	StructField idColumn = fields[0];
	StructField vectorColumn = fields[1];
	Assert.assertTrue(idColumn.dataType() instanceof DoubleType);
	Assert.assertTrue(vectorColumn.dataType() instanceof VectorUDT);

	// verify the cell values of both rows
	List<Row> list = sortedDF.collectAsList();
	Row row1 = list.get(0);
	Assert.assertEquals(1.0, row1.getDouble(0), 0.0);
	Vector v1 = (DenseVector) row1.get(1);
	double[] arr1 = v1.toArray();
	Assert.assertArrayEquals(new double[] { 1.0, 2.0 }, arr1, 0.0);
	Row row2 = list.get(1);
	Assert.assertEquals(2.0, row2.getDouble(0), 0.0);
	Vector v2 = (DenseVector) row2.get(1);
	double[] arr2 = v2.toArray();
	Assert.assertArrayEquals(new double[] { 3.0, 4.0 }, arr2, 0.0);
}
Example #25
Source File: VectorUtil.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Converts a Spark ML vector into a boxed list of its components.
 * Sparse vectors are densified first, so implicit zeros are included.
 */
static public List<Double> toList(Vector vector){
	double[] values = vector.toDense().values();

	return Doubles.asList(values);
}
Example #26
Source File: LogisticRegression1BridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test
public void testLogisticRegression() {
	//prepare data: libsvm-formatted binary classification fixture
	String datapath = "src/test/resources/binary_classification_test.libsvm";
	Dataset<Row> trainingData = spark.read().format("libsvm").load(datapath);

	//Train model in spark
	LogisticRegressionModel lrmodel = new LogisticRegression().fit(trainingData);

	//Export this model to a byte array
	byte[] exportedModel = ModelExporter.export(lrmodel);

	//Import and get Transformer (round-trips the exported bytes)
	Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

	//validate predictions: the re-imported transformer must reproduce Spark's
	//prediction for every point in the training file (within 0.01)
	List<LabeledPoint> testPoints = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD().collect();
	for (LabeledPoint i : testPoints) {
		Vector v = i.features().asML();
		double actual = lrmodel.predict(v);

		Map<String, Object> data = new HashMap<String, Object>();
		data.put("features", v.toArray());
		transformer.transform(data);
		double predicted = (double) data.get("prediction");

		assertEquals(actual, predicted, 0.01);
	}
}
Example #27
Source File: MinMaxScalerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
/**
 * Verifies that the exported transformer reproduces Spark's min-max scaling.
 * For every row: feed the input vector (column 0) through {@code transformer}
 * and compare its {@code "scaled"} output against Spark's result (column 1)
 * and the expected values, within a 0.01 tolerance.
 */
private void assertCorrectness(List<Row> sparkOutput, double[][] expected, Transformer transformer) {
	// Iterate over every expected row instead of a hard-coded count of 3,
	// so the helper also works for fixtures of any size.
	for (int i = 0; i < expected.length; i++) {
		double[] input = ((Vector) sparkOutput.get(i).get(0)).toArray();
		Map<String, Object> data = new HashMap<String, Object>();
		data.put("features", input);
		transformer.transform(data);
		double[] transformedOp = (double[]) data.get("scaled");

		double[] sparkOp = ((Vector) sparkOutput.get(i).get(1)).toArray();
		assertArrayEquals(transformedOp, sparkOp, 0.01);
		assertArrayEquals(transformedOp, expected[i], 0.01);
	}
}
Example #28
Source File: SimplePredictionFromTextFile.java From net.jgp.labs.spark with Apache License 2.0 | 5 votes |
/**
 * End-to-end linear-regression demo: loads a two-column CSV, builds a feature
 * vector via a registered UDF, fits a LinearRegression model, prints the
 * training summary, and predicts the label for a sample feature value.
 */
private void start() {
	SparkSession spark = SparkSession.builder()
		.appName("Simple prediction from Text File")
		.master("local")
		.getOrCreate();

	// UDF that wraps the raw double column into an ML Vector.
	spark.udf().register("vectorBuilder", new VectorBuilder(), new VectorUDT());

	String filename = "data/tuple-data-file.csv";
	StructType schema = new StructType(
		new StructField[] {
			new StructField("_c0", DataTypes.DoubleType, false, Metadata.empty()),
			new StructField("_c1", DataTypes.DoubleType, false, Metadata.empty()),
			new StructField("features", new VectorUDT(), true, Metadata.empty()), });

	Dataset<Row> df = spark.read().format("csv").schema(schema).option("header", "false")
		.load(filename);

	// Rename the raw CSV columns to the names Spark ML expects.
	df = df.withColumn("valuefeatures", df.col("_c0")).drop("_c0");
	df = df.withColumn("label", df.col("_c1")).drop("_c1");
	df.printSchema();

	df = df.withColumn("features", callUDF("vectorBuilder", df.col("valuefeatures")));
	df.printSchema();
	df.show();

	LinearRegression lr = new LinearRegression().setMaxIter(20);

	// Fit the model to the data.
	LinearRegressionModel model = lr.fit(df);

	// Given a dataset, predict each point's label, and show the results.
	model.transform(df).show();

	LinearRegressionTrainingSummary trainingSummary = model.summary();
	System.out.println("numIterations: " + trainingSummary.totalIterations());
	System.out.println("objectiveHistory: " + Vectors.dense(trainingSummary.objectiveHistory()));
	trainingSummary.residuals().show();
	System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
	System.out.println("r2: " + trainingSummary.r2());

	double intercept = model.intercept();
	// Fixed label: was misspelled "Interesection"; the printed value is the intercept.
	System.out.println("Intercept: " + intercept);
	double regParam = model.getRegParam();
	System.out.println("Regression parameter: " + regParam);
	double tol = model.getTol();
	System.out.println("Tol: " + tol);

	Double feature = 7.0;
	Vector features = Vectors.dense(feature);
	double p = model.predict(features);
	System.out.println("Prediction for feature " + feature + " is " + p);
	System.out.println(8 * regParam + intercept);
}
Example #29
Source File: RemoteDPParForSpark.java From systemds with Apache License 2.0 | 5 votes |
@Override
public Tuple2<Long, Writable> call(Tuple2<Row, Long> arg0) throws Exception {
	// Converts a (Row, zero-based index) pair into a (1-based row index,
	// single-row matrix block) pair.
	long rowix = arg0._2() + 1;

	//process row data: with an ID column, the payload starts at position 1
	int off = _containsID ? 1: 0;
	Object obj = _isVector ? arg0._1().get(off) : arg0._1();
	boolean sparse = (obj instanceof SparseVector);
	MatrixBlock mb = new MatrixBlock(1, (int)_clen, sparse);

	if( _isVector ) {
		Vector vect = (Vector) obj;
		if( vect instanceof SparseVector ) {
			// Copy only the non-zero entries of the sparse vector.
			SparseVector svect = (SparseVector) vect;
			int lnnz = svect.numNonzeros();
			for( int k=0; k<lnnz; k++ )
				mb.appendValue(0, svect.indices()[k], svect.values()[k]);
		}
		else { //dense: copy all _clen components
			for( int j=0; j<_clen; j++ )
				mb.appendValue(0, j, vect.apply(j));
		}
	}
	else { //row of scalar cells, skipping the optional ID column
		Row row = (Row) obj;
		for( int j=off; j<off+_clen; j++ )
			mb.appendValue(0, j-off, UtilFunctions.getDouble(row.get(j)));
	}

	// Re-evaluate the dense/sparse representation after all appends.
	mb.examSparsity();
	return new Tuple2<>(rowix, new PairWritableBlock(new MatrixIndexes(1,1),mb));
}
Example #30
Source File: PMMLBuilder.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Extracts component {@code index} of every vector in the named column.
 */
static private List<?> getVectorColumn(Dataset<Row> dataset, String name, int index){
	// NOTE(review): unchecked cast — getColumn is assumed to yield Vector values
	// for this column; verify at the call sites.
	List<Vector> column = (List<Vector>)getColumn(dataset, name);

	return column.stream()
		.map(vector -> vector.apply(index))
		.collect(Collectors.toList());
}