org.apache.spark.mllib.linalg.DenseVector Java Examples
The following examples show how to use
org.apache.spark.mllib.linalg.DenseVector.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VectorBinarizerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testVectorBinarizerDense() { // prepare data JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList( RowFactory.create(0d, 1d, new DenseVector(new double[]{-2d, -3d, -4d, -1d, 6d, -7d, 8d, 0d, 0d, 0d, 0d, 0d})), RowFactory.create(1d, 2d, new DenseVector(new double[]{4d, -5d, 6d, 7d, -8d, 9d, -10d, 0d, 0d, 0d, 0d, 0d})), RowFactory.create(2d, 3d, new DenseVector(new double[]{-5d, 6d, -8d, 9d, 10d, 11d, 12d, 0d, 0d, 0d, 0d, 0d})) )); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()), new StructField("vector1", new VectorUDT(), false, Metadata.empty()) }); DataFrame df = sqlContext.createDataFrame(jrdd, schema); VectorBinarizer vectorBinarizer = new VectorBinarizer() .setInputCol("vector1") .setOutputCol("binarized") .setThreshold(2d); //Export this model byte[] exportedModel = ModelExporter.export(vectorBinarizer, df); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = vectorBinarizer.transform(df).orderBy("id").select("id", "value1", "vector1", "binarized").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<>(); data.put(vectorBinarizer.getInputCol(), ((DenseVector) row.get(2)).toArray()); transformer.transform(data); double[] output = (double[]) data.get(vectorBinarizer.getOutputCol()); assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d); } }
Example #2
Source File: SparkMLibUtils.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Converts every column of an {@code ExecRow} into a dense vector of doubles.
 *
 * ExecRow is one-based as far as the elements, while the backing array is zero-based,
 * so column {@code i} is stored at array index {@code i - 1}.
 *
 * @param execRow the row to convert; each column is read with {@code getColumn(i).getDouble()}
 * @return a {@code DenseVector} with one entry per column, in column order
 * @throws StandardException propagated from reading a column value
 */
public static Vector convertExecRowToVector(ExecRow execRow) throws StandardException {
    int length = execRow.nColumns();
    double[] vectorValues = new double[length];
    for (int i = 1; i <= length; i++) {
        // shift the one-based column number to a zero-based array slot; writing to [i]
        // would leave slot 0 unset and overflow the array on the last column
        vectorValues[i - 1] = execRow.getColumn(i).getDouble();
    }
    return new DenseVector(vectorValues);
}
Example #3
Source File: VectorAssemblerBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testVectorAssembler() { // prepare data JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList( RowFactory.create(0d, 1d, new DenseVector(new double[]{2d, 3d})), RowFactory.create(1d, 2d, new DenseVector(new double[]{3d, 4d})), RowFactory.create(2d, 3d, new DenseVector(new double[]{4d, 5d})), RowFactory.create(3d, 4d, new DenseVector(new double[]{5d, 6d})), RowFactory.create(4d, 5d, new DenseVector(new double[]{6d, 7d})) )); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), new StructField("value1", DataTypes.DoubleType, false, Metadata.empty()), new StructField("vector1", new VectorUDT(), false, Metadata.empty()) }); DataFrame df = sqlContext.createDataFrame(jrdd, schema); VectorAssembler vectorAssembler = new VectorAssembler() .setInputCols(new String[]{"value1", "vector1"}) .setOutputCol("feature"); //Export this model byte[] exportedModel = ModelExporter.export(vectorAssembler, null); String exportedModelJson = new String(exportedModel); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = vectorAssembler.transform(df).orderBy("id").select("id", "value1", "vector1", "feature").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<>(); data.put(vectorAssembler.getInputCols()[0], row.get(1)); data.put(vectorAssembler.getInputCols()[1], ((DenseVector) row.get(2)).toArray()); transformer.transform(data); double[] output = (double[]) data.get(vectorAssembler.getOutputCol()); assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d); } }
Example #4
Source File: ChiSqSelectorBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testChiSqSelector() { // prepare data JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList( RowFactory.create(0d, 0d, new DenseVector(new double[]{8d, 7d, 0d})), RowFactory.create(1d, 1d, new DenseVector(new double[]{0d, 9d, 6d})), RowFactory.create(2d, 1d, new DenseVector(new double[]{0.0d, 9.0d, 8.0d})), RowFactory.create(3d, 2d, new DenseVector(new double[]{8.0d, 9.0d, 5.0d})) )); double[] preFilteredData = {0.0d, 6.0d, 8.0d, 5.0d}; StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata.empty()) }); DataFrame df = sqlContext.createDataFrame(jrdd, schema); ChiSqSelector chiSqSelector = new ChiSqSelector(); chiSqSelector.setNumTopFeatures(1); chiSqSelector.setFeaturesCol("features"); chiSqSelector.setLabelCol("label"); chiSqSelector.setOutputCol("output"); ChiSqSelectorModel chiSqSelectorModel = chiSqSelector.fit(df); //Export this model byte[] exportedModel = ModelExporter.export(chiSqSelectorModel, null); String exportedModelJson = new String(exportedModel); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = chiSqSelectorModel.transform(df).orderBy("id").select("id", "label", "features", "output").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<>(); data.put(chiSqSelectorModel.getFeaturesCol(), ((DenseVector) row.get(2)).toArray()); transformer.transform(data); double[] output = (double[]) data.get(chiSqSelectorModel.getOutputCol()); System.out.println(Arrays.toString(output)); assertArrayEquals(output, ((DenseVector) row.get(3)).toArray(), 0d); } }
Example #5
Source File: ProbabilityColumnProducer.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Builds a dense vector of probabilities from a {@code HasProbability} result.
 *
 * The vector has one entry per label returned by {@code getLabels()}, in that order;
 * each entry is the value of {@code getProbability(label)} for the corresponding label.
 *
 * @param value the evaluation result; it is cast to {@code HasProbability}
 * @return a {@code DenseVector} of per-label probabilities
 */
@Override
public Vector format(Object value){
    List<String> labels = getLabels();
    HasProbability probabilitySource = (HasProbability)value;

    double[] result = new double[labels.size()];

    // walk the labels in list order, keeping a parallel write position into the array
    int position = 0;
    for(String label : labels){
        result[position] = probabilitySource.getProbability(label);
        position++;
    }

    return new DenseVector(result);
}
Example #6
Source File: SparkMLibUtils.java From spliceengine with GNU Affero General Public License v3.0 | 3 votes |
/**
 * Converts a chosen subset of an {@code ExecRow}'s columns into a dense vector of doubles.
 *
 * ExecRow is one-based as far as the elements; the entries of {@code fieldsToConvert} are
 * passed to {@code getColumn} unchanged.
 *
 * @param execRow the row to read column values from
 * @param fieldsToConvert the column numbers to extract, in the order they should appear in the vector
 * @return a {@code DenseVector} whose k-th entry is the double value of column {@code fieldsToConvert[k]}
 * @throws StandardException propagated from reading a column value
 */
public static Vector convertExecRowToVector(ExecRow execRow,int[] fieldsToConvert) throws StandardException {
    final int count = fieldsToConvert.length;
    final double[] extracted = new double[count];

    int slot = 0;
    while (slot < count) {
        int columnNumber = fieldsToConvert[slot];
        extracted[slot] = execRow.getColumn(columnNumber).getDouble();
        slot++;
    }

    return new DenseVector(extracted);
}