org.apache.spark.ml.regression.RandomForestRegressionModel Java Examples
The following examples show how to use
org.apache.spark.ml.regression.RandomForestRegressionModel.
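Before the examples, here is a minimal sketch of how a RandomForestRegressionModel is normally obtained: it is not constructed directly, but returned by fitting a RandomForestRegressor estimator. This is only a sketch assuming the Spark 2.x Dataset API (as in Example #4 below); the data path, column names, and numTrees value are placeholders.

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.regression.RandomForestRegressionModel;
import org.apache.spark.ml.regression.RandomForestRegressor;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class RandomForestRegressionModelSketch {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("RandomForestRegressionModelSketch")
            .master("local[*]")   // placeholder master for a local run
            .getOrCreate();

        // Placeholder data set in LIBSVM format with "label" and "features" columns.
        Dataset<Row> data = spark.read().format("libsvm")
            .load("data/mllib/sample_libsvm_data.txt");

        // The model is produced by fitting the RandomForestRegressor estimator.
        RandomForestRegressionModel model = new RandomForestRegressor()
            .setLabelCol("label")
            .setFeaturesCol("features")
            .setNumTrees(20)   // placeholder forest size
            .fit(data);

        // A few things the fitted model exposes.
        System.out.println("Number of trees: " + model.getNumTrees());
        Vector importances = model.featureImportances();
        System.out.println("Feature importances: " + importances);

        spark.stop();
    }
}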
Example #1
Source File: RandomForestRegressionModelInfoAdapter.java, from spark-transformers (Apache License 2.0)
@Override
RandomForestModelInfo getModelInfo(final RandomForestRegressionModel sparkRfModel, final DataFrame df) {
    final RandomForestModelInfo modelInfo = new RandomForestModelInfo();
    modelInfo.setNumFeatures(sparkRfModel.numFeatures());
    modelInfo.setRegression(true); // true for regression

    final List<Double> treeWeights = new ArrayList<Double>();
    for (double w : sparkRfModel.treeWeights()) {
        treeWeights.add(w);
    }
    modelInfo.setTreeWeights(treeWeights);

    final List<DecisionTreeModelInfo> decisionTrees = new ArrayList<>();
    for (DecisionTreeModel decisionTreeModel : sparkRfModel.trees()) {
        decisionTrees.add(DECISION_TREE_ADAPTER.getModelInfo((DecisionTreeRegressionModel) decisionTreeModel, df));
    }
    modelInfo.setTrees(decisionTrees);

    final Set<String> inputKeys = new LinkedHashSet<String>();
    inputKeys.add(sparkRfModel.getFeaturesCol());
    inputKeys.add(sparkRfModel.getLabelCol());
    modelInfo.setInputKeys(inputKeys);

    final Set<String> outputKeys = new LinkedHashSet<String>();
    outputKeys.add(sparkRfModel.getPredictionCol());
    modelInfo.setOutputKeys(outputKeys);

    return modelInfo;
}
Example #2
Source File: RandomForestRegressionModelInfoAdapterBridgeTest.java, from spark-transformers (Apache License 2.0)
@Test
public void testRandomForestRegression() {
    // Load the data stored in LIBSVM format as a DataFrame.
    DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

    // Split the data into training and test sets (30% held out for testing)
    DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
    DataFrame trainingData = splits[0];
    DataFrame testData = splits[1];

    // Train a RandomForest model.
    RandomForestRegressionModel regressionModel = new RandomForestRegressor()
            .setFeaturesCol("features").fit(trainingData);

    byte[] exportedModel = ModelExporter.export(regressionModel, null);

    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    Row[] sparkOutput = regressionModel.transform(testData).select("features", "prediction").collect();

    // Compare predictions
    for (Row row : sparkOutput) {
        Vector v = (Vector) row.get(0);
        double actual = row.getDouble(1);

        Map<String, Object> inputData = new HashMap<String, Object>();
        inputData.put(transformer.getInputKeys().iterator().next(), v.toArray());
        transformer.transform(inputData);
        double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());

        System.out.println(actual + ", " + predicted);
        assertEquals(actual, predicted, EPSILON);
    }
}
Example #3
Source File: RandomForestRegressionModelInfoAdapterBridgeTest.java, from spark-transformers (Apache License 2.0)
@Test
public void testRandomForestRegressionWithPipeline() {
    // Load the data stored in LIBSVM format as a DataFrame.
    DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/regression_test.libsvm");

    // Split the data into training and test sets (30% held out for testing)
    DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3});
    DataFrame trainingData = splits[0];
    DataFrame testData = splits[1];

    // Train a RandomForest model.
    RandomForestRegressionModel regressionModel = new RandomForestRegressor()
            .setFeaturesCol("features").fit(trainingData);

    Pipeline pipeline = new Pipeline()
            .setStages(new PipelineStage[]{regressionModel});

    // Fit the pipeline (the already-trained forest model is its only stage).
    PipelineModel sparkPipeline = pipeline.fit(trainingData);

    // Export this model
    byte[] exportedModel = ModelExporter.export(sparkPipeline, null);

    // Import and get Transformer
    Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel);

    Row[] sparkOutput = sparkPipeline.transform(testData).select("features", "prediction").collect();

    // Compare predictions
    for (Row row : sparkOutput) {
        Vector v = (Vector) row.get(0);
        double actual = row.getDouble(1);

        Map<String, Object> inputData = new HashMap<String, Object>();
        inputData.put(transformer.getInputKeys().iterator().next(), v.toArray());
        transformer.transform(inputData);
        double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next());

        assertEquals(actual, predicted, EPSILON);
    }
}
Example #4
Source File: JavaRandomForestRegressorExample.java, from SparkDemo (MIT License)
public static void main(String[] args) {
    SparkSession spark = SparkSession
        .builder()
        .appName("JavaRandomForestRegressorExample")
        .getOrCreate();

    // $example on$
    // Load and parse the data file, converting it to a DataFrame.
    Dataset<Row> data = spark.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");

    // Automatically identify categorical features, and index them.
    // Set maxCategories so features with > 4 distinct values are treated as continuous.
    VectorIndexerModel featureIndexer = new VectorIndexer()
        .setInputCol("features")
        .setOutputCol("indexedFeatures")
        .setMaxCategories(4)
        .fit(data);

    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    // Train a RandomForest model.
    RandomForestRegressor rf = new RandomForestRegressor()
        .setLabelCol("label")
        .setFeaturesCol("indexedFeatures");

    // Chain indexer and forest in a Pipeline
    Pipeline pipeline = new Pipeline()
        .setStages(new PipelineStage[] {featureIndexer, rf});

    // Train model. This also runs the indexer.
    PipelineModel model = pipeline.fit(trainingData);

    // Make predictions.
    Dataset<Row> predictions = model.transform(testData);

    // Select example rows to display.
    predictions.select("prediction", "label", "features").show(5);

    // Select (prediction, true label) and compute test error
    RegressionEvaluator evaluator = new RegressionEvaluator()
        .setLabelCol("label")
        .setPredictionCol("prediction")
        .setMetricName("rmse");
    double rmse = evaluator.evaluate(predictions);
    System.out.println("Root Mean Squared Error (RMSE) on test data = " + rmse);

    RandomForestRegressionModel rfModel = (RandomForestRegressionModel) (model.stages()[1]);
    System.out.println("Learned regression forest model:\n" + rfModel.toDebugString());
    // $example off$

    spark.stop();
}
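As a follow-on (not part of the original Spark example), the fitted pipeline can be persisted with Spark ML's built-in model persistence and reloaded later. This is a minimal sketch continuing from the model variable above; the save path is a placeholder.

// Continues from Example #4: "model" is the fitted PipelineModel, and the
// random forest is its second stage. The save path is a placeholder.
model.write().overwrite().save("/tmp/rf-regression-pipeline");

PipelineModel reloaded = PipelineModel.load("/tmp/rf-regression-pipeline");
RandomForestRegressionModel reloadedForest =
    (RandomForestRegressionModel) reloaded.stages()[1];
System.out.println("Reloaded forest with " + reloadedForest.getNumTrees() + " trees");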
Example #5
Source File: RandomForestRegressionModelConverter.java, from jpmml-sparkml (GNU Affero General Public License v3.0)
public RandomForestRegressionModelConverter(RandomForestRegressionModel model) {
    super(model);
}
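This converter is an internal piece of jpmml-sparkml, which translates fitted Spark ML pipelines into PMML documents; callers normally do not instantiate it directly. Below is a rough sketch of the typical top-level entry point, assuming a recent jpmml-sparkml release where PMMLBuilder is the public API (older releases exposed a ConverterUtil helper instead). The variable names are placeholders.

import org.apache.spark.ml.PipelineModel;
import org.apache.spark.sql.types.StructType;
import org.dmg.pmml.PMML;
import org.jpmml.sparkml.PMMLBuilder;

// "trainingData" and "pipelineModel" are placeholders: the training DataFrame's
// schema and a fitted pipeline whose last stage is a RandomForestRegressionModel.
StructType schema = trainingData.schema();
PMML pmml = new PMMLBuilder(schema, pipelineModel).build();
// The resulting PMML object can then be marshalled to XML,
// e.g. with org.jpmml.model.JAXBUtil.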
Example #6
Source File: RandomForestRegressionModelInfoAdapter.java, from spark-transformers (Apache License 2.0)
@Override
public Class<RandomForestRegressionModel> getSource() {
    return RandomForestRegressionModel.class;
}