org.apache.spark.ml.classification.GBTClassificationModel Java Examples
The following examples show how to use
org.apache.spark.ml.classification.GBTClassificationModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GBTClassificationModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes the wrapped {@link GBTClassificationModel} as a PMML {@link MiningModel}.
 *
 * <p>The individual trees are encoded against an anonymous double-valued regressor schema,
 * combined as a weighted sum using the model's tree weights (output field {@code gbtValue}),
 * and the resulting regression model is then wrapped into a binary logistic classification.
 *
 * @param schema the schema of the classification problem
 * @return the mining model produced by {@code MiningModelUtil.createBinaryLogisticClassification}
 * @throws IllegalArgumentException if the model's loss type is anything other than {@code "logistic"}
 */
@Override
public MiningModel encodeModel(Schema schema){
	GBTClassificationModel gbtModel = getTransformer();

	// Only the logistic loss can be encoded; reject everything else up front.
	String lossType = gbtModel.getLossType();
	if(!lossType.equals("logistic")){
		throw new IllegalArgumentException("Loss function " + lossType + " is not supported");
	}

	// Member trees are encoded as regressors producing a double value.
	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);
	List<TreeModel> treeModels = TreeModelUtil.encodeDecisionTreeEnsemble(this, segmentSchema);

	// Weighted sum of the tree outputs, exposed as the intermediate "gbtValue" field.
	MiningModel weightedSum = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(segmentSchema.getLabel()))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, Doubles.asList(gbtModel.treeWeights())))
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbtValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(weightedSum, 2d, 0d, RegressionModel.NormalizationMethod.LOGIT, false, schema);
}
Example #2
Source File: GradientBoostClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 5 votes |
@Override GradientBoostModelInfo getModelInfo(final GBTClassificationModel sparkGbModel) { final GradientBoostModelInfo modelInfo = new GradientBoostModelInfo(); modelInfo.setNumFeatures(sparkGbModel.numFeatures()); modelInfo.setRegression(false); //false for classification final List<Double> treeWeights = new ArrayList<Double>(); for (double w : sparkGbModel.treeWeights()) { treeWeights.add(w); } modelInfo.setTreeWeights(treeWeights); final List<DecisionTreeModelInfo> decisionTrees = new ArrayList<>(); for (DecisionTreeModel decisionTreeModel : sparkGbModel.trees()) { decisionTrees.add(DECISION_TREE_ADAPTER.getModelInfo((DecisionTreeRegressionModel) decisionTreeModel)); } modelInfo.setTrees(decisionTrees); final Set<String> inputKeys = new LinkedHashSet<String>(); inputKeys.add(sparkGbModel.getFeaturesCol()); inputKeys.add(sparkGbModel.getLabelCol()); modelInfo.setInputKeys(inputKeys); final Set<String> outputKeys = new LinkedHashSet<String>(); outputKeys.add(sparkGbModel.getPredictionCol()); modelInfo.setOutputKeys(outputKeys); return modelInfo; }
Example #3
Source File: GradientBoostClassificationModelTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testGradientBoostClassification() { // Load the data stored in LIBSVM format as a DataFrame. String datapath = "src/test/resources/binary_classification_test.libsvm"; Dataset<Row> data = spark.read().format("libsvm").load(datapath); // Split the data into training and test sets (30% held out for testing) Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3}); Dataset<Row> trainingData = splits[0]; Dataset<Row> testData = splits[1]; // Train a RandomForest model. GBTClassificationModel classificationModel = new GBTClassifier().fit(trainingData); byte[] exportedModel = ModelExporter.export(classificationModel); Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); List<Row> sparkOutput = classificationModel.transform(testData).select("features", "prediction","label").collectAsList(); // compare predictions for (Row row : sparkOutput) { Map<String, Object> data_ = new HashMap<>(); data_.put("features", ((SparseVector) row.get(0)).toArray()); data_.put("label", (row.get(2)).toString()); transformer.transform(data_); System.out.println(data_); System.out.println(data_.get("prediction")+" ,"+row.get(1)); assertEquals((double) data_.get("prediction"), (double) row.get(1), EPSILON); } }
Example #4
Source File: GradientBoostClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 5 votes |
@Override GradientBoostModelInfo getModelInfo(final GBTClassificationModel sparkGbModel, final DataFrame df) { final GradientBoostModelInfo modelInfo = new GradientBoostModelInfo(); modelInfo.setNumFeatures(sparkGbModel.numFeatures()); modelInfo.setRegression(false); //false for classification final List<Double> treeWeights = new ArrayList<Double>(); for (double w : sparkGbModel.treeWeights()) { treeWeights.add(w); } modelInfo.setTreeWeights(treeWeights); final List<DecisionTreeModelInfo> decisionTrees = new ArrayList<>(); for (DecisionTreeModel decisionTreeModel : sparkGbModel.trees()) { decisionTrees.add(DECISION_TREE_ADAPTER.getModelInfo((DecisionTreeRegressionModel) decisionTreeModel,df)); } modelInfo.setTrees(decisionTrees); final Set<String> inputKeys = new LinkedHashSet<String>(); inputKeys.add(sparkGbModel.getFeaturesCol()); inputKeys.add(sparkGbModel.getLabelCol()); modelInfo.setInputKeys(inputKeys); final Set<String> outputKeys = new LinkedHashSet<String>(); outputKeys.add(sparkGbModel.getPredictionCol()); modelInfo.setOutputKeys(outputKeys); return modelInfo; }
Example #5
Source File: JavaGradientBoostedTreeClassifierExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaGradientBoostedTreeClassifierExample") .getOrCreate(); // $example on$ // Load and parse the data file, converting it to a DataFrame. Dataset<Row> data = spark .read() .format("libsvm") .load("data/mllib/sample_libsvm_data.txt"); // Index labels, adding metadata to the label column. // Fit on whole dataset to include all labels in index. StringIndexerModel labelIndexer = new StringIndexer() .setInputCol("label") .setOutputCol("indexedLabel") .fit(data); // Automatically identify categorical features, and index them. // Set maxCategories so features with > 4 distinct values are treated as continuous. VectorIndexerModel featureIndexer = new VectorIndexer() .setInputCol("features") .setOutputCol("indexedFeatures") .setMaxCategories(4) .fit(data); // Split the data into training and test sets (30% held out for testing) Dataset<Row>[] splits = data.randomSplit(new double[] {0.7, 0.3}); Dataset<Row> trainingData = splits[0]; Dataset<Row> testData = splits[1]; // Train a GBT model. GBTClassifier gbt = new GBTClassifier() .setLabelCol("indexedLabel") .setFeaturesCol("indexedFeatures") .setMaxIter(10); // Convert indexed labels back to original labels. IndexToString labelConverter = new IndexToString() .setInputCol("prediction") .setOutputCol("predictedLabel") .setLabels(labelIndexer.labels()); // Chain indexers and GBT in a Pipeline. Pipeline pipeline = new Pipeline() .setStages(new PipelineStage[] {labelIndexer, featureIndexer, gbt, labelConverter}); // Train model. This also runs the indexers. PipelineModel model = pipeline.fit(trainingData); // Make predictions. Dataset<Row> predictions = model.transform(testData); // Select example rows to display. predictions.select("predictedLabel", "label", "features").show(5); // Select (prediction, true label) and compute test error. 
MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator() .setLabelCol("indexedLabel") .setPredictionCol("prediction") .setMetricName("accuracy"); double accuracy = evaluator.evaluate(predictions); System.out.println("Test Error = " + (1.0 - accuracy)); GBTClassificationModel gbtModel = (GBTClassificationModel)(model.stages()[2]); System.out.println("Learned classification GBT model:\n" + gbtModel.toDebugString()); // $example off$ spark.stop(); }
Example #6
Source File: GBTClassificationModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates a converter around the given GBT classification model.
 *
 * @param model the model, passed straight through to the superclass constructor
 */
public GBTClassificationModelConverter(GBTClassificationModel model){
	super(model);
}
Example #7
Source File: GradientBoostClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 4 votes |
/**
 * Reports the source model type.
 *
 * @return the {@link GBTClassificationModel} class object
 */
@Override
public Class<GBTClassificationModel> getSource() {
    final Class<GBTClassificationModel> sourceType = GBTClassificationModel.class;
    return sourceType;
}
Example #8
Source File: GradientBoostClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 4 votes |
/**
 * Reports the source model type.
 *
 * @return the {@link GBTClassificationModel} class object
 */
@Override
public Class<GBTClassificationModel> getSource() {
    final Class<GBTClassificationModel> sourceType = GBTClassificationModel.class;
    return sourceType;
}