org.apache.spark.ml.classification.DecisionTreeClassificationModel Java Examples

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

6 votes

/**
 * Builds the {@link DecisionTreeModelInfo} description of a Spark decision tree classifier.
 *
 * <p>The adapted root node is stored on the result, the features and label column names are
 * registered as input keys, and the prediction, probability and raw prediction column names
 * are registered as output keys (in that order).
 *
 * @param decisionTreeModel the Spark model whose tree and column names are read
 * @return a newly created, populated model info object
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeClassificationModel decisionTreeModel) {
    final DecisionTreeModelInfo modelInfo = new DecisionTreeModelInfo();

    // Adapt the tree starting from its root node.
    final Node sparkRoot = decisionTreeModel.rootNode();
    modelInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(sparkRoot));

    // LinkedHashSet keeps insertion order: features first, then label.
    final Set<String> inputColumns = new LinkedHashSet<>();
    inputColumns.add(decisionTreeModel.getFeaturesCol());
    inputColumns.add(decisionTreeModel.getLabelCol());
    modelInfo.setInputKeys(inputColumns);

    // Read each output column name once and reuse it below.
    final String predictionColumn = decisionTreeModel.getPredictionCol();
    final String probabilityColumn = decisionTreeModel.getProbabilityCol();
    final String rawPredictionColumn = decisionTreeModel.getRawPredictionCol();

    // Output order: prediction, probability, raw prediction.
    final Set<String> outputColumns = new LinkedHashSet<>();
    outputColumns.add(predictionColumn);
    outputColumns.add(probabilityColumn);
    outputColumns.add(rawPredictionColumn);

    modelInfo.setProbabilityKey(probabilityColumn);
    modelInfo.setRawPredictionKey(rawPredictionColumn);
    modelInfo.setOutputKeys(outputColumns);

    return modelInfo;
}

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

6 votes

/**
 * Builds the {@link DecisionTreeModelInfo} description of a Spark decision tree classifier.
 *
 * <p>The adapted root node is stored on the result, the features and label column names are
 * registered as input keys, and the prediction, probability and raw prediction column names
 * are registered as output keys (in that order).
 *
 * @param decisionTreeModel the Spark model whose tree and column names are read
 * @param df a data frame accepted as part of the signature; it is not read by this method
 * @return a newly created, populated model info object
 */
public DecisionTreeModelInfo getModelInfo(final DecisionTreeClassificationModel decisionTreeModel,final DataFrame df) {
    final DecisionTreeModelInfo modelInfo = new DecisionTreeModelInfo();

    // Adapt the tree starting from its root node.
    final Node sparkRoot = decisionTreeModel.rootNode();
    modelInfo.setRoot(DecisionNodeAdapterUtils.adaptNode(sparkRoot));

    // LinkedHashSet keeps insertion order: features first, then label.
    final Set<String> inputColumns = new LinkedHashSet<>();
    inputColumns.add(decisionTreeModel.getFeaturesCol());
    inputColumns.add(decisionTreeModel.getLabelCol());
    modelInfo.setInputKeys(inputColumns);

    // Read each output column name once and reuse it below.
    final String predictionColumn = decisionTreeModel.getPredictionCol();
    final String probabilityColumn = decisionTreeModel.getProbabilityCol();
    final String rawPredictionColumn = decisionTreeModel.getRawPredictionCol();

    // Output order: prediction, probability, raw prediction.
    final Set<String> outputColumns = new LinkedHashSet<>();
    outputColumns.add(predictionColumn);
    outputColumns.add(probabilityColumn);
    outputColumns.add(rawPredictionColumn);

    modelInfo.setProbabilityKey(probabilityColumn);
    modelInfo.setRawPredictionKey(rawPredictionColumn);
    modelInfo.setOutputKeys(outputColumns);

    return modelInfo;
}

Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Trains a decision tree classifier with Spark, exports it, re-imports it as a
 * {@code DecisionTreeTransformer}, and checks that the imported transformer yields the same
 * prediction and raw prediction as Spark for every row of the held-out split.
 */
@Test
public void testDecisionTreeClassificationPrediction() {
    // Load the data stored in LIBSVM format as a DataFrame.
    String datapath = "src/test/resources/classification_test.libsvm";
    Dataset<Row> data = spark.read().format("libsvm").load(datapath);

    // Split the data into training and test sets (30% held out for testing)
    Dataset<Row>[] splits = data.randomSplit(new double[]{0.7, 0.3});
    Dataset<Row> trainingData = splits[0];
    Dataset<Row> testData = splits[1];

    // Train a DecisionTree model.
    DecisionTreeClassificationModel classifierModel = new DecisionTreeClassifier().fit(trainingData);
    trainingData.printSchema();

    // Spark's own output is the reference the imported transformer is compared against.
    List<Row> output = classifierModel.transform(testData)
            .select("features", "prediction", "rawPrediction")
            .collectAsList();
    byte[] exportedModel = ModelExporter.export(classifierModel);

    DecisionTreeTransformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel);

    //compare predictions
    for (Row row : output) {
        Map<String, Object> data_ = new HashMap<>();
        double[] expectedRawPrediction = ((DenseVector) row.get(2)).toArray();
        double expectedPrediction = (double) row.get(1);

        data_.put("features", ((SparseVector) row.get(0)).toArray());
        transformer.transform(data_);
        System.out.println(data_);
        System.out.println(data_.get("prediction"));

        // JUnit's contract is (expected, actual, delta): the Spark value goes first so that
        // failure messages report the transformer output as the actual value.
        assertEquals(expectedPrediction, (double) data_.get("prediction"), EPSILON);
        assertArrayEquals(expectedRawPrediction, (double[]) data_.get("rawPrediction"), EPSILON);
    }
}

Source File: RandomForestClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

5 votes

/**
 * Builds the {@link RandomForestModelInfo} description of a Spark random forest classifier.
 *
 * <p>Copies the class and feature counts, marks the model as classification, boxes the tree
 * weights into a list, adapts every member tree through {@code DECISION_TREE_ADAPTER}, and
 * registers the input (features, label) and output (prediction, probability, raw prediction)
 * column names.
 *
 * @param sparkRfModel the Spark random forest model to describe
 * @param df a data frame that is forwarded unchanged to the per-tree adapter
 * @return a newly created, populated model info object
 */
@Override
RandomForestModelInfo getModelInfo(final RandomForestClassificationModel sparkRfModel, final DataFrame df) {
    final RandomForestModelInfo forestInfo = new RandomForestModelInfo();

    forestInfo.setNumClasses(sparkRfModel.numClasses());
    forestInfo.setNumFeatures(sparkRfModel.numFeatures());
    // This adapter handles classification models, so the regression flag is off.
    forestInfo.setRegression(false);

    // Box each primitive weight into the list form stored on the model info.
    final List<Double> boxedWeights = new ArrayList<>();
    for (double weight : sparkRfModel.treeWeights()) {
        boxedWeights.add(weight);
    }
    forestInfo.setTreeWeights(boxedWeights);

    // Adapt each member tree; the cast narrows it to the classification model type.
    final List<DecisionTreeModelInfo> adaptedTrees = new ArrayList<>();
    for (DecisionTreeModel memberTree : sparkRfModel.trees()) {
        final DecisionTreeClassificationModel classificationTree = (DecisionTreeClassificationModel) memberTree;
        adaptedTrees.add(DECISION_TREE_ADAPTER.getModelInfo(classificationTree, df));
    }
    forestInfo.setTrees(adaptedTrees);

    // LinkedHashSet keeps insertion order: features first, then label.
    final Set<String> inputColumns = new LinkedHashSet<>();
    inputColumns.add(sparkRfModel.getFeaturesCol());
    inputColumns.add(sparkRfModel.getLabelCol());
    forestInfo.setInputKeys(inputColumns);

    // Read each output column name once and reuse it below.
    final String predictionColumn = sparkRfModel.getPredictionCol();
    final String probabilityColumn = sparkRfModel.getProbabilityCol();
    final String rawPredictionColumn = sparkRfModel.getRawPredictionCol();

    // Output order: prediction, probability, raw prediction.
    final Set<String> outputColumns = new LinkedHashSet<>();
    outputColumns.add(predictionColumn);
    outputColumns.add(probabilityColumn);
    outputColumns.add(rawPredictionColumn);

    forestInfo.setProbabilityKey(probabilityColumn);
    forestInfo.setRawPredictionKey(rawPredictionColumn);
    forestInfo.setOutputKeys(outputColumns);

    return forestInfo;
}

Source File: JavaDecisionTreeClassificationExample.java From SparkDemo with MIT License

4 votes

/**
 * Runs the decision tree classification example end to end: loads the LIBSVM sample data,
 * indexes labels and features, trains a decision tree inside a pipeline on a 70/30 split,
 * prints a few predictions, the test error, and the learned tree, then stops the session.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
  SparkSession session = SparkSession.builder()
    .appName("JavaDecisionTreeClassificationExample")
    .getOrCreate();

  // $example on$
  // Read the LIBSVM-formatted sample file into a DataFrame.
  String inputPath = "data/mllib/sample_libsvm_data.txt";
  Dataset<Row> dataset = session.read().format("libsvm").load(inputPath);

  // Index the label column, attaching metadata to it.
  // Fitting on the full dataset ensures every label ends up in the index.
  StringIndexerModel labelIndexerModel = new StringIndexer()
    .setInputCol("label")
    .setOutputCol("indexedLabel")
    .fit(dataset);

  // Detect categorical features automatically and index them;
  // features with > 4 distinct values are treated as continuous.
  VectorIndexerModel featureIndexerModel = new VectorIndexer()
    .setInputCol("features")
    .setOutputCol("indexedFeatures")
    .setMaxCategories(4)
    .fit(dataset);

  // Hold out 30% of the rows for testing; train on the remaining 70%.
  double[] splitWeights = new double[]{0.7, 0.3};
  Dataset<Row>[] parts = dataset.randomSplit(splitWeights);
  Dataset<Row> trainSet = parts[0];
  Dataset<Row> testSet = parts[1];

  // Decision tree estimator reading the indexed label and feature columns.
  DecisionTreeClassifier treeClassifier = new DecisionTreeClassifier()
    .setLabelCol("indexedLabel")
    .setFeaturesCol("indexedFeatures");

  // Maps predicted indices back to the original label strings.
  IndexToString indexToLabel = new IndexToString()
    .setInputCol("prediction")
    .setOutputCol("predictedLabel")
    .setLabels(labelIndexerModel.labels());

  // Pipeline order: label indexer, feature indexer, tree, label converter.
  PipelineStage[] stages = new PipelineStage[]{labelIndexerModel, featureIndexerModel, treeClassifier, indexToLabel};
  Pipeline treePipeline = new Pipeline().setStages(stages);

  // Fitting the pipeline trains the tree and also runs the indexers.
  PipelineModel fittedPipeline = treePipeline.fit(trainSet);

  // Score the held-out rows.
  Dataset<Row> scored = fittedPipeline.transform(testSet);

  // Show a handful of example rows.
  scored.select("predictedLabel", "label", "features").show(5);

  // Compare prediction against the indexed label and report test error as 1 - accuracy.
  MulticlassClassificationEvaluator accuracyEvaluator = new MulticlassClassificationEvaluator()
    .setLabelCol("indexedLabel")
    .setPredictionCol("prediction")
    .setMetricName("accuracy");
  double accuracy = accuracyEvaluator.evaluate(scored);
  double testError = 1.0 - accuracy;
  System.out.println("Test Error = " + testError);

  // The tree is the third pipeline stage (index 2).
  DecisionTreeClassificationModel fittedTree =
    (DecisionTreeClassificationModel) (fittedPipeline.stages()[2]);
  System.out.println("Learned classification tree model:\n" + fittedTree.toDebugString());
  // $example off$

  session.stop();
}

Source File: DecisionTreeClassificationModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

/**
 * Creates a converter for the given decision tree classification model.
 *
 * @param model the Spark ML model; it is passed unchanged to the superclass constructor
 */
public DecisionTreeClassificationModelConverter(DecisionTreeClassificationModel model){
	super(model);
}

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Returns the class token for {@link DecisionTreeClassificationModel}.
 *
 * <p>NOTE(review): presumably this identifies the Spark model type the enclosing adapter
 * handles; confirm against the declaration this method overrides.
 *
 * @return {@code DecisionTreeClassificationModel.class}
 */
@Override
public Class<DecisionTreeClassificationModel> getSource() {
    return DecisionTreeClassificationModel.class;
}

Source File: DecisionTreeClassificationModelInfoAdapter.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Returns the class token for {@link DecisionTreeClassificationModel}.
 *
 * <p>NOTE(review): presumably this identifies the Spark model type the enclosing adapter
 * handles; confirm against the declaration this method overrides.
 *
 * @return {@code DecisionTreeClassificationModel.class}
 */
@Override
public Class<DecisionTreeClassificationModel> getSource() {
    return DecisionTreeClassificationModel.class;
}

Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0

4 votes

/**
 * Trains a decision tree classifier on an indexed label column, exports and re-imports it,
 * and checks that the imported transformer matches Spark's prediction and raw prediction
 * for every row of the held-out split.
 */
@Test
public void testDecisionTreeClassificationRawPrediction() {
    // Read the LIBSVM test fixture into a DataFrame.
    final String fixturePath = "src/test/resources/classification_test.libsvm";
    DataFrame dataset = sqlContext.read().format("libsvm").load(fixturePath);

    // Index "label" into "labelIndex", fitting the indexer on the full dataset.
    final StringIndexerModel labelIndexer = new StringIndexer()
            .setInputCol("label")
            .setOutputCol("labelIndex")
            .fit(dataset);
    dataset = labelIndexer.transform(dataset);

    // 70% of the rows train the model; the remaining 30% are held out for comparison.
    final DataFrame[] parts = dataset.randomSplit(new double[]{0.7, 0.3});
    final DataFrame trainSet = parts[0];
    final DataFrame testSet = parts[1];

    // Fit the tree with explicitly named label, feature and output columns.
    final DecisionTreeClassificationModel sparkModel = new DecisionTreeClassifier()
            .setLabelCol("labelIndex")
            .setFeaturesCol("features")
            .setRawPredictionCol("rawPrediction")
            .setPredictionCol("prediction")
            .fit(trainSet);

    // Round-trip the model through the exporter and importer.
    final byte[] serializedModel = ModelExporter.export(sparkModel, null);
    final Transformer importedTransformer =
            (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(serializedModel);

    // Spark's own output on the held-out rows serves as the expected values.
    final Row[] sparkRows = sparkModel.transform(testSet)
            .select("features", "prediction", "rawPrediction")
            .collect();

    for (final Row sparkRow : sparkRows) {
        final Vector features = (Vector) sparkRow.get(0);
        final double expectedPrediction = sparkRow.getDouble(1);
        final double[] expectedRaw = ((Vector) sparkRow.get(2)).toArray();

        // Feed the features under the transformer's first input key.
        final Map<String, Object> record = new HashMap<>();
        final String featuresKey = importedTransformer.getInputKeys().iterator().next();
        record.put(featuresKey, features.toArray());
        importedTransformer.transform(record);

        // The transformer writes its results back into the same map.
        final double importedPrediction =
                (double) record.get(importedTransformer.getOutputKeys().iterator().next());
        final double[] importedRaw = (double[]) record.get("rawPrediction");

        assertEquals(expectedPrediction, importedPrediction, EPSILON);
        assertArrayEquals(expectedRaw, importedRaw, EPSILON);
    }
}

org.apache.spark.ml.classification.DecisionTreeClassificationModel Java Examples