org.dmg.pmml.ScoreDistribution Java Examples

Source File: ScoreDistributionManager.java From jpmml-sklearn with GNU Affero General Public License v3.0

6 votes

/**
 * Creates one score distribution per category of the given label.
 *
 * Every element is passed through {@link #intern(ScoreDistribution)}, and it is the
 * instance returned by that call that ends up in the result.
 *
 * @param categoricalLabel The label whose category values become the score distribution values.
 * @param recordCounts The record counts, indexed in parallel with the categories of the label.
 * @return The score distributions, in the category order of the label.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> scoreDistributions = new ArrayList<>();

	int index = 0;

	while(index < categoricalLabel.size()){
		Object category = categoricalLabel.getValue(index);
		double count = recordCounts[index];

		ScoreDistribution candidate = new InternableScoreDistribution()
			.setValue(category)
			.setRecordCount(count);

		// Keep whatever instance the interner hands back, not necessarily the fresh one
		scoreDistributions.add(intern(candidate));

		index++;
	}

	return scoreDistributions;
}

Source File: ScoreDistributionInternerTest.java From jpmml-evaluator with GNU Affero General Public License v3.0

6 votes

/**
 * Verifies that equal score distributions held by two sibling nodes are distinct objects
 * before {@link ScoreDistributionInterner} is applied to the model, and identical objects afterwards.
 */
@Test
public void intern(){
	Node firstChild = createNode(new ScoreDistribution("event", 0.33d));
	Node secondChild = createNode(new ScoreDistribution("event", 0.33d));

	Node parent = new ComplexNode(True.INSTANCE)
		.addNodes(firstChild, secondChild);

	TreeModel model = new TreeModel()
		.setNode(parent);

	// Before interning: equal content, different identities
	int position = 0;

	while(position < 2){
		assertNotSame((firstChild.getScoreDistributions()).get(position), (secondChild.getScoreDistributions()).get(position));

		position++;
	}

	new ScoreDistributionInterner().applyTo(model);

	// After interning: both children reference the very same instances
	position = 0;

	while(position < 2){
		assertSame((firstChild.getScoreDistributions()).get(position), (secondChild.getScoreDistributions()).get(position));

		position++;
	}
}

Source File: RDFUpdateIT.java From oryx with Apache License 2.0

5 votes

/**
 * Recursively validates the structure of a tree node.
 *
 * A node with score distributions is treated as a leaf and must carry one or two
 * distributions with plausible confidences. Any other node must have exactly two children,
 * whose IDs and record counts must be consistent with the parent's.
 *
 * @param node the node to validate, together with all of its descendants
 */
private static void checkNode(Node node) {
  assertNotNull(node.getId());

  if (node.hasScoreDistributions()) {
    // Leaf
    List<ScoreDistribution> distributions = node.getScoreDistributions();
    int count = distributions.size();
    assertRange(count, 1, 2);
    ScoreDistribution head = distributions.get(0);
    if (count != 1) {
      // Two outcomes: neither may be certain or impossible
      assertGreater(head.getConfidence().doubleValue(), 0.0);
      assertLess(head.getConfidence().doubleValue(), 1.0);
      ScoreDistribution tail = distributions.get(1);
      assertGreater(tail.getConfidence().doubleValue(), 0.0);
      assertLess(tail.getConfidence().doubleValue(), 1.0);
    } else {
      // A single outcome must be certain
      assertEquals(1.0, head.getConfidence().doubleValue());
    }
    return;
  }

  // Non-leaf
  List<Node> children = node.getNodes();
  assertEquals(2, children.size());
  // The "+" (right) child is stored first, the "-" (left) child second
  Node positiveChild = children.get(0);
  Node negativeChild = children.get(1);
  assertInstanceOf(negativeChild.getPredicate(), True.class);

  int parentCount = node.getRecordCount().intValue();
  int negativeCount = negativeChild.getRecordCount().intValue();
  int positiveCount = positiveChild.getRecordCount().intValue();
  assertEquals(parentCount, negativeCount + positiveCount);

  assertEquals(node.getId() + "+", positiveChild.getId());
  assertEquals(node.getId() + "-", negativeChild.getId());

  checkNode(positiveChild);
  checkNode(negativeChild);
}

Source File: ClassifierNode.java From jpmml-model with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Returns the live list of score distributions, creating an empty list on first access.
 *
 * @return The backing list; never {@code null}.
 */
@Override
public List<ScoreDistribution> getScoreDistributions(){
	List<ScoreDistribution> result = this.scoreDistributions;

	// Lazily allocate the backing list
	if(result == null){
		result = new ArrayList<>();

		this.scoreDistributions = result;
	}

	return result;
}

Source File: TargetCategoryParser.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Replaces the value of the score distribution with its parsed form.
 *
 * @param scoreDistribution The element being visited.
 * @return The action returned by the superclass visit.
 * @throws MissingAttributeException If the score distribution has no value.
 */
@Override
public VisitorAction visit(ScoreDistribution scoreDistribution){
	Object rawValue = scoreDistribution.getValue();

	if(rawValue != null){
		scoreDistribution.setValue(parseTargetValue(rawValue));

		return super.visit(scoreDistribution);
	}

	// The value attribute is mandatory
	throw new MissingAttributeException(scoreDistribution, PMMLAttributes.SCOREDISTRIBUTION_VALUE);
}

Source File: ScoreDistributionInterner.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Replaces, in place, every score distribution of the node with the instance
 * returned by {@code intern(ScoreDistribution)}.
 *
 * @param node The node being visited.
 * @return The action returned by the superclass visit.
 */
@Override
public VisitorAction visit(Node node){

	// Nothing to rewrite for nodes without score distributions
	if(!node.hasScoreDistributions()){
		return super.visit(node);
	}

	ListIterator<ScoreDistribution> cursor = (node.getScoreDistributions()).listIterator();

	while(cursor.hasNext()){
		ScoreDistribution current = cursor.next();

		cursor.set(intern(current));
	}

	return super.visit(node);
}

Source File: ScoreDistributionInterner.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Interns the score distribution through the cache, unless it is {@code null} or has extensions.
 *
 * @param scoreDistribution The candidate; may be {@code null}.
 * @return The instance returned by the cache, or the argument itself when it is skipped.
 */
private ScoreDistribution intern(ScoreDistribution scoreDistribution){
	boolean internable = (scoreDistribution != null) && !scoreDistribution.hasExtensions();

	if(internable){
		return this.cache.intern(scoreDistribution);
	}

	// Missing or extended elements are passed through untouched
	return scoreDistribution;
}

Source File: ScoreDistributionInternerTest.java From jpmml-evaluator with GNU Affero General Public License v3.0

5 votes

/**
 * Creates a node holding the given "event" score distribution plus a complementary
 * "no-event" score distribution, whose record count is one minus the event's record count.
 *
 * @param event The score distribution of the event outcome.
 * @return A new node carrying both score distributions, the event first.
 */
private static Node createNode(ScoreDistribution event){
	double complement = 1d - NumberUtil.asDouble(event.getRecordCount());

	ScoreDistribution noEvent = new ScoreDistribution("no-event", complement);

	return new ComplexNode()
		.addScoreDistributions(event, noEvent);
}

Source File: ScoreDistributionManager.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Passes the score distribution through the interner of this manager.
 *
 * @param scoreDistribution The score distribution to intern.
 * @return The instance returned by the interner.
 */
public ScoreDistribution intern(ScoreDistribution scoreDistribution){
	ScoreDistribution interned = this.interner.intern(scoreDistribution);

	return interned;
}

Source File: DummyClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes this dummy classifier as a {@link TreeModel} that consists of a single, always-true root node.
 *
 * The root node is scored with the class selected by the strategy, and carries one
 * score distribution per class:
 * <ul>
 *   <li>{@code "constant"} - the configured constant class gets probability one.</li>
 *   <li>{@code "most_frequent"} - the class with the largest prior gets probability one.</li>
 *   <li>{@code "prior"} - the class with the largest prior is the score; the priors themselves are the probabilities.</li>
 * </ul>
 *
 * @param schema The schema; its label must be a {@link CategoricalLabel}.
 * @return The tree model, with a probability output attached.
 * @throws IllegalArgumentException If the strategy is not supported,
 * or if the strategy is {@code "constant"} and the constant is not one of the classes.
 */
@Override
public TreeModel encodeModel(Schema schema){
	List<?> classes = getClasses();
	List<? extends Number> classPrior = getClassPrior();
	Object constant = getConstant();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(classes, classPrior);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int index;

	double[] probabilities;

	switch(strategy){
		case "constant":
			{
				index = classes.indexOf(constant);

				// List#indexOf signals "not found" with -1 (eg. when the constant and the classes use different boxed types).
				// Fail with a descriptive message instead of an ArrayIndexOutOfBoundsException below.
				if(index < 0){
					throw new IllegalArgumentException("Constant " + constant + " is not among the classes " + classes);
				}

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "most_frequent":
			{
				// The raw cast is needed because Number does not implement Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "prior":
			{
				// The raw cast is needed because Number does not implement Comparable
				index = classPrior.indexOf(Collections.max((List)classPrior));

				probabilities = Doubles.toArray(classPrior);
			}
			break;
		default:
			throw new IllegalArgumentException(strategy);
	}

	Node root = new ClassifierNode(ValueUtil.asString(classes.get(index)), True.INSTANCE);

	List<ScoreDistribution> scoreDistributions = root.getScoreDistributions();

	// One score distribution per class, in class order
	for(int i = 0; i < classes.size(); i++){
		ScoreDistribution scoreDistribution = new ScoreDistribution(ValueUtil.asString(classes.get(i)), probabilities[i]);

		scoreDistributions.add(scoreDistribution);
	}

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), root)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return treeModel;
}

Source File: RDFPMMLUtilsTest.java From oryx with Apache License 2.0

4 votes

/**
 * Builds a small PMML document for tests: a categorical "color" predictor, a categorical
 * "fruit" target, and a three-node decision tree (root "r" with children "r+" and "r-").
 *
 * @param numTrees number of trees; when greater than 1, the same tree is wrapped that many
 *  times as segments of a weighted-majority-vote mining model, otherwise the document
 *  contains the single tree model directly
 * @return the assembled PMML document
 */
private static PMML buildDummyClassificationModel(int numTrees) {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  // Data dictionary: predictor first, then target
  DataField colorField =
      new DataField(FieldName.create("color"), OpType.CATEGORICAL, DataType.STRING);
  colorField.addValues(new Value("yellow"), new Value("red"));
  DataField fruitField =
      new DataField(FieldName.create("fruit"), OpType.CATEGORICAL, DataType.STRING);
  fruitField.addValues(new Value("banana"), new Value("apple"));
  List<DataField> fields = new ArrayList<>();
  fields.add(colorField);
  fields.add(fruitField);
  pmml.setDataDictionary(new DataDictionary(fields).setNumberOfFields(fields.size()));

  // Mining schema: same order as the data dictionary
  MiningField colorMiningField = new MiningField(FieldName.create("color"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.ACTIVE)
      .setImportance(0.5);
  MiningField fruitMiningField = new MiningField(FieldName.create("fruit"))
      .setOpType(OpType.CATEGORICAL)
      .setUsageType(MiningField.UsageType.PREDICTED);
  List<MiningField> schemaFields = new ArrayList<>();
  schemaFields.add(colorMiningField);
  schemaFields.add(fruitMiningField);
  MiningSchema miningSchema = new MiningSchema(schemaFields);

  // Tree: the record count of the root is split evenly between the two leaves
  double totalCount = 2.0;
  double leafCount = totalCount / 2;

  Node positiveLeaf = new ComplexNode().setId("r+").setRecordCount(leafCount)
      .setPredicate(new SimpleSetPredicate(FieldName.create("color"),
                                           SimpleSetPredicate.BooleanOperator.IS_NOT_IN,
                                           new Array(Array.Type.STRING, "red")));
  positiveLeaf.addScoreDistributions(new ScoreDistribution("banana", leafCount));

  Node negativeLeaf =
      new ComplexNode().setId("r-").setRecordCount(leafCount).setPredicate(new True());
  negativeLeaf.addScoreDistributions(new ScoreDistribution("apple", leafCount));

  Node root =
      new ComplexNode().setId("r").setRecordCount(totalCount).setPredicate(new True());
  // The predicate-carrying "+" child comes first
  root.addNodes(positiveLeaf, negativeLeaf);

  TreeModel tree = new TreeModel(MiningFunction.CLASSIFICATION, miningSchema, root)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);

  if (numTrees <= 1) {
    pmml.addModels(tree);
    return pmml;
  }

  // Forest: every segment references the very same tree model instance
  List<Segment> forestSegments = new ArrayList<>();
  for (int treeIndex = 0; treeIndex < numTrees; treeIndex++) {
    forestSegments.add(new Segment()
        .setId(String.valueOf(treeIndex))
        .setPredicate(new True())
        .setModel(tree)
        .setWeight(1.0));
  }
  MiningModel forest = new MiningModel(MiningFunction.CLASSIFICATION, miningSchema);
  forest.setSegmentation(
      new Segmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, forestSegments));
  pmml.addModels(forest);

  return pmml;
}

Source File: RDFUpdate.java From oryx with Apache License 2.0

4 votes

/**
 * Converts an MLlib decision tree into a PMML {@link TreeModel}.
 *
 * The MLlib tree is walked level by level using two FIFO queues that are advanced in
 * lockstep: one holds the MLlib nodes still to be converted (paired with the split that
 * leads to them, if any), the other holds the PMML nodes being filled in for them.
 * PMML node IDs start at "r" for the root; each child ID is the parent ID plus "+"
 * (right MLlib child) or "-" (left MLlib child).
 *
 * @param dtModel MLlib model to convert; its algorithm type must agree with
 *  {@code inputSchema.isClassification()}, otherwise a {@code Preconditions.checkState} check fails
 * @param categoricalValueEncodings encodings passed to {@code buildPredicate}, and used to map
 *  encoded target values back to their original values for classification leaves
 * @param nodeIDCounts per-node counts keyed by MLlib node ID; used as PMML record counts and
 *  to choose the default child (presumably the number of examples that reached each node; confirm against caller)
 * @return a tree model with binary splits and the "default child" missing value strategy
 */
private TreeModel toTreeModel(DecisionTreeModel dtModel,
                              CategoricalValueEncodings categoricalValueEncodings,
                              IntLongMap nodeIDCounts) {

  boolean classificationTask = dtModel.algo().equals(Algo.Classification());
  Preconditions.checkState(classificationTask == inputSchema.isClassification());

  Node root = new ComplexNode();
  root.setId("r");

  // PMML nodes awaiting population; kept in the same order as treeNodes below
  Queue<Node> modelNodes = new ArrayDeque<>();
  modelNodes.add(root);

  // MLlib nodes awaiting conversion, each paired with the split that leads to it (null for the root)
  Queue<Pair<org.apache.spark.mllib.tree.model.Node,Split>> treeNodes = new ArrayDeque<>();
  treeNodes.add(new Pair<>(dtModel.topNode(), null));

  while (!treeNodes.isEmpty()) {

    // Both queues are dequeued together, so these two always correspond to each other
    Pair<org.apache.spark.mllib.tree.model.Node,Split> treeNodePredicate = treeNodes.remove();
    Node modelNode = modelNodes.remove();

    // This is the decision that got us here from the parent, if any;
    // not the predicate at this node
    Predicate predicate = buildPredicate(treeNodePredicate.getSecond(),
                                         categoricalValueEncodings);
    modelNode.setPredicate(predicate);

    org.apache.spark.mllib.tree.model.Node treeNode = treeNodePredicate.getFirst();
    long nodeCount = nodeIDCounts.get(treeNode.id());
    modelNode.setRecordCount((double) nodeCount);

    if (treeNode.isLeaf()) {

      Predict prediction = treeNode.predict();
      int targetEncodedValue = (int) prediction.predict();
      if (classificationTask) {
        Map<Integer,String> targetEncodingToValue =
            categoricalValueEncodings.getEncodingValueMap(inputSchema.getTargetFeatureIndex());
        double predictedProbability = prediction.prob();
        Preconditions.checkState(predictedProbability >= 0.0 && predictedProbability <= 1.0);
        // Not sure how nodeCount == 0 can happen but it does in the MLlib model
        long effectiveNodeCount = Math.max(1, nodeCount);
        // Problem: MLlib only gives a predicted class and its probability, and no distribution
        // over the rest. Infer that the rest of the probability is evenly distributed.
        double restProbability = (1.0 - predictedProbability) / (targetEncodingToValue.size() - 1);

        // One score distribution per target value; values with a zero record count are omitted
        targetEncodingToValue.forEach((encodedValue, value) -> {
          double probability = encodedValue == targetEncodedValue ? predictedProbability : restProbability;
          // Yes, recordCount may be fractional; it's a relative indicator
          double recordCount = probability * effectiveNodeCount;
          if (recordCount > 0.0) {
            ScoreDistribution distribution = new ScoreDistribution(value, recordCount);
            // Not "confident" enough in the "probability" to call it one
            distribution.setConfidence(probability);
            modelNode.addScoreDistributions(distribution);
          }
        });
      } else {
        // Regression leaf: note that the score is the prediction after truncation to int above
        modelNode.setScore(Double.toString(targetEncodedValue));
      }

    } else {

      Split split = treeNode.split().get();

      Node positiveModelNode = new ComplexNode().setId(modelNode.getId() + "+");
      Node negativeModelNode = new ComplexNode().setId(modelNode.getId() + "-");
      modelNode.addNodes(positiveModelNode, negativeModelNode);

      org.apache.spark.mllib.tree.model.Node rightTreeNode = treeNode.rightNode().get();
      org.apache.spark.mllib.tree.model.Node leftTreeNode = treeNode.leftNode().get();

      // The default child is the one with the strictly larger count; ties go to the negative (left) child
      boolean defaultRight = nodeIDCounts.get(rightTreeNode.id()) > nodeIDCounts.get(leftTreeNode.id());
      modelNode.setDefaultChild(defaultRight ? positiveModelNode.getId() : negativeModelNode.getId());

      // Right node is "positive", so carries the predicate. It must evaluate first
      // and therefore come first in the tree
      modelNodes.add(positiveModelNode);
      modelNodes.add(negativeModelNode);
      // Enqueued in the same order as the model nodes above, to keep the queues aligned.
      // The left node gets a null split; NOTE(review): buildPredicate presumably maps null to an always-true predicate - confirm
      treeNodes.add(new Pair<>(rightTreeNode, split));
      treeNodes.add(new Pair<>(leftTreeNode, null));

    }

  }

  return new TreeModel()
      .setNode(root)
      .setSplitCharacteristic(TreeModel.SplitCharacteristic.BINARY_SPLIT)
      .setMissingValueStrategy(TreeModel.MissingValueStrategy.DEFAULT_CHILD);
}

Source File: Node.java From jpmml-model with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * This implementation does not hold any score distributions, and always throws.
 *
 * NOTE(review): subclasses that support score distributions presumably override this method - confirm.
 *
 * @return Never returns normally.
 * @throws UnsupportedOperationException Always.
 */
public List<ScoreDistribution> getScoreDistributions(){
	throw new UnsupportedOperationException();
}

Source File: Node.java From jpmml-model with BSD 3-Clause "New" or "Revised" License

4 votes

/**
 * Appends the given score distributions to the list returned by {@link #getScoreDistributions()}.
 *
 * @param scoreDistributions The score distributions to append, in order.
 * @return This node, to allow call chaining.
 */
public Node addScoreDistributions(ScoreDistribution... scoreDistributions){
	List<ScoreDistribution> target = getScoreDistributions();
	List<ScoreDistribution> additions = Arrays.asList(scoreDistributions);

	target.addAll(additions);

	return this;
}

Source File: ScoreDistributionInterner.java From jpmml-evaluator with GNU Affero General Public License v3.0

4 votes

/**
 * Creates the key of a score distribution from its value, record count, probability and confidence.
 *
 * @param scoreDistribution The score distribution to create the key for.
 * @return A key over the four attributes, in the order listed above.
 */
@Override
public ElementKey createKey(ScoreDistribution scoreDistribution){
	Object value = scoreDistribution.getValue();
	Object recordCount = scoreDistribution.getRecordCount();
	Object probability = scoreDistribution.getProbability();
	Object confidence = scoreDistribution.getConfidence();

	Object[] keyParts = new Object[]{value, recordCount, probability, confidence};

	return new ElementKey(keyParts);
}

org.dmg.pmml.ScoreDistribution Java Examples