org.jpmml.converter.CategoricalLabel Java Examples
The following examples show how to use
org.jpmml.converter.CategoricalLabel.
You can vote up the ones you like or vote down the ones you don't like,
and follow the links above each example to go to the original project or source file. You may also check out the related API usage in the sidebar.
Example #1
Source File: Classification.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds the categorical target label, either from user-supplied categories
 * or from auto-generated integer categories.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	// No categories supplied: synthesize integer categories 0..(num_class_ - 1).
	if(targetCategories == null){
		List<?> generatedCategories = LabelUtil.createTargetCategories(this.num_class_);

		DataField field = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.INTEGER, generatedCategories);

		return new CategoricalLabel(field);
	}

	// User-supplied categories must match the trained class count exactly.
	if(targetCategories.size() != this.num_class_){
		throw new IllegalArgumentException("Expected " + this.num_class_ + " target categories, got " + targetCategories.size() + " target categories");
	}

	DataField field = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, targetCategories);

	return new CategoricalLabel(field);
}
Example #2
Source File: BoostingConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes an AdaBoost ("boosting") ensemble as a weighted-majority-vote
 * segmentation over its member tree models.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector boosting = getObject();

	// The fitted R object carries the member trees and their voting weights.
	RGenericVector trees = boosting.getGenericElement("trees");
	RDoubleVector weights = boosting.getDoubleElement("weights");

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<TreeModel> memberModels = encodeTreeModels(trees);

	// Combine members by weighted majority voting; expose per-class probabilities.
	MiningModel ensembleModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.WEIGHTED_MAJORITY_VOTE, memberModels, weights.getValues()))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return ensembleModel;
}
Example #3
Source File: GeneralizedLinearRegressionModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Registers the standard output fields, and for classification additionally
 * registers one probability field per target category.
 */
@Override
public List<OutputField> registerOutputFields(Label label, Model pmmlModel, SparkMLEncoder encoder){
	GeneralizedLinearRegressionModel model = getTransformer();

	List<OutputField> outputFields = super.registerOutputFields(label, pmmlModel, encoder);

	MiningFunction miningFunction = getMiningFunction();

	// Only classification exposes per-class probability fields.
	if(miningFunction == MiningFunction.CLASSIFICATION){
		CategoricalLabel categoricalLabel = (CategoricalLabel)label;

		// Copy before mutating: the superclass result may be unmodifiable.
		outputFields = new ArrayList<>(outputFields);
		outputFields.addAll(ModelUtil.createProbabilityFields(DataType.DOUBLE, categoricalLabel.getValues()));
	}

	return outputFields;
}
Example #4
Source File: GBDTLRClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes a GBDT+LR pipeline: gradient-boosted trees produce one-hot leaf
 * indicators, which feed a binary logistic regression.
 */
@Override
public Model encodeModel(Schema schema){
	Classifier gbdt = getGBDT();
	MultiOneHotEncoder ohe = getOHE();
	LinearClassifier lr = getLR();

	// GBDT+LR is only defined for binary targets.
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();
	SchemaUtil.checkSize(2, categoricalLabel);

	List<? extends Number> coefficients = lr.getCoef();
	List<? extends Number> intercepts = lr.getIntercept();

	Schema segmentSchema = schema.toAnonymousSchema();

	// Inner model emits a continuous decision function, normalized via logit below.
	MiningModel decisionFunctionModel = GBDTUtil.encodeModel(gbdt, ohe, coefficients, Iterables.getOnlyElement(intercepts), segmentSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(decisionFunctionModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, lr.hasProbabilityDistribution(), schema);
}
Example #5
Source File: MultinomialLogisticRegression.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes multinomial classification as one boosted-regression chain per class,
 * combined with softmax normalization.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int numClasses = categoricalLabel.size();
	int treesPerClass = (trees.size() / numClasses);

	List<MiningModel> classModels = new ArrayList<>();

	// Trees are laid out class-by-class in Fortran (column-major) order.
	for(int classIndex = 0; classIndex < numClasses; classIndex++){
		MiningModel classModel = createMiningModel(FortranMatrixUtil.getRow(trees, numClasses, treesPerClass, classIndex), numIteration, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue(" + categoricalLabel.getValue(classIndex) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		classModels.add(classModel);
	}

	return MiningModelUtil.createClassification(classModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
Example #6
Source File: ScoreDistributionManager.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds one ScoreDistribution per target category, pairing each class value
 * with its record count. Entries are interned for structural sharing.
 *
 * @param recordCounts per-class counts, indexed in category order.
 */
public List<ScoreDistribution> createScoreDistribution(CategoricalLabel categoricalLabel, double[] recordCounts){
	List<ScoreDistribution> scoreDistributions = new ArrayList<>();

	for(int classIndex = 0; classIndex < categoricalLabel.size(); classIndex++){
		ScoreDistribution scoreDistribution = new InternableScoreDistribution()
			.setValue(categoricalLabel.getValue(classIndex))
			.setRecordCount(recordCounts[classIndex]);

		scoreDistributions.add(intern(scoreDistribution));
	}

	return scoreDistributions;
}
Example #7
Source File: MultinomialLogisticRegression.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes multinomial classification as one boosted-regression chain per class
 * (softmax-normalized), slicing trees and optional per-tree weights per class.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.FLOAT);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int numClasses = categoricalLabel.size();
	int treesPerClass = (trees.size() / numClasses);

	List<MiningModel> classModels = new ArrayList<>();

	// Trees (and matching weights, when present) are stored in C (row-major) order.
	for(int classIndex = 0; classIndex < numClasses; classIndex++){
		List<RegTree> classTrees = CMatrixUtil.getColumn(trees, treesPerClass, numClasses, classIndex);
		List<Float> classWeights = (weights != null) ? CMatrixUtil.getColumn(weights, treesPerClass, numClasses, classIndex) : null;

		MiningModel classModel = createMiningModel(classTrees, classWeights, base_score, ntreeLimit, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue(" + categoricalLabel.getValue(classIndex) + ")"), OpType.CONTINUOUS, DataType.FLOAT));

		classModels.add(classModel);
	}

	return MiningModelUtil.createClassification(classModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
Example #8
Source File: Classification.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Builds the categorical target label, either from user-supplied categories
 * or from auto-generated integer categories.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	// No categories supplied: synthesize integer categories 0..(num_class - 1).
	if(targetCategories == null){
		List<?> generatedCategories = LabelUtil.createTargetCategories(this.num_class);

		DataField field = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.INTEGER, generatedCategories);

		return new CategoricalLabel(field);
	}

	// User-supplied categories must match the trained class count exactly.
	if(targetCategories.size() != this.num_class){
		throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + targetCategories.size() + " target categories");
	}

	DataField field = encoder.createDataField(targetField, OpType.CATEGORICAL, DataType.STRING, targetCategories);

	return new CategoricalLabel(field);
}
Example #9
Source File: BaggingClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes a bagging ensemble. Probability averaging is used only when every
 * member classifier can emit probabilities; otherwise falls back to majority vote.
 */
@Override
public MiningModel encodeModel(Schema schema){
	List<? extends Classifier> estimators = getEstimators();
	List<List<Integer>> estimatorsFeatures = getEstimatorsFeatures();

	boolean allHaveProbabilities = true;
	for(Classifier estimator : estimators){
		if(!estimator.hasProbabilityDistribution()){
			allHaveProbabilities = false;

			break;
		}
	}

	Segmentation.MultipleModelMethod multipleModelMethod = allHaveProbabilities ? Segmentation.MultipleModelMethod.AVERAGE : Segmentation.MultipleModelMethod.MAJORITY_VOTE;

	MiningModel ensembleModel = BaggingUtil.encodeBagging(estimators, estimatorsFeatures, multipleModelMethod, MiningFunction.CLASSIFICATION, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));

	return ensembleModel;
}
Example #10
Source File: TreeClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the decision tree classifier, attaching per-class probability
 * outputs, then applies the standard tree post-processing transform.
 */
@Override
public TreeModel encodeModel(Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	TreeModel treeModel = TreeUtil.encodeTreeModel(this, MiningFunction.CLASSIFICATION, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return TreeUtil.transform(this, treeModel);
}
Example #11
Source File: PMMLPipeline.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
private List<String> initProbabilityFields(CategoricalLabel categoricalLabel){ List<String> probabilityFields = new ArrayList<>(); List<?> values = categoricalLabel.getValues(); for(Object value : values){ probabilityFields.add("probability(" + value + ")"); // XXX } return probabilityFields; }
Example #12
Source File: MLPClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the MLP classifier as a PMML neural network with per-class
 * probability outputs.
 */
@Override
public NeuralNetwork encodeModel(Schema schema){
	String activation = getActivation();

	List<? extends HasArray> coefs = getCoefs();
	List<? extends HasArray> intercepts = getIntercepts();

	// Layer construction is delegated; this wrapper only attaches the probability output.
	NeuralNetwork neuralNetwork = MultilayerPerceptronUtil.encodeNeuralNetwork(MiningFunction.CLASSIFICATION, activation, coefs, intercepts, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)schema.getLabel()));

	return neuralNetwork;
}
Example #13
Source File: GeneralizedLinearRegressionModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes a Spark GLM as a PMML GeneralRegressionModel. For binary
 * classification, the second category is treated as the "event" category.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	GeneralizedLinearRegressionModel model = getTransformer();

	MiningFunction miningFunction = getMiningFunction();

	Object targetCategory = null;

	if(miningFunction == MiningFunction.CLASSIFICATION){
		CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

		// GLM classification is binary-only.
		SchemaUtil.checkSize(2, categoricalLabel);

		targetCategory = categoricalLabel.getValue(1);
	}

	// Mutable copies: simplify() edits both lists in place.
	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> featureCoefficients = new ArrayList<>(VectorUtil.toList(model.coefficients()));

	RegressionTableUtil.simplify(this, targetCategory, features, featureCoefficients);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setDistribution(parseFamily(model.getFamily()))
		.setLinkFunction(parseLinkFunction(model.getLink()))
		.setLinkParameter(parseLinkParameter(model.getLink()));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, model.intercept(), targetCategory);

	return generalRegressionModel;
}
Example #14
Source File: ForestClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the random forest as an averaged segmentation of its trees,
 * with per-class probability outputs.
 */
@Override
public MiningModel encodeModel(Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	MiningModel forestModel = ForestUtil.encodeBaseForest(this, Segmentation.MultipleModelMethod.AVERAGE, MiningFunction.CLASSIFICATION, schema)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return forestModel;
}
Example #15
Source File: DummyClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a DummyClassifier as a single-node decision tree that always
 * predicts the same class with a fixed probability distribution.
 *
 * <p>Strategies: "constant" predicts the user-supplied constant class;
 * "most_frequent" predicts the class with the highest prior, with probability 1;
 * "prior" predicts the most frequent class but reports the full prior distribution.
 *
 * @throws IllegalArgumentException if the strategy is unsupported, or if the
 * "constant" value is not one of the known classes.
 */
@Override
public TreeModel encodeModel(Schema schema){
	List<?> classes = getClasses();
	List<? extends Number> classPrior = getClassPrior();
	Object constant = getConstant();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(classes, classPrior);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	int index;
	double[] probabilities;

	switch(strategy){
		case "constant":
			{
				index = classes.indexOf(constant);
				// Fix: an unknown constant previously produced an
				// ArrayIndexOutOfBoundsException at probabilities[-1].
				if(index < 0){
					throw new IllegalArgumentException("Constant " + constant + " is not a valid class");
				}
				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "most_frequent":
			{
				index = classPrior.indexOf(Collections.max((List)classPrior));
				probabilities = new double[classes.size()];
				probabilities[index] = 1d;
			}
			break;
		case "prior":
			{
				index = classPrior.indexOf(Collections.max((List)classPrior));
				probabilities = Doubles.toArray(classPrior);
			}
			break;
		default:
			throw new IllegalArgumentException(strategy);
	}

	// A single leaf that always fires, scored with the chosen class.
	Node root = new ClassifierNode(ValueUtil.asString(classes.get(index)), True.INSTANCE);

	List<ScoreDistribution> scoreDistributions = root.getScoreDistributions();

	for(int i = 0; i < classes.size(); i++){
		ScoreDistribution scoreDistribution = new ScoreDistribution(ValueUtil.asString(classes.get(i)), probabilities[i]);

		scoreDistributions.add(scoreDistribution);
	}

	TreeModel treeModel = new TreeModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), root)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return treeModel;
}
Example #16
Source File: MiningModelUtil.java From pyramid with Apache License 2.0 | 4 votes |
static public MiningModel createClassification(List<? extends Model> models, RegressionModel.NormalizationMethod normalizationMethod, boolean hasProbabilityDistribution, Schema schema){ CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel(); // modified here if(categoricalLabel.size() != models.size()){ throw new IllegalArgumentException(); } // End if if(normalizationMethod != null){ switch(normalizationMethod){ case NONE: case SIMPLEMAX: case SOFTMAX: break; default: throw new IllegalArgumentException(); } } MathContext mathContext = null; List<RegressionTable> regressionTables = new ArrayList<>(); for(int i = 0; i < categoricalLabel.size(); i++){ Model model = models.get(i); MathContext modelMathContext = model.getMathContext(); if(modelMathContext == null){ modelMathContext = MathContext.DOUBLE; } // End if if(mathContext == null){ mathContext = modelMathContext; } else { if(!Objects.equals(mathContext, modelMathContext)){ throw new IllegalArgumentException(); } } Feature feature = MODEL_PREDICTION.apply(model); RegressionTable regressionTable = RegressionModelUtil.createRegressionTable(Collections.singletonList(feature), Collections.singletonList(1d), null) .setTargetCategory(categoricalLabel.getValue(i)); regressionTables.add(regressionTable); } RegressionModel regressionModel = new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables) .setNormalizationMethod(normalizationMethod) .setMathContext(ModelUtil.simplifyMathContext(mathContext)) .setOutput(hasProbabilityDistribution ? ModelUtil.createProbabilityOutput(mathContext, categoricalLabel) : null); List<Model> segmentationModels = new ArrayList<>(models); segmentationModels.add(regressionModel); return createModelChain(segmentationModels, schema); }
Example #17
Source File: GBMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes multinomial GBM classification as one boosted chain per class,
 * combined with softmax normalization.
 */
private MiningModel encodeMultinomialClassification(List<TreeModel> treeModels, Double initF, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	Schema segmentSchema = schema.toAnonymousRegressorSchema(DataType.DOUBLE);

	int numClasses = categoricalLabel.size();
	int treesPerClass = (treeModels.size() / numClasses);

	List<Model> classModels = new ArrayList<>();

	// Tree models are stored class-by-class in C (row-major) order.
	for(int classIndex = 0; classIndex < numClasses; classIndex++){
		MiningModel classModel = createMiningModel(CMatrixUtil.getColumn(treeModels, treesPerClass, numClasses, classIndex), initF, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue(" + categoricalLabel.getValue(classIndex) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		classModels.add(classModel);
	}

	return MiningModelUtil.createClassification(classModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
}
Example #18
Source File: BaggingConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes an R "bagging" ensemble as a majority-vote segmentation over its
 * member tree models.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector bagging = getObject();

	RGenericVector trees = bagging.getGenericElement("trees");

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<TreeModel> memberModels = encodeTreeModels(trees);

	// Unweighted majority voting; expose per-class probabilities.
	MiningModel ensembleModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.MAJORITY_VOTE, memberModels))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return ensembleModel;
}
Example #19
Source File: DNNClassifier.java From jpmml-tensorflow with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a TensorFlow DNN classifier as a PMML neural network.
 *
 * The last layer is rewritten depending on the output width: a single output
 * neuron is expanded into a two-neuron binary-logistic layer; three or more
 * outputs get softmax normalization. Exactly two output neurons is rejected.
 * Categories are synthesized as string indices ("0", "1", ...).
 */
@Override
public NeuralNetwork encodeModel(TensorFlowEncoder encoder){
	// Placeholder target field; its categories are filled in at the end.
	DataField dataField = encoder.createDataField(FieldName.create("_target"), OpType.CATEGORICAL, DataType.INTEGER);

	NeuralNetwork neuralNetwork = encodeNeuralNetwork(encoder);

	List<NeuralLayer> neuralLayers = neuralNetwork.getNeuralLayers();

	NeuralLayer neuralLayer = Iterables.getLast(neuralLayers);

	List<Neuron> neurons = neuralLayer.getNeurons();

	List<String> categories;

	if(neurons.size() == 1){
		// Binary case: squash the single output with a logistic activation,
		// then add an identity layer that derives both class scores from it.
		neuralLayer.setActivationFunction(NeuralNetwork.ActivationFunction.LOGISTIC);

		Neuron neuron = Iterables.getOnlyElement(neurons);

		neuralLayer = new NeuralLayer()
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY);

		categories = Arrays.asList("0", "1");

		// p(no event) = 1 - p(event)
		Neuron passiveNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(0))
			.setBias(ValueUtil.floatToDouble(1f))
			.addConnections(new Connection(neuron.getId(), -1f));

		// p(event)
		Neuron activeNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(1))
			.setBias(null)
			.addConnections(new Connection(neuron.getId(), 1f));

		neuralLayer.addNeurons(passiveNeuron, activeNeuron);

		neuralNetwork.addNeuralLayers(neuralLayer);

		neurons = neuralLayer.getNeurons();
	} else

	if(neurons.size() > 2){
		// Multi-class case: softmax over the raw (identity) outputs.
		neuralLayer
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY)
			.setNormalizationMethod(NeuralNetwork.NormalizationMethod.SOFTMAX);

		categories = new ArrayList<>();

		for(int i = 0; i < neurons.size(); i++){
			String category = String.valueOf(i);

			categories.add(category);
		}
	} else

	{
		// Exactly two output neurons is not expected here.
		throw new IllegalArgumentException();
	}

	// Re-declare the target field with the synthesized categories.
	dataField = encoder.toCategorical(dataField.getName(), categories);

	CategoricalLabel categoricalLabel = new CategoricalLabel(dataField);

	neuralNetwork
		.setMiningFunction(MiningFunction.CLASSIFICATION)
		.setMiningSchema(ModelUtil.createMiningSchema(categoricalLabel))
		.setNeuralOutputs(NeuralNetworkUtil.createClassificationNeuralOutputs(neurons, categoricalLabel))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.FLOAT, categoricalLabel));

	return neuralNetwork;
}
Example #20
Source File: GLMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes an R glm() object as a PMML GeneralRegressionModel.
 *
 * The mining function is derived from the family; for (binary) classification,
 * the second class is used as the "event" target category and a probability
 * output is attached.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector glm = getObject();

	RDoubleVector coefficients = glm.getDoubleElement("coefficients");
	RGenericVector family = glm.getGenericElement("family");

	// The intercept coefficient is optional (second argument disables failure on absence).
	Double intercept = coefficients.getElement(getInterceptName(), false);

	RStringVector familyFamily = family.getStringElement("family");
	RStringVector familyLink = family.getStringElement("link");

	Label label = schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// Non-intercept coefficients must line up one-to-one with the features.
	SchemaUtil.checkSize(coefficients.size() - (intercept != null ? 1 : 0), features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	MiningFunction miningFunction = getMiningFunction(familyFamily.asScalar());

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)label;

				// glm() classification is binary-only; the second class is the event.
				SchemaUtil.checkSize(2, categoricalLabel);

				targetCategory = categoricalLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(label), null, null, null)
		.setDistribution(parseFamily(familyFamily.asScalar()))
		.setLinkFunction(parseLinkFunction(familyLink.asScalar()))
		.setLinkParameter(parseLinkParameter(familyLink.asScalar()));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

	// Probability output only applies to the classification case.
	switch(miningFunction){
		case CLASSIFICATION:
			generalRegressionModel.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)label));
			break;
		default:
			break;
	}

	return generalRegressionModel;
}
Example #21
Source File: RandomForestConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes an R randomForest classification forest as a majority-vote ensemble.
 *
 * The forest's per-tree data is stored as Fortran (column-major) matrices with
 * nrnodes rows and ntree columns; each column is sliced out per tree. Node
 * predictions are 1-based class indices, mapped back to label values.
 */
private MiningModel encodeClassification(RGenericVector forest, Schema schema){
	RNumberVector<?> bestvar = forest.getNumericElement("bestvar");
	RNumberVector<?> treemap = forest.getNumericElement("treemap");
	RIntegerVector nodepred = forest.getIntegerElement("nodepred");
	RDoubleVector xbestsplit = forest.getDoubleElement("xbestsplit");
	RIntegerVector nrnodes = forest.getIntegerElement("nrnodes");
	RDoubleVector ntree = forest.getDoubleElement("ntree");

	int rows = nrnodes.asScalar();
	int columns = ValueUtil.asInt(ntree.asScalar());

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// R uses 1-based class indices; convert to 0-based label lookups.
	ScoreEncoder<Integer> scoreEncoder = new ScoreEncoder<Integer>(){

		@Override
		public Object encode(Integer value){
			return categoricalLabel.getValue(value - 1);
		}
	};

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> treeModels = new ArrayList<>();

	for(int i = 0; i < columns; i++){
		// treemap interleaves left/right daughters: a (2 * rows) x columns matrix;
		// each tree's column is then split into its left and right halves.
		List<? extends Number> daughters = FortranMatrixUtil.getColumn(treemap.getValues(), 2 * rows, columns, i);

		TreeModel treeModel = encodeTreeModel(
				MiningFunction.CLASSIFICATION,
				scoreEncoder,
				FortranMatrixUtil.getColumn(daughters, rows, 2, 0),
				FortranMatrixUtil.getColumn(daughters, rows, 2, 1),
				FortranMatrixUtil.getColumn(nodepred.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(bestvar.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(xbestsplit.getValues(), rows, columns, i),
				segmentSchema
			);

		treeModels.add(treeModel);
	}

	// Unweighted majority vote over the trees, with per-class probabilities.
	MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.MAJORITY_VOTE, treeModels))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return miningModel;
}
Example #22
Source File: LinearClassifier.java From jpmml-tensorflow with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a TensorFlow linear classifier as a softmax-normalized PMML
 * RegressionModel.
 *
 * A single regression table is treated as the binary "event" table and a
 * constant zero "no event" table is added; three or more tables get indexed
 * string categories. Exactly two tables is rejected. Categories are
 * synthesized as string indices ("0", "1", ...).
 */
@Override
public RegressionModel encodeModel(TensorFlowEncoder encoder){
	// Placeholder target field; its categories are filled in at the end.
	DataField dataField = encoder.createDataField(FieldName.create("_target"), OpType.CATEGORICAL, DataType.INTEGER);

	RegressionModel regressionModel = encodeRegressionModel(encoder);

	List<RegressionTable> regressionTables = regressionModel.getRegressionTables();

	List<String> categories;

	if(regressionTables.size() == 1){
		categories = Arrays.asList("0", "1");

		// The existing table scores the "event" class...
		RegressionTable activeRegressionTable = regressionTables.get(0)
			.setTargetCategory(categories.get(1));

		// ...and a constant-zero table represents the "no event" class.
		RegressionTable passiveRegressionTable = new RegressionTable(0)
			.setTargetCategory(categories.get(0));

		regressionModel.addRegressionTables(passiveRegressionTable);
	} else

	if(regressionTables.size() > 2){
		categories = new ArrayList<>();

		for(int i = 0; i < regressionTables.size(); i++){
			RegressionTable regressionTable = regressionTables.get(i);
			String category = String.valueOf(i);

			regressionTable.setTargetCategory(category);

			categories.add(category);
		}
	} else

	{
		// Exactly two tables is not expected here.
		throw new IllegalArgumentException();
	}

	// Re-declare the target field with the synthesized categories.
	dataField = encoder.toCategorical(dataField.getName(), categories);

	CategoricalLabel categoricalLabel = new CategoricalLabel(dataField);

	regressionModel
		.setMiningFunction(MiningFunction.CLASSIFICATION)
		.setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX)
		.setMiningSchema(ModelUtil.createMiningSchema(categoricalLabel))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.FLOAT, categoricalLabel));

	return regressionModel;
}
Example #23
Source File: LinearDiscriminantAnalysis.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes multinomial LDA as one per-class linear regression chain combined
 * with softmax normalization.
 *
 * Only used with scikit-learn >= 0.21, where the decision-function sign
 * convention was corrected (see linked issue); older versions fall back to
 * the superclass encoding.
 */
private Model encodeMultinomialModel(Schema schema){
	String sklearnVersion = getSkLearnVersion();

	// Coefficient matrix: one row per class, one column per feature.
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// See https://github.com/scikit-learn/scikit-learn/issues/6848
	boolean corrected = (sklearnVersion != null && SkLearnUtil.compareVersion(sklearnVersion, "0.21") >= 0);

	if(!corrected){
		return super.encodeModel(schema);
	} // End if

	if(numberOfClasses >= 3){
		SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

		Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

		List<RegressionModel> regressionModels = new ArrayList<>();

		// One decision-function regression per class, sliced row-wise from coef.
		for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.NONE, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
	} else

	{
		// The multinomial path requires at least three classes.
		throw new IllegalArgumentException();
	}
}
Example #24
Source File: LinearModelUtil.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a binary logistic classification from a coefficient vector.
 * Depending on the converter's representation option, emits either a
 * GeneralRegressionModel or the default RegressionModel form.
 */
static public <C extends ModelConverter<?> & HasRegressionTableOptions> Model createBinaryLogisticClassification(C converter, Vector coefficients, double intercept, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	String representation = (String)converter.getOption(HasRegressionTableOptions.OPTION_REPRESENTATION, null);

	// Mutable copies: simplify() edits both lists in place.
	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> featureCoefficients = new ArrayList<>(VectorUtil.toList(coefficients));

	RegressionTableUtil.simplify(converter, null, features, featureCoefficients);

	boolean useGeneralRegression = (representation != null && (GeneralRegressionModel.class.getSimpleName()).equalsIgnoreCase(representation));

	if(useGeneralRegression){
		// The second class is the "event" category.
		Object targetCategory = categoricalLabel.getValue(1);

		GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), null, null, null)
			.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT);

		GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

		return generalRegressionModel;
	}

	return RegressionModelUtil.createBinaryLogisticClassification(features, featureCoefficients, intercept, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
Example #25
Source File: MultilayerPerceptronUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a scikit-learn MLP (coefficient and intercept arrays per layer) as
 * a PMML neural network.
 *
 * Hidden layers use the parsed activation function; the final layer is forced
 * to identity and, for classification, is followed by either a binary-logistic
 * transformation (two classes) or softmax normalization (more than two).
 *
 * @param coefs per-layer weight matrices, shape (inputs x neurons).
 * @param intercepts per-layer bias vectors, parallel to coefs.
 */
static public NeuralNetwork encodeNeuralNetwork(MiningFunction miningFunction, String activation, List<? extends HasArray> coefs, List<? extends HasArray> intercepts, Schema schema){
	NeuralNetwork.ActivationFunction activationFunction = parseActivationFunction(activation);

	ClassDictUtil.checkSize(coefs, intercepts);

	Label label = schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	NeuralInputs neuralInputs = NeuralNetworkUtil.createNeuralInputs(features, DataType.DOUBLE);

	// "entities" tracks the previous layer's units; starts at the input layer.
	List<? extends NeuralEntity> entities = neuralInputs.getNeuralInputs();

	List<NeuralLayer> neuralLayers = new ArrayList<>();

	for(int layer = 0; layer < coefs.size(); layer++){
		HasArray coef = coefs.get(layer);
		HasArray intercept = intercepts.get(layer);

		int[] shape = coef.getArrayShape();

		int rows = shape[0];
		int columns = shape[1];

		NeuralLayer neuralLayer = new NeuralLayer();

		List<?> coefMatrix = coef.getArrayContent();
		List<?> interceptVector = intercept.getArrayContent();

		// One neuron per output column; its weights are the matching weight column.
		for(int column = 0; column < columns; column++){
			List<? extends Number> weights = (List)CMatrixUtil.getColumn(coefMatrix, rows, columns, column);
			Number bias = (Number)interceptVector.get(column);

			// Neuron ids follow a "<layer>/<index>" scheme, both 1-based.
			Neuron neuron = NeuralNetworkUtil.createNeuron(entities, weights, bias)
				.setId(String.valueOf(layer + 1) + "/" + String.valueOf(column + 1));

			neuralLayer.addNeurons(neuron);
		}

		neuralLayers.add(neuralLayer);

		entities = neuralLayer.getNeurons();

		// Special handling for the output layer.
		if(layer == (coefs.size() - 1)){
			neuralLayer.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY);

			switch(miningFunction){
				case REGRESSION:
					break;
				case CLASSIFICATION:
					CategoricalLabel categoricalLabel = (CategoricalLabel)label;

					// Binary classification
					if(categoricalLabel.size() == 2){
						List<NeuralLayer> transformationNeuralLayers = NeuralNetworkUtil.createBinaryLogisticTransformation(Iterables.getOnlyElement(entities));

						neuralLayers.addAll(transformationNeuralLayers);

						neuralLayer = Iterables.getLast(transformationNeuralLayers);

						entities = neuralLayer.getNeurons();
					} else

					// Multi-class classification
					if(categoricalLabel.size() > 2){
						neuralLayer.setNormalizationMethod(NeuralNetwork.NormalizationMethod.SOFTMAX);
					} else

					{
						throw new IllegalArgumentException();
					}
					break;
				default:
					break;
			}
		}
	}

	NeuralOutputs neuralOutputs = null;

	switch(miningFunction){
		case REGRESSION:
			neuralOutputs = NeuralNetworkUtil.createRegressionNeuralOutputs(entities, (ContinuousLabel)label);
			break;
		case CLASSIFICATION:
			neuralOutputs = NeuralNetworkUtil.createClassificationNeuralOutputs(entities, (CategoricalLabel)label);
			break;
		default:
			break;
	}

	NeuralNetwork neuralNetwork = new NeuralNetwork(miningFunction, activationFunction, ModelUtil.createMiningSchema(label), neuralInputs, neuralLayers)
		.setNeuralOutputs(neuralOutputs);

	return neuralNetwork;
}
Example #26
Source File: VotingClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes a voting ensemble: every member classifier is encoded against the
 * shared schema and combined by the voting method implied by the "voting"
 * setting and the presence of weights.
 */
@Override
public Model encodeModel(Schema schema){
	List<? extends Classifier> estimators = getEstimators();
	List<? extends Number> weights = getWeights();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<Model> memberModels = new ArrayList<>();

	for(Classifier estimator : estimators){
		memberModels.add(estimator.encodeModel(schema));
	}

	String voting = getVoting();

	boolean weighted = (weights != null && weights.size() > 0);

	Segmentation.MultipleModelMethod multipleModelMethod = parseVoting(voting, weighted);

	MiningModel ensembleModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(multipleModelMethod, memberModels, weights))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return ensembleModel;
}
Example #27
Source File: GaussianNB.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 3 votes |
/**
 * Encodes a Gaussian Naive Bayes classifier as a PMML {@link NaiveBayesModel}.
 * The theta/sigma matrices are laid out as (classes x features); one
 * {@link BayesInput} is emitted per feature column, carrying the per-class
 * mean/variance statistics.
 */
@Override
public NaiveBayesModel encodeModel(Schema schema){
	int[] thetaShape = getThetaShape();

	// Row count = classes, column count = features
	int classCountDim = thetaShape[0];
	int featureCountDim = thetaShape[1];

	List<? extends Number> thetaValues = getTheta();
	List<? extends Number> sigmaValues = getSigma();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	BayesInputs bayesInputs = new BayesInputs();

	for(int featureIndex = 0; featureIndex < featureCountDim; featureIndex++){
		// Per-class statistics for this feature column
		List<? extends Number> columnMeans = CMatrixUtil.getColumn(thetaValues, classCountDim, featureCountDim, featureIndex);
		List<? extends Number> columnVariances = CMatrixUtil.getColumn(sigmaValues, classCountDim, featureCountDim, featureIndex);

		ContinuousFeature continuousFeature = (schema.getFeature(featureIndex)).toContinuousFeature();

		bayesInputs.addBayesInputs(new BayesInput(continuousFeature.getName(), encodeTargetValueStats(categoricalLabel.getValues(), columnMeans, columnVariances), null));
	}

	// Per-class training counts drive the prior probabilities
	BayesOutput bayesOutput = new BayesOutput(categoricalLabel.getName(), null)
		.setTargetValueCounts(encodeTargetValueCounts(categoricalLabel.getValues(), getClassCount()));

	return new NaiveBayesModel(0d, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), bayesInputs, bayesOutput)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
}
Example #28
Source File: LRMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 3 votes |
/**
 * Encodes an rms::lrm binary logistic regression fit as a PMML
 * {@link GeneralRegressionModel} with a logit link function.
 */
@Override
public Model encodeModel(Schema schema){
	RGenericVector lrm = getObject();

	RDoubleVector coefficients = lrm.getDoubleElement("coefficients");

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// Binary classification only; the regression table targets the second category
	SchemaUtil.checkSize(2, categoricalLabel);

	Object targetCategory = categoricalLabel.getValue(1);

	// The intercept coefficient is optional
	Double intercept = coefficients.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	// Every non-intercept coefficient must correspond to a feature
	SchemaUtil.checkSize(coefficients.size() - (intercept != null ? 1 : 0), features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

	return generalRegressionModel;
}
Example #29
Source File: LinearModelUtil.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 3 votes |
/**
 * Builds a softmax-normalized PMML {@link RegressionModel} from a
 * (classes x features) coefficient matrix and a per-class intercept vector:
 * one regression table per target category.
 */
static public <C extends ModelConverter<?> & HasRegressionTableOptions> Model createSoftmaxClassification(C converter, Matrix coefficients, Vector intercepts, Schema schema){
	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// The coefficient matrix must provide one row per target category
	MatrixUtil.checkRows(categoricalLabel.size(), coefficients);

	List<RegressionTable> regressionTables = new ArrayList<>();

	for(int row = 0, numRows = categoricalLabel.size(); row < numRows; row++){
		Object targetCategory = categoricalLabel.getValue(row);

		// Mutable copies: simplify(..) may prune entries in place
		List<Feature> rowFeatures = new ArrayList<>(schema.getFeatures());
		List<Double> rowCoefficients = new ArrayList<>(MatrixUtil.getRow(coefficients, row));

		RegressionTableUtil.simplify(converter, targetCategory, rowFeatures, rowCoefficients);

		RegressionTable regressionTable = RegressionModelUtil.createRegressionTable(rowFeatures, rowCoefficients, intercepts.apply(row))
			.setTargetCategory(targetCategory);

		regressionTables.add(regressionTable);
	}

	return new RegressionModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), regressionTables)
		.setNormalizationMethod(RegressionModel.NormalizationMethod.SOFTMAX);
}
Example #30
Source File: LinearClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Encodes a scikit-learn linear classifier as PMML. A single coefficient row
 * (numberOfClasses == 1) yields a binary logistic classification; three or
 * more rows yield a one-vs-rest ensemble of logistic regressions combined
 * with SIMPLEMAX normalization. Other row counts are rejected.
 */
@Override
public Model encodeModel(Schema schema){
	int[] coefShape = getCoefShape();

	int numberOfClasses = coefShape[0];
	int numberOfFeatures = coefShape[1];

	boolean hasProbabilityDistribution = hasProbabilityDistribution();

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// Binary case: a single coefficient row models the two-category label
	if(numberOfClasses == 1){
		SchemaUtil.checkSize(2, categoricalLabel);

		return RegressionModelUtil.createBinaryLogisticClassification(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0), intercept.get(0), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema);
	} // End if

	if(numberOfClasses < 3){
		// Two coefficient rows are not a supported layout
		throw new IllegalArgumentException();
	}

	// Multi-class case: one logistic regression per target category
	SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

	Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

	List<RegressionModel> regressionModels = new ArrayList<>();

	for(int row = 0, numRows = categoricalLabel.size(); row < numRows; row++){
		RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, row), intercept.get(row), RegressionModel.NormalizationMethod.LOGIT, segmentSchema)
			.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(row) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

		regressionModels.add(regressionModel);
	}

	return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema);
}