org.jpmml.converter.ValueUtil Java Examples

Source File: FormulaUtil.java From jpmml-r with GNU Affero General Public License v3.0

6 votes

/**
 * Reads the elements of a vector-like argument as strings.
 *
 * The argument is first turned into a vector expression via {@code toVectorExpression};
 * every element of that expression is expected to be a {@code Constant}
 * (any other expression type fails the cast).
 *
 * @param argument The argument to parse.
 * @return The string form of every constant, in argument order.
 */
static
private List<String> parseVector(FunctionExpression.Argument argument){
	FunctionExpression vectorExpression = toVectorExpression(argument);

	List<String> strings = new ArrayList<>();

	for(FunctionExpression.Argument element : vectorExpression.getArguments()){
		Constant constant = (Constant)element.getExpression();

		strings.add(ValueUtil.asString(constant.getValue()));
	}

	return strings;
}

Source File: RangerConverter.java From jpmml-r with GNU Affero General Public License v3.0

6 votes

/**
 * Encodes every tree of a forest object as a separate {@code TreeModel}.
 *
 * @param forest The forest object; the elements "num.trees", "child.nodeIDs", "split.varIDs" and "split.values" are read, "terminal.class.counts" is optional.
 * @param miningFunction The mining function that is passed through to every tree.
 * @param scoreEncoder The score encoder that is passed through to every tree.
 * @param schema The schema; trees are encoded against its anonymous variant.
 * @return The tree models, in forest order.
 */
private List<TreeModel> encodeForest(RGenericVector forest, MiningFunction miningFunction, ScoreEncoder scoreEncoder, Schema schema){
	RNumberVector<?> numTrees = forest.getNumericElement("num.trees");
	RGenericVector childNodeIDs = forest.getGenericElement("child.nodeIDs");
	RGenericVector splitVarIDs = forest.getGenericElement("split.varIDs");
	RGenericVector splitValues = forest.getGenericElement("split.values");
	RGenericVector terminalClassCounts = forest.getGenericElement("terminal.class.counts", false);

	Schema segmentSchema = schema.toAnonymousSchema();

	// The tree count does not change while iterating; resolve it once instead of on every loop test
	int treeCount = ValueUtil.asInt(numTrees.asScalar());

	List<TreeModel> treeModels = new ArrayList<>();

	for(int i = 0; i < treeCount; i++){
		// The class counts are only available if the optional element was present
		RGenericVector treeClassCounts = (terminalClassCounts != null ? (RGenericVector)terminalClassCounts.getValue(i) : null);

		TreeModel treeModel = encodeTreeModel(miningFunction, scoreEncoder, (RGenericVector)childNodeIDs.getValue(i), (RNumberVector<?>)splitVarIDs.getValue(i), (RNumberVector<?>)splitValues.getValue(i), treeClassCounts, segmentSchema);

		treeModels.add(treeModel);
	}

	return treeModels;
}

Source File: OrdinalEncoder.java From jpmml-sklearn with GNU Affero General Public License v3.0

6 votes

/**
 * Converts a category-to-index mapping, stored as a Series, into an insertion-ordered map.
 *
 * @param mapping The mapping whose Series data holds the categories (block item) and their indices (block value).
 * @return A map from category to integer index, in the order of the Series.
 */
static
public Map<Object, Integer> getCategoryMapping(Mapping mapping){
	SingleBlockManager blockManager = (mapping.getMapping(Series.class)).getData();

	Index categoryIndex = blockManager.getOnlyBlockItem();
	List<?> keys = (categoryIndex.getData()).getData();

	HasArray indexArray = blockManager.getOnlyBlockValue();
	List<Integer> codes = ValueUtil.asIntegers((List)indexArray.getArrayContent());

	// Both lists must be of the same length
	ClassDictUtil.checkSize(keys, codes);

	Map<Object, Integer> categoryMapping = new LinkedHashMap<>();

	int size = keys.size();

	for(int pos = 0; pos < size; pos++){
		Object key = keys.get(pos);
		Integer code = codes.get(pos);

		categoryMapping.put(key, code);
	}

	return categoryMapping;
}

Source File: RPartConverter.java From jpmml-r with GNU Affero General Public License v3.0

6 votes

/**
 * Creates a converter and caches the "usesurrogate" setting of the rpart control element.
 *
 * @param rpart The rpart object.
 * @throws IllegalArgumentException If the "usesurrogate" value is not 0, 1 or 2.
 */
public RPartConverter(RGenericVector rpart){
	super(rpart);

	RGenericVector control = rpart.getGenericElement("control");

	RNumberVector<?> surrogateFlag = control.getNumericElement("usesurrogate");

	this.useSurrogate = ValueUtil.asInt(surrogateFlag.asScalar());

	// Only the values 0, 1 and 2 are accepted
	if(this.useSurrogate < 0 || this.useSurrogate > 2){
		throw new IllegalArgumentException();
	}
}

Source File: RegressionTree.java From pyramid with Apache License 2.0

5 votes

/**
 * Creates the predicate of one branch of a split node.
 *
 * Binary features are compared for (in)equality against the feature value;
 * all other features are converted to continuous features and compared against the node threshold.
 *
 * @param feature The split feature.
 * @param node The split node that supplies the threshold.
 * @param left {@code true} for the left branch, {@code false} for the right branch.
 * @return The simple predicate.
 * @throws IllegalArgumentException If the continuous feature data type is neither INTEGER nor FLOAT.
 */
static
private Predicate encodePredicate(Feature feature, Node node, boolean left){
    FieldName fieldName = feature.getName();

    SimplePredicate.Operator comparison;
    String literal;

    if(feature instanceof BinaryFeature){
        BinaryFeature binary = (BinaryFeature)feature;

        if(left){
            comparison = SimplePredicate.Operator.NOT_EQUAL;
        } else {
            comparison = SimplePredicate.Operator.EQUAL;
        }

        literal = binary.getValue();
    } else {
        ContinuousFeature continuous = feature.toContinuousFeature();

        Number threshold = node.getThreshold();

        DataType dataType = continuous.getDataType();
        switch(dataType){
            case INTEGER:
                // Integer features use the threshold shifted by one and truncated
                threshold = (int)(threshold.floatValue() + 1f);
                break;
            case FLOAT:
                break;
            default:
                throw new IllegalArgumentException();
        }

        if(left){
            comparison = SimplePredicate.Operator.LESS_OR_EQUAL;
        } else {
            comparison = SimplePredicate.Operator.GREATER_THAN;
        }

        literal = ValueUtil.formatValue(threshold);
    }

    return new SimplePredicate(fieldName, comparison)
            .setValue(literal);
}

Source File: SVMConverter.java From jpmml-r with GNU Affero General Public License v3.0

5 votes

/**
 * Builds the model formula of the wrapped SVM object, and registers its label and features with the encoder.
 *
 * The label depends on the SVM type: classification types get a label with levels,
 * one-classification gets an anonymous continuous DOUBLE label, and regression types get a label without levels.
 * Finally, the features are passed through {@code scaleFeatures}.
 *
 * @param encoder The encoder that receives the label and the features.
 */
private void encodeFormula(RExpEncoder encoder){
	RGenericVector svm = getObject();

	RDoubleVector type = svm.getDoubleElement("type");
	RDoubleVector sv = svm.getDoubleElement("SV");
	RVector<?> levels = svm.getVectorElement("levels");
	RExp terms = svm.getElement("terms");
	RGenericVector xlevels = DecorationUtil.getGenericElement(svm, "xlevels");

	// The numeric "type" element is used as an ordinal into the Type enum
	Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];

	// NOTE(review): rowNames is not used in this method
	RStringVector rowNames = sv.dimnames(0);
	RStringVector columnNames = sv.dimnames(1);

	FormulaContext context = new XLevelsFormulaContext(xlevels);

	Formula formula = FormulaUtil.createFormula(terms, context, encoder);

	// NOTE(review): there is no default branch; a Type constant other than these five would leave this switch without setting a label
	switch(svmType){
		case C_CLASSIFICATION:
		case NU_CLASSIFICATION:
			FormulaUtil.setLabel(formula, terms, levels, encoder);
			break;
		case ONE_CLASSIFICATION:
			encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
			break;
		case EPS_REGRESSION:
		case NU_REGRESSION:
			FormulaUtil.setLabel(formula, terms, null, encoder);
			break;
	}

	// The feature names are taken from the column names of the "SV" matrix
	FormulaUtil.addFeatures(formula, columnNames, true, encoder);

	scaleFeatures(encoder);
}

Source File: LightGBMUtil.java From jpmml-lightgbm with GNU Affero General Public License v3.0

5 votes

/**
 * Parses a string as an integer.
 *
 * @param string The string to parse.
 * @return The integer value.
 */
@Override
public Integer apply(String string){
	Integer parsed;

	try {
		parsed = Integer.valueOf(string);
	} catch(NumberFormatException nfe){
		// Not an integer literal; parse as a double and let ValueUtil convert it
		parsed = ValueUtil.asInteger(Double.valueOf(string));
	}

	return parsed;
}

Source File: DiscreteDomain.java From jpmml-sklearn with GNU Affero General Public License v3.0

5 votes

/**
 * Creates discrete statistics from a pair of parallel arrays.
 *
 * @param dataType The data type that is used for standardizing the values.
 * @param objects An array whose first element holds the values, and whose second element holds their counts.
 * @return A DiscrStats element with a string array of values and an integer array of counts.
 */
static
public DiscrStats createDiscrStats(DataType dataType, Object[] objects){
	Object valuesObject = objects[0];
	Object countsObject = objects[1];

	List<Object> values = (List)asArray(valuesObject);
	List<Integer> counts = ValueUtil.asIntegers((List)asArray(countsObject));

	// Every value must have a count
	ClassDictUtil.checkSize(values, counts);

	return new DiscrStats()
		.addArrays(PMMLUtil.createStringArray(standardizeValues(dataType, values)), PMMLUtil.createIntArray(counts));
}

Source File: MVRConverter.java From jpmml-r with GNU Affero General Public License v3.0

5 votes

/**
 * Encodes the wrapped mvr object as a general regression model with the identity link function.
 *
 * The feature coefficients are one column of the "coefficients" array, selected by "ncomp".
 * The intercept is the first "Ymeans" value minus the sum of products between the feature coefficients and the "Xmeans" values.
 *
 * @param schema The schema that supplies the label and the features.
 * @return The general regression model.
 */
@Override
public GeneralRegressionModel encodeModel(Schema schema){
	RGenericVector mvr = getObject();

	RDoubleVector coefficients = mvr.getDoubleElement("coefficients");
	RDoubleVector xMeans = mvr.getDoubleElement("Xmeans");
	RDoubleVector yMeans = mvr.getDoubleElement("Ymeans");
	RNumberVector<?> ncomp = mvr.getNumericElement("ncomp");

	RStringVector rowNames = coefficients.dimnames(0);
	RStringVector columnNames = coefficients.dimnames(1);
	RStringVector compNames = coefficients.dimnames(2);

	int rows = rowNames.size();
	int columns = columnNames.size();
	int components = compNames.size();

	List<? extends Feature> features = schema.getFeatures();

	// The array is addressed as a (rows) x (columns * components) matrix
	List<Double> featureCoefficients = FortranMatrixUtil.getColumn(coefficients.getValues(), rows, (columns * components), ValueUtil.asInt(ncomp.asScalar()) - 1);

	Double intercept = yMeans.getValue(0);

	for(int row = 0; row < rows; row++){
		Double coefficient = featureCoefficients.get(row);
		Double xMean = xMeans.getValue(row);

		intercept -= (coefficient * xMean);
	}

	GeneralRegressionModel model = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.IDENTITY);

	GeneralRegressionModelUtil.encodeRegressionTable(model, features, featureCoefficients, intercept, null);

	return model;
}

Source File: MVRConverter.java From jpmml-r with GNU Affero General Public License v3.0

5 votes

/**
 * Replaces encoder features with derived fields that divide them by the matching "scale" factor.
 *
 * Does nothing if the wrapped object has no "scale" element. Features whose factor is one are left as they are.
 *
 * @param encoder The encoder whose feature list is updated in place.
 * @throws IllegalArgumentException If the number of scale factors differs from the number of features.
 */
private void scaleFeatures(RExpEncoder encoder){
	RGenericVector mvr = getObject();

	RDoubleVector scale = mvr.getDoubleElement("scale", false);

	if(scale != null){
		List<Feature> features = encoder.getFeatures();

		if(scale.size() != features.size()){
			throw new IllegalArgumentException();
		}

		for(int index = 0; index < features.size(); index++){
			Feature feature = features.get(index);
			Double factor = scale.getValue(index);

			if(!ValueUtil.isOne(factor)){
				ContinuousFeature continuousFeature = feature.toContinuousFeature();

				Apply division = PMMLUtil.createApply(PMMLFunctions.DIVIDE, continuousFeature.ref(), PMMLUtil.createConstant(factor));

				DerivedField scaledField = encoder.createDerivedField(FeatureUtil.createName("scale", feature), OpType.CONTINUOUS, DataType.DOUBLE, division);

				features.set(index, new ContinuousFeature(encoder, scaledField));
			}
		}
	}
}

Source File: PreProcessEncoder.java From jpmml-r with GNU Affero General Public License v3.0

5 votes

/**
 * Wraps an expression into the pre-processing steps that are configured for a field.
 *
 * The steps are applied in a fixed order: range scaling, mean subtraction,
 * division by standard deviation, and finally replacement of missing values by the median.
 * Steps that would be no-ops (subtracting zero, dividing by one) are omitted.
 *
 * @param name The field name for looking up the pre-processing parameters.
 * @param expression The expression to wrap.
 * @return The wrapped expression, or the unchanged expression if no step applies.
 */
private Expression encodeExpression(FieldName name, Expression expression){
	Expression result = expression;

	List<Double> ranges = this.ranges.get(name);
	if(ranges != null){
		Double min = ranges.get(0);
		Double max = ranges.get(1);

		double span = max - min;

		if(!ValueUtil.isZero(min)){
			result = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, result, PMMLUtil.createConstant(min));
		} // End if

		if(!ValueUtil.isOne(span)){
			result = PMMLUtil.createApply(PMMLFunctions.DIVIDE, result, PMMLUtil.createConstant(span));
		}
	}

	Double mean = this.mean.get(name);
	if(mean != null){

		if(!ValueUtil.isZero(mean)){
			result = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, result, PMMLUtil.createConstant(mean));
		}
	}

	Double std = this.std.get(name);
	if(std != null){

		if(!ValueUtil.isOne(std)){
			result = PMMLUtil.createApply(PMMLFunctions.DIVIDE, result, PMMLUtil.createConstant(std));
		}
	}

	Double median = this.median.get(name);
	if(median != null){
		// if(isNotMissing(name)) then <expression so far> else <median>
		result = PMMLUtil.createApply(PMMLFunctions.IF)
			.addExpressions(PMMLUtil.createApply(PMMLFunctions.ISNOTMISSING, new FieldRef(name)))
			.addExpressions(result, PMMLUtil.createConstant(median));
	}

	return result;
}

Source File: Formula.java From jpmml-r with GNU Affero General Public License v3.0

5 votes

/**
 * Registers a field and its feature with this formula.
 *
 * The feature is a ContinuousFeature by default. If the field is a DerivedField whose expression
 * is an Apply that passes the {@code checkApply} test for {@code pow(FieldRef, Constant)},
 * and the constant parses as an integer, then the feature is a PowerFeature instead.
 *
 * @param field The field to add.
 */
public void addField(Field<?> field){
	RExpEncoder encoder = getEncoder();

	// The default feature; may be replaced below
	Feature feature = new ContinuousFeature(encoder, field);

	if(field instanceof DerivedField){
		DerivedField derivedField = (DerivedField)field;

		Expression expression = derivedField.getExpression();
		if(expression instanceof Apply){
			Apply apply = (Apply)expression;

			if(checkApply(apply, PMMLFunctions.POW, FieldRef.class, Constant.class)){
				List<Expression> expressions = apply.getExpressions();

				// The casts rely on the argument types that were passed to checkApply above
				FieldRef fieldRef = (FieldRef)expressions.get(0);
				Constant constant = (Constant)expressions.get(1);

				try {
					String string = ValueUtil.asString(constant.getValue());

					int power = Integer.parseInt(string);

					feature = new PowerFeature(encoder, fieldRef.getField(), DataType.DOUBLE, power);
				} catch(NumberFormatException nfe){
					// Ignored: the exponent is not an integer literal, so the default ContinuousFeature is kept
				}
			}
		}
	}

	putFeature(field.getName(), feature);

	this.fields.add(field);
}

Source File: EnsembleUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0

5 votes

/**
 * Converts the array content of every estimator's feature selection to a list of integers.
 *
 * @param estimatorsFeatures The feature index arrays, one per estimator.
 * @return The list returned by {@code Lists.transform}, holding one integer list per estimator.
 */
static
public List<List<Integer>> transformEstimatorsFeatures(List<? extends HasArray> estimatorsFeatures){
	return Lists.transform(estimatorsFeatures, new Function<HasArray, List<Integer>>(){

		@Override
		public List<Integer> apply(HasArray estimatorFeatures){
			List content = (List)estimatorFeatures.getArrayContent();

			return ValueUtil.asIntegers(content);
		}
	});
}

Source File: RegTree.java From jpmml-xgboost with GNU Affero General Public License v3.0

5 votes

/**
 * Checks if this tree is empty.
 *
 * @return {@code true} if the first node is a leaf whose value is zero, {@code false} otherwise.
 */
public boolean isEmpty(){
	Node root = this.nodes[0];

	if(root.is_leaf()){
		Float leafValue = root.leaf_value();

		return ValueUtil.isZero(leafValue);
	}

	return false;
}

Source File: MiningModelUtil.java From pyramid with Apache License 2.0

5 votes

/**
 * Creates a segmentation where every model becomes an always-true segment.
 *
 * Segments are identified by their one-based position. A segment weight is set
 * only if the corresponding weight is non-null and different from one.
 *
 * @param multipleModelMethod The method for combining segment results.
 * @param models The models, one per segment.
 * @param weights The segment weights; may be {@code null}.
 * @return The segmentation.
 * @throws IllegalArgumentException If weights are given, but their number differs from the number of models.
 */
static
public Segmentation createSegmentation(Segmentation.MultipleModelMethod multipleModelMethod, List<? extends Model> models, List<? extends Number> weights){
    boolean weighted = (weights != null);

    if(weighted && (models.size() != weights.size())){
        throw new IllegalArgumentException();
    }

    List<Segment> segments = new ArrayList<>();

    for(int index = 0; index < models.size(); index++){
        Model model = models.get(index);

        Number weight = null;

        if(weighted){
            weight = weights.get(index);
        }

        Segment segment = new Segment()
                .setId(String.valueOf(index + 1))
                .setPredicate(new True())
                .setModel(model);

        boolean unitWeight = (weight == null || ValueUtil.isOne(weight));

        if(!unitWeight){
            segment.setWeight(ValueUtil.asDouble(weight));
        }

        segments.add(segment);
    }

    return new Segmentation(multipleModelMethod, segments);
}

Source File: XGBoostConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Initializes the label and the features of the encoder from the wrapped booster object.
 *
 * The optional "feature_names" element is checked against the feature map.
 * The optional "schema" element may supply a missing value marker ("missing"),
 * the target field name ("response_name") and the target categories ("response_levels").
 *
 * @param encoder The encoder that receives the label and the features.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
	RGenericVector booster = getObject();

	// Both elements are requested with a "false" flag, and are null-checked before use
	RStringVector featureNames = booster.getStringElement("feature_names", false);
	RGenericVector schema = booster.getGenericElement("schema", false);

	FeatureMap featureMap = ensureFeatureMap();

	if(featureNames != null){
		checkFeatureMap(featureMap, featureNames);
	} // End if

	if(schema != null){
		RVector<?> missing = schema.getVectorElement("missing", false);

		if(missing != null){
			featureMap.addMissingValue(ValueUtil.asString(missing.asScalar()));
		}
	}

	Learner learner = ensureLearner();

	ObjFunction obj = learner.obj();

	// Defaults that apply when the schema element does not override them
	FieldName targetField = FieldName.create("_target");
	List<String> targetCategories = null;

	if(schema != null){
		RStringVector responseName = schema.getStringElement("response_name", false);
		RStringVector responseLevels = schema.getStringElement("response_levels", false);

		if(responseName != null){
			targetField = FieldName.create(responseName.asScalar());
		} // End if

		if(responseLevels != null){
			targetCategories = responseLevels.getValues();
		}
	}

	// The label is created by the objective function of the learner
	Label label = obj.encodeLabel(targetField, targetCategories, encoder);

	encoder.setLabel(label);

	List<Feature> features = featureMap.encodeFeatures(encoder);
	for(Feature feature : features){
		encoder.addFeature(feature);
	}
}

Source File: XGBoostConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the wrapped booster object as a mining model.
 *
 * The conversion options carry the "compact" setting of this converter,
 * and the tree limit of the booster ("ntreelimit"; {@code null} if the element is absent).
 *
 * @param schema The schema, which is converted to an XGBoost schema by the learner.
 * @return The mining model.
 */
@Override
public MiningModel encodeModel(Schema schema){
	RGenericVector booster = getObject();

	RNumberVector<?> ntreeLimit = booster.getNumericElement("ntreelimit", false);

	Learner learner = ensureLearner();

	Map<String, Object> options = new LinkedHashMap<>();
	options.put(HasXGBoostOptions.OPTION_COMPACT, this.compact);

	if(ntreeLimit != null){
		options.put(HasXGBoostOptions.OPTION_NTREE_LIMIT, ValueUtil.asInteger(ntreeLimit.asScalar()));
	} else {
		// The key is registered even if there is no limit
		options.put(HasXGBoostOptions.OPTION_NTREE_LIMIT, null);
	}

	Schema xgbSchema = learner.toXGBoostSchema(schema);

	return learner.encodeMiningModel(options, xgbSchema);
}

Source File: RandomForestConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes a classification forest as a majority-vote mining model with probability output.
 *
 * @param forest The forest object; the elements "bestvar", "treemap", "nodepred", "xbestsplit", "nrnodes" and "ntree" are read.
 * @param schema The schema; its label is cast to CategoricalLabel.
 * @return The mining model.
 */
private MiningModel encodeClassification(RGenericVector forest, Schema schema){
	RNumberVector<?> bestvar = forest.getNumericElement("bestvar");
	RNumberVector<?> treemap = forest.getNumericElement("treemap");
	RIntegerVector nodepred = forest.getIntegerElement("nodepred");
	RDoubleVector xbestsplit = forest.getDoubleElement("xbestsplit");
	RIntegerVector nrnodes = forest.getIntegerElement("nrnodes");
	RDoubleVector ntree = forest.getDoubleElement("ntree");

	// The per-tree matrices are addressed as (nrnodes) rows by (ntree) columns
	int rows = nrnodes.asScalar();
	int columns = ValueUtil.asInt(ntree.asScalar());

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	ScoreEncoder<Integer> scoreEncoder = new ScoreEncoder<Integer>(){

		@Override
		public Object encode(Integer value){
			// Node predictions are one-based indices into the label values
			return categoricalLabel.getValue(value - 1);
		}
	};

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> treeModels = new ArrayList<>();

	for(int i = 0; i < columns; i++){
		// The treemap column of a tree has (2 * rows) values; it is re-read below as a (rows) x 2 matrix
		// NOTE(review): columns 0 and 1 are presumably the left and right daughter node ids - confirm against encodeTreeModel
		List<? extends Number> daughters = FortranMatrixUtil.getColumn(treemap.getValues(), 2 * rows, columns, i);

		TreeModel treeModel = encodeTreeModel(
				MiningFunction.CLASSIFICATION,
				scoreEncoder,
				FortranMatrixUtil.getColumn(daughters, rows, 2, 0),
				FortranMatrixUtil.getColumn(daughters, rows, 2, 1),
				FortranMatrixUtil.getColumn(nodepred.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(bestvar.getValues(), rows, columns, i),
				FortranMatrixUtil.getColumn(xbestsplit.getValues(), rows, columns, i),
				segmentSchema
			);

		treeModels.add(treeModel);
	}

	MiningModel miningModel = new MiningModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.MAJORITY_VOTE, treeModels))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	return miningModel;
}

Source File: RandomForestConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes a regression forest as an averaging mining model.
 *
 * @param forest The forest object; the elements "leftDaughter", "rightDaughter", "nodepred", "bestvar", "xbestsplit", "nrnodes" and "ntree" are read.
 * @param schema The schema; trees are encoded against its anonymous variant.
 * @return The mining model.
 */
private MiningModel encodeRegression(RGenericVector forest, Schema schema){
	RNumberVector<?> leftDaughter = forest.getNumericElement("leftDaughter");
	RNumberVector<?> rightDaughter = forest.getNumericElement("rightDaughter");
	RDoubleVector nodepred = forest.getDoubleElement("nodepred");
	RNumberVector<?> bestvar = forest.getNumericElement("bestvar");
	RDoubleVector xbestsplit = forest.getDoubleElement("xbestsplit");
	RIntegerVector nrnodes = forest.getIntegerElement("nrnodes");
	RNumberVector<?> ntree = forest.getNumericElement("ntree");

	// Node predictions are used as scores without any conversion
	ScoreEncoder<Double> identityEncoder = new ScoreEncoder<Double>(){

		@Override
		public Double encode(Double value){
			return value;
		}
	};

	// Every matrix is addressed as (nrnodes) rows by (ntree) columns; one column per tree
	int nodeCount = nrnodes.asScalar();
	int treeCount = ValueUtil.asInt(ntree.asScalar());

	Schema segmentSchema = schema.toAnonymousSchema();

	List<TreeModel> treeModels = new ArrayList<>();

	for(int tree = 0; tree < treeCount; tree++){
		treeModels.add(encodeTreeModel(
				MiningFunction.REGRESSION,
				identityEncoder,
				FortranMatrixUtil.getColumn(leftDaughter.getValues(), nodeCount, treeCount, tree),
				FortranMatrixUtil.getColumn(rightDaughter.getValues(), nodeCount, treeCount, tree),
				FortranMatrixUtil.getColumn(nodepred.getValues(), nodeCount, treeCount, tree),
				FortranMatrixUtil.getColumn(bestvar.getValues(), nodeCount, treeCount, tree),
				FortranMatrixUtil.getColumn(xbestsplit.getValues(), nodeCount, treeCount, tree),
				segmentSchema
			));
	}

	return new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()))
		.setSegmentation(MiningModelUtil.createSegmentation(Segmentation.MultipleModelMethod.AVERAGE, treeModels));
}

Source File: RPartConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Creates the left and right branch predicates of a split.
 *
 * If the absolute value of the split type is one, then both predicates are simple comparisons against the split value.
 * Otherwise the feature is treated as categorical, and both predicates are set predicates
 * that are derived from one row of the {@code csplit} matrix.
 *
 * @param feature The split feature.
 * @param splitOffset The row of the split in the {@code splits} matrix.
 * @param splits The splits matrix; its dimensions are read from {@code dim()}.
 * @param csplit The categorical splits matrix; only read for categorical splits.
 * @return A two-element list holding the left predicate and the right predicate, in that order.
 */
private List<Predicate> encodePredicates(Feature feature, int splitOffset, RNumberVector<?> splits, RIntegerVector csplit){
	Predicate leftPredicate;
	Predicate rightPredicate;

	RIntegerVector splitsDim = splits.dim();

	int splitRows = splitsDim.getValue(0);
	int splitColumns = splitsDim.getValue(1);

	// NOTE(review): the column positions 1 and 3 are hard-coded; presumably the "ncat" and "index" columns of the rpart splits matrix - confirm
	List<? extends Number> ncat = FortranMatrixUtil.getColumn(splits.getValues(), splitRows, splitColumns, 1);
	List<? extends Number> index = FortranMatrixUtil.getColumn(splits.getValues(), splitRows, splitColumns, 3);

	int splitType = ValueUtil.asInt(ncat.get(splitOffset));

	Number splitValue = index.get(splitOffset);

	if(Math.abs(splitType) == 1){
		SimplePredicate.Operator leftOperator;
		SimplePredicate.Operator rightOperator;

		// The sign of the split type decides which branch gets the "less than" comparison
		if(splitType == -1){
			leftOperator = SimplePredicate.Operator.LESS_THAN;
			rightOperator = SimplePredicate.Operator.GREATER_OR_EQUAL;
		} else

		{
			leftOperator = SimplePredicate.Operator.GREATER_OR_EQUAL;
			rightOperator = SimplePredicate.Operator.LESS_THAN;
		}

		leftPredicate = createSimplePredicate(feature, leftOperator, splitValue);
		rightPredicate = createSimplePredicate(feature, rightOperator, splitValue);
	} else

	{
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		RIntegerVector csplitDim = csplit.dim();

		int csplitRows = csplitDim.getValue(0);
		int csplitColumns = csplitDim.getValue(1);

		// For categorical splits the split value is a one-based row number of the csplit matrix
		List<Integer> csplitRow = FortranMatrixUtil.getRow(csplit.getValues(), csplitRows, csplitColumns, ValueUtil.asInt(splitValue) - 1);

		List<?> values = categoricalFeature.getValues();

		// NOTE(review): the markers 1 and 3 are presumably the "goes left" and "goes right" codes of csplit - confirm against selectValues
		leftPredicate = createSimpleSetPredicate(categoricalFeature, selectValues(values, csplitRow, 1));
		rightPredicate = createSimpleSetPredicate(categoricalFeature, selectValues(values, csplitRow, 3));
	}

	return Arrays.asList(leftPredicate, rightPredicate);
}

Source File: SVMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the wrapped SVM object as a support vector machine model.
 *
 * The numeric "type" and "kernel" elements are used as ordinals into the Type and Kernel enums.
 * Classification types are encoded via {@code encodeClassification}; all other types via {@code encodeRegression}.
 *
 * @param schema The schema that is passed to the type-specific encoder.
 * @return The support vector machine model.
 * @throws IllegalArgumentException If the SVM type is not supported, or if a one-classification model has a non-empty "y.scale" element.
 */
@Override
public SupportVectorMachineModel encodeModel(Schema schema){
	RGenericVector svm = getObject();

	RDoubleVector type = svm.getDoubleElement("type");
	RDoubleVector kernel = svm.getDoubleElement("kernel");
	RDoubleVector degree = svm.getDoubleElement("degree");
	RDoubleVector gamma = svm.getDoubleElement("gamma");
	RDoubleVector coef0 = svm.getDoubleElement("coef0");
	RGenericVector yScale = svm.getGenericElement("y.scale");
	RIntegerVector nSv = svm.getIntegerElement("nSV");
	RDoubleVector sv = svm.getDoubleElement("SV");
	RDoubleVector rho = svm.getDoubleElement("rho");
	RDoubleVector coefs = svm.getDoubleElement("coefs");

	// An out-of-range ordinal fails with an ArrayIndexOutOfBoundsException here
	Type svmType = Type.values()[ValueUtil.asInt(type.asScalar())];
	Kernel svmKernel = Kernel.values()[ValueUtil.asInt(kernel.asScalar())];

	org.dmg.pmml.support_vector_machine.Kernel pmmlKernel = svmKernel.createKernel(degree.asScalar(), gamma.asScalar(), coef0.asScalar());

	SupportVectorMachineModel supportVectorMachineModel;

	switch(svmType){
		case C_CLASSIFICATION:
		case NU_CLASSIFICATION:
			{
				supportVectorMachineModel = encodeClassification(pmmlKernel, sv, nSv, rho, coefs, schema);
			}
			break;
		case ONE_CLASSIFICATION:
			{
				// The extra output transformation tests if the "decisionFunction" value is less than or equal to zero
				Transformation outlier = new OutlierTransformation(){

					@Override
					public Expression createExpression(FieldRef fieldRef){
						return PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL, fieldRef, PMMLUtil.createConstant(0d));
					}
				};

				supportVectorMachineModel = encodeRegression(pmmlKernel, sv, rho, coefs, schema)
					.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE, outlier));

				// Target scaling is rejected for this type
				if(yScale != null && yScale.size() > 0){
					throw new IllegalArgumentException();
				}
			}
			break;
		case EPS_REGRESSION:
		case NU_REGRESSION:
			{
				supportVectorMachineModel = encodeRegression(pmmlKernel, sv, rho, coefs, schema);

				if(yScale != null && yScale.size() > 0){
					RDoubleVector yScaledCenter = yScale.getDoubleElement("scaled:center");
					RDoubleVector yScaledScale = yScale.getDoubleElement("scaled:scale");

					// NOTE(review): the scale is negated before it is used as the rescale factor; presumably this compensates for the sign convention of encodeRegression - confirm
					supportVectorMachineModel.setTargets(ModelUtil.createRescaleTargets(-1d * yScaledScale.asScalar(), yScaledCenter.asScalar(), (ContinuousLabel)schema.getLabel()));
				}
			}
			break;
		default:
			throw new IllegalArgumentException();
	}

	return supportVectorMachineModel;
}

Source File: RobustScaler.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the "(feature - center) / scale" transformation for every feature.
 *
 * Centering and scaling are only applied if they are enabled, and if they are not no-ops
 * (a zero center, a unit scale). Features that need neither are passed through unchanged.
 *
 * @param features The input features.
 * @param encoder The encoder for creating derived fields.
 * @return The output features; the input list itself if both centering and scaling are unavailable.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Boolean withCentering = getWithCentering();
	Boolean withScaling = getWithScaling();

	List<? extends Number> center = (withCentering ? getCenter() : null);
	List<? extends Number> scale = (withScaling ? getScale() : null);

	if(center == null && scale == null){
		return features;
	}

	ClassDictUtil.checkSize(features, center, scale);

	List<Feature> scaledFeatures = new ArrayList<>();

	for(int i = 0; i < features.size(); i++){
		Feature feature = features.get(i);

		Number centerValue = (withCentering ? center.get(i) : 0d);
		Number scaleValue = (withScaling ? scale.get(i) : 1d);

		boolean shift = !ValueUtil.isZero(centerValue);
		boolean rescale = !ValueUtil.isOne(scaleValue);

		if(shift || rescale){
			ContinuousFeature continuousFeature = feature.toContinuousFeature();

			// "($name - center) / scale"
			Expression expression = continuousFeature.ref();

			if(shift){
				expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, expression, PMMLUtil.createConstant(centerValue));
			} // End if

			if(rescale){
				expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, expression, PMMLUtil.createConstant(scaleValue));
			}

			DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("robust_scaler", continuousFeature), expression);

			scaledFeatures.add(new ContinuousFeature(encoder, derivedField));
		} else {
			// Nothing to do for this feature
			scaledFeatures.add(feature);
		}
	}

	return scaledFeatures;
}

Source File: DNNClassifier.java From jpmml-tensorflow with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes a classification neural network with a categorical integer target field named "_target".
 *
 * The shape of the last neural layer decides the encoding:
 * a single neuron is treated as a binary classifier, and gets an extra two-neuron layer appended;
 * more than two neurons are treated as a multi-class classifier with softmax normalization.
 * The categories are the string forms of zero-based neuron indices.
 *
 * @param encoder The encoder for creating the target field.
 * @return The neural network, configured for classification with probability output.
 * @throws IllegalArgumentException If the last neural layer has exactly two neurons (or none).
 */
@Override
public NeuralNetwork encodeModel(TensorFlowEncoder encoder){
	DataField dataField = encoder.createDataField(FieldName.create("_target"), OpType.CATEGORICAL, DataType.INTEGER);

	NeuralNetwork neuralNetwork = encodeNeuralNetwork(encoder);

	List<NeuralLayer> neuralLayers = neuralNetwork.getNeuralLayers();

	NeuralLayer neuralLayer = Iterables.getLast(neuralLayers);

	List<Neuron> neurons = neuralLayer.getNeurons();

	List<String> categories;

	if(neurons.size() == 1){
		// The single output neuron yields p(event) via the logistic function
		neuralLayer.setActivationFunction(NeuralNetwork.ActivationFunction.LOGISTIC);

		Neuron neuron = Iterables.getOnlyElement(neurons);

		// From here on, neuralLayer refers to the new layer that expands p(event) to two class probabilities
		neuralLayer = new NeuralLayer()
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY);

		categories = Arrays.asList("0", "1");

		// p(no event) = 1 - p(event)
		Neuron passiveNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(0))
			.setBias(ValueUtil.floatToDouble(1f))
			.addConnections(new Connection(neuron.getId(), -1f));

		// p(event)
		Neuron activeNeuron = new Neuron()
			.setId(String.valueOf(neuralLayers.size() + 1) + "/" + categories.get(1))
			.setBias(null)
			.addConnections(new Connection(neuron.getId(), 1f));

		neuralLayer.addNeurons(passiveNeuron, activeNeuron);

		neuralNetwork.addNeuralLayers(neuralLayer);

		// The neural outputs below must refer to the neurons of the new layer
		neurons = neuralLayer.getNeurons();
	} else

	if(neurons.size() > 2){
		neuralLayer
			.setActivationFunction(NeuralNetwork.ActivationFunction.IDENTITY)
			.setNormalizationMethod(NeuralNetwork.NormalizationMethod.SOFTMAX);

		categories = new ArrayList<>();

		// One category per output neuron
		for(int i = 0; i < neurons.size(); i++){
			String category = String.valueOf(i);

			categories.add(category);
		}
	} else

	{
		throw new IllegalArgumentException();
	}

	dataField = encoder.toCategorical(dataField.getName(), categories);

	CategoricalLabel categoricalLabel = new CategoricalLabel(dataField);

	neuralNetwork
		.setMiningFunction(MiningFunction.CLASSIFICATION)
		.setMiningSchema(ModelUtil.createMiningSchema(categoricalLabel))
		.setNeuralOutputs(NeuralNetworkUtil.createClassificationNeuralOutputs(neurons, categoricalLabel))
		.setOutput(ModelUtil.createProbabilityOutput(DataType.FLOAT, categoricalLabel));

	return neuralNetwork;
}

Source File: StandardScalerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the standard scaling transformation for every feature of the input column.
 *
 * If enabled, a non-zero mean is subtracted first. If enabled, a non-unit standard deviation
 * is applied as a multiplication by its reciprocal. A feature that only needs the multiplication
 * becomes a ProductFeature, which creates its derived field on demand; a feature that needs
 * neither step is passed through unchanged.
 *
 * @param encoder The encoder for looking up input features and creating derived fields.
 * @return The output features, one per input feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	StandardScalerModel transformer = getTransformer();

	Vector mean = transformer.mean();
	Vector std = transformer.std();

	boolean withMean = transformer.getWithMean();
	boolean withStd = transformer.getWithStd();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	// A vector is size-checked only if it is going to be used
	if(withMean){
		SchemaUtil.checkSize(mean.size(), features);
	} // End if

	if(withStd){
		SchemaUtil.checkSize(std.size(), features);
	}

	List<Feature> result = new ArrayList<>();

	for(int i = 0, length = features.size(); i < length; i++){
		Feature feature = features.get(i);

		FieldName name = formatName(transformer, i, length);

		// Stays null unless the mean subtraction applies to this feature
		Expression expression = null;

		if(withMean){
			double meanValue = mean.apply(i);

			if(!ValueUtil.isZero(meanValue)){
				ContinuousFeature continuousFeature = feature.toContinuousFeature();

				expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, continuousFeature.ref(), PMMLUtil.createConstant(meanValue));
			}
		} // End if

		if(withStd){
			double stdValue = std.apply(i);

			if(!ValueUtil.isOne(stdValue)){
				// Division by the standard deviation is encoded as multiplication by its reciprocal
				Double factor = (1d / stdValue);

				if(expression != null){
					expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(factor));
				} else

				{
					// No mean subtraction: keep the scaling symbolic as a ProductFeature
					feature = new ProductFeature(encoder, feature, factor){

						@Override
						public ContinuousFeature toContinuousFeature(){
							// The multiplication is only built when the supplier is invoked
							Supplier<Apply> applySupplier = () -> {
								Feature feature = getFeature();
								Number factor = getFactor();

								return PMMLUtil.createApply(PMMLFunctions.MULTIPLY, (feature.toContinuousFeature()).ref(), PMMLUtil.createConstant(factor));
							};

							return toContinuousFeature(name, DataType.DOUBLE, applySupplier);
						}
					};
				}
			}
		} // End if

		if(expression != null){
			DerivedField derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, expression);

			result.add(new ContinuousFeature(encoder, derivedField));
		} else

		{
			// Either the unchanged input feature, or the ProductFeature created above
			result.add(feature);
		}
	}

	return result;
}

Source File: ObjFunction.java From jpmml-xgboost with GNU Affero General Public License v3.0

4 votes

/**
 * Creates a regression mining model that sums the predictions of non-empty trees.
 *
 * Empty trees (see {@code RegTree#isEmpty()}) are skipped, together with their weights.
 * The trees are combined by plain summation if all remaining weights are one (or if there are no weights),
 * and by weighted summation otherwise.
 *
 * @param trees The trees.
 * @param weights The tree weights, one per tree; may be {@code null}. The list is copied, never modified.
 * @param base_score The constant that is used for rescaling the target.
 * @param ntreeLimit The number of leading trees to use; {@code null} for all trees.
 * @param schema The schema; its label is cast to ContinuousLabel.
 * @return The mining model.
 * @throws IllegalArgumentException If the number of weights differs from the number of trees, or if the tree limit is greater than the number of trees.
 */
static
protected MiningModel createMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){

	if(weights != null){

		if(trees.size() != weights.size()){
			throw new IllegalArgumentException();
		}
	} // End if

	if(ntreeLimit != null){

		if(ntreeLimit > trees.size()){
			throw new IllegalArgumentException("Tree limit " + ntreeLimit + " is greater than the number of trees");
		}

		trees = trees.subList(0, ntreeLimit);

		if(weights != null){
			weights = weights.subList(0, ntreeLimit);
		}
	} // End if

	// Work on a private copy, because the weights of empty trees are removed below
	if(weights != null){
		weights = new ArrayList<>(weights);
	}

	ContinuousLabel continuousLabel = (ContinuousLabel)schema.getLabel();

	Schema segmentSchema = schema.toAnonymousSchema();

	PredicateManager predicateManager = new PredicateManager();

	List<TreeModel> treeModels = new ArrayList<>();

	boolean equalWeights = true;

	Iterator<RegTree> treeIt = trees.iterator();
	Iterator<Float> weightIt = (weights != null ? weights.iterator() : null);

	while(treeIt.hasNext()){
		RegTree tree = treeIt.next();
		Float weight = (weightIt != null ? weightIt.next() : null);

		if(tree.isEmpty()){

			// Keep the weights aligned with the tree models; there is nothing to remove if no weights were given
			if(weightIt != null){
				weightIt.remove();
			}

			continue;
		} // End if

		if(weight != null){
			equalWeights &= ValueUtil.isOne(weight);
		}

		TreeModel treeModel = tree.encodeTreeModel(predicateManager, segmentSchema);

		treeModels.add(treeModel);
	}

	MiningModel miningModel = new MiningModel(MiningFunction.REGRESSION, ModelUtil.createMiningSchema(continuousLabel))
		.setMathContext(MathContext.FLOAT)
		.setSegmentation(MiningModelUtil.createSegmentation(equalWeights ? Segmentation.MultipleModelMethod.SUM : Segmentation.MultipleModelMethod.WEIGHTED_SUM, treeModels, weights))
		.setTargets(ModelUtil.createRescaleTargets(null, base_score, continuousLabel));

	return miningModel;
}

Source File: DummyClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes this classifier as a tree model that consists of a single, always-matching root node.
 *
 * <p>
 * The score of the root node and its score distribution depend on the strategy:
 * </p>
 * <ul>
 *   <li><code>constant</code> - the class that equals the constant, with probability one.</li>
 *   <li><code>most_frequent</code> - the class with the greatest class prior, with probability one.</li>
 *   <li><code>prior</code> - the class with the greatest class prior, with class priors as probabilities.</li>
 * </ul>
 *
 * @param schema The schema. Its label is cast to {@link CategoricalLabel}.
 *
 * @throws IllegalArgumentException If the strategy is not one of the above.
 */
@Override
public TreeModel encodeModel(Schema schema){
	List<?> classes = getClasses();
	List<? extends Number> classPrior = getClassPrior();
	Object constant = getConstant();
	String strategy = getStrategy();

	ClassDictUtil.checkSize(classes, classPrior);

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	// The position of the predicted class in the list of classes
	int winner;

	// The probabilities of classes, in the order of the list of classes
	double[] distribution;

	if(strategy.equals("constant")){
		winner = classes.indexOf(constant);

		distribution = new double[classes.size()];
		distribution[winner] = 1d;
	} else

	if(strategy.equals("most_frequent")){
		Object greatestPrior = Collections.max((List)classPrior);

		winner = classPrior.indexOf(greatestPrior);

		distribution = new double[classes.size()];
		distribution[winner] = 1d;
	} else

	if(strategy.equals("prior")){
		Object greatestPrior = Collections.max((List)classPrior);

		winner = classPrior.indexOf(greatestPrior);

		distribution = Doubles.toArray(classPrior);
	} else

	{
		throw new IllegalArgumentException(strategy);
	}

	Node root = new ClassifierNode(ValueUtil.asString(classes.get(winner)), True.INSTANCE);

	List<ScoreDistribution> scoreDistributions = root.getScoreDistributions();

	int position = 0;

	for(Object clazz : classes){
		scoreDistributions.add(new ScoreDistribution(ValueUtil.asString(clazz), distribution[position]));

		position++;
	}

	return new TreeModel(MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), root)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));
}

Source File: SVMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

/**
 * Replaces the features of the encoder with their scaled counterparts, in place.
 *
 * <p>
 * For every column that is flagged in the <code>scaled</code> element, the feature at the same position is replaced with a derived field
 * that computes <code>(x - center) / scale</code>. The subtraction is omitted if the center is zero, and the division is omitted if the scale is one.
 * Columns that need neither are left as they are.
 * </p>
 *
 * @param encoder The encoder whose list of features is updated.
 *
 * @throws IllegalArgumentException If the number of scaling flags differs from the number of columns or from the number of features.
 */
private void scaleFeatures(RExpEncoder encoder){
	RGenericVector svm = getObject();

	RDoubleVector sv = svm.getDoubleElement("SV");
	RBooleanVector scaled = svm.getBooleanElement("scaled");
	RGenericVector xScale = svm.getGenericElement("x.scale");

	RStringVector rowNames = sv.dimnames(0);
	RStringVector columnNames = sv.dimnames(1);

	List<Feature> features = encoder.getFeatures();

	boolean consistent = (scaled.size() == columnNames.size()) && (scaled.size() == features.size());
	if(!consistent){
		throw new IllegalArgumentException();
	}

	RDoubleVector centers = xScale.getDoubleElement("scaled:center");
	RDoubleVector scales = xScale.getDoubleElement("scaled:scale");

	int numberOfColumns = columnNames.size();

	for(int column = 0; column < numberOfColumns; column++){
		String columnName = columnNames.getValue(column);

		if(!scaled.getValue(column)){
			continue;
		}

		Feature feature = features.get(column);

		// Centers and scales are looked up by column name, not by position
		Double center = centers.getElement(columnName);
		Double scale = scales.getElement(columnName);

		boolean zeroCenter = ValueUtil.isZero(center);
		boolean unitScale = ValueUtil.isOne(scale);

		// Identity transformation
		if(zeroCenter && unitScale){
			continue;
		}

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		// "($name - center) / scale"
		Expression expression = continuousFeature.ref();

		if(!zeroCenter){
			expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, expression, PMMLUtil.createConstant(center));
		} // End if

		if(!unitScale){
			expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, expression, PMMLUtil.createConstant(scale));
		}

		DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("scale", feature), OpType.CONTINUOUS, DataType.DOUBLE, expression);

		features.set(column, new ContinuousFeature(encoder, derivedField));
	}
}

Source File: MinMaxScaler.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the transformation <code>(x * scale) + min</code> for every feature.
 *
 * <p>
 * The multiplication is omitted if the scale is one, and the addition is omitted if the min is zero.
 * A feature that needs neither is passed through unchanged.
 * </p>
 *
 * @param features The input features.
 * @param encoder The encoder that is used for creating derived fields.
 *
 * @return A new list that holds one feature per input feature.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> min = getMin();
	List<? extends Number> scale = getScale();

	ClassDictUtil.checkSize(features, min, scale);

	List<Feature> result = new ArrayList<>();

	for(int index = 0; index < features.size(); index++){
		Feature feature = features.get(index);

		Number offset = min.get(index);
		Number factor = scale.get(index);

		boolean unitFactor = ValueUtil.isOne(factor);
		boolean zeroOffset = ValueUtil.isZero(offset);

		// Identity transformation
		if(unitFactor && zeroOffset){
			result.add(feature);

			continue;
		}

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		Expression expression = continuousFeature.ref();

		if(!unitFactor){
			expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(factor));
		} // End if

		if(!zeroOffset){
			expression = PMMLUtil.createApply(PMMLFunctions.ADD, expression, PMMLUtil.createConstant(offset));
		}

		// NOTE(review): the prefix reads "mix_max_scaler" (sic). It is left as is, because it becomes a part of derived field names - confirm before renaming
		DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("mix_max_scaler", continuousFeature), expression);

		result.add(new ContinuousFeature(encoder, derivedField));
	}

	return result;
}

Source File: StandardScaler.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes the transformation <code>(x - mean) / std</code> for every feature.
 *
 * <p>
 * The mean is treated as zero if centering is disabled, and the std is treated as one if scaling is disabled.
 * The subtraction is omitted if the mean is zero, and the division is omitted if the std is one.
 * A feature that needs neither is passed through unchanged.
 * </p>
 *
 * @param features The input features.
 * @param encoder The encoder that is used for creating derived fields.
 *
 * @return The input list if there are neither means nor stds available, a new list that holds one feature per input feature otherwise.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	Boolean withMean = getWithMean();
	Boolean withStd = getWithStd();

	List<? extends Number> mean = null;

	if(withMean){
		mean = getMean();
	}

	List<? extends Number> std = null;

	if(withStd){
		std = getStd();
	} // End if

	// Nothing to do
	if(mean == null && std == null){
		return features;
	}

	ClassDictUtil.checkSize(features, mean, std);

	List<Feature> result = new ArrayList<>();

	for(int index = 0; index < features.size(); index++){
		Feature feature = features.get(index);

		Number center = Double.valueOf(0d);

		if(withMean){
			center = mean.get(index);
		}

		Number spread = Double.valueOf(1d);

		if(withStd){
			spread = std.get(index);
		}

		boolean zeroCenter = ValueUtil.isZero(center);
		boolean unitSpread = ValueUtil.isOne(spread);

		// Identity transformation
		if(zeroCenter && unitSpread){
			result.add(feature);

			continue;
		}

		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		Expression expression = continuousFeature.ref();

		if(!zeroCenter){
			expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, expression, PMMLUtil.createConstant(center));
		} // End if

		if(!unitSpread){
			expression = PMMLUtil.createApply(PMMLFunctions.DIVIDE, expression, PMMLUtil.createConstant(spread));
		}

		DerivedField derivedField = encoder.createDerivedField(FeatureUtil.createName("standard_scaler", continuousFeature), expression);

		result.add(new ContinuousFeature(encoder, derivedField));
	}

	return result;
}

Source File: LabelBinarizer.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

/**
 * Encodes a single input feature as one output feature per (prepared) class.
 *
 * <p>
 * If the negative label is zero and the positive label is one, then every output is a {@link BinaryFeature}.
 * Otherwise, every output is a categorical derived field that computes <code>($name == value) ? pos_label : neg_label</code>.
 * The input field is declared categorical over the original (unprepared) classes.
 * </p>
 *
 * @param features The input features. Must hold exactly one feature.
 * @param encoder The encoder that is used for creating derived fields.
 *
 * @return A new list of output features.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<?> classes = getClasses();

	Number negLabel = getNegLabel();
	Number posLabel = getPosLabel();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	// A snapshot of the original classes, taken before they are prepared
	List<Object> categories = new ArrayList<>();
	categories.addAll(classes);

	List<Number> labelCategories = new ArrayList<>();
	labelCategories.add(negLabel);
	labelCategories.add(posLabel);

	List<Feature> result = new ArrayList<>();

	List<?> preparedClasses = prepareClasses(classes);

	for(int index = 0; index < preparedClasses.size(); index++){
		Object value = preparedClasses.get(index);

		// The default labels map directly to a binary indicator
		if(ValueUtil.isZero(negLabel) && ValueUtil.isOne(posLabel)){
			result.add(new BinaryFeature(encoder, feature, value));

			continue;
		}

		Apply apply = PMMLUtil.createApply(PMMLFunctions.IF);

		Apply equalityTest = PMMLUtil.createApply(PMMLFunctions.EQUAL, feature.ref(), PMMLUtil.createConstant(value, feature.getDataType()));

		apply.addExpressions(equalityTest);
		apply.addExpressions(PMMLUtil.createConstant(posLabel), PMMLUtil.createConstant(negLabel));

		FieldName name;

		// The index is appended to the name only if there are several outputs
		if(preparedClasses.size() > 1){
			name = FeatureUtil.createName("label_binarizer", feature, index);
		} else

		{
			name = FeatureUtil.createName("label_binarizer", feature);
		}

		DerivedField derivedField = encoder.createDerivedField(name, apply);

		result.add(new CategoricalFeature(encoder, derivedField, labelCategories));
	}

	encoder.toCategorical(feature.getName(), categories);

	return result;
}

org.jpmml.converter.ValueUtil Java Examples