org.jpmml.converter.SchemaUtil Java Exaples

Source File: RegressionTableUtil.java From jpmml-sparkml with GNU Affero General Public License v3.0

6 votes

static
public <C extends ModelConverter<?> & HasRegressionTableOptions> void simplify(C converter, Object identifier, List<Feature> features, List<Double> coefficients){
	SchemaUtil.checkSize(coefficients.size(), features);

	Integer lookupThreshold = (Integer)converter.getOption(HasRegressionTableOptions.OPTION_LOOKUP_THRESHOLD, null);
	if(lookupThreshold == null){
		return;
	}

	Map<FieldName, Long> countMap = features.stream()
		.filter(feature -> (feature instanceof BinaryFeature))
		.collect(Collectors.groupingBy(feature -> ((BinaryFeature)feature).getName(), Collectors.counting()));

	Collection<? extends Map.Entry<FieldName, Long>> entries = countMap.entrySet();
	for(Map.Entry<FieldName, Long> entry : entries){

		if(entry.getValue() < lookupThreshold){
			continue;
		}

		createMapValues(entry.getKey(), identifier, features, coefficients);
	}
}

Source File: GBDTLRClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

6 votes

@Override
public Model encodeModel(Schema schema){
	Classifier gbdt = getGBDT();
	MultiOneHotEncoder ohe = getOHE();
	LinearClassifier lr = getLR();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	SchemaUtil.checkSize(2, categoricalLabel);

	List<? extends Number> coef = lr.getCoef();
	List<? extends Number> intercept = lr.getIntercept();

	Schema segmentSchema = schema.toAnonymousSchema();

	MiningModel miningModel = GBDTUtil.encodeModel(gbdt, ohe, coef, Iterables.getOnlyElement(intercept), segmentSchema)
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createBinaryLogisticClassification(miningModel, 1d, 0d, RegressionModel.NormalizationMethod.LOGIT, lr.hasProbabilityDistribution(), schema);
}

Source File: GeneralizedLinearRegressionModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

5 votes

@Override
public GeneralRegressionModel encodeModel(Schema schema){
	GeneralizedLinearRegressionModel model = getTransformer();

	Object targetCategory = null;

	MiningFunction miningFunction = getMiningFunction();
	switch(miningFunction){
		case CLASSIFICATION:
			CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

			SchemaUtil.checkSize(2, categoricalLabel);

			targetCategory = categoricalLabel.getValue(1);
			break;
		default:
			break;
	}

	List<Feature> features = new ArrayList<>(schema.getFeatures());
	List<Double> featureCoefficients = new ArrayList<>(VectorUtil.toList(model.coefficients()));

	RegressionTableUtil.simplify(this, targetCategory, features, featureCoefficients);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setDistribution(parseFamily(model.getFamily()))
		.setLinkFunction(parseLinkFunction(model.getLink()))
		.setLinkParameter(parseLinkParameter(model.getLink()));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, model.intercept(), targetCategory);

	return generalRegressionModel;
}

Source File: StandardScalerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	StandardScalerModel transformer = getTransformer();

	Vector mean = transformer.mean();
	Vector std = transformer.std();

	boolean withMean = transformer.getWithMean();
	boolean withStd = transformer.getWithStd();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	if(withMean){
		SchemaUtil.checkSize(mean.size(), features);
	} // End if

	if(withStd){
		SchemaUtil.checkSize(std.size(), features);
	}

	List<Feature> result = new ArrayList<>();

	for(int i = 0, length = features.size(); i < length; i++){
		Feature feature = features.get(i);

		FieldName name = formatName(transformer, i, length);

		Expression expression = null;

		if(withMean){
			double meanValue = mean.apply(i);

			if(!ValueUtil.isZero(meanValue)){
				ContinuousFeature continuousFeature = feature.toContinuousFeature();

				expression = PMMLUtil.createApply(PMMLFunctions.SUBTRACT, continuousFeature.ref(), PMMLUtil.createConstant(meanValue));
			}
		} // End if

		if(withStd){
			double stdValue = std.apply(i);

			if(!ValueUtil.isOne(stdValue)){
				Double factor = (1d / stdValue);

				if(expression != null){
					expression = PMMLUtil.createApply(PMMLFunctions.MULTIPLY, expression, PMMLUtil.createConstant(factor));
				} else

				{
					feature = new ProductFeature(encoder, feature, factor){

						@Override
						public ContinuousFeature toContinuousFeature(){
							Supplier<Apply> applySupplier = () -> {
								Feature feature = getFeature();
								Number factor = getFactor();

								return PMMLUtil.createApply(PMMLFunctions.MULTIPLY, (feature.toContinuousFeature()).ref(), PMMLUtil.createConstant(factor));
							};

							return toContinuousFeature(name, DataType.DOUBLE, applySupplier);
						}
					};
				}
			}
		} // End if

		if(expression != null){
			DerivedField derivedField = encoder.createDerivedField(name, OpType.CONTINUOUS, DataType.DOUBLE, expression);

			result.add(new ContinuousFeature(encoder, derivedField));
		} else

		{
			result.add(feature);
		}
	}

	return result;
}

Source File: VectorIndexerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

4 votes

@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorIndexerModel transformer = getTransformer();

	int numFeatures = transformer.numFeatures();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	SchemaUtil.checkSize(numFeatures, features);

	Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();

	List<Feature> result = new ArrayList<>();

	for(int i = 0, length = numFeatures; i < length; i++){
		Feature feature = features.get(i);

		Map<Double, Integer> categoryMap = categoryMaps.get(i);
		if(categoryMap != null){
			List<Double> categories = new ArrayList<>();
			List<Integer> values = new ArrayList<>();

			List<Map.Entry<Double, Integer>> entries = new ArrayList<>(categoryMap.entrySet());
			Collections.sort(entries, VectorIndexerModelConverter.COMPARATOR);

			for(Map.Entry<Double, Integer> entry : entries){
				Double category = entry.getKey();
				Integer value = entry.getValue();

				categories.add(category);
				values.add(value);
			}

			encoder.toCategorical(feature.getName(), categories);

			MapValues mapValues = PMMLUtil.createMapValues(feature.getName(), categories, values)
				.setDataType(DataType.INTEGER);

			DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i, length), OpType.CATEGORICAL, DataType.INTEGER, mapValues);

			result.add(new CategoricalFeature(encoder, derivedField, values));
		} else

		{
			result.add((ContinuousFeature)feature);
		}
	}

	return result;
}

Source File: LinearDiscriminantAnalysis.java From jpmml-sklearn with GNU Affero General Public License v3.0

4 votes

private Model encodeMultinomialModel(Schema schema){
	String sklearnVersion = getSkLearnVersion();
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	// See https://github.com/scikit-learn/scikit-learn/issues/6848
	boolean corrected = (sklearnVersion != null && SkLearnUtil.compareVersion(sklearnVersion, "0.21") >= 0);

	if(!corrected){
		return super.encodeModel(schema);
	} // End if

	if(numberOfClasses >= 3){
		SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

		Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

		List<RegressionModel> regressionModels = new ArrayList<>();

		for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.NONE, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SOFTMAX, true, schema);
	} else

	{
		throw new IllegalArgumentException();
	}
}

Source File: GLMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

@Override
public Model encodeModel(Schema schema){
	RGenericVector glm = getObject();

	RDoubleVector coefficients = glm.getDoubleElement("coefficients");
	RGenericVector family = glm.getGenericElement("family");

	Double intercept = coefficients.getElement(getInterceptName(), false);

	RStringVector familyFamily = family.getStringElement("family");
	RStringVector familyLink = family.getStringElement("link");

	Label label = schema.getLabel();
	List<? extends Feature> features = schema.getFeatures();

	SchemaUtil.checkSize(coefficients.size() - (intercept != null ? 1 : 0), features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	MiningFunction miningFunction = getMiningFunction(familyFamily.asScalar());

	Object targetCategory = null;

	switch(miningFunction){
		case CLASSIFICATION:
			{
				CategoricalLabel categoricalLabel = (CategoricalLabel)label;

				SchemaUtil.checkSize(2, categoricalLabel);

				targetCategory = categoricalLabel.getValue(1);
			}
			break;
		default:
			break;
	}

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, miningFunction, ModelUtil.createMiningSchema(label), null, null, null)
		.setDistribution(parseFamily(familyFamily.asScalar()))
		.setLinkFunction(parseLinkFunction(familyLink.asScalar()))
		.setLinkParameter(parseLinkParameter(familyLink.asScalar()));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

	switch(miningFunction){
		case CLASSIFICATION:
			generalRegressionModel.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, (CategoricalLabel)label));
			break;
		default:
			break;
	}

	return generalRegressionModel;
}

Source File: LMConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

@Override
public Model encodeModel(Schema schema){
	RGenericVector lm = getObject();

	RDoubleVector coefficients = lm.getDoubleElement("coefficients");

	Double intercept = coefficients.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	SchemaUtil.checkSize(coefficients.size() - (intercept != null ? 1 : 0), features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	return RegressionModelUtil.createRegression(features, featureCoefficients, intercept, null, schema);
}

Source File: EarthConverter.java From jpmml-r with GNU Affero General Public License v3.0

4 votes

@Override
public GeneralRegressionModel encodeModel(Schema schema){
	RGenericVector earth = getObject();

	RDoubleVector coefficients = earth.getDoubleElement("coefficients");

	Double intercept = coefficients.getValue(0);

	List<? extends Feature> features = schema.getFeatures();

	SchemaUtil.checkSize(coefficients.size() - 1, features);

	List<Double> featureCoefficients = (coefficients.getValues()).subList(1, features.size() + 1);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.REGRESSION, ModelUtil.createMiningSchema(schema.getLabel()), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.IDENTITY);

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, null);

	return generalRegressionModel;
}

Source File: VectorSizeHintConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

3 votes

@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorSizeHint transformer = getTransformer();

	int size = transformer.getSize();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	SchemaUtil.checkSize(size, features);

	return features;
}

Source File: IDFModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0

3 votes

@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IDFModel transformer = getTransformer();

	Vector idf = transformer.idf();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	SchemaUtil.checkSize(idf.size(), features);

	List<Feature> result = new ArrayList<>();

	for(int i = 0; i < features.size(); i++){
		Feature feature = features.get(i);
		Double weight = idf.apply(i);

		ProductFeature productFeature = new ProductFeature(encoder, feature, weight){

			private WeightedTermFeature weightedTermFeature = null;


			@Override
			public ContinuousFeature toContinuousFeature(){

				if(this.weightedTermFeature == null){
					TermFeature termFeature = (TermFeature)getFeature();
					Number factor = getFactor();

					this.weightedTermFeature = termFeature.toWeightedTermFeature(factor);
				}

				return this.weightedTermFeature.toContinuousFeature();
			}
		};

		result.add(productFeature);
	}

	return result;
}

Source File: LRMConverter.java From jpmml-r with GNU Affero General Public License v3.0

3 votes

@Override
public Model encodeModel(Schema schema){
	RGenericVector lrm = getObject();

	RDoubleVector coefficients = lrm.getDoubleElement("coefficients");

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	SchemaUtil.checkSize(2, categoricalLabel);

	Object targetCategory = categoricalLabel.getValue(1);

	Double intercept = coefficients.getElement(getInterceptName(), false);

	List<? extends Feature> features = schema.getFeatures();

	SchemaUtil.checkSize(coefficients.size() - (intercept != null ? 1 : 0), features);

	List<Double> featureCoefficients = getFeatureCoefficients(features, coefficients);

	GeneralRegressionModel generalRegressionModel = new GeneralRegressionModel(GeneralRegressionModel.ModelType.GENERALIZED_LINEAR, MiningFunction.CLASSIFICATION, ModelUtil.createMiningSchema(categoricalLabel), null, null, null)
		.setLinkFunction(GeneralRegressionModel.LinkFunction.LOGIT)
		.setOutput(ModelUtil.createProbabilityOutput(DataType.DOUBLE, categoricalLabel));

	GeneralRegressionModelUtil.encodeRegressionTable(generalRegressionModel, features, featureCoefficients, intercept, targetCategory);

	return generalRegressionModel;
}

Source File: LinearClassifier.java From jpmml-sklearn with GNU Affero General Public License v3.0

2 votes

@Override
public Model encodeModel(Schema schema){
	int[] shape = getCoefShape();

	int numberOfClasses = shape[0];
	int numberOfFeatures = shape[1];

	boolean hasProbabilityDistribution = hasProbabilityDistribution();

	List<? extends Number> coef = getCoef();
	List<? extends Number> intercept = getIntercept();

	CategoricalLabel categoricalLabel = (CategoricalLabel)schema.getLabel();

	List<? extends Feature> features = schema.getFeatures();

	if(numberOfClasses == 1){
		SchemaUtil.checkSize(2, categoricalLabel);

		return RegressionModelUtil.createBinaryLogisticClassification(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, 0), intercept.get(0), RegressionModel.NormalizationMethod.LOGIT, hasProbabilityDistribution, schema);
	} else

	if(numberOfClasses >= 3){
		SchemaUtil.checkSize(numberOfClasses, categoricalLabel);

		Schema segmentSchema = (schema.toAnonymousRegressorSchema(DataType.DOUBLE)).toEmptySchema();

		List<RegressionModel> regressionModels = new ArrayList<>();

		for(int i = 0, rows = categoricalLabel.size(); i < rows; i++){
			RegressionModel regressionModel = RegressionModelUtil.createRegression(features, CMatrixUtil.getRow(coef, numberOfClasses, numberOfFeatures, i), intercept.get(i), RegressionModel.NormalizationMethod.LOGIT, segmentSchema)
				.setOutput(ModelUtil.createPredictedOutput(FieldName.create("decisionFunction(" + categoricalLabel.getValue(i) + ")"), OpType.CONTINUOUS, DataType.DOUBLE));

			regressionModels.add(regressionModel);
		}

		return MiningModelUtil.createClassification(regressionModels, RegressionModel.NormalizationMethod.SIMPLEMAX, hasProbabilityDistribution, schema);
	} else

	{
		throw new IllegalArgumentException();
	}
}

org.jpmml.converter.SchemaUtil Java Examples