org.dmg.pmml.DataType Java Examples
The following examples show how to use
org.dmg.pmml.DataType.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Composite.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
@Override public DataType getDataType(){ if(hasTransformers()){ List<? extends Transformer> transformers = getTransformers(); for(Transformer transformer : transformers){ return transformer.getDataType(); } } // End if if(hasFinalEstimator()){ Estimator estimator = getFinalEstimator(); return estimator.getDataType(); } throw new UnsupportedOperationException(); }
Example #2
Source File: Transformer.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
public DataField updateDataField(DataField dataField, OpType opType, DataType dataType, SkLearnEncoder encoder){ FieldName name = dataField.getName(); if(encoder.isFrozen(name)){ return dataField; } switch(dataType){ case DOUBLE: // If the DataField element already specifies a non-default data type, then keep it if(!(DataType.DOUBLE).equals(dataField.getDataType())){ dataType = dataField.getDataType(); } break; } dataField .setOpType(opType) .setDataType(dataType); return dataField; }
Example #3
Source File: TreeUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Derives a transformed schema in which binary features pass through untouched,
 * and every other feature is converted to a continuous feature of the requested data type.
 *
 * @param dataType The data type handed to {@code Feature#toContinuousFeature}.
 * @param schema The schema to transform.
 *
 * @return The result of {@code schema.toTransformedSchema}.
 */
static private Schema toTreeModelSchema(DataType dataType, Schema schema){
	Function<Feature, Feature> converter = (Feature feature) -> {

		if(!(feature instanceof BinaryFeature)){
			return feature.toContinuousFeature(dataType);
		}

		// Binary features are returned as-is
		return feature;
	};

	return schema.toTransformedSchema(converter);
}
Example #4
Source File: TypeUtil.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Coerces a value to a local date.
 * A {@link LocalDate} is returned unchanged; a {@link LocalDateTime} is reduced to its date part.
 *
 * @throws TypeCheckException If the value is neither of the two supported types.
 *
 * @see DataType#DATE
 */
static private LocalDate toDate(Object value){

	if(value instanceof LocalDateTime){
		return ((LocalDateTime)value).toLocalDate();
	} // End if

	if(value instanceof LocalDate){
		return (LocalDate)value;
	}

	throw new TypeCheckException(DataType.DATE, value);
}
Example #5
Source File: RExpUtil.java From jpmml-r with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Translates a type name to a PMML data type:
 * "character" and "factor" map to STRING, "numeric" maps to DOUBLE, and "logical" maps to BOOLEAN.
 *
 * @param type The type name.
 *
 * @return The matching data type.
 *
 * @throws IllegalArgumentException If the type name is not one of the four listed above. The message is the type name.
 */
static public DataType getDataType(String type){
	DataType result;

	switch(type){
		case "numeric":
			result = DataType.DOUBLE;
			break;
		case "logical":
			result = DataType.BOOLEAN;
			break;
		case "character":
		case "factor":
			result = DataType.STRING;
			break;
		default:
			throw new IllegalArgumentException(type);
	}

	return result;
}
Example #6
Source File: TransformerUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Maps a data type to an operational type:
 * STRING and BOOLEAN map to CATEGORICAL; INTEGER, FLOAT and DOUBLE map to CONTINUOUS; DATE and DATE_TIME map to ORDINAL.
 *
 * @param dataType The data type to classify.
 *
 * @return The operational type for the data type.
 *
 * @throws IllegalArgumentException If the data type is none of the above. The message names the offending data type.
 */
static public OpType getOpType(DataType dataType){

	switch(dataType){
		case STRING:
		case BOOLEAN:
			return OpType.CATEGORICAL;
		case INTEGER:
		case FLOAT:
		case DOUBLE:
			return OpType.CONTINUOUS;
		case DATE:
		case DATE_TIME:
			return OpType.ORDINAL;
		default:
			// Carry the rejected value in the message, so that the failure can be diagnosed from the stack trace alone
			throw new IllegalArgumentException("Unsupported data type " + dataType);
	}
}
Example #7
Source File: RegexTokenizerConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
@Override public List<Feature> encodeFeatures(SparkMLEncoder encoder){ RegexTokenizer transformer = getTransformer(); if(!transformer.getGaps()){ throw new IllegalArgumentException("Expected splitter mode, got token matching mode"); } // End if if(transformer.getMinTokenLength() != 1){ throw new IllegalArgumentException("Expected 1 as minimum token length, got " + transformer.getMinTokenLength() + " as minimum token length"); } Feature feature = encoder.getOnlyFeature(transformer.getInputCol()); Field<?> field = feature.getField(); if(transformer.getToLowercase()){ Apply apply = PMMLUtil.createApply(PMMLFunctions.LOWERCASE, feature.ref()); field = encoder.createDerivedField(FeatureUtil.createName("lowercase", feature), OpType.CATEGORICAL, DataType.STRING, apply); } return Collections.singletonList(new DocumentFeature(encoder, field, transformer.getPattern())); }
Example #8
Source File: TypeUtilTest.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Checks the data type that {@code TypeUtil.getConstantDataType} reports for a series of constant literals.
 */
@Test
public void getConstantDataType(){
	// Each row pairs the expected data type with the literal being classified
	Object[][] expectations = {
		{DataType.STRING, ""},
		{DataType.INTEGER, "-1"},
		{DataType.INTEGER, "1"},
		{DataType.INTEGER, "+1"},
		{DataType.STRING, "1E0"},
		{DataType.STRING, "1X"},
		{DataType.DOUBLE, "-1.0"},
		{DataType.DOUBLE, "1.0"},
		{DataType.DOUBLE, "+1.0"},
		{DataType.DOUBLE, "1.0E-1"},
		{DataType.DOUBLE, "1.0E1"},
		{DataType.DOUBLE, "1.0E+1"},
		{DataType.STRING, "1.0X"}
	};

	for(Object[] expectation : expectations){
		String literal = (String)expectation[1];

		assertEquals(expectation[0], TypeUtil.getConstantDataType(literal));
	}
}
Example #9
Source File: Classification.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates a categorical label for the target field.
 * Without target categories, they are generated from {@code num_class} and the data field is typed as INTEGER;
 * with target categories, the data field is typed as STRING.
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories, or {@code null} to have them generated.
 * @param encoder The encoder that creates the data field.
 *
 * @return A categorical label that wraps the created data field.
 *
 * @throws IllegalArgumentException If the number of supplied target categories differs from {@code num_class}.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	DataType dataType;

	if(targetCategories != null){
		int actualSize = targetCategories.size();

		if(actualSize != this.num_class){
			throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + actualSize + " target categories");
		}

		dataType = DataType.STRING;
	} else {
		targetCategories = LabelUtil.createTargetCategories(this.num_class);

		dataType = DataType.INTEGER;
	}

	DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

	return new CategoricalLabel(dataField);
}
Example #10
Source File: FunctionTransformer.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Applies the configured universal function to every feature.
 * Each feature is converted to a continuous feature, and wrapped into a continuous double derived field.
 *
 * @param features The input features.
 * @param encoder The encoder that creates (or reuses) the derived fields.
 *
 * @return The input list itself when no function is configured, otherwise a new list of continuous features.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	UFunc func = getFunc();

	// Nothing to apply
	if(func == null){
		return features;
	}

	List<Feature> result = new ArrayList<>();

	for(Feature feature : features){
		ContinuousFeature continuousFeature = feature.toContinuousFeature();

		DerivedField derivedField = encoder.ensureDerivedField(
			FeatureUtil.createName(func.getName(), continuousFeature),
			OpType.CONTINUOUS,
			DataType.DOUBLE,
			() -> UFuncUtil.encodeUFunc(func, Collections.singletonList(continuousFeature.ref()))
		);

		result.add(new ContinuousFeature(encoder, derivedField));
	}

	return result;
}
Example #11
Source File: OrdinalEncoder.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Determines the single data type shared by the category lists of all features.
 *
 * @return The shared data type, or STRING when no data type could be determined.
 *
 * @throws UnsupportedOperationException If two category lists resolve to different data types.
 */
@Override
public DataType getDataType(){
	DataType common = null;

	for(List<?> featureCategories : getCategories()){
		DataType current = TypeUtil.getDataType(featureCategories, null);

		// The first resolved data type becomes the reference
		if(common == null){
			common = current;

			continue;
		} // End if

		if(!(common).equals(current)){
			throw new UnsupportedOperationException();
		}
	}

	return (common != null ? common : DataType.STRING);
}
Example #12
Source File: RExpEncoder.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a data field, deriving the data type from the values when no data type is given.
 *
 * @param name The field name.
 * @param opType The operational type.
 * @param dataType The data type, or {@code null} to have it resolved via {@code TypeUtil.getDataType(values)}.
 * @param values The field values.
 *
 * @return The data field created by the superclass.
 */
@Override
public DataField createDataField(FieldName name, OpType opType, DataType dataType, List<?> values){
	DataType effectiveDataType = (dataType != null ? dataType : TypeUtil.getDataType(values));

	return super.createDataField(name, opType, effectiveDataType, values);
}
Example #13
Source File: RichDataFieldTest.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks the value map of a rich data field, first with the STRING data type and then with the INTEGER data type.
 */
@Test
public void getValueMapping(){
	DataField dataField = new DataField(FieldName.create("x"), OpType.CATEGORICAL, DataType.STRING)
		.addValues(
			createValue("0", Value.Property.INVALID),
			createValue("1", Value.Property.VALID),
			createValue("2", null),
			createValue("3", null),
			createValue("N/A", Value.Property.MISSING)
		);

	// String-typed field: the map is keyed by strings
	Map<?, Integer> stringMap = new RichDataField(dataField).getMap();

	assertEquals(5, stringMap.size());

	assertEquals(FieldValue.STATUS_UNKNOWN_INVALID, stringMap.get("0"));
	assertEquals((Integer)1, stringMap.get("1"));
	assertEquals((Integer)2, stringMap.get("2"));
	assertEquals((Integer)3, stringMap.get("3"));
	assertEquals(FieldValue.STATUS_MISSING, stringMap.get("N/A"));

	dataField.setDataType(DataType.INTEGER);

	// Integer-typed field: the map is keyed by integers
	Map<?, Integer> integerMap = new RichDataField(dataField).getMap();

	assertEquals(4, integerMap.size());

	assertEquals(FieldValue.STATUS_UNKNOWN_INVALID, integerMap.get(0));
	assertEquals((Integer)1, integerMap.get(1));
	assertEquals((Integer)2, integerMap.get(2));
	assertEquals((Integer)3, integerMap.get(3));
}
Example #14
Source File: ContinuousValue.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a continuous value of the FLOAT data type.
 * The value is flagged as not valid when the stored float is NaN.
 *
 * @param value The value handed to the superclass constructor.
 */
ContinuousFloat(Object value){
	super(DataType.FLOAT, value);

	boolean nan = ((Float)getValue()).isNaN();

	if(nan){
		setValid(false);
	}
}
Example #15
Source File: ExpressionTranslatorTest.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
@Test public void translateLogicalExpression(){ String string = "isnull(x1) and not(isnotnull(x2))"; FieldRef first = new FieldRef(FieldName.create("x1")); FieldRef second = new FieldRef(FieldName.create("x2")); Apply expected = PMMLUtil.createApply(PMMLFunctions.AND) .addExpressions(PMMLUtil.createApply(PMMLFunctions.ISMISSING) .addExpressions(first) ) // "not(isnotnull(..)) -> "isnull(..)" .addExpressions(PMMLUtil.createApply(PMMLFunctions.ISMISSING) .addExpressions(second) ); checkExpression(expected, string); string = "(x1 <= 0) or (x2 >= 0)"; expected = PMMLUtil.createApply(PMMLFunctions.OR) .addExpressions(PMMLUtil.createApply(PMMLFunctions.LESSOREQUAL) .addExpressions(first, PMMLUtil.createConstant(0, DataType.DOUBLE)) ) .addExpressions(PMMLUtil.createApply(PMMLFunctions.GREATEROREQUAL) .addExpressions(second, PMMLUtil.createConstant(0, DataType.DOUBLE)) ); checkExpression(expected, string); }
Example #16
Source File: TermFeature.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Builds a function application that invokes the define function with two arguments:
 * a reference to the feature, and the value as a string constant.
 *
 * @return The function application.
 */
public Apply createApply(){
	DefineFunction function = getDefineFunction();
	Feature termFeature = getFeature();

	Constant term = PMMLUtil.createConstant(getValue(), DataType.STRING);

	return PMMLUtil.createApply(function.getName(), termFeature.ref(), term);
}
Example #17
Source File: KMeans.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the estimator as a center-based clustering model.
 * One cluster is created per row of the cluster centers matrix; the cluster size is taken from the label counts when labels are available.
 *
 * @param schema The schema that supplies the label and the features.
 *
 * @return The clustering model, with an output named "Cluster".
 */
@Override
public ClusteringModel encodeModel(Schema schema){
	int[] shape = getClusterCentersShape();

	int rows = shape[0];
	int columns = shape[1];

	List<? extends Number> centers = getClusterCenters();

	Multiset<Integer> labelCounts = HashMultiset.create();

	List<Integer> labels = getLabels();
	if(labels != null){
		labelCounts.addAll(labels);
	}

	// The multiset is not modified past this point
	boolean hasLabelCounts = (labelCounts.size() > 0);

	List<Cluster> clusters = new ArrayList<>();

	for(int row = 0; row < rows; row++){
		Integer size = (hasLabelCounts ? labelCounts.count(row) : null);

		Cluster cluster = new Cluster(PMMLUtil.createRealArray(CMatrixUtil.getRow(centers, rows, columns, row)))
			.setId(String.valueOf(row))
			.setSize(size);

		clusters.add(cluster);
	}

	ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
		.setCompareFunction(CompareFunction.ABS_DIFF);

	return new ClusteringModel(MiningFunction.CLUSTERING, ClusteringModel.ModelClass.CENTER_BASED, rows, ModelUtil.createMiningSchema(schema.getLabel()), comparisonMeasure, ClusteringModelUtil.createClusteringFields(schema.getFeatures()), clusters)
		.setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));
}
Example #18
Source File: AppPMMLUtilsTest.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Asserts the name, operational type and data type of a data field.
 *
 * @param field the data field to inspect
 * @param name the expected field name value
 * @param categorical {@code null} to expect no operational type and no data type;
 *  {@code true} to expect CATEGORICAL/STRING; {@code false} to expect CONTINUOUS/DOUBLE
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
  assertEquals(name, field.getName().getValue());

  if (categorical == null) {
    assertNull(field.getOpType());
    assertNull(field.getDataType());
    return;
  }

  boolean expectCategorical = categorical;
  OpType expectedOpType = expectCategorical ? OpType.CATEGORICAL : OpType.CONTINUOUS;
  DataType expectedDataType = expectCategorical ? DataType.STRING : DataType.DOUBLE;

  assertEquals(expectedOpType, field.getOpType());
  assertEquals(expectedDataType, field.getDataType());
}
Example #19
Source File: GBMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the tree models as a binary logistic classification.
 * The inner mining model is built against an anonymous regressor schema and exposes its prediction as a continuous double output named "gbmValue".
 *
 * @param treeModels The tree models.
 * @param initF The initial value handed to {@code createMiningModel}.
 * @param coefficient The coefficient; its negation is handed to the classification builder.
 * @param schema The schema of the resulting classification model.
 *
 * @return The classification model.
 */
private MiningModel encodeBinaryClassification(List<TreeModel> treeModels, Double initF, double coefficient, Schema schema){
	MiningModel regressor = createMiningModel(treeModels, initF, schema.toAnonymousRegressorSchema(DataType.DOUBLE))
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("gbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	double negatedCoefficient = -coefficient;

	return MiningModelUtil.createBinaryLogisticClassification(regressor, negatedCoefficient, 0d, RegressionModel.NormalizationMethod.LOGIT, true, schema);
}
Example #20
Source File: IndexToStringConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the transformer as a single categorical feature.
 * The feature is backed by a categorical string data field, whose values are the labels of the transformer.
 *
 * @param encoder The encoder that creates the data field.
 *
 * @return A singleton list holding the categorical feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString indexToString = getTransformer();

	DataField labelField = encoder.createDataField(
		formatName(indexToString),
		OpType.CATEGORICAL,
		DataType.STRING,
		Arrays.asList(indexToString.getLabels())
	);

	Feature labelFeature = new CategoricalFeature(encoder, labelField);

	return Collections.singletonList(labelFeature);
}
Example #21
Source File: NaNAsMissingDecorator.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Registers the string "NaN" as a missing value on data fields of the FLOAT or DOUBLE data type.
 * Data fields of any other data type are left unchanged.
 *
 * @param dataField The data field being visited.
 *
 * @return The result of the superclass visit.
 */
@Override
public VisitorAction visit(DataField dataField){
	boolean floatingPoint;

	switch(dataField.getDataType()){
		case FLOAT:
		case DOUBLE:
			floatingPoint = true;
			break;
		default:
			floatingPoint = false;
			break;
	}

	if(floatingPoint){
		PMMLUtil.addValues(dataField, Collections.singletonList("NaN"), Property.MISSING);
	}

	return super.visit(dataField);
}
Example #22
Source File: LogisticRegression.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the trees as a regression with the LOGIT normalization method.
 * The inner mining model is built against an anonymous schema and exposes its prediction as a continuous float output named "xgbValue".
 *
 * @return The regression-wrapped mining model.
 */
@Override
public MiningModel encodeMiningModel(List<RegTree> trees, List<Float> weights, float base_score, Integer ntreeLimit, Schema schema){
	MiningModel boosterModel = createMiningModel(trees, weights, base_score, ntreeLimit, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("xgbValue"), OpType.CONTINUOUS, DataType.FLOAT));

	return MiningModelUtil.createRegression(boosterModel, RegressionModel.NormalizationMethod.LOGIT, schema);
}
Example #23
Source File: Regression.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a continuous label that is backed by a continuous double data field.
 *
 * @param targetField The name of the target field.
 * @param targetCategories Must be {@code null} or empty.
 * @param encoder The encoder that creates the data field.
 *
 * @return A continuous label that wraps the created data field.
 *
 * @throws IllegalArgumentException If any target categories are supplied.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
	boolean hasCategories = (targetCategories != null) && !targetCategories.isEmpty();

	if(hasCategories){
		throw new IllegalArgumentException("Regression requires zero target categories");
	}

	return new ContinuousLabel(encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE));
}
Example #24
Source File: TypeUtil.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Infers the data type of a constant from its string representation.
 *
 * <ul>
 *   <li>The empty string is STRING.</li>
 *   <li>"NaN", "INF" and "-INF" (case-insensitive) are DOUBLE.</li>
 *   <li>A value that contains a dot is DOUBLE if {@link Double#parseDouble(String)} accepts it.</li>
 *   <li>A value without a dot is INTEGER if {@link Long#parseLong(String)} accepts it.</li>
 *   <li>Everything else is STRING.</li>
 * </ul>
 *
 * @param value The string representation of the constant.
 *
 * @return The inferred data type.
 */
static public DataType getConstantDataType(String value){

	if(("").equals(value)){
		return DataType.STRING;
	}

	boolean specialDouble = ("NaN").equalsIgnoreCase(value) || ("INF").equalsIgnoreCase(value) || ("-INF").equalsIgnoreCase(value);
	if(specialDouble){
		return DataType.DOUBLE;
	}

	boolean hasDecimalPoint = (value.indexOf('.') >= 0);

	try {
		if(hasDecimalPoint){
			Double.parseDouble(value);

			return DataType.DOUBLE;
		}

		Long.parseLong(value);

		return DataType.INTEGER;
	} catch(NumberFormatException nfe){
		// Not parseable as a number
		return DataType.STRING;
	}
}
Example #25
Source File: MemoryMeasurerTest.java From jpmml-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks that a data field without an operational type and data type measures the same
 * as a data field with both set, when both hold the same interval.
 */
@Test
public void measure(){
	Interval unitInterval = new Interval(Interval.Closure.CLOSED_CLOSED)
		.setLeftMargin(0d)
		.setRightMargin(1d);

	DataField untyped = new DataField(FieldName.create("x"), null, null)
		.addIntervals(unitInterval);

	DataField typed = new DataField(FieldName.create("x"), OpType.CONTINUOUS, DataType.DOUBLE)
		.addIntervals(unitInterval);

	assertEquals(getSize(untyped), getSize(typed));
}
Example #26
Source File: CategoricalValue.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Compares this value to another field value.
 * A scalar value of the BOOLEAN data type is compared as a boolean; anything else is compared by its raw value.
 *
 * @param value The field value to compare against.
 *
 * @return The comparison result.
 */
@Override
public int compareToValue(FieldValue value){
	boolean booleanScalar = (value instanceof ScalarValue) && (DataType.BOOLEAN).equals(((ScalarValue)value).getDataType());

	if(!booleanScalar){
		return compareToValue(value.getValue());
	}

	ScalarValue that = (ScalarValue)value;

	return Boolean.compare(this.asBoolean(), that.asBoolean());
}
Example #27
Source File: ExpressionTranslatorTest.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks that an if-else expression over a missing value check translates to
 * an IF function application with boolean constants as branches.
 */
@Test
public void translateIfExpression(){
	Expression condition = PMMLUtil.createApply(PMMLFunctions.ISMISSING)
		.addExpressions(new FieldRef(FieldName.create("x")));

	Expression whenTrue = PMMLUtil.createConstant("true", DataType.BOOLEAN);
	Expression whenFalse = PMMLUtil.createConstant("false", DataType.BOOLEAN);

	Expression expected = PMMLUtil.createApply(PMMLFunctions.IF)
		.addExpressions(condition)
		.addExpressions(whenTrue, whenFalse);

	Expression actual = ExpressionTranslator.translateExpression("if(is.na(x)) TRUE else FALSE");

	assertTrue(ReflectionUtil.equals(expected, actual));
}
Example #28
Source File: SVMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Sets up the label and the features of the encoder from the formula of the SVM object, and scales the features afterwards.
 *
 * <p>
 * The label depends on the SVM type: the classification types use the "levels" element,
 * the one-classification type uses an anonymous continuous double label, and the regression types use no levels.
 * </p>
 *
 * @param encoder The encoder to configure.
 */
private void encodeFormula(RExpEncoder encoder){
	RGenericVector svm = getObject();

	RDoubleVector typeVector = svm.getDoubleElement("type");
	RDoubleVector supportVectors = svm.getDoubleElement("SV");
	RVector<?> levels = svm.getVectorElement("levels");
	RExp terms = svm.getElement("terms");

	RGenericVector xlevels = DecorationUtil.getGenericElement(svm, "xlevels");

	// The scalar type code is used as an index into the enum constants
	int typeIndex = ValueUtil.asInt(typeVector.asScalar());
	Type svmType = Type.values()[typeIndex];

	// NOTE(review): the row names are looked up but not used; the call is retained as-is
	RStringVector rowNames = supportVectors.dimnames(0);
	RStringVector columnNames = supportVectors.dimnames(1);

	FormulaContext context = new XLevelsFormulaContext(xlevels);

	Formula formula = FormulaUtil.createFormula(terms, context, encoder);

	switch(svmType){
		case EPS_REGRESSION:
		case NU_REGRESSION:
			FormulaUtil.setLabel(formula, terms, null, encoder);
			break;
		case ONE_CLASSIFICATION:
			encoder.setLabel(new ContinuousLabel(null, DataType.DOUBLE));
			break;
		case C_CLASSIFICATION:
		case NU_CLASSIFICATION:
			FormulaUtil.setLabel(formula, terms, levels, encoder);
			break;
		default:
			break;
	}

	FormulaUtil.addFeatures(formula, columnNames, true, encoder);

	scaleFeatures(encoder);
}
Example #29
Source File: PoissonRegression.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the trees as a regression with the EXP normalization method.
 * The inner mining model is built by the superclass against an anonymous schema, and exposes its prediction as a continuous double output named "lgbmValue".
 *
 * @return The regression-wrapped mining model.
 */
@Override
public MiningModel encodeMiningModel(List<Tree> trees, Integer numIteration, Schema schema){
	MiningModel boosterModel = super.encodeMiningModel(trees, numIteration, schema.toAnonymousSchema())
		.setOutput(ModelUtil.createPredictedOutput(FieldName.create("lgbmValue"), OpType.CONTINUOUS, DataType.DOUBLE));

	return MiningModelUtil.createRegression(boosterModel, RegressionModel.NormalizationMethod.EXP, schema);
}
Example #30
Source File: Functions.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Evaluates the function over two field values.
 * The result is a continuous field value, whose data type is the common data type of the two arguments.
 *
 * @param first The first argument.
 * @param second The second argument.
 *
 * @return The field value that holds the integer result of the numeric evaluation.
 */
@Override
public FieldValue evaluate(FieldValue first, FieldValue second){
	DataType commonDataType = TypeUtil.getCommonDataType(first.getDataType(), second.getDataType());

	Number left = first.asNumber();
	Number right = second.asNumber();

	Integer outcome = evaluate(left, right);

	return FieldValueUtil.create(commonDataType, OpType.CONTINUOUS, outcome);
}