org.dmg.pmml.DataField Java Examples
The following examples show how to use
org.dmg.pmml.DataField.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
protected boolean assessParentCompatibility(){ List<InputField> inputFields = getInputFields(); for(InputField inputField : inputFields){ Field<?> field = inputField.getField(); MiningField miningField = inputField.getMiningField(); if(!(field instanceof DataField)){ continue; } // End if if(!InputFieldUtil.isDefault(field, miningField)){ return false; } } return true; }
Example #2
Source File: Classification.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates the categorical label of a classification objective.
 *
 * When no target categories are given, they are generated by
 * <code>LabelUtil.createTargetCategories</code> and the field is typed as integer;
 * otherwise the given categories are used and the field is typed as string.
 *
 * @throws IllegalArgumentException If the number of given target categories differs from <code>num_class_</code>.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
    final DataType dataType;

    if(targetCategories != null){
        int actualCount = targetCategories.size();

        if(actualCount != this.num_class_){
            throw new IllegalArgumentException("Expected " + this.num_class_ + " target categories, got " + actualCount + " target categories");
        }

        dataType = DataType.STRING;
    } else {
        targetCategories = LabelUtil.createTargetCategories(this.num_class_);

        dataType = DataType.INTEGER;
    }

    DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

    return new CategoricalLabel(dataField);
}
Example #3
Source File: Classification.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates the categorical label of a classification objective.
 *
 * Missing target categories are generated by <code>LabelUtil.createTargetCategories</code>
 * (integer-typed field); explicitly given ones are used as-is (string-typed field).
 *
 * @throws IllegalArgumentException If the number of given target categories differs from <code>num_class</code>.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
    final DataType dataType;

    if(targetCategories != null){
        int actualCount = targetCategories.size();

        if(actualCount != this.num_class){
            throw new IllegalArgumentException("Expected " + this.num_class + " target categories, got " + actualCount + " target categories");
        }

        dataType = DataType.STRING;
    } else {
        targetCategories = LabelUtil.createTargetCategories(this.num_class);

        dataType = DataType.INTEGER;
    }

    DataField dataField = encoder.createDataField(targetField, OpType.CATEGORICAL, dataType, targetCategories);

    return new CategoricalLabel(dataField);
}
Example #4
Source File: AppPMMLUtilsTest.java From oryx with Apache License 2.0 | 6 votes |
/**
 * Builds a two-field dictionary (continuous "foo", categorical "bar" with values "b", "a")
 * and verifies the encodings derived for the categorical field at index 1.
 */
@Test
public void testBuildCategoricalEncoding() {
  // Index of the only categorical field ("bar") within the dictionary
  final int barIndex = 1;

  DataField fooField = new DataField(FieldName.create("foo"), OpType.CONTINUOUS, DataType.DOUBLE);
  DataField barField = new DataField(FieldName.create("bar"), OpType.CATEGORICAL, DataType.STRING);
  barField.addValues(new Value("b"), new Value("a"));

  List<DataField> fields = new ArrayList<>();
  fields.add(fooField);
  fields.add(barField);

  DataDictionary dictionary = new DataDictionary(fields).setNumberOfFields(fields.size());
  CategoricalValueEncodings encodings = AppPMMLUtils.buildCategoricalValueEncodings(dictionary);

  assertEquals(2, encodings.getValueCount(barIndex));

  // Value -> encoding follows declaration order: "b" first, then "a"
  assertEquals(0, encodings.getValueEncodingMap(barIndex).get("b").intValue());
  assertEquals(1, encodings.getValueEncodingMap(barIndex).get("a").intValue());

  // Encoding -> value is the inverse mapping
  assertEquals("b", encodings.getEncodingValueMap(barIndex).get(0));
  assertEquals("a", encodings.getEncodingValueMap(barIndex).get(1));

  assertEquals(Collections.singletonMap(barIndex, 2), encodings.getCategoryCounts());
}
Example #5
Source File: Transformer.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 6 votes |
public DataField updateDataField(DataField dataField, OpType opType, DataType dataType, SkLearnEncoder encoder){ FieldName name = dataField.getName(); if(encoder.isFrozen(name)){ return dataField; } switch(dataType){ case DOUBLE: // If the DataField element already specifies a non-default data type, then keep it if(!(DataType.DOUBLE).equals(dataField.getDataType())){ dataType = dataField.getDataType(); } break; } dataField .setOpType(opType) .setDataType(dataType); return dataField; }
Example #6
Source File: ModelManager.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Returns the explicitly assigned default data field if there is one;
 * otherwise picks one of the shared constants based on the mining function
 * (and, for regression, on the math context).
 *
 * @return A synthetic {@link DataField} element describing the default target field.
 * May be <code>null</code> for mining functions other than regression, classification and clustering.
 */
public DataField getDefaultDataField(){

    if(this.defaultDataField != null){
        return this.defaultDataField;
    }

    switch(getMiningFunction()){
        case CLASSIFICATION:
        case CLUSTERING:
            return ModelManager.DEFAULT_TARGET_CATEGORICAL_STRING;
        case REGRESSION:
            {
                switch(getMathContext()){
                    case FLOAT:
                        return ModelManager.DEFAULT_TARGET_CONTINUOUS_FLOAT;
                    default:
                        return ModelManager.DEFAULT_TARGET_CONTINUOUS_DOUBLE;
                }
            }
        default:
            return null;
    }
}
Example #7
Source File: FieldUtil.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Collects the values of a data field whose property is <code>VALID</code>.
 *
 * @return A new list; empty if the data field declares no values.
 *
 * @throws MissingAttributeException If a <code>Value</code> element has no value.
 */
static
private List<Object> parseCategories(DataField dataField){
    List<Object> categories = new ArrayList<>();

    if(!dataField.hasValues()){
        return categories;
    }

    for(Value candidate : dataField.getValues()){
        Object rawValue = candidate.getValue();

        // The value attribute is checked before the property is looked at
        if(rawValue == null){
            throw new MissingAttributeException(candidate, PMMLAttributes.VALUE_VALUE);
        }

        switch(candidate.getProperty()){
            case VALID:
                categories.add(rawValue);
                break;
            default:
                break;
        }
    }

    return categories;
}
Example #8
Source File: MapHolderParser.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Replaces, in place, every data field that declares values with a {@link RichDataField}
 * wrapping it, then delegates to the superclass.
 */
@Override
public VisitorAction visit(DataDictionary dataDictionary){

    if(dataDictionary.hasDataFields()){
        List<DataField> fields = dataDictionary.getDataFields();

        for(int i = 0, max = fields.size(); i < max; i++){
            DataField current = fields.get(i);

            if(current.hasValues()){
                fields.set(i, new RichDataField(current));
            }
        }
    }

    return super.visit(dataDictionary);
}
Example #9
Source File: MiningModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * @return <code>null</code> when the segmentation uses the "select first", "select all" or
 * "model chain" method; the superclass result for every other multiple model method.
 */
@Override
public DataField getDefaultDataField(){
    Segmentation segmentation = getModel().getSegmentation();

    switch(segmentation.getMultipleModelMethod()){
        case MODEL_CHAIN:
        case SELECT_ALL:
        case SELECT_FIRST:
            return null;
        default:
            return super.getDefaultDataField();
    }
}
Example #10
Source File: NearestNeighborModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * @return <code>null</code> for the regression, classification and mixed mining functions;
 * the superclass result for every other mining function.
 */
@Override
public DataField getDefaultDataField(){

    switch(getMiningFunction()){
        case CLASSIFICATION:
        case MIXED:
        case REGRESSION:
            return null;
        default:
            return super.getDefaultDataField();
    }
}
Example #11
Source File: ModelEvaluationContext.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares a user-supplied value for the named field.
 *
 * Active, group and order fields go through <code>InputFieldUtil.prepareInputValue</code>;
 * predicted and target fields go through <code>InputFieldUtil.prepareResidualInputValue</code>.
 *
 * @throws MissingFieldException If the model evaluator knows no data field by that name.
 * @throws InvisibleFieldException If the model evaluator knows no mining field by that name.
 * @throws UnsupportedAttributeException If the usage type of the mining field is none of the above.
 */
@Override
protected FieldValue prepare(FieldName name, Object value){
    ModelEvaluator<?> evaluator = getModelEvaluator();

    DataField dataField = evaluator.getDataField(name);
    if(dataField == null){
        throw new MissingFieldException(name);
    }

    MiningField miningField = evaluator.getMiningField(name);
    if(miningField == null){
        throw new InvisibleFieldException(name);
    }

    MiningField.UsageType usageType = miningField.getUsageType();

    switch(usageType){
        case PREDICTED:
        case TARGET:
            return InputFieldUtil.prepareResidualInputValue(dataField, miningField, value);
        case ACTIVE:
        case GROUP:
        case ORDER:
            return InputFieldUtil.prepareInputValue(dataField, miningField, value);
        default:
            throw new UnsupportedAttributeException(miningField, usageType);
    }
}
Example #12
Source File: BinaryTreeConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Walks the tree depth-first (left branch before right branch) and, for every
 * non-terminal node whose split variable is not yet known to the encoder, creates
 * a data field, registers it as a feature and assigns it the next feature index.
 *
 * @throws IllegalArgumentException If the split point is neither an integer vector nor a double vector.
 */
private void encodeVariableList(RGenericVector tree, RExpEncoder encoder){
    RBooleanVector terminalFlag = tree.getBooleanElement("terminal");
    RGenericVector split = tree.getGenericElement("psplit");
    RGenericVector leftChild = tree.getGenericElement("left");
    RGenericVector rightChild = tree.getGenericElement("right");

    // Terminal nodes carry no split variable
    if((Boolean.TRUE).equals(terminalFlag.asScalar())){
        return;
    }

    RNumberVector<?> splitpoint = split.getNumericElement("splitpoint");
    RStringVector variableName = split.getStringElement("variableName");

    FieldName name = FieldName.create(variableName.asScalar());

    if(encoder.getDataField(name) == null){
        DataField created;

        if(splitpoint instanceof RIntegerVector){
            // The categories are taken from the "levels" attribute of the split point
            RStringVector levels = splitpoint.getStringAttribute("levels");

            created = encoder.createDataField(name, OpType.CATEGORICAL, null, levels.getValues());
        } else

        if(splitpoint instanceof RDoubleVector){
            created = encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE);
        } else

        {
            throw new IllegalArgumentException();
        }

        encoder.addFeature(created);

        this.featureIndexes.put(name, this.featureIndexes.size());
    }

    encodeVariableList(leftChild, encoder);
    encodeVariableList(rightChild, encoder);
}
Example #13
Source File: Regression.java From jpmml-xgboost with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates the continuous, float-typed label of a regression objective.
 *
 * @param targetField The name of the target field.
 * @param targetCategories The target categories. Must be <code>null</code> or empty.
 * @param encoder The encoder that creates the target data field.
 *
 * @return A {@link ContinuousLabel} backed by the newly created data field.
 *
 * @throws IllegalArgumentException If one or more target categories are given.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){

    // An empty list holds zero target categories, so it is as acceptable as null
    if(targetCategories != null && !targetCategories.isEmpty()){
        throw new IllegalArgumentException("Regression requires zero target categories");
    }

    DataField dataField = encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.FLOAT);

    return new ContinuousLabel(dataField);
}
Example #14
Source File: SparkMLEncoder.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a data field for the named schema column, deriving the operational type
 * and data type from the Spark data type of that column:
 * string and boolean columns become categorical, integral and double columns become continuous.
 *
 * @throws IllegalArgumentException If the Spark data type is not string, integral, double or boolean.
 */
public DataField createDataField(FieldName name){
    StructField field = getSchema().apply(name.getValue());

    org.apache.spark.sql.types.DataType sparkDataType = field.dataType();

    final OpType opType;
    final DataType dataType;

    if(sparkDataType instanceof StringType){
        opType = OpType.CATEGORICAL;
        dataType = DataType.STRING;
    } else

    if(sparkDataType instanceof IntegralType){
        opType = OpType.CONTINUOUS;
        dataType = DataType.INTEGER;
    } else

    if(sparkDataType instanceof DoubleType){
        opType = OpType.CONTINUOUS;
        dataType = DataType.DOUBLE;
    } else

    if(sparkDataType instanceof BooleanType){
        opType = OpType.CATEGORICAL;
        dataType = DataType.BOOLEAN;
    } else

    {
        throw new IllegalArgumentException("Expected string, integral, double or boolean data type, got " + sparkDataType.typeName() + " data type");
    }

    return createDataField(name, opType, dataType);
}
Example #15
Source File: PMMLPipeline.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates one data field per active field name, all sharing the given operational type
 * and data type, and wraps each of them into a {@link WildcardFeature}.
 *
 * @return The features, in the order of the active field names.
 */
static
private List<Feature> initFeatures(List<String> activeFields, OpType opType, DataType dataType, SkLearnEncoder encoder){
    List<Feature> features = new ArrayList<>();

    for(int i = 0; i < activeFields.size(); i++){
        FieldName name = FieldName.create(activeFields.get(i));

        DataField dataField = encoder.createDataField(name, opType, dataType);

        features.add(new WildcardFeature(encoder, dataField));
    }

    return features;
}
Example #16
Source File: Regression.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates the continuous, double-typed label of a regression objective.
 *
 * @throws IllegalArgumentException If one or more target categories are given.
 */
@Override
public Label encodeLabel(FieldName targetField, List<?> targetCategories, PMMLEncoder encoder){
    boolean hasCategories = (targetCategories != null) && !targetCategories.isEmpty();

    if(hasCategories){
        throw new IllegalArgumentException("Regression requires zero target categories");
    }

    return new ContinuousLabel(encoder.createDataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE));
}
Example #17
Source File: NaNAsMissingDecorator.java From jpmml-lightgbm with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Adds the string "NaN" as a value with the <code>MISSING</code> property to every
 * float- or double-typed data field, then delegates to the superclass.
 */
@Override
public VisitorAction visit(DataField dataField){

    switch(dataField.getDataType()){
        case DOUBLE:
        case FLOAT:
            PMMLUtil.addValues(dataField, Collections.singletonList("NaN"), Property.MISSING);
            break;
        default:
            // Other data types are left untouched
            break;
    }

    return super.visit(dataField);
}
Example #18
Source File: IndexToStringConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates a categorical, string-typed data field whose categories are the labels
 * of the transformer.
 *
 * @return A single-element list holding the categorical feature for that data field.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
    IndexToString indexToString = getTransformer();

    DataField dataField = encoder.createDataField(
        formatName(indexToString),
        OpType.CATEGORICAL,
        DataType.STRING,
        Arrays.asList(indexToString.getLabels())
    );

    Feature feature = new CategoricalFeature(encoder, dataField);

    return Collections.singletonList(feature);
}
Example #19
Source File: InputFieldUtilTest.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Empties the interval list and then the value list of a data field.
 */
static
private void clearDomain(DataField dataField){
    dataField.getIntervals().clear();
    dataField.getValues().clear();
}
Example #20
Source File: CategoricalDomain.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes a wildcard feature as a categorical feature.
 *
 * With a non-empty list of values, the feature is converted using the standardized values.
 * With no values (<code>null</code> or empty), the underlying data field is switched to the
 * categorical operational type and an {@link ObjectFeature} is returned instead.
 */
@Override
public Feature encode(WildcardFeature wildcardFeature, List<?> values){
    PMMLEncoder encoder = wildcardFeature.getEncoder();

    boolean hasValues = (values != null) && !values.isEmpty();

    if(hasValues){
        return wildcardFeature.toCategoricalFeature(standardizeValues(wildcardFeature.getDataType(), values));
    }

    DataField dataField = (DataField)encoder.getField(wildcardFeature.getName());

    dataField.setOpType(OpType.CATEGORICAL);

    return new ObjectFeature(encoder, dataField.getName(), dataField.getDataType());
}
Example #21
Source File: GLMConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the schema via the superclass and then, for classification-type families,
 * replaces the label with a categorical one whose categories are the factor levels
 * found in the optional "model" element.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
    RGenericVector glm = getObject();

    RGenericVector family = glm.getGenericElement("family");
    // The "model" element is looked up with a <code>false</code> flag and may come back as null
    RGenericVector modelFrame = glm.getGenericElement("model", false);

    RStringVector familyName = family.getStringElement("family");

    super.encodeSchema(encoder);

    switch(getMiningFunction(familyName.asScalar())){
        case CLASSIFICATION:
            {
                Label label = encoder.getLabel();

                if(modelFrame != null){
                    RIntegerVector factor = modelFrame.getFactorElement((label.getName()).getValue());

                    DataField categoricalField = (DataField)encoder.toCategorical(label.getName(), RExpUtil.getFactorLevels(factor));

                    encoder.setLabel(categoricalField);
                }
            }
            break;
        default:
            break;
    }
}
Example #22
Source File: KMeansConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Registers one continuous, double-typed feature per column name of the "centers" element.
 */
@Override
public void encodeSchema(RExpEncoder encoder){
    RGenericVector kmeans = getObject();

    RStringVector columnNames = kmeans.getDoubleElement("centers").dimnames(1);

    for(int column = 0; column < columnNames.size(); column++){
        FieldName name = FieldName.create(columnNames.getValue(column));

        encoder.addFeature(encoder.createDataField(name, OpType.CONTINUOUS, DataType.DOUBLE));
    }
}
Example #23
Source File: XPathUtilTest.java From jpmml-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Verifies the XPath-like formatting of the <code>DataField</code> element,
 * its attributes and its child elements.
 */
@Test
public void formatDataField() throws Exception {
    // Element
    assertEquals(
        "DataField",
        XPathUtil.formatElement(DataField.class));

    // Attribute and child element, resolved through the generic entry point
    assertEquals(
        "DataField@name",
        XPathUtil.formatElementOrAttribute(PMMLAttributes.DATAFIELD_NAME));
    assertEquals(
        "DataField/Value",
        XPathUtil.formatElementOrAttribute(PMMLElements.DATAFIELD_VALUES));

    // Attribute without and with a value
    assertEquals(
        "DataField@isCyclic",
        XPathUtil.formatAttribute(PMMLAttributes.DATAFIELD_CYCLIC, null));
    assertEquals(
        "DataField@isCyclic=0",
        XPathUtil.formatAttribute(PMMLAttributes.DATAFIELD_CYCLIC, "0"));
}
Example #24
Source File: EnumUtilTest.java From jpmml-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Verifies that the Java constant name ("ZERO") and the value ("0") of
 * <code>DataField.Cyclic.ZERO</code> are distinct, and that <code>EnumUtil</code> reports the latter.
 */
@Test
public void getEnumValue(){
    final DataField.Cyclic cyclic = DataField.Cyclic.ZERO;

    String constantName = cyclic.name();
    assertEquals("ZERO", constantName);

    String directValue = cyclic.value();
    assertEquals("0", directValue);

    assertEquals("0", EnumUtil.getEnumValue(cyclic));
}
Example #25
Source File: AppPMMLUtils.java From oryx with Apache License 2.0 | 5 votes |
public static DataDictionary buildDataDictionary( InputSchema schema, CategoricalValueEncodings categoricalValueEncodings) { List<String> featureNames = schema.getFeatureNames(); List<DataField> dataFields = new ArrayList<>(); for (int featureIndex = 0; featureIndex < featureNames.size(); featureIndex++) { String featureName = featureNames.get(featureIndex); OpType opType; DataType dataType; if (schema.isNumeric(featureName)) { opType = OpType.CONTINUOUS; dataType = DataType.DOUBLE; } else if (schema.isCategorical(featureName)) { opType = OpType.CATEGORICAL; dataType = DataType.STRING; } else { // Don't know opType = null; dataType = null; } DataField field = new DataField(FieldName.create(featureName), opType, dataType); if (schema.isCategorical(featureName)) { Objects.requireNonNull(categoricalValueEncodings); categoricalValueEncodings.getEncodingValueMap(featureIndex).entrySet().stream(). sorted(Comparator.comparing(Map.Entry::getKey)). map(Map.Entry::getValue). forEach(value -> field.addValues(new Value(value))); } dataFields.add(field); } return new DataDictionary(dataFields).setNumberOfFields(dataFields.size()); }
Example #26
Source File: AppPMMLUtils.java From oryx with Apache License 2.0 | 5 votes |
/**
 * @param dictionary {@link DataDictionary} from model
 * @return names of features in order
 * @throws IllegalArgumentException if the dictionary's field list is null or empty
 */
public static List<String> getFeatureNames(DataDictionary dictionary) {
  List<DataField> dataFields = dictionary.getDataFields();

  boolean hasFields = dataFields != null && !dataFields.isEmpty();
  Preconditions.checkArgument(hasFields, "No fields in DataDictionary");

  return dataFields.stream()
      .map(DataField::getName)
      .map(FieldName::getValue)
      .collect(Collectors.toList());
}
Example #27
Source File: AppPMMLUtils.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Collects, for every data field that declares values, the string form of those values
 * keyed by the field's position in the dictionary.
 *
 * @param dictionary {@link DataDictionary} whose fields are inspected
 * @return {@link CategoricalValueEncodings} built from the position-to-values mapping
 */
public static CategoricalValueEncodings buildCategoricalValueEncodings(
    DataDictionary dictionary) {
  List<DataField> dataFields = dictionary.getDataFields();
  Map<Integer,Collection<String>> indexToValues = new HashMap<>();

  for (int featureIndex = 0; featureIndex < dataFields.size(); featureIndex++) {
    Collection<Value> values = dataFields.get(featureIndex).getValues();
    if (values == null || values.isEmpty()) {
      // Fields without declared values get no entry
      continue;
    }
    indexToValues.put(
        featureIndex,
        values.stream().map(v -> v.getValue().toString()).collect(Collectors.toList()));
  }

  return new CategoricalValueEncodings(indexToValues);
}
Example #28
Source File: AppPMMLUtilsTest.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Asserts the name, op type and data type of a field.
 *
 * @param field field to check
 * @param name expected field name
 * @param categorical {@code null} if op type and data type are expected to be null;
 *  {@code true} for categorical/string; {@code false} for continuous/double
 */
private static void checkDataField(DataField field, String name, Boolean categorical) {
  assertEquals(name, field.getName().getValue());

  if (categorical == null) {
    assertNull(field.getOpType());
    assertNull(field.getDataType());
    return;
  }

  if (categorical) {
    assertEquals(OpType.CATEGORICAL, field.getOpType());
    assertEquals(DataType.STRING, field.getDataType());
    return;
  }

  assertEquals(OpType.CONTINUOUS, field.getOpType());
  assertEquals(DataType.DOUBLE, field.getDataType());
}
Example #29
Source File: MemoryMeasurerTest.java From jpmml-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Verifies that two data fields sharing a name and an interval measure the same,
 * whether or not the operational type and data type are set.
 */
@Test
public void measure(){
    Interval unitInterval = new Interval(Interval.Closure.CLOSED_CLOSED)
        .setLeftMargin(0d)
        .setRightMargin(1d);

    // Same name and interval, but no operational type and no data type
    DataField untyped = new DataField(FieldName.create("x"), null, null)
        .addIntervals(unitInterval);

    DataField typed = new DataField(FieldName.create("x"), OpType.CONTINUOUS, DataType.DOUBLE)
        .addIntervals(unitInterval);

    assertEquals(getSize(untyped), getSize(typed));
}
Example #30
Source File: AbstractAppMLlibIT.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Asserts that a data dictionary matches the schema: same number of features, and per
 * field continuous/double for numeric features, categorical/string for categorical
 * features, and null op type and data type otherwise.
 *
 * @param schema schema the dictionary is expected to reflect
 * @param dataDictionary dictionary under test; must not be null
 */
protected static void checkDataDictionary(InputSchema schema, DataDictionary dataDictionary) {
  assertNotNull(dataDictionary);

  int expectedFeatures = schema.getNumFeatures();
  assertEquals("Wrong number of features",
               expectedFeatures,
               dataDictionary.getNumberOfFields().intValue());

  List<DataField> dataFields = dataDictionary.getDataFields();
  assertEquals(schema.getNumFeatures(), dataFields.size());

  for (DataField dataField : dataFields) {
    String featureName = dataField.getName().getValue();

    if (schema.isNumeric(featureName)) {
      assertEquals("Wrong op type for feature " + featureName,
                   OpType.CONTINUOUS,
                   dataField.getOpType());
      assertEquals("Wrong data type for feature " + featureName,
                   DataType.DOUBLE,
                   dataField.getDataType());
      continue;
    }

    if (schema.isCategorical(featureName)) {
      assertEquals("Wrong op type for feature " + featureName,
                   OpType.CATEGORICAL,
                   dataField.getOpType());
      assertEquals("Wrong data type for feature " + featureName,
                   DataType.STRING,
                   dataField.getDataType());
      continue;
    }

    // Neither numeric nor categorical: nothing is declared
    assertNull(dataField.getOpType());
    assertNull(dataField.getDataType());
  }
}