org.jpmml.converter.CategoricalFeature Java Examples
The following examples show how to use
org.jpmml.converter.CategoricalFeature.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndexToStringConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the output of the {@code IndexToString} transformer.
 *
 * <p>
 * A categorical string data field is created under the name produced by
 * {@code formatName(transformer)}, with the transformer's labels as its values,
 * and is returned wrapped in a single {@link CategoricalFeature}.
 * </p>
 *
 * @param encoder The encoder that creates and owns the data field.
 * @return A singleton list holding the categorical feature.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	IndexToString transformer = getTransformer();

	DataField labelField = encoder.createDataField(
		formatName(transformer),
		OpType.CATEGORICAL,
		DataType.STRING,
		Arrays.asList(transformer.getLabels())
	);

	Feature labelFeature = new CategoricalFeature(encoder, labelField);

	return Collections.singletonList(labelFeature);
}
Example #2
Source File: EncoderUtil.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Maps the categories of a feature to numeric index values.
 *
 * <p>
 * The input field is first marked as categorical over {@code categories}.
 * A categorical derived field (named via {@code FeatureUtil.createName("encoder", feature)})
 * is then ensured on the encoder; its {@code MapValues} expression is built lazily by a supplier.
 * The returned feature still presents the original categories, but its continuous view
 * delegates to the index-valued derived field.
 * </p>
 *
 * @param feature The feature to encode.
 * @param categories The category values of the feature.
 * @param indexCategories The index values; must have the same size as {@code categories}.
 * @param mapMissingTo The value set as {@code mapMissingTo} on the mapping.
 * @param defaultValue The value set as {@code defaultValue} on the mapping.
 * @param dataType The data type of the derived field.
 * @param encoder The encoder that owns the fields.
 * @return A categorical feature over {@code categories} whose continuous form is the index feature.
 */
static public Feature encodeIndexFeature(Feature feature, List<?> categories, List<? extends Number> indexCategories, Number mapMissingTo, Number defaultValue, DataType dataType, SkLearnEncoder encoder){
	ClassDictUtil.checkSize(categories, indexCategories);

	encoder.toCategorical(feature.getName(), categories);

	// Deferred, so that the mapping is only built when the encoder actually asks for it
	Supplier<MapValues> mapValuesSupplier = () -> PMMLUtil.createMapValues(feature.getName(), categories, indexCategories)
		.setMapMissingTo(mapMissingTo)
		.setDefaultValue(defaultValue);

	DerivedField derivedField = encoder.ensureDerivedField(FeatureUtil.createName("encoder", feature), OpType.CATEGORICAL, dataType, mapValuesSupplier);

	Feature encodedFeature = new IndexFeature(encoder, derivedField, indexCategories);

	return new CategoricalFeature(encoder, feature, categories){

		@Override
		public ContinuousFeature toContinuousFeature(){
			// Numeric consumers see the index-valued derived field, not the raw categories
			return encodedFeature.toContinuousFeature();
		}
	};
}
Example #3
Source File: OneHotEncoderModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes the output of the {@code OneHotEncoderModel} transformer.
 *
 * <p>
 * Each input column must resolve to exactly one {@link CategoricalFeature}.
 * Its values are expanded to binary features by
 * {@code OneHotEncoderModelConverter.encodeFeature(...)} (honouring the transformer's
 * {@code dropLast} setting), and the binary features are grouped into one
 * {@code BinarizedCategoricalFeature} per input column.
 * </p>
 *
 * @param encoder The encoder that holds the input features.
 * @return One binarized categorical feature per input column, in input column order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	OneHotEncoderModel transformer = getTransformer();

	boolean dropLast = transformer.getDropLast();

	InOutMode inputMode = getInputMode();

	List<Feature> result = new ArrayList<>();

	String[] inputCols = inputMode.getInputCols(transformer);

	for(int i = 0; i < inputCols.length; i++){
		CategoricalFeature categoricalFeature = (CategoricalFeature)encoder.getOnlyFeature(inputCols[i]);

		List<?> values = categoricalFeature.getValues();

		List<BinaryFeature> binaryFeatures = OneHotEncoderModelConverter.encodeFeature(encoder, categoricalFeature, values, dropLast);

		Feature binarizedFeature = new BinarizedCategoricalFeature(encoder, categoricalFeature.getName(), categoricalFeature.getDataType(), binaryFeatures);

		result.add(binarizedFeature);
	}

	return result;
}
Example #4
Source File: VectorIndexerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Encodes the output of the {@code VectorIndexerModel} transformer.
 *
 * <p>
 * Input features that have an entry in the transformer's category maps are turned into
 * categorical fields and re-mapped (category to index) via an integer-typed
 * {@code MapValues} derived field. Input features without an entry are passed through
 * after a cast to {@link ContinuousFeature}.
 * </p>
 *
 * @param encoder The encoder that holds the input features.
 * @return One feature per input vector element, in element order.
 */
@Override
public List<Feature> encodeFeatures(SparkMLEncoder encoder){
	VectorIndexerModel transformer = getTransformer();

	int numFeatures = transformer.numFeatures();

	List<Feature> features = encoder.getFeatures(transformer.getInputCol());

	SchemaUtil.checkSize(numFeatures, features);

	Map<Integer, Map<Double, Integer>> categoryMaps = transformer.javaCategoryMaps();

	List<Feature> result = new ArrayList<>();

	for(int i = 0; i < numFeatures; i++){
		Feature feature = features.get(i);

		Map<Double, Integer> categoryMap = categoryMaps.get(i);

		// No category map: the element is not indexed, and is passed through
		if(categoryMap == null){
			result.add((ContinuousFeature)feature);

			continue;
		}

		// Fix the iteration order of the mapping using the converter's comparator
		List<Map.Entry<Double, Integer>> entries = new ArrayList<>(categoryMap.entrySet());
		entries.sort(VectorIndexerModelConverter.COMPARATOR);

		List<Double> categories = new ArrayList<>();
		List<Integer> values = new ArrayList<>();

		for(Map.Entry<Double, Integer> entry : entries){
			categories.add(entry.getKey());
			values.add(entry.getValue());
		}

		encoder.toCategorical(feature.getName(), categories);

		MapValues mapValues = PMMLUtil.createMapValues(feature.getName(), categories, values)
			.setDataType(DataType.INTEGER);

		DerivedField derivedField = encoder.createDerivedField(formatName(transformer, i, numFeatures), OpType.CATEGORICAL, DataType.INTEGER, mapValues);

		result.add(new CategoricalFeature(encoder, derivedField, values));
	}

	return result;
}
Example #5
Source File: MultiOneHotEncoder.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<List<?>> categories = getCategories(); ClassDictUtil.checkSize(categories, features); Object drop = getDrop(); List<Integer> dropIdx = (drop != null ? getDropIdx() : null); List<Feature> result = new ArrayList<>(); for(int i = 0; i < features.size(); i++){ Feature feature = features.get(i); List<?> featureCategories = categories.get(i); if(feature instanceof CategoricalFeature){ CategoricalFeature categoricalFeature = (CategoricalFeature)feature; ClassDictUtil.checkSize(featureCategories, categoricalFeature.getValues()); featureCategories = categoricalFeature.getValues(); } else if(feature instanceof ObjectFeature){ ObjectFeature objectFeature = (ObjectFeature)feature; } else if(feature instanceof WildcardFeature){ WildcardFeature wildcardFeature = (WildcardFeature)feature; feature = wildcardFeature.toCategoricalFeature(featureCategories); } else { throw new IllegalArgumentException(); } // End if if(dropIdx != null){ // Unbox to primitive value in order to ensure correct List#remove(int) vs. List#remove(Object) method resolution int index = dropIdx.get(i); featureCategories = new ArrayList<>(featureCategories); featureCategories.remove(index); } for(int j = 0; j < featureCategories.size(); j++){ Object featureCategory = featureCategories.get(j); result.add(new BinaryFeature(encoder, feature, featureCategory)); } } return result; }
Example #6
Source File: LabelBinarizer.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 4 votes |
@Override public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){ List<?> classes = getClasses(); Number negLabel = getNegLabel(); Number posLabel = getPosLabel(); ClassDictUtil.checkSize(1, features); Feature feature = features.get(0); List<Object> categories = new ArrayList<>(); categories.addAll(classes); List<Number> labelCategories = new ArrayList<>(); labelCategories.add(negLabel); labelCategories.add(posLabel); List<Feature> result = new ArrayList<>(); classes = prepareClasses(classes); for(int i = 0; i < classes.size(); i++){ Object value = classes.get(i); if(ValueUtil.isZero(negLabel) && ValueUtil.isOne(posLabel)){ result.add(new BinaryFeature(encoder, feature, value)); } else { // "($name == value) ? pos_label : neg_label" Apply apply = PMMLUtil.createApply(PMMLFunctions.IF) .addExpressions(PMMLUtil.createApply(PMMLFunctions.EQUAL, feature.ref(), PMMLUtil.createConstant(value, feature.getDataType()))) .addExpressions(PMMLUtil.createConstant(posLabel), PMMLUtil.createConstant(negLabel)); FieldName name = (classes.size() > 1 ? FeatureUtil.createName("label_binarizer", feature, i) : FeatureUtil.createName("label_binarizer", feature)); DerivedField derivedField = encoder.createDerivedField(name, apply); result.add(new CategoricalFeature(encoder, derivedField, labelCategories)); } } encoder.toCategorical(feature.getName(), categories); return result; }
Example #7
Source File: RPartConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Builds the left and right branch predicates of one split.
 *
 * <p>
 * The split type is read from column 1 of the {@code splits} matrix ("ncat") and the
 * split value from column 3 ("index"), both at row {@code splitOffset}.
 * A split type of magnitude one yields a pair of simple comparison predicates on the
 * split value; the sign decides which side gets "less than".
 * Any other split type is treated as a categorical split: the split value (one-based)
 * selects a row of {@code csplit}, and the feature's values are divided between the
 * branches by {@code selectValues(...)} using the markers 1 (left) and 3 (right).
 * </p>
 *
 * @param feature The split feature; must be a {@link CategoricalFeature} for categorical splits.
 * @param splitOffset The row of the split in the {@code splits} matrix.
 * @param splits The splits matrix, in column-major layout.
 * @param csplit The categorical splits matrix, in column-major layout.
 * @return A two-element list: the left predicate followed by the right predicate.
 */
private List<Predicate> encodePredicates(Feature feature, int splitOffset, RNumberVector<?> splits, RIntegerVector csplit){
	RIntegerVector splitsDim = splits.dim();

	int splitRows = splitsDim.getValue(0);
	int splitColumns = splitsDim.getValue(1);

	List<? extends Number> ncat = FortranMatrixUtil.getColumn(splits.getValues(), splitRows, splitColumns, 1);
	List<? extends Number> index = FortranMatrixUtil.getColumn(splits.getValues(), splitRows, splitColumns, 3);

	int splitType = ValueUtil.asInt(ncat.get(splitOffset));

	Number splitValue = index.get(splitOffset);

	// Continuous split
	if(Math.abs(splitType) == 1){
		boolean lessThanGoesLeft = (splitType == -1);

		SimplePredicate.Operator leftOperator = (lessThanGoesLeft ? SimplePredicate.Operator.LESS_THAN : SimplePredicate.Operator.GREATER_OR_EQUAL);
		SimplePredicate.Operator rightOperator = (lessThanGoesLeft ? SimplePredicate.Operator.GREATER_OR_EQUAL : SimplePredicate.Operator.LESS_THAN);

		Predicate leftPredicate = createSimplePredicate(feature, leftOperator, splitValue);
		Predicate rightPredicate = createSimplePredicate(feature, rightOperator, splitValue);

		return Arrays.asList(leftPredicate, rightPredicate);
	}

	// Categorical split
	CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

	RIntegerVector csplitDim = csplit.dim();

	int csplitRows = csplitDim.getValue(0);
	int csplitColumns = csplitDim.getValue(1);

	// The split value is a one-based row number
	List<Integer> csplitRow = FortranMatrixUtil.getRow(csplit.getValues(), csplitRows, csplitColumns, ValueUtil.asInt(splitValue) - 1);

	List<?> values = categoricalFeature.getValues();

	Predicate leftPredicate = createSimpleSetPredicate(categoricalFeature, selectValues(values, csplitRow, 1));
	Predicate rightPredicate = createSimpleSetPredicate(categoricalFeature, selectValues(values, csplitRow, 3));

	return Arrays.asList(leftPredicate, rightPredicate);
}
Example #8
Source File: OneHotEncoder.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 2 votes |
/**
 * One-hot encodes a single feature.
 *
 * <p>
 * A {@link CategoricalFeature} is expanded over its own values, whose number must match
 * the fitted values. A {@link WildcardFeature} is expanded over the fitted values
 * converted to integers, and is then converted to a categorical feature over them.
 * </p>
 *
 * @param features The input features; exactly one is expected.
 * @param encoder The encoder that owns the features.
 * @return One {@link BinaryFeature} per value.
 * @throws IllegalArgumentException If the feature is neither categorical nor wildcard.
 */
@Override
public List<Feature> encodeFeatures(List<Feature> features, SkLearnEncoder encoder){
	List<? extends Number> values = getValues();

	ClassDictUtil.checkSize(1, features);

	Feature feature = features.get(0);

	List<Feature> result = new ArrayList<>();

	if(feature instanceof CategoricalFeature){
		CategoricalFeature categoricalFeature = (CategoricalFeature)feature;

		ClassDictUtil.checkSize(values, categoricalFeature.getValues());

		int numValues = values.size();

		for(int i = 0; i < numValues; i++){
			Object categoryValue = categoricalFeature.getValue(i);

			result.add(new BinaryFeature(encoder, categoricalFeature, categoryValue));
		}

		return result;
	} // End if

	if(feature instanceof WildcardFeature){
		WildcardFeature wildcardFeature = (WildcardFeature)feature;

		List<Integer> categories = new ArrayList<>();

		for(Number value : values){
			Integer category = ValueUtil.asInt(value);

			categories.add(category);

			result.add(new BinaryFeature(encoder, wildcardFeature, category));
		}

		// The returned feature is not needed here
		wildcardFeature.toCategoricalFeature(categories);

		return result;
	}

	throw new IllegalArgumentException();
}
Example #9
Source File: ImputerTest.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 2 votes |
/**
 * Checks how a "most_frequent" imputer is encoded for field {@code x}.
 *
 * <p>
 * In both scenarios the imputation is expected to be attached to the data field as a
 * decorator, without creating an {@code imputer(x)} derived field. On its own the imputer
 * yields a {@link WildcardFeature}; preceded by a {@code CategoricalDomain} it yields
 * a {@link CategoricalFeature}.
 * </p>
 */
@Test
public void encodeCategorical(){
	FieldName name = FieldName.create("x");
	FieldName imputedName = FieldName.create("imputer(x)");

	Imputer imputer = new Imputer("sklearn.preprocessing.imputation", "Imputer");
	imputer.put("strategy", "most_frequent");
	imputer.put("missing_values", "NaN");
	imputer.put("statistics_", 0);

	// Scenario 1: the imputer alone
	SkLearnEncoder imputerEncoder = new SkLearnEncoder();

	Feature imputerFeature = encodeFeature(name.getValue(), Arrays.asList(imputer), imputerEncoder);

	assertNotNull(imputerEncoder.getDataField(name));
	assertNull(imputerEncoder.getDerivedField(imputedName));

	List<Decorator> imputerDecorators = imputerEncoder.getDecorators(name);

	assertEquals(1, imputerDecorators.size());

	assertTrue(imputerFeature instanceof WildcardFeature);
	assertEquals(name, imputerFeature.getName());

	// Scenario 2: a categorical domain followed by the imputer
	NDArray array = new NDArray();
	array.put("data", Arrays.asList(0, 1, 2, 3, 4, 5, 6));
	array.put("fortran_order", Boolean.FALSE);

	CategoricalDomain categoricalDomain = new CategoricalDomain("sklearn2pmml.decoration", "CategoricalDomain");
	categoricalDomain.put("invalid_value_treatment", "as_is");
	categoricalDomain.put("data_", array);

	SkLearnEncoder domainEncoder = new SkLearnEncoder();

	Feature domainFeature = encodeFeature(name.getValue(), Arrays.asList(categoricalDomain, imputer), domainEncoder);

	assertNotNull(domainEncoder.getDataField(name));
	assertNull(domainEncoder.getDerivedField(imputedName));

	List<Decorator> domainDecorators = domainEncoder.getDecorators(name);

	assertEquals(2, domainDecorators.size());

	assertTrue(domainFeature instanceof CategoricalFeature);
	assertEquals(name, domainFeature.getName());
}