org.dmg.pmml.clustering.ClusteringModel Java Examples
The following examples show how to use
org.dmg.pmml.clustering.ClusteringModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KMeansModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Encodes the transformer's k-means model as a center-based PMML clustering model.
 *
 * <p>One {@link Cluster} is emitted per cluster center; its identifier is the
 * zero-based position of the center in {@code clusterCenters()}.
 *
 * @param schema provides the label for the mining schema and the features for the clustering fields
 * @return the clustering model using a squared Euclidean distance measure
 */
@Override
public ClusteringModel encodeModel(Schema schema){
    KMeansModel model = getTransformer();

    Vector[] centers = model.clusterCenters();

    List<Cluster> clusters = new ArrayList<>();

    int position = 0;
    for(Vector center : centers){
        clusters.add(
            new Cluster(PMMLUtil.createRealArray(VectorUtil.toList(center)))
                .setId(String.valueOf(position))
        );

        position++;
    }

    ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
        .setCompareFunction(CompareFunction.ABS_DIFF);

    ClusteringModel clusteringModel = new ClusteringModel(
        MiningFunction.CLUSTERING,
        ClusteringModel.ModelClass.CENTER_BASED,
        clusters.size(),
        ModelUtil.createMiningSchema(schema.getLabel()),
        comparisonMeasure,
        ClusteringModelUtil.createClusteringFields(schema.getFeatures()),
        clusters);

    return clusteringModel;
}
Example #2
Source File: KMeansPMMLUtils.java From oryx with Apache License 2.0 | 6 votes |
/**
 * Checks that a PMML document holds exactly one clustering model whose
 * feature names agree with the expected schema.
 *
 * <p>The feature names are compared against both the document's data
 * dictionary and the model's mining schema.
 *
 * @param pmml {@link PMML} encoding of KMeans Clustering
 * @param schema expected schema attributes of KMeans Clustering
 */
public static void validatePMMLVsSchema(PMML pmml, InputSchema schema) {
  List<Model> models = pmml.getModels();
  int modelCount = models.size();
  Preconditions.checkArgument(modelCount == 1,
                              "Should have exactly one model, but had %s",
                              modelCount);

  Model model = models.get(0);
  Preconditions.checkArgument(model instanceof ClusteringModel);
  Preconditions.checkArgument(MiningFunction.CLUSTERING == model.getMiningFunction());

  // Names declared in the data dictionary must match the schema
  Preconditions.checkArgument(
      schema.getFeatureNames().equals(AppPMMLUtils.getFeatureNames(pmml.getDataDictionary())),
      "Feature names in schema don't match names in PMML");

  // ... and so must the names declared in the model's mining schema
  Preconditions.checkArgument(
      schema.getFeatureNames().equals(AppPMMLUtils.getFeatureNames(model.getMiningSchema())));
}
Example #3
Source File: InvalidMarkupInspector.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Submits the model's declared cluster count and its actual cluster
 * collection to {@code check} before continuing the visit.
 *
 * @param clusteringModel the clustering model being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(ClusteringModel clusteringModel){
    // Pairs the numberOfClusters attribute with the cluster list;
    // presumably check() verifies that the two agree - confirm against CollectionSize.
    CollectionSize clusterCount = new CollectionSize(clusteringModel){

        @Override
        public Collection<?> getCollection(){
            return clusteringModel.getClusters();
        }

        @Override
        public Integer getSize(){
            return clusteringModel.getNumberOfClusters();
        }
    };

    check(clusterCount);

    return super.visit(clusteringModel);
}
Example #4
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Collects the clustering fields of the model that are flagged as center fields.
 *
 * @return the fields whose center-field attribute is {@code TRUE}, in model order
 * @throws UnsupportedAttributeException if a field carries a center-field value
 *         other than {@code TRUE} or {@code FALSE}
 */
private List<ClusteringField> getCenterClusteringFields(){
    ClusteringModel clusteringModel = getModel();

    List<ClusteringField> candidates = clusteringModel.getClusteringFields();

    List<ClusteringField> centerFields = new ArrayList<>(candidates.size());

    for(ClusteringField candidate : candidates){
        ClusteringField.CenterField flag = candidate.getCenterField();

        switch(flag){
            case FALSE:
                // Not a center field; leave it out
                break;
            case TRUE:
                centerFields.add(candidate);
                break;
            default:
                throw new UnsupportedAttributeException(candidate, flag);
        }
    }

    return centerFields;
}
Example #5
Source File: KMeans.java From jpmml-sklearn with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the fitted k-means estimator as a center-based PMML clustering model.
 *
 * <p>Each cluster takes its center from one row of the cluster-center matrix.
 * A cluster's size is the number of times its index occurs among the labels,
 * or {@code null} when no labels were counted.
 *
 * @param schema provides the label for the mining schema and the features for the clustering fields
 * @return the clustering model, with an output created for the field named "Cluster"
 */
@Override
public ClusteringModel encodeModel(Schema schema){
    int[] shape = getClusterCentersShape();

    int numberOfClusters = shape[0];
    int numberOfFeatures = shape[1];

    List<? extends Number> clusterCenters = getClusterCenters();

    Multiset<Integer> labelCounts = HashMultiset.create();

    List<Integer> labels = getLabels();
    if(labels != null){
        labelCounts.addAll(labels);
    }

    // The multiset is not modified below, so this can be decided once
    boolean hasLabelCounts = (labelCounts.size() > 0);

    List<Cluster> clusters = new ArrayList<>();

    for(int row = 0; row < numberOfClusters; row++){
        Integer size = null;

        if(hasLabelCounts){
            size = labelCounts.count(row);
        }

        clusters.add(
            new Cluster(PMMLUtil.createRealArray(CMatrixUtil.getRow(clusterCenters, numberOfClusters, numberOfFeatures, row)))
                .setId(String.valueOf(row))
                .setSize(size)
        );
    }

    ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
        .setCompareFunction(CompareFunction.ABS_DIFF);

    return new ClusteringModel(
            MiningFunction.CLUSTERING,
            ClusteringModel.ModelClass.CENTER_BASED,
            numberOfClusters,
            ModelUtil.createMiningSchema(schema.getLabel()),
            comparisonMeasure,
            ClusteringModelUtil.createClusteringFields(schema.getFeatures()),
            clusters)
        .setOutput(ClusteringModelUtil.createOutput(FieldName.create("Cluster"), DataType.DOUBLE, clusters));
}
Example #6
Source File: UnsupportedMarkupInspectorTest.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Applies the inspector to a distribution-based clustering model that carries
 * a custom CenterFields element, and expects two reported exceptions: the
 * first about the model class, the second about the CenterFields element.
 */
@Test
public void inspect(){
    ClusteringModel clusteringModel = new ClusteringModel()
        .setModelClass(ClusteringModel.ModelClass.DISTRIBUTION_BASED)
        .setCenterFields(new CustomCenterFields());

    PMML pmml = new PMML(Version.PMML_4_3.getVersion(), new Header(), new DataDictionary())
        .addModels(clusteringModel);

    UnsupportedMarkupInspector inspector = new UnsupportedMarkupInspector();

    try {
        inspector.applyTo(pmml);

        fail();
    } catch(UnsupportedMarkupException ume){
        List<UnsupportedMarkupException> exceptions = inspector.getExceptions();

        assertEquals(2, exceptions.size());

        // The thrown exception must be the first one that was collected
        assertEquals(0, exceptions.indexOf(ume));

        String firstMessage = exceptions.get(0).getMessage();

        assertTrue(firstMessage.contains("ClusteringModel@modelClass=distributionBased"));

        String secondMessage = exceptions.get(1).getMessage();

        assertTrue(secondMessage.contains("CenterFields"));
        assertTrue(secondMessage.contains(CustomCenterFields.class.getName()));
    }
}
Example #7
Source File: UnsupportedMarkupInspector.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Reports distribution-based clustering models as unsupported; every other
 * model class passes through without a report.
 *
 * @param clusteringModel the clustering model being visited
 * @return the action returned by the superclass visit
 */
@Override
public VisitorAction visit(ClusteringModel clusteringModel){
    ClusteringModel.ModelClass declaredClass = clusteringModel.getModelClass();

    switch(declaredClass){
        case DISTRIBUTION_BASED:
            report(new UnsupportedAttributeException(clusteringModel, declaredClass));
            break;
        default:
            // Nothing to report for the remaining model classes
            break;
    }

    return super.visit(clusteringModel);
}
Example #8
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Computes the distance between the input values and every cluster of the model.
 *
 * <p>An adjustment is calculated first: from the model's missing value
 * weights when they are present, otherwise from the input values alone.
 *
 * @param valueFactory factory handed to the measure calculations
 * @param comparisonMeasure the comparison measure of the model
 * @param clusteringFields the clustering fields the values belong to
 * @param values the input values, one per clustering field
 * @return a distance-type distribution holding one entry per cluster
 * @throws InvalidElementException if the missing value weights, or the cached
 *         values of a cluster, differ in number from the input values
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateDistance(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
    ClusteringModel clusteringModel = getModel();

    List<Cluster> clusters = clusteringModel.getClusters();

    MissingValueWeights missingValueWeights = clusteringModel.getMissingValueWeights();

    Value<V> adjustment;

    if(missingValueWeights == null){
        adjustment = MeasureUtil.calculateAdjustment(valueFactory, values);
    } else {
        List<? extends Number> weights = ArrayUtil.asNumberList(missingValueWeights.getArray());

        if(values.size() != weights.size()){
            throw new InvalidElementException(missingValueWeights);
        }

        adjustment = MeasureUtil.calculateAdjustment(valueFactory, values, weights);
    }

    ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.DISTANCE, clusters);

    for(Cluster cluster : clusters){
        List<FieldValue> centerValues = CacheUtil.getValue(cluster, ClusteringModelEvaluator.clusterValueCache);

        if(values.size() != centerValues.size()){
            throw new InvalidElementException(cluster);
        }

        distribution.put(cluster, MeasureUtil.evaluateDistance(valueFactory, comparisonMeasure, clusteringFields, values, centerValues, adjustment));
    }

    return distribution;
}
Example #9
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates an evaluator for the given clustering model, rejecting models that
 * this evaluator cannot handle.
 *
 * @param pmml the PMML document that contains the model
 * @param clusteringModel the clustering model to evaluate
 * @throws MissingElementException if the model has no comparison measure,
 *         no clustering fields or no clusters
 * @throws UnsupportedAttributeException if the model class is not center-based
 * @throws UnsupportedElementException if the model declares a CenterFields element
 * @throws MisplacedElementException if the model declares a Targets element
 */
public ClusteringModelEvaluator(PMML pmml, ClusteringModel clusteringModel){
    super(pmml, clusteringModel);

    if(clusteringModel.getComparisonMeasure() == null){
        throw new MissingElementException(clusteringModel, PMMLElements.CLUSTERINGMODEL_COMPARISONMEASURE);
    }

    ClusteringModel.ModelClass declaredClass = clusteringModel.getModelClass();
    switch(declaredClass){
        case CENTER_BASED:
            break;
        default:
            throw new UnsupportedAttributeException(clusteringModel, declaredClass);
    }

    CenterFields centerFields = clusteringModel.getCenterFields();
    if(centerFields != null){
        throw new UnsupportedElementException(centerFields);
    }

    // Both the clustering fields and the clusters are mandatory
    if(!clusteringModel.hasClusteringFields()){
        throw new MissingElementException(clusteringModel, PMMLElements.CLUSTERINGMODEL_CLUSTERINGFIELDS);
    }

    if(!clusteringModel.hasClusters()){
        throw new MissingElementException(clusteringModel, PMMLElements.CLUSTERINGMODEL_CLUSTERS);
    }

    Targets targets = clusteringModel.getTargets();
    if(targets != null){
        throw new MisplacedElementException(targets);
    }
}
Example #10
Source File: KMeansUpdate.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Builds a center-based PMML clustering model from a trained k-means model.
 *
 * <p>A clustering field is created for every active feature of the input
 * schema, and one cluster is created per cluster center, identified by the
 * center's zero-based index.
 *
 * @param model k-means model supplying the cluster centers
 * @param clusterSizesMap cluster sizes, looked up by cluster index
 * @return the clustering model using a squared Euclidean distance measure
 */
private ClusteringModel pmmlClusteringModel(KMeansModel model,
                                            Map<Integer,Long> clusterSizesMap) {
  Vector[] clusterCenters = model.clusterCenters();

  List<ClusteringField> clusteringFields = new ArrayList<>();
  for (int feature = 0; feature < inputSchema.getNumFeatures(); feature++) {
    if (!inputSchema.isActive(feature)) {
      continue;
    }
    FieldName fieldName = FieldName.create(inputSchema.getFeatureNames().get(feature));
    clusteringFields.add(
        new ClusteringField(fieldName).setCenterField(ClusteringField.CenterField.TRUE));
  }

  List<Cluster> clusters = new ArrayList<>(clusterCenters.length);
  int id = 0;
  for (Vector clusterCenter : clusterCenters) {
    // NOTE(review): get(id) is unboxed without a null check, so an index missing
    // from clusterSizesMap fails here - confirm that callers always supply every index.
    int size = clusterSizesMap.get(id).intValue();
    clusters.add(new Cluster().setId(Integer.toString(id))
                     .setSize(size)
                     .setArray(AppPMMLUtils.toArray(clusterCenter.toArray())));
    id++;
  }

  ComparisonMeasure comparisonMeasure =
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean());

  return new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      AppPMMLUtils.buildMiningSchema(inputSchema),
      comparisonMeasure,
      clusteringFields,
      clusters);
}
Example #11
Source File: KMeansUpdate.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Wraps the clustering model built from a k-means model in a skeleton PMML
 * document, together with a data dictionary built from the input schema.
 *
 * @param model {@link KMeansModel} to translate to PMML
 * @param clusterSizesMap cluster sizes, passed on to the clustering model builder
 * @return PMML representation of a KMeans cluster model
 */
private PMML kMeansModelToPMML(KMeansModel model, Map<Integer,Long> clusterSizesMap) {
  PMML pmml = PMMLUtils.buildSkeletonPMML();
  pmml.setDataDictionary(AppPMMLUtils.buildDataDictionary(inputSchema, null));
  pmml.addModels(pmmlClusteringModel(model, clusterSizesMap));
  return pmml;
}
Example #12
Source File: KMeansPMMLUtils.java From oryx with Apache License 2.0 | 5 votes |
/**
 * Reads the clusters of the first model in a PMML document, which must be a
 * {@link ClusteringModel}.
 *
 * <p>For each cluster, the identifier is parsed as an integer and the center
 * is parsed from the space-delimited text of the cluster's array.
 *
 * @param pmml PMML representation of Clusters
 * @return List of {@link ClusterInfo}
 */
public static List<ClusterInfo> read(PMML pmml) {
  Model rootModel = pmml.getModels().get(0);
  Preconditions.checkArgument(rootModel instanceof ClusteringModel);
  ClusteringModel clusteringModel = (ClusteringModel) rootModel;

  return clusteringModel.getClusters().stream()
      .map(cluster -> {
        int id = Integer.parseInt(cluster.getId());
        String centerText = cluster.getArray().getValue().toString();
        return new ClusterInfo(id,
                               VectorMath.parseVector(TextUtils.parseDelimited(centerText, ' ')),
                               cluster.getSize());
      })
      .collect(Collectors.toList());
}
Example #13
Source File: KMeansConverter.java From jpmml-r with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the R {@code kmeans} object as a center-based PMML clustering model.
 *
 * <p>The "centers" element supplies the center matrix and the "size" element
 * the cluster sizes. One cluster is created per row name of the center
 * matrix, with a one-based identifier and the row name as the cluster name.
 *
 * @param schema provides the label for the mining schema and the features for the clustering fields
 * @return the clustering model, with an output created for the field named "cluster"
 */
@Override
public Model encodeModel(Schema schema){
    RGenericVector kmeans = getObject();

    RDoubleVector centers = kmeans.getDoubleElement("centers");
    RIntegerVector size = kmeans.getIntegerElement("size");

    RIntegerVector centersDim = centers.dim();

    int rows = centersDim.getValue(0);
    int columns = centersDim.getValue(1);

    RStringVector rowNames = centers.dimnames(0);

    List<Cluster> clusters = new ArrayList<>();

    for(int row = 0; row < rowNames.size(); row++){
        clusters.add(
            new Cluster(PMMLUtil.createRealArray(FortranMatrixUtil.getRow(centers.getValues(), rows, columns, row)))
                .setId(String.valueOf(row + 1))
                .setName(rowNames.getValue(row))
                .setSize(size.getValue(row))
        );
    }

    ComparisonMeasure comparisonMeasure = new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean())
        .setCompareFunction(CompareFunction.ABS_DIFF);

    return new ClusteringModel(
            MiningFunction.CLUSTERING,
            ClusteringModel.ModelClass.CENTER_BASED,
            rows,
            ModelUtil.createMiningSchema(schema.getLabel()),
            comparisonMeasure,
            ClusteringModelUtil.createClusteringFields(schema.getFeatures()),
            clusters)
        .setOutput(ClusteringModelUtil.createOutput(FieldName.create("cluster"), DataType.DOUBLE, clusters));
}
Example #14
Source File: KMeansUpdateIT.java From oryx with Apache License 2.0 | 4 votes |
@Test public void testKMeans() throws Exception { Path tempDir = getTempDir(); Path dataDir = tempDir.resolve("data"); Path modelDir = tempDir.resolve("model"); Map<String,Object> overlayConfig = new HashMap<>(); overlayConfig.put("oryx.batch.update-class", KMeansUpdate.class.getName()); ConfigUtils.set(overlayConfig, "oryx.batch.storage.data-dir", dataDir); ConfigUtils.set(overlayConfig, "oryx.batch.storage.model-dir", modelDir); overlayConfig.put("oryx.batch.streaming.generation-interval-sec", GEN_INTERVAL_SEC); overlayConfig.put("oryx.kmeans.hyperparams.k", NUM_CLUSTERS); overlayConfig.put("oryx.kmeans.iterations", 5); overlayConfig.put("oryx.input-schema.num-features", NUM_FEATURES); overlayConfig.put("oryx.input-schema.categorical-features", "[]"); overlayConfig.put("oryx.kmeans.evaluation-strategy", EVALUATION_STRATEGY); Config config = ConfigUtils.overlayOn(overlayConfig, getConfig()); startMessaging(); List<KeyMessage<String, String>> updates = startServerProduceConsumeTopics( config, new RandomKMeansDataGenerator(NUM_FEATURES), DATA_TO_WRITE, WRITE_INTERVAL_MSEC); List<Path> modelInstanceDirs = IOUtils.listFiles(modelDir, "*"); int generations = modelInstanceDirs.size(); checkIntervals(generations, DATA_TO_WRITE, WRITE_INTERVAL_MSEC, GEN_INTERVAL_SEC); for (Path modelInstanceDir : modelInstanceDirs) { Path modelFile = modelInstanceDir.resolve(MLUpdate.MODEL_FILE_NAME); assertNonEmpty(modelFile); PMMLUtils.read(modelFile); // Shouldn't throw exception } InputSchema schema = new InputSchema(config); for (KeyMessage<String,String> km : updates) { String type = km.getKey(); String value = km.getMessage(); assertContains(Arrays.asList("MODEL", "MODEL-REF"), type); PMML pmml = AppPMMLUtils.readPMMLFromUpdateKeyMessage(type, value, null); assertNotNull(pmml); checkHeader(pmml.getHeader()); checkDataDictionary(schema, pmml.getDataDictionary()); Model rootModel = pmml.getModels().get(0); ClusteringModel clusteringModel = (ClusteringModel) rootModel; // Check if 
Basic hyperparameters match assertEquals(NUM_CLUSTERS, clusteringModel.getNumberOfClusters().intValue()); assertEquals(NUM_CLUSTERS, clusteringModel.getClusters().size()); assertEquals(NUM_FEATURES, clusteringModel.getClusteringFields().size()); assertEquals(ComparisonMeasure.Kind.DISTANCE, clusteringModel.getComparisonMeasure().getKind()); assertEquals(NUM_FEATURES, clusteringModel.getClusters().get(0).getArray().getN().intValue()); for (Cluster cluster : clusteringModel.getClusters()) { assertGreater(cluster.getSize(), 0); } } }
Example #15
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates an evaluator for the {@link ClusteringModel} that
 * {@code PMMLUtil.findModel} locates in the given PMML document, delegating
 * to the two-argument constructor.
 *
 * @param pmml the PMML document to search for a clustering model
 */
public ClusteringModelEvaluator(PMML pmml){ this(pmml, PMMLUtil.findModel(pmml, ClusteringModel.class)); }
Example #16
Source File: KMeansHyperParamTuningIT.java From oryx with Apache License 2.0 | 4 votes |
@Test public void testKMeans() throws Exception { Path tempDir = getTempDir(); Path dataDir = tempDir.resolve("data"); Path modelDir = tempDir.resolve("model"); Map<String,Object> overlayConfig = new HashMap<>(); overlayConfig.put("oryx.batch.update-class", KMeansUpdate.class.getName()); ConfigUtils.set(overlayConfig, "oryx.batch.storage.data-dir", dataDir); ConfigUtils.set(overlayConfig, "oryx.batch.storage.model-dir", modelDir); overlayConfig.put("oryx.batch.streaming.generation-interval-sec", GEN_INTERVAL_SEC); overlayConfig.put("oryx.kmeans.hyperparams.k", "[2,100]"); overlayConfig.put("oryx.kmeans.iterations", 20); overlayConfig.put("oryx.input-schema.num-features", NUM_FEATURES); overlayConfig.put("oryx.input-schema.categorical-features", "[]"); overlayConfig.put("oryx.ml.eval.candidates", 3); overlayConfig.put("oryx.ml.eval.parallelism", 2); overlayConfig.put("oryx.kmeans.evaluation-strategy", EVALUATION_STRATEGY); Config config = ConfigUtils.overlayOn(overlayConfig, getConfig()); startMessaging(); startServerProduceConsumeTopics( config, new RandomKMeansDataGenerator(NUM_FEATURES), DATA_TO_WRITE, WRITE_INTERVAL_MSEC); List<Path> modelInstanceDirs = IOUtils.listFiles(modelDir, "*"); checkIntervals(modelInstanceDirs.size(), DATA_TO_WRITE, WRITE_INTERVAL_MSEC, GEN_INTERVAL_SEC); Path latestModelDir = modelInstanceDirs.get(modelInstanceDirs.size() - 1); Path modelFile = latestModelDir.resolve(MLUpdate.MODEL_FILE_NAME); assertTrue("No such model file: " + modelFile, Files.exists(modelFile)); PMML pmml = PMMLUtils.read(modelFile); Model rootModel = pmml.getModels().get(0); ClusteringModel clusteringModel = (ClusteringModel) rootModel; // Should have picked highest k assertEquals(100, clusteringModel.getNumberOfClusters().intValue()); }
Example #17
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 4 votes |
@Override protected <V extends Number> Map<FieldName, ClusterAffinityDistribution<V>> evaluateClustering(ValueFactory<V> valueFactory, EvaluationContext context){ ClusteringModel clusteringModel = getModel(); ComparisonMeasure comparisonMeasure = clusteringModel.getComparisonMeasure(); List<ClusteringField> clusteringFields = getCenterClusteringFields(); List<FieldValue> values = new ArrayList<>(clusteringFields.size()); for(int i = 0, max = clusteringFields.size(); i < max; i++){ ClusteringField clusteringField = clusteringFields.get(i); FieldName name = clusteringField.getField(); if(name == null){ throw new MissingAttributeException(clusteringField, PMMLAttributes.CLUSTERINGFIELD_FIELD); } FieldValue value = context.evaluate(name); values.add(value); } ClusterAffinityDistribution<V> result; Measure measure = MeasureUtil.ensureMeasure(comparisonMeasure); if(measure instanceof Similarity){ result = evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, values); } else if(measure instanceof Distance){ result = evaluateDistance(valueFactory, comparisonMeasure, clusteringFields, values); } else { throw new UnsupportedElementException(measure); } // "For clustering models, the identifier of the winning cluster is returned as the predictedValue" result.computeResult(DataType.STRING); return Collections.singletonMap(getTargetName(), result); }
Example #18
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Computes the similarity between the input values and every cluster of the model.
 *
 * <p>The input values are converted to a bit set once, and compared against
 * the cached bit set of each cluster.
 *
 * @param valueFactory factory handed to the measure calculations
 * @param comparisonMeasure the comparison measure of the model
 * @param clusteringFields the clustering fields the values belong to
 * @param values the input values, one per clustering field
 * @return a similarity-type distribution holding one entry per cluster
 * @throws InvalidElementException if the bit set of a cluster differs in size
 *         from the bit set of the input values
 */
private <V extends Number> ClusterAffinityDistribution<V> evaluateSimilarity(ValueFactory<V> valueFactory, ComparisonMeasure comparisonMeasure, List<ClusteringField> clusteringFields, List<FieldValue> values){
    ClusteringModel clusteringModel = getModel();

    List<Cluster> clusters = clusteringModel.getClusters();

    BitSet flags = MeasureUtil.toBitSet(values);

    ClusterAffinityDistribution<V> distribution = createClusterAffinityDistribution(Classification.Type.SIMILARITY, clusters);

    for(Cluster cluster : clusters){
        BitSet clusterFlags = CacheUtil.getValue(cluster, ClusteringModelEvaluator.clusterFlagCache);

        // NOTE(review): BitSet.size() reports the allocated bit capacity rather than
        // the number of flags, so this check may not detect a differing flag count - confirm intent.
        if(flags.size() != clusterFlags.size()){
            throw new InvalidElementException(cluster);
        }

        distribution.put(cluster, MeasureUtil.evaluateSimilarity(valueFactory, comparisonMeasure, clusteringFields, flags, clusterFlags));
    }

    return distribution;
}
Example #19
Source File: ClusteringModelEvaluator.java From jpmml-evaluator with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Builds the bidirectional map of the model's clusters by passing
 * {@code getClusters()} to {@code EntityUtil.buildBiMap}.
 *
 * @param clusteringModel the clustering model whose clusters are mapped
 * @return a {@link BiMap} with {@code String} keys and {@link Cluster} values;
 *         presumably keyed by cluster identifier - confirm against EntityUtil
 */
@Override public BiMap<String, Cluster> load(ClusteringModel clusteringModel){ return EntityUtil.buildBiMap(clusteringModel.getClusters()); }
Example #20
Source File: KMeansPMMLUtilsTest.java From oryx with Apache License 2.0 | 4 votes |
/**
 * Builds a small PMML document for tests: two continuous double fields,
 * "x" and "y", and a center-based clustering model with three clusters
 * of sizes 1, 2 and 3.
 *
 * @return the PMML document containing the data dictionary and the clustering model
 */
public static PMML buildDummyClusteringModel() {
  PMML pmml = PMMLUtils.buildSkeletonPMML();

  String[] fieldNames = {"x", "y"};

  List<DataField> dataFields = new ArrayList<>();
  List<MiningField> miningFields = new ArrayList<>();
  List<ClusteringField> clusteringFields = new ArrayList<>();

  // Every field gets a data field, an active mining field and a center clustering field
  for (String fieldName : fieldNames) {
    dataFields.add(
        new DataField(FieldName.create(fieldName), OpType.CONTINUOUS, DataType.DOUBLE));
    miningFields.add(new MiningField(FieldName.create(fieldName))
        .setOpType(OpType.CONTINUOUS).setUsageType(MiningField.UsageType.ACTIVE));
    clusteringFields.add(new ClusteringField(
        FieldName.create(fieldName)).setCenterField(ClusteringField.CenterField.TRUE));
  }

  DataDictionary dataDictionary =
      new DataDictionary(dataFields).setNumberOfFields(dataFields.size());
  pmml.setDataDictionary(dataDictionary);

  MiningSchema miningSchema = new MiningSchema(miningFields);

  List<Cluster> clusters = new ArrayList<>();
  clusters.add(new Cluster().setId("0").setSize(1).setArray(AppPMMLUtils.toArray(1.0, 0.0)));
  clusters.add(new Cluster().setId("1").setSize(2).setArray(AppPMMLUtils.toArray(2.0, -1.0)));
  clusters.add(new Cluster().setId("2").setSize(3).setArray(AppPMMLUtils.toArray(-1.0, 0.0)));

  ClusteringModel clusteringModel = new ClusteringModel(
      MiningFunction.CLUSTERING,
      ClusteringModel.ModelClass.CENTER_BASED,
      clusters.size(),
      miningSchema,
      new ComparisonMeasure(ComparisonMeasure.Kind.DISTANCE, new SquaredEuclidean()),
      clusteringFields,
      clusters);
  pmml.addModels(clusteringModel);

  return pmml;
}