org.apache.spark.ml.feature.StringIndexerModel Java Examples
The following examples show how to use
org.apache.spark.ml.feature.StringIndexerModel.
You can vote up the examples you like or vote down the ones you don't like.
To view the original project or source file, follow the links above each example. You may also check out the related API usage on the sidebar.
Example #1
Source File: StringIndexerModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 6 votes |
/**
 * Adapts a fitted Spark {@code StringIndexerModel} into a framework-independent
 * {@code StringIndexerModelInfo}: captures the label-to-index mapping plus the
 * model's input and output column names.
 *
 * @param from the fitted Spark string-indexer model
 * @return a populated model-info object mirroring {@code from}
 */
@Override
public StringIndexerModelInfo getModelInfo(final StringIndexerModel from) {
    // A label's numeric index is simply its position in labels().
    final String[] labelArray = from.labels();
    final Map<String, Double> indexByLabel = new HashMap<String, Double>();
    for (int pos = 0; pos < labelArray.length; pos++) {
        indexByLabel.put(labelArray[pos], (double) pos);
    }

    final StringIndexerModelInfo info = new StringIndexerModelInfo();
    info.setLabelToIndex(indexByLabel);

    // LinkedHashSet keeps key iteration order deterministic.
    Set<String> inputs = new LinkedHashSet<String>();
    inputs.add(from.getInputCol());
    info.setInputKeys(inputs);

    Set<String> outputs = new LinkedHashSet<String>();
    outputs.add(from.getOutputCol());
    info.setOutputKeys(outputs);

    return info;
}
Example #2
Source File: StringIndexerModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 6 votes |
/**
 * Adapts a fitted Spark {@code StringIndexerModel} into a framework-independent
 * {@code StringIndexerModelInfo}. Captures the label-to-index mapping and the
 * input/output column names.
 *
 * @param from the fitted Spark string-indexer model
 * @param df   sample DataFrame required by the adapter interface (unused here)
 * @return a populated model-info object mirroring {@code from}
 */
@Override
public StringIndexerModelInfo getModelInfo(final StringIndexerModel from, DataFrame df) {
    // Each label maps to its position in the labels() array.
    final String[] labelArray = from.labels();
    final Map<String, Double> indexByLabel = new HashMap<String, Double>();
    for (int pos = 0; pos < labelArray.length; pos++) {
        indexByLabel.put(labelArray[pos], (double) pos);
    }

    final StringIndexerModelInfo info = new StringIndexerModelInfo();
    info.setLabelToIndex(indexByLabel);

    // LinkedHashSet keeps key iteration order deterministic.
    Set<String> inputs = new LinkedHashSet<String>();
    inputs.add(from.getInputCol());
    info.setInputKeys(inputs);

    Set<String> outputs = new LinkedHashSet<String>();
    outputs.add(from.getOutputCol());
    info.setOutputKeys(outputs);

    return info;
}
Example #3
Source File: JavaOneHotEncoderExample.java From SparkDemo with MIT License | 5 votes |
public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaOneHotEncoderExample") .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( RowFactory.create(0, "a"), RowFactory.create(1, "b"), RowFactory.create(2, "c"), RowFactory.create(3, "a"), RowFactory.create(4, "a"), RowFactory.create(5, "c") ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("category", DataTypes.StringType, false, Metadata.empty()) }); Dataset<Row> df = spark.createDataFrame(data, schema); StringIndexerModel indexer = new StringIndexer() .setInputCol("category") .setOutputCol("categoryIndex") .fit(df); Dataset<Row> indexed = indexer.transform(df); OneHotEncoder encoder = new OneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec"); Dataset<Row> encoded = encoder.transform(indexed); encoded.show(); // $example off$ spark.stop(); }
Example #4
Source File: CMMModel.java From vn.vitk with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a conditional Markov model.
 *
 * @param pipelineModel fitted pipeline; per the casts below, stage 1 is a
 *                      CountVectorizerModel and stage 2 a StringIndexerModel
 * @param weights       model weight vector
 * @param markovOrder   Markov order used by the context extractor
 * @param tagDictionary map from a string key to its set of admissible tag indices
 */
public CMMModel(PipelineModel pipelineModel, Vector weights, MarkovOrder markovOrder,
        Map<String, Set<Integer>> tagDictionary) {
    this.pipelineModel = pipelineModel;
    this.contextExtractor = new ContextExtractor(markovOrder, Constants.REGEXP_FILE);
    this.weights = weights;
    // Tag names come from the string indexer fitted inside the pipeline.
    this.tags = ((StringIndexerModel) (pipelineModel.stages()[2])).labels();
    // Build a vocabulary lookup: feature string -> its column index.
    String[] vocabulary = ((CountVectorizerModel) (pipelineModel.stages()[1])).vocabulary();
    featureMap = new HashMap<String, Integer>();
    for (int idx = 0; idx < vocabulary.length; idx++) {
        featureMap.put(vocabulary[idx], idx);
    }
    this.tagDictionary = tagDictionary;
}
Example #5
Source File: TransitionBasedParserMLP.java From vn.vitk with GNU General Public License v3.0 | 5 votes |
/**
 * Creates a transition-based parser using a MLP transition classifier.
 *
 * @param jsc                Spark context used to load the classifier
 * @param classifierFileName base path containing "data" (classifier) and
 *                           "pipelineModel" subpaths
 * @param featureFrame       feature frame used by the parser
 */
public TransitionBasedParserMLP(JavaSparkContext jsc, String classifierFileName,
        FeatureFrame featureFrame) {
    this.featureFrame = featureFrame;
    // Classifier weights and the preprocessing pipeline are stored side by side.
    this.classifier = TransitionClassifier.load(jsc, new Path(classifierFileName, "data").toString());
    this.pipelineModel = PipelineModel.load(new Path(classifierFileName, "pipelineModel").toString());
    // Transition names come from the string indexer fitted in the pipeline.
    this.transitionName = ((StringIndexerModel) pipelineModel.stages()[2]).labels();
    // Build a vocabulary lookup: feature string -> its column index.
    String[] vocabulary = ((CountVectorizerModel) (pipelineModel.stages()[1])).vocabulary();
    this.featureMap = new HashMap<String, Integer>();
    for (int idx = 0; idx < vocabulary.length; idx++) {
        this.featureMap.put(vocabulary[idx], idx);
    }
}
Example #6
Source File: StringIndexerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testStringIndexer() { //prepare data StructType schema = createStructType(new StructField[]{ createStructField("id", IntegerType, false), createStructField("label", StringType, false) }); List<Row> trainingData = Arrays.asList( cr(0, "a"), cr(1, "b"), cr(2, "c"), cr(3, "a"), cr(4, "a"), cr(5, "c")); Dataset<Row> dataset = spark.createDataFrame(trainingData, schema); //train model in spark StringIndexerModel model = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex").fit(dataset); //Export this model byte[] exportedModel = ModelExporter.export(model); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions List<Row> sparkOutput = model.transform(dataset).orderBy("id").select("id", "label", "labelIndex").collectAsList(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<String, Object>(); data.put(model.getInputCol(), (String) row.get(1)); transformer.transform(data); double output = (double) data.get(model.getOutputCol()); double indexerOutput = (output); assertEquals(indexerOutput, (double) row.get(2), 0.01); } }
Example #7
Source File: StringIndexerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test(expected=RuntimeException.class) public void testStringIndexerForUnseenValues() { //prepare data StructType schema = createStructType(new StructField[]{ createStructField("id", IntegerType, false), createStructField("label", DoubleType, false) }); List<Row> trainingData = Arrays.asList( cr(0, 1.0), cr(1, 2.0), cr(2, 3.0), cr(3, 1.0), cr(4, 1.0), cr(5, 3.0)); DataFrame dataset = sqlContext.createDataFrame(trainingData, schema); //train model in spark StringIndexerModel model = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex").fit(dataset); //Export this model byte[] exportedModel = ModelExporter.export(model, dataset); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //unseen value Map<String, Object> data = new HashMap<String, Object>(); data.put(model.getInputCol(), 7.0); transformer.transform(data); }
Example #8
Source File: StringIndexerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testStringIndexerForDoubleColumn() { //prepare data StructType schema = createStructType(new StructField[]{ createStructField("id", IntegerType, false), createStructField("label", DoubleType, false) }); List<Row> trainingData = Arrays.asList( cr(0, 1.0), cr(1, 2.0), cr(2, 3.0), cr(3, 1.0), cr(4, 1.0), cr(5, 3.0)); DataFrame dataset = sqlContext.createDataFrame(trainingData, schema); //train model in spark StringIndexerModel model = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex").fit(dataset); //Export this model byte[] exportedModel = ModelExporter.export(model, dataset); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = model.transform(dataset).orderBy("id").select("id", "label", "labelIndex").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<String, Object>(); data.put(model.getInputCol(), row.getDouble(1)); transformer.transform(data); double indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, row.getDouble(2), EPSILON); } }
Example #9
Source File: StringIndexerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testStringIndexer() { //prepare data StructType schema = createStructType(new StructField[]{ createStructField("id", IntegerType, false), createStructField("label", StringType, false) }); List<Row> trainingData = Arrays.asList( cr(0, "a"), cr(1, "b"), cr(2, "c"), cr(3, "a"), cr(4, "a"), cr(5, "c")); DataFrame dataset = sqlContext.createDataFrame(trainingData, schema); //train model in spark StringIndexerModel model = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex").fit(dataset); //Export this model byte[] exportedModel = ModelExporter.export(model, dataset); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = model.transform(dataset).orderBy("id").select("id", "label", "labelIndex").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<String, Object>(); data.put(model.getInputCol(), (String) row.get(1)); transformer.transform(data); double indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, (double) row.get(2), EPSILON); } }
Example #10
Source File: DecisionTreeClassificationModelBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testDecisionTreeClassificationRawPrediction() { // Load the data stored in LIBSVM format as a DataFrame. DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm"); StringIndexerModel stringIndexerModel = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex") .fit(data); data = stringIndexerModel.transform(data); // Split the data into training and test sets (30% held out for testing) DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3}); DataFrame trainingData = splits[0]; DataFrame testData = splits[1]; // Train a DecisionTree model. DecisionTreeClassificationModel classificationModel = new DecisionTreeClassifier() .setLabelCol("labelIndex") .setFeaturesCol("features") .setRawPredictionCol("rawPrediction") .setPredictionCol("prediction") .fit(trainingData); byte[] exportedModel = ModelExporter.export(classificationModel, null); Transformer transformer = (DecisionTreeTransformer) ModelImporter.importAndGetTransformer(exportedModel); Row[] sparkOutput = classificationModel.transform(testData).select("features", "prediction", "rawPrediction").collect(); //compare predictions for (Row row : sparkOutput) { Vector inp = (Vector) row.get(0); double actual = row.getDouble(1); double[] actualRaw = ((Vector) row.get(2)).toArray(); Map<String, Object> inputData = new HashMap<>(); inputData.put(transformer.getInputKeys().iterator().next(), inp.toArray()); transformer.transform(inputData); double predicted = (double) inputData.get(transformer.getOutputKeys().iterator().next()); double[] rawPrediction = (double[]) inputData.get("rawPrediction"); assertEquals(actual, predicted, EPSILON); assertArrayEquals(actualRaw, rawPrediction, EPSILON); } }
Example #11
Source File: SparkMultiClassClassifier.java From mmtf-spark with Apache License 2.0 | 4 votes |
/** * Dataset must at least contain the following two columns: * label: the class labels * features: feature vector * @param data * @return map with metrics */ public Map<String,String> fit(Dataset<Row> data) { int classCount = (int)data.select(label).distinct().count(); StringIndexerModel labelIndexer = new StringIndexer() .setInputCol(label) .setOutputCol("indexedLabel") .fit(data); // Split the data into training and test sets (30% held out for testing) Dataset<Row>[] splits = data.randomSplit(new double[] {1.0-testFraction, testFraction}, seed); Dataset<Row> trainingData = splits[0]; Dataset<Row> testData = splits[1]; String[] labels = labelIndexer.labels(); System.out.println(); System.out.println("Class\tTrain\tTest"); for (String l: labels) { System.out.println(l + "\t" + trainingData.select(label).filter(label + " = '" + l + "'").count() + "\t" + testData.select(label).filter(label + " = '" + l + "'").count()); } // Set input columns predictor .setLabelCol("indexedLabel") .setFeaturesCol("features"); // Convert indexed labels back to original labels. IndexToString labelConverter = new IndexToString() .setInputCol("prediction") .setOutputCol("predictedLabel") .setLabels(labelIndexer.labels()); // Chain indexers and forest in a Pipeline Pipeline pipeline = new Pipeline() .setStages(new PipelineStage[] {labelIndexer, predictor, labelConverter}); // Train model. This also runs the indexers. PipelineModel model = pipeline.fit(trainingData); // Make predictions. 
Dataset<Row> predictions = model.transform(testData).cache(); // Display some sample predictions System.out.println(); System.out.println("Sample predictions: " + predictor.getClass().getSimpleName()); predictions.sample(false, 0.1, seed).show(25); predictions = predictions.withColumnRenamed(label, "stringLabel"); predictions = predictions.withColumnRenamed("indexedLabel", label); // collect metrics Dataset<Row> pred = predictions.select("prediction",label); Map<String,String> metrics = new LinkedHashMap<>(); metrics.put("Method", predictor.getClass().getSimpleName()); if (classCount == 2) { BinaryClassificationMetrics b = new BinaryClassificationMetrics(pred); metrics.put("AUC", Float.toString((float)b.areaUnderROC())); } MulticlassMetrics m = new MulticlassMetrics(pred); metrics.put("F", Float.toString((float)m.weightedFMeasure())); metrics.put("Accuracy", Float.toString((float)m.accuracy())); metrics.put("Precision", Float.toString((float)m.weightedPrecision())); metrics.put("Recall", Float.toString((float)m.weightedRecall())); metrics.put("False Positive Rate", Float.toString((float)m.weightedFalsePositiveRate())); metrics.put("True Positive Rate", Float.toString((float)m.weightedTruePositiveRate())); metrics.put("", "\nConfusion Matrix\n" + Arrays.toString(labels) +"\n" + m.confusionMatrix().toString()); return metrics; }
Example #12
Source File: RandomForestClassificationModelInfoAdapterBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testRandomForestClassification() { // Load the data stored in LIBSVM format as a DataFrame. DataFrame data = sqlContext.read().format("libsvm").load("src/test/resources/classification_test.libsvm"); StringIndexerModel stringIndexerModel = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex") .fit(data); data = stringIndexerModel.transform(data); // Split the data into training and test sets (30% held out for testing) DataFrame[] splits = data.randomSplit(new double[]{0.7, 0.3}); DataFrame trainingData = splits[0]; DataFrame testData = splits[1]; // Train a RandomForest model. RandomForestClassificationModel classificationModel = new RandomForestClassifier() .setLabelCol("labelIndex") .setFeaturesCol("features") .setPredictionCol("prediction") .setRawPredictionCol("rawPrediction") .setProbabilityCol("probability") .fit(trainingData); byte[] exportedModel = ModelExporter.export(classificationModel, null); Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); Row[] sparkOutput = classificationModel.transform(testData).select("features", "prediction", "rawPrediction", "probability").collect(); //compare predictions for (Row row : sparkOutput) { Vector v = (Vector) row.get(0); double actual = row.getDouble(1); double [] actualProbability = ((Vector) row.get(3)).toArray(); double[] actualRaw = ((Vector) row.get(2)).toArray(); Map<String, Object> inputData = new HashMap<String, Object>(); inputData.put(transformer.getInputKeys().iterator().next(), v.toArray()); transformer.transform(inputData); double predicted = (double) inputData.get("prediction"); double[] probability = (double[]) inputData.get("probability"); double[] rawPrediction = (double[]) inputData.get("rawPrediction"); assertEquals(actual, predicted, EPSILON); assertArrayEquals(actualProbability, probability, EPSILON); assertArrayEquals(actualRaw, rawPrediction, EPSILON); } }
Example #13
Source File: StringIndexerBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testStringIndexerForHandlingUnseenValues() { //prepare data StructType schema = createStructType(new StructField[]{ createStructField("id", IntegerType, false), createStructField("label", DoubleType, false) }); List<Row> trainingData = Arrays.asList( cr(0, 1.0), cr(1, 2.0), cr(2, 3.0), cr(3, 1.0), cr(4, 1.0), cr(5, 3.0)); DataFrame dataset = sqlContext.createDataFrame(trainingData, schema); //train model in spark StringIndexerModel model = new StringIndexer() .setInputCol("label") .setOutputCol("labelIndex").fit(dataset); //Export this model byte[] exportedModel = ModelExporter.export(model, dataset); StringIndexerModelInfo stringIndexerModelInfo = (StringIndexerModelInfo)ModelImporter.importModelInfo(exportedModel); stringIndexerModelInfo.setFailOnUnseenValues(false); //Import and get Transformer Transformer transformer = stringIndexerModelInfo.getTransformer(); //unseen value Map<String, Object> data = new HashMap<String, Object>(); data.put(model.getInputCol(), 7.0); transformer.transform(data); double indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, 3.0, EPSILON); //unseen value data.put(model.getInputCol(), 9.0); transformer.transform(data); indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, 3.0, EPSILON); //unseen value data.put(model.getInputCol(), 0.0); transformer.transform(data); indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, 3.0, EPSILON); //seen value data.put(model.getInputCol(), 2.0); transformer.transform(data); indexerOutput = (double) data.get(model.getOutputCol()); assertEquals(indexerOutput, stringIndexerModelInfo.getLabelToIndex().get("2.0"), EPSILON); }
Example #14
Source File: CustomOneHotEncoderBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testCustomOneHotEncoding() { // prepare data JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList( RowFactory.create(0d, "a"), RowFactory.create(1d, "b"), RowFactory.create(2d, "c"), RowFactory.create(3d, "a"), RowFactory.create(4d, "a"), RowFactory.create(5d, "c") )); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), new StructField("category", DataTypes.StringType, false, Metadata.empty()) }); DataFrame df = sqlContext.createDataFrame(jrdd, schema); StringIndexerModel indexer = new StringIndexer() .setInputCol("category") .setOutputCol("categoryIndex") .fit(df); DataFrame indexed = indexer.transform(df); CustomOneHotEncoderModel sparkModel = new CustomOneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec") .fit(indexed); //Export this model byte[] exportedModel = ModelExporter.export(sparkModel, indexed); //Create spark's OneHotEncoder OneHotEncoder sparkOneHotModel = new OneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec"); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = sparkModel.transform(indexed).orderBy("id").select("id", "categoryIndex", "categoryVec").collect(); Row[] sparkOneHotOutput = sparkOneHotModel.transform(indexed).orderBy("id").select("id", "categoryIndex", "categoryVec").collect(); //Compare Spark's OneHotEncoder with CustomOneHotEncoder //See if the dictionary size is equal assertEquals(sparkOutput.length, sparkOneHotOutput.length); for (int i = 0; i < sparkOutput.length; i++) { Row row = sparkOutput[i]; Map<String, Object> data = new HashMap<String, Object>(); data.put(sparkModel.getInputCol(), row.getDouble(1)); transformer.transform(data); double[] transformedOp = (double[]) data.get(sparkModel.getOutputCol()); double[] sparkOp = ((Vector) row.get(2)).toArray(); //get spark's OneHotEncoder output double[] 
sparkOneHotOp = ((Vector) sparkOneHotOutput[i].get(2)).toArray(); assertArrayEquals(transformedOp, sparkOp, EPSILON); assertArrayEquals(sparkOneHotOp, sparkOp, EPSILON); } }
Example #15
Source File: OneHotEncoderBridgeTest.java From spark-transformers with Apache License 2.0 | 4 votes |
@Test public void testOneHotEncoding() { // prepare data JavaRDD<Row> jrdd = sc.parallelize(Arrays.asList( RowFactory.create(0d, "a"), RowFactory.create(1d, "b"), RowFactory.create(2d, "c"), RowFactory.create(3d, "a"), RowFactory.create(4d, "a"), RowFactory.create(5d, "c") )); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.DoubleType, false, Metadata.empty()), new StructField("category", DataTypes.StringType, false, Metadata.empty()) }); DataFrame df = sqlContext.createDataFrame(jrdd, schema); StringIndexerModel indexer = new StringIndexer() .setInputCol("category") .setOutputCol("categoryIndex") .fit(df); DataFrame indexed = indexer.transform(df); OneHotEncoder sparkModel = new OneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec"); //Export this model byte[] exportedModel = ModelExporter.export(sparkModel, indexed); //Import and get Transformer Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = sparkModel.transform(indexed).orderBy("id").select("id", "categoryIndex", "categoryVec").collect(); for (Row row : sparkOutput) { Map<String, Object> data = new HashMap<String, Object>(); data.put(sparkModel.getInputCol(), row.getDouble(1)); transformer.transform(data); double[] transformedOp = (double[]) data.get(sparkModel.getOutputCol()); double[] sparkOp = ((Vector) row.get(2)).toArray(); assertArrayEquals(transformedOp, sparkOp, EPSILON); } }
Example #16
Source File: StringIndexerModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 4 votes |
/**
 * Identifies the Spark model class this adapter handles.
 *
 * @return {@code StringIndexerModel.class}
 */
@Override
public Class<StringIndexerModel> getSource() {
    return StringIndexerModel.class;
}
Example #17
Source File: StringIndexerModelInfoAdapter.java From spark-transformers with Apache License 2.0 | 4 votes |
/**
 * Identifies the Spark model class this adapter handles.
 *
 * @return {@code StringIndexerModel.class}
 */
@Override
public Class<StringIndexerModel> getSource() {
    return StringIndexerModel.class;
}
Example #18
Source File: StringIndexerModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates a converter wrapping the given fitted Spark string-indexer model.
 *
 * @param transformer the fitted {@code StringIndexerModel} to convert
 */
public StringIndexerModelConverter(StringIndexerModel transformer) {
    super(transformer);
}
Example #19
Source File: JavaIndexToStringExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaIndexToStringExample") .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( RowFactory.create(0, "a"), RowFactory.create(1, "b"), RowFactory.create(2, "c"), RowFactory.create(3, "a"), RowFactory.create(4, "a"), RowFactory.create(5, "c") ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("category", DataTypes.StringType, false, Metadata.empty()) }); Dataset<Row> df = spark.createDataFrame(data, schema); StringIndexerModel indexer = new StringIndexer() .setInputCol("category") .setOutputCol("categoryIndex") .fit(df); Dataset<Row> indexed = indexer.transform(df); System.out.println("Transformed string column '" + indexer.getInputCol() + "' " + "to indexed column '" + indexer.getOutputCol() + "'"); indexed.show(); StructField inputColSchema = indexed.schema().apply(indexer.getOutputCol()); System.out.println("StringIndexer will store labels in output column metadata: " + Attribute.fromStructField(inputColSchema).toString() + "\n"); IndexToString converter = new IndexToString() .setInputCol("categoryIndex") .setOutputCol("originalCategory"); Dataset<Row> converted = converter.transform(indexed); System.out.println("Transformed indexed column '" + converter.getInputCol() + "' back to " + "original string column '" + converter.getOutputCol() + "' using labels in metadata"); converted.select("id", "categoryIndex", "originalCategory").show(); // $example off$ spark.stop(); }