org.apache.spark.ml.Transformer Java Examples
The following examples show how to use
org.apache.spark.ml.Transformer.
The original project and source file are noted above each example.
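Before turning to the project-specific examples, here is a minimal, standalone sketch of the basic Transformer contract: a Transformer maps one Dataset to another via transform(), typically by appending columns. This sketch is not taken from any of the projects below; the input rows, column names, and the local master setting are illustrative assumptions.

import org.apache.spark.ml.Transformer;
import org.apache.spark.ml.feature.Tokenizer;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

public class TransformerDemo {

    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("TransformerDemo")
            .master("local[*]")
            .getOrCreate();

        // a tiny input frame with a single text column (assumed data)
        Dataset<Row> input = spark.createDataFrame(
            java.util.Arrays.asList(
                RowFactory.create("hello spark ml"),
                RowFactory.create("transformers map one data frame to another")),
            new StructType().add("text", DataTypes.StringType));

        // Tokenizer is a concrete Transformer: transform() appends a new column
        Transformer tokenizer = new Tokenizer()
            .setInputCol("text")
            .setOutputCol("words");

        Dataset<Row> output = tokenizer.transform(input);
        output.show(false);

        spark.stop();
    }
}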
Example #1
Source File: RFormulaModelConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
@Override
public void registerFeatures(SparkMLEncoder encoder){
    RFormulaModel transformer = getTransformer();

    ResolvedRFormula resolvedFormula = transformer.resolvedFormula();

    String targetCol = resolvedFormula.label();
    String labelCol = transformer.getLabelCol();
    if(!(targetCol).equals(labelCol)){
        List<Feature> features = encoder.getFeatures(targetCol);

        encoder.putFeatures(labelCol, features);
    }

    ConverterFactory converterFactory = encoder.getConverterFactory();

    PipelineModel pipelineModel = transformer.pipelineModel();

    Transformer[] stages = pipelineModel.stages();
    for(Transformer stage : stages){
        TransformerConverter<?> converter = converterFactory.newConverter(stage);

        if(converter instanceof FeatureConverter){
            FeatureConverter<?> featureConverter = (FeatureConverter<?>)converter;

            featureConverter.registerFeatures(encoder);
        } else {
            throw new IllegalArgumentException("Expected a subclass of " + FeatureConverter.class.getName() + ", got " + (converter != null ? ("class " + (converter.getClass()).getName()) : null));
        }
    }
}
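For context, RFormulaModel is the fitted form of Spark's RFormula feature transformer, and it carries a nested PipelineModel whose stages the converter above walks. A minimal sketch of how such a model might be produced on the Spark side; the data frame df and its column names y, x1, x2 are assumptions, not part of the project above.

// fit an R-style formula against an assumed frame df with columns "y", "x1", "x2"
RFormula formula = new RFormula()
    .setFormula("y ~ x1 + x2")
    .setFeaturesCol("features")
    .setLabelCol("label");

RFormulaModel rFormulaModel = formula.fit(df);

// appends the assembled "features" vector and the "label" column
Dataset<Row> prepared = rFormulaModel.transform(df);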
Example #2
Source File: MultiFeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
static public <T extends Transformer & HasOutputCol & HasOutputCols> FieldName formatName(T transformer, int index){

    if(transformer.isSet(transformer.outputCols())){
        return FieldName.create(transformer.getOutputCols()[index]);
    } // End if

    if(index != 0){
        throw new IllegalArgumentException();
    }

    return FieldName.create(transformer.getOutputCol());
}
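The method prefers the multi-column parameter when it is set and falls back to the single output column otherwise. As one concrete case, Spark's Bucketizer mixes in both HasOutputCol and HasOutputCols; a sketch of it configured in multi-column mode (all column names and split points here are assumptions):

// because outputCols is set, formatName(bucketizer, 1) would resolve to "out2"
Bucketizer bucketizer = new Bucketizer()
    .setInputCols(new String[]{"in1", "in2"})
    .setOutputCols(new String[]{"out1", "out2"})
    .setSplitsArray(new double[][]{
        {Double.NEGATIVE_INFINITY, 0.0, Double.POSITIVE_INFINITY},
        {Double.NEGATIVE_INFINITY, 1.0, Double.POSITIVE_INFINITY}
    });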
Example #3
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
@Override
public <T extends Transformer> String[] getOutputCols(T transformer){

    if(transformer instanceof HasOutputCol){
        HasOutputCol hasOutputCol = (HasOutputCol)transformer;

        return new String[]{hasOutputCol.getOutputCol()};
    }

    throw new IllegalArgumentException();
}
Example #4
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
@Override
public <T extends Transformer> String[] getOutputCols(T transformer){

    if(transformer instanceof HasOutputCols){
        HasOutputCols hasOutputCols = (HasOutputCols)transformer;

        return hasOutputCols.getOutputCols();
    }

    throw new IllegalArgumentException();
}
Example #5
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
static public <T extends Transformer & HasOutputCol> FieldName formatName(T transformer, int index, int length){

    if(length > 1){
        return FieldName.create(transformer.getOutputCol() + "[" + index + "]");
    }

    return FieldName.create(transformer.getOutputCol());
}
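When a single output column expands into several generated fields, the index is appended in square brackets. Hypothetical calls, assuming a transformer variable scaler whose getOutputCol() returns "scaled":

formatName(scaler, 0, 3);  // FieldName "scaled[0]"
formatName(scaler, 2, 3);  // FieldName "scaled[2]"
formatName(scaler, 0, 1);  // FieldName "scaled" (single field, no index)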
Example #6
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
static protected <T extends Transformer> InOutMode getOutputMode(T transformer){

    if(transformer instanceof HasOutputCol){
        return InOutMode.SINGLE;
    } else if(transformer instanceof HasOutputCols){
        return InOutMode.MULTIPLE;
    }

    return null;
}
Example #7
Source File: ConverterFactory.java From jpmml-sparkml with GNU Affero General Public License v3.0
public TransformerConverter<?> newConverter(Transformer transformer){
    Class<? extends Transformer> clazz = transformer.getClass();

    Class<? extends TransformerConverter<?>> converterClazz = ConverterFactory.converters.get(clazz);
    if(converterClazz == null){
        throw new IllegalArgumentException("Transformer class " + clazz.getName() + " is not supported");
    }

    TransformerConverter<?> converter;

    try {
        Constructor<? extends TransformerConverter<?>> converterConstructor = converterClazz.getDeclaredConstructor(clazz);

        converter = converterConstructor.newInstance(transformer);
    } catch(ReflectiveOperationException roe){
        throw new IllegalArgumentException("Transformer class " + clazz.getName() + " is not supported", roe);
    }

    if(converter != null){
        Map<RegexKey, ? extends Map<String, ?>> options = getOptions();

        Map<String, Object> converterOptions = new LinkedHashMap<>();

        options.entrySet().stream()
            .filter(entry -> (entry.getKey()).test(transformer.uid()))
            .map(entry -> entry.getValue())
            .forEach(converterOptions::putAll);

        converter.setOptions(converterOptions);
    }

    return converter;
}
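The factory pairs a class-keyed registry with a reflective one-argument constructor call. A hypothetical, self-contained illustration of the same lookup pattern follows; the class and type names are made up for illustration and are not jpmml-sparkml's actual registry.

import java.lang.reflect.Constructor;
import java.util.LinkedHashMap;
import java.util.Map;

// minimal registry: transformer subclass -> converter class whose
// single-argument constructor accepts that subclass
class SimpleConverterRegistry<C> {

    private final Map<Class<?>, Class<? extends C>> converters = new LinkedHashMap<>();

    public void register(Class<?> transformerClazz, Class<? extends C> converterClazz) {
        this.converters.put(transformerClazz, converterClazz);
    }

    public C newConverter(Object transformer) {
        Class<?> clazz = transformer.getClass();

        Class<? extends C> converterClazz = this.converters.get(clazz);
        if (converterClazz == null) {
            throw new IllegalArgumentException(clazz.getName() + " is not supported");
        }

        try {
            // look up the constructor taking exactly this transformer class
            Constructor<? extends C> constructor = converterClazz.getDeclaredConstructor(clazz);
            return constructor.newInstance(transformer);
        } catch (ReflectiveOperationException roe) {
            throw new IllegalArgumentException(clazz.getName() + " is not supported", roe);
        }
    }
}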
Example #8
Source File: PipelineModelInfoAdapter.java From spark-transformers with Apache License 2.0
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();

    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel);
    }
    modelInfo.setStages(stages);

    return modelInfo;
}
Example #9
Source File: PipelineModelInfoAdapter.java From spark-transformers with Apache License 2.0
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from, final DataFrame df) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();

    final ModelInfo[] stages = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel, df);
    }
    modelInfo.setStages(stages);

    return modelInfo;
}
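Aside from the extra DataFrame parameter, this is identical to Example #8: the data frame is simply forwarded to each stage's adapter, presumably so that adapters which need to inspect the data (for example, its schema) can do so while extracting model metadata.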
Example #10
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
@Override
public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer){
    return new String[]{transformer.getInputCol()};
}
Example #11
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
@Override
public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer){
    return transformer.getInputCols();
}
Example #12
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
abstract public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer);
Example #13
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
abstract public <T extends Transformer> String[] getOutputCols(T transformer);
Example #14
Source File: FeatureConverter.java From jpmml-sparkml with GNU Affero General Public License v3.0
static public <T extends Transformer & HasOutputCol> FieldName formatName(T transformer){
    return FieldName.create(transformer.getOutputCol());
}
Example #15
Source File: TransitionClassifier.java From vn.vitk with GNU General Public License v3.0
/**
 * Trains a transition classifier on the data frame.
 * @param jsc the Java Spark context
 * @param graphs the dependency graphs to train on
 * @param featureFrame the feature frame
 * @param classifierFileName the path where the classifier is saved
 * @param numHiddenUnits the number of hidden units (0 for no hidden layer)
 * @return a transition classifier.
 */
public Transformer trainMLP(JavaSparkContext jsc,
        List<DependencyGraph> graphs, FeatureFrame featureFrame,
        String classifierFileName, int numHiddenUnits) {
    // create a SQLContext
    this.sqlContext = new SQLContext(jsc);
    // extract a data frame from these graphs
    DataFrame dataset = toDataFrame(jsc, graphs, featureFrame);

    // create a processing pipeline and fit it to the data frame
    Pipeline pipeline = createPipeline();
    PipelineModel pipelineModel = pipeline.fit(dataset);
    DataFrame trainingData = pipelineModel.transform(dataset);

    // cache the training data for better performance
    trainingData.cache();

    if (verbose) {
        trainingData.show(false);
    }

    // compute the number of different labels, which is the maximum element
    // in the 'label' column.
    trainingData.registerTempTable("dfTable");
    Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
    int numLabels = (int)row.getDouble(0);
    numLabels++;

    int vocabSize = ((CountVectorizerModel)(pipelineModel.stages()[1])).getVocabSize();

    // default is a two-layer MLP
    int[] layers = {vocabSize, numLabels};
    // if the user specifies a hidden layer, use a 3-layer MLP:
    if (numHiddenUnits > 0) {
        layers = new int[3];
        layers[0] = vocabSize;
        layers[1] = numHiddenUnits;
        layers[2] = numLabels;
    }

    MultilayerPerceptronClassifier classifier = new MultilayerPerceptronClassifier()
        .setLayers(layers)
        .setBlockSize(128)
        .setSeed(1234L)
        .setTol((Double)params.getOrDefault(params.getTolerance()))
        .setMaxIter((Integer)params.getOrDefault(params.getMaxIter()));
    MultilayerPerceptronClassificationModel model = classifier.fit(trainingData);

    // compute precision on the training data
    DataFrame result = model.transform(trainingData);
    DataFrame predictionAndLabel = result.select("prediction", "label");
    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
    if (verbose) {
        System.out.println("N = " + trainingData.count());
        System.out.println("D = " + vocabSize);
        System.out.println("K = " + numLabels);
        System.out.println("H = " + numHiddenUnits);
        System.out.println("training precision = " + evaluator.evaluate(predictionAndLabel));
    }

    // save the trained MLP to a file
    String classifierPath = new Path(classifierFileName, "data").toString();
    jsc.parallelize(Arrays.asList(model), 1).saveAsObjectFile(classifierPath);

    // save the pipeline model to sub-directory "pipelineModel"
    try {
        String pipelinePath = new Path(classifierFileName, "pipelineModel").toString();
        pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
        e.printStackTrace();
    }

    return model;
}
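A sketch of how the artifacts written above might be read back later. This loader is an assumption for illustration rather than code from vn.vitk; it mirrors the save paths used in trainMLP, and newData stands in for an unseen data frame.

// load the fitted preprocessing pipeline from the "pipelineModel" sub-directory
String pipelinePath = new Path(classifierFileName, "pipelineModel").toString();
PipelineModel pipelineModel = PipelineModel.load(pipelinePath);

// load the MLP that was written with saveAsObjectFile(...) under "data"
String classifierPath = new Path(classifierFileName, "data").toString();
MultilayerPerceptronClassificationModel model =
        (MultilayerPerceptronClassificationModel) jsc.objectFile(classifierPath).first();

// apply both transformers to new data
DataFrame features = pipelineModel.transform(newData);
DataFrame predictions = model.transform(features);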
Example #16
Source File: TransformerBuilder.java From jpmml-evaluator-spark with GNU Affero General Public License v3.0
public Transformer build(){
    Evaluator evaluator = getEvaluator();

    PMMLTransformer pmmlTransformer = new PMMLTransformer(evaluator, this.columnProducers);

    if(this.exploded){
        ColumnExploder columnExploder = new ColumnExploder(pmmlTransformer.getOutputCol());

        ColumnPruner columnPruner = new ColumnPruner(new Set.Set1<>(pmmlTransformer.getOutputCol()));

        PipelineModel pipelineModel = new PipelineModel(null, new Transformer[]{pmmlTransformer, columnExploder, columnPruner});

        return pipelineModel;
    }

    return pmmlTransformer;
}
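Whichever branch is taken, the caller receives a plain org.apache.spark.ml.Transformer, so downstream code stays agnostic of whether the result is the bare PMMLTransformer or the three-stage PipelineModel. A usage sketch, where transformerBuilder and the input frame are assumptions:

Transformer pmmlTransformer = transformerBuilder.build();

// standard Transformer contract: one data frame in, one data frame out
Dataset<Row> results = pmmlTransformer.transform(inputFrame);
results.show();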