org.apache.spark.ml.Transformer Java Examples

The following examples show how to use org.apache.spark.ml.Transformer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RFormulaModelConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public void registerFeatures(SparkMLEncoder encoder){
	RFormulaModel rFormulaModel = getTransformer();

	ResolvedRFormula formula = rFormulaModel.resolvedFormula();

	// The resolved formula's label may differ from the model's configured label
	// column; if so, alias the target's features under the label column name.
	String target = formula.label();
	String label = rFormulaModel.getLabelCol();

	if(!target.equals(label)){
		List<Feature> features = encoder.getFeatures(target);

		encoder.putFeatures(label, features);
	}

	PipelineModel pipelineModel = rFormulaModel.pipelineModel();

	ConverterFactory factory = encoder.getConverterFactory();

	// Register the features of every embedded pipeline stage; only feature
	// converters are meaningful inside an RFormula preprocessing pipeline.
	for(Transformer stage : pipelineModel.stages()){
		TransformerConverter<?> converter = factory.newConverter(stage);

		if(!(converter instanceof FeatureConverter)){
			throw new IllegalArgumentException("Expected a subclass of " + FeatureConverter.class.getName() + ", got " + (converter != null ? ("class " + (converter.getClass()).getName()) : null));
		}

		FeatureConverter<?> featureConverter = (FeatureConverter<?>)converter;

		featureConverter.registerFeatures(encoder);
	}
}
 
Example #2
Source File: MultiFeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
static
public <T extends Transformer & HasOutputCol & HasOutputCols> FieldName formatName(T transformer, int index){

	// When the multi-column output param is explicitly set, pick the column at the requested index
	if(transformer.isSet(transformer.outputCols())){
		String[] outputCols = transformer.getOutputCols();

		return FieldName.create(outputCols[index]);
	}

	// Single-column output: only index 0 is valid
	if(index != 0){
		throw new IllegalArgumentException();
	}

	return FieldName.create(transformer.getOutputCol());
}
 
Example #3
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public <T extends Transformer> String[] getOutputCols(T transformer){

	// Only single-output-column transformers are accepted here
	if(!(transformer instanceof HasOutputCol)){
		throw new IllegalArgumentException();
	}

	String outputCol = ((HasOutputCol)transformer).getOutputCol();

	return new String[]{outputCol};
}
 
Example #4
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public <T extends Transformer> String[] getOutputCols(T transformer){

	// Only multi-output-column transformers are accepted here
	if(!(transformer instanceof HasOutputCols)){
		throw new IllegalArgumentException();
	}

	return ((HasOutputCols)transformer).getOutputCols();
}
 
Example #5
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
static
public <T extends Transformer & HasOutputCol> FieldName formatName(T transformer, int index, int length){
	String outputCol = transformer.getOutputCol();

	// With multiple output values, disambiguate by appending the element index
	if(length > 1){
		outputCol = outputCol + "[" + index + "]";
	}

	return FieldName.create(outputCol);
}
 
Example #6
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
static
protected <T extends Transformer> InOutMode getOutputMode(T transformer){

	// Single-column param marker takes precedence over the multi-column one
	if(transformer instanceof HasOutputCol){
		return InOutMode.SINGLE;
	}

	if(transformer instanceof HasOutputCols){
		return InOutMode.MULTIPLE;
	}

	// Transformer declares no output column params at all
	return null;
}
 
Example #7
Source File: ConverterFactory.java    From jpmml-sparkml with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Instantiates and configures the converter registered for the given transformer's class.
 *
 * @param transformer the Spark ML transformer to convert
 * @return a configured converter; never {@code null}
 * @throws IllegalArgumentException if no converter class is registered for the
 *         transformer's class, or if the converter cannot be instantiated reflectively
 */
public TransformerConverter<?> newConverter(Transformer transformer){
	Class<? extends Transformer> clazz = transformer.getClass();

	Class<? extends TransformerConverter<?>> converterClazz = ConverterFactory.converters.get(clazz);
	if(converterClazz == null){
		throw new IllegalArgumentException("Transformer class " + clazz.getName() + " is not supported");
	}

	TransformerConverter<?> converter;

	try {
		// Converters are expected to declare a single-argument constructor taking the transformer
		Constructor<? extends TransformerConverter<?>> converterConstructor = converterClazz.getDeclaredConstructor(clazz);

		converter = converterConstructor.newInstance(transformer);
	} catch(ReflectiveOperationException roe){
		throw new IllegalArgumentException("Transformer class " + clazz.getName() + " is not supported", roe);
	}

	// Constructor.newInstance() never returns null, so the former null check was dead code;
	// the converter is always configured with the options matched by its uid.
	Map<RegexKey, ? extends Map<String, ?>> options = getOptions();

	Map<String, Object> converterOptions = new LinkedHashMap<>();

	// Collect options from every key whose pattern matches this transformer's uid,
	// in iteration order, later matches overwriting earlier ones.
	options.entrySet().stream()
		.filter(entry -> (entry.getKey()).test(transformer.uid()))
		.map(Map.Entry::getValue)
		.forEach(converterOptions::putAll);

	converter.setOptions(converterOptions);

	return converter;
}
 
Example #8
Source File: PipelineModelInfoAdapter.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo stages[] = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
 
Example #9
Source File: PipelineModelInfoAdapter.java    From spark-transformers with Apache License 2.0 5 votes vote down vote up
@Override
public PipelineModelInfo getModelInfo(final PipelineModel from, final DataFrame df) {
    final PipelineModelInfo modelInfo = new PipelineModelInfo();
    final ModelInfo stages[] = new ModelInfo[from.stages().length];
    for (int i = 0; i < from.stages().length; i++) {
        Transformer sparkModel = from.stages()[i];
        stages[i] = ModelInfoAdapterFactory.getAdapter(sparkModel.getClass()).adapt(sparkModel, df);
    }
    modelInfo.setStages(stages);
    return modelInfo;
}
 
Example #10
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer){
	return new String[]{transformer.getInputCol()};
}
 
Example #11
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer){
	return transformer.getInputCols();
}
 
Example #12
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
abstract
public <T extends Transformer & HasInputCol & HasInputCols> String[] getInputCols(T transformer);
 
Example #13
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
abstract
public <T extends Transformer> String[] getOutputCols(T transformer);
 
Example #14
Source File: FeatureConverter.java    From jpmml-sparkml with GNU Affero General Public License v3.0 4 votes vote down vote up
static
public <T extends Transformer & HasOutputCol> FieldName formatName(T transformer){
	return FieldName.create(transformer.getOutputCol());
}
 
Example #15
Source File: TransitionClassifier.java    From vn.vitk with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Trains a transition classifier (a multilayer perceptron) on a data frame
 * built from the given dependency graphs, then persists both the trained
 * model and its preprocessing pipeline under {@code classifierFileName}.
 * @param jsc the Java Spark context used to build data frames and save the model
 * @param graphs dependency graphs from which training rows are extracted
 * @param featureFrame the feature extraction configuration applied to the graphs
 * @param classifierFileName base path; the model is written to "&lt;path&gt;/data"
 *        and the fitted pipeline to "&lt;path&gt;/pipelineModel"
 * @param numHiddenUnits if positive, a 3-layer MLP (input/hidden/output) is
 *        trained; otherwise a 2-layer MLP with no hidden layer
 * @return a transition classifier.
 */
public Transformer trainMLP(JavaSparkContext jsc,
		List<DependencyGraph> graphs, FeatureFrame featureFrame,
		String classifierFileName, int numHiddenUnits) {
	// create a SQLContext
	this.sqlContext = new SQLContext(jsc);
	// extract a data frame from these graphs
	DataFrame dataset = toDataFrame(jsc, graphs, featureFrame);
	
	// create a processing pipeline and fit it to the data frame
	Pipeline pipeline = createPipeline();
	PipelineModel pipelineModel = pipeline.fit(dataset);
	DataFrame trainingData = pipelineModel.transform(dataset);
	
	// cache the training data for better performance
	trainingData.cache();
	
	if (verbose) {
		trainingData.show(false);
	}
	
	// compute the number of different labels, which is the maximum element 
	// in the 'label' column.
	trainingData.registerTempTable("dfTable");
	Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
	int numLabels = (int)row.getDouble(0);
	numLabels++;
	
	// NOTE(review): assumes the CountVectorizerModel is the second stage of
	// createPipeline()'s pipeline — confirm against createPipeline().
	int vocabSize = ((CountVectorizerModel)(pipelineModel.stages()[1])).getVocabSize();
	
	// default is a two-layer MLP
	int[] layers = {vocabSize, numLabels};
	// if user specify a hidden layer, use a 3-layer MLP:
	if (numHiddenUnits > 0) {
		layers = new int[3];
		layers[0] = vocabSize;
		layers[1] = numHiddenUnits;
		layers[2] = numLabels;
	}
	// tolerance and max iterations come from the instance's parameter holder
	MultilayerPerceptronClassifier classifier = new MultilayerPerceptronClassifier()
		.setLayers(layers)
		.setBlockSize(128)
		.setSeed(1234L)
		.setTol((Double)params.getOrDefault(params.getTolerance()))
		.setMaxIter((Integer)params.getOrDefault(params.getMaxIter()));
	MultilayerPerceptronClassificationModel model = classifier.fit(trainingData);
	
	// compute precision on the training data
	//
	DataFrame result = model.transform(trainingData);
	DataFrame predictionAndLabel = result.select("prediction", "label");
	MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
	if (verbose) {
		System.out.println("N = " + trainingData.count());
		System.out.println("D = " + vocabSize);
		System.out.println("K = " + numLabels);
		System.out.println("H = " + numHiddenUnits);
		System.out.println("training precision = " + evaluator.evaluate(predictionAndLabel));
	}
	
	// save the trained MLP to a file
	//
	String classifierPath = new Path(classifierFileName, "data").toString();
	jsc.parallelize(Arrays.asList(model), 1).saveAsObjectFile(classifierPath);
	// save the pipeline model to sub-directory "pipelineModel"
	// 
	// NOTE(review): a failure to persist the pipeline is only printed, not
	// propagated — the caller cannot tell the save failed. Consider rethrowing.
	try {
		String pipelinePath = new Path(classifierFileName, "pipelineModel").toString(); 
		pipelineModel.write().overwrite().save(pipelinePath);
	} catch (IOException e) {
		e.printStackTrace();
	}
	return model;
}
 
Example #16
Source File: TransformerBuilder.java    From jpmml-evaluator-spark with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Builds the transformer: a bare PMMLTransformer, or — in exploded mode — a
 * pipeline that additionally expands the struct output column into top-level
 * columns and drops the struct column itself.
 */
public Transformer build(){
	Evaluator evaluator = getEvaluator();

	PMMLTransformer pmmlTransformer = new PMMLTransformer(evaluator, this.columnProducers);

	if(!this.exploded){
		return pmmlTransformer;
	}

	String outputCol = pmmlTransformer.getOutputCol();

	ColumnExploder columnExploder = new ColumnExploder(outputCol);
	ColumnPruner columnPruner = new ColumnPruner(new Set.Set1<>(outputCol));

	return new PipelineModel(null, new Transformer[]{pmmlTransformer, columnExploder, columnPruner});
}