org.apache.spark.ml.classification.MultilayerPerceptronClassifier Java Examples
The following examples show how to use
org.apache.spark.ml.classification.MultilayerPerceptronClassifier.
The project and license for each example are noted above its source code.
Example #1
Source File: DatasetClassifier.java, from mmtf-spark (Apache License 2.0)
/**
 * @param args args[0] path to parquet file, args[1] name of classification column
 * @throws IOException
 * @throws StructureException
 */
public static void main(String[] args) throws IOException {

    if (args.length != 2) {
        System.err.println("Usage: " + DatasetClassifier.class.getSimpleName() + " <parquet file> <classification column name>");
        System.exit(1);
    }

    // name of the class label
    String label = args[1];

    long start = System.nanoTime();

    SparkSession spark = SparkSession
            .builder()
            .master("local[*]")
            .appName(DatasetClassifier.class.getSimpleName())
            .getOrCreate();

    Dataset<Row> data = spark.read().parquet(args[0]).cache();

    int featureCount = 0;
    Object vector = data.first().getAs("features");
    if (vector instanceof DenseVector) {
        featureCount = ((DenseVector)vector).numActives();
    } else if (vector instanceof SparseVector) {
        featureCount = ((SparseVector)vector).numActives();
    }

    System.out.println("Feature count : " + featureCount);

    int classCount = (int)data.select(label).distinct().count();
    System.out.println("Class count : " + classCount);

    System.out.println("Dataset size (unbalanced): " + data.count());
    data.groupBy(label).count().show(classCount);

    data = DatasetBalancer.downsample(data, label, 1);

    System.out.println("Dataset size (balanced) : " + data.count());
    data.groupBy(label).count().show(classCount);

    double testFraction = 0.3;
    long seed = 123;

    SparkMultiClassClassifier mcc;
    Map<String, String> metrics;

    DecisionTreeClassifier dtc = new DecisionTreeClassifier();
    mcc = new SparkMultiClassClassifier(dtc, label, testFraction, seed);
    metrics = mcc.fit(data);
    System.out.println(metrics);

    RandomForestClassifier rfc = new RandomForestClassifier();
    mcc = new SparkMultiClassClassifier(rfc, label, testFraction, seed);
    metrics = mcc.fit(data);
    System.out.println(metrics);

    LogisticRegression lr = new LogisticRegression();
    mcc = new SparkMultiClassClassifier(lr, label, testFraction, seed);
    metrics = mcc.fit(data);
    System.out.println(metrics);

    // specify layers for the neural network
    //   input layer: dimension of feature vector
    //   output layer: number of classes
    int[] layers = new int[] {featureCount, 10, classCount};
    MultilayerPerceptronClassifier mpc = new MultilayerPerceptronClassifier()
            .setLayers(layers)
            .setBlockSize(128)
            .setSeed(1234L)
            .setMaxIter(200);

    mcc = new SparkMultiClassClassifier(mpc, label, testFraction, seed);
    metrics = mcc.fit(data);
    System.out.println(metrics);

    long end = System.nanoTime();

    System.out.println((end-start)/1E9 + " sec");
}
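Note that DatasetBalancer and SparkMultiClassClassifier are mmtf-spark helper classes, so the train/test split and the scoring step are hidden in this example. The following is a hedged sketch of roughly what that step looks like for the MLP case using only plain Spark ML; the method name is illustrative, and it assumes the dataset has a "features" vector column and that the column named by label holds numeric class indices 0..classCount-1.

// A rough stand-in for SparkMultiClassClassifier.fit(...) for the MLP case only.
// Assumes `data` has a "features" vector column and a numeric label column.
static double trainAndScoreMlp(Dataset<Row> data, String label,
        int featureCount, int classCount, double testFraction, long seed) {

    Dataset<Row>[] splits = data.randomSplit(new double[]{1.0 - testFraction, testFraction}, seed);
    Dataset<Row> train = splits[0];
    Dataset<Row> test = splits[1];

    MultilayerPerceptronClassifier mpc = new MultilayerPerceptronClassifier()
            .setLabelCol(label)
            .setLayers(new int[] {featureCount, 10, classCount})
            .setBlockSize(128)
            .setSeed(1234L)
            .setMaxIter(200);

    MultilayerPerceptronClassificationModel model = mpc.fit(train);

    // accuracy on the held-out split
    return new MulticlassClassificationEvaluator()
            .setLabelCol(label)
            .setMetricName("accuracy")
            .evaluate(model.transform(test));
}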
Example #2
Source File: JavaMultilayerPerceptronClassifierExample.java, from SparkDemo (MIT License)
public static void main(String[] args) {
  SparkSession spark = SparkSession
    .builder()
    .appName("JavaMultilayerPerceptronClassifierExample")
    .getOrCreate();

  // $example on$
  // Load training data
  String path = "data/mllib/sample_multiclass_classification_data.txt";
  Dataset<Row> dataFrame = spark.read().format("libsvm").load(path);

  // Split the data into train and test
  Dataset<Row>[] splits = dataFrame.randomSplit(new double[]{0.6, 0.4}, 1234L);
  Dataset<Row> train = splits[0];
  Dataset<Row> test = splits[1];

  // specify layers for the neural network:
  // input layer of size 4 (features), two intermediate of size 5 and 4
  // and output of size 3 (classes)
  int[] layers = new int[] {4, 5, 4, 3};

  // create the trainer and set its parameters
  MultilayerPerceptronClassifier trainer = new MultilayerPerceptronClassifier()
    .setLayers(layers)
    .setBlockSize(128)
    .setSeed(1234L)
    .setMaxIter(100);

  // train the model
  MultilayerPerceptronClassificationModel model = trainer.fit(train);

  // compute accuracy on the test set
  Dataset<Row> result = model.transform(test);
  Dataset<Row> predictionAndLabels = result.select("prediction", "label");
  MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator()
    .setMetricName("accuracy");

  System.out.println("Test set accuracy = " + evaluator.evaluate(predictionAndLabels));
  // $example off$

  spark.stop();
}
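A natural follow-up not shown in the Spark example is persisting the fitted model. A minimal sketch, assuming a Spark version where MLP model persistence is available (2.0+); the path below is illustrative:

// Save the fitted model with Spark ML persistence and load it back later.
try {
  model.write().overwrite().save("/tmp/mlp-model");
} catch (java.io.IOException e) {
  e.printStackTrace();
}
MultilayerPerceptronClassificationModel reloaded =
  MultilayerPerceptronClassificationModel.load("/tmp/mlp-model");
reloaded.transform(test).select("prediction", "label").show(5);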
Example #3
Source File: TransitionClassifier.java, from vn.vitk (GNU General Public License v3.0)
/**
 * Trains a transition classifier on the data frame.
 * @param jsc
 * @param graphs
 * @param featureFrame
 * @param classifierFileName
 * @param numHiddenUnits
 * @return a transition classifier.
 */
public Transformer trainMLP(JavaSparkContext jsc, List<DependencyGraph> graphs,
        FeatureFrame featureFrame, String classifierFileName, int numHiddenUnits) {
    // create a SQLContext
    this.sqlContext = new SQLContext(jsc);

    // extract a data frame from these graphs
    DataFrame dataset = toDataFrame(jsc, graphs, featureFrame);

    // create a processing pipeline and fit it to the data frame
    Pipeline pipeline = createPipeline();
    PipelineModel pipelineModel = pipeline.fit(dataset);
    DataFrame trainingData = pipelineModel.transform(dataset);

    // cache the training data for better performance
    trainingData.cache();

    if (verbose) {
        trainingData.show(false);
    }

    // compute the number of different labels, which is the maximum element
    // in the 'label' column.
    trainingData.registerTempTable("dfTable");
    Row row = sqlContext.sql("SELECT MAX(label) as maxValue from dfTable").first();
    int numLabels = (int)row.getDouble(0);
    numLabels++;

    int vocabSize = ((CountVectorizerModel)(pipelineModel.stages()[1])).getVocabSize();

    // default is a two-layer MLP
    int[] layers = {vocabSize, numLabels};
    // if the user specifies a hidden layer, use a 3-layer MLP:
    if (numHiddenUnits > 0) {
        layers = new int[3];
        layers[0] = vocabSize;
        layers[1] = numHiddenUnits;
        layers[2] = numLabels;
    }

    MultilayerPerceptronClassifier classifier = new MultilayerPerceptronClassifier()
            .setLayers(layers)
            .setBlockSize(128)
            .setSeed(1234L)
            .setTol((Double)params.getOrDefault(params.getTolerance()))
            .setMaxIter((Integer)params.getOrDefault(params.getMaxIter()));
    MultilayerPerceptronClassificationModel model = classifier.fit(trainingData);

    // compute precision on the training data
    DataFrame result = model.transform(trainingData);
    DataFrame predictionAndLabel = result.select("prediction", "label");
    MulticlassClassificationEvaluator evaluator = new MulticlassClassificationEvaluator().setMetricName("precision");
    if (verbose) {
        System.out.println("N = " + trainingData.count());
        System.out.println("D = " + vocabSize);
        System.out.println("K = " + numLabels);
        System.out.println("H = " + numHiddenUnits);
        System.out.println("training precision = " + evaluator.evaluate(predictionAndLabel));
    }

    // save the trained MLP to a file
    String classifierPath = new Path(classifierFileName, "data").toString();
    jsc.parallelize(Arrays.asList(model), 1).saveAsObjectFile(classifierPath);

    // save the pipeline model to sub-directory "pipelineModel"
    try {
        String pipelinePath = new Path(classifierFileName, "pipelineModel").toString();
        pipelineModel.write().overwrite().save(pipelinePath);
    } catch (IOException e) {
        e.printStackTrace();
    }

    return model;
}
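This method persists two artifacts: the MLP model via saveAsObjectFile under "data", and the fitted preprocessing pipeline via ML persistence under "pipelineModel". The loading side is not shown here; the sketch below is an assumption about how it could look, not vn.vitk's actual code, and it presumes a Spark version where PipelineModel.load is available (1.6+). The method name and the newData parameter are illustrative.

// Hedged sketch: read back the saved model and pipeline, then tag new data.
public DataFrame loadAndTag(JavaSparkContext jsc, String classifierFileName, DataFrame newData) {
    // restore the MLP model written with saveAsObjectFile(...)
    String classifierPath = new Path(classifierFileName, "data").toString();
    MultilayerPerceptronClassificationModel model =
            (MultilayerPerceptronClassificationModel) jsc.objectFile(classifierPath).first();

    // restore the fitted preprocessing pipeline written with ML persistence
    String pipelinePath = new Path(classifierFileName, "pipelineModel").toString();
    PipelineModel pipelineModel = PipelineModel.load(pipelinePath);

    // run new data through the same preprocessing, then through the classifier
    return model.transform(pipelineModel.transform(newData));
}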