org.apache.spark.ml.classification.MultilayerPerceptronClassifier Scala Examples
The following examples show how to use org.apache.spark.ml.classification.MultilayerPerceptronClassifier.
Each example heading names the source file, the project it was taken from, and that project's license.
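Before the individual projects, here is a minimal, self-contained sketch of the workflow most of the examples below follow: load LIBSVM data, split it, define the layer sizes, fit the classifier, and evaluate the predictions. It is not taken from any of the projects below; it assumes a local SparkSession and the sample_multiclass_classification_data.txt file that ships with Spark, so adjust the master URL and data path to your setup.

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.sql.SparkSession

object MLPQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder
      .master("local[*]")               // assumption: run locally
      .appName("MLPQuickStart")
      .getOrCreate()

    // assumption: Spark's bundled multiclass sample data (4 features, 3 classes)
    val data = spark.read.format("libsvm")
      .load("data/mllib/sample_multiclass_classification_data.txt")
    val Array(train, test) = data.randomSplit(Array(0.8, 0.2), seed = 42L)

    // layers = input width, hidden layer widths, output width (number of classes)
    val model = new MultilayerPerceptronClassifier()
      .setLayers(Array(4, 5, 4, 3))
      .setMaxIter(100)
      .setSeed(42L)
      .fit(train)

    val accuracy = new MulticlassClassificationEvaluator()
      .setMetricName("accuracy")
      .evaluate(model.transform(test).select("prediction", "label"))
    println(s"Test accuracy = $accuracy")

    spark.stop()
  }
}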
Example 1
Source File: MultilayerPerceptronClassifierWrapper.scala From drizzle-spark with Apache License 2.0 | 8 votes |
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel,
    val labelCount: Long,
    val layers: Array[Int],
    val weights: Array[Double]
  ) extends MLWritable {

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
  }

  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadataStr = sc.textFile(rMetadataPath, 1).first()
      val rMetadata = parse(rMetadataStr)
      val labelCount = (rMetadata \ "labelCount").extract[Long]
      val layers = (rMetadata \ "layers").extract[Array[Int]]
      val weights = (rMetadata \ "weights").extract[Array[Double]]

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline, labelCount, layers, weights)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = ("class" -> instance.getClass.getName) ~
        ("labelCount" -> instance.labelCount) ~
        ("layers" -> instance.layers.toSeq) ~
        ("weights" -> instance.weights.toSeq)
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 2
Source File: MultilayerPerceptronClassifierWrapper.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 3
Source File: PerceptronClassifier.scala From Scalaprof with GNU General Public License v2.0 | 5 votes |
package edu.neu.coe.scala.spark.nn

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row

object PerceptronClassifier extends App {

  val conf = new SparkConf().setAppName("spam")
  val sc = new SparkContext(conf)
  val sqlContext = new org.apache.spark.sql.SQLContext(sc)

  val sparkHome = "/Applications/spark-1.5.1-bin-hadoop2.6/"
  val trainingFile = "data/mllib/sample_multiclass_classification_data.txt"

  // this is used to implicitly convert an RDD to a DataFrame.
  import sqlContext.implicits._

  // Load training data
  val data = MLUtils.loadLibSVMFile(sc, s"$sparkHome$trainingFile").toDF()

  // Split the data into train and test
  val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
  val train = splits(0)
  val test = splits(1)

  // specify layers for the neural network:
  // input layer of size 4 (features), two intermediate of size 5 and 4 and output of size 3 (classes)
  val layers = Array[Int](4, 5, 4, 3)

  // create the trainer and set its parameters
  val trainer = new MultilayerPerceptronClassifier()
    .setLayers(layers)
    .setBlockSize(128)
    .setSeed(1234L)
    .setMaxIter(100)

  // train the model
  val model = trainer.fit(train)

  // compute precision on the test set
  val result = model.transform(test)
  val predictionAndLabels = result.select("prediction", "label")
  predictionAndLabels.show

  val evaluator = new MulticlassClassificationEvaluator()
    .setMetricName("precision")
  println("Precision:" + evaluator.evaluate(predictionAndLabels))
}
Example 4
Source File: MLP.scala From Apache-Spark-2x-Machine-Learning-Cookbook with MIT License | 5 votes |
package spark.ml.cookbook.chapter5

import org.apache.log4j.{Level, Logger}
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.sql.SparkSession

object MLP {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getLogger("akka").setLevel(Level.ERROR)

    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("MLP")
      .config("spark.sql.warehouse.dir", ".")
      .getOrCreate()

    val data = spark.read.format("libsvm")
      .load("../data/sparkml2/chapter5/iris.scale.txt")
    data.show(false)

    // Split data
    val splitData = data.randomSplit(Array(0.8, 0.2), seed = System.currentTimeMillis())
    val train = splitData(0)
    val test = splitData(1)

    // specify layers for the neural network:
    // input layer of size 4 (features), one hidden layer of size 5,
    // and output of size 4 (classes)
    val layers = Array[Int](4, 5, 4)

    val mlp = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(110)
      .setSeed(System.currentTimeMillis())
      .setMaxIter(145)

    val mlpModel = mlp.fit(train)

    val result = mlpModel.transform(test)
    result.show(false)

    val predictions = result.select("prediction", "label")
    val eval = new MulticlassClassificationEvaluator().setMetricName("accuracy")
    println("Accuracy: " + eval.evaluate(predictions))
  }
}
Example 5
Source File: MLPClassifierSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.classification

import com.ibm.aardpfark.pfa.PredictorResult
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier

class MLPClassifierSuite extends SparkClassifierPFASuiteBase[PredictorResult] {

  val inputPath = "data/sample_multiclass_classification_data.txt"
  val data = spark.read.format("libsvm").load(inputPath)
  val layers = Array[Int](4, 5, 3)

  val trainer = new MultilayerPerceptronClassifier()
    .setLayers(layers)
    .setBlockSize(128)
    .setSeed(1234L)
    .setMaxIter(100)

  override val sparkTransformer = trainer.fit(data)

  val result = sparkTransformer.transform(data)
  override val input = withColumnAsArray(result, trainer.getFeaturesCol).toJSON.collect()
  override val expectedOutput = result.select(trainer.getPredictionCol).toJSON.collect()
}
Example 6
Source File: SparkPredictionTrainer.scala From smart-meter with MIT License | 5 votes |
package com.logimethods.nats.connector.spark.app

import java.util.Properties
import java.io.File
import java.io.Serializable
import java.nio.ByteBuffer
import java.util.function._
import java.time.{LocalDateTime, ZoneOffset}
import java.time.DayOfWeek._

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import io.nats.client.ConnectionFactory._
import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

object SparkPredictionTrainer extends App with SparkPredictionProcessor {
  log.setLevel(Level.WARN)

  val (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject,
       clusterId, outputNatsStreaming, natsUrl) = setup(args)

  val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
  println("STREAMING_DURATION = " + streamingDuration)

  new Thread(new Runnable {
    def run() {
      while (true) {
        try {
          // periodically retrain the model on the latest data and persist it for the predictor
          val data = SparkPredictionProcessor.getData(sc, THRESHOLD)
          val model = trainer.fit(data)
          model.write.overwrite.save(PREDICTION_MODEL_PATH)
          println("New model of size " + data.count() + " trained: " + model.uid)
          Thread.sleep(streamingDuration)
        } catch {
          case e: Throwable => log.error(e)
        }
      }
    }
  }).start()
}
Example 7
Source File: MultilayerPerceptronClassifierWrapper.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  private val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  lazy val weights: Array[Double] = mlpModel.weights.toArray
  lazy val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 8
Source File: MultilayerPerceptronClassifierExample.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
// $example off$
import org.apache.spark.sql.Row
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{SQLContext, DataFrame}

object MultilayerPerceptronClassifierExample {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MultilayerPerceptronClassifierExample")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // $example on$
    // Load the data stored in LIBSVM format as a DataFrame.
    val data = sqlContext.read.format("libsvm")
      .load("data/mllib/sample_multiclass_classification_data.txt")

    // Split the data into train and test
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
    val train = splits(0)
    val test = splits(1)

    // specify layers for the neural network:
    // input layer of size 4 (features), two intermediate of size 5 and 4
    // and output of size 3 (classes)
    val layers = Array[Int](4, 5, 4, 3)

    // create the trainer and set its parameters
    val trainer = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(100)

    // train the model
    val model = trainer.fit(train)

    // compute precision on the test set
    val result = model.transform(test)
    result.show(5)
    val predictionAndLabels = result.select("prediction", "label")

    // multiclass evaluation
    val evaluator = new MulticlassClassificationEvaluator()
      .setMetricName("precision")
    // accuracy: 0.9636363636363636
    println("Accuracy: " + evaluator.evaluate(predictionAndLabels))
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 9
Source File: OpMultilayerPerceptronClassifierTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpMultilayerPerceptronClassifierTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[MultilayerPerceptronClassificationModel],
  OpPredictorWrapper[MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel]]
  with PredictionEquality {

  override def specName: String = Spec[OpMultilayerPerceptronClassifier]

  val (inputData, rawFeature1, feature2) = TestFeatureBuilder("label", "features",
    Seq[(RealNN, OPVector)](
      1.0.toRealNN -> Vectors.dense(12.0, 4.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.0, 0.3, 0.1).toOPVector,
      0.0.toRealNN -> Vectors.dense(1.0, 3.9, 4.3).toOPVector,
      1.0.toRealNN -> Vectors.dense(10.0, 1.3, 0.9).toOPVector,
      1.0.toRealNN -> Vectors.dense(15.0, 4.7, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.5, 0.9, 10.1).toOPVector,
      1.0.toRealNN -> Vectors.dense(11.5, 2.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.1, 3.3, 0.1).toOPVector
    )
  )
  val feature1 = rawFeature1.copy(isResponse = true)

  val estimator = new OpMultilayerPerceptronClassifier()
    .setInput(feature1, feature2)
    .setLayers(Array(3, 5, 4, 2))

  val expectedResult = Seq(
    Prediction(1.0, Array(-9.655814651428148, 9.202335441336952),
      Array(6.456683124562021E-9, 0.9999999935433168)),
    Prediction(0.0, Array(9.475612761543069, -10.617525149157993),
      Array(0.9999999981221492, 1.877850786773977E-9)),
    Prediction(0.0, Array(9.715293827870028, -10.885255922155942),
      Array(0.9999999988694366, 1.130563392364822E-9)),
    Prediction(1.0, Array(-9.66776357765489, 9.215079716735316),
      Array(6.299199338896916E-9, 0.9999999937008006)),
    Prediction(1.0, Array(-9.668041712561456, 9.215387575592239),
      Array(6.2955091287182745E-9, 0.9999999937044908)),
    Prediction(0.0, Array(9.692904797559496, -10.860273756796797),
      Array(0.9999999988145918, 1.1854083109077814E-9)),
    Prediction(1.0, Array(-9.667687253240183, 9.214995747770411),
      Array(6.300209139771467E-9, 0.9999999936997908)),
    Prediction(0.0, Array(9.703097414537668, -10.872171694864653),
      Array(0.9999999988404908, 1.1595091005698914E-9))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator.setMaxIter(50).setBlockSize(2).setSeed(42)
    estimator.fit(inputData)
    estimator.predictor.getMaxIter shouldBe 50
    estimator.predictor.getBlockSize shouldBe 2
    estimator.predictor.getSeed shouldBe 42
  }
}
Example 10
Source File: OpMultilayerPerceptronClassifier.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.UID
import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.impl.CheckIsResponseValues
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpProbabilisticClassifierModel}
import com.salesforce.op.utils.reflection.ReflectionUtils.reflectMethod
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier, OpMultilayerPerceptronClassifierParams}
import org.apache.spark.ml.linalg.Vector

import scala.reflect.runtime.universe.TypeTag

class OpMultilayerPerceptronClassificationModel
(
  sparkModel: MultilayerPerceptronClassificationModel,
  uid: String = UID[OpMultilayerPerceptronClassificationModel],
  operationName: String = classOf[MultilayerPerceptronClassifier].getSimpleName
)(
  implicit tti1: TypeTag[RealNN],
  tti2: TypeTag[OPVector],
  tto: TypeTag[Prediction],
  ttov: TypeTag[Prediction#Value]
) extends OpProbabilisticClassifierModel[MultilayerPerceptronClassificationModel](
  sparkModel = sparkModel, uid = uid, operationName = operationName
) {
  // the Spark model's scoring methods are not public, so they are invoked via reflection
  @transient lazy val predictRawMirror = reflectMethod(getSparkMlStage().get, "predictRaw")
  @transient lazy val raw2probabilityMirror = reflectMethod(getSparkMlStage().get, "raw2probability")
  @transient lazy val probability2predictionMirror =
    reflectMethod(getSparkMlStage().get, "probability2prediction")
}
Example 11
Source File: PerceptronClassifier.scala From CSYE7200_Old with MIT License | 5 votes |
package edu.neu.coe.csye7200.nn

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.sql.Row

object PerceptronClassifier extends App {

  val conf = new SparkConf().setAppName("spam").setMaster("local[*]")
  val sc = new SparkContext(conf)
  val sqlContext = new org.apache.spark.sql.SQLContext(sc)

  val sparkHome = "/Applications/spark-1.5.1-bin-hadoop2.6/"
  val trainingFile = "data/mllib/sample_multiclass_classification_data.txt"

  // this is used to implicitly convert an RDD to a DataFrame.
  import sqlContext.implicits._

  // Load training data
  val data = MLUtils.loadLibSVMFile(sc, s"$sparkHome$trainingFile").toDF()

  // Split the data into train and test
  val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
  val train = splits(0)
  val test = splits(1)

  // specify layers for the neural network:
  // input layer of size 4 (features), two intermediate of size 5 and 4 and output of size 3 (classes)
  val layers = Array[Int](4, 5, 4, 3)

  // create the trainer and set its parameters
  val trainer = new MultilayerPerceptronClassifier()
    .setLayers(layers)
    .setBlockSize(128)
    .setSeed(1234L)
    .setMaxIter(100)

  // train the model
  val model = trainer.fit(train)

  // compute precision on the test set
  val result = model.transform(test)
  val predictionAndLabels = result.select("prediction", "label")
  predictionAndLabels.show

  val evaluator = new MulticlassClassificationEvaluator()
    .setMetricName("precision")
  println("Precision:" + evaluator.evaluate(predictionAndLabels))
}
Example 12
Source File: MultilayerPerceptronClassifierWrapper.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 13
Source File: MultilayerPerceptronClassifierExample.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.classification.stumbleupon

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.sql.SparkSession

// set VM Option as -Dspark.master=local[1]
object MultilayerPerceptronClassifierExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("MultilayerPerceptronClassifierExample")
      .getOrCreate()

    // Load the data stored in LIBSVM format as a DataFrame.
    val data = spark.read.format("libsvm")
      .load("/Users/manpreet.singh/Sandbox/codehub/github/machinelearning/spark-ml/Chapter_06/2.0.0/scala-spark-app/src/main/scala/org/sparksamples/classification/dataset/spark-data/sample_multiclass_classification_data.txt")

    // Split the data into train and test
    val splits = data.randomSplit(Array(0.6, 0.4), seed = 1234L)
    val train = splits(0)
    val test = splits(1)

    // specify layers for the neural network:
    // input layer of size 4 (features), two intermediate of size 5 and 4
    // and output of size 3 (classes)
    val layers = Array[Int](4, 5, 4, 3)

    // create the trainer and set its parameters
    val trainer = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(100)

    // train the model
    val model = trainer.fit(train)

    // compute accuracy on the test set
    val result = model.transform(test)
    val predictionAndLabels = result.select("prediction", "label")
    val evaluator = new MulticlassClassificationEvaluator()
      .setMetricName("accuracy")

    println("Test set accuracy = " + evaluator.evaluate(predictionAndLabels))

    spark.stop()
  }
}
Example 14
Source File: MultiLayerPerceptronClassifierParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.parity.classification

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.sql._

class MultiLayerPerceptronClassifierParitySpec extends SparkParityBase {

  override val dataset: DataFrame = multiClassClassificationDataset

  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(
    new MultilayerPerceptronClassifier(uid = "mlp").
      setThresholds(Array(0.1, 0.2, 0.3)).
      // specify layers for the neural network:
      // input layer of size 4 (features), two intermediate of size 5 and 4
      // and output of size 3 (classes)
      setLayers(Array(4, 5, 4, 3)).
      setFeaturesCol("features").
      setPredictionCol("prediction"))).fit(dataset)
}
Example 15
Source File: spark-latest.scala From ann-benchmark with Apache License 2.0 | 5 votes |
import org.apache.log4j._
Logger.getRootLogger.setLevel(Level.OFF)

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier

// maximum number of worker nodes in cluster
val numNodes = 5
// batch size, ~10K is good for GPU
val batchSize = 1000
// number of iterations to run
val numIterations = 5

val train = MLUtils.loadLibSVMFile(sc, "file:///data/mnist/mnist.scale")
//val layers = Array[Int](780, 2500, 2000, 1500, 1000, 500, 10)
val layers = Array[Int](780, 10)
val trainer = new MultilayerPerceptronClassifier()
  .setLayers(layers)
  .setBlockSize(1000)
  .setSeed(1234L)
  .setMaxIter(1)

for (i <- 1 to numNodes) {
  val dataPartitions = sc.parallelize(1 to i, i)
  val sample = train.sample(true, 1.0 / i, 11L).collect
  val parallelData = sqlContext.createDataFrame(dataPartitions.flatMap(x => sample))
  parallelData.persist
  parallelData.count
  val t = System.nanoTime()
  val model = trainer.fit(parallelData)
  println(i + "\t" + batchSize + "\t" + (System.nanoTime() - t) / (numIterations * 1e9))
  parallelData.unpersist()
}
Example 16
Source File: NeuralNetworkCtrModel.scala From CTRmodel with Apache License 2.0 | 5 votes |
package com.ggstar.ctrmodel

import com.ggstar.features.FeatureEngineering
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.DataFrame

class NeuralNetworkCtrModel extends BaseCtrModel {

  def train(samples: DataFrame): Unit = {
    val prePipelineModel = FeatureEngineering.preProcessSamples(samples)
    val preparedSamples = prePipelineModel.transform(samples)

    // network architecture, better to keep tuning it until metrics converge
    val layers = Array[Int](
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length,
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length / 2,
      2)

    val nnModel = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(150)      // max iterations, keep increasing it if loss function or metrics don't converge
      .setStepSize(0.005)   // learning step size, larger size will lead to loss vibration
      .setFeaturesCol("scaledFeatures")
      .setLabelCol("label")

    val pipelineStages = prePipelineModel.stages ++ Array(nnModel)

    _pipelineModel = new Pipeline().setStages(pipelineStages).fit(samples)
  }
}
Example 17
Source File: InnerProductNNCtrModel.scala From CTRmodel with Apache License 2.0 | 5 votes |
package com.ggstar.ctrmodel

import com.ggstar.features.FeatureEngineering
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.{LogisticRegression, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.DataFrame

class InnerProductNNCtrModel extends BaseCtrModel {

  def train(samples: DataFrame): Unit = {
    // calculate inner product between item embedding and user embedding
    val samplesWithInnerProduct = FeatureEngineering.calculateEmbeddingInnerProduct(samples)

    val prePipelineModel = FeatureEngineering.preProcessInnerProductSamples(samplesWithInnerProduct)
    val preparedSamples = prePipelineModel.transform(samplesWithInnerProduct)

    // network architecture, better to keep tuning it until metrics converge
    val layers = Array[Int](
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length,
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length / 2,
      2)

    val nnModel = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(150)      // max iterations, keep increasing it if loss function or metrics don't converge
      .setStepSize(0.005)   // learning step size, larger size will lead to loss vibration
      .setFeaturesCol("scaledFeatures")
      .setLabelCol("label")

    val pipelineStages = prePipelineModel.stages ++ Array(nnModel)

    _pipelineModel = new Pipeline().setStages(pipelineStages).fit(samplesWithInnerProduct)
  }

  override def transform(samples: DataFrame): DataFrame = {
    val samplesWithInnerProduct = FeatureEngineering.calculateEmbeddingInnerProduct(samples)
    _pipelineModel.transform(samplesWithInnerProduct)
  }
}
Example 18
Source File: OuterProductNNCtrModel.scala From CTRmodel with Apache License 2.0 | 5 votes |
package com.ggstar.ctrmodel

import com.ggstar.features.FeatureEngineering
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.DataFrame

class OuterProductNNCtrModel extends BaseCtrModel {

  def train(samples: DataFrame): Unit = {
    // calculate outer product between item embedding and user embedding
    val samplesWithOuterProduct = FeatureEngineering.calculateEmbeddingOuterProduct(samples)

    val prePipelineModel = FeatureEngineering.preProcessOuterProductSamples(samplesWithOuterProduct)
    val preparedSamples = prePipelineModel.transform(samplesWithOuterProduct)

    // network architecture, better to keep tuning it until metrics converge
    val layers = Array[Int](
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length,
      preparedSamples.first().getAs[DenseVector]("scaledFeatures").toArray.length / 2,
      2)

    val nnModel = new MultilayerPerceptronClassifier()
      .setLayers(layers)
      .setBlockSize(128)
      .setSeed(1234L)
      .setMaxIter(150)      // max iterations, keep increasing it if loss function or metrics don't converge
      .setStepSize(0.005)   // learning step size, larger size will lead to loss vibration
      .setFeaturesCol("scaledFeatures")
      .setLabelCol("label")

    val pipelineStages = prePipelineModel.stages ++ Array(nnModel)

    _pipelineModel = new Pipeline().setStages(pipelineStages).fit(samplesWithOuterProduct)
  }

  override def transform(samples: DataFrame): DataFrame = {
    val samplesWithOuterProduct = FeatureEngineering.calculateEmbeddingOuterProduct(samples)
    _pipelineModel.transform(samplesWithOuterProduct)
  }
}
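The three CTR models above all derive the input layer width from the prepared feature vector and hard-code two output classes. As a hedged sketch, not part of the CTRmodel project, the hypothetical helper below shows one way to derive the full layer array from the data itself; the "features" and "label" column names are the Spark ML defaults and are assumptions here, as is the premise that every class appears at least once in the data.

import org.apache.spark.ml.linalg.Vector
import org.apache.spark.sql.DataFrame

// hypothetical helper, not from any project above: build the layer array
// from the data, inserting the given hidden-layer sizes in between
def deriveLayers(df: DataFrame, hidden: Array[Int]): Array[Int] = {
  // input width = dimensionality of the feature vector in the first row
  val numFeatures = df.select("features").head.getAs[Vector](0).size
  // output width = number of distinct labels (assumes all classes are present)
  val numClasses = df.select("label").distinct().count().toInt
  Array(numFeatures) ++ hidden ++ Array(numClasses)
}

// usage: deriveLayers(trainDF, Array(64, 32)) yields Array(numFeatures, 64, 32, numClasses)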