org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel Scala Examples

The following examples show how to use org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel. Each example is taken from an open-source project; the source file, project, and license are noted above it.
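For context, a MultilayerPerceptronClassificationModel is obtained by fitting a MultilayerPerceptronClassifier. Below is a minimal sketch along the lines of the standard Spark ML example; the data path and layer sizes are illustrative assumptions.

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("mlp-example").getOrCreate()

// Training data in libsvm format (the path is an assumption).
val train = spark.read.format("libsvm")
  .load("data/mllib/sample_multiclass_classification_data.txt")

// Layer sizes: 4 input features, two hidden layers, 3 output classes.
val layers = Array[Int](4, 5, 4, 3)

val trainer = new MultilayerPerceptronClassifier()
  .setLayers(layers)
  .setBlockSize(128)
  .setSeed(1234L)
  .setMaxIter(100)

// fit returns a MultilayerPerceptronClassificationModel.
val model = trainer.fit(train)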
Example 1
Source File: MultilayerPerceptronClassifierWrapper.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel,
    val labelCount: Long,
    val layers: Array[Int],
    val weights: Array[Double]
  ) extends MLWritable {

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
  }

  
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper]{

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadataStr = sc.textFile(rMetadataPath, 1).first()
      val rMetadata = parse(rMetadataStr)
      val labelCount = (rMetadata \ "labelCount").extract[Long]
      val layers = (rMetadata \ "layers").extract[Array[Int]]
      val weights = (rMetadata \ "weights").extract[Array[Double]]

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline, labelCount, layers, weights)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = ("class" -> instance.getClass.getName) ~
        ("labelCount" -> instance.labelCount) ~
        ("layers" -> instance.layers.toSeq) ~
        ("weights" -> instance.weights.toArray.toSeq)
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
} 
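The wrapper above follows Spark's standard MLWritable/MLReadable persistence pattern. For the underlying model itself, the public roundtrip looks like this (a sketch; model is assumed to be a fitted MultilayerPerceptronClassificationModel and the path is illustrative):

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

// Save a fitted model, overwriting any previous copy at the path.
model.write.overwrite().save("/tmp/mlp-model")

// Load it back through the companion object's MLReadable loader.
val restored = MultilayerPerceptronClassificationModel.load("/tmp/mlp-model")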
Example 2
Source File: MultilayerPerceptronPrediction.scala    From piflow   with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.ml_classification

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.sql.SparkSession

class MultilayerPerceptronPrediction extends ConfigurableStop{
  val authorEmail: String = "[email protected]"
  val description: String = "Use an existing multilayer perceptron model to predict"
  val inportList: List[String] = List(Port.DefaultPort.toString)
  val outportList: List[String] = List(Port.DefaultPort.toString)
  var test_data_path: String = _
  var model_path: String = _


  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()
    //load data stored in libsvm format as a dataframe
    val data=spark.read.format("libsvm").load(test_data_path)
    //data.show()

    //load model
    val model=MultilayerPerceptronClassificationModel.load(model_path)

    val predictions=model.transform(data)
    predictions.show()
    out.write(predictions)

  }

  def initialize(ctx: ProcessContext): Unit = {

  }


  def setProperties(map: Map[String, Any]): Unit = {
    test_data_path=MapUtil.get(map,key="test_data_path").asInstanceOf[String]
    model_path=MapUtil.get(map,key="model_path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor : List[PropertyDescriptor] = List()
    val test_data_path = new PropertyDescriptor().name("test_data_path").displayName("TEST_DATA_PATH").defaultValue("").required(true)
    val model_path = new PropertyDescriptor().name("model_path").displayName("MODEL_PATH").defaultValue("").required(true)
    descriptor = test_data_path :: descriptor
    descriptor = model_path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/ml_classification/MultilayerPerceptronPrediction.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MLGroup.toString)
  }

} 
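For illustration, here is a minimal hand-wired use of this stop, driving only the setProperties method defined above. Both paths are assumptions; in a real flow, piflow supplies this map from the flow configuration, and the model directory would come from a save such as the roundtrip sketch after Example 1.

val stop = new MultilayerPerceptronPrediction()
stop.setProperties(Map(
  "test_data_path" -> "/tmp/mlp/test.libsvm", // assumed path to libsvm-format test data
  "model_path"     -> "/tmp/mlp/model"        // assumed path to a saved model directory
))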
Example 3
Source File: LocalMultilayerPerceptronClassificationModel.scala    From spark-ml-serving   with Apache License 2.0
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}

class LocalMultilayerPerceptronClassificationModel(
  override val sparkTransformer: MultilayerPerceptronClassificationModel
) extends LocalPredictionModel[MultilayerPerceptronClassificationModel] {}

object LocalMultilayerPerceptronClassificationModel
  extends SimpleModelLoader[MultilayerPerceptronClassificationModel]
  with TypedTransformerConverter[MultilayerPerceptronClassificationModel] {

  override def build(
    metadata: Metadata,
    data: LocalData
  ): MultilayerPerceptronClassificationModel = {
    val layers = data.column("layers").get.data.head.asInstanceOf[Seq[Int]].toArray
    val weightsParam = data.column("weights").get.data.head.asInstanceOf[Map[String, Any]]
    val weights = DataUtils.constructVector(weightsParam)
    val constructor = classOf[MultilayerPerceptronClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[Int]],
      classOf[Vector]
    )
    constructor.setAccessible(true)
    constructor
      .newInstance(
        metadata.uid,
        layers,
        weights
      )
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
  }

  override implicit def toLocal(
    sparkTransformer: MultilayerPerceptronClassificationModel
  ): LocalMultilayerPerceptronClassificationModel = {
    new LocalMultilayerPerceptronClassificationModel(sparkTransformer)
  }
} 
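The build method above reconstructs the model from two pieces of serialized state: the layer sizes and the flat weights vector. On the training side, both are exposed on the public Spark 2.x model, which is what a serializer would read out (a sketch, assuming a fitted model named model):

val layerSizes: Array[Int] = model.layers // e.g. Array(4, 5, 4, 3)
val flatWeights = model.weights           // a single Vector holding all weights and biases
println(s"layers=${layerSizes.mkString(",")}, weightCount=${flatWeights.size}")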
Example 4
Source File: MultiLayerPerceptronClassifierOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.Vectors


class MultiLayerPerceptronClassifierOp extends SimpleSparkOp[MultilayerPerceptronClassificationModel] {
  override val Model: OpModel[SparkBundleContext, MultilayerPerceptronClassificationModel] = new OpModel[SparkBundleContext, MultilayerPerceptronClassificationModel] {
    override def opName: String = Bundle.BuiltinOps.classification.multi_layer_perceptron_classifier

    override val klazz: Class[MultilayerPerceptronClassificationModel] = classOf[MultilayerPerceptronClassificationModel]

    override def store(model: Model, obj: MultilayerPerceptronClassificationModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      val thresholds = if(obj.isSet(obj.thresholds)) {
        Some(obj.getThresholds)
      } else None
      model.withValue("layers", Value.longList(obj.layers.map(_.toLong))).
        withValue("weights", Value.vector(obj.weights.toArray)).
        withValue("thresholds", thresholds.map(_.toSeq).map(Value.doubleList))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): MultilayerPerceptronClassificationModel = {
      val m = new MultilayerPerceptronClassificationModel(uid = "",
        layers = model.value("layers").getLongList.map(_.toInt).toArray,
        weights = Vectors.dense(model.value("weights").getTensor[Double].toArray))
      model.getValue("thresholds").
        map(t => m.setThresholds(t.getDoubleList.toArray)).
        getOrElse(m)
    }

  }

  override def sparkLoad(uid: String, shape: NodeShape, model: MultilayerPerceptronClassificationModel): MultilayerPerceptronClassificationModel = {
    val m = new MultilayerPerceptronClassificationModel(uid = uid, layers = model.layers, weights = model.weights)
    if (model.isSet(model.thresholds)) m.setThresholds(model.getThresholds)
    m
  }

  override def sparkInputs(obj: MultilayerPerceptronClassificationModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: MultilayerPerceptronClassificationModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
} 
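Note that store only persists thresholds when the param was explicitly set, and load restores it the same way. On the Spark side, that corresponds to the usual optional-param pattern (a sketch; the threshold values are hypothetical):

// Bias a two-class model's predictions, only if no thresholds were set before.
if (!model.isSet(model.thresholds)) {
  model.setThresholds(Array(0.6, 0.4)) // hypothetical per-class thresholds
}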
Example 5
Source File: MultilayerPerceptronClassifierWrapper.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper]{

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
} 
Example 6
Source File: MultilayerPerceptronClassifierWrapper.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper]{

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
} 
Example 7
Source File: OpMultilayerPerceptronClassifier.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.UID
import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.impl.CheckIsResponseValues
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpProbabilisticClassifierModel}
import com.salesforce.op.utils.reflection.ReflectionUtils.reflectMethod
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier, OpMultilayerPerceptronClassifierParams}
import org.apache.spark.ml.linalg.Vector

import scala.reflect.runtime.universe.TypeTag


class OpMultilayerPerceptronClassificationModel
(
  sparkModel: MultilayerPerceptronClassificationModel,
  uid: String = UID[OpMultilayerPerceptronClassificationModel],
  operationName: String = classOf[MultilayerPerceptronClassifier].getSimpleName
)(
  implicit tti1: TypeTag[RealNN],
  tti2: TypeTag[OPVector],
  tto: TypeTag[Prediction],
  ttov: TypeTag[Prediction#Value]
) extends OpProbabilisticClassifierModel[MultilayerPerceptronClassificationModel](
  sparkModel = sparkModel, uid = uid, operationName = operationName
) {
  @transient lazy val predictRawMirror = reflectMethod(getSparkMlStage().get, "predictRaw")
  @transient lazy val raw2probabilityMirror = reflectMethod(getSparkMlStage().get, "raw2probability")
  @transient lazy val probability2predictionMirror =
    reflectMethod(getSparkMlStage().get, "probability2prediction")
} 
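The three lazy mirrors above exist because predictRaw, raw2probability, and probability2prediction are protected on Spark 2.x model classes, so they cannot be called directly. A rough equivalent using plain Java reflection (a sketch; model and the feature values are assumptions, and lookup details can vary across Spark versions):

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel
import org.apache.spark.ml.linalg.{Vector, Vectors}

// Look up the protected predictRaw(Vector) method and make it invocable.
val predictRaw = classOf[MultilayerPerceptronClassificationModel]
  .getDeclaredMethod("predictRaw", classOf[Vector])
predictRaw.setAccessible(true)

val raw = predictRaw.invoke(model, Vectors.dense(12.0, 4.3, 1.3)).asInstanceOf[Vector]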
Example 8
Source File: OpMultilayerPerceptronClassifierTest.scala    From TransmogrifAI   with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner


@RunWith(classOf[JUnitRunner])
class OpMultilayerPerceptronClassifierTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[MultilayerPerceptronClassificationModel],
  OpPredictorWrapper[MultilayerPerceptronClassifier, MultilayerPerceptronClassificationModel]] with PredictionEquality {

  override def specName: String = Spec[OpMultilayerPerceptronClassifier]

  val (inputData, rawFeature1, feature2) = TestFeatureBuilder("label", "features",
    Seq[(RealNN, OPVector)](
      1.0.toRealNN -> Vectors.dense(12.0, 4.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.0, 0.3, 0.1).toOPVector,
      0.0.toRealNN -> Vectors.dense(1.0, 3.9, 4.3).toOPVector,
      1.0.toRealNN -> Vectors.dense(10.0, 1.3, 0.9).toOPVector,
      1.0.toRealNN -> Vectors.dense(15.0, 4.7, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.5, 0.9, 10.1).toOPVector,
      1.0.toRealNN -> Vectors.dense(11.5, 2.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.1, 3.3, 0.1).toOPVector
    )
  )
  val feature1 = rawFeature1.copy(isResponse = true)
  val estimator = new OpMultilayerPerceptronClassifier()
    .setInput(feature1, feature2)
    .setLayers(Array(3, 5, 4, 2))

  val expectedResult = Seq(
    Prediction(1.0, Array(-9.655814651428148, 9.202335441336952), Array(6.456683124562021E-9, 0.9999999935433168)),
    Prediction(0.0, Array(9.475612761543069, -10.617525149157993), Array(0.9999999981221492, 1.877850786773977E-9)),
    Prediction(0.0, Array(9.715293827870028, -10.885255922155942), Array(0.9999999988694366, 1.130563392364822E-9)),
    Prediction(1.0, Array(-9.66776357765489, 9.215079716735316), Array(6.299199338896916E-9, 0.9999999937008006)),
    Prediction(1.0, Array(-9.668041712561456, 9.215387575592239), Array(6.2955091287182745E-9, 0.9999999937044908)),
    Prediction(0.0, Array(9.692904797559496, -10.860273756796797), Array(0.9999999988145918, 1.1854083109077814E-9)),
    Prediction(1.0, Array(-9.667687253240183, 9.214995747770411), Array(6.300209139771467E-9, 0.9999999936997908)),
    Prediction(0.0, Array(9.703097414537668, -10.872171694864653), Array(0.9999999988404908, 1.1595091005698914E-9))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator.setMaxIter(50).setBlockSize(2).setSeed(42)
    estimator.fit(inputData)
    estimator.predictor.getMaxIter shouldBe 50
    estimator.predictor.getBlockSize shouldBe 2
    estimator.predictor.getSeed shouldBe 42
  }
} 
Example 9
Source File: MultilayerPerceptronClassifierWrapper.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  private val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  lazy val weights: Array[Double] = mlpModel.weights.toArray
  lazy val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"
  val PREDICTED_LABEL_COL = "prediction"

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper]{

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
} 
Example 10
Source File: SparkPredictionTrainer.scala    From smart-meter   with MIT License
package com.logimethods.nats.connector.spark.app

import java.util.Properties;
import java.io.File
import java.io.Serializable

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.storage.StorageLevel;
import org.apache.spark.streaming._

import io.nats.client.ConnectionFactory._
import java.nio.ByteBuffer

import org.apache.log4j.{Level, LogManager, PropertyConfigurator}

import com.logimethods.connector.nats.to_spark._
import com.logimethods.scala.connector.spark.to_nats._

import org.apache.spark.ml.classification.MultilayerPerceptronClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator

import java.util.function._

import java.time.{LocalDateTime, ZoneOffset}
import java.time.DayOfWeek._

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

object SparkPredictionTrainer extends App with SparkPredictionProcessor {
  log.setLevel(Level.WARN)

  val (properties, targets, logLevel, sc, inputNatsStreaming, inputSubject, outputSubject, clusterId, outputNatsStreaming, natsUrl) = setup(args)

  val streamingDuration = scala.util.Properties.envOrElse("STREAMING_DURATION", "2000").toInt
  println("STREAMING_DURATION = " + streamingDuration)

  new Thread(new Runnable {
              def run() {
                 while( true ){
                   try {
                     val data = SparkPredictionProcessor.getData(sc, THRESHOLD)
                     val model = trainer.fit(data)
                     model.write.overwrite.save(PREDICTION_MODEL_PATH)
                     println("New model of size " + data.count() + " trained: " + model.uid)
                     Thread.sleep(streamingDuration)
                   } catch {
                     case e: Throwable => log.error(e)
                   }
                 }
              }
             }).start()
} 
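The MulticlassClassificationEvaluator imported above is unused in this excerpt; a typical follow-up would score the freshly trained model on held-out data (a sketch, assuming a test DataFrame named test with label and features columns):

val predictions = model.transform(test)
val evaluator = new MulticlassClassificationEvaluator().setMetricName("accuracy")
println("Test accuracy = " + evaluator.evaluate(predictions))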
Example 11
Source File: MLPClassifier.scala    From aardpfark   with Apache License 2.0
package com.ibm.aardpfark.spark.ml.classification

import scala.collection.mutable.ArrayBuffer

import com.ibm.aardpfark.pfa.document.{Cell, PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.dsl._
import com.ibm.aardpfark.pfa.expression._
import com.ibm.aardpfark.pfa.types.WithSchema
import com.ibm.aardpfark.spark.ml.PFAPredictionModel
import breeze.linalg.{DenseMatrix, DenseVector}
import com.sksamuel.avro4s.{AvroNamespace, AvroSchema}
import org.apache.avro.Schema

import org.apache.spark.ml.classification.MultilayerPerceptronClassificationModel

@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification")
case class Layer(weights: Array[Array[Double]], bias: Array[Double])
@AvroNamespace("com.ibm.aardpfark.exec.spark.ml.classification")
case class Layers(layers: Seq[Layer]) extends WithSchema {
  override def schema: Schema = AvroSchema[this.type]
}

class PFAMultilayerPerceptronClassificationModel(
  override val sparkTransformer: MultilayerPerceptronClassificationModel)
  extends PFAPredictionModel[Layers] {

  private def getLayers = {
    val weights = sparkTransformer.weights.toArray
    val inputLayers = sparkTransformer.layers
    val layers = ArrayBuffer[Layer]()
    var offset = 0
    for (i <- 0 to inputLayers.size - 2) {
      val in = inputLayers(i)
      val out = inputLayers(i + 1)
      val wOffset = out * in
      val wData = weights.slice(offset, offset + wOffset)
      val bData = weights.slice(offset + wOffset, offset + wOffset + out)
      val w = Array.ofDim[Double](out, in)
      new DenseMatrix[Double](out, in, wData).foreachPair { case ((ii, jj), v) => w(ii)(jj) = v }
      val b = new DenseVector[Double](bData).toArray
      layers += Layer(w, b)
      offset += wOffset + out
    }
    layers.toArray
  }

  override protected def cell = Cell(Layers(getLayers))

  private val doubleSigmoid = NamedFunctionDef("doubleSigmoid", FunctionDef[Double, Double](
    "x", m.link.logit("x")
  ))

  override def action: PFAExpression = {
    val forward = model.neural.simpleLayers(inputExpr, modelCell.ref("layers"), doubleSigmoid.ref)
    val softmax = m.link.softmax(forward)
    NewRecord(outputSchema, Map(predictionCol -> a.argmax(softmax)))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withCell(modelCell)
      .withFunction(doubleSigmoid)
      .withAction(action)
      .pfa
  }

}
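Spark stores all MLP parameters in one flat vector, laid out layer by layer as an out-by-in weight matrix followed by an out-sized bias vector; the offset arithmetic in getLayers above unpacks exactly that layout. A quick sanity check of the expected total length for an assumed topology:

// For layers Array(3, 5, 2): (3*5 + 5) + (5*2 + 2) = 20 + 12 = 32 total values.
val sizes = Array(3, 5, 2)
val expected = sizes.sliding(2).map { case Array(in, out) => in * out + out }.sum
println(expected) // 32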