org.apache.spark.ml.classification.NaiveBayesModel Scala Examples
The following examples show how to use org.apache.spark.ml.classification.NaiveBayesModel.
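Before the project-specific examples, here is a minimal quick-start sketch showing where a NaiveBayesModel comes from: fit with NaiveBayes, persist with write.save, and restore with NaiveBayesModel.load. The input path and save location are placeholders, not taken from any example below.

import org.apache.spark.ml.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.sql.SparkSession

object NaiveBayesQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("nb-quickstart").master("local[*]").getOrCreate()

    // Placeholder path: any LIBSVM-format file with non-negative features works.
    val data = spark.read.format("libsvm").load("data/train.libsvm")

    // Fit a multinomial Naive Bayes model (Spark's default model type).
    val model: NaiveBayesModel = new NaiveBayes().setSmoothing(1.0).fit(data)

    // Persist and reload -- the step most of the examples below build on.
    model.write.overwrite().save("/tmp/nb-model")
    val restored = NaiveBayesModel.load("/tmp/nb-model")
    restored.transform(data).select("label", "prediction").show(5)

    spark.stop()
  }
}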
Example 1
Source File: NaiveBayesPrediction.scala From piflow with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.ml_classification

import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.sql.SparkSession

class NaiveBayesPrediction extends ConfigurableStop {

  val authorEmail: String = "[email protected]"
  val description: String = "Use an existing NaiveBayes model to predict"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)

  var test_data_path: String = _
  var model_path: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()

    // Load test data stored in LIBSVM format as a DataFrame.
    val data = spark.read.format("libsvm").load(test_data_path)

    // Load the previously trained model and score the test data.
    val model = NaiveBayesModel.load(model_path)
    val predictions = model.transform(data)
    predictions.show()

    out.write(predictions)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    test_data_path = MapUtil.get(map, key = "test_data_path").asInstanceOf[String]
    model_path = MapUtil.get(map, key = "model_path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val test_data_path = new PropertyDescriptor()
      .name("test_data_path").displayName("TEST_DATA_PATH").defaultValue("").required(true)
    val model_path = new PropertyDescriptor()
      .name("model_path").displayName("MODEL_PATH").defaultValue("").required(true)
    descriptor = test_data_path :: descriptor
    descriptor = model_path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/ml_classification/NavieBayesPrediction.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MLGroup.toString)
  }
}
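For orientation, a small sketch of how this stop might be driven directly, outside a full piflow flow. The map keys mirror the PropertyDescriptor names above; both paths are hypothetical.

// Hypothetical wiring of the stop's properties; paths are placeholders.
val stop = new NaiveBayesPrediction()
stop.setProperties(Map(
  "test_data_path" -> "hdfs://ns1/demo/test.libsvm", // hypothetical test set
  "model_path"     -> "hdfs://ns1/demo/nb-model"     // hypothetical saved model
))

In a real flow the piflow engine then invokes perform() with a JobContext that carries the SparkSession.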
Example 2
Source File: LocalNaiveBayes.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common.classification.LocalProbabilisticClassificationModel
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.ml.linalg.{Matrix, Vector, Vectors}

class LocalNaiveBayes(override val sparkTransformer: NaiveBayesModel)
  extends LocalProbabilisticClassificationModel[NaiveBayesModel] {}

object LocalNaiveBayes
  extends SimpleModelLoader[NaiveBayesModel]
  with TypedTransformerConverter[NaiveBayesModel] {

  override def build(metadata: Metadata, data: LocalData): NaiveBayesModel = {
    // NaiveBayesModel's constructor is package-private, so call it reflectively.
    val constructor = classOf[NaiveBayesModel].getDeclaredConstructor(
      classOf[String],
      classOf[Vector],
      classOf[Matrix]
    )
    constructor.setAccessible(true)

    // Rebuild the model parameters from the serialized columns.
    val matrixMetadata = data.column("theta").get.data.head.asInstanceOf[Map[String, Any]]
    val matrix = DataUtils.constructMatrix(matrixMetadata)
    val piParams = data.column("pi").get.data.head.asInstanceOf[Map[String, Any]]
    val piVec = DataUtils.constructVector(piParams)

    val nb = constructor
      .newInstance(metadata.uid, piVec, matrix)
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
      .setProbabilityCol(metadata.paramMap("probabilityCol").asInstanceOf[String])
      .setRawPredictionCol(metadata.paramMap("rawPredictionCol").asInstanceOf[String])

    nb.set(nb.smoothing, metadata.paramMap("smoothing").asInstanceOf[Number].doubleValue())
    nb.set(nb.modelType, metadata.paramMap("modelType").asInstanceOf[String])
    nb.set(nb.labelCol, metadata.paramMap("labelCol").asInstanceOf[String])
    nb
  }

  override implicit def toLocal(sparkTransformer: NaiveBayesModel): LocalNaiveBayes = {
    new LocalNaiveBayes(sparkTransformer)
  }
}
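The key move above is calling NaiveBayesModel's package-private constructor reflectively. A stripped-down sketch of that pattern, assuming a Spark 2.x build where the constructor signature is (uid, pi, theta); the parameter values are made up.

import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.ml.linalg.{Matrices, Matrix, Vector, Vectors}

// The constructor is package-private, so look it up and open it reflectively.
val ctor = classOf[NaiveBayesModel].getDeclaredConstructor(
  classOf[String], classOf[Vector], classOf[Matrix])
ctor.setAccessible(true)

// Made-up log prior (pi) and log likelihood (theta) for 2 classes, 3 features.
val pi: Vector = Vectors.dense(math.log(0.5), math.log(0.5))
val theta: Matrix = Matrices.dense(2, 3, Array(-1.0, -2.0, -1.5, -0.5, -2.5, -1.2))

val model = ctor.newInstance("nb-example-uid", pi, theta)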
Example 3
Source File: NaiveBayesClassifierOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.mleap.tensor.DenseTensor
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.ml.linalg.{Matrices, Vectors}

class NaiveBayesClassifierOp extends SimpleSparkOp[NaiveBayesModel] {

  override val Model: OpModel[SparkBundleContext, NaiveBayesModel] =
    new OpModel[SparkBundleContext, NaiveBayesModel] {
      override val klazz: Class[NaiveBayesModel] = classOf[NaiveBayesModel]

      override def opName: String = Bundle.BuiltinOps.classification.naive_bayes

      override def store(model: Model, obj: NaiveBayesModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        // Thresholds are optional; only persist them when explicitly set.
        val thresholds = if (obj.isSet(obj.thresholds)) {
          Some(obj.getThresholds)
        } else None

        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("num_classes", Value.long(obj.numClasses)).
          withValue("pi", Value.vector(obj.pi.toArray)).
          withValue("theta", Value.tensor(DenseTensor(obj.theta.toArray, Seq(obj.theta.numRows, obj.theta.numCols)))).
          withValue("model_type", Value.string(obj.getModelType)).
          withValue("thresholds", thresholds.map(Value.doubleList(_)))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): NaiveBayesModel = {
        // Rebuild pi and theta from the stored tensors.
        val theta = model.value("theta").getTensor[Double]
        val nb = new NaiveBayesModel(
          uid = "",
          pi = Vectors.dense(model.value("pi").getTensor[Double].toArray),
          theta = Matrices.dense(theta.dimensions.head, theta.dimensions(1), theta.toArray)
        )
        val modelType = model.value("model_type").getString
        model.getValue("thresholds").map(t => nb.setThresholds(t.getDoubleList.toArray))
        nb.set(nb.modelType, modelType)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: NaiveBayesModel): NaiveBayesModel = {
    val r = new NaiveBayesModel(uid = uid, pi = model.pi, theta = model.theta)
    if (model.isDefined(model.thresholds)) { r.setThresholds(model.getThresholds) }
    if (model.isDefined(model.modelType)) { r.set(r.modelType, model.getModelType) }
    r
  }

  override def sparkInputs(obj: NaiveBayesModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: NaiveBayesModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
}
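The store/load pair above flattens theta into a DenseTensor (column-major values plus a row/column shape) and rebuilds it with Matrices.dense. A minimal round-trip sketch of just that shape handling, with illustrative values:

import org.apache.spark.ml.linalg.Matrices

// "Store": flatten the matrix the way store() does.
val theta = Matrices.dense(2, 3, Array(-1.0, -2.0, -1.5, -0.5, -2.5, -1.2))
val values = theta.toArray                     // column-major values
val dims = Seq(theta.numRows, theta.numCols)   // Seq(2, 3)

// "Load": rebuild from the flat values and dimensions, as load() does.
val rebuilt = Matrices.dense(dims.head, dims(1), values)
assert(rebuilt.toArray.sameElements(values))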
Example 4
Source File: OpNaiveBayes.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.UID
import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.impl.CheckIsResponseValues
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpProbabilisticClassifierModel}
import com.salesforce.op.utils.reflection.ReflectionUtils.reflectMethod
import org.apache.spark.ml.classification.{NaiveBayes, NaiveBayesModel, OpNaiveBayesParams}

import scala.reflect.runtime.universe.TypeTag

class OpNaiveBayesModel
(
  sparkModel: NaiveBayesModel,
  uid: String = UID[OpNaiveBayesModel],
  operationName: String = classOf[NaiveBayes].getSimpleName
)(
  implicit tti1: TypeTag[RealNN],
  tti2: TypeTag[OPVector],
  tto: TypeTag[Prediction],
  ttov: TypeTag[Prediction#Value]
) extends OpProbabilisticClassifierModel[NaiveBayesModel](
  sparkModel = sparkModel, uid = uid, operationName = operationName
) {
  // Spark keeps these scoring methods protected; access them via reflection.
  @transient lazy val predictRawMirror = reflectMethod(getSparkMlStage().get, "predictRaw")
  @transient lazy val raw2probabilityMirror = reflectMethod(getSparkMlStage().get, "raw2probability")
  @transient lazy val probability2predictionMirror =
    reflectMethod(getSparkMlStage().get, "probability2prediction")
}
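reflectMethod is a TransmogrifAI helper; the underlying idea is plain reflection onto methods that Spark declares protected. A rough equivalent for predictRaw, assuming a Spark version where NaiveBayesModel declares that override itself:

import org.apache.spark.ml.classification.NaiveBayesModel
import org.apache.spark.ml.linalg.{Vector, Vectors}

// model: a fitted NaiveBayesModel. predictRaw(features: Vector) is protected,
// so look it up on the concrete class and make it accessible.
def rawScores(model: NaiveBayesModel, features: Vector): Vector = {
  val m = model.getClass.getDeclaredMethod("predictRaw", classOf[Vector])
  m.setAccessible(true)
  m.invoke(model, features).asInstanceOf[Vector]
}

// Usage sketch: rawScores(model, Vectors.dense(1.0, 0.0, 3.0))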
Example 5
Source File: OpNaiveBayesTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpNaiveBayesTest extends OpEstimatorSpec[Prediction, OpPredictorWrapperModel[NaiveBayesModel],
  OpPredictorWrapper[NaiveBayes, NaiveBayesModel]] with PredictionEquality {

  override def specName: String = Spec[OpNaiveBayes]

  val (inputData, rawFeature1, feature2) = TestFeatureBuilder("label", "features",
    Seq[(RealNN, OPVector)](
      1.0.toRealNN -> Vectors.dense(12.0, 4.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.0, 0.3, 0.1).toOPVector,
      0.0.toRealNN -> Vectors.dense(1.0, 3.9, 4.3).toOPVector,
      1.0.toRealNN -> Vectors.dense(10.0, 1.3, 0.9).toOPVector,
      1.0.toRealNN -> Vectors.dense(15.0, 4.7, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.5, 0.9, 10.1).toOPVector,
      1.0.toRealNN -> Vectors.dense(11.5, 2.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.1, 3.3, 0.1).toOPVector
    )
  )
  val feature1 = rawFeature1.copy(isResponse = true)
  val estimator = new OpNaiveBayes().setInput(feature1, feature2)

  val expectedResult = Seq(
    Prediction(1.0, Array(-34.41, -14.85), Array(0.0, 1.0)),
    Prediction(0.0, Array(-1.07, -1.42), Array(0.58, 0.41)),
    Prediction(0.0, Array(-9.70, -17.99), Array(1.0, 0.0)),
    Prediction(1.0, Array(-26.22, -8.33), Array(0.0, 1.0)),
    Prediction(1.0, Array(-41.93, -16.49), Array(0.0, 1.0)),
    Prediction(0.0, Array(-8.60, -27.31), Array(1.0, 0.0)),
    Prediction(1.0, Array(-31.07, -11.44), Array(0.0, 1.0)),
    Prediction(0.0, Array(-4.54, -6.32), Array(0.85, 0.14))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator.setSmoothing(2)
    estimator.fit(inputData)
    estimator.predictor.getSmoothing shouldBe 2
  }
}
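For comparison, a sketch of the same smoothing check against plain Spark ML, without the TransmogrifAI wrappers; the SparkSession setup and column names are assumptions.

import org.apache.spark.ml.classification.NaiveBayes
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._

// Two of the rows from the test data above, as a plain DataFrame.
val df = Seq(
  (1.0, Vectors.dense(12.0, 4.3, 1.3)),
  (0.0, Vectors.dense(0.0, 0.3, 0.1))
).toDF("label", "features")

val model = new NaiveBayes().setSmoothing(2.0).fit(df)
assert(model.getSmoothing == 2.0) // the fitted model carries the parameter through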