org.apache.spark.mllib.classification.NaiveBayesModel Scala Examples
The following examples show how to use org.apache.spark.mllib.classification.NaiveBayesModel.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: NaiveBayesExample.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
// $example off$

/**
 * Example driver: trains a multinomial Naive Bayes model on a LIBSVM-format data file,
 * computes the accuracy on a held-out split, then saves the model and loads it back.
 *
 * The computed accuracy and the reloaded model are not printed or otherwise used.
 */
object NaiveBayesExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NaiveBayesExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    // Split data into training (60%) and test (40%).
    // No seed is passed, so the split differs from run to run.
    val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

    // Pair each prediction with the true label; accuracy is the fraction of matching pairs.
    val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
    val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()

    // Save and load model
    model.save(sc, "target/tmp/myNaiveBayesModel")
    val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}

// scalastyle:on println
Example 2
Source File: PipeClassificationNaiveBayes.scala From sddf with GNU General Public License v3.0 | 5 votes |
package de.unihamburg.vsis.sddf.classification

import scala.beans.BeanInfo

import org.apache.spark.mllib.classification.NaiveBayes
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD

import de.unihamburg.vsis.sddf.SddfContext.SymPairSim
import de.unihamburg.vsis.sddf.reading.SymPair
import de.unihamburg.vsis.sddf.reading.Tuple
import org.apache.spark.mllib.classification.NaiveBayesModel

/**
 * Classification pipe that trains a Naive Bayes model and applies it to
 * similarity vectors of tuple pairs.
 *
 * @param lambda value passed as the second argument to `NaiveBayes.train`
 */
class PipeClassificationNaiveBayes(lambda: Double = 1.0)
  extends AbstractPipeClassification {

  /** Parameters of this pipe, keyed by name. */
  val paramMap: Map[String, Any] = Map(("lambda", lambda))

  /**
   * Trains a Naive Bayes model on `trainingData` and predicts a label for every
   * entry of `symPairSim`.
   *
   * @param trainingData labeled points used to fit the model
   * @param symPairSim   pairs together with their similarity vectors
   * @return for each input entry: the pair, its similarity vector, and the predicted label
   */
  def trainModelAndClassify(
      trainingData: RDD[LabeledPoint],
      symPairSim: SymPairSim): RDD[(SymPair[Tuple], Array[Double], Double)] = {
    val model = NaiveBayes.train(trainingData, lambda)

    // NOTE(review): `log` is presumably provided by AbstractPipeClassification — confirm.
    log.debug("Classification Model:" + model)
    log.debug("Classification Model labels :" + model.labels.mkString(" "))
    log.debug("Classification Model pi: " + model.pi.mkString(" "))
    // Render each theta row as text; `foreach` would return Unit and log "()".
    log.debug("Classification Model theta: " + model.theta.map(_.mkString(" ")).mkString(" | "))

    // Marking Missing Values as Not Equal (0)
    symPairSim.map(pair => (pair._1, pair._2, model.predict(Vectors.dense(pair._2))))
  }
}

object PipeClassificationNaiveBayes {

  /**
   * Factory for [[PipeClassificationNaiveBayes]].
   *
   * @param lambda forwarded unchanged to the class constructor
   */
  def apply(lambda: Double = 1.0): PipeClassificationNaiveBayes = {
    new PipeClassificationNaiveBayes(lambda)
  }
}
Example 3
Source File: Predict.scala From spark-twitter-sentiment with Apache License 2.0 | 5 votes |
package com.dhruv

import org.apache.spark.SparkConf
import org.apache.spark.mllib.classification.NaiveBayesModel
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Streaming driver: loads a saved Naive Bayes model and prints a predicted label
 * for each English-language tweet received from the Twitter stream.
 */
object Predict {

  /**
   * Entry point.
   *
   * @param args command-line arguments; at least one is required. They are handed to
   *             `Utils.parseCommandLineWithTwitterCredentials`, whose single returned
   *             element is used as the model location.
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println(s"Usage: ${this.getClass.getSimpleName} <modelDirectory> ")
      System.exit(1)
    }

    // NOTE(review): this pattern throws a MatchError unless exactly one element
    // is returned — confirm against Utils.
    val Array(modelFile) = Utils.parseCommandLineWithTwitterCredentials(args)

    println("Initializing Streaming Spark Context...")
    val conf = new SparkConf().setAppName(this.getClass.getSimpleName)
    // Batch interval of 5 seconds.
    val ssc = new StreamingContext(conf, Seconds(5))

    println("Initializing Twitter stream...")
    val tweets = TwitterUtils.createStream(ssc, Utils.getAuth)
    // Keep only tweets whose language is "en" and extract their text.
    val statuses = tweets.filter(_.getLang == "en").map(_.getText)

    println("Initalizaing the Naive Bayes model...")
    val model = NaiveBayesModel.load(ssc.sparkContext, modelFile.toString)

    // Pair each tweet text with the label predicted from its feature vector.
    val labeled_statuses = statuses
      .map(t => (t, model.predict(Utils.featurize(t))))

    labeled_statuses.print()

    // Start the streaming computation
    println("Initialization complete.")
    ssc.start()
    ssc.awaitTermination()
  }
}
Example 4
Source File: NaiveBayesExample.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
// $example off$

/**
 * Example driver: trains a multinomial Naive Bayes model on a LIBSVM-format data file,
 * computes the accuracy on a held-out split, then saves the model and loads it back.
 *
 * The computed accuracy and the reloaded model are not printed or otherwise used.
 */
object NaiveBayesExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NaiveBayesExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    // Split data into training (60%) and test (40%).
    // No seed is passed, so the split differs from run to run.
    val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

    // Pair each prediction with the true label; accuracy is the fraction of matching pairs.
    val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
    val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()

    // Save and load model
    model.save(sc, "target/tmp/myNaiveBayesModel")
    val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}

// scalastyle:on println
Example 5
Source File: NaiveBayesExample.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
// $example off$

/**
 * Example driver: trains a multinomial Naive Bayes model on a LIBSVM-format data file,
 * computes the accuracy on a held-out split, then saves the model and loads it back.
 *
 * The computed accuracy and the reloaded model are not printed or otherwise used.
 */
object NaiveBayesExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NaiveBayesExample")
    val sc = new SparkContext(conf)
    // $example on$
    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    // Split data into training (60%) and test (40%).
    // No seed is passed, so the split differs from run to run.
    val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

    // Pair each prediction with the true label; accuracy is the fraction of matching pairs.
    val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
    val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()

    // Save and load model
    model.save(sc, "target/tmp/myNaiveBayesModel")
    val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}

// scalastyle:on println
Example 6
Source File: NaiveBayesExample.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.util.MLUtils
// $example off$

/**
 * Walk-through of the MLlib Naive Bayes API: fit a multinomial model on a
 * LIBSVM-format file, score it on a held-out split, and round-trip the model
 * through save/load before stopping the context.
 */
object NaiveBayesExample {

  /**
   * Runs the example end to end.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("NaiveBayesExample"))
    // $example on$
    // Load and parse the data file.
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    // Split data into training (60%) and test (40%).
    val Array(training, test) = data.randomSplit(Array(0.6, 0.4))

    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

    // Accuracy = correctly predicted test points / all test points.
    val predictionAndLabel =
      test.map(point => (model.predict(point.features), point.label))
    val correctCount =
      predictionAndLabel.filter { case (predicted, actual) => predicted == actual }.count()
    val accuracy = 1.0 * correctCount / test.count()

    // Save and load model
    val modelPath = "target/tmp/myNaiveBayesModel"
    model.save(sc, modelPath)
    val sameModel = NaiveBayesModel.load(sc, modelPath)
    // $example off$

    sc.stop()
  }
}

// scalastyle:on println
Example 7
Source File: NaiveBayesExample.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

// $example on$
import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
// $example off$
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Example driver: parses a text file into labeled points, trains a multinomial
 * Naive Bayes model, computes the accuracy on a held-out split, then saves the
 * model and loads it back.
 *
 * The computed accuracy and the reloaded model are not printed or otherwise used.
 */
object NaiveBayesExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("NaiveBayesExample")
    val sc = new SparkContext(conf)
    // $example on$
    val data = sc.textFile("data/mllib/sample_naive_bayes_data.txt")
    // Each line is parsed as "<label>,<space-separated feature values>".
    val parsedData = data.map { line =>
      val parts = line.split(',')
      LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
    }

    // Split data into training (60%) and test (40%).
    val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
    val training = splits(0)
    val test = splits(1)

    val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")

    // Pair each prediction with the true label; accuracy is the fraction of matching pairs.
    val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
    val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()

    // Save and load model
    model.save(sc, "target/tmp/myNaiveBayesModel")
    val sameModel = NaiveBayesModel.load(sc, "target/tmp/myNaiveBayesModel")
    // $example off$

    // Release the SparkContext once the example has finished.
    sc.stop()
  }
}

// scalastyle:on println