org.apache.spark.mllib.clustering.GaussianMixture Scala Examples
The following examples show how to use org.apache.spark.mllib.clustering.GaussianMixture.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
import java.io.{File, PrintWriter}

import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.functions._

/**
 * Fits a Gaussian mixture model to 2-D points read from a text file and writes
 * every point together with its 1-based cluster id to a tab-separated file.
 *
 * The input is read through a SparkContext named `sc`, which must already be in
 * scope where this method is defined (it is not created here). Each non-blank
 * line is split on whitespace; the first two columns are parsed as doubles.
 *
 * The output file is named `<input path without its last extension>-GMM-k<quantity>.tsv`
 * and is written with `java.io`, i.e. on the driver's local file system.
 *
 * @param pathToTextFile path of the whitespace-separated input file
 * @param quantity       number of mixture components (passed to `setK`)
 */
def computeGaussianMixtureModel(pathToTextFile: String, quantity: Int): Unit = {

  // Runs `write` against a PrintWriter on `target` and always closes the writer.
  def save(target: File)(write: PrintWriter => Unit): Unit = {
    val out = new PrintWriter(target)
    try write(out) finally out.close()
  }

  // Strip only the final extension of the last path segment. Splitting on the
  // first '.' (as before) broke paths such as "./data/points.txt" (empty stem)
  // or "/data/v1.2/points.txt" (stem cut inside a directory name).
  val lastSeparator =
    math.max(pathToTextFile.lastIndexOf('/'), pathToTextFile.lastIndexOf('\\'))
  val lastDot = pathToTextFile.lastIndexOf('.')
  val filename =
    if (lastDot > lastSeparator + 1) pathToTextFile.substring(0, lastDot)
    else pathToTextFile
  val outputFilename = s"$filename-GMM-k${quantity}.tsv"

  val features = sc
    .textFile(pathToTextFile)
    .map(_.trim)
    // Blank lines would otherwise fail in toDouble with NumberFormatException.
    .filter(_.nonEmpty)
    .map { line =>
      val row = line.split("\\s+")
      Vectors.dense(row(0).toDouble, row(1).toDouble)
    }
    .cache()

  try {
    val gmm = new GaussianMixture()
      .setK(quantity)
      .run(features)

    // predict returns a 0-based component index; shift to 1-based ids.
    val predictions = features
      .map { f => (f(0), f(1), gmm.predict(f) + 1) }
      .collect()

    println(s"OUTPUT TO: ${outputFilename}")
    save(new File(outputFilename)) { out =>
      predictions.foreach { case (x, y, ccid) =>
        out.println(s"${x}\t${y}\t${ccid}")
      }
    }
  } finally {
    // Release the cached partitions once the results have been written.
    features.unpersist()
  }
}
Example 2
Source File: GaussianMixtureExample.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
import org.apache.spark.mllib.linalg.Vectors
// $example off$

/**
 * Example application: fits a two-component Gaussian mixture to the vectors in
 * `data/mllib/gmm_data.txt`, saves and reloads the model, and prints the weight,
 * mean and covariance of every component.
 */
object GaussianMixtureExample {

  /**
   * Entry point.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GaussianMixtureExample")
    val sc = new SparkContext(conf)

    // Stop the context even when loading, training or saving throws.
    try {
      // $example on$
      // Load and parse the data: one space-separated vector per line
      val data = sc.textFile("data/mllib/gmm_data.txt")
      val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble))).cache()

      // Cluster the data into two classes using GaussianMixture
      val gmm = new GaussianMixture().setK(2).run(parsedData)

      // Save and load model
      val modelPath = "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel"
      gmm.save(sc, modelPath)
      val sameModel = GaussianMixtureModel.load(sc, modelPath)

      // output parameters of max-likelihood model
      for (i <- 0 until gmm.k) {
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(
          gmm.weights(i), gmm.gaussians(i).mu, gmm.gaussians(i).sigma))
      }
      // $example off$
    } finally {
      sc.stop()
    }
  }
}
// scalastyle:on println
Example 3
Source File: GaussianMixtureExample.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
import org.apache.spark.mllib.linalg.Vectors
// $example off$

/**
 * Demonstrates `GaussianMixture`: trains a model with k = 2 on
 * `data/mllib/gmm_data.txt`, round-trips it through save/load, and prints the
 * parameters of each fitted Gaussian.
 */
object GaussianMixtureExample {

  /**
   * Runs the example.
   *
   * @param args command-line arguments (ignored)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("GaussianMixtureExample"))

    try {
      // $example on$
      // Load and parse the data
      val parsedData = sc
        .textFile("data/mllib/gmm_data.txt")
        .map(line => Vectors.dense(line.trim.split(' ').map(_.toDouble)))
        .cache()

      // Cluster the data into two classes using GaussianMixture
      val gmm = new GaussianMixture().setK(2).run(parsedData)

      // Save and load model (one path shared by both calls)
      val modelDir = "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel"
      gmm.save(sc, modelDir)
      val sameModel = GaussianMixtureModel.load(sc, modelDir)

      // output parameters of max-likelihood model
      (0 until gmm.k).foreach { i =>
        val component = gmm.gaussians(i)
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(
          gmm.weights(i), component.mu, component.sigma))
      }
      // $example off$
    } finally {
      // Previously skipped whenever any Spark call above threw.
      sc.stop()
    }
  }
}
// scalastyle:on println
Example 4
Source File: GaussianMixtureExample.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
import org.apache.spark.mllib.linalg.Vectors
// $example off$

/**
 * Gaussian mixture example: reads space-separated vectors from
 * `data/mllib/gmm_data.txt`, fits two components, persists the model under
 * `target/`, loads it back, and prints each component's weight, mu and sigma.
 */
object GaussianMixtureExample {

  /**
   * Application entry point.
   *
   * @param args command-line arguments; none are read
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GaussianMixtureExample")
    val sc = new SparkContext(conf)

    try {
      // $example on$
      // Load and parse the data
      val data = sc.textFile("data/mllib/gmm_data.txt")
      val parsedData = data.map { s =>
        Vectors.dense(s.trim.split(' ').map(_.toDouble))
      }.cache()

      // Cluster the data into two classes using GaussianMixture
      val gmm = new GaussianMixture()
        .setK(2)
        .run(parsedData)

      // Save and load model
      val savedModelPath =
        "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel"
      gmm.save(sc, savedModelPath)
      val sameModel = GaussianMixtureModel.load(sc, savedModelPath)

      // output parameters of max-likelihood model
      for (i <- 0 until gmm.k) {
        val weight = gmm.weights(i)
        val gaussian = gmm.gaussians(i)
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(weight, gaussian.mu, gaussian.sigma))
      }
      // $example off$
    } finally {
      // Guarantees the context is released on failure as well as on success.
      sc.stop()
    }
  }
}
// scalastyle:on println
Example 5
Source File: DenseGaussianMixture.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors

/**
 * Command-line example that fits a Gaussian mixture model to dense vectors read
 * from a text file (one space-separated vector per line) and prints the fitted
 * parameters plus the cluster labels of up to the first 100 points.
 *
 * Arguments: `<input file> <k> <convergenceTol> [maxIterations]`;
 * `maxIterations` defaults to 100 when omitted.
 */
object DenseGaussianMixture {

  /**
   * Parses the arguments and runs the example, or prints a usage line when
   * fewer than three arguments are given.
   *
   * @param args `<input file> <k> <convergenceTol> [maxIterations]`
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      // The usage text used to name a non-existent program ("DenseGmmEM").
      println("usage: DenseGaussianMixture <input file> <k> <convergenceTol> [maxIterations]")
    } else {
      val maxIterations = if (args.length > 3) args(3).toInt else 100
      run(args(0), args(1).toInt, args(2).toDouble, maxIterations)
    }
  }

  /**
   * Trains the mixture model and prints its parameters and sample labels.
   *
   * @param inputFile      path of the input text file
   * @param k              number of mixture components
   * @param convergenceTol convergence tolerance passed to `setConvergenceTol`
   * @param maxIterations  iteration cap passed to `setMaxIterations`
   */
  private def run(
      inputFile: String,
      k: Int,
      convergenceTol: Double,
      maxIterations: Int): Unit = {
    val conf = new SparkConf().setAppName("Gaussian Mixture Model EM example")
    val ctx = new SparkContext(conf)

    // The context was never stopped before; release it on every exit path.
    try {
      val data = ctx.textFile(inputFile).map { line =>
        Vectors.dense(line.trim.split(' ').map(_.toDouble))
      }.cache()

      val clusters = new GaussianMixture()
        .setK(k)
        .setConvergenceTol(convergenceTol)
        .setMaxIterations(maxIterations)
        .run(data)

      for (i <- 0 until clusters.k) {
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(
          clusters.weights(i), clusters.gaussians(i).mu, clusters.gaussians(i).sigma))
      }

      println("Cluster labels (first <= 100):")
      val clusterLabels = clusters.predict(data)
      clusterLabels.take(100).foreach { x =>
        print(" " + x)
      }
      println()
    } finally {
      ctx.stop()
    }
  }
}
Example 6
Source File: DenseGaussianMixture.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors

/**
 * Example driver for `GaussianMixture` on dense input: reads one
 * space-separated vector per line, trains the model with the requested
 * settings, then prints every component and up to 100 predicted labels.
 *
 * Arguments: `<input file> <k> <convergenceTol> [maxIterations]`
 * (the last one falls back to 100).
 */
object DenseGaussianMixture {

  /**
   * Validates the argument count, then delegates to `run`.
   *
   * @param args `<input file> <k> <convergenceTol> [maxIterations]`
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      // Usage now names this object instead of the non-existent "DenseGmmEM".
      println("usage: DenseGaussianMixture <input file> <k> <convergenceTol> [maxIterations]")
    } else {
      val maxIterations = if (args.length > 3) args(3).toInt else 100
      run(args(0), args(1).toInt, args(2).toDouble, maxIterations)
    }
  }

  /**
   * Fits the model and reports the results on standard output.
   *
   * @param inputFile      path of the input text file
   * @param k              number of clusters
   * @param convergenceTol value handed to `setConvergenceTol`
   * @param maxIterations  value handed to `setMaxIterations`
   */
  private def run(
      inputFile: String,
      k: Int,
      convergenceTol: Double,
      maxIterations: Int): Unit = {
    val ctx = new SparkContext(
      new SparkConf().setAppName("Gaussian Mixture Model EM example"))

    try {
      val data = ctx
        .textFile(inputFile)
        .map(line => Vectors.dense(line.trim.split(' ').map(_.toDouble)))
        .cache()

      val clusters = new GaussianMixture()
        .setK(k) // number of clusters
        .setConvergenceTol(convergenceTol)
        .setMaxIterations(maxIterations)
        .run(data)

      (0 until clusters.k).foreach { i =>
        val component = clusters.gaussians(i)
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(
          clusters.weights(i), component.mu, component.sigma))
      }

      println("Cluster labels (first <= 100):")
      val clusterLabels = clusters.predict(data)
      clusterLabels.take(100).foreach(label => print(" " + label))
      println()
    } finally {
      // Missing in the original: without it the SparkContext is never stopped.
      ctx.stop()
    }
  }
}
// scalastyle:on println
Example 7
Source File: GaussianMixtureExample.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
// $example on$
import org.apache.spark.mllib.clustering.{GaussianMixture, GaussianMixtureModel}
import org.apache.spark.mllib.linalg.Vectors
// $example off$

/**
 * Minimal `GaussianMixture` walkthrough: parse `data/mllib/gmm_data.txt`, fit a
 * model with two components, save it, load it again, and print the parameters
 * of the fitted components.
 */
object GaussianMixtureExample {

  /** Location used for both saving and reloading the trained model. */
  private val ModelPath =
    "target/org/apache/spark/GaussianMixtureExample/GaussianMixtureModel"

  /**
   * Entry point of the example.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("GaussianMixtureExample")
    val sc = new SparkContext(conf)

    // try/finally so a failing job cannot leave the context running.
    try {
      // $example on$
      // Load and parse the data
      val data = sc.textFile("data/mllib/gmm_data.txt")
      val parsedData = data.map(s => Vectors.dense(s.trim.split(' ').map(_.toDouble))).cache()

      // Cluster the data into two classes using GaussianMixture
      val gmm = new GaussianMixture().setK(2).run(parsedData)

      // Save and load model
      gmm.save(sc, ModelPath)
      val sameModel = GaussianMixtureModel.load(sc, ModelPath)

      // output parameters of max-likelihood model
      for (i <- 0 until gmm.k) {
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(
          gmm.weights(i), gmm.gaussians(i).mu, gmm.gaussians(i).sigma))
      }
      // $example off$
    } finally {
      sc.stop()
    }
  }
}
// scalastyle:on println
Example 8
Source File: DenseGaussianMixture.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.mllib.linalg.Vectors

/**
 * Runs `GaussianMixture` over a file of dense vectors (space-separated values,
 * one vector per line) and prints the learned mixture together with the
 * predicted labels of at most the first 100 points.
 *
 * Usage: `DenseGaussianMixture <input file> <k> <convergenceTol> [maxIterations]`.
 * When `maxIterations` is absent, 100 is used.
 */
object DenseGaussianMixture {

  /** Iteration cap applied when the optional fourth argument is missing. */
  private val DefaultMaxIterations = 100

  /**
   * Checks the argument count and either prints the usage line or starts the run.
   *
   * @param args `<input file> <k> <convergenceTol> [maxIterations]`
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      // Corrected program name; the message used to say "DenseGmmEM".
      println("usage: DenseGaussianMixture <input file> <k> <convergenceTol> [maxIterations]")
    } else {
      val maxIterations = if (args.length > 3) args(3).toInt else DefaultMaxIterations
      run(args(0), args(1).toInt, args(2).toDouble, maxIterations)
    }
  }

  /**
   * Creates a SparkContext, trains the model, prints the results, and stops the
   * context afterwards.
   *
   * @param inputFile      path of the input text file
   * @param k              number of mixture components
   * @param convergenceTol tolerance forwarded to `setConvergenceTol`
   * @param maxIterations  limit forwarded to `setMaxIterations`
   */
  private def run(
      inputFile: String,
      k: Int,
      convergenceTol: Double,
      maxIterations: Int): Unit = {
    val conf = new SparkConf().setAppName("Gaussian Mixture Model EM example")
    val ctx = new SparkContext(conf)

    try {
      val data = ctx.textFile(inputFile).map { line =>
        Vectors.dense(line.trim.split(' ').map(_.toDouble))
      }.cache()

      val clusters = new GaussianMixture()
        .setK(k)
        .setConvergenceTol(convergenceTol)
        .setMaxIterations(maxIterations)
        .run(data)

      for (i <- 0 until clusters.k) {
        val weight = clusters.weights(i)
        val gaussian = clusters.gaussians(i)
        println("weight=%f\nmu=%s\nsigma=\n%s\n".format(weight, gaussian.mu, gaussian.sigma))
      }

      println("Cluster labels (first <= 100):")
      val clusterLabels = clusters.predict(data)
      clusterLabels.take(100).foreach { x => print(" " + x) }
      println()
    } finally {
      // The original never stopped the context, on success or on failure.
      ctx.stop()
    }
  }
}
// scalastyle:on println