org.apache.spark.mllib.feature.StandardScaler Scala Examples
The following examples show how to use org.apache.spark.mllib.feature.StandardScaler.
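As a starting point, here is a minimal, self-contained sketch of the fit/transform flow that all of the project examples below share. The application name and the toy feature vectors are invented for illustration and are not taken from any of the listed projects.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors

object StandardScalerQuickStart {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[*]").setAppName("StandardScalerQuickStart"))

    // A tiny made-up feature RDD; the examples below load real LIBSVM data instead.
    val features = sc.parallelize(Seq(
      Vectors.dense(1.0, 10.0, 100.0),
      Vectors.dense(2.0, 20.0, 200.0),
      Vectors.dense(3.0, 30.0, 300.0)))

    // fit() computes per-column means and standard deviations over the RDD.
    // withMean = true also subtracts the mean, which densifies the output, so the
    // examples below convert sparse LIBSVM features to dense vectors before using it.
    val scaler = new StandardScaler(withMean = true, withStd = true).fit(features)

    // transform() accepts either a whole RDD[Vector] or a single Vector.
    val scaled = scaler.transform(features)
    scaled.collect().foreach(println)

    sc.stop()
  }
}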
Example 1
Source File: StandardScalerExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
// $example on$
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// $example off$

object StandardScalerExample {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("StandardScalerExample")
    val sc = new SparkContext(conf)

    // $example on$
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))

    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    // $example off$

    println("data1: ")
    data1.foreach(x => println(x))

    println("data2: ")
    data2.foreach(x => println(x))

    sc.stop()
  }
}
// scalastyle:on println
Example 2
Source File: L9-6Preprocessing.scala From prosparkstreaming with Apache License 2.0
package org.apress.prospark

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext

object PreprocessingApp {

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: PreprocessingApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val substream = ssc.socketTextStream(hostname, port.toInt)
      .filter(!_.contains("NaN"))
      .map(_.split(" "))
      .filter(f => f(1) != "0")

    substream.map(f => Array(f(2), f(4), f(5), f(6)))
      .map(f => f.map(v => v.toDouble))
      .map(f => Vectors.dense(f))
      .foreachRDD(rdd => {
        // A new scaler is fitted on each micro-batch, so each batch is
        // standardized with its own mean and standard deviation.
        val scalerModel = new StandardScaler().fit(rdd)
        val scaledRDD = scalerModel.transform(rdd)
      })

    ssc.start()
    ssc.awaitTermination()
  }
}
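Note that the example above calls fit() inside foreachRDD, so every micro-batch is standardized with statistics computed from that batch alone. Below is a minimal sketch of an alternative layout that fits the scaler once on a reference dataset and reuses it for every batch; the LIBSVM path, hostname, port, batch interval, and object name are placeholders rather than anything from the book's code.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.feature.StandardScaler
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object PrefittedScalerStreamingSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("PrefittedScalerStreamingSketch")
    val sc = new SparkContext(conf)
    val ssc = new StreamingContext(sc, Seconds(10))

    // Hypothetical reference dataset used to fix the scaling statistics up front.
    val historical = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
    val scalerModel = new StandardScaler(withMean = true, withStd = true)
      .fit(historical.map(p => Vectors.dense(p.features.toArray)))

    // Space-separated feature lines arriving on a socket (hostname and port are placeholders).
    ssc.socketTextStream("localhost", 9999)
      .map(line => Vectors.dense(line.split(" ").map(_.toDouble)))
      .foreachRDD { rdd =>
        // Every micro-batch is scaled with the same mean/std instead of batch-local statistics.
        val scaled = scalerModel.transform(rdd)
        scaled.take(5).foreach(println)
      }

    ssc.start()
    ssc.awaitTermination()
  }
}

Which of the two layouts is appropriate depends on whether the stream's distribution is expected to stay close to the reference data or to drift over time.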
Example 3
Source File: StandardScalarSample.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}

object StandardScalarSample {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)

    val data = MLUtils.loadLibSVMFile(sc,
      org.sparksamples.Util.SPARK_HOME + "/data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))
    println(data1.first())

    // Without converting the features into dense vectors, transformation with zero mean will raise
    // exception on sparse vector.
    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    println(data2.first())
  }
}
Example 4
Source File: StandardScalarSample.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}

object StandardScalarSample {
  def main(args: Array[String]) {
    val conf = new SparkConf().setMaster("local").setAppName("Word2Vector")
    val sc = new SparkContext(conf)

    val data = MLUtils.loadLibSVMFile(sc,
      "/home/ubuntu/work/spark-1.6.0-bin-hadoop2.6/data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))
    println(data1.first())

    // Without converting the features into dense vectors, transformation with zero mean will raise
    // exception on sparse vector.
    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    println(data2.first())
  }
}
Example 5
Source File: StandardScalerExample.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
// $example on$
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// $example off$

object StandardScalerExample {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("StandardScalerExample")
    val sc = new SparkContext(conf)

    // $example on$
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))

    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    // $example off$

    println("data1: ")
    data1.foreach(x => println(x))

    println("data2: ")
    data2.foreach(x => println(x))

    sc.stop()
  }
}
// scalastyle:on println
Example 6
Source File: StandardScalerExample.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
// $example on$
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// $example off$

object StandardScalerExample {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("StandardScalerExample")
    val sc = new SparkContext(conf)

    // $example on$
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))

    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    // $example off$

    println("data1: ")
    data1.foreach(x => println(x))

    println("data2: ")
    data2.foreach(x => println(x))

    sc.stop()
  }
}
// scalastyle:on println
Example 7
Source File: StandardScalerExample.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.mllib

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
// $example on$
import org.apache.spark.mllib.feature.{StandardScaler, StandardScalerModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// $example off$

object StandardScalerExample {

  def main(args: Array[String]): Unit = {

    val conf = new SparkConf().setAppName("StandardScalerExample")
    val sc = new SparkContext(conf)

    // $example on$
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    val scaler1 = new StandardScaler().fit(data.map(x => x.features))
    val scaler2 = new StandardScaler(withMean = true, withStd = true).fit(data.map(x => x.features))
    // scaler3 is an identical model to scaler2, and will produce identical transformations
    val scaler3 = new StandardScalerModel(scaler2.std, scaler2.mean)

    // data1 will be unit variance.
    val data1 = data.map(x => (x.label, scaler1.transform(x.features)))

    // data2 will be unit variance and zero mean.
    val data2 = data.map(x => (x.label, scaler2.transform(Vectors.dense(x.features.toArray))))
    // $example off$

    println("data1: ")
    data1.foreach(x => println(x))

    println("data2: ")
    data2.foreach(x => println(x))

    sc.stop()
  }
}
// scalastyle:on println