scala.math.exp Scala Examples
The following examples show how to use scala.math.exp.
The source project, file, and license are noted above each example.
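scala.math.exp(x) returns e^x as a Double. In the Spark examples below it almost always appears inside the logistic (sigmoid) function, so a minimal sketch of both is useful up front (this snippet is illustrative and not taken from any of the projects below):

import scala.math.exp

// e^x for a few values
exp(0.0)   // 1.0
exp(1.0)   // 2.718281828459045

// the logistic function used throughout the examples below
def sigmoid(z: Double): Double = 1.0 / (1.0 + exp(-z))

sigmoid(0.0)   // 0.5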
Example 1
Source File: SparkHdfsLR.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR").setMaster("local[2]")
    val inputPath = "D:\\spark\\spark-1.5.0-hadoop2.6\\data\\mllib\\lr_data.txt" // args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache() // cache the parsed points
    val ITERATIONS = 6 // args(1).toInt, the number of iterations

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        // p is a DataPoint carrying the feature Vector x and the label y
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println
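For reference, the expression inside the map step is the gradient of the logistic log-loss for labels y in {-1, +1}, and the loop performs plain gradient descent with a unit step size:

L(w) = \sum_i \log\left(1 + e^{-y_i \, w \cdot x_i}\right), \qquad
\nabla L(w) = \sum_i \left(\frac{1}{1 + e^{-y_i \, w \cdot x_i}} - 1\right) y_i \, x_i, \qquad
w \leftarrow w - \nabla L(w)

The same update appears in every logistic-regression example on this page.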
Example 2
Source File: mathExpr.scala From matryoshka with Apache License 2.0
package matryoshka.example

import org.specs2.mutable._

import slamdata.Predef._

import matryoshka._
import matryoshka.data._
import matryoshka.implicits._

import scalaz._

import scala.math.exp

sealed trait MathExprF[A]

object MathExprF {
  case class VarF[A]() extends MathExprF[A]
  case class ZeroF[A]() extends MathExprF[A]
  case class OneF[A]() extends MathExprF[A]
  case class NegateF[A](a: A) extends MathExprF[A]
  case class SumF[A](l: A, r: A) extends MathExprF[A]
  case class ProductF[A](l: A, r: A) extends MathExprF[A]
  case class ExpF[A](a: A) extends MathExprF[A]

  type Expr = Fix[MathExprF]

  implicit val exprFunctor: Functor[MathExprF] = new Functor[MathExprF] {
    def map[A, B](fa: MathExprF[A])(f: A => B): MathExprF[B] = fa match {
      case VarF() => VarF()
      case ZeroF() => ZeroF()
      case OneF() => OneF()
      case NegateF(a) => NegateF(f(a))
      case SumF(l, r) => SumF(f(l), f(r))
      case ProductF(l, r) => ProductF(f(l), f(r))
      case ExpF(a) => ExpF(f(a))
    }
  }

  val varExpr: Expr = Fix(VarF())
  val zero: Expr = Fix(ZeroF())
  val one: Expr = Fix(OneF())
  def neg(expr: Expr): Expr = Fix(NegateF(expr))
  def add(l: Expr, r: Expr): Expr = Fix(SumF(l, r))
  def prod(l: Expr, r: Expr): Expr = Fix(ProductF(l, r))
  def e(expr: Expr): Expr = Fix(ExpF(expr))

  def evalAlgebra(x: Double): MathExprF[Double] => Double = {
    case VarF() => x
    case ZeroF() => 0
    case OneF() => 1
    case NegateF(a) => -a
    case SumF(l, r) => l + r
    case ProductF(l, r) => l * r
    case ExpF(a) => exp(a)
  }

  def eval(x: Double, expr: Expr): Double = expr.cata(evalAlgebra(x))

  def showAlgebra: Algebra[MathExprF, String] = {
    case VarF() => "x"
    case ZeroF() => "0"
    case OneF() => "1"
    case NegateF(x) => s"-$x"
    case SumF(l, r) => s"($l + $r)"
    case ProductF(l, r) => s"($l * $r)"
    case ExpF(x) => s"e($x)"
  }

  def diffGAlgebra: GAlgebra[(Double, ?), MathExprF, Double] = {
    case VarF() => 1
    case ZeroF() => 0
    case OneF() => 0
    case NegateF((_, a)) => -a
    case SumF((_, l), (_, r)) => l + r
    case ProductF((l, ll), (r, rr)) => (l * rr) + (r * ll)
    case ExpF((x, xx)) => exp(x) * xx
  }

  def automaticDifferentiation(x: Double, expr: Expr): Double =
    expr.zygo(evalAlgebra(x), diffGAlgebra)
}

class MathExprSpec extends Specification {
  import MathExprF._

  // d exp(x*x - 1) / dx = 2*x*exp(x*x - 1)
  val expr: Expr = e(add(prod(varExpr, varExpr), neg(one)))

  "should differentiate a value automatically" >> {
    automaticDifferentiation(1.0, expr) must beEqualTo(2.0)
  }
}
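As a quick check of the algebras above, the same expression can be evaluated, pretty-printed, and differentiated directly. This is a sketch that assumes the imports and implicits from the example are in scope; the rendered string and numeric results in the comments are what the algebras should produce, not verified output.

import MathExprF._

// f(x) = e^(x*x - 1), built from the smart constructors above
val f: Expr = e(add(prod(varExpr, varExpr), neg(one)))

eval(1.0, f)                       // exp(1*1 - 1) = 1.0
f.cata(showAlgebra)                // renders roughly as "e(((x * x) + -1))"
automaticDifferentiation(1.0, f)   // 2*x*exp(x*x - 1) at x = 1 gives 2.0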
Example 3
Source File: SparkLR.scala From learning-spark with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData = {
    def generatePoint(i: Int) = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
}
Example 4
Source File: SparkHdfsLR.scala From learning-spark with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
    val inputPath = args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
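parsePoint expects each input line to hold the label followed by D = 10 feature values, all space-separated. A hypothetical input line (values invented purely for illustration, not taken from the project's data file) would look like:

1.0 0.27 -1.3 0.45 2.1 -0.8 0.03 1.9 -0.6 0.7 -1.1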
Example 5
Source File: SparkTachyonHdfsLR.scala From learning-spark with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel

object SparkTachyonHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {
    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
Example 6
Source File: SparkLR.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
}
// scalastyle:on println
Example 7
Source File: SparkHdfsLR.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
    val inputPath = args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println
Example 8
Source File: SparkTachyonHdfsLR.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel

object SparkTachyonHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {
    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println
Example 9
Source File: SparkLR.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println(s"Final w: $w")

    spark.stop()
  }
}
// scalastyle:on println
Example 10
Source File: SparkHdfsLR.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    val y = tok.nextToken.toDouble
    val x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd
    lines.cache()

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    val w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println(s"Initial w: $w")

    for (i <- 1 to ITERATIONS) {
      println(s"On iteration $i")
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println(s"Final w: $w")
    spark.stop()
  }
}
// scalastyle:on println
Example 11
Source File: SparkLR.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD (stochastic gradient descent) or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS (a rank-two quasi-Newton method)
        |for more conventional use.
      """.stripMargin)
    // String.stripMargin removes the leading whitespace and the first '|' margin character on each line
  }

  def main(args: Array[String]) {
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR").setMaster("local")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println
Example 12
Source File: SparkHdfsLR.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    spark.stop()
  }
}
// scalastyle:on println
Example 13
Source File: SparkTachyonHdfsLR.scala From spark1.52 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel

object SparkTachyonHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD (stochastic gradient descent) or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS (a rank-two quasi-Newton method)
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {
    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
// scalastyle:on println
Example 14
Source File: SparkLR.scala From iolap with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}

import org.apache.spark._

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D){rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkLR")
    val sc = new SparkContext(sparkConf)
    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = sc.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    sc.stop()
  }
}
Example 15
Source File: SparkHdfsLR.scala From iolap with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("SparkHdfsLR")
    val inputPath = args(0)
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
Example 16
Source File: SparkTachyonHdfsLR.scala From iolap with Apache License 2.0
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{Vector, DenseVector}
import org.apache.hadoop.conf.Configuration

import org.apache.spark._
import org.apache.spark.scheduler.InputFormatInfo
import org.apache.spark.storage.StorageLevel

object SparkTachyonHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use either org.apache.spark.mllib.classification.LogisticRegressionWithSGD or
        |org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
        |for more conventional use.
      """.stripMargin)
  }

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def main(args: Array[String]) {
    showWarning()

    val inputPath = args(0)
    val sparkConf = new SparkConf().setAppName("SparkTachyonHdfsLR")
    val conf = new Configuration()
    val sc = new SparkContext(sparkConf,
      InputFormatInfo.computePreferredLocations(
        Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath))
      ))
    val lines = sc.textFile(inputPath)
    val points = lines.map(parsePoint _).persist(StorageLevel.OFF_HEAP)
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D){2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    sc.stop()
  }
}
Example 17
Source File: SparkLR.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    spark.stop()
  }
}
// scalastyle:on println
Example 18
Source File: SparkHdfsLR.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    spark.stop()
  }
}
// scalastyle:on println
Example 19
Source File: VowpalWabbitClassifier.scala From mmlspark with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.vw

import com.microsoft.ml.spark.core.env.InternalWrapper
import com.microsoft.ml.spark.core.schema.DatasetExtensions
import org.apache.spark.ml.ComplexParamsReadable
import org.apache.spark.ml.param._
import org.apache.spark.ml.util._
import org.apache.spark.ml.classification.{ProbabilisticClassificationModel, ProbabilisticClassifier}
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.sql._
import org.apache.spark.sql.functions.{col, udf}
import org.vowpalwabbit.spark.VowpalWabbitExample
import com.microsoft.ml.spark.core.schema.DatasetExtensions._

import scala.math.exp

object VowpalWabbitClassifier extends DefaultParamsReadable[VowpalWabbitClassifier]

@InternalWrapper
class VowpalWabbitClassifier(override val uid: String)
  extends ProbabilisticClassifier[Row, VowpalWabbitClassifier, VowpalWabbitClassificationModel]
  with VowpalWabbitBase {

  def this() = this(Identifiable.randomUID("VowpalWabbitClassifier"))

  // to support Grid search we need to replicate the parameters here...
  val labelConversion = new BooleanParam(this, "labelConversion",
    "Convert 0/1 Spark ML style labels to -1/1 VW style labels. Defaults to true.")
  setDefault(labelConversion -> true)
  def getLabelConversion: Boolean = $(labelConversion)
  def setLabelConversion(value: Boolean): this.type = set(labelConversion, value)

  override protected def train(dataset: Dataset[_]): VowpalWabbitClassificationModel = {
    val model = new VowpalWabbitClassificationModel(uid)
      .setFeaturesCol(getFeaturesCol)
      .setAdditionalFeatures(getAdditionalFeatures)
      .setPredictionCol(getPredictionCol)
      .setProbabilityCol(getProbabilityCol)
      .setRawPredictionCol(getRawPredictionCol)

    val finalDataset = if (!getLabelConversion) dataset
    else {
      val inputLabelCol = dataset.withDerivativeCol("label")
      dataset
        .withColumnRenamed(getLabelCol, inputLabelCol)
        .withColumn(getLabelCol, col(inputLabelCol) * 2 - 1)
    }

    trainInternal(finalDataset, model)
  }

  override def copy(extra: ParamMap): VowpalWabbitClassifier = defaultCopy(extra)
}

// Preparation for multi-class learning, though it is no fun as numClasses is spread around multiple reductions
@InternalWrapper
class VowpalWabbitClassificationModel(override val uid: String)
  extends ProbabilisticClassificationModel[Row, VowpalWabbitClassificationModel]
  with VowpalWabbitBaseModel {

  def numClasses: Int = 2

  override def transform(dataset: Dataset[_]): DataFrame = {
    val df = transformImplInternal(dataset)

    // which mode one wants to use depends a bit on how this should be deployed
    // 1. if you stay in Spark, w/o link=logistic is probably more convenient as it also returns the raw prediction
    // 2. if you want to export the model *and* get probabilities at scoring time, w/ link=logistic is preferable

    // convert raw prediction to probability (if needed)
    val probabilityUdf = if (vwArgs.getArgs.contains("--link logistic"))
      udf { (pred: Double) => Vectors.dense(Array(1 - pred, pred)) }
    else
      udf { (pred: Double) => {
        val prob = 1.0 / (1.0 + exp(-pred))
        Vectors.dense(Array(1 - prob, prob))
      } }

    val df2 = df.withColumn($(probabilityCol), probabilityUdf(col($(rawPredictionCol))))

    // convert probability to prediction
    val probability2predictionUdf = udf(probability2prediction _)
    df2.withColumn($(predictionCol), probability2predictionUdf(col($(probabilityCol))))
  }

  override def copy(extra: ParamMap): this.type = defaultCopy(extra)

  protected override def predictRaw(features: Row): Vector = {
    throw new NotImplementedError("Not implemented")
  }

  protected override def raw2probabilityInPlace(rawPrediction: Vector): Vector = {
    throw new NotImplementedError("Not implemented")
  }
}

object VowpalWabbitClassificationModel extends ComplexParamsReadable[VowpalWabbitClassificationModel]
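The else branch of the UDF above applies the standard logistic link to VW's raw margin. A minimal standalone sketch of that conversion (the helper name is hypothetical and not part of MMLSpark):

import scala.math.exp

// Hypothetical helper mirroring the UDF above: turn a raw VW margin into
// a two-class probability pair (P(class 0), P(class 1)).
def marginToProbabilities(pred: Double): Array[Double] = {
  val prob = 1.0 / (1.0 + exp(-pred))  // logistic link
  Array(1 - prob, prob)
}

marginToProbabilities(0.0)   // Array(0.5, 0.5)
marginToProbabilities(2.0)   // roughly Array(0.12, 0.88)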
Example 20
Source File: SparkLR.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    spark.stop()
  }
}
// scalastyle:on println
Example 21
Source File: SparkHdfsLR.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkHdfsLR {
  val D = 10 // Number of dimensions
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def parsePoint(line: String): DataPoint = {
    val tok = new java.util.StringTokenizer(line, " ")
    var y = tok.nextToken.toDouble
    var x = new Array[Double](D)
    var i = 0
    while (i < D) {
      x(i) = tok.nextToken.toDouble; i += 1
    }
    DataPoint(new DenseVector(x), y)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    if (args.length < 2) {
      System.err.println("Usage: SparkHdfsLR <file> <iters>")
      System.exit(1)
    }

    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkHdfsLR")
      .getOrCreate()

    val inputPath = args(0)
    val lines = spark.read.textFile(inputPath).rdd

    val points = lines.map(parsePoint).cache()
    val ITERATIONS = args(1).toInt

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)
    spark.stop()
  }
}
// scalastyle:on println
Example 22
Source File: SparkLR.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.math.exp

import breeze.linalg.{DenseVector, Vector}

import org.apache.spark.sql.SparkSession

object SparkLR {
  val N = 10000  // Number of data points
  val D = 10     // Number of dimensions
  val R = 0.7    // Scaling factor
  val ITERATIONS = 5
  val rand = new Random(42)

  case class DataPoint(x: Vector[Double], y: Double)

  def generateData: Array[DataPoint] = {
    def generatePoint(i: Int): DataPoint = {
      val y = if (i % 2 == 0) -1 else 1
      val x = DenseVector.fill(D) {rand.nextGaussian + y * R}
      DataPoint(x, y)
    }
    Array.tabulate(N)(generatePoint)
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of Logistic Regression and is given as an example!
        |Please use org.apache.spark.ml.classification.LogisticRegression
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {
    showWarning()

    val spark = SparkSession
      .builder
      .appName("SparkLR")
      .getOrCreate()

    val numSlices = if (args.length > 0) args(0).toInt else 2
    val points = spark.sparkContext.parallelize(generateData, numSlices).cache()

    // Initialize w to a random value
    var w = DenseVector.fill(D) {2 * rand.nextDouble - 1}
    println("Initial w: " + w)

    for (i <- 1 to ITERATIONS) {
      println("On iteration " + i)
      val gradient = points.map { p =>
        p.x * (1 / (1 + exp(-p.y * (w.dot(p.x)))) - 1) * p.y
      }.reduce(_ + _)
      w -= gradient
    }

    println("Final w: " + w)

    spark.stop()
  }
}
// scalastyle:on println