org.apache.spark.mllib.optimization.SquaredL2Updater Scala Examples
The following examples show how to use org.apache.spark.mllib.optimization.SquaredL2Updater.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: LinearRegression.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.mllib import org.apache.log4j.{Level, Logger} import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.mllib.optimization.{L1Updater, SimpleUpdater, SquaredL2Updater} import org.apache.spark.mllib.regression.LinearRegressionWithSGD import org.apache.spark.mllib.util.MLUtils spark-examples-*.jar \ | data/mllib/sample_linear_regression_data.txt """.stripMargin) } parser.parse(args, defaultParams) match { case Some(params) => run(params) case _ => sys.exit(1) } } def run(params: Params): Unit = { val conf = new SparkConf().setAppName(s"LinearRegression with $params") val sc = new SparkContext(conf) Logger.getRootLogger.setLevel(Level.WARN) val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() val splits = examples.randomSplit(Array(0.8, 0.2)) val training = splits(0).cache() val test = splits(1).cache() val numTraining = training.count() val numTest = test.count() println(s"Training: $numTraining, test: $numTest.") examples.unpersist(blocking = false) val updater = params.regType match { case NONE => new SimpleUpdater() case L1 => new L1Updater() case L2 => new SquaredL2Updater() } val algorithm = new LinearRegressionWithSGD() algorithm.optimizer .setNumIterations(params.numIterations) .setStepSize(params.stepSize) .setUpdater(updater) .setRegParam(params.regParam) val model = val prediction = model.predict( val predictionAndLabel = val loss = { case (p, l) => val err = p - l err * err }.reduce(_ + _) val rmse = math.sqrt(loss / numTest) println(s"Test RMSE = $rmse.") sc.stop() } } // scalastyle:on println
Example 2
Source File: BinaryClassification.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.mllib import org.apache.log4j.{Level, Logger} import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.mllib.classification.{LogisticRegressionWithLBFGS, SVMWithSGD} import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics import org.apache.spark.mllib.optimization.{L1Updater, SquaredL2Updater} import org.apache.spark.mllib.util.MLUtils spark-examples-*.jar \ | --algorithm LR --regType L2 --regParam 1.0 \ | data/mllib/sample_binary_classification_data.txt """.stripMargin) } parser.parse(args, defaultParams) match { case Some(params) => run(params) case _ => sys.exit(1) } } def run(params: Params): Unit = { val conf = new SparkConf().setAppName(s"BinaryClassification with $params") val sc = new SparkContext(conf) Logger.getRootLogger.setLevel(Level.WARN) val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() val splits = examples.randomSplit(Array(0.8, 0.2)) val training = splits(0).cache() val test = splits(1).cache() val numTraining = training.count() val numTest = test.count() println(s"Training: $numTraining, test: $numTest.") examples.unpersist(blocking = false) val updater = params.regType match { case L1 => new L1Updater() case L2 => new SquaredL2Updater() } val model = params.algorithm match { case LR => val algorithm = new LogisticRegressionWithLBFGS() algorithm.optimizer .setNumIterations(params.numIterations) .setUpdater(updater) .setRegParam(params.regParam) case SVM => val algorithm = new SVMWithSGD() algorithm.optimizer .setNumIterations(params.numIterations) .setStepSize(params.stepSize) .setUpdater(updater) .setRegParam(params.regParam) } val prediction = model.predict( val predictionAndLabel = val metrics = new BinaryClassificationMetrics(predictionAndLabel) println(s"Test areaUnderPR = ${metrics.areaUnderPR()}.") println(s"Test areaUnderROC = ${metrics.areaUnderROC()}.") sc.stop() } } // scalastyle:on println
Example 3
Source File: LogisticRegressionModel.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.nodes.learning import breeze.linalg.Vector import org.apache.spark.mllib.classification.{LogisticRegressionModel => MLlibLRM} import org.apache.spark.mllib.linalg.{Vector => MLlibVector} import org.apache.spark.mllib.optimization.{SquaredL2Updater, LogisticGradient, LBFGS} import org.apache.spark.mllib.regression.{GeneralizedLinearAlgorithm, LabeledPoint} import org.apache.spark.mllib.util.DataValidators import org.apache.spark.rdd.RDD import keystoneml.utils.MLlibUtils.breezeVectorToMLlib import keystoneml.workflow.{LabelEstimator, Transformer} import scala.reflect.ClassTag private[this] class LogisticRegressionWithLBFGS(numClasses: Int, numFeaturesValue: Int) extends GeneralizedLinearAlgorithm[MLlibLRM] with Serializable { this.numFeatures = numFeaturesValue override val optimizer = new LBFGS(new LogisticGradient, new SquaredL2Updater) override protected val validators = List(multiLabelValidator) require(numClasses > 1) numOfLinearPredictor = numClasses - 1 if (numClasses > 2) { optimizer.setGradient(new LogisticGradient(numClasses)) } private def multiLabelValidator: RDD[LabeledPoint] => Boolean = { data => if (numOfLinearPredictor > 1) { DataValidators.multiLabelValidator(numOfLinearPredictor + 1)(data) } else { DataValidators.binaryLabelValidator(data) } } override protected def createModel(weights: MLlibVector, intercept: Double) = { if (numOfLinearPredictor == 1) { new MLlibLRM(weights, intercept) } else { new MLlibLRM(weights, intercept, numFeatures, numOfLinearPredictor + 1) } } } override def fit(in: RDD[T], labels: RDD[Int]): LogisticRegressionModel[T] = { val labeledPoints = => LabeledPoint(x._1, breezeVectorToMLlib(x._2))) val trainer = new LogisticRegressionWithLBFGS(numClasses, numFeatures) trainer.setValidateData(false).optimizer.setNumIterations(numIters).setRegParam(regParam) val model = new LogisticRegressionModel(model) } }
Example 4
package se.uu.farmbio.cp.alg

import org.apache.spark.mllib.classification.SVMModel
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.HingeGradient
import org.apache.spark.mllib.optimization.LBFGS
import org.apache.spark.mllib.optimization.SquaredL2Updater
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD

import se.uu.farmbio.cp.UnderlyingAlgorithm

/** Training helper backing the [[SVM]] underlying algorithm. */
private object SVM {

  /**
   * Trains a linear SVM with LBFGS, using a hinge gradient and a squared-L2 updater.
   *
   * A bias term is appended to every feature vector, so the optimizer returns one
   * weight per feature plus a trailing intercept.
   *
   * @param input             labeled training points; the first element is read to
   *                          determine the number of features
   * @param maxNumItearations iteration cap passed to LBFGS
   * @param regParam          regularization parameter passed to LBFGS
   * @param numCorrections    number of corrections passed to LBFGS
   * @param convergenceTol    convergence tolerance passed to LBFGS
   * @return the fitted model with its threshold cleared
   */
  def trainingProcedure(
    input: RDD[LabeledPoint],
    maxNumItearations: Int,
    regParam: Double,
    numCorrections: Int,
    convergenceTol: Double): SVMModel = {
    val numFeatures = input.take(1)(0).features.size
    val training = input.map(x => (x.label, MLUtils.appendBias(x.features))).cache()
    // All-zero starting point: one slot per feature plus one for the appended bias.
    val initialWeightsWithIntercept = Vectors.dense(new Array[Double](numFeatures + 1))

    val (weightsWithIntercept, _) =
      try {
        LBFGS.runLBFGS(
          training,
          new HingeGradient(),
          new SquaredL2Updater(),
          numCorrections,
          convergenceTol,
          maxNumItearations,
          regParam,
          initialWeightsWithIntercept)
      } finally {
        // The cached training set is only needed while the optimizer runs.
        training.unpersist()
      }

    // The last entry is the intercept (from the appended bias); the rest are feature weights.
    val model = new SVMModel(
      Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)),
      weightsWithIntercept(weightsWithIntercept.size - 1))
    // Clear the threshold so the model acts as a raw score predictor.
    model.clearThreshold()
    model
  }
}

/**
 * SVM-based underlying algorithm; `model.predict` is handed to [[UnderlyingAlgorithm]].
 *
 * @param model an already-trained SVM model
 */
class SVM(val model: SVMModel)
  extends UnderlyingAlgorithm(model.predict) {

  /** Trains a model with [[SVM.trainingProcedure]] and wraps it. */
  def this(
    input: RDD[LabeledPoint],
    maxNumItearations: Int = 100,
    regParam: Double = 0.1,
    numCorrections: Int = 10,
    convergenceTol: Double = 1e-4) = {
    this(SVM.trainingProcedure(
      input,
      maxNumItearations,
      regParam,
      numCorrections,
      convergenceTol))
  }

  /**
   * Nonconformity score of a labeled sample: the negated predictor score when the
   * label is 1.0, the score itself otherwise.
   */
  def nonConformityMeasure(newSample: LabeledPoint) = {
    val score = predictor(newSample.features)
    if (newSample.label == 1.0) {
      -score
    } else {
      score
    }
  }
}
Example 5
Source File: LogisticRegression.scala From spark-cp with Apache License 2.0 | 5 votes |
package se.uu.farmbio.cp.alg

import org.apache.spark.mllib.classification.LogisticRegressionModel
import org.apache.spark.mllib.linalg.Vector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.LBFGS
import org.apache.spark.mllib.optimization.LogisticGradient
import org.apache.spark.mllib.optimization.SquaredL2Updater
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.rdd.RDD

import se.uu.farmbio.cp.UnderlyingAlgorithm

/** Training helper backing the [[LogisticRegression]] underlying algorithm. */
private object LogisticRegression {

  /**
   * Trains a logistic regression model with LBFGS, using a logistic gradient and a
   * squared-L2 updater, and returns its prediction function.
   *
   * A bias term is appended to every feature vector, so the optimizer returns one
   * weight per feature plus a trailing intercept.
   *
   * @param input             labeled training points; the first element is read to
   *                          determine the number of features
   * @param maxNumItearations iteration cap passed to LBFGS
   * @param regParam          regularization parameter passed to LBFGS
   * @param numCorrections    number of corrections passed to LBFGS
   * @param convergenceTol    convergence tolerance passed to LBFGS
   * @return the `predict` function of the fitted model, with its threshold cleared
   */
  def trainingProcedure(
    input: RDD[LabeledPoint],
    maxNumItearations: Int,
    regParam: Double,
    numCorrections: Int,
    convergenceTol: Double): (Vector => Double) = {
    val numFeatures = input.take(1)(0).features.size
    val training = input.map(x => (x.label, MLUtils.appendBias(x.features))).cache()
    // All-zero starting point: one slot per feature plus one for the appended bias.
    val initialWeightsWithIntercept = Vectors.dense(new Array[Double](numFeatures + 1))

    val (weightsWithIntercept, _) =
      try {
        LBFGS.runLBFGS(
          training,
          new LogisticGradient(),
          new SquaredL2Updater(),
          numCorrections,
          convergenceTol,
          maxNumItearations,
          regParam,
          initialWeightsWithIntercept)
      } finally {
        // The cached training set is only needed while the optimizer runs.
        training.unpersist()
      }

    // The last entry is the intercept (from the appended bias); the rest are feature weights.
    val model = new LogisticRegressionModel(
      Vectors.dense(weightsWithIntercept.toArray.slice(0, weightsWithIntercept.size - 1)),
      weightsWithIntercept(weightsWithIntercept.size - 1))
    // Clear the threshold so the model acts as a raw score predictor.
    model.clearThreshold()
    model.predict
  }
}

/**
 * Logistic-regression-based underlying algorithm. The model is trained at
 * construction time via [[LogisticRegression.trainingProcedure]].
 */
class LogisticRegression(
  private val input: RDD[LabeledPoint],
  private val maxNumItearations: Int = 100,
  private val regParam: Double = 0.1,
  private val numCorrections: Int = 10,
  private val convergenceTol: Double = 1e-4)
  extends UnderlyingAlgorithm(
    LogisticRegression.trainingProcedure(
      input,
      maxNumItearations,
      regParam,
      numCorrections,
      convergenceTol)) {

  /**
   * Nonconformity score of a labeled sample: `1 - score` when the label is 1.0,
   * the predictor score itself otherwise.
   */
  override def nonConformityMeasure(newSample: LabeledPoint) = {
    val score = predictor(newSample.features)
    if (newSample.label == 1.0) {
      1-score
    } else {
      score
    }
  }
}
Example 6
Source File: SparkSGD.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package linalg.sgd

import scala.util.Random

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.GradientDescent
import org.apache.spark.mllib.optimization.SquaredL2Updater
import org.apache.spark.mllib.optimization.LogisticGradient
import org.apache.spark.SparkContext

/**
 * Runs mini-batch SGD with a logistic gradient and a squared-L2 updater over a
 * small synthetic data set, then prints the first weight and the first loss value.
 */
object SparkSGD {

  def main(args: Array[String]): Unit = {
    val m = 4       // number of generated points
    val n = 200000  // number of features per point
    val sc = new SparkContext("local[2]", "")
    try {
      val points = sc.parallelize(0 until m, 2).mapPartitionsWithIndex { (idx, iter) =>
        // Seed per partition so the generated data is reproducible.
        val random = new Random(idx)
        iter.map(_ => (1.0, Vectors.dense(Array.fill(n)(random.nextDouble()))))
      }.cache()

      val (weights, loss) = GradientDescent.runMiniBatchSGD(
        points,
        new LogisticGradient,
        new SquaredL2Updater,
        0.1, // stepSize
        2,   // numIterations
        1.0, // regParam
        1.0, // miniBatchFraction
        Vectors.dense(new Array[Double](n)))

      println("w:" + weights(0))
      println("loss:" + loss(0))
    } finally {
      // Stop the context even when data generation or training fails.
      sc.stop()
    }
  }
}
Example 7
Source File: LinearRegression.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.mllib import org.apache.log4j.{Level, Logger} import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.mllib.regression.LinearRegressionWithSGD import org.apache.spark.mllib.util.MLUtils import org.apache.spark.mllib.optimization.{SimpleUpdater, SquaredL2Updater, L1Updater} spark-examples-*.jar \ | data/mllib/sample_linear_regression_data.txt """.stripMargin) } parser.parse(args, defaultParams).map { params => run(params) } getOrElse { sys.exit(1) } } def run(params: Params) { val conf = new SparkConf().setAppName(s"LinearRegression with $params") val sc = new SparkContext(conf) Logger.getRootLogger.setLevel(Level.WARN) val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() val splits = examples.randomSplit(Array(0.8, 0.2)) val training = splits(0).cache() val test = splits(1).cache() val numTraining = training.count() val numTest = test.count() println(s"Training: $numTraining, test: $numTest.") examples.unpersist(blocking = false) val updater = params.regType match { case NONE => new SimpleUpdater() case L1 => new L1Updater() case L2 => new SquaredL2Updater() } val algorithm = new LinearRegressionWithSGD() algorithm.optimizer .setNumIterations(params.numIterations) .setStepSize(params.stepSize) .setUpdater(updater) .setRegParam(params.regParam) val model = val prediction = model.predict( val predictionAndLabel = val loss = { case (p, l) => val err = p - l err * err }.reduce(_ + _) val rmse = math.sqrt(loss / numTest) println(s"Test RMSE = $rmse.") sc.stop() } }
Example 8
Source File: LinearRegression.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples.mllib import org.apache.log4j.{Level, Logger} import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.mllib.regression.LinearRegressionWithSGD import org.apache.spark.mllib.util.MLUtils import org.apache.spark.mllib.optimization.{SimpleUpdater, SquaredL2Updater, L1Updater} spark-examples-*.jar \ | data/mllib/sample_linear_regression_data.txt """.stripMargin) } parser.parse(args, defaultParams).map { params => run(params) } getOrElse { sys.exit(1) } } def run(params: Params) { val conf = new SparkConf().setAppName(s"LinearRegression with $params") val sc = new SparkContext(conf) Logger.getRootLogger.setLevel(Level.WARN) val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() val splits = examples.randomSplit(Array(0.8, 0.2)) val training = splits(0).cache() val test = splits(1).cache() val numTraining = training.count() val numTest = test.count() println(s"Training: $numTraining, test: $numTest.") examples.unpersist(blocking = false) val updater = params.regType match { case NONE => new SimpleUpdater() case L1 => new L1Updater() case L2 => new SquaredL2Updater() } val algorithm = new LinearRegressionWithSGD() algorithm.optimizer .setNumIterations(params.numIterations) .setStepSize(params.stepSize) .setUpdater(updater) .setRegParam(params.regParam) val model = val prediction = model.predict( val predictionAndLabel = val loss = { case (p, l) => val err = p - l err * err }.reduce(_ + _) val rmse = math.sqrt(loss / numTest) println(s"Test RMSE = $rmse.") sc.stop() } } // scalastyle:on println