org.apache.spark.ml.regression.LinearRegression Scala Examples
The following examples show how to use org.apache.spark.ml.regression.LinearRegression.
Each example is taken from an open-source project; the source file name, originating project, and license are listed above it.
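Most of the examples below follow the same basic pattern: configure a LinearRegression estimator, fit it on a DataFrame with "label" and "features" columns, and then inspect the fitted model and its training summary. The following minimal sketch is distilled from Example 1; the object name LinearRegressionQuickStart is illustrative, and the libsvm path assumes the sample data shipped with Spark.

import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

object LinearRegressionQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("LinearRegressionQuickStart").getOrCreate()

    // A DataFrame with "label" and "features" columns, as expected by Spark ML regressors
    val training = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    // Elastic-net regularized linear regression (elasticNetParam = 0.8 mixes L1 and L2 penalties)
    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model and inspect the fitted coefficients and training metrics
    val lrModel = lr.fit(training)
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
    println(s"RMSE: ${lrModel.summary.rootMeanSquaredError}  r2: ${lrModel.summary.r2}")

    spark.stop()
  }
}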
Example 1
Source File: LinearRegressionWithElasticNetExample.scala From drizzle-spark with Apache License 2.0 | 6 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.sql.SparkSession

object LinearRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("LinearRegressionWithElasticNetExample")
      .getOrCreate()

    // $example on$
    // Load training data
    val training = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 2
Source File: OpLinearRegressionTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.binary.{BinaryEstimator, BinaryModel}
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test._
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpLinearRegressionTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[LinearRegressionModel],
  OpPredictorWrapper[LinearRegression, LinearRegressionModel]] with PredictionEquality {

  override def specName: String = Spec[OpLinearRegression]

  val (inputData, rawLabel, features) = TestFeatureBuilder(
    Seq[(RealNN, OPVector)](
      (10.0.toRealNN, Vectors.dense(1.0, 4.3, 1.3).toOPVector),
      (20.0.toRealNN, Vectors.dense(2.0, 0.3, 0.1).toOPVector),
      (30.0.toRealNN, Vectors.dense(3.0, 3.9, 4.3).toOPVector),
      (40.0.toRealNN, Vectors.dense(4.0, 1.3, 0.9).toOPVector),
      (50.0.toRealNN, Vectors.dense(5.0, 4.7, 1.3).toOPVector)
    )
  )
  val label = rawLabel.copy(isResponse = true)
  val estimator = new OpLinearRegression().setInput(label, features)

  val expectedResult = Seq(
    Prediction(10.0),
    Prediction(20.0),
    Prediction(30.0),
    Prediction(40.0),
    Prediction(50.0)
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxIter(10)
      .setRegParam(0.1)
      .setFitIntercept(true)
      .setElasticNetParam(0.1)
      .setSolver("normal")
    estimator.fit(inputData)

    estimator.predictor.getMaxIter shouldBe 10
    estimator.predictor.getRegParam shouldBe 0.1
    estimator.predictor.getFitIntercept shouldBe true
    estimator.predictor.getElasticNetParam shouldBe 0.1
    estimator.predictor.getSolver shouldBe "normal"
  }
}
Example 3
Source File: LinearRegressionSuite.scala From aardpfark with Apache License 2.0 | 5 votes |
package com.ibm.aardpfark.spark.ml.regression

import com.ibm.aardpfark.pfa.PredictorResult
import org.apache.spark.ml.regression.LinearRegression

class LinearRegressionSuite extends SparkRegressorPFASuiteBase[PredictorResult] {

  val dataset = spark.read.format("libsvm").load(inputPath)
  val lr = new LinearRegression()

  override val sparkTransformer = lr.fit(dataset)

  val result = sparkTransformer.transform(dataset)
  override val input = withColumnAsArray(result, lr.getFeaturesCol).toJSON.collect()
  override val expectedOutput = result.select(lr.getPredictionCol).toJSON.collect()

  // Additional tests
  test("LinearRegression w/o fitIntercept") {
    val sparkTransformer = lr.setFitIntercept(false).fit(dataset)
    val result = sparkTransformer.transform(dataset)
    val expectedOutput = result.select(lr.getPredictionCol).toJSON.collect()
    parityTest(sparkTransformer, input, expectedOutput)
  }
}
Example 4
Source File: RegressionEvaluatorSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.DefaultReadWriteTest
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== 0.1019382 absTol 0.001)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998196 absTol 0.001)

    // mae
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08036075 absTol 0.001)
  }

  test("read/write") {
    val evaluator = new RegressionEvaluator()
      .setPredictionCol("myPrediction")
      .setLabelCol("myLabel")
      .setMetricName("r2")
    testDefaultReadWrite(evaluator)
  }
}
Example 5
Source File: SparkRWrappers.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.api.r

import org.apache.spark.ml.attribute._
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.DataFrame

private[r] object SparkRWrappers {

  def fitRModelFormula(
      value: String,
      df: DataFrame,
      family: String,
      lambda: Double,
      alpha: Double,
      standardize: Boolean,
      solver: String): PipelineModel = {
    val formula = new RFormula().setFormula(value)
    val estimator = family match {
      case "gaussian" => new LinearRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
        .setStandardization(standardize)
        .setSolver(solver)
      case "binomial" => new LogisticRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
        .setStandardization(standardize)
    }
    val pipeline = new Pipeline().setStages(Array(formula, estimator))
    pipeline.fit(df)
  }

  def getModelCoefficients(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        val coefficientStandardErrorsR = Array(m.summary.coefficientStandardErrors.last) ++
          m.summary.coefficientStandardErrors.dropRight(1)
        val tValuesR = Array(m.summary.tValues.last) ++ m.summary.tValues.dropRight(1)
        val pValuesR = Array(m.summary.pValues.last) ++ m.summary.pValues.dropRight(1)
        if (m.getFitIntercept) {
          Array(m.intercept) ++ m.coefficients.toArray ++ coefficientStandardErrorsR ++
            tValuesR ++ pValuesR
        } else {
          m.coefficients.toArray ++ coefficientStandardErrorsR ++ tValuesR ++ pValuesR
        }
      case m: LogisticRegressionModel =>
        if (m.getFitIntercept) {
          Array(m.intercept) ++ m.coefficients.toArray
        } else {
          m.coefficients.toArray
        }
    }
  }

  def getModelDevianceResiduals(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        m.summary.devianceResiduals
      case m: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No deviance residuals available for LogisticRegressionModel")
    }
  }

  def getModelFeatures(model: PipelineModel): Array[String] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        if (m.getFitIntercept) {
          Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
        } else {
          attrs.attributes.get.map(_.name.get)
        }
      case m: LogisticRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        if (m.getFitIntercept) {
          Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
        } else {
          attrs.attributes.get.map(_.name.get)
        }
    }
  }

  def getModelName(model: PipelineModel): String = {
    model.stages.last match {
      case m: LinearRegressionModel => "LinearRegressionModel"
      case m: LogisticRegressionModel => "LogisticRegressionModel"
    }
  }
}
Example 6
Source File: TrainValidationSplitExample.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.ml

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object TrainValidationSplitExample {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("TrainValidationSplitExample")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // Prepare training and test data.
    val data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
    val Array(training, test) = data.randomSplit(Array(0.9, 0.1), seed = 12345)

    val lr = new LinearRegression()

    // We use a ParamGridBuilder to construct a grid of parameters to search over.
    // TrainValidationSplit will try all combinations of values and determine best model using
    // the evaluator.
    val paramGrid = new ParamGridBuilder()
      .addGrid(lr.regParam, Array(0.1, 0.01))
      .addGrid(lr.fitIntercept, Array(true, false))
      .addGrid(lr.elasticNetParam, Array(0.0, 0.5, 1.0))
      .build()

    // In this case the estimator is simply the linear regression.
    // A TrainValidationSplit requires an Estimator, a set of Estimator ParamMaps, and an Evaluator.
    val trainValidationSplit = new TrainValidationSplit()
      .setEstimator(lr)
      .setEvaluator(new RegressionEvaluator)
      .setEstimatorParamMaps(paramGrid)

    // 80% of the data will be used for training and the remaining 20% for validation.
    trainValidationSplit.setTrainRatio(0.8)

    // Run train validation split, and choose the best set of parameters.
    val model = trainValidationSplit.fit(training)

    // Make predictions on test data. model is the model with combination of parameters
    // that performed best.
    model.transform(test)
      .select("features", "label", "prediction")
      .show()

    sc.stop()
  }
}
Example 7
Source File: LinearRegressionWithElasticNetExample.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object LinearRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("LinearRegressionWithElasticNetExample")
    val sc = new SparkContext(conf)
    val sqlCtx = new SQLContext(sc)

    // $example on$
    // Load training data
    val training = sqlCtx.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 8
Source File: RegressionEvaluatorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  import testImplicits._

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== 0.1013829 absTol 0.01)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998387 absTol 0.01)

    // mae
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08399089 absTol 0.01)
  }

  test("read/write") {
    val evaluator = new RegressionEvaluator()
      .setPredictionCol("myPrediction")
      .setLabelCol("myLabel")
      .setMetricName("r2")
    testDefaultReadWrite(evaluator)
  }

  test("should support all NumericType labels and not support other types") {
    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, spark)
  }
}
Example 9
Source File: LinearRegressionWithElasticNetExample.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.sql.SparkSession

object LinearRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("LinearRegressionWithElasticNetExample")
      .getOrCreate()

    // $example on$
    // Load training data
    val training = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 10
Source File: RegressionEvaluatorSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite extends SparkFunSuite with MLlibTestSparkContext {

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    // fit() is the algorithm that turns a DataFrame into a Transformer (the fitted model)
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    // transform() maps one DataFrame into another, appending the prediction column
    val predictions = model.transform(dataset)
    predictions.collect()

    // default = rmse; root mean squared error measures the dispersion of the residuals
    val evaluator = new RegressionEvaluator()
    println("==MetricName=" + evaluator.getMetricName +
      "=LabelCol=" + evaluator.getLabelCol +
      "=PredictionCol=" + evaluator.getPredictionCol)
    // prints: ==MetricName=rmse=LabelCol=label=PredictionCol=prediction
    assert(evaluator.evaluate(predictions) ~== 0.1019382 absTol 0.001)

    // r2 score: the coefficient of determination, which measures how well the model fits the data
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998196 absTol 0.001)

    // MAE: the mean of the absolute deviations of the observations from their predicted values
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08036075 absTol 0.001)
  }
}
Example 11
Source File: SparkRWrappers.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.api.r

import org.apache.spark.ml.attribute._
import org.apache.spark.ml.feature.RFormula
import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.DataFrame

private[r] object SparkRWrappers {

  def fitRModelFormula(
      value: String,
      df: DataFrame,
      family: String,
      lambda: Double,
      alpha: Double): PipelineModel = {
    val formula = new RFormula().setFormula(value)
    val estimator = family match {
      case "gaussian" => new LinearRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
      case "binomial" => new LogisticRegression()
        .setRegParam(lambda)
        .setElasticNetParam(alpha)
        .setFitIntercept(formula.hasIntercept)
    }
    val pipeline = new Pipeline().setStages(Array(formula, estimator))
    pipeline.fit(df)
  }

  def getModelWeights(model: PipelineModel): Array[Double] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        Array(m.intercept) ++ m.weights.toArray
      case _: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No weights available for LogisticRegressionModel")  // SPARK-9492
    }
  }

  def getModelFeatures(model: PipelineModel): Array[String] = {
    model.stages.last match {
      case m: LinearRegressionModel =>
        val attrs = AttributeGroup.fromStructField(
          m.summary.predictions.schema(m.summary.featuresCol))
        Array("(Intercept)") ++ attrs.attributes.get.map(_.name.get)
      case _: LogisticRegressionModel =>
        throw new UnsupportedOperationException(
          "No features names available for LogisticRegressionModel")  // SPARK-9492
    }
  }
}
Example 12
Source File: LinearRegressionWithElasticNetExample.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{SQLContext, DataFrame}

// Note: the body of this example (creating the SparkContext, loading the training data,
// fitting the LinearRegression model and obtaining trainingSummary) is not included in
// this excerpt; only the summary output below survives.

    trainingSummary.residuals.show()
    // RMSE (root mean squared error) measures the dispersion of the residuals
    // RMSE: 10.189126225286143
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    // r2, the coefficient of determination, measures how well the model fits the data
    // r2: 0.02285205756871944
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 13
Source File: OpPredictorWrapperTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types._
import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import com.salesforce.op.test.{PrestigeData, TestFeatureBuilder, TestSparkContext}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
import org.scalatest.junit.JUnitRunner
import org.slf4j.LoggerFactory

@RunWith(classOf[JUnitRunner])
class OpPredictorWrapperTest extends FlatSpec with TestSparkContext with PrestigeData {

  val log = LoggerFactory.getLogger(this.getClass)

  val (ds, targetLabel, featureVector) = TestFeatureBuilder[RealNN, OPVector](
    prestigeSeq.map(p =>
      p.prestige.toRealNN -> Vectors.dense(p.education, p.income, p.women).toOPVector
    )
  )

  Spec[OpPredictorWrapper[_, _]] should
    "be able to run a simple linear regression model (fitIntercept=true)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = true)
    lrModel.intercept.abs should be > 1E-6
  }

  it should "be able to run a simple linear regression model (fitIntercept=false)" in {
    val lrModel: LinearRegressionModel = fitLinRegModel(fitIntercept = false)
    lrModel.intercept.abs should be < Double.MinPositiveValue
  }

  private def fitLinRegModel(fitIntercept: Boolean): LinearRegressionModel = {
    val lrBase = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)
      .setFitIntercept(fitIntercept)

    val lr = new OpPredictorWrapper[LinearRegression, LinearRegressionModel](lrBase)
      .setInput(targetLabel, featureVector)

    // Fit the model
    val model = lr.fit(ds).asInstanceOf[SparkWrapperParams[LinearRegressionModel]]
    val lrModel = model.getSparkMlStage().get

    // Print the coefficients and intercept for linear regression
    log.info(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    log.info(s"numIterations: ${trainingSummary.totalIterations}")
    log.info(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    if (log.isInfoEnabled) trainingSummary.residuals.show()
    log.info(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    log.info(s"r2: ${trainingSummary.r2}")

    // checking r2 as a cheap way to make sure things are running as intended.
    assert(trainingSummary.r2 > 0.9)

    if (log.isInfoEnabled) {
      val output = lrModel.transform(ds)
      output.show(false)
    }

    lrModel
  }
}
Example 14
Source File: TypedLinearRegression.scala From frameless with Apache License 2.0 | 5 votes |
package frameless
package ml
package regression

import frameless.ml.internals.LinearInputsChecker
import frameless.ml.params.linears.{LossStrategy, Solver}
import frameless.ml.{AppendTransformer, TypedEstimator}
import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel}

final class TypedLinearRegression[Inputs] private[ml] (
  lr: LinearRegression,
  labelCol: String,
  featuresCol: String,
  weightCol: Option[String]
) extends TypedEstimator[Inputs, TypedLinearRegression.Outputs, LinearRegressionModel] {

  val estimatorWithoutWeight: LinearRegression = lr
    .setLabelCol(labelCol)
    .setFeaturesCol(featuresCol)
    .setPredictionCol(AppendTransformer.tempColumnName)

  val estimator =
    if (weightCol.isDefined) estimatorWithoutWeight.setWeightCol(weightCol.get)
    else estimatorWithoutWeight

  def setRegParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setRegParam(value))

  def setFitIntercept(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setFitIntercept(value))

  def setStandardization(value: Boolean): TypedLinearRegression[Inputs] = copy(lr.setStandardization(value))

  def setElasticNetParam(value: Double): TypedLinearRegression[Inputs] = copy(lr.setElasticNetParam(value))

  def setMaxIter(value: Int): TypedLinearRegression[Inputs] = copy(lr.setMaxIter(value))

  def setTol(value: Double): TypedLinearRegression[Inputs] = copy(lr.setTol(value))

  def setSolver(value: Solver): TypedLinearRegression[Inputs] = copy(lr.setSolver(value.sparkValue))

  def setAggregationDepth(value: Int): TypedLinearRegression[Inputs] = copy(lr.setAggregationDepth(value))

  def setLoss(value: LossStrategy): TypedLinearRegression[Inputs] = copy(lr.setLoss(value.sparkValue))

  def setEpsilon(value: Double): TypedLinearRegression[Inputs] = copy(lr.setEpsilon(value))

  private def copy(newLr: LinearRegression): TypedLinearRegression[Inputs] =
    new TypedLinearRegression[Inputs](newLr, labelCol, featuresCol, weightCol)
}

object TypedLinearRegression {

  case class Outputs(prediction: Double)
  case class Weight(weight: Double)

  def apply[Inputs](implicit inputsChecker: LinearInputsChecker[Inputs]): TypedLinearRegression[Inputs] = {
    new TypedLinearRegression(new LinearRegression(), inputsChecker.labelCol,
      inputsChecker.featuresCol, inputsChecker.weightCol)
  }
}
Example 15
Source File: RegressionEvaluatorSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite extends SparkFunSuite with MLlibTestSparkContext {

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse (negated in this Spark version, since evaluators treat larger values as better)
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== -0.1019382 absTol 0.001)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998196 absTol 0.001)

    // mae (also negated)
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== -0.08036075 absTol 0.001)
  }
}
Example 16
Source File: RegressionEvaluatorSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  import testImplicits._

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== 0.1013829 absTol 0.01)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998387 absTol 0.01)

    // mae
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08399089 absTol 0.01)
  }

  test("read/write") {
    val evaluator = new RegressionEvaluator()
      .setPredictionCol("myPrediction")
      .setLabelCol("myLabel")
      .setMetricName("r2")
    testDefaultReadWrite(evaluator)
  }

  test("should support all NumericType labels and not support other types") {
    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, spark)
  }
}
Example 17
Source File: LinearRegressionWithElasticNetExample.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.sql.SparkSession

object LinearRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("LinearRegressionWithElasticNetExample")
      .getOrCreate()

    // $example on$
    // Load training data
    val training = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 18
Source File: RegressionEvaluatorSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  import testImplicits._

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== 0.1013829 absTol 0.01)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998387 absTol 0.01)

    // mae
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08399089 absTol 0.01)
  }

  test("read/write") {
    val evaluator = new RegressionEvaluator()
      .setPredictionCol("myPrediction")
      .setLabelCol("myLabel")
      .setMetricName("r2")
    testDefaultReadWrite(evaluator)
  }

  test("should support all NumericType labels and not support other types") {
    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, spark)
  }
}
Example 19
Source File: LinearRegressionWithElasticNetExample.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.LinearRegression
// $example off$
import org.apache.spark.sql.SparkSession

object LinearRegressionWithElasticNetExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("LinearRegressionWithElasticNetExample")
      .getOrCreate()

    // $example on$
    // Load training data
    val training = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: [${trainingSummary.objectiveHistory.mkString(",")}]")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 20
Source File: LinearRegressionPipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 5 votes |
package org.sparksamples.regression.bikesharing

import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{DataFrame, SparkSession}

object LinearRegressionPipeline {

  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def linearRegressionWithVectorFormat(vectorAssembler: VectorAssembler,
                                       vectorIndexer: VectorIndexer,
                                       dataFrame: DataFrame) = {
    val lr = new LinearRegression()
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setRegParam(0.1)
      .setElasticNetParam(1.0)
      .setMaxIter(10)

    val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))

    val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)

    val model = pipeline.fit(training)
    val fullPredictions = model.transform(test).cache()
    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
    val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
    val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
    println(s" Root mean squared error (RMSE): $RMSE")
  }

  def linearRegressionWithSVMFormat(spark: SparkSession) = {
    // Load training data
    val training = spark.read.format("libsvm")
      .load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")

    val lr = new LinearRegression()
      .setMaxIter(10)
      .setRegParam(0.3)
      .setElasticNetParam(0.8)

    // Fit the model
    val lrModel = lr.fit(training)

    // Print the coefficients and intercept for linear regression
    println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")

    // Summarize the model over the training set and print out some metrics
    val trainingSummary = lrModel.summary
    println(s"numIterations: ${trainingSummary.totalIterations}")
    println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
    trainingSummary.residuals.show()
    println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
    println(s"r2: ${trainingSummary.r2}")
  }
}
Example 21
Source File: LinearRegressionParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.parity.regression

import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.ml.feature.{OneHotEncoderEstimator, StringIndexer, VectorAssembler}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.sql.DataFrame

class LinearRegressionParitySpec extends SparkParityBase {

  override val dataset: DataFrame =
    baseDataset.select("fico_score_group_fnl", "dti", "loan_amount")

  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(
    new StringIndexer()
      .setInputCol("fico_score_group_fnl")
      .setOutputCol("fico_index"),
    new OneHotEncoderEstimator()
      .setInputCols(Array("fico_index"))
      .setOutputCols(Array("fico")),
    new VectorAssembler()
      .setInputCols(Array("fico", "dti"))
      .setOutputCol("features"),
    new LinearRegression()
      .setFeaturesCol("features")
      .setLabelCol("loan_amount")
      .setPredictionCol("prediction"))).fit(dataset)

  override val unserializedParams = Set("stringOrderType", "elasticNetParam", "maxIter",
    "tol", "epsilon", "labelCol", "loss", "regParam", "solver")
}
Example 22
Source File: RegressionEvaluatorSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.evaluation

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._

class RegressionEvaluatorSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  import testImplicits._

  test("params") {
    ParamsSuite.checkParams(new RegressionEvaluator)
  }

  test("Regression Evaluator: default params") {
    val trainer = new LinearRegression
    val model = trainer.fit(dataset)
    val predictions = model.transform(dataset)

    // default = rmse
    val evaluator = new RegressionEvaluator()
    assert(evaluator.evaluate(predictions) ~== 0.1013829 absTol 0.01)

    // r2 score
    evaluator.setMetricName("r2")
    assert(evaluator.evaluate(predictions) ~== 0.9998387 absTol 0.01)

    // mae
    evaluator.setMetricName("mae")
    assert(evaluator.evaluate(predictions) ~== 0.08399089 absTol 0.01)
  }

  test("read/write") {
    val evaluator = new RegressionEvaluator()
      .setPredictionCol("myPrediction")
      .setLabelCol("myLabel")
      .setMetricName("r2")
    testDefaultReadWrite(evaluator)
  }

  test("should support all NumericType labels and not support other types") {
    MLTestingUtils.checkNumericTypes(new RegressionEvaluator, spark)
  }
}