org.apache.spark.ml.regression.GeneralizedLinearRegression Scala Examples
The following examples show how to use org.apache.spark.ml.regression.GeneralizedLinearRegression.
Example 1
Source File: GeneralizedLinearRegressionExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.regression.GeneralizedLinearRegression
// $example off$
import org.apache.spark.sql.SparkSession

object GeneralizedLinearRegressionExample {

  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("GeneralizedLinearRegressionExample")
      .getOrCreate()

    // $example on$
    // Load training data
    val dataset = spark.read.format("libsvm")
      .load("data/mllib/sample_linear_regression_data.txt")

    val glr = new GeneralizedLinearRegression()
      .setFamily("gaussian")
      .setLink("identity")
      .setMaxIter(10)
      .setRegParam(0.3)

    // Fit the model
    val model = glr.fit(dataset)

    // Print the coefficients and intercept for generalized linear regression model
    println(s"Coefficients: ${model.coefficients}")
    println(s"Intercept: ${model.intercept}")

    // Summarize the model over the training set and print out some metrics
    val summary = model.summary
    println(s"Coefficient Standard Errors: ${summary.coefficientStandardErrors.mkString(",")}")
    println(s"T Values: ${summary.tValues.mkString(",")}")
    println(s"P Values: ${summary.pValues.mkString(",")}")
    println(s"Dispersion: ${summary.dispersion}")
    println(s"Null Deviance: ${summary.nullDeviance}")
    println(s"Residual Degree Of Freedom Null: ${summary.residualDegreeOfFreedomNull}")
    println(s"Deviance: ${summary.deviance}")
    println(s"Residual Degree Of Freedom: ${summary.residualDegreeOfFreedom}")
    println(s"AIC: ${summary.aic}")
    println("Deviance Residuals: ")
    summary.residuals().show()
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
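Every stock Spark example in this listing fits a Gaussian family with the identity link, which is ordinary least squares. The same estimator accepts other exponential-family distributions; below is a minimal sketch of two alternative configurations, assuming a label column with a suitable distribution (the variable names are illustrative only):

// Poisson regression with log link, for count-valued labels
val poissonGlr = new GeneralizedLinearRegression()
  .setFamily("poisson")
  .setLink("log")
  .setMaxIter(10)
  .setRegParam(0.3)

// Gamma regression with inverse link, for strictly positive labels
val gammaGlr = new GeneralizedLinearRegression()
  .setFamily("gamma")
  .setLink("inverse")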
Example 2
Source File: GeneralizedLinearRegressionParitySpec.scala From mleap with Apache License 2.0
package org.apache.spark.ml.parity.regression

import org.apache.spark.ml.feature.{OneHotEncoderEstimator, StringIndexer, VectorAssembler}
import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.sql._

class GeneralizedLinearRegressionParitySpec extends SparkParityBase {
  override val dataset: DataFrame = baseDataset.select("fico_score_group_fnl", "dti", "loan_amount")

  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(
    new StringIndexer().
      setInputCol("fico_score_group_fnl").
      setOutputCol("fico_index"),
    new OneHotEncoderEstimator().
      setInputCols(Array("fico_index")).
      setOutputCols(Array("fico")),
    new VectorAssembler().
      setInputCols(Array("fico", "dti")).
      setOutputCol("features"),
    new GeneralizedLinearRegression().
      setFamily("gaussian").
      setLink("log").
      setFeaturesCol("features").
      setLabelCol("loan_amount").
      setPredictionCol("prediction"))).fit(dataset)

  override val unserializedParams =
    Set("stringOrderType", "labelCol", "maxIter", "tol", "regParam", "solver", "variancePower")
}
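One caveat when reusing this spec on newer Spark: OneHotEncoderEstimator existed only in Spark 2.3/2.4 and was renamed to OneHotEncoder in Spark 3.0, which dropped the old single-column OneHotEncoder. A sketch of the equivalent stage under Spark 3.x, assuming the same column names:

import org.apache.spark.ml.feature.OneHotEncoder

val encoder = new OneHotEncoder()
  .setInputCols(Array("fico_index"))
  .setOutputCols(Array("fico"))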
Example 3
Source File: GeneralizedLinearRegressionPipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License
package org.sparksamples.regression.bikesharing

import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{SparkSession, _}

object GeneralizedLinearRegressionPipeline {

  @transient lazy val logger = Logger.getLogger(getClass.getName)

  def genLinearRegressionWithVectorFormat(vectorAssembler: VectorAssembler,
                                          vectorIndexer: VectorIndexer,
                                          dataFrame: DataFrame) = {
    val lr = new GeneralizedLinearRegression()
      .setFeaturesCol("features")
      .setLabelCol("label")
      .setFamily("gaussian")
      .setLink("identity")
      .setMaxIter(10)
      .setRegParam(0.3)

    val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))

    val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)
    val model = pipeline.fit(training)

    val fullPredictions = model.transform(test).cache()
    val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
    val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
    val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
    println(s"  Root mean squared error (RMSE): $RMSE")
  }

  def genLinearRegressionWithSVMFormat(spark: SparkSession) = {
    // Load training data
    val training = spark.read.format("libsvm")
      .load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")

    val lr = new GeneralizedLinearRegression()
      .setFamily("gaussian")
      .setLink("identity")
      .setMaxIter(10)
      .setRegParam(0.3)

    // Fit the model
    val model = lr.fit(training)

    // Print the coefficients and intercept for generalized linear regression model
    println(s"Coefficients: ${model.coefficients}")
    println(s"Intercept: ${model.intercept}")

    // Summarize the model over the training set and print out some metrics
    val summary = model.summary
    println(s"Coefficient Standard Errors: ${summary.coefficientStandardErrors.mkString(",")}")
    println(s"T Values: ${summary.tValues.mkString(",")}")
    println(s"P Values: ${summary.pValues.mkString(",")}")
    println(s"Dispersion: ${summary.dispersion}")
    println(s"Null Deviance: ${summary.nullDeviance}")
    println(s"Residual Degree Of Freedom Null: ${summary.residualDegreeOfFreedomNull}")
    println(s"Deviance: ${summary.deviance}")
    println(s"Residual Degree Of Freedom: ${summary.residualDegreeOfFreedom}")
    println(s"AIC: ${summary.aic}")
    println("Deviance Residuals: ")
    summary.residuals().show()
  }
}
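genLinearRegressionWithVectorFormat round-trips through RDDs only to feed the older mllib RegressionMetrics. The DataFrame-based evaluator in org.apache.spark.ml.evaluation computes the same RMSE without leaving the DataFrame API; a minimal sketch, assuming fullPredictions carries the "label" and "prediction" columns as above:

import org.apache.spark.ml.evaluation.RegressionEvaluator

val rmse = new RegressionEvaluator()
  .setLabelCol("label")
  .setPredictionCol("prediction")
  .setMetricName("rmse")
  .evaluate(fullPredictions)
println(s"Root mean squared error (RMSE): $rmse")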
Example 4
Source File: GeneralizedLinearRegressionExample.scala From sparkoscope with Apache License 2.0
This file is identical, line for line, to the drizzle-spark listing in Example 1; the code is omitted here rather than repeated.
Example 5
Source File: GLMRegression.scala From spark-sql-perf with Apache License 2.0
package com.databricks.spark.sql.perf.mllib.regression

import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.ml.{ModelBuilderSSP, PipelineStage, Transformer}

import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator

object GLMRegression extends BenchmarkAlgorithm with TestFromTraining with
  TrainingSetFromTransformer with ScoringWithEvaluator {

  override protected def initialData(ctx: MLBenchContext) = {
    import ctx.params._
    DataGenerator.generateContinuousFeatures(
      ctx.sqlContext,
      numExamples,
      ctx.seed(),
      numPartitions,
      numFeatures)
  }

  override protected def trueModel(ctx: MLBenchContext): Transformer = {
    import ctx.params._
    val rng = ctx.newGenerator()
    val coefficients =
      Vectors.dense(Array.fill[Double](ctx.params.numFeatures)(2 * rng.nextDouble() - 1))
    // Small intercept to prevent some skew in the data.
    val intercept = 0.01 * (2 * rng.nextDouble - 1)
    val m = ModelBuilderSSP.newGLR(coefficients, intercept)
    m.set(m.link, link.get)
    m.set(m.family, family.get)
    m
  }

  override def getPipelineStage(ctx: MLBenchContext): PipelineStage = {
    import ctx.params._
    new GeneralizedLinearRegression()
      .setLink(link)
      .setFamily(family)
      .setRegParam(regParam)
      .setMaxIter(maxIter)
      .setTol(tol)
  }

  override protected def evaluator(ctx: MLBenchContext): Evaluator =
    new RegressionEvaluator()
}
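The structure of this benchmark is worth noting: trueModel first builds a ground-truth GLM with random coefficients in [-1, 1] and a small intercept via ModelBuilderSSP.newGLR, and the TrainingSetFromTransformer trait appears to use that model to label the generated features; the timed run then fits a fresh GeneralizedLinearRegression on those labels, with RegressionEvaluator scoring how well the known model is recovered.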
Example 6
Source File: GeneralizedLinearRegressionExample.scala From multi-tenancy-spark with Apache License 2.0
Again identical to the listing in Example 1; code omitted.
Example 7
Source File: OpGeneralizedLinearRegressionTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test._
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{GeneralizedLinearRegression, GeneralizedLinearRegressionModel}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpGeneralizedLinearRegressionTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[GeneralizedLinearRegressionModel],
  OpPredictorWrapper[GeneralizedLinearRegression, GeneralizedLinearRegressionModel]]
  with PredictionEquality {

  override def specName: String = Spec[OpGeneralizedLinearRegression]

  val (inputData, rawLabel, features) = TestFeatureBuilder(
    Seq[(RealNN, OPVector)](
      (10.0.toRealNN, Vectors.dense(1.0, 4.3, 1.3).toOPVector),
      (20.0.toRealNN, Vectors.dense(2.0, 0.3, 0.1).toOPVector),
      (30.0.toRealNN, Vectors.dense(3.0, 3.9, 4.3).toOPVector),
      (40.0.toRealNN, Vectors.dense(4.0, 1.3, 0.9).toOPVector),
      (50.0.toRealNN, Vectors.dense(5.0, 4.7, 1.3).toOPVector)
    )
  )
  val label = rawLabel.copy(isResponse = true)
  val estimator = new OpGeneralizedLinearRegression().setInput(label, features)

  val expectedResult = Seq(
    Prediction(10.0, 9.99),
    Prediction(20.0, 19.99),
    Prediction(30.0, 29.99),
    Prediction(40.0, 40.0),
    Prediction(50.0, 50.0)
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxIter(10)
      .setRegParam(0.1)
      .setFitIntercept(true)
      .setTol(1E-4)
      .setSolver("irls")
    estimator.fit(inputData)

    estimator.predictor.getMaxIter shouldBe 10
    estimator.predictor.getRegParam shouldBe 0.1
    estimator.predictor.getFitIntercept shouldBe true
    estimator.predictor.getTol shouldBe 1E-4
    estimator.predictor.getSolver shouldBe "irls"
  }
}
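A side note on the setSolver("irls") call: iteratively reweighted least squares is the only solver Spark's GeneralizedLinearRegression supports (and its default), so the call here exercises the parameter plumbing rather than selecting among alternatives.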
Example 8
Source File: GeneralizedLinearRegressionExample.scala From Spark-2.3.1 with Apache License 2.0
Identical to the listing in Example 1 (this is the upstream Spark 2.3.1 copy of the same example file); code omitted.
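To close out the listings: a fitted GeneralizedLinearRegressionModel (such as model in Example 1) persists like any other Spark ML model. A minimal sketch, with a hypothetical output path:

import org.apache.spark.ml.regression.GeneralizedLinearRegressionModel

// Save the fitted model, then load it back (path is illustrative)
model.write.overwrite().save("/tmp/glr-model")
val reloaded = GeneralizedLinearRegressionModel.load("/tmp/glr-model")
println(s"Reloaded coefficients: ${reloaded.coefficients}")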