org.apache.spark.ml.regression.RandomForestRegressionModel Scala Examples
The following examples show how to use org.apache.spark.ml.regression.RandomForestRegressionModel.
Follow the link above each example to view the original project or source file.
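Before the examples, here is a minimal, self-contained sketch of how a RandomForestRegressionModel is normally obtained: by fitting a RandomForestRegressor on a DataFrame that has a numeric "label" column and a vector "features" column. The data path and parameter values below are illustrative assumptions, not part of any example on this page.

import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
import org.apache.spark.sql.SparkSession

object RandomForestRegressionQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("RandomForestRegressionQuickStart")
      .master("local[*]")
      .getOrCreate()

    // Any DataFrame with "label" and "features" columns works; the bundled
    // libsvm sample file is used here only for illustration (assumed path).
    val data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")

    val rf = new RandomForestRegressor()
      .setLabelCol("label")
      .setFeaturesCol("features")
      .setNumTrees(20) // illustrative value
      .setMaxDepth(5)  // illustrative value

    // fit() returns the type the examples on this page revolve around.
    val model: RandomForestRegressionModel = rf.fit(data)

    println(s"Trained ${model.trees.length} trees over ${model.numFeatures} features")
    model.transform(data).select("features", "label", "prediction").show(5)

    spark.stop()
  }
}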
Example 1
Source File: LocalRandomForestRegressionModel.scala From spark-ml-serving with Apache License 2.0 | 5 votes |
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}

class LocalRandomForestRegressionModel(override val sparkTransformer: RandomForestRegressionModel)
  extends LocalPredictionModel[RandomForestRegressionModel] {}

object LocalRandomForestRegressionModel
  extends TreeModelLoader[RandomForestRegressionModel]
  with TypedTransformerConverter[RandomForestRegressionModel] {

  override def build(
    metadata: Metadata,
    data: LocalData,
    treeData: LocalData
  ): RandomForestRegressionModel = {
    val dataRows      = data.toMapList
    val treesMetadata = treeData.toMapList
    val trees = treesMetadata map { treeRow =>
      val meta =
        Metadata.fromJson(treeRow("metadata").toString).copy(numFeatures = metadata.numFeatures)
      val treeNodesData = dataRows
        .filter(_("treeID") == treeRow("treeID"))
        .map(_("nodeData"))
        .asInstanceOf[Seq[Map[String, Any]]]
      LocalDecisionTreeRegressionModel.createTree(
        meta,
        LocalData.fromMapList(treeNodesData.toList)
      )
    }
    val ctor = classOf[RandomForestRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val inst = ctor
      .newInstance(
        metadata.uid,
        trees.to[Array],
        metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
      )
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    inst
      .set(inst.seed, metadata.paramMap("seed").toString.toLong)
      .set(inst.subsamplingRate, metadata.paramMap("subsamplingRate").toString.toDouble)
      .set(inst.impurity, metadata.paramMap("impurity").toString)
  }

  override implicit def toLocal(
    transformer: RandomForestRegressionModel
  ) = new LocalRandomForestRegressionModel(transformer)
}
Example 2
Source File: RandomForestModelReuse.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML

import org.apache.spark.ml.regression.{RandomForestRegressor, RandomForestRegressionModel}
import org.apache.spark.ml.tuning.ParamGridBuilder
import org.apache.spark.ml.tuning.{CrossValidator, CrossValidatorModel}
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.mllib.evaluation.RegressionMetrics

object RandomForestModelReuse {
  def main(args: Array[String]) {
    val spark = SparkSessionCreate.createSession()
    import spark.implicits._

    // Load the workflow back
    val cvModel = CrossValidatorModel.load("model/RF_model/")

    // *****************************************
    println("Run prediction over test dataset")
    // *****************************************
    // Predicts and saves file ready for Kaggle!
    //if(!params.outputFile.isEmpty){
    cvModel.transform(Preproessing.testData)
      .select("id", "prediction")
      .withColumnRenamed("prediction", "loss")
      .coalesce(1)
      .write.format("com.databricks.spark.csv")
      .option("header", "true")
      .save("output/result_RF_reuse.csv")

    spark.stop()
  }
}
Example 3
Source File: RandomForestModelReuse.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.ChrunPrediction

import org.apache.spark.ml.regression.{RandomForestRegressor, RandomForestRegressionModel}
import org.apache.spark.ml.tuning.ParamGridBuilder
import org.apache.spark.ml.tuning.{CrossValidator, CrossValidatorModel}
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

object RandomForestModelReuse {
  def main(args: Array[String]) {
    val spark = SparkSessionCreate.createSession("ChurnPredictionRandomForestWithModelReuse")
    import spark.implicits._

    // Load the workflow back
    val cvModel = CrossValidatorModel.load("model/RF_model_churn/")

    val predictions = cvModel.transform(Preprocessing.testSet)
    predictions.show(10)

    val result = predictions.select("label", "prediction", "probability")
    val resutDF = result.withColumnRenamed("prediction", "Predicted_label")
    resutDF.show(10)

    val evaluator = new BinaryClassificationEvaluator()
      .setLabelCol("label")
      .setRawPredictionCol("prediction")
    val accuracy = evaluator.evaluate(predictions)
    println("Accuracy: " + accuracy)
    evaluator.explainParams()

    val predictionAndLabels = predictions
      .select("prediction", "label")
      .rdd.map(x => (x(0).asInstanceOf[Double], x(1).asInstanceOf[Double]))

    val metrics = new BinaryClassificationMetrics(predictionAndLabels)
    val areaUnderPR = metrics.areaUnderPR
    println("Area under the precision-recall curve: " + areaUnderPR)
    val areaUnderROC = metrics.areaUnderROC
    println("Area under the receiver operating characteristic (ROC) curve: " + areaUnderROC)

    val lp = predictions.select("label", "prediction")
    val counttotal = predictions.count()
    val correct = lp.filter($"label" === $"prediction").count()
    val wrong = lp.filter(not($"label" === $"prediction")).count()
    val ratioWrong = wrong.toDouble / counttotal.toDouble
    val ratioCorrect = correct.toDouble / counttotal.toDouble
    val truep = lp.filter($"prediction" === 0.0).filter($"label" === $"prediction").count() / counttotal.toDouble
    val truen = lp.filter($"prediction" === 1.0).filter($"label" === $"prediction").count() / counttotal.toDouble
    val falsep = lp.filter($"prediction" === 1.0).filter(not($"label" === $"prediction")).count() / counttotal.toDouble
    val falsen = lp.filter($"prediction" === 0.0).filter(not($"label" === $"prediction")).count() / counttotal.toDouble

    println("Total Count: " + counttotal)
    println("Correct: " + correct)
    println("Wrong: " + wrong)
    println("Ratio wrong: " + ratioWrong)
    println("Ratio correct: " + ratioCorrect)
    println("Ratio true positive: " + truep)
    println("Ratio false positive: " + falsep)
    println("Ratio true negative: " + truen)
    println("Ratio false negative: " + falsen)

    spark.stop()
  }
}
Example 4
Source File: RandomForestRegressionOp.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}

class RandomForestRegressionOp extends SimpleSparkOp[RandomForestRegressionModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, RandomForestRegressionModel] =
    new OpModel[SparkBundleContext, RandomForestRegressionModel] {
      override val klazz: Class[RandomForestRegressionModel] = classOf[RandomForestRegressionModel]

      override def opName: String = Bundle.BuiltinOps.regression.random_forest_regression

      override def store(model: Model, obj: RandomForestRegressionModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): RandomForestRegressionModel = {
        val numFeatures = model.value("num_features").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList

        // TODO: get rid of this when Spark supports setting tree weights
        for (weight <- treeWeights) {
          require(weight == 1.0, "tree weights must be 1.0 for Spark")
        }

        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeRegressionModel]
        }.toArray

        new RandomForestRegressionModel(uid = "", numFeatures = numFeatures, _trees = models)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: RandomForestRegressionModel): RandomForestRegressionModel = {
    new RandomForestRegressionModel(uid = uid, _trees = model.trees, numFeatures = model.numFeatures)
  }

  override def sparkInputs(obj: RandomForestRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: RandomForestRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
}
Example 5
Source File: TypedRandomForestRegressor.scala From frameless with Apache License 2.0 | 5 votes |
package frameless
package ml
package regression

import frameless.ml.internals.TreesInputsChecker
import frameless.ml.params.trees.FeatureSubsetStrategy
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}

final class TypedRandomForestRegressor[Inputs] private[ml](
  rf: RandomForestRegressor,
  labelCol: String,
  featuresCol: String
) extends TypedEstimator[Inputs, TypedRandomForestRegressor.Outputs, RandomForestRegressionModel] {

  val estimator: RandomForestRegressor =
    rf
      .setLabelCol(labelCol)
      .setFeaturesCol(featuresCol)
      .setPredictionCol(AppendTransformer.tempColumnName)

  def setNumTrees(value: Int): TypedRandomForestRegressor[Inputs] = copy(rf.setNumTrees(value))
  def setMaxDepth(value: Int): TypedRandomForestRegressor[Inputs] = copy(rf.setMaxDepth(value))
  def setMinInfoGain(value: Double): TypedRandomForestRegressor[Inputs] = copy(rf.setMinInfoGain(value))
  def setMinInstancesPerNode(value: Int): TypedRandomForestRegressor[Inputs] = copy(rf.setMinInstancesPerNode(value))
  def setMaxMemoryInMB(value: Int): TypedRandomForestRegressor[Inputs] = copy(rf.setMaxMemoryInMB(value))
  def setSubsamplingRate(value: Double): TypedRandomForestRegressor[Inputs] = copy(rf.setSubsamplingRate(value))
  def setFeatureSubsetStrategy(value: FeatureSubsetStrategy): TypedRandomForestRegressor[Inputs] =
    copy(rf.setFeatureSubsetStrategy(value.sparkValue))
  def setMaxBins(value: Int): TypedRandomForestRegressor[Inputs] = copy(rf.setMaxBins(value))

  private def copy(newRf: RandomForestRegressor): TypedRandomForestRegressor[Inputs] =
    new TypedRandomForestRegressor[Inputs](newRf, labelCol, featuresCol)
}

object TypedRandomForestRegressor {
  case class Outputs(prediction: Double)

  def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs]): TypedRandomForestRegressor[Inputs] = {
    new TypedRandomForestRegressor(new RandomForestRegressor(), inputsChecker.labelCol, inputsChecker.featuresCol)
  }
}
Example 6
Source File: OpRandomForestRegressor.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.UID
import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.impl.CheckIsResponseValues
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictionModel, OpPredictorWrapper}
import org.apache.spark.ml.regression.{OpRandomForestRegressorParams, RandomForestRegressionModel, RandomForestRegressor}

import scala.reflect.runtime.universe.TypeTag

class OpRandomForestRegressionModel(
  sparkModel: RandomForestRegressionModel,
  uid: String = UID[OpRandomForestRegressionModel],
  operationName: String = classOf[RandomForestRegressor].getSimpleName
)(
  implicit tti1: TypeTag[RealNN],
  tti2: TypeTag[OPVector],
  tto: TypeTag[Prediction],
  ttov: TypeTag[Prediction#Value]
) extends OpPredictionModel[RandomForestRegressionModel](
  sparkModel = sparkModel,
  uid = uid,
  operationName = operationName
)
Example 7
Source File: OpRandomForestRegressorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test._
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpRandomForestRegressorTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[RandomForestRegressionModel],
  OpPredictorWrapper[RandomForestRegressor, RandomForestRegressionModel]] with PredictionEquality {

  override def specName: String = Spec[OpRandomForestRegressor]

  val (inputData, rawLabel, features) = TestFeatureBuilder(
    Seq[(RealNN, OPVector)](
      (10.0.toRealNN, Vectors.dense(1.0, 4.3, 1.3).toOPVector),
      (20.0.toRealNN, Vectors.dense(2.0, 0.3, 0.1).toOPVector),
      (30.0.toRealNN, Vectors.dense(3.0, 3.9, 4.3).toOPVector),
      (40.0.toRealNN, Vectors.dense(4.0, 1.3, 0.9).toOPVector),
      (50.0.toRealNN, Vectors.dense(5.0, 4.7, 1.3).toOPVector)
    )
  )
  val label = rawLabel.copy(isResponse = true)
  val estimator = new OpRandomForestRegressor().setInput(label, features)

  val expectedResult = Seq(
    Prediction(20.0),
    Prediction(23.5),
    Prediction(31.5),
    Prediction(35.5),
    Prediction(37.0)
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxDepth(7)
      .setMaxBins(3)
      .setMinInstancesPerNode(2)
      .setMinInfoGain(0.1)
      .setSeed(42L)
    estimator.fit(inputData)

    estimator.predictor.getMaxDepth shouldBe 7
    estimator.predictor.getMaxBins shouldBe 3
    estimator.predictor.getMinInstancesPerNode shouldBe 2
    estimator.predictor.getMinInfoGain shouldBe 0.1
    estimator.predictor.getSeed shouldBe 42L
  }
}
Example 8
Source File: RandomForestRegressorExample.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.feature.VectorIndexer
import org.apache.spark.ml.regression.{RandomForestRegressionModel, RandomForestRegressor}
// $example off$
import org.apache.spark.sql.Row
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{SQLContext, DataFrame}

    // (The source excerpt resumes inside main() at this point; the object declaration,
    // data loading, feature indexing, train/test split, and Pipeline fitting that
    // produce `sc`, `model`, and `predictions` are not included in it. See the sketch below.)
    predictions.select("prediction", "label", "features").show(5)

    // Select (prediction, true label) and compute test error.
    val evaluator = new RegressionEvaluator()
      .setLabelCol("label")
      // Name of the column holding the algorithm's predictions; defaults to "prediction".
      .setPredictionCol("prediction")
      // RMSE (root mean squared error) measures how spread out the prediction errors are.
      .setMetricName("rmse")
    val rmse = evaluator.evaluate(predictions)
    // Root Mean Squared Error (RMSE) on test data = 0.09854713827168428
    println("Root Mean Squared Error (RMSE) on test data = " + rmse)

    val rfModel = model.stages(1).asInstanceOf[RandomForestRegressionModel]
    println("Learned regression forest model:\n" + rfModel.toDebugString)
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
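The excerpt above begins after the pipeline has already been fit. The following is a minimal sketch of the setup it presumably relies on, modeled on the stock Spark RandomForestRegressorExample; the data path, split ratio, and maxCategories value are assumptions rather than part of the source file.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.VectorIndexer
import org.apache.spark.ml.regression.RandomForestRegressor
import org.apache.spark.mllib.util.MLUtils

object RandomForestRegressorExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("RandomForestRegressorExample")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    // Load libsvm data as a DataFrame with "label" and "features" columns (assumed path).
    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()

    // Index categorical features so the tree algorithm treats them as categories.
    val featureIndexer = new VectorIndexer()
      .setInputCol("features")
      .setOutputCol("indexedFeatures")
      .setMaxCategories(4)
      .fit(data)

    // Hold out 30% of the data for testing (assumed split).
    val Array(trainingData, testData) = data.randomSplit(Array(0.7, 0.3))

    val rf = new RandomForestRegressor()
      .setLabelCol("label")
      .setFeaturesCol("indexedFeatures")

    // Stage 0 is the indexer, stage 1 is the forest, which is why the excerpt
    // retrieves the trained forest with model.stages(1).
    val pipeline = new Pipeline().setStages(Array(featureIndexer, rf))
    val model = pipeline.fit(trainingData)
    val predictions = model.transform(testData)

    // ... evaluation continues as shown in the excerpt above ...
  }
}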
Example 9
Source File: BaseTransformerConverter.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.mleap.converter.runtime

import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.ml.feature.{IndexToString, StandardScalerModel, StringIndexerModel, VectorAssembler}
import org.apache.spark.ml.mleap.classification.SVMModel
import org.apache.spark.ml.mleap.converter.runtime.classification.{RandomForestClassificationModelToMleap, SupportVectorMachineModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.feature.{IndexToStringToMleap, StandardScalerModelToMleap, StringIndexerModelToMleap, VectorAssemblerModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.regression.{LinearRegressionModelToMleap, RandomForestRegressionModelToMleap}
import org.apache.spark.ml.regression.{LinearRegressionModel, RandomForestRegressionModel}

trait BaseTransformerConverter extends SparkTransformerConverter {
  // regression
  implicit val mleapLinearRegressionModelToMleap: TransformerToMleap[LinearRegressionModel, transformer.LinearRegressionModel] =
    addConverter(LinearRegressionModelToMleap)
  implicit val mleapRandomForestRegressionModelToMleap: TransformerToMleap[RandomForestRegressionModel, transformer.RandomForestRegressionModel] =
    addConverter(RandomForestRegressionModelToMleap)

  // classification
  implicit val mleapRandomForestClassificationModelToMleap: TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] =
    addConverter(RandomForestClassificationModelToMleap)
  implicit val mleapSupportVectorMachineModelToMleap: TransformerToMleap[SVMModel, transformer.SupportVectorMachineModel] =
    addConverter(SupportVectorMachineModelToMleap)

  // feature
  implicit val mleapIndexToStringToMleap: TransformerToMleap[IndexToString, transformer.ReverseStringIndexerModel] =
    addConverter(IndexToStringToMleap)
  implicit val mleapStandardScalerModelToMleap: TransformerToMleap[StandardScalerModel, transformer.StandardScalerModel] =
    addConverter(StandardScalerModelToMleap)
  implicit val mleapStringIndexerModelToMleap: TransformerToMleap[StringIndexerModel, transformer.StringIndexerModel] =
    addConverter(StringIndexerModelToMleap)
  implicit val mleapVectorAssemblerToMleap: TransformerToMleap[VectorAssembler, transformer.VectorAssemblerModel] =
    addConverter(VectorAssemblerModelToMleap)

  // other
  implicit val mleapPipelineModelToMleap: TransformerToMleap[PipelineModel, transformer.PipelineModel] =
    addConverter(PipelineModelToMleap(this))
}

object BaseTransformerConverter extends BaseTransformerConverter