org.apache.spark.ml.regression.DecisionTreeRegressionModel Scala Examples
The following examples show how to use org.apache.spark.ml.regression.DecisionTreeRegressionModel.
The source file, originating project, and license for each example are noted above its code.
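Before the project-specific examples, here is a minimal, self-contained sketch of how a DecisionTreeRegressionModel is normally obtained: fit a DecisionTreeRegressor on a DataFrame with a label column and an assembled feature vector. The local SparkSession, column names, and toy values below are illustrative assumptions only and are not taken from any of the projects listed further down.

import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, DecisionTreeRegressor}
import org.apache.spark.sql.SparkSession

object DecisionTreeRegressionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("dtr-sketch").master("local[*]").getOrCreate()
    import spark.implicits._

    // Toy training data: a label plus a single raw feature column (hypothetical values).
    val training = Seq((10.0, 1.0), (20.0, 2.0), (30.0, 3.0), (40.0, 4.0)).toDF("label", "x")

    // Assemble raw columns into the vector column expected by Spark ML estimators.
    val assembler = new VectorAssembler().setInputCols(Array("x")).setOutputCol("features")
    val assembled = assembler.transform(training)

    // Fit the estimator; the result is a DecisionTreeRegressionModel transformer.
    val dtr = new DecisionTreeRegressor()
      .setLabelCol("label")
      .setFeaturesCol("features")
      .setMaxDepth(5)
    val model: DecisionTreeRegressionModel = dtr.fit(assembled)

    // The fitted model scores new rows and exposes the learned tree structure.
    model.transform(assembled).select("features", "prediction").show()
    println(model.toDebugString)

    spark.stop()
  }
}

The fitted model is the same DecisionTreeRegressionModel type that the examples below serialize, deserialize, and wrap.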
Example 1
Source File: LocalGBTClassificationModel.scala, from spark-ml-serving (Apache License 2.0)
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.regression.LocalDecisionTreeRegressionModel
import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.ml.regression.DecisionTreeRegressionModel

class LocalGBTClassificationModel(override val sparkTransformer: GBTClassificationModel)
  extends LocalPredictionModel[GBTClassificationModel] {}

object LocalGBTClassificationModel
  extends TreeModelLoader[GBTClassificationModel]
  with TypedTransformerConverter[GBTClassificationModel] {

  // Rebuilds a Spark GBTClassificationModel from locally stored metadata,
  // per-tree node data, and tree weights.
  override def build(
    metadata: Metadata,
    data: LocalData,
    treeData: LocalData
  ): GBTClassificationModel = {
    val dataRows      = data.toMapList
    val treesMetadata = treeData.toMapList
    val trees = treesMetadata map { treeRow =>
      val meta = Metadata.fromJson(treeRow("metadata").toString)
      val treeNodesData = dataRows
        .filter(_("treeID") == treeRow("treeID"))
        .map(_("nodeData"))
        .asInstanceOf[Seq[Map[String, Any]]]
      LocalDecisionTreeRegressionModel.createTree(
        meta,
        LocalData.fromMapList(treeNodesData.toList)
      )
    }
    val weights = treeData.column("weights").get.data.asInstanceOf[Seq[Double]].toArray

    // The GBTClassificationModel constructor is private, so it is invoked via reflection.
    val ctor = classOf[GBTClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Array[Double]],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val inst = ctor
      .newInstance(
        metadata.uid,
        trees.to[Array],
        weights,
        metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
      )
    inst
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    inst
  }

  override implicit def toLocal(
    sparkTransformer: GBTClassificationModel
  ): LocalGBTClassificationModel = {
    new LocalGBTClassificationModel(sparkTransformer)
  }
}
Example 2
Source File: LocalRandomForestRegressionModel.scala, from spark-ml-serving (Apache License 2.0)
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}

class LocalRandomForestRegressionModel(override val sparkTransformer: RandomForestRegressionModel)
  extends LocalPredictionModel[RandomForestRegressionModel] {}

object LocalRandomForestRegressionModel
  extends TreeModelLoader[RandomForestRegressionModel]
  with TypedTransformerConverter[RandomForestRegressionModel] {

  override def build(
    metadata: Metadata,
    data: LocalData,
    treeData: LocalData
  ): RandomForestRegressionModel = {
    val dataRows      = data.toMapList
    val treesMetadata = treeData.toMapList
    val trees = treesMetadata map { treeRow =>
      val meta =
        Metadata.fromJson(treeRow("metadata").toString).copy(numFeatures = metadata.numFeatures)
      val treeNodesData = dataRows
        .filter(_("treeID") == treeRow("treeID"))
        .map(_("nodeData"))
        .asInstanceOf[Seq[Map[String, Any]]]
      LocalDecisionTreeRegressionModel.createTree(
        meta,
        LocalData.fromMapList(treeNodesData.toList)
      )
    }
    val ctor = classOf[RandomForestRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val inst = ctor
      .newInstance(
        metadata.uid,
        trees.to[Array],
        metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
      )
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    inst
      .set(inst.seed, metadata.paramMap("seed").toString.toLong)
      .set(inst.subsamplingRate, metadata.paramMap("subsamplingRate").toString.toDouble)
      .set(inst.impurity, metadata.paramMap("impurity").toString)
  }

  override implicit def toLocal(
    transformer: RandomForestRegressionModel
  ) = new LocalRandomForestRegressionModel(transformer)
}
Example 3
Source File: LocalDecisionTreeRegressionModel.scala, from spark-ml-serving (Apache License 2.0)
package io.hydrosphere.spark_ml_serving.regression

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.common.utils.DataUtils
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree.Node

class LocalDecisionTreeRegressionModel(override val sparkTransformer: DecisionTreeRegressionModel)
  extends LocalPredictionModel[DecisionTreeRegressionModel] {}

object LocalDecisionTreeRegressionModel
  extends SimpleModelLoader[DecisionTreeRegressionModel]
  with TypedTransformerConverter[DecisionTreeRegressionModel] {

  override def build(metadata: Metadata, data: LocalData): DecisionTreeRegressionModel = {
    createTree(metadata, data)
  }

  // Rebuilds a DecisionTreeRegressionModel from stored metadata and node data
  // by invoking the model's private constructor via reflection.
  def createTree(metadata: Metadata, data: LocalData): DecisionTreeRegressionModel = {
    val ctor = classOf[DecisionTreeRegressionModel].getDeclaredConstructor(
      classOf[String],
      classOf[Node],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val inst = ctor.newInstance(
      metadata.uid,
      DataUtils.createNode(0, metadata, data),
      metadata.numFeatures.getOrElse(0).asInstanceOf[java.lang.Integer]
    )
    inst
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    inst
      .set(inst.seed, metadata.paramMap("seed").toString.toLong)
      .set(inst.cacheNodeIds, metadata.paramMap("cacheNodeIds").toString.toBoolean)
      .set(inst.maxDepth, metadata.paramMap("maxDepth").toString.toInt)
      .set(inst.labelCol, metadata.paramMap("labelCol").toString)
      .set(inst.minInfoGain, metadata.paramMap("minInfoGain").toString.toDouble)
      .set(inst.checkpointInterval, metadata.paramMap("checkpointInterval").toString.toInt)
      .set(inst.minInstancesPerNode, metadata.paramMap("minInstancesPerNode").toString.toInt)
      .set(inst.maxMemoryInMB, metadata.paramMap("maxMemoryInMB").toString.toInt)
      .set(inst.maxBins, metadata.paramMap("maxBins").toString.toInt)
      .set(inst.impurity, metadata.paramMap("impurity").toString)
  }

  override implicit def toLocal(
    transformer: DecisionTreeRegressionModel
  ) = new LocalDecisionTreeRegressionModel(transformer)
}
Example 4
Source File: GBTClassifierOp.scala, from mleap (Apache License 2.0)
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.ml.regression.DecisionTreeRegressionModel

class GBTClassifierOp extends SimpleSparkOp[GBTClassificationModel] {
  override val Model: OpModel[SparkBundleContext, GBTClassificationModel] =
    new OpModel[SparkBundleContext, GBTClassificationModel] {
      override val klazz: Class[GBTClassificationModel] = classOf[GBTClassificationModel]

      override def opName: String = Bundle.BuiltinOps.classification.gbt_classifier

      override def store(model: Model, obj: GBTClassificationModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("num_classes", Value.long(2)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees)).
          withValue("thresholds", obj.get(obj.thresholds).map(Value.doubleList(_)))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): GBTClassificationModel = {
        if (model.value("num_classes").getLong != 2) {
          throw new IllegalArgumentException("MLeap only supports binary logistic regression")
        }

        val numFeatures = model.value("num_features").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList.toArray

        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeRegressionModel]
        }.toArray

        val gbt = new GBTClassificationModel(uid = "",
          _trees = models,
          _treeWeights = treeWeights,
          numFeatures = numFeatures)
        model.getValue("thresholds")
          .map(t => gbt.setThresholds(t.getDoubleList.toArray))
          .getOrElse(gbt)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: GBTClassificationModel): GBTClassificationModel = {
    val r = new GBTClassificationModel(uid = uid,
      _trees = model.trees,
      _treeWeights = model.treeWeights,
      numFeatures = model.numFeatures)
    if (model.isDefined(model.thresholds)) { r.setThresholds(model.getThresholds) }
    r
  }

  override def sparkInputs(obj: GBTClassificationModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: GBTClassificationModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
}
Example 5
Source File: GBTRegressionOp.scala, from mleap (Apache License 2.0)
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, GBTRegressionModel}

class GBTRegressionOp extends SimpleSparkOp[GBTRegressionModel] {
  override val Model: OpModel[SparkBundleContext, GBTRegressionModel] =
    new OpModel[SparkBundleContext, GBTRegressionModel] {
      override val klazz: Class[GBTRegressionModel] = classOf[GBTRegressionModel]

      override def opName: String = Bundle.BuiltinOps.regression.gbt_regression

      override def store(model: Model, obj: GBTRegressionModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): GBTRegressionModel = {
        val numFeatures = model.value("num_features").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList.toArray

        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeRegressionModel]
        }.toArray

        new GBTRegressionModel(uid = "",
          _trees = models,
          _treeWeights = treeWeights,
          numFeatures = numFeatures)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: GBTRegressionModel): GBTRegressionModel = {
    new GBTRegressionModel(uid = uid,
      _trees = model.trees,
      _treeWeights = model.treeWeights,
      numFeatures = model.numFeatures)
  }

  override def sparkInputs(obj: GBTRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: GBTRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
}
Example 6
Source File: RandomForestRegressionOp.scala, from mleap (Apache License 2.0)
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, RandomForestRegressionModel}

class RandomForestRegressionOp extends SimpleSparkOp[RandomForestRegressionModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, RandomForestRegressionModel] =
    new OpModel[SparkBundleContext, RandomForestRegressionModel] {
      override val klazz: Class[RandomForestRegressionModel] = classOf[RandomForestRegressionModel]

      override def opName: String = Bundle.BuiltinOps.regression.random_forest_regression

      override def store(model: Model, obj: RandomForestRegressionModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): RandomForestRegressionModel = {
        val numFeatures = model.value("num_features").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList

        // TODO: get rid of this when Spark supports setting tree weights
        for (weight <- treeWeights) {
          require(weight == 1.0, "tree weights must be 1.0 for Spark")
        }

        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeRegressionModel]
        }.toArray

        new RandomForestRegressionModel(uid = "",
          numFeatures = numFeatures,
          _trees = models)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: RandomForestRegressionModel): RandomForestRegressionModel = {
    new RandomForestRegressionModel(uid = uid,
      _trees = model.trees,
      numFeatures = model.numFeatures)
  }

  override def sparkInputs(obj: RandomForestRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: RandomForestRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
}
Example 7
Source File: DecisionTreeRegressionOp.scala, from mleap (Apache License 2.0)
package org.apache.spark.ml.bundle.ops.regression

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.dsl._
import ml.combust.bundle.tree.decision.TreeSerializer
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.param.Param
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, LinearRegressionModel}

class DecisionTreeRegressionOp extends SimpleSparkOp[DecisionTreeRegressionModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, DecisionTreeRegressionModel] =
    new OpModel[SparkBundleContext, DecisionTreeRegressionModel] {
      override val klazz: Class[DecisionTreeRegressionModel] = classOf[DecisionTreeRegressionModel]

      override def opName: String = Bundle.BuiltinOps.regression.decision_tree_regression

      override def store(model: Model, obj: DecisionTreeRegressionModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        TreeSerializer[org.apache.spark.ml.tree.Node](context.file("tree"), withImpurities = false).write(obj.rootNode)
        model.withValue("num_features", Value.long(obj.numFeatures))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): DecisionTreeRegressionModel = {
        val rootNode = TreeSerializer[org.apache.spark.ml.tree.Node](context.file("tree"), withImpurities = false).read().get
        new DecisionTreeRegressionModel(uid = "",
          rootNode = rootNode,
          numFeatures = model.value("num_features").getLong.toInt)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: DecisionTreeRegressionModel): DecisionTreeRegressionModel = {
    new DecisionTreeRegressionModel(uid = uid, rootNode = model.rootNode, numFeatures = model.numFeatures)
  }

  override def sparkInputs(obj: DecisionTreeRegressionModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: DecisionTreeRegressionModel): Seq[SimpleParamSpec] = {
    Seq("prediction" -> obj.predictionCol)
  }
}
Example 8
Source File: OpDecisionTreeRegressor.scala, from TransmogrifAI (BSD 3-Clause "New" or "Revised" License)
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.UID
import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.impl.CheckIsResponseValues
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictionModel, OpPredictorWrapper}
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, DecisionTreeRegressor, OpDecisionTreeRegressorParams}

import scala.reflect.runtime.universe.TypeTag

class OpDecisionTreeRegressionModel
(
  sparkModel: DecisionTreeRegressionModel,
  uid: String = UID[OpDecisionTreeRegressionModel],
  operationName: String = classOf[DecisionTreeRegressor].getSimpleName
)(
  implicit tti1: TypeTag[RealNN],
  tti2: TypeTag[OPVector],
  tto: TypeTag[Prediction],
  ttov: TypeTag[Prediction#Value]
) extends OpPredictionModel[DecisionTreeRegressionModel](
  sparkModel = sparkModel, uid = uid, operationName = operationName
)
Example 9
Source File: OpDecisionTreeRegressorTest.scala, from TransmogrifAI (BSD 3-Clause "New" or "Revised" License)
package com.salesforce.op.stages.impl.regression

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test._
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.{DecisionTreeRegressionModel, DecisionTreeRegressor}
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpDecisionTreeRegressorTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[DecisionTreeRegressionModel],
  OpPredictorWrapper[DecisionTreeRegressor, DecisionTreeRegressionModel]] with PredictionEquality {

  override def specName: String = Spec[OpDecisionTreeRegressor]

  val (inputData, rawLabel, features) = TestFeatureBuilder(
    Seq[(RealNN, OPVector)](
      (10.0.toRealNN, Vectors.dense(1.0, 4.3, 1.3).toOPVector),
      (20.0.toRealNN, Vectors.dense(2.0, 0.3, 0.1).toOPVector),
      (30.0.toRealNN, Vectors.dense(3.0, 3.9, 4.3).toOPVector),
      (40.0.toRealNN, Vectors.dense(4.0, 1.3, 0.9).toOPVector),
      (50.0.toRealNN, Vectors.dense(5.0, 4.7, 1.3).toOPVector)
    )
  )
  val label = rawLabel.copy(isResponse = true)
  val estimator = new OpDecisionTreeRegressor().setInput(label, features)

  val expectedResult = Seq(
    Prediction(10.0),
    Prediction(20.0),
    Prediction(30.0),
    Prediction(40.0),
    Prediction(50.0)
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxDepth(6)
      .setMaxBins(2)
      .setMinInstancesPerNode(2)
      .setMinInfoGain(0.1)
    estimator.fit(inputData)

    estimator.predictor.getMaxDepth shouldBe 6
    estimator.predictor.getMaxBins shouldBe 2
    estimator.predictor.getMinInstancesPerNode shouldBe 2
    estimator.predictor.getMinInfoGain shouldBe 0.1
  }
}
Example 10
Source File: MleapSparkSupport.scala, from mleap (Apache License 2.0)
package com.truecar.mleap.spark

import com.truecar.mleap.core.linalg
import com.truecar.mleap.runtime.transformer.{Transformer => MleapTransformer}
import com.truecar.mleap.runtime.{types, Row => MleapRow}
import org.apache.spark.ml.classification.DecisionTreeClassificationModel
import org.apache.spark.ml.mleap.converter._
import org.apache.spark.ml.mleap.converter.runtime.{BaseTransformerConverter, TransformerToMleap}
import org.apache.spark.ml.mleap.converter.runtime.classification.DecisionTreeClassificationModelToMleap
import org.apache.spark.ml.mleap.converter.runtime.regression.DecisionTreeRegressionModelToMleap
import org.apache.spark.ml.regression.DecisionTreeRegressionModel
import org.apache.spark.ml.tree._
import org.apache.spark.ml.Transformer
import org.apache.spark.mllib.linalg._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, SQLContext}

trait MleapSparkSupport extends BaseTransformerConverter {
  import scala.language.implicitConversions

  implicit def transformerToMleapLifted[T <: Transformer]
  (t: T)
  (implicit transformerToMleap: TransformerToMleap[T, _ <: MleapTransformer]): MleapTransformer = {
    transformerToMleap.toMleapLifted(t)
  }

  implicit def mleapTransformerWrapper[T <: MleapTransformer](t: T): MleapTransformerWrapper[T] = {
    MleapTransformerWrapper(t)
  }

  implicit def vectorToSpark(vector: linalg.Vector): VectorToSpark = VectorToSpark(vector)
  implicit def vectorToMleap(vector: Vector): VectorToMleap = VectorToMleap(vector)
  implicit def dataFrameToMleap(dataset: DataFrame): DataFrameToMleap = DataFrameToMleap(dataset)
  implicit def decisionTreeRegressionModelToMleap(tree: DecisionTreeRegressionModel): DecisionTreeRegressionModelToMleap =
    DecisionTreeRegressionModelToMleap(tree)
  implicit def decisionTreeClassificationModelToMleap(tree: DecisionTreeClassificationModel): DecisionTreeClassificationModelToMleap =
    DecisionTreeClassificationModelToMleap(tree)
  implicit def nodeToMleap(node: Node): NodeToMleap = NodeToMleap(node)
  implicit def splitToMleap(split: Split): SplitToMleap = SplitToMleap(split)
  implicit def structTypeToMleap(schema: StructType): StructTypeToMleap = StructTypeToMleap(schema)
  implicit def rowToSpark(row: MleapRow): RowToSpark = RowToSpark(row)
  implicit def structTypeToSpark(schema: types.StructType): StructTypeToSpark = StructTypeToSpark(schema)

  implicit def leapFrameToSpark[T: LeapFrameToSpark](frame: T): LeapFrameToSparkWrapper[T] = {
    LeapFrameToSparkWrapper(frame)
  }

  implicit def leapFrameToSparkConvert[T: LeapFrameToSpark](frame: T)
                                      (implicit sqlContext: SQLContext): DataFrame = {
    implicitly[LeapFrameToSpark[T]].toSpark(frame)
  }

  implicit def dataFrameToLeapFrame(dataFrame: DataFrame): SparkLeapFrame = dataFrame.toMleap
}

object MleapSparkSupport extends MleapSparkSupport