org.apache.spark.ml.classification.RandomForestClassificationModel Scala Examples
The following examples show how to use org.apache.spark.ml.classification.RandomForestClassificationModel.
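Before the project-specific examples, here is a minimal, self-contained sketch of the core workflow: fit a RandomForestClassifier, persist the resulting RandomForestClassificationModel, and reload it. The paths, parameter values, and object name are placeholders, not taken from any project below.

import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.sql.SparkSession

object RandomForestQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("rf-quickstart").getOrCreate()
    // LibSVM is also the input format used in Example 1 below
    val training = spark.read.format("libsvm").load("data/train.libsvm")

    val rf = new RandomForestClassifier().setNumTrees(20).setMaxDepth(5)
    val model: RandomForestClassificationModel = rf.fit(training)

    // Persist, then reload through the companion object's load()
    model.write.overwrite().save("models/rf")
    val reloaded = RandomForestClassificationModel.load("models/rf")
    println(s"reloaded ${reloaded.getNumTrees} trees")
    spark.stop()
  }
}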
Example 1
Source File: RandomForestPrediction.scala From piflow with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.ml_classification

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.sql.SparkSession

class RandomForestPrediction extends ConfigurableStop {
  val authorEmail: String = "[email protected]"
  val description: String = "use an existing RandomForest Model to predict"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)

  var test_data_path: String = _
  var model_path: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()
    // load data stored in libsvm format as a dataframe
    val data = spark.read.format("libsvm").load(test_data_path)
    // data.show()
    // load model
    val model = RandomForestClassificationModel.load(model_path)
    val predictions = model.transform(data)
    predictions.show()
    out.write(predictions)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    test_data_path = MapUtil.get(map, key = "test_data_path").asInstanceOf[String]
    model_path = MapUtil.get(map, key = "model_path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val test_data_path = new PropertyDescriptor()
      .name("test_data_path").displayName("TEST_DATA_PATH").defaultValue("").required(true)
    val model_path = new PropertyDescriptor()
      .name("model_path").displayName("MODEL_PATH").defaultValue("").required(true)
    descriptor = test_data_path :: descriptor
    descriptor = model_path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/ml_classification/RandomForestPrediction.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MLGroup.toString)
  }
}
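Outside of piflow, the body of perform() reduces to standard spark.ml calls. A minimal standalone sketch of the same prediction step (paths are placeholders):

import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.sql.SparkSession

object PredictStandalone {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("rf-predict").getOrCreate()
    // Same steps as perform() above: read libsvm test data, load the model, score it
    val data = spark.read.format("libsvm").load("data/test.libsvm")
    val model = RandomForestClassificationModel.load("models/rf")
    model.transform(data).show()
    spark.stop()
  }
}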
Example 2
Source File: RandomForestClassifierOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.bundle.tree.decision.SparkNodeWrapper
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}

class RandomForestClassifierOp extends SimpleSparkOp[RandomForestClassificationModel] {
  implicit val nodeWrapper = SparkNodeWrapper

  override val Model: OpModel[SparkBundleContext, RandomForestClassificationModel] =
    new OpModel[SparkBundleContext, RandomForestClassificationModel] {
      override val klazz: Class[RandomForestClassificationModel] = classOf[RandomForestClassificationModel]

      override def opName: String = Bundle.BuiltinOps.classification.random_forest_classifier

      override def store(model: Model, obj: RandomForestClassificationModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        val thresholds = if (obj.isSet(obj.thresholds)) {
          Some(obj.getThresholds)
        } else None

        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("num_classes", Value.long(obj.numClasses)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees)).
          withValue("thresholds", thresholds.map(_.toSeq).map(Value.doubleList))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): RandomForestClassificationModel = {
        val numFeatures = model.value("num_features").getLong.toInt
        val numClasses = model.value("num_classes").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList

        // TODO: get rid of this when Spark supports setting tree weights
        for (weight <- treeWeights) {
          require(weight == 1.0, "tree weights must be 1.0 for Spark")
        }

        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeClassificationModel]
        }.toArray

        val m = new RandomForestClassificationModel(uid = "",
          numFeatures = numFeatures,
          numClasses = numClasses,
          _trees = models)

        model.getValue("thresholds").
          map(t => m.setThresholds(t.getDoubleList.toArray)).
          getOrElse(m)
      }
    }

  override def sparkLoad(uid: String, shape: NodeShape, model: RandomForestClassificationModel): RandomForestClassificationModel = {
    val r = new RandomForestClassificationModel(uid = uid,
      _trees = model.trees,
      numFeatures = model.numFeatures,
      numClasses = model.numClasses)
    if (model.isDefined(model.thresholds)) { r.setThresholds(model.getThresholds) }
    r
  }

  override def sparkInputs(obj: RandomForestClassificationModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: RandomForestClassificationModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
}
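The store/load pair above serializes the forest piece by piece (tree models, tree weights, class/feature counts, optional thresholds). These fields can be read directly off a fitted Spark model; a small sketch, where the helper name describe is ours and not part of mleap:

import org.apache.spark.ml.classification.RandomForestClassificationModel

def describe(model: RandomForestClassificationModel): Unit = {
  println(s"numFeatures=${model.numFeatures}, numClasses=${model.numClasses}")
  // Spark currently fixes all tree weights at 1.0, which the loader above asserts
  println(s"treeWeights=${model.treeWeights.mkString(",")}")
  model.trees.zipWithIndex.foreach { case (tree, i) =>
    println(s"tree$i: depth=${tree.depth}, nodes=${tree.numNodes}")
  }
}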
Example 3
Source File: TypedRandomForestClassifier.scala From frameless with Apache License 2.0
package frameless
package ml
package classification

import frameless.ml.internals.TreesInputsChecker
import frameless.ml.params.trees.FeatureSubsetStrategy
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.linalg.Vector

final class TypedRandomForestClassifier[Inputs] private[ml](
  rf: RandomForestClassifier,
  labelCol: String,
  featuresCol: String
) extends TypedEstimator[Inputs, TypedRandomForestClassifier.Outputs, RandomForestClassificationModel] {

  val estimator: RandomForestClassifier =
    rf
      .setLabelCol(labelCol)
      .setFeaturesCol(featuresCol)
      .setPredictionCol(AppendTransformer.tempColumnName)
      .setRawPredictionCol(AppendTransformer.tempColumnName2)
      .setProbabilityCol(AppendTransformer.tempColumnName3)

  def setNumTrees(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setNumTrees(value))
  def setMaxDepth(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxDepth(value))
  def setMinInfoGain(value: Double): TypedRandomForestClassifier[Inputs] = copy(rf.setMinInfoGain(value))
  def setMinInstancesPerNode(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMinInstancesPerNode(value))
  def setMaxMemoryInMB(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxMemoryInMB(value))
  def setSubsamplingRate(value: Double): TypedRandomForestClassifier[Inputs] = copy(rf.setSubsamplingRate(value))
  def setFeatureSubsetStrategy(value: FeatureSubsetStrategy): TypedRandomForestClassifier[Inputs] =
    copy(rf.setFeatureSubsetStrategy(value.sparkValue))
  def setMaxBins(value: Int): TypedRandomForestClassifier[Inputs] = copy(rf.setMaxBins(value))

  private def copy(newRf: RandomForestClassifier): TypedRandomForestClassifier[Inputs] =
    new TypedRandomForestClassifier[Inputs](newRf, labelCol, featuresCol)
}

object TypedRandomForestClassifier {
  case class Outputs(rawPrediction: Vector, probability: Vector, prediction: Double)

  def apply[Inputs](implicit inputsChecker: TreesInputsChecker[Inputs]): TypedRandomForestClassifier[Inputs] = {
    new TypedRandomForestClassifier(new RandomForestClassifier(), inputsChecker.labelCol, inputsChecker.featuresCol)
  }
}
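A hedged usage sketch of the typed estimator: the Data case class and the call pattern are assumptions based on frameless conventions (fit returns the model wrapped in frameless' effect type, hence the .run() call, and the usual frameless implicits, including a SparkSession, must be in scope):

import frameless.TypedDataset
import frameless.ml.classification.TypedRandomForestClassifier
import org.apache.spark.ml.linalg.Vector

case class Data(features: Vector, label: Double)

def train(trainingData: TypedDataset[Data]) = {
  val rf = TypedRandomForestClassifier[Data]
    .setNumTrees(10)
    .setMaxDepth(5)
  // TreesInputsChecker verifies at compile time that Data has suitable
  // features/label fields; fitting appends the Outputs columns
  rf.fit(trainingData).run()
}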
Example 4
Source File: OpRandomForestClassifierTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{RandomForestClassificationModel, RandomForestClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpRandomForestClassifierTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[RandomForestClassificationModel],
  OpPredictorWrapper[RandomForestClassifier, RandomForestClassificationModel]] with PredictionEquality {

  override def specName: String = Spec[OpRandomForestClassifier]

  lazy val (inputData, rawLabelMulti, featuresMulti) =
    TestFeatureBuilder[RealNN, OPVector]("labelMulti", "featuresMulti",
      Seq(
        (1.0.toRealNN, Vectors.dense(12.0, 4.3, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.0, 0.3, 0.1).toOPVector),
        (2.0.toRealNN, Vectors.dense(1.0, 3.9, 4.3).toOPVector),
        (2.0.toRealNN, Vectors.dense(10.0, 1.3, 0.9).toOPVector),
        (1.0.toRealNN, Vectors.dense(15.0, 4.7, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.5, 0.9, 10.1).toOPVector),
        (1.0.toRealNN, Vectors.dense(11.5, 2.3, 1.3).toOPVector),
        (0.0.toRealNN, Vectors.dense(0.1, 3.3, 0.1).toOPVector),
        (2.0.toRealNN, Vectors.dense(1.0, 4.0, 4.5).toOPVector),
        (2.0.toRealNN, Vectors.dense(10.0, 1.5, 1.0).toOPVector)
      )
    )

  val labelMulti = rawLabelMulti.copy(isResponse = true)

  val estimator = new OpRandomForestClassifier().setInput(labelMulti, featuresMulti)

  val expectedResult = Seq(
    Prediction(1.0, Array(0.0, 17.0, 3.0), Array(0.0, 0.85, 0.15)),
    Prediction(0.0, Array(19.0, 0.0, 1.0), Array(0.95, 0.0, 0.05)),
    Prediction(2.0, Array(0.0, 1.0, 19.0), Array(0.0, 0.05, 0.95)),
    Prediction(2.0, Array(1.0, 2.0, 17.0), Array(0.05, 0.1, 0.85)),
    Prediction(1.0, Array(0.0, 17.0, 3.0), Array(0.0, 0.85, 0.15)),
    Prediction(0.0, Array(16.0, 0.0, 4.0), Array(0.8, 0.0, 0.2)),
    Prediction(1.0, Array(1.0, 17.0, 2.0), Array(0.05, 0.85, 0.1)),
    Prediction(0.0, Array(17.0, 0.0, 3.0), Array(0.85, 0.0, 0.15)),
    Prediction(2.0, Array(2.0, 1.0, 17.0), Array(0.1, 0.05, 0.85)),
    Prediction(2.0, Array(1.0, 2.0, 17.0), Array(0.05, 0.1, 0.85))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxDepth(10)
      .setImpurity(Impurity.Gini.sparkName)
      .setMaxBins(33)
      .setMinInstancesPerNode(2)
      .setMinInfoGain(0.2)
      .setSubsamplingRate(0.9)
      .setNumTrees(21)
      .setSeed(2L)
    estimator.fit(inputData)

    estimator.predictor.getMaxDepth shouldBe 10
    estimator.predictor.getMaxBins shouldBe 33
    estimator.predictor.getImpurity shouldBe Impurity.Gini.sparkName
    estimator.predictor.getMinInstancesPerNode shouldBe 2
    estimator.predictor.getMinInfoGain shouldBe 0.2
    estimator.predictor.getSubsamplingRate shouldBe 0.9
    estimator.predictor.getNumTrees shouldBe 21
    estimator.predictor.getSeed shouldBe 2L
  }
}
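Beyond checking parameters, the fitted wrapper should score data like any other Spark transformer; a hedged sketch reusing the test fixture above:

// Sketch only: fit the wrapped estimator and score the same data
val model = estimator.fit(inputData)
val scored = model.transform(inputData)
scored.show()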
Example 5
Source File: RandomForestClassificationModelToMleap.scala From mleap with Apache License 2.0
package org.apache.spark.ml.mleap.converter.runtime.classification

import com.truecar.mleap.core.classification.RandomForestClassification
import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.classification.{DecisionTreeClassificationModel, RandomForestClassificationModel}
import org.apache.spark.ml.mleap.converter.runtime.TransformerToMleap

object RandomForestClassificationModelToMleap
  extends TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] {

  override def toMleap(t: RandomForestClassificationModel): transformer.RandomForestClassificationModel = {
    val trees = t.trees.asInstanceOf[Array[DecisionTreeClassificationModel]]
      .map(tree => DecisionTreeClassificationModelToMleap(tree).toMleap)
    val model = RandomForestClassification(trees, t.numFeatures, t.numClasses)

    transformer.RandomForestClassificationModel(t.getFeaturesCol, t.getPredictionCol, model)
  }
}
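Usage is direct: hand a fitted Spark model to the converter object. A small sketch (the wrapper function is ours, not part of mleap):

import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.classification.RandomForestClassificationModel

// Convert a fitted Spark model to its MLeap runtime counterpart
def convert(sparkModel: RandomForestClassificationModel): transformer.RandomForestClassificationModel =
  RandomForestClassificationModelToMleap.toMleap(sparkModel)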
Example 6
Source File: BaseTransformerConverter.scala From mleap with Apache License 2.0
package org.apache.spark.ml.mleap.converter.runtime

import com.truecar.mleap.runtime.transformer
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.classification.RandomForestClassificationModel
import org.apache.spark.ml.feature.{IndexToString, StandardScalerModel, StringIndexerModel, VectorAssembler}
import org.apache.spark.ml.mleap.classification.SVMModel
import org.apache.spark.ml.mleap.converter.runtime.classification.{RandomForestClassificationModelToMleap, SupportVectorMachineModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.feature.{IndexToStringToMleap, StandardScalerModelToMleap, StringIndexerModelToMleap, VectorAssemblerModelToMleap}
import org.apache.spark.ml.mleap.converter.runtime.regression.{LinearRegressionModelToMleap, RandomForestRegressionModelToMleap}
import org.apache.spark.ml.regression.{LinearRegressionModel, RandomForestRegressionModel}

trait BaseTransformerConverter extends SparkTransformerConverter {
  // regression
  implicit val mleapLinearRegressionModelToMleap: TransformerToMleap[LinearRegressionModel, transformer.LinearRegressionModel] =
    addConverter(LinearRegressionModelToMleap)
  implicit val mleapRandomForestRegressionModelToMleap: TransformerToMleap[RandomForestRegressionModel, transformer.RandomForestRegressionModel] =
    addConverter(RandomForestRegressionModelToMleap)

  // classification
  implicit val mleapRandomForestClassificationModelToMleap: TransformerToMleap[RandomForestClassificationModel, transformer.RandomForestClassificationModel] =
    addConverter(RandomForestClassificationModelToMleap)
  implicit val mleapSupportVectorMachineModelToMleap: TransformerToMleap[SVMModel, transformer.SupportVectorMachineModel] =
    addConverter(SupportVectorMachineModelToMleap)

  // feature
  implicit val mleapIndexToStringToMleap: TransformerToMleap[IndexToString, transformer.ReverseStringIndexerModel] =
    addConverter(IndexToStringToMleap)
  implicit val mleapStandardScalerModelToMleap: TransformerToMleap[StandardScalerModel, transformer.StandardScalerModel] =
    addConverter(StandardScalerModelToMleap)
  implicit val mleapStringIndexerModelToMleap: TransformerToMleap[StringIndexerModel, transformer.StringIndexerModel] =
    addConverter(StringIndexerModelToMleap)
  implicit val mleapVectorAssemblerToMleap: TransformerToMleap[VectorAssembler, transformer.VectorAssemblerModel] =
    addConverter(VectorAssemblerModelToMleap)

  // other
  implicit val mleapPipelineModelToMleap: TransformerToMleap[PipelineModel, transformer.PipelineModel] =
    addConverter(PipelineModelToMleap(this))
}

object BaseTransformerConverter extends BaseTransformerConverter
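Each implicit val above does double duty: addConverter registers the converter in the SparkTransformerConverter registry, and the implicit makes it summonable by type. A stripped-down, self-contained illustration of that typeclass pattern (the names here are illustrative, not the real mleap API):

// Illustrative typeclass, not the actual mleap API
trait ToMleap[S, M] {
  def toMleap(s: S): M
}

object Converters {
  // Summon the converter for the pair (S, M) and apply it
  def convert[S, M](s: S)(implicit c: ToMleap[S, M]): M = c.toMleap(s)
}

final case class SparkThing(value: Int)
final case class MleapThing(value: Int)

object Registry {
  // Registering a converter for one concrete pair of types
  implicit val sparkThingToMleap: ToMleap[SparkThing, MleapThing] =
    (s: SparkThing) => MleapThing(s.value)
}

// Usage: import Registry._ then Converters.convert[SparkThing, MleapThing](SparkThing(1))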