org.apache.spark.ml.classification.GBTClassificationModel Scala Examples
The following examples show how to use org.apache.spark.ml.classification.GBTClassificationModel.
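All of the examples below assume that a GBTClassificationModel has already been trained and persisted somewhere. As a point of reference, here is a minimal sketch of how such a model is typically produced and saved with Spark ML; the dataset path, output path, and parameter values are placeholders, not part of any of the projects shown below.

import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("train-gbt").getOrCreate()

// Load a LIBSVM-formatted training set; the path is a placeholder.
val training = spark.read.format("libsvm").load("data/sample_libsvm_data.txt")

// Fit a gradient-boosted trees classifier using the default "label"/"features" columns.
val gbt = new GBTClassifier()
  .setLabelCol("label")
  .setFeaturesCol("features")
  .setMaxIter(10)

val model: GBTClassificationModel = gbt.fit(training)

// Persist the model so it can later be restored with GBTClassificationModel.load(...).
model.write.overwrite().save("/tmp/gbt-model")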
Example 1
Source File: GBTPrediction.scala From piflow with BSD 2-Clause "Simplified" License
package cn.piflow.bundle.ml_classification

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.sql.SparkSession

class GBTPrediction extends ConfigurableStop {

  val authorEmail: String = "[email protected]"
  val description: String = "Use an existing GBT Model to predict"
  val inportList: List[String] = List(Port.DefaultPort)
  val outportList: List[String] = List(Port.DefaultPort)

  var test_data_path: String = _
  var model_path: String = _

  def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val spark = pec.get[SparkSession]()

    // Load data stored in LIBSVM format as a DataFrame.
    val data = spark.read.format("libsvm").load(test_data_path)
    //data.show()

    // Load the previously trained model and score the data.
    val model = GBTClassificationModel.load(model_path)
    val predictions = model.transform(data)
    predictions.show()

    out.write(predictions)
  }

  def initialize(ctx: ProcessContext): Unit = {}

  def setProperties(map: Map[String, Any]): Unit = {
    test_data_path = MapUtil.get(map, key = "test_data_path").asInstanceOf[String]
    model_path = MapUtil.get(map, key = "model_path").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val test_data_path = new PropertyDescriptor()
      .name("test_data_path")
      .displayName("TEST_DATA_PATH")
      .defaultValue("")
      .required(true)
    val model_path = new PropertyDescriptor()
      .name("model_path")
      .displayName("MODEL_PATH")
      .defaultValue("")
      .required(true)
    descriptor = test_data_path :: descriptor
    descriptor = model_path :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/ml_classification/GBTPrediction.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MLGroup.toString)
  }
}
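The DataFrame returned by model.transform contains prediction, rawPrediction, and probability columns alongside the input columns. Outside of the piflow wrapper, the same load-and-score pattern is often followed by an evaluation step. A minimal sketch, reusing the SparkSession and placeholder model path from the training sketch above and assuming the default column names:

import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.ml.evaluation.BinaryClassificationEvaluator

// Test data path is a placeholder.
val test = spark.read.format("libsvm").load("data/test_libsvm_data.txt")

val model = GBTClassificationModel.load("/tmp/gbt-model")
val predictions = model.transform(test)

// Area under the ROC curve, computed from the rawPrediction column.
val auc = new BinaryClassificationEvaluator()
  .setLabelCol("label")
  .setRawPredictionCol("rawPrediction")
  .evaluate(predictions)
println(s"Area under ROC = $auc")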
Example 2
Source File: LocalGBTClassificationModel.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.classification

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import io.hydrosphere.spark_ml_serving.regression.LocalDecisionTreeRegressionModel
import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.ml.regression.DecisionTreeRegressionModel

class LocalGBTClassificationModel(override val sparkTransformer: GBTClassificationModel)
  extends LocalPredictionModel[GBTClassificationModel] {}

object LocalGBTClassificationModel
  extends TreeModelLoader[GBTClassificationModel]
  with TypedTransformerConverter[GBTClassificationModel] {

  override def build(
    metadata: Metadata,
    data: LocalData,
    treeData: LocalData
  ): GBTClassificationModel = {
    val dataRows = data.toMapList
    val treesMetadata = treeData.toMapList

    // Rebuild each regression tree of the ensemble from its serialized metadata and node data.
    val trees = treesMetadata map { treeRow =>
      val meta = Metadata.fromJson(treeRow("metadata").toString)
      val treeNodesData = dataRows
        .filter(_("treeID") == treeRow("treeID"))
        .map(_("nodeData"))
        .asInstanceOf[Seq[Map[String, Any]]]
      LocalDecisionTreeRegressionModel.createTree(
        meta,
        LocalData.fromMapList(treeNodesData.toList)
      )
    }
    val weights = treeData.column("weights").get.data.asInstanceOf[Seq[Double]].toArray

    // The GBTClassificationModel constructor is not public, so the model is
    // instantiated through reflection.
    val ctor = classOf[GBTClassificationModel].getDeclaredConstructor(
      classOf[String],
      classOf[Array[DecisionTreeRegressionModel]],
      classOf[Array[Double]],
      classOf[Int]
    )
    ctor.setAccessible(true)
    val inst = ctor
      .newInstance(
        metadata.uid,
        trees.to[Array],
        weights,
        metadata.numFeatures.get.asInstanceOf[java.lang.Integer]
      )
    inst
      .setFeaturesCol(metadata.paramMap("featuresCol").asInstanceOf[String])
      .setPredictionCol(metadata.paramMap("predictionCol").asInstanceOf[String])
    inst
  }

  override implicit def toLocal(
    sparkTransformer: GBTClassificationModel
  ): LocalGBTClassificationModel = {
    new LocalGBTClassificationModel(sparkTransformer)
  }
}
Example 3
Source File: GBTClassifierOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.GBTClassificationModel
import org.apache.spark.ml.regression.DecisionTreeRegressionModel

class GBTClassifierOp extends SimpleSparkOp[GBTClassificationModel] {
  override val Model: OpModel[SparkBundleContext, GBTClassificationModel] =
    new OpModel[SparkBundleContext, GBTClassificationModel] {
      override val klazz: Class[GBTClassificationModel] = classOf[GBTClassificationModel]

      override def opName: String = Bundle.BuiltinOps.classification.gbt_classifier

      override def store(model: Model, obj: GBTClassificationModel)
                        (implicit context: BundleContext[SparkBundleContext]): Model = {
        // Serialize each member tree of the ensemble under its own name.
        var i = 0
        val trees = obj.trees.map { tree =>
          val name = s"tree$i"
          ModelSerializer(context.bundleContext(name)).write(tree).get
          i = i + 1
          name
        }
        model.withValue("num_features", Value.long(obj.numFeatures)).
          withValue("num_classes", Value.long(2)).
          withValue("tree_weights", Value.doubleList(obj.treeWeights)).
          withValue("trees", Value.stringList(trees)).
          withValue("thresholds", obj.get(obj.thresholds).map(Value.doubleList(_)))
      }

      override def load(model: Model)
                       (implicit context: BundleContext[SparkBundleContext]): GBTClassificationModel = {
        if (model.value("num_classes").getLong != 2) {
          throw new IllegalArgumentException("MLeap only supports binary GBT classification")
        }
        val numFeatures = model.value("num_features").getLong.toInt
        val treeWeights = model.value("tree_weights").getDoubleList.toArray

        // Deserialize each member tree and reassemble the ensemble.
        val models = model.value("trees").getStringList.map { tree =>
          ModelSerializer(context.bundleContext(tree)).read().get.asInstanceOf[DecisionTreeRegressionModel]
        }.toArray

        val gbt = new GBTClassificationModel(uid = "",
          _trees = models,
          _treeWeights = treeWeights,
          numFeatures = numFeatures)
        model.getValue("thresholds")
          .map(t => gbt.setThresholds(t.getDoubleList.toArray))
          .getOrElse(gbt)
      }
    }

  override def sparkLoad(uid: String,
                         shape: NodeShape,
                         model: GBTClassificationModel): GBTClassificationModel = {
    val r = new GBTClassificationModel(uid = uid,
      _trees = model.trees,
      _treeWeights = model.treeWeights,
      numFeatures = model.numFeatures)
    if (model.isDefined(model.thresholds)) {
      r.setThresholds(model.getThresholds)
    }
    r
  }

  override def sparkInputs(obj: GBTClassificationModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: GBTClassificationModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol,
      "probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
}
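GBTClassifierOp is the bundle operation MLeap registers for GBTClassificationModel, which is what lets a fitted Spark pipeline containing such a model be exported to an MLeap bundle. The following is only a rough sketch of the export side based on MLeap's documented Spark integration; fittedPipeline and trainingData are hypothetical values, and the exact imports and helpers can differ between MLeap versions, so treat it as an assumption rather than a verified snippet.

import ml.combust.bundle.BundleFile
import ml.combust.mleap.spark.SparkSupport._
import org.apache.spark.ml.bundle.SparkBundleContext
import resource._

// Attach a dataset transformed by the fitted pipeline so schema information
// is written into the bundle.
val sbc = SparkBundleContext().withDataset(fittedPipeline.transform(trainingData))

// Write the pipeline (including any GBTClassificationModel stage) to a zip bundle.
for (bundle <- managed(BundleFile("jar:file:/tmp/gbt-pipeline.zip"))) {
  fittedPipeline.writeBundle.save(bundle)(sbc).get
}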
Example 4
Source File: OpGBTClassifierTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.classification

import com.salesforce.op.features.types._
import com.salesforce.op.stages.impl.PredictionEquality
import com.salesforce.op.stages.sparkwrappers.specific.{OpPredictorWrapper, OpPredictorWrapperModel}
import com.salesforce.op.test.{OpEstimatorSpec, TestFeatureBuilder}
import org.apache.spark.ml.classification.{GBTClassificationModel, GBTClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class OpGBTClassifierTest extends OpEstimatorSpec[Prediction,
  OpPredictorWrapperModel[GBTClassificationModel],
  OpPredictorWrapper[GBTClassifier, GBTClassificationModel]] with PredictionEquality {

  override def specName: String = Spec[OpGBTClassifier]

  val (inputData, rawFeature1, feature2) = TestFeatureBuilder("label", "features",
    Seq[(RealNN, OPVector)](
      1.0.toRealNN -> Vectors.dense(12.0, 4.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.0, 0.3, 0.1).toOPVector,
      0.0.toRealNN -> Vectors.dense(1.0, 3.9, 4.3).toOPVector,
      1.0.toRealNN -> Vectors.dense(10.0, 1.3, 0.9).toOPVector,
      1.0.toRealNN -> Vectors.dense(15.0, 4.7, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.5, 0.9, 10.1).toOPVector,
      1.0.toRealNN -> Vectors.dense(11.5, 2.3, 1.3).toOPVector,
      0.0.toRealNN -> Vectors.dense(0.1, 3.3, 0.1).toOPVector
    )
  )
  val feature1 = rawFeature1.copy(isResponse = true)
  val estimator = new OpGBTClassifier().setInput(feature1, feature2)

  val expectedResult = Seq(
    Prediction(1.0, Array(-1.54, 1.54), Array(0.04, 0.95)),
    Prediction(0.0, Array(1.54, -1.54), Array(0.95, 0.04)),
    Prediction(0.0, Array(1.54, -1.54), Array(0.95, 0.04)),
    Prediction(1.0, Array(-1.54, 1.54), Array(0.04, 0.95)),
    Prediction(1.0, Array(-1.54, 1.54), Array(0.04, 0.95)),
    Prediction(0.0, Array(1.54, -1.54), Array(0.95, 0.04)),
    Prediction(1.0, Array(-1.54, 1.54), Array(0.04, 0.95)),
    Prediction(0.0, Array(1.54, -1.54), Array(0.95, 0.04))
  )

  it should "allow the user to set the desired spark parameters" in {
    estimator
      .setMaxIter(10)
      .setMaxDepth(6)
      .setMaxBins(2)
      .setMinInstancesPerNode(2)
      .setMinInfoGain(0.1)
    estimator.fit(inputData)

    estimator.predictor.getMaxIter shouldBe 10
    estimator.predictor.getMaxDepth shouldBe 6
    estimator.predictor.getMaxBins shouldBe 2
    estimator.predictor.getMinInstancesPerNode shouldBe 2
    estimator.predictor.getMinInfoGain shouldBe 0.1
  }
}