org.apache.spark.ml.classification.ClassificationModel Scala Examples

The following examples show how to use org.apache.spark.ml.classification.ClassificationModel. Each example is drawn from an open-source project; the source file and license are noted in the heading above its code.
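As background for the wrappers below, here is a minimal, self-contained sketch of the ClassificationModel API itself (the tiny dataset and app name are invented for illustration): fitting any Spark classifier yields a ClassificationModel subclass, and its transform appends the raw-prediction and prediction columns that the examples on this page read and write.

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

object ClassificationModelBasics extends App {
  val spark = SparkSession.builder.master("local[*]").appName("demo").getOrCreate()

  // Tiny two-class training set: (label, features).
  val train = spark.createDataFrame(Seq(
    (0.0, Vectors.dense(0.0, 1.1)),
    (1.0, Vectors.dense(2.0, 1.0)),
    (0.0, Vectors.dense(0.1, 1.2)),
    (1.0, Vectors.dense(1.9, 0.8))
  )).toDF("label", "features")

  // LogisticRegressionModel extends ClassificationModel[Vector, LogisticRegressionModel].
  val model = new LogisticRegression().fit(train)
  println(s"numClasses=${model.numClasses}, numFeatures=${model.numFeatures}")

  // transform adds rawPrediction/probability/prediction columns.
  model.transform(train).select("features", "rawPrediction", "prediction").show()

  spark.stop()
}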
Example 1
Source File: LocalClassificationModel.scala    From spark-ml-serving   with Apache License 2.0
package io.hydrosphere.spark_ml_serving.common.classification

import io.hydrosphere.spark_ml_serving.common.{LocalData, LocalDataColumn, LocalPredictionModel}
import org.apache.spark.ml.classification.ClassificationModel
import org.apache.spark.ml.linalg.Vector

abstract class LocalClassificationModel[T <: ClassificationModel[Vector, T]]
  extends LocalPredictionModel[T] {
  // Invoke the wrapped Spark model's predictRaw on a vectorized copy of the input.
  def predictRaw(v: List[Double]): List[Double] = invokeVec('predictRaw, v)

  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        var result = localData

        // Populate the raw-prediction column when one is configured on the model.
        sparkTransformer.get(sparkTransformer.rawPredictionCol).foreach { name =>
          val res = LocalDataColumn(
            name,
            column.data.map(_.asInstanceOf[List[Double]]).map(predictRaw)
          )
          result = result.withColumn(res)
        }

        // Populate the prediction column when one is configured on the model.
        sparkTransformer.get(sparkTransformer.predictionCol).foreach { name =>
          val res =
            LocalDataColumn(name, column.data.map(_.asInstanceOf[List[Double]]).map(predict))
          result = result.withColumn(res)
        }

        result
      case None => localData
    }
  }
} 
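A concrete local wrapper typically only needs to name the Spark model type and hand the model to the base class; the inherited transform and the reflective predictRaw do the rest. A minimal sketch, assuming LocalPredictionModel exposes the wrapped model as a sparkTransformer constructor field (that constructor shape is an assumption, not the library's verified signature):

import org.apache.spark.ml.classification.DecisionTreeClassificationModel

// Hypothetical concrete wrapper; real spark-ml-serving wrappers may wire
// the underlying model differently.
class LocalDecisionTreeClassificationModel(
  override val sparkTransformer: DecisionTreeClassificationModel
) extends LocalClassificationModel[DecisionTreeClassificationModel]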
Example 2
Source File: OneVsRestOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.{ClassificationModel, OneVsRestModel}


class OneVsRestOp extends SimpleSparkOp[OneVsRestModel] {
  override val Model: OpModel[SparkBundleContext, OneVsRestModel] = new OpModel[SparkBundleContext, OneVsRestModel] {
    override val klazz: Class[OneVsRestModel] = classOf[OneVsRestModel]

    override def opName: String = Bundle.BuiltinOps.classification.one_vs_rest

    override def store(model: Model, obj: OneVsRestModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      // Write each per-class binary model under the key "model$i".
      for ((cModel, i) <- obj.models.zipWithIndex) {
        ModelSerializer(context.bundleContext(s"model$i")).write(cModel).get
      }

      model.withValue("num_classes", Value.long(obj.models.length)).
        withValue("num_features", Value.long(obj.models.head.numFeatures))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): OneVsRestModel = {
      val numClasses = model.value("num_classes").getLong.toInt

      val models = (0 until numClasses).toArray.map {
        i => ModelSerializer(context.bundleContext(s"model$i")).read().get.asInstanceOf[ClassificationModel[_, _]]
      }

      val labelMetadata = NominalAttribute.defaultAttr.
        withName("prediction").
        withNumValues(models.length).
        toMetadata
      new OneVsRestModel(uid = "", models = models, labelMetadata = labelMetadata)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: OneVsRestModel): OneVsRestModel = {
    val labelMetadata = NominalAttribute.defaultAttr.
      withName(shape.output("prediction").name).
      withNumValues(model.models.length).
      toMetadata

    new OneVsRestModel(uid = uid,
      labelMetadata = labelMetadata,
      models = model.models)
  }

  override def sparkInputs(obj: OneVsRestModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: OneVsRestModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol, "prediction" -> obj.predictionCol)
  }
} 
Example 3
Source File: OneVsRestOp.scala    From mleap   with Apache License 2.0
package org.apache.spark.ml.bundle.extension.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.bundle.serializer.ModelSerializer
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.ClassificationModel
import org.apache.spark.ml.mleap.classification.OneVsRestModel


class OneVsRestOp extends SimpleSparkOp[OneVsRestModel] {
  override val Model: OpModel[SparkBundleContext, OneVsRestModel] = new OpModel[SparkBundleContext, OneVsRestModel] {
    override val klazz: Class[OneVsRestModel] = classOf[OneVsRestModel]

    override def opName: String = Bundle.BuiltinOps.classification.one_vs_rest

    override def store(model: Model, obj: OneVsRestModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      // Write each per-class binary model under the key "model$i"; .get surfaces
      // serialization failures instead of silently discarding them.
      for ((cModel, i) <- obj.models.zipWithIndex) {
        ModelSerializer(context.bundleContext(s"model$i")).write(cModel).get
      }

      model.withValue("num_classes", Value.long(obj.models.length))
        .withValue("num_features", Value.long(obj.models.head.numFeatures))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): OneVsRestModel = {
      val numClasses = model.value("num_classes").getLong.toInt

      val models = (0 until numClasses).toArray.map {
        i => ModelSerializer(context.bundleContext(s"model$i")).read().get.asInstanceOf[ClassificationModel[_, _]]
      }

      val labelMetadata = NominalAttribute.defaultAttr.
        withName("prediction").
        withNumValues(models.length).
        toMetadata
      new OneVsRestModel(uid = "", models = models, labelMetadata = labelMetadata)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: OneVsRestModel): OneVsRestModel = {
    val labelMetadata = NominalAttribute.defaultAttr.
      withName(shape.output("prediction").name).
      withNumValues(model.models.length).
      toMetadata
    new OneVsRestModel(uid = uid, models = model.models, labelMetadata = labelMetadata)
  }

  override def sparkInputs(obj: OneVsRestModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: OneVsRestModel): Seq[SimpleParamSpec] = {
    Seq("probability" -> obj.probabilityCol,
      "prediction" -> obj.predictionCol)
  }
} 
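Examples 2 and 3 are near-duplicates by design: the first targets Spark's built-in OneVsRestModel and exposes raw_prediction plus prediction as outputs, while the second targets MLeap's extension model (org.apache.spark.ml.mleap.classification.OneVsRestModel), which surfaces a probability column instead. Both serialize the per-class binary models under the keys model0 through model{n-1} together with num_classes and num_features, and load reverses the scheme by reading back exactly num_classes models.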
Example 4
Source File: EvaluationUtils.scala    From mmlspark   with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.automl

import com.microsoft.ml.spark.core.metrics.MetricConstants
import com.microsoft.ml.spark.core.schema.SchemaConstants
import com.microsoft.ml.spark.train.{TrainClassifier, TrainRegressor, TrainedClassifierModel, TrainedRegressorModel}
import org.apache.spark.injections.RegressionUtils
import org.apache.spark.ml.classification.{ClassificationModel, Classifier}
import org.apache.spark.ml.{PipelineStage, Transformer}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.regression._

object EvaluationUtils {
  val ModelTypeUnsupportedErr = "Model type not supported for evaluation"
  // Find type of trained models
  def getModelType(model: PipelineStage): String = {
    model match {
      case _: TrainRegressor => SchemaConstants.RegressionKind
      case _: TrainClassifier => SchemaConstants.ClassificationKind
      case _: Classifier[_, _, _] => SchemaConstants.ClassificationKind
      case regressor: PipelineStage if RegressionUtils.isRegressor(regressor) => SchemaConstants.RegressionKind
      case _: DecisionTreeRegressor => SchemaConstants.RegressionKind
      case _: GBTRegressor => SchemaConstants.RegressionKind
      case _: RandomForestRegressor => SchemaConstants.RegressionKind
      case _: TrainedRegressorModel => SchemaConstants.RegressionKind
      case _: TrainedClassifierModel => SchemaConstants.ClassificationKind
      case evm: BestModel => getModelType(evm.getBestModel)
      case _: ClassificationModel[_, _] => SchemaConstants.ClassificationKind
      case _: RegressionModel[_, _] => SchemaConstants.RegressionKind
      case _ => throw new Exception(ModelTypeUnsupportedErr)
    }
  }

  def getMetricWithOperator(model: PipelineStage, evaluationMetric: String): (String, Ordering[Double]) = {
    val modelType = getModelType(model)
    getMetricWithOperator(modelType, evaluationMetric)
  }

  def getMetricWithOperator(modelType: String, evaluationMetric: String): (String, Ordering[Double]) = {
    val chooseHighest = Ordering.Double
    val chooseLowest = Ordering.Double.reverse
    val (evaluationMetricColumnName, operator): (String, Ordering[Double]) = modelType match {
      case SchemaConstants.RegressionKind => evaluationMetric match {
        case MetricConstants.MseSparkMetric  => (MetricConstants.MseColumnName,  chooseLowest)
        case MetricConstants.RmseSparkMetric => (MetricConstants.RmseColumnName, chooseLowest)
        case MetricConstants.R2SparkMetric   => (MetricConstants.R2ColumnName,   chooseHighest)
        case MetricConstants.MaeSparkMetric  => (MetricConstants.MaeColumnName,  chooseLowest)
        case _ => throw new Exception("Metric is not supported for regressors")
      }
      case SchemaConstants.ClassificationKind => evaluationMetric match {
        case MetricConstants.AucSparkMetric       => (MetricConstants.AucColumnName, chooseHighest)
        case MetricConstants.PrecisionSparkMetric => (MetricConstants.PrecisionColumnName, chooseHighest)
        case MetricConstants.RecallSparkMetric    => (MetricConstants.RecallColumnName, chooseHighest)
        case MetricConstants.AccuracySparkMetric  => (MetricConstants.AccuracyColumnName, chooseHighest)
        case _ => throw new Exception("Metric is not supported for classifiers")
      }
      case _ => throw new Exception("Model type not supported for evaluation")
    }
    (evaluationMetricColumnName, operator)
  }

  def getModelParams(model: Transformer): ParamMap = {
    model match {
      case reg: TrainedRegressorModel => reg.getParamMap
      case cls: TrainedClassifierModel => cls.getParamMap
      case evm: BestModel => getModelParams(evm.getBestModel)
      case _ => throw new Exception("Model type not supported for evaluation")
    }
  }

  
  // Render a model's parameters as a sorted, comma-separated "name: value" string.
  def modelParamsToString(model: Transformer): String =
    getModelParams(model).toSeq.map(pv => s"${pv.param.name}: ${pv.value}").sorted.mkString(", ")

}
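A hypothetical usage sketch (the score values are invented for illustration): because getMetricWithOperator returns an Ordering that already encodes each metric's preferred direction, callers can always select the best candidate with max, whether the metric is maximized (accuracy) or minimized (RMSE).

val (accuracyCol, accuracyOrd) = EvaluationUtils.getMetricWithOperator(
  SchemaConstants.ClassificationKind, MetricConstants.AccuracySparkMetric)
val bestAccuracy = Seq(0.81, 0.86, 0.79).max(accuracyOrd) // 0.86: higher is better

val (rmseCol, rmseOrd) = EvaluationUtils.getMetricWithOperator(
  SchemaConstants.RegressionKind, MetricConstants.RmseSparkMetric)
val bestRmse = Seq(1.4, 0.9, 1.1).max(rmseOrd)            // 0.9: reversed ordering prefers lowest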