org.apache.spark.ml.classification.ClassificationModel Scala Examples
The following examples show how to use org.apache.spark.ml.classification.ClassificationModel.
Each example is taken from an open-source project; the source file, project name, and license are noted above each example.
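ClassificationModel is the abstract base class for trained Spark ML classifiers; a concrete instance is normally obtained by fitting an estimator such as LogisticRegression. The following minimal sketch illustrates this (the SparkSession setup and the tiny training DataFrame are assumptions made for the example, not part of any project below):

import org.apache.spark.ml.classification.{ClassificationModel, LogisticRegression}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").appName("classification-model-demo").getOrCreate()
import spark.implicits._

// Tiny assumed training set using the default "label"/"features" column names.
val training = Seq(
  (0.0, Vectors.dense(0.0, 1.1)),
  (1.0, Vectors.dense(2.0, 1.0)),
  (0.0, Vectors.dense(0.1, 1.2)),
  (1.0, Vectors.dense(2.2, 0.9))
).toDF("label", "features")

// fit returns a LogisticRegressionModel, which is a ClassificationModel subclass.
val model: ClassificationModel[_, _] = new LogisticRegression().fit(training)
println(s"numClasses=${model.numClasses}, numFeatures=${model.numFeatures}")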
Example 1
Source File: LocalClassificationModel.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.common.classification

import io.hydrosphere.spark_ml_serving.common.{LocalData, LocalDataColumn, LocalPredictionModel}
import org.apache.spark.ml.classification.ClassificationModel
import org.apache.spark.ml.linalg.Vector

abstract class LocalClassificationModel[T <: ClassificationModel[Vector, T]] extends LocalPredictionModel[T] {
  def predictRaw(v: List[Double]): List[Double] = invokeVec('predictRaw, v)

  override def transform(localData: LocalData) = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        var result = localData
        sparkTransformer.get(sparkTransformer.rawPredictionCol).foreach { name =>
          val res = LocalDataColumn(
            name,
            column.data.map(_.asInstanceOf[List[Double]]).map(predictRaw)
          )
          result = result.withColumn(res)
        }
        sparkTransformer.get(sparkTransformer.predictionCol).foreach { name =>
          val res = LocalDataColumn(name, column.data.map(_.asInstanceOf[List[Double]]).map(predict))
          result = result.withColumn(res)
        }
        result
      case None => localData
    }
  }
}
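Note that transform above never needs a SparkSession: it looks up the features column by name in the LocalData, and for each output column configured on the wrapped Spark model (raw prediction and prediction) it maps the reflectively invoked predictRaw/predict over the in-memory rows and appends the result as a new column.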
Example 2
Source File: OneVsRestOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.bundle.serializer.ModelSerializer
import ml.combust.bundle.dsl._
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.{ClassificationModel, OneVsRestModel}

class OneVsRestOp extends SimpleSparkOp[OneVsRestModel] {
  override val Model: OpModel[SparkBundleContext, OneVsRestModel] = new OpModel[SparkBundleContext, OneVsRestModel] {
    override val klazz: Class[OneVsRestModel] = classOf[OneVsRestModel]

    override def opName: String = Bundle.BuiltinOps.classification.one_vs_rest

    override def store(model: Model, obj: OneVsRestModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      var i = 0
      for (cModel <- obj.models) {
        val name = s"model$i"
        ModelSerializer(context.bundleContext(name)).write(cModel).get
        i = i + 1
        name
      }

      model.withValue("num_classes", Value.long(obj.models.length)).
        withValue("num_features", Value.long(obj.models.head.numFeatures))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): OneVsRestModel = {
      val numClasses = model.value("num_classes").getLong.toInt

      val models = (0 until numClasses).toArray.map { i =>
        ModelSerializer(context.bundleContext(s"model$i")).read().get.asInstanceOf[ClassificationModel[_, _]]
      }

      val labelMetadata = NominalAttribute.defaultAttr.
        withName("prediction").
        withNumValues(models.length).
        toMetadata
      new OneVsRestModel(uid = "", models = models, labelMetadata = labelMetadata)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: OneVsRestModel): OneVsRestModel = {
    val labelMetadata = NominalAttribute.defaultAttr.
      withName(shape.output("prediction").name).
      withNumValues(model.models.length).
      toMetadata
    new OneVsRestModel(uid = uid, labelMetadata = labelMetadata, models = model.models)
  }

  override def sparkInputs(obj: OneVsRestModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: OneVsRestModel): Seq[SimpleParamSpec] = {
    Seq("raw_prediction" -> obj.rawPredictionCol, "prediction" -> obj.predictionCol)
  }
}
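The store/load pair above serializes each per-class sub-model separately under the names model0, model1, and so on. In plain Spark ML, that array of binary sub-models is exposed as OneVsRestModel.models; a brief sketch of producing such a model (reusing the assumed training DataFrame from the first sketch):

import org.apache.spark.ml.classification.{LogisticRegression, OneVsRest, OneVsRestModel}

// Assumes `training` has "label" and "features" columns.
val ovr = new OneVsRest().setClassifier(new LogisticRegression().setMaxIter(10))
val ovrModel: OneVsRestModel = ovr.fit(training)

// One binary ClassificationModel[_, _] per class -- the same sub-models that
// OneVsRestOp.store writes out as "model0", "model1", ...
ovrModel.models.zipWithIndex.foreach { case (m, i) =>
  println(s"model$i -> ${m.getClass.getSimpleName}, numFeatures=${m.numFeatures}")
}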
Example 3
Source File: OneVsRestOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.extension.ops.classification

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import ml.combust.bundle.serializer.ModelSerializer
import org.apache.spark.ml.attribute.NominalAttribute
import org.apache.spark.ml.bundle.{ParamSpec, SimpleParamSpec, SimpleSparkOp, SparkBundleContext}
import org.apache.spark.ml.classification.ClassificationModel
import org.apache.spark.ml.mleap.classification.OneVsRestModel

class OneVsRestOp extends SimpleSparkOp[OneVsRestModel] {
  override val Model: OpModel[SparkBundleContext, OneVsRestModel] = new OpModel[SparkBundleContext, OneVsRestModel] {
    override val klazz: Class[OneVsRestModel] = classOf[OneVsRestModel]

    override def opName: String = Bundle.BuiltinOps.classification.one_vs_rest

    override def store(model: Model, obj: OneVsRestModel)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      var i = 0
      for (cModel <- obj.models) {
        val name = s"model$i"
        ModelSerializer(context.bundleContext(name)).write(cModel)
        i = i + 1
        name
      }

      model.withValue("num_classes", Value.long(obj.models.length))
        .withValue("num_features", Value.long(obj.models.head.numFeatures))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): OneVsRestModel = {
      val numClasses = model.value("num_classes").getLong.toInt

      val models = (0 until numClasses).toArray.map { i =>
        ModelSerializer(context.bundleContext(s"model$i")).read().get.asInstanceOf[ClassificationModel[_, _]]
      }

      val labelMetadata = NominalAttribute.defaultAttr.
        withName("prediction").
        withNumValues(models.length).
        toMetadata
      new OneVsRestModel(uid = "", models = models, labelMetadata = labelMetadata)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: OneVsRestModel): OneVsRestModel = {
    val labelMetadata = NominalAttribute.defaultAttr.
      withName(shape.output("prediction").name).
      withNumValues(model.models.length).
      toMetadata
    new OneVsRestModel(uid = uid, models = model.models, labelMetadata = labelMetadata)
  }

  override def sparkInputs(obj: OneVsRestModel): Seq[ParamSpec] = {
    Seq("features" -> obj.featuresCol)
  }

  override def sparkOutputs(obj: OneVsRestModel): Seq[SimpleParamSpec] = {
    Seq("probability" -> obj.probabilityCol, "prediction" -> obj.predictionCol)
  }
}
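This variant differs from Example 2 in that it targets mleap's extended OneVsRestModel (org.apache.spark.ml.mleap.classification.OneVsRestModel) rather than Spark's built-in class, and its sparkOutputs exposes a probability column alongside the prediction column instead of a raw prediction column.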
Example 4
Source File: EvaluationUtils.scala From mmlspark with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.automl

import com.microsoft.ml.spark.core.metrics.MetricConstants
import com.microsoft.ml.spark.core.schema.SchemaConstants
import com.microsoft.ml.spark.train.{TrainClassifier, TrainRegressor, TrainedClassifierModel, TrainedRegressorModel}
import org.apache.spark.injections.RegressionUtils
import org.apache.spark.ml.classification.{ClassificationModel, Classifier}
import org.apache.spark.ml.{PipelineStage, Transformer}
import org.apache.spark.ml.param.{Param, ParamMap}
import org.apache.spark.ml.regression._

object EvaluationUtils {
  val ModelTypeUnsupportedErr = "Model type not supported for evaluation"

  // Find type of trained models
  def getModelType(model: PipelineStage): String = {
    model match {
      case _: TrainRegressor => SchemaConstants.RegressionKind
      case _: TrainClassifier => SchemaConstants.ClassificationKind
      case _: Classifier[_, _, _] => SchemaConstants.ClassificationKind
      case regressor: PipelineStage if RegressionUtils.isRegressor(regressor) => SchemaConstants.RegressionKind
      case _: DecisionTreeRegressor => SchemaConstants.RegressionKind
      case _: GBTRegressor => SchemaConstants.RegressionKind
      case _: RandomForestRegressor => SchemaConstants.RegressionKind
      case _: TrainedRegressorModel => SchemaConstants.RegressionKind
      case _: TrainedClassifierModel => SchemaConstants.ClassificationKind
      case evm: BestModel => getModelType(evm.getBestModel)
      case _: ClassificationModel[_, _] => SchemaConstants.ClassificationKind
      case _: RegressionModel[_, _] => SchemaConstants.RegressionKind
      case _ => throw new Exception(ModelTypeUnsupportedErr)
    }
  }

  def getMetricWithOperator(model: PipelineStage, evaluationMetric: String): (String, Ordering[Double]) = {
    val modelType = getModelType(model)
    getMetricWithOperator(modelType, evaluationMetric)
  }

  def getMetricWithOperator(modelType: String, evaluationMetric: String): (String, Ordering[Double]) = {
    val chooseHighest = Ordering.Double
    val chooseLowest = Ordering.Double.reverse
    val (evaluationMetricColumnName, operator): (String, Ordering[Double]) = modelType match {
      case SchemaConstants.RegressionKind =>
        evaluationMetric match {
          case MetricConstants.MseSparkMetric  => (MetricConstants.MseColumnName, chooseLowest)
          case MetricConstants.RmseSparkMetric => (MetricConstants.RmseColumnName, chooseLowest)
          case MetricConstants.R2SparkMetric   => (MetricConstants.R2ColumnName, chooseHighest)
          case MetricConstants.MaeSparkMetric  => (MetricConstants.MaeColumnName, chooseLowest)
          case _ => throw new Exception("Metric is not supported for regressors")
        }
      case SchemaConstants.ClassificationKind =>
        evaluationMetric match {
          case MetricConstants.AucSparkMetric       => (MetricConstants.AucColumnName, chooseHighest)
          case MetricConstants.PrecisionSparkMetric => (MetricConstants.PrecisionColumnName, chooseHighest)
          case MetricConstants.RecallSparkMetric    => (MetricConstants.RecallColumnName, chooseHighest)
          case MetricConstants.AccuracySparkMetric  => (MetricConstants.AccuracyColumnName, chooseHighest)
          case _ => throw new Exception("Metric is not supported for classifiers")
        }
      case _ => throw new Exception("Model type not supported for evaluation")
    }
    (evaluationMetricColumnName, operator)
  }

  def getModelParams(model: Transformer): ParamMap = {
    model match {
      case reg: TrainedRegressorModel => reg.getParamMap
      case cls: TrainedClassifierModel => cls.getParamMap
      case evm: BestModel => getModelParams(evm.getBestModel)
      case _ => throw new Exception("Model type not supported for evaluation")
    }
  }

  def modelParamsToString(model: Transformer): String =
    getModelParams(model).toSeq.map(pv => s"${pv.param.name}: ${pv.value}").sorted.mkString(", ")
}
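A brief usage sketch of the helpers above (the trained model reuses the assumed training DataFrame from the first sketch; the metric constant comes from MetricConstants as referenced in the source):

import com.microsoft.ml.spark.automl.EvaluationUtils
import com.microsoft.ml.spark.core.metrics.MetricConstants
import org.apache.spark.ml.classification.LogisticRegression

// Assumes `training` has "label" and "features" columns.
val lrModel = new LogisticRegression().fit(training)

// A ClassificationModel[_, _] is recognized as a classification stage.
val kind = EvaluationUtils.getModelType(lrModel)

// Which metric column to read, and whether higher or lower values are better.
val (metricColumn, ordering) = EvaluationUtils.getMetricWithOperator(lrModel, MetricConstants.AucSparkMetric)
println(s"kind=$kind, optimizing column '$metricColumn', prefer ${ordering.max(0.7, 0.8)} of (0.7, 0.8)")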