org.apache.spark.ml.PredictionModel Scala Examples
The following examples show how to use org.apache.spark.ml.PredictionModel.
You can go to the original project or source file by following the links above each example.
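PredictionModel is the abstract base class that Spark ML predictors (LinearRegressionModel, RandomForestClassificationModel, and so on) extend; its transform method appends a prediction column computed from the features column. As a baseline for the examples below, here is a minimal sketch of that contract, assuming Spark running in local mode; the object name and toy dataset are illustrative:

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

object PredictionModelSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    import spark.implicits._

    // Tiny illustrative dataset: label = 2*x1 + 3*x2
    val train = Seq(
      (8.0,  Vectors.dense(1.0, 2.0)),
      (13.0, Vectors.dense(2.0, 3.0)),
      (18.0, Vectors.dense(3.0, 4.0))
    ).toDF("label", "features")

    // LinearRegressionModel is a PredictionModel[Vector, LinearRegressionModel]
    val model = new LinearRegression().fit(train)

    // transform() appends the column named by getPredictionCol ("prediction" by default)
    model.transform(train).select("features", "prediction").show()

    spark.stop()
  }
}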
Example 1
Source File: LocalPredictionModel.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.common

import org.apache.spark.ml.PredictionModel
import org.apache.spark.ml.linalg.Vector

import scala.reflect.ClassTag

abstract class LocalPredictionModel[T <: PredictionModel[Vector, T]] extends LocalTransformer[T] {

  // Reflectively invokes the wrapped Spark model's predict method on a single feature vector
  def predict(v: List[Double]): Double = invoke[Double]('predict, v)

  // Maps predict over the features column and appends the result as the prediction column;
  // if the features column is absent, the data passes through unchanged
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getFeaturesCol) match {
      case Some(column) =>
        val predictionCol = LocalDataColumn(
          sparkTransformer.getPredictionCol,
          column.data.map(_.asInstanceOf[List[Double]]).map(predict)
        )
        localData.withColumn(predictionCol)
      case None => localData
    }
  }
}
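The invoke helper comes from LocalTransformer (not shown here) and presumably dispatches to the underlying Spark model's predict method by reflection, since PredictionModel.predict was protected in older Spark versions. Below is a self-contained sketch of that reflective-dispatch idea; FakeModel, ReflectiveInvokeSketch, and this invoke signature are illustrative stand-ins, not the spark-ml-serving API:

import java.lang.reflect.Method

object ReflectiveInvokeSketch {
  // Stand-in for a Spark PredictionModel whose predict method is not public
  class FakeModel { protected def predict(v: List[Double]): Double = v.sum }

  def invoke[R](target: AnyRef, name: String, args: AnyRef*): R = {
    // Look up the declared method by name, make it accessible, and call it
    val m: Method = target.getClass.getDeclaredMethods.find(_.getName == name)
      .getOrElse(throw new NoSuchMethodException(name))
    m.setAccessible(true)
    m.invoke(target, args: _*).asInstanceOf[R]
  }

  def main(args: Array[String]): Unit = {
    val result = invoke[Double](new FakeModel, "predict", List(1.0, 2.0, 3.0))
    println(result) // 6.0
  }
}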
Example 2
Source File: GaussianProcessCommons.scala From spark-gp with Apache License 2.0
package org.apache.spark.ml.commons

import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import breeze.optimize.LBFGSB
import org.apache.spark.ml.commons.kernel.{EyeKernel, Kernel, _}
import org.apache.spark.ml.commons.util.DiffFunctionMemoized
import org.apache.spark.ml.feature.LabeledPoint
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.util.Instrumentation
import org.apache.spark.ml.{PredictionModel, Predictor}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.{Dataset, Row}

private[ml] trait GaussianProcessCommons[F, E <: Predictor[F, E, M], M <: PredictionModel[F, M]]
  extends ProjectedGaussianProcessHelper { this: Predictor[F, E, M] with GaussianProcessParams =>

  protected val getKernel: () => Kernel = () => $(kernel)() + $(sigma2).const * new EyeKernel

  protected def getPoints(dataset: Dataset[_]) = {
    dataset.select(col($(labelCol)), col($(featuresCol))).rdd.map {
      case Row(label: Double, features: Vector) => LabeledPoint(label, features)
    }
  }

  protected def groupForExperts(points: RDD[LabeledPoint]) = {
    val numberOfExperts = Math.round(points.count().toDouble / $(datasetSizeForExpert))
    points.zipWithIndex.map { case (instance, index) =>
      (index % numberOfExperts, instance)
    }.groupByKey().map(_._2)
  }

  protected def getExpertLabelsAndKernels(points: RDD[LabeledPoint]): RDD[(BDV[Double], Kernel)] = {
    groupForExperts(points).map { chunk =>
      val (labels, trainingVectors) = chunk.map(lp => (lp.label, lp.features)).toArray.unzip
      (BDV(labels: _*), getKernel().setTrainingVectors(trainingVectors))
    }
  }

  protected def projectedProcess(expertLabelsAndKernels: RDD[(BDV[Double], Kernel)],
                                 points: RDD[LabeledPoint],
                                 optimalHyperparameters: BDV[Double]) = {
    val activeSet = $(activeSetProvider)($(activeSetSize), expertLabelsAndKernels, points,
      getKernel, optimalHyperparameters, $(seed))

    points.unpersist()

    val (matrixKmnKnm, vectorKmny) = getMatrixKmnKnmAndVectorKmny(expertLabelsAndKernels, activeSet)

    expertLabelsAndKernels.unpersist()

    val optimalKernel = getKernel().setHyperparameters(optimalHyperparameters).setTrainingVectors(activeSet)

    // inv(sigma^2 K_mm + K_mn * K_nm) * K_mn * y
    val (magicVector, magicMatrix) = getMagicVector(optimalKernel,
      matrixKmnKnm, vectorKmny, activeSet, optimalHyperparameters)

    new GaussianProjectedProcessRawPredictor(magicVector, magicMatrix, optimalKernel)
  }

  protected def createModel(uid: String, rawPredictor: GaussianProjectedProcessRawPredictor): M
}

class GaussianProjectedProcessRawPredictor private[commons] (val magicVector: BDV[Double],
                                                             val magicMatrix: BDM[Double],
                                                             val kernel: Kernel) extends Serializable {
  def predict(features: Vector): (Double, Double) = {
    val cross = kernel.crossKernel(features)
    val selfKernel = kernel.selfKernel(features)
    (cross * magicVector, selfKernel + cross * magicMatrix * cross.t)
  }
}
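groupForExperts above assigns points to roughly count / datasetSizeForExpert experts round-robin by index, so every expert receives a similarly sized, interleaved slice of the training data. The same index-modulo assignment is easy to see on plain Scala collections; the sizes here are illustrative stand-ins for the RDD:

object ExpertGroupingSketch {
  def main(args: Array[String]): Unit = {
    val points = (1 to 10).toList        // stand-in for RDD[LabeledPoint]
    val datasetSizeForExpert = 3
    val numberOfExperts = Math.round(points.size.toDouble / datasetSizeForExpert) // 3

    // Same index % numberOfExperts assignment as groupForExperts above
    val groups = points.zipWithIndex
      .groupBy { case (_, index) => index % numberOfExperts }
      .values.map(_.map(_._1))

    groups.foreach(println)
    // e.g. List(1, 4, 7, 10), List(2, 5, 8), List(3, 6, 9)
  }
}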
Example 3
Source File: SparkModelConverter.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.sparkwrappers.specific

import com.salesforce.op.features.types.{OPVector, Prediction, RealNN}
import com.salesforce.op.stages.base.binary.OpTransformer2
import com.salesforce.op.stages.impl.classification._
import com.salesforce.op.stages.impl.regression._
import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostRegressionModel}
import org.apache.spark.ml.classification._
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.regression._
import org.apache.spark.ml.{Model, PredictionModel}

object SparkModelConverter {

  // TODO remove when loco and model selector are updated
  def toOPUnchecked(
    model: Model[_],
    uid: String
  ): OpTransformer2[RealNN, OPVector, Prediction] = {
    model match {
      case m: LogisticRegressionModel => new OpLogisticRegressionModel(m, uid = uid)
      case m: RandomForestClassificationModel => new OpRandomForestClassificationModel(m, uid = uid)
      case m: NaiveBayesModel => new OpNaiveBayesModel(m, uid)
      case m: DecisionTreeClassificationModel => new OpDecisionTreeClassificationModel(m, uid = uid)
      case m: GBTClassificationModel => new OpGBTClassificationModel(m, uid = uid)
      case m: LinearSVCModel => new OpLinearSVCModel(m, uid = uid)
      case m: MultilayerPerceptronClassificationModel =>
        new OpMultilayerPerceptronClassificationModel(m, uid = uid)
      case m: LinearRegressionModel => new OpLinearRegressionModel(m, uid = uid)
      case m: RandomForestRegressionModel => new OpRandomForestRegressionModel(m, uid = uid)
      case m: GBTRegressionModel => new OpGBTRegressionModel(m, uid = uid)
      case m: DecisionTreeRegressionModel => new OpDecisionTreeRegressionModel(m, uid = uid)
      case m: GeneralizedLinearRegressionModel => new OpGeneralizedLinearRegressionModel(m, uid = uid)
      case m: XGBoostClassificationModel => new OpXGBoostClassificationModel(m, uid = uid)
      case m: XGBoostRegressionModel => new OpXGBoostRegressionModel(m, uid = uid)
      case m => throw new RuntimeException(s"model conversion not implemented for model $m")
    }
  }
}
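A hedged usage sketch of the converter: fit any Spark model covered by the match above and pass it to toOPUnchecked together with a uid. The SparkSession setup, toy DataFrame, and uid value are illustrative; the resulting OpTransformer2 would then be wired into a TransmogrifAI workflow:

import com.salesforce.op.stages.sparkwrappers.specific.SparkModelConverter
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.SparkSession

object ConverterSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
    import spark.implicits._

    // Minimal illustrative training data
    val train = Seq(
      (0.0, Vectors.dense(0.0, 1.0)),
      (1.0, Vectors.dense(1.0, 0.0))
    ).toDF("label", "features")

    // LogisticRegressionModel matches the first case of toOPUnchecked
    val sparkModel = new LogisticRegression().fit(train)
    val opModel = SparkModelConverter.toOPUnchecked(sparkModel, uid = "op_lr_sketch")

    spark.stop()
  }
}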