org.apache.spark.ml.param.ParamPair Scala Examples

The following examples show how to use org.apache.spark.ml.param.ParamPair. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
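Before the examples, a quick orientation: a ParamPair[T] simply bundles a Param[T] with a value of type T, and it is the unit that ParamMap stores. A minimal sketch of the basic API (the LogisticRegression estimator and the chosen values are purely illustrative):

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.param.{ParamMap, ParamPair}

val lr = new LogisticRegression()

// Build pairs explicitly or with the Param.-> shorthand.
val iterPair: ParamPair[Int] = ParamPair(lr.maxIter, 50)
val regPair: ParamPair[Double] = lr.regParam -> 0.01

// ParamMap stores ParamPairs and can hand them back as a sequence.
val paramMap = ParamMap().put(iterPair, regPair)
paramMap.get(lr.maxIter)   // Some(50)
paramMap.toSeq             // Seq[ParamPair[_]] containing both entries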
Example 1
Source File: HoltWintersBestModelEvaluation.scala (from uberdata, Apache License 2.0)
package org.apache.spark.ml

import com.cloudera.sparkts.models.UberHoltWintersModel
import eleflow.uberdata.enums.SupportedAlgorithm
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.evaluation.TimeSeriesEvaluator
import org.apache.spark.ml.param.{ParamMap, ParamPair}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.Row

import scala.reflect.ClassTag


abstract class HoltWintersBestModelEvaluation[L, M <: ForecastBaseModel[M]](
  implicit kt: ClassTag[L],
  ord: Ordering[L] = null
) extends BestModelFinder[L, M]
    with HoltWintersParams {

  protected def holtWintersEvaluation(
    row: Row,
    model: UberHoltWintersModel,
    broadcastEvaluator: Broadcast[TimeSeriesEvaluator[L]],
    id: L
  ): (UberHoltWintersModel, ModelParamEvaluation[L]) = {
    val features =
      row.getAs[org.apache.spark.ml.linalg.Vector]($(featuresCol))
    log.warn(
      s"Evaluating forecast for id $id, with parameters " +
        s"alpha ${model.alpha}, beta ${model.beta} and gamma ${model.gamma}"
    )
    val expectedResult =
      row.getAs[org.apache.spark.ml.linalg.Vector](partialValidationCol)
    val forecastToBeValidated = Vectors.dense(new Array[Double]($(nFutures)))
    model.forecast(org.apache.spark.mllib.linalg.Vectors.fromML(features), forecastToBeValidated).toArray
    val toBeValidated =
      expectedResult.toArray.zip(forecastToBeValidated.toArray)
    val metric = broadcastEvaluator.value.evaluate(toBeValidated)
    val metricName = broadcastEvaluator.value.getMetricName
    val params = ParamMap().put(
      ParamPair(gamma, model.gamma),
      ParamPair(beta, model.beta),
      ParamPair(alpha, model.alpha)
    )
    (model,
     new ModelParamEvaluation[L](
       id,
       metric,
       params,
       Some(metricName),
       SupportedAlgorithm.HoltWinters
     ))
  }
} 
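The ParamMap built from the three ParamPairs above supports typed lookups later on, for example inside holtWintersEvaluation (a sketch, assuming alpha is a Param[Double] declared on the HoltWintersParams trait):

val storedAlpha: Option[Double] = params.get(alpha)   // Some(model.alpha)
val loggedPairs: Seq[ParamPair[_]] = params.toSeq     // the alpha/beta/gamma ParamPairs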
Example 2
Source File: DefaultMLWriter.scala (from seahorse, Apache License 2.0)
package ai.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  
  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to stay consistent with the Spark metadata format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map { case ParamPair(p, v) =>
      p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata = ("class" -> cls) ~
      ("timestamp" -> System.currentTimeMillis()) ~
      ("sparkVersion" -> sc.version) ~
      ("uid" -> uid) ~
      ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) =>
        basicMetadata ~ jObject
      case None =>
        basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
} 
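In saveMetadata, each ParamPair(p, v) contributes one field to the "paramMap" JSON object: the param's name mapped to the json4s tree parsed from p.jsonEncode(v). A minimal sketch of that encoding for a single pair (the DoubleParam below is hypothetical, not part of the Seahorse code):

import org.apache.spark.ml.param.{DoubleParam, ParamPair}
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse, render}

val regParam = new DoubleParam("demo_uid", "regParam", "regularization parameter")
val ParamPair(p, v) = ParamPair(regParam, 0.01)
compact(render(p.name -> parse(p.jsonEncode(v))))   // {"regParam":0.01}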
Example 3
Source File: SparkStageParam.scala (from TransmogrifAI, BSD 3-Clause "New" or "Revised" License)
package com.salesforce.op.stages

import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import org.apache.hadoop.fs.Path
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.param.{Param, ParamPair, Params}
import org.apache.spark.ml.util.{Identifiable, MLReader, MLWritable}
import org.apache.spark.util.SparkUtils
import org.json4s.JsonAST.{JObject, JValue}
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse, render}
import org.json4s.{DefaultFormats, Formats, JString}

class SparkStageParam[S <: PipelineStage with Params]
(
  parent: String,
  name: String,
  doc: String,
  isValid: Option[S] => Boolean
) extends Param[Option[S]](parent, name, doc, isValid) {

  import SparkStageParam._

  
  // NOTE: this excerpt omits the savePath member referenced below (and the
  // jsonEncode counterpart); a minimal declaration so the excerpt stands alone:
  var savePath: Option[String] = None

  override def jsonDecode(jsonStr: String): Option[S] = {
    val json = parse(jsonStr)
    val uid = (json \ "uid").extractOpt[String]
    val path = (json \ "path").extractOpt[String]

    path -> uid match {
      case (None, _) | (_, None) | (_, Some(NoUID)) =>
        savePath = None
        None
      case (Some(p), Some(stageUid)) =>
        savePath = Option(p)
        val stagePath = new Path(p, stageUid).toString
        val className = (json \ "className").extract[String]
        val cls = SparkUtils.classForName(className)
        val stage = cls.getMethod("read").invoke(null).asInstanceOf[MLReader[PipelineStage]].load(stagePath)
        Option(stage).map(_.asInstanceOf[S])
    }
  }
}

object SparkStageParam {
  implicit val formats: Formats = DefaultFormats
  val NoClass = ""
  val NoUID = ""

  def updateParamsMetadataWithPath(jValue: JValue, path: String): JValue = jValue match {
    case JObject(pairs) => JObject(
      pairs.map {
        case (SparkWrapperParams.SparkStageParamName, j) =>
          SparkWrapperParams.SparkStageParamName -> j.merge(JObject("path" -> JString(path)))
        case param => param
      }
    )
    case j => throw new IllegalArgumentException(s"Cannot recognize JSON Spark params metadata: $j")
  }

} 
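jsonDecode above expects an object with "className", "uid", and "path" fields written by the corresponding jsonEncode, which this excerpt omits. A hypothetical illustration of the JSON shape it consumes:

import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

// Hypothetical values: the stage would have been saved under <path>/<uid> beforehand.
val encoded = compact(render(
  ("className" -> "org.apache.spark.ml.feature.StandardScalerModel") ~
  ("uid" -> "stdScal_0123456789ab") ~
  ("path" -> "/tmp/op-stages")
))
// stageParam.jsonDecode(encoded) then reflectively calls StandardScalerModel.read
// and loads the stage from /tmp/op-stages/stdScal_0123456789ab.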
Example 4
Source File: NGramExtractor.scala (from pravda-ml, Apache License 2.0)
package org.apache.spark.ml.odkl.texts

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{IntParam, ParamMap, ParamPair, ParamValidators, Params}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.types.{ArrayType, StringType, StructType}


// NOTE: this excerpt omits the class declaration and the upperN/lowerN param
// definitions; the header below is a plausible reconstruction rather than the
// original pravda-ml source.
class NGramExtractor(override val uid: String)
  extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {

  def this() = this(Identifiable.randomUID("nGramExtractor"))

  val lowerN = new IntParam(this, "lowerN", "lower bound of the n-gram size", ParamValidators.gtEq(1))

  val upperN = new IntParam(this, "upperN", "upper bound of the n-gram size", ParamValidators.gtEq(1))

  def setInputCol(value: String): this.type = set(inputCol, value)

  def setOutputCol(value: String): this.type = set(outputCol, value)

  setDefault(new ParamPair[Int](upperN, 2), new ParamPair[Int](lowerN, 1))

  override def transform(dataset: Dataset[_]): DataFrame = {
    val lowerBound = $(lowerN)
    val upperBound = $(upperN)
    val nGramUDF = udf[Seq[String], Seq[String]](NGramUtils.nGramFun(_,lowerBound,upperBound))
    dataset.withColumn($(outputCol), nGramUDF(dataset.col($(inputCol))))
  }


  override def copy(extra: ParamMap): Transformer = defaultCopy(extra)

  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    if ($(inputCol) != $(outputCol)) {
      schema.add($(outputCol), new ArrayType(StringType, true))
    } else {
      schema
    }
  }
}
object NGramExtractor extends DefaultParamsReadable[NGramExtractor] {
  override def load(path: String): NGramExtractor = super.load(path)
} 
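Because setDefault is seeded with ParamPairs (upperN = 2, lowerN = 1), the transformer emits unigrams and bigrams out of the box. A minimal usage sketch, with hypothetical column names and an assumed input DataFrame:

val nGrams = new NGramExtractor()
  .setInputCol("tokens")      // an array<string> column
  .setOutputCol("nGrams")

val withNGrams = nGrams.transform(tokenizedDF)   // tokenizedDF is assumed to already exist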
Example 5
Source File: RegexpReplaceTransformer.scala (from pravda-ml, Apache License 2.0)
package org.apache.spark.ml.odkl.texts

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{StringType, StructType}


// NOTE: this excerpt omits the class declaration and the regexpPattern /
// regexpReplacement param definitions; the header below is a plausible
// reconstruction rather than the original pravda-ml source.
class RegexpReplaceTransformer(override val uid: String)
  extends Transformer with HasInputCol with HasOutputCol with DefaultParamsWritable {

  val regexpPattern = new Param[String](this, "regexpPattern", "regexp pattern to replace")

  val regexpReplacement = new Param[String](this, "regexpReplacement", "replacement string")

  def setInputCol(value: String): this.type = set(inputCol, value)

  def setOutputCol(value: String): this.type = set(outputCol, value)

  def this() = this(Identifiable.randomUID("RegexpReplaceTransformer"))

  override def transform(dataset: Dataset[_]): DataFrame = {
    dataset.withColumn($(outputCol), regexp_replace(dataset.col($(inputCol)), $(regexpPattern), $(regexpReplacement)))
  }
  override def copy(extra: ParamMap): Transformer = defaultCopy(extra)

  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    if ($(inputCol) equals $(outputCol)) {
      val schemaWithoutInput = new StructType(schema.fields.filterNot(_.name equals $(inputCol)))
      SchemaUtils.appendColumn(schemaWithoutInput, $(outputCol), StringType)
    } else {
      SchemaUtils.appendColumn(schema, $(outputCol), StringType)
    }
  }

}

object RegexpReplaceTransformer extends DefaultParamsReadable[RegexpReplaceTransformer] {
  override def load(path: String): RegexpReplaceTransformer = super.load(path)
} 
Example 6
Source File: SimpleReproContext.scala (from pravda-ml, Apache License 2.0)
package org.apache.spark.repro

import org.apache.spark.ml.param.{Param, ParamPair, Params}
import org.apache.spark.ml.util.MLWritable
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession, functions}

class SimpleReproContext private
(spark: SparkSession, basePath: String, tags: Seq[(String,String)]) extends ReproContext {

  def this(basePath: String)(implicit spark: SparkSession) = this(spark, basePath, Seq())

  var accumulatedMetrics : Seq[DataFrame] = Seq()

  var accumulatedParams: Seq[(Seq[String], Iterable[ParamPair[_]])] = Seq()

  override def persistEstimator(estimator: MLWritable): Unit = {
    estimator.save(basePath + "/estimator")
  }

  override def persistModel(model: MLWritable): Unit = {
    model.save(basePath + "/model")
  }

  override def dive(tags: Seq[(String, String)]): ReproContext = new SimpleReproContext(
    spark, basePath, this.tags ++ tags)

  override def logParamPairs(params: Iterable[ParamPair[_]], path: Seq[String]): Unit =
    accumulatedParams = accumulatedParams :+ path -> params

  override def logMetircs(metrics: => DataFrame): Unit = accumulatedMetrics = accumulatedMetrics :+ metrics

  override def start(): Unit = {
    import spark.implicits._
    accumulatedParams.map {
      case (path, params) => params.view
        .map(x => x.param.name -> x.param.asInstanceOf[Param[Any]].jsonEncode(x.value))
        .toSeq
        .toDF("param", "value")
        .withColumn("path", functions.lit(path.mkString("/")))
    }.reduce(_ unionByName _)
      .write.parquet(taggedPrefix + "/params")
  }

  override def finish(): Unit = {
    accumulatedMetrics.reduceOption(_ unionByName _).foreach(
      _.write.parquet(taggedPrefix + "/metrics"))
  }

  private def taggedPrefix: String = {
    tags.map(x => x._1 + "=" + x._2).mkString(basePath + "/", "/", "")
  }
} 
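logParamPairs accepts the same Seq[ParamPair[_]] that ParamMap.toSeq produces, so every explicitly set or default param of an estimator can be logged in one call. A minimal sketch, assuming a SparkSession named spark is in scope and using an illustrative estimator and base path:

import org.apache.spark.ml.classification.LogisticRegression

val lr = new LogisticRegression()
val ctx = new SimpleReproContext("/tmp/repro")(spark)

ctx.logParamPairs(lr.extractParamMap().toSeq, Seq("train", "lr"))
ctx.start()   // writes the accumulated param/value/path rows as Parquet under the base path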
Example 7
Source File: DefaultMLWriter.scala (from seahorse-workflow-executor, Apache License 2.0)
package io.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import io.deepsense.deeplang.doperables.Transformer
import io.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  
  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to stay consistent with the Spark metadata format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map { case ParamPair(p, v) =>
      p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata = ("class" -> cls) ~
      ("timestamp" -> System.currentTimeMillis()) ~
      ("sparkVersion" -> sc.version) ~
      ("uid" -> uid) ~
      ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) =>
        basicMetadata ~ jObject
      case None =>
        basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
}