org.apache.spark.ml.param.ParamPair Scala Examples
The following examples show how to use org.apache.spark.ml.param.ParamPair.
The original project, source file, and license are noted above each snippet.
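ParamPair is simply a typed pairing of a Param with a value; ParamMap.put, setDefault, and extractParamMap().toSeq all traffic in it, which is what the examples below exploit. As a quick orientation, here is a minimal editor-provided sketch (not taken from any of the projects below; Binarizer is used only as a stand-in stage) of the two common ways to construct a pair:

import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.ml.param.ParamPair

object ParamPairBasics {
  def main(args: Array[String]): Unit = {
    val stage = new Binarizer()

    // Direct construction: the value type must match the Param's type.
    val explicit: ParamPair[Double] = ParamPair(stage.threshold, 0.5)

    // The -> shorthand defined on Param builds the same pair.
    val shorthand: ParamPair[Double] = stage.threshold -> 0.5

    println(explicit == shorthand) // true; ParamPair is a case class
  }
}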
Example 1
Source File: HoltWintersBestModelEvaluation.scala From uberdata with Apache License 2.0
package org.apache.spark.ml

import com.cloudera.sparkts.models.UberHoltWintersModel
import eleflow.uberdata.enums.SupportedAlgorithm
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.ml.evaluation.TimeSeriesEvaluator
import org.apache.spark.ml.param.{ParamMap, ParamPair}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.Row

import scala.reflect.ClassTag

abstract class HoltWintersBestModelEvaluation[L, M <: ForecastBaseModel[M]](
  implicit kt: ClassTag[L],
  ord: Ordering[L] = null
) extends BestModelFinder[L, M] with HoltWintersParams {

  protected def holtWintersEvaluation(
    row: Row,
    model: UberHoltWintersModel,
    broadcastEvaluator: Broadcast[TimeSeriesEvaluator[L]],
    id: L
  ): (UberHoltWintersModel, ModelParamEvaluation[L]) = {
    val features = row.getAs[org.apache.spark.ml.linalg.Vector]($(featuresCol))
    log.warn(
      s"Evaluating forecast for id $id, with parameters " +
        s"alpha ${model.alpha}, beta ${model.beta} and gamma ${model.gamma}"
    )
    val expectedResult = row.getAs[org.apache.spark.ml.linalg.Vector](partialValidationCol)
    val forecastToBeValidated = Vectors.dense(new Array[Double]($(nFutures)))
    model.forecast(org.apache.spark.mllib.linalg.Vectors.fromML(features), forecastToBeValidated).toArray
    val toBeValidated = expectedResult.toArray.zip(forecastToBeValidated.toArray)
    val metric = broadcastEvaluator.value.evaluate(toBeValidated)
    val metricName = broadcastEvaluator.value.getMetricName
    val params = ParamMap().put(
      ParamPair(gamma, model.gamma),
      ParamPair(beta, model.beta),
      ParamPair(alpha, model.alpha)
    )
    (model,
      new ModelParamEvaluation[L](
        id,
        metric,
        params,
        Some(metricName),
        SupportedAlgorithm.HoltWinters))
  }
}
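Example 1 uses ParamPair purely as a value carrier: the fitted alpha, beta and gamma are packed into a ParamMap so that ModelParamEvaluation can report which smoothing parameters produced the winning metric. A REPL-style fragment of the same put-and-read-back pattern, shown here with a stand-in stage instead of the HoltWinters params (editor-provided sketch, not uberdata code):

import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.ml.param.{ParamMap, ParamPair}

val stage = new Binarizer()

// Pack pairs into a ParamMap, the same call shape as in holtWintersEvaluation.
val params = ParamMap().put(ParamPair(stage.threshold, 0.3))

// Values come back out typed, keyed by the original Param.
val threshold: Option[Double] = params.get(stage.threshold) // Some(0.3)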
Example 2
Source File: DefaultMLWriter.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to be consistent with the Spark format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map {
      case ParamPair(p, v) => p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata =
      ("class" -> cls) ~
        ("timestamp" -> System.currentTimeMillis()) ~
        ("sparkVersion" -> sc.version) ~
        ("uid" -> uid) ~
        ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) => basicMetadata ~ jObject
      case None => basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
}
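The ParamPair-specific step in saveMetadata is the pattern match that turns each pair into a JSON field through the param's own jsonEncode. A REPL-style sketch of that step in isolation, using a stand-in stage (editor-provided, not Seahorse code):

import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.ml.param.ParamPair
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

val stage = new Binarizer().setThreshold(0.5).setInputCol("raw")
val params = stage.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
val jsonParams = render(params.map {
  case ParamPair(p, v) => p.name -> parse(p.jsonEncode(v))
}.toList)
println(compact(jsonParams)) // something like {"threshold":0.5,"inputCol":"raw"}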
Example 3
Source File: SparkStageParam.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages

import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import org.apache.hadoop.fs.Path
import org.apache.spark.ml.PipelineStage
import org.apache.spark.ml.param.{Param, ParamPair, Params}
import org.apache.spark.ml.util.{Identifiable, MLReader, MLWritable}
import org.apache.spark.util.SparkUtils
import org.json4s.JsonAST.{JObject, JValue}
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, parse, render}
import org.json4s.{DefaultFormats, Formats, JString}

class SparkStageParam[S <: PipelineStage with Params]
(
  parent: String,
  name: String,
  doc: String,
  isValid: Option[S] => Boolean
) extends Param[Option[S]](parent, name, doc, isValid) {

  import SparkStageParam._

  // The savePath member (and the matching jsonEncode) are defined elsewhere in
  // the full class; they are elided in this excerpt.
  override def jsonDecode(jsonStr: String): Option[S] = {
    val json = parse(jsonStr)
    val uid = (json \ "uid").extractOpt[String]
    val path = (json \ "path").extractOpt[String]

    path -> uid match {
      case (None, _) | (_, None) | (_, Some(NoUID)) =>
        savePath = None
        None
      case (Some(p), Some(stageUid)) =>
        savePath = Option(p)
        val stagePath = new Path(p, stageUid).toString
        val className = (json \ "className").extract[String]
        val cls = SparkUtils.classForName(className)
        val stage = cls.getMethod("read").invoke(null).asInstanceOf[MLReader[PipelineStage]].load(stagePath)
        Option(stage).map(_.asInstanceOf[S])
    }
  }
}

object SparkStageParam {
  implicit val formats: Formats = DefaultFormats
  val NoClass = ""
  val NoUID = ""

  def updateParamsMetadataWithPath(jValue: JValue, path: String): JValue = jValue match {
    case JObject(pairs) => JObject(
      pairs.map {
        case (SparkWrapperParams.SparkStageParamName, j) =>
          SparkWrapperParams.SparkStageParamName -> j.merge(JObject("path" -> JString(path)))
        case param => param
      }
    )
    case j => throw new IllegalArgumentException(s"Cannot recognize JSON Spark params metadata: $j")
  }
}
Example 4
Source File: NGramExtractor.scala From pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl.texts

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{IntParam, ParamMap, ParamPair, ParamValidators, Params}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.types.{ArrayType, StringType, StructType}

// The class declaration and the lowerN/upperN IntParam definitions are elided
// in this excerpt; the members below belong to the NGramExtractor transformer.
  def setOutputCol(value: String): this.type = set(outputCol, value)

  setDefault(new ParamPair[Int](upperN, 2), new ParamPair[Int](lowerN, 1))

  override def transform(dataset: Dataset[_]): DataFrame = {
    val lowerBound = $(lowerN)
    val upperBound = $(upperN)
    val nGramUDF = udf[Seq[String], Seq[String]](NGramUtils.nGramFun(_, lowerBound, upperBound))
    dataset.withColumn($(outputCol), nGramUDF(dataset.col($(inputCol))))
  }

  override def copy(extra: ParamMap): Transformer = defaultCopy(extra)

  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    if ($(inputCol) != $(outputCol)) {
      schema.add($(outputCol), new ArrayType(StringType, true))
    } else {
      schema
    }
  }
}

object NGramExtractor extends DefaultParamsReadable[NGramExtractor] {
  override def load(path: String): NGramExtractor = super.load(path)
}
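NGramExtractor supplies its defaults through the varargs overload of setDefault, which accepts ParamPairs rather than separate param/value arguments. A minimal, hypothetical Params class (editor-provided; the name WindowParams and its params are made up) showing the same call shape:

import org.apache.spark.ml.param.{IntParam, ParamMap, ParamPair, Params}
import org.apache.spark.ml.util.Identifiable

class WindowParams(override val uid: String) extends Params {
  def this() = this(Identifiable.randomUID("windowParams"))

  val minSize = new IntParam(this, "minSize", "lower bound of the window")
  val maxSize = new IntParam(this, "maxSize", "upper bound of the window")

  // Same call shape as NGramExtractor above: a varargs list of ParamPairs.
  setDefault(ParamPair(minSize, 1), ParamPair(maxSize, 2))

  override def copy(extra: ParamMap): WindowParams = defaultCopy(extra)
}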
Example 5
Source File: RegexpReplaceTransformer.scala From pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl.texts

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{Param, ParamMap, ParamPair, Params}
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{StringType, StructType}

// The class declaration and the regexpPattern/regexpReplacement Param
// definitions are elided in this excerpt; the members below belong to the
// RegexpReplaceTransformer.
  def setInputCol(value: String): this.type = set(inputCol, value)

  def this() = this(Identifiable.randomUID("RegexpReplaceTransformer"))

  override def transform(dataset: Dataset[_]): DataFrame = {
    dataset.withColumn($(outputCol),
      regexp_replace(dataset.col($(inputCol)), $(regexpPattern), $(regexpReplacement)))
  }

  override def copy(extra: ParamMap): Transformer = defaultCopy(extra)

  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    if ($(inputCol) equals $(outputCol)) {
      val schemaWithoutInput = new StructType(schema.fields.filterNot(_.name equals $(inputCol)))
      SchemaUtils.appendColumn(schemaWithoutInput, $(outputCol), StringType)
    } else {
      SchemaUtils.appendColumn(schema, $(outputCol), StringType)
    }
  }
}

object RegexpReplaceTransformer extends DefaultParamsReadable[RegexpReplaceTransformer] {
  override def load(path: String): RegexpReplaceTransformer = super.load(path)
}
Example 6
Source File: SimpleReproContext.scala From pravda-ml with Apache License 2.0
package org.apache.spark.repro

import org.apache.spark.ml.param.{Param, ParamPair, Params}
import org.apache.spark.ml.util.MLWritable
import org.apache.spark.sql.{DataFrame, Dataset, SparkSession, functions}

class SimpleReproContext private(
    spark: SparkSession,
    basePath: String,
    tags: Seq[(String, String)]) extends ReproContext {

  def this(basePath: String)(implicit spark: SparkSession) = this(spark, basePath, Seq())

  var accumulatedMetrics: Seq[DataFrame] = Seq()
  var accumulatedParams: Seq[(Seq[String], Iterable[ParamPair[_]])] = Seq()

  override def persistEstimator(estimator: MLWritable): Unit = {
    estimator.save(basePath + "/estimator")
  }

  override def persistModel(model: MLWritable): Unit = {
    model.save(basePath + "/model")
  }

  override def dive(tags: Seq[(String, String)]): ReproContext =
    new SimpleReproContext(spark, basePath, this.tags ++ tags)

  override def logParamPairs(params: Iterable[ParamPair[_]], path: Seq[String]): Unit =
    accumulatedParams = accumulatedParams :+ path -> params

  override def logMetircs(metrics: => DataFrame): Unit =
    accumulatedMetrics = accumulatedMetrics :+ metrics

  override def start(): Unit = {
    import spark.implicits._
    accumulatedParams.map { case (path, params) =>
      params.view
        .map(x => x.param.name -> x.param.asInstanceOf[Param[Any]].jsonEncode(x.value))
        .toSeq
        .toDF("param", "value")
        .withColumn("path", functions.lit(path.mkString("/")))
    }.reduce(_ unionByName _)
      .write.parquet(taggedPrefix + "/params")
  }

  override def finish(): Unit = {
    accumulatedMetrics.reduceOption(_ unionByName _).foreach(
      _.write.parquet(taggedPrefix + "/metrics"))
  }

  private def taggedPrefix: String = {
    tags.map(x => x._1 + "=" + x._2).mkString(basePath + "/", "/", "")
  }
}
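Because accumulatedParams holds ParamPair[_] values of unknown type, start() casts each param to Param[Any] before calling jsonEncode. A REPL-style fragment of that encoding step on its own, using a stand-in stage (editor-provided sketch, not pravda-ml code):

import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.ml.param.{Param, ParamPair}

val stage = new Binarizer().setThreshold(0.25)
val rows: Seq[(String, String)] = stage.extractParamMap().toSeq.map { pair =>
  pair.param.name -> pair.param.asInstanceOf[Param[Any]].jsonEncode(pair.value)
}
// rows contains ("threshold", "0.25") among the stage's settings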
Example 7
Source File: DefaultMLWriter.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import io.deepsense.deeplang.doperables.Transformer
import io.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to be consistent with the Spark format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map {
      case ParamPair(p, v) => p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata =
      ("class" -> cls) ~
        ("timestamp" -> System.currentTimeMillis()) ~
        ("sparkVersion" -> sc.version) ~
        ("uid" -> uid) ~
        ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) => basicMetadata ~ jObject
      case None => basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
}