org.apache.spark.ml.util.MLWriter Scala Examples
The following examples show how to use org.apache.spark.ml.util.MLWriter.
You can go to the original project or source file by following the links above each example.
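All of the examples below rely on the same two hooks: a class mixes in MLWritable and returns a custom MLWriter, whose protected saveImpl(path) method does the actual persistence (MLWriter.save handles overwrite checks and path validation before calling it). As a quick orientation, here is a minimal sketch of that pattern; the SimpleWrapper class and its single labelCount field are illustrative and do not come from any of the projects below.

import org.apache.hadoop.fs.Path
import org.apache.spark.ml.util.{MLWritable, MLWriter}

// Illustrative only: a wrapper that persists one metadata value as a JSON line.
class SimpleWrapper(val labelCount: Long) extends MLWritable {
  override def write: MLWriter = new SimpleWrapper.SimpleWrapperWriter(this)
}

object SimpleWrapper {

  class SimpleWrapperWriter(instance: SimpleWrapper) extends MLWriter {

    // MLWriter.save(path) validates the path and overwrite flag, then calls saveImpl.
    override protected def saveImpl(path: String): Unit = {
      val metadataPath = new Path(path, "metadata").toString
      val json = s"""{"class":"${instance.getClass.getName}","labelCount":${instance.labelCount}}"""
      // Write a single-partition text file, the same trick the examples below use.
      sc.parallelize(Seq(json), 1).saveAsTextFile(metadataPath)
    }
  }
}

Saving then amounts to new SimpleWrapper(3).write.save("/tmp/simple-wrapper"), or write.overwrite().save(...) to replace an existing directory.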
Example 1
Source File: MultilayerPerceptronClassifierWrapper.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel,
    val labelCount: Long,
    val layers: Array[Int],
    val weights: Array[Double]
  ) extends MLWritable {

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
  }

  // MLWritable contract: delegate to the writer defined in the companion object.
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  // (the fit(...) factory used from SparkR is omitted in this excerpt)

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadataStr = sc.textFile(rMetadataPath, 1).first()
      val rMetadata = parse(rMetadataStr)
      val labelCount = (rMetadata \ "labelCount").extract[Long]
      val layers = (rMetadata \ "layers").extract[Array[Int]]
      val weights = (rMetadata \ "weights").extract[Array[Double]]

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline, labelCount, layers, weights)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = ("class" -> instance.getClass.getName) ~
        ("labelCount" -> instance.labelCount) ~
        ("layers" -> instance.layers.toSeq) ~
        ("weights" -> instance.weights.toArray.toSeq)
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 2
Source File: MathUnary.scala From mleap with Apache License 2.0
package org.apache.spark.ml.mleap.feature

import ml.combust.mleap.core.feature.{MathUnaryModel, UnaryOperation}
import org.apache.hadoop.fs.Path
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.util.{DefaultParamsReader, DefaultParamsWriter, Identifiable, MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.types.{DoubleType, NumericType, StructField, StructType}
import org.apache.spark.sql.functions.udf

// Only the reader half of MathUnary is shown in this excerpt; the MathUnary transformer class and
// its MLWriter counterpart are omitted. The enclosing object and reader declarations below follow
// the usual MLReadable pattern, and their exact names are assumptions.
object MathUnary extends MLReadable[MathUnary] {

  override def read: MLReader[MathUnary] = new MathUnaryReader

  class MathUnaryReader extends MLReader[MathUnary] {

    private val className = classOf[MathUnary].getName

    override def load(path: String): MathUnary = {
      val metadata = DefaultParamsReader.loadMetadata(path, sc, className)
      val dataPath = new Path(path, "data").toString

      val data = sparkSession.read.parquet(dataPath).select("operation").head()
      val operation = data.getAs[String](0)

      val model = MathUnaryModel(UnaryOperation.forName(operation))
      val transformer = new MathUnary(metadata.uid, model)

      metadata.getAndSetParams(transformer)
      transformer
    }
  }
}
Example 3
Source File: MultilayerPerceptronClassifierWrapper.scala From sparkoscope with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  // MLWritable contract: delegate to the writer defined in the companion object.
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"

  // (the fit(...) factory used from SparkR is omitted in this excerpt)

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 4
Source File: RecursivePipeline.scala From spark-nlp with Apache License 2.0
package com.johnsnowlabs.nlp

import org.apache.spark.internal.Logging
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{Identifiable, MLWritable, MLWriter}
import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Dataset}

import scala.collection.mutable.ListBuffer

class RecursivePipeline(override val uid: String, baseStages: Array[PipelineStage]) extends Pipeline {

  def this() = this(Identifiable.randomUID("RECURSIVE_PIPELINE"), Array.empty)

  def this(uid: String) = this(uid, Array.empty)

  def this(pipeline: Pipeline) = this(pipeline.uid, pipeline.getStages)

  this.setStages(baseStages)

  override def fit(dataset: Dataset[_]): PipelineModel = {
    transformSchema(dataset.schema, logging = true)
    val theStages = $(stages)
    var indexOfLastEstimator = -1
    theStages.view.zipWithIndex.foreach { case (stage, index) =>
      stage match {
        case _: Estimator[_] =>
          indexOfLastEstimator = index
        case _ =>
      }
    }
    var curDataset = dataset
    val transformers = ListBuffer.empty[Transformer]
    theStages.view.zipWithIndex.foreach { case (stage, index) =>
      if (index <= indexOfLastEstimator) {
        val transformer = stage match {
          case estimator: HasRecursiveFit[_] =>
            estimator.recursiveFit(curDataset, new Pipeline(uid).setStages(transformers.toArray).fit(dataset))
          case estimator: Estimator[_] =>
            estimator.fit(curDataset)
          case t: Transformer =>
            t
          case _ =>
            throw new IllegalArgumentException(
              s"Does not support stage $stage of type ${stage.getClass}")
        }
        if (index < indexOfLastEstimator) {
          curDataset = transformer.transform(curDataset)
        }
        transformers += transformer
      } else {
        transformers += stage.asInstanceOf[Transformer]
      }
    }

    createPipeline(dataset, transformers.toArray)
  }
}

class RecursivePipelineModel(override val uid: String, innerPipeline: PipelineModel)
  extends Model[RecursivePipelineModel] with MLWritable with Logging {

  def this(pipeline: PipelineModel) = this(pipeline.uid, pipeline)

  // drops the rightmost stage because the current stage itself is included in the pipeline
  private def createRecursiveAnnotators(dataset: Dataset[_]): PipelineModel =
    new Pipeline(uid).setStages(innerPipeline.stages.dropRight(1)).fit(dataset)

  override def copy(extra: ParamMap): RecursivePipelineModel = {
    new RecursivePipelineModel(uid, innerPipeline.copy(extra))
  }

  override def write: MLWriter = {
    innerPipeline.write
  }

  override def transform(dataset: Dataset[_]): DataFrame = {
    transformSchema(dataset.schema, logging = true)
    innerPipeline.stages.foldLeft(dataset.toDF)((cur, transformer) => transformer match {
      case t: HasRecursiveTransform[_] => t.recursiveTransform(cur, createRecursiveAnnotators(dataset))
      case t: AnnotatorModel[_] if t.getLazyAnnotator => cur
      case t: Transformer => t.transform(cur)
    })
  }

  override def transformSchema(schema: StructType): StructType = {
    innerPipeline.transformSchema(schema)
  }
}
Example 5
Source File: ParamsAndFeaturesWritable.scala From spark-nlp with Apache License 2.0
package com.johnsnowlabs.nlp

import org.apache.spark.ml.param.Params
import org.apache.spark.ml.util.{DefaultParamsWritable, MLWriter}
import org.apache.spark.sql.SparkSession

class FeaturesWriter[T](annotatorWithFeatures: HasFeatures,
                        baseWriter: MLWriter,
                        onWritten: (String, SparkSession) => Unit)
  extends MLWriter with HasFeatures {

  override protected def saveImpl(path: String): Unit = {
    baseWriter.save(path)

    for (feature <- annotatorWithFeatures.features) {
      if (feature.orDefault.isDefined)
        feature.serializeInfer(sparkSession, path, feature.name, feature.getOrDefault)
    }

    onWritten(path, sparkSession)
  }
}

trait ParamsAndFeaturesWritable extends DefaultParamsWritable with Params with HasFeatures {

  protected def onWrite(path: String, spark: SparkSession): Unit = {}

  override def write: MLWriter = {
    new FeaturesWriter(
      this,
      super.write,
      (path: String, spark: SparkSession) => onWrite(path, spark)
    )
  }
}
Example 6
Source File: TrainingHelper.scala From spark-nlp with Apache License 2.0
package com.johnsnowlabs.util

import java.io.File
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.sql.Timestamp
import java.util.Date

import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType
import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.util.MLWriter

object TrainingHelper {

  def saveModel(name: String,
                language: Option[String],
                libVersion: Option[Version],
                sparkVersion: Option[Version],
                modelWriter: MLWriter,
                folder: String,
                category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED)
               ): Unit = {

    // 1. Get current timestamp
    val timestamp = new Timestamp(new Date().getTime)

    // 2. Save model to file
    val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/")
    modelWriter.save(file)

    // 3. Zip file
    val tempzipFile = Paths.get(folder, timestamp + ".zip")
    ZipArchiveUtil.zip(file, tempzipFile.toString)

    // 4. Set checksum
    val checksum = FileHelper.generateChecksum(tempzipFile.toString)

    // 5. Create resource metadata
    val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true,
      category = category, checksum)

    val zipfile = Paths.get(meta.fileName)

    // 6. Move the zip
    Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING)

    // 7. Remove original file
    try {
      FileUtils.deleteDirectory(new File(file))
    } catch {
      case _: java.io.IOException => // file lock may prevent deletion, ignore and continue
    }

    // 8. Add info about the resource to metadata.json
    val metadataFile = Paths.get(folder, "metadata.json").toString
    ResourceMetadata.addMetadataToFile(metadataFile, meta)
  }
}
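Since saveModel only needs an MLWriter, any MLWritable Spark object can be passed through it. A hypothetical call site is sketched below; pipelineModel, the version options, and the output folder are placeholders, not values from the spark-nlp project.

// Hypothetical usage: persist a fitted org.apache.spark.ml.PipelineModel through TrainingHelper.
TrainingHelper.saveModel(
  name = "my_pipeline",
  language = Some("en"),
  libVersion = None,
  sparkVersion = None,
  modelWriter = pipelineModel.write, // any MLWritable exposes a suitable MLWriter
  folder = "/tmp/models"
)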
Example 7
Source File: MultilayerPerceptronClassifierWrapper.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  val weights: Array[Double] = mlpModel.weights.toArray
  val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  // MLWritable contract: delegate to the writer defined in the companion object.
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"

  // (the fit(...) factory used from SparkR is omitted in this excerpt)

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 8
Source File: DefaultMLWriter.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to be consistent with the Spark format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map { case ParamPair(p, v) =>
      p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata = ("class" -> cls) ~
      ("timestamp" -> System.currentTimeMillis()) ~
      ("sparkVersion" -> sc.version) ~
      ("uid" -> uid) ~
      ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) =>
        basicMetadata ~ jObject
      case None =>
        basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
}
Example 9
Source File: SerializableSparkEstimator.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperables.serialization

import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{MLWritable, MLWriter}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.sql
import org.apache.spark.sql.types.StructType

import ai.deepsense.sparkutils.ML

class SerializableSparkEstimator[T <: Model[T], E <: Estimator[T]](val sparkEstimator: E)
  extends ML.Estimator[SerializableSparkModel[T]]
  with MLWritable {

  override val uid: String = "e2a121fe-da6e-4ef2-9c5e-56ee558c14f0"

  override def fitDF(dataset: sql.DataFrame): SerializableSparkModel[T] = {
    val result: T = sparkEstimator.fit(dataset)
    new SerializableSparkModel[T](result)
  }

  override def copy(extra: ParamMap): Estimator[SerializableSparkModel[T]] =
    new SerializableSparkEstimator[T, E](sparkEstimator.copy(extra).asInstanceOf[E])

  override def write: MLWriter = new DefaultMLWriter(this)

  override def transformSchema(schema: StructType): StructType =
    sparkEstimator.transformSchema(schema)
}
Example 10
Source File: SerializableSparkModel.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperables.serialization

import org.apache.spark.ml.Model
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType

import ai.deepsense.sparkutils.ML

class SerializableSparkModel[M <: Model[M]](val sparkModel: M)
  extends ML.Model[SerializableSparkModel[M]]
  with MLWritable {

  override def copy(extra: ParamMap): SerializableSparkModel[M] =
    new SerializableSparkModel(sparkModel.copy(extra))

  override def write: MLWriter = {
    sparkModel match {
      case w: MLWritable => w.write
      case _ => new DefaultMLWriter(this)
    }
  }

  override def transformDF(dataset: DataFrame): DataFrame =
    sparkModel.transform(dataset)

  override def transformSchema(schema: StructType): StructType =
    sparkModel.transformSchema(schema)

  override val uid: String = "dc7178fe-b209-44f5-8a74-d3c4dafa0fae"
}

// This class may seem unused, but it is used reflectively by the Spark deserialization mechanism
object SerializableSparkModel extends MLReadable[SerializableSparkModel[_]] {

  override def read: MLReader[SerializableSparkModel[_]] = {
    new DefaultMLReader[SerializableSparkModel[_]]()
  }
}
Example 11
Source File: OpPipelineStageWriter.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages

import com.salesforce.op.stages.OpPipelineStageReaderWriter._
import com.salesforce.op.stages.sparkwrappers.generic.SparkWrapperParams
import org.apache.hadoop.fs.Path
import org.apache.spark.ml.util.MLWriter
import org.apache.spark.ml.{Estimator, SparkDefaultParamsReadWrite}
import org.json4s.JsonAST.{JObject, JValue}
import org.json4s.jackson.JsonMethods.{compact, render}

import scala.util.{Failure, Success}

// Only the JSON serialization helper is shown in this excerpt; the class declaration below is
// reconstructed from the file name and the use of `stage`, and the MLWriter.saveImpl override
// is omitted.
class OpPipelineStageWriter(val stage: OpPipelineStageBase) extends MLWriter {

  def writeToJson(path: String): JObject = {
    stage match {
      case _: Estimator[_] => return JObject() // no need to serialize estimators
      case s: SparkWrapperParams[_] =>
        // Set save path for all Spark wrapped stages of type [[SparkWrapperParams]] so they can save
        s.setStageSavePath(path)
      case _ =>
    }
    // We produce stage metadata for all the Spark params
    val metadata = SparkDefaultParamsReadWrite.getMetadataToSave(stage)

    // Write out the stage using the specified writer instance
    val writer = readerWriterFor[OpPipelineStageBase](stage.getClass.asInstanceOf[Class[OpPipelineStageBase]])
    val stageJson: JValue = writer.write(stage) match {
      case Failure(err) => throw new RuntimeException(s"Failed to write out stage '${stage.uid}'", err)
      case Success(json) => json
    }

    // Join metadata with stage ctor args
    val j = metadata.merge(JObject(FieldNames.CtorArgs.entryName -> stageJson))
    render(j).asInstanceOf[JObject]
  }
}
Example 12
Source File: MultilayerPerceptronClassifierWrapper.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.ml.r

import org.apache.hadoop.fs.Path
import org.json4s._
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods._

import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.feature.{IndexToString, RFormula}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.r.RWrapperUtils._
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset}

private[r] class MultilayerPerceptronClassifierWrapper private (
    val pipeline: PipelineModel
  ) extends MLWritable {

  import MultilayerPerceptronClassifierWrapper._

  private val mlpModel: MultilayerPerceptronClassificationModel =
    pipeline.stages(1).asInstanceOf[MultilayerPerceptronClassificationModel]

  lazy val weights: Array[Double] = mlpModel.weights.toArray
  lazy val layers: Array[Int] = mlpModel.layers

  def transform(dataset: Dataset[_]): DataFrame = {
    pipeline.transform(dataset)
      .drop(mlpModel.getFeaturesCol)
      .drop(mlpModel.getLabelCol)
      .drop(PREDICTED_LABEL_INDEX_COL)
  }

  // MLWritable contract: delegate to the writer defined in the companion object.
  override def write: MLWriter =
    new MultilayerPerceptronClassifierWrapper.MultilayerPerceptronClassifierWrapperWriter(this)
}

private[r] object MultilayerPerceptronClassifierWrapper
  extends MLReadable[MultilayerPerceptronClassifierWrapper] {

  val PREDICTED_LABEL_INDEX_COL = "pred_label_idx"

  // (the fit(...) factory used from SparkR is omitted in this excerpt)

  override def read: MLReader[MultilayerPerceptronClassifierWrapper] =
    new MultilayerPerceptronClassifierWrapperReader

  override def load(path: String): MultilayerPerceptronClassifierWrapper = super.load(path)

  class MultilayerPerceptronClassifierWrapperReader
    extends MLReader[MultilayerPerceptronClassifierWrapper] {

    override def load(path: String): MultilayerPerceptronClassifierWrapper = {
      implicit val format = DefaultFormats
      val pipelinePath = new Path(path, "pipeline").toString

      val pipeline = PipelineModel.load(pipelinePath)
      new MultilayerPerceptronClassifierWrapper(pipeline)
    }
  }

  class MultilayerPerceptronClassifierWrapperWriter(instance: MultilayerPerceptronClassifierWrapper)
    extends MLWriter {

    override protected def saveImpl(path: String): Unit = {
      val rMetadataPath = new Path(path, "rMetadata").toString
      val pipelinePath = new Path(path, "pipeline").toString

      val rMetadata = "class" -> instance.getClass.getName
      val rMetadataJson: String = compact(render(rMetadata))
      sc.parallelize(Seq(rMetadataJson), 1).saveAsTextFile(rMetadataPath)

      instance.pipeline.save(pipelinePath)
    }
  }
}
Example 13
Source File: DefaultMLWriter.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.serialization

import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.ml.param.{ParamPair, Params}
import org.apache.spark.ml.util.MLWriter
import org.json4s.JsonDSL._
import org.json4s._
import org.json4s.jackson.JsonMethods._

import io.deepsense.deeplang.doperables.Transformer
import io.deepsense.sparkutils.ML.MLWriterWithSparkContext

class DefaultMLWriter[T <: Params](instance: T) extends MLWriter with MLWriterWithSparkContext {

  def saveImpl(path: String): Unit = {
    val modelPath = Transformer.modelFilePath(path)
    saveMetadata(instance, path, sc)
    CustomPersistence.save(sparkContext, instance, modelPath)
  }

  // Copied from org.apache.spark.ml.util.DefaultParamsWriter.
  // We need to be consistent with the Spark format, but that method is private.
  private def saveMetadata(
      instance: Params,
      path: String,
      sc: SparkContext,
      extraMetadata: Option[JObject] = None,
      paramMap: Option[JValue] = None): Unit = {
    val uid = instance.uid
    val cls = instance.getClass.getName
    val params = instance.extractParamMap().toSeq.asInstanceOf[Seq[ParamPair[Any]]]
    val jsonParams = paramMap.getOrElse(render(params.map { case ParamPair(p, v) =>
      p.name -> parse(p.jsonEncode(v))
    }.toList))
    val basicMetadata = ("class" -> cls) ~
      ("timestamp" -> System.currentTimeMillis()) ~
      ("sparkVersion" -> sc.version) ~
      ("uid" -> uid) ~
      ("paramMap" -> jsonParams)
    val metadata = extraMetadata match {
      case Some(jObject) =>
        basicMetadata ~ jObject
      case None =>
        basicMetadata
    }
    val metadataPath = new Path(path, "metadata").toString
    val metadataJson = compact(render(metadata))
    sc.parallelize(Seq(metadataJson), 1).saveAsTextFile(metadataPath)
  }
}
Example 14
Source File: SerializableSparkEstimator.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.serialization

import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{MLWritable, MLWriter}
import org.apache.spark.ml.{Estimator, Model}
import org.apache.spark.sql
import org.apache.spark.sql.types.StructType

import io.deepsense.sparkutils.ML

class SerializableSparkEstimator[T <: Model[T], E <: Estimator[T]](val sparkEstimator: E)
  extends ML.Estimator[SerializableSparkModel[T]]
  with MLWritable {

  override val uid: String = "e2a121fe-da6e-4ef2-9c5e-56ee558c14f0"

  override def fitDF(dataset: sql.DataFrame): SerializableSparkModel[T] = {
    val result: T = sparkEstimator.fit(dataset)
    new SerializableSparkModel[T](result)
  }

  override def copy(extra: ParamMap): Estimator[SerializableSparkModel[T]] =
    new SerializableSparkEstimator[T, E](sparkEstimator.copy(extra).asInstanceOf[E])

  override def write: MLWriter = new DefaultMLWriter(this)

  override def transformSchema(schema: StructType): StructType =
    sparkEstimator.transformSchema(schema)
}
Example 15
Source File: SerializableSparkModel.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.serialization

import org.apache.spark.ml.Model
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.types.StructType

import io.deepsense.sparkutils.ML

class SerializableSparkModel[M <: Model[M]](val sparkModel: M)
  extends ML.Model[SerializableSparkModel[M]]
  with MLWritable {

  override def copy(extra: ParamMap): SerializableSparkModel[M] =
    new SerializableSparkModel(sparkModel.copy(extra))

  override def write: MLWriter = {
    sparkModel match {
      case w: MLWritable => w.write
      case _ => new DefaultMLWriter(this)
    }
  }

  override def transformDF(dataset: DataFrame): DataFrame =
    sparkModel.transform(dataset)

  override def transformSchema(schema: StructType): StructType =
    sparkModel.transformSchema(schema)

  override val uid: String = "dc7178fe-b209-44f5-8a74-d3c4dafa0fae"
}

// This class may seem unused, but it is used reflectively by the Spark deserialization mechanism
object SerializableSparkModel extends MLReadable[SerializableSparkModel[_]] {

  override def read: MLReader[SerializableSparkModel[_]] = {
    new DefaultMLReader[SerializableSparkModel[_]]()
  }
}