org.apache.spark.ml.param.DoubleParam Scala Examples
The following examples show how to use org.apache.spark.ml.param.DoubleParam.
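DoubleParam is the Param[Double] specialization used throughout Spark ML to declare numeric hyperparameters on Params classes (transformers, estimators, models). As a quick orientation before the project-specific examples, here is a minimal self-contained sketch of the usual pattern: declare the param (optionally with a validator), register a default via setDefault, and expose getter/setter methods. The ScaleBy transformer and its factor param are invented for illustration and are not part of Spark.

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.types.{DataType, DataTypes}

// Illustrative transformer: multiplies a Double input column by a configurable factor.
class ScaleBy(override val uid: String)
  extends UnaryTransformer[Double, Double, ScaleBy] with DefaultParamsWritable {

  def this() = this(Identifiable.randomUID("scaleBy"))

  // DoubleParam(parent, name, doc, isValid): here the factor must be strictly positive.
  final val factor = new DoubleParam(this, "factor", "multiplicative factor", ParamValidators.gt(0.0))
  setDefault(factor -> 1.0)

  def getFactor: Double = $(factor)

  def setFactor(value: Double): this.type = set(factor, value)

  override protected def createTransformFunc: Double => Double = _ * $(factor)

  override protected def outputDataType: DataType = DataTypes.DoubleType
}

A stage like this is then configured the same way as the built-in ones, for example new ScaleBy().setInputCol("value").setOutputCol("scaled").setFactor(2.5).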
Example 1
Source File: Normalizer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.ml.feature

import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.sql.types.DataType

/**
 * Normalize a vector to have unit norm using the given p-norm.
 */
@Since("1.4.0")
class Normalizer @Since("1.4.0") (@Since("1.4.0") override val uid: String)
  extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable {

  @Since("1.4.0")
  def this() = this(Identifiable.randomUID("normalizer"))

  /** Normalization in L^p^ space. Must be >= 1. (default: p = 2) */
  @Since("1.4.0")
  val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1))

  setDefault(p -> 2.0)

  /** @group getParam */
  @Since("1.4.0")
  def getP: Double = $(p)

  /** @group setParam */
  @Since("1.4.0")
  def setP(value: Double): this.type = set(p, value)

  override protected def createTransformFunc: Vector => Vector = {
    val normalizer = new feature.Normalizer($(p))
    vector => normalizer.transform(OldVectors.fromML(vector)).asML
  }

  override protected def outputDataType: DataType = new VectorUDT()
}

@Since("1.6.0")
object Normalizer extends DefaultParamsReadable[Normalizer] {

  @Since("1.6.0")
  override def load(path: String): Normalizer = super.load(path)
}
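A typical way to use this transformer, assuming a DataFrame named dataFrame that has a Vector column called "features" (the variable and column names are illustrative):

import org.apache.spark.ml.feature.Normalizer

// Assumes dataFrame: DataFrame with a Vector column "features".
val normalizer = new Normalizer()
  .setInputCol("features")
  .setOutputCol("normFeatures")
  .setP(1.0)  // L1 norm; the p DoubleParam defaults to 2.0 and must be >= 1

val l1NormData = normalizer.transform(dataFrame)
l1NormData.show()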
Example 2
Source File: GaussianProcessParams.scala From spark-gp with Apache License 2.0
package org.apache.spark.ml.commons

import org.apache.spark.ml.PredictorParams
import org.apache.spark.ml.commons.kernel.{Kernel, RBFKernel}
import org.apache.spark.ml.param.shared.{HasAggregationDepth, HasMaxIter, HasSeed, HasTol}
import org.apache.spark.ml.param.{DoubleParam, IntParam, Param}

private[ml] trait GaussianProcessParams extends PredictorParams
  with HasMaxIter with HasTol with HasAggregationDepth with HasSeed {

  final val activeSetProvider = new Param[ActiveSetProvider](this, "activeSetProvider",
    "the class which provides the active set used by Projected Process Approximation")

  final val kernel = new Param[() => Kernel](this, "kernel",
    "function of no arguments which returns the kernel of the prior Gaussian Process")

  final val datasetSizeForExpert = new IntParam(this, "datasetSizeForExpert",
    "The number of data points fed to each expert. " +
      "Time and space complexity of training quadratically grows with it.")

  final val sigma2 = new DoubleParam(this, "sigma2",
    "The variance of noise in the inputs. The value is added to the diagonal of the " +
      "kernel Matrix. Also prevents numerical issues associated with inversion " +
      "of a computationally singular matrix ")

  final val activeSetSize = new IntParam(this, "activeSetSize",
    "Number of latent functions to project the process onto. " +
      "The size of the produced model and prediction complexity " +
      "linearly depend on this value.")

  def setActiveSetProvider(value: ActiveSetProvider): this.type = set(activeSetProvider, value)
  setDefault(activeSetProvider -> RandomActiveSetProvider)

  def setDatasetSizeForExpert(value: Int): this.type = set(datasetSizeForExpert, value)
  setDefault(datasetSizeForExpert -> 100)

  def setMaxIter(value: Int): this.type = set(maxIter, value)
  setDefault(maxIter -> 100)

  def setSigma2(value: Double): this.type = set(sigma2, value)
  setDefault(sigma2 -> 1e-3)

  def setKernel(value: () => Kernel): this.type = set(kernel, value)
  setDefault(kernel -> (() => new RBFKernel()))

  def setTol(value: Double): this.type = set(tol, value)
  setDefault(tol -> 1E-6)

  def setActiveSetSize(value: Int): this.type = set(activeSetSize, value)
  setDefault(activeSetSize -> 100)

  def setSeed(value: Long): this.type = set(seed, value)
}
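For context, a hedged sketch of how these params would be set from user code. GaussianProcessRegression is used here only as an illustrative name for an estimator that mixes in GaussianProcessParams, so the snippet is not self-contained and does not claim to reproduce spark-gp's documented API; only the setters defined in the trait above are called.

// Illustrative usage sketch, not taken from spark-gp's docs: GaussianProcessRegression
// stands in for any Predictor that mixes in GaussianProcessParams.
val gp = new GaussianProcessRegression()
  .setKernel(() => new RBFKernel())   // same shape as the trait's default kernel
  .setSigma2(1e-4)                    // overrides the DoubleParam's 1e-3 default
  .setDatasetSizeForExpert(200)
  .setActiveSetSize(300)
  .setMaxIter(50)
  .setTol(1e-5)

// val model = gp.fit(trainingData)   // trainingData: a DataFrame with label/features columns (assumed)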
Example 3
Source File: Normalizer.scala From sparkoscope with Apache License 2.0
package org.apache.spark.ml.feature

import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.sql.types.DataType

  @Since("1.4.0")
  def setP(value: Double): this.type = set(p, value)

  override protected def createTransformFunc: Vector => Vector = {
    val normalizer = new feature.Normalizer($(p))
    vector => normalizer.transform(OldVectors.fromML(vector)).asML
  }

  override protected def outputDataType: DataType = new VectorUDT()
}

@Since("1.6.0")
object Normalizer extends DefaultParamsReadable[Normalizer] {

  @Since("1.6.0")
  override def load(path: String): Normalizer = super.load(path)
}
Example 4
Source File: Normalizer.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.ml.feature

import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.sql.types.DataType

  @Since("1.4.0")
  def setP(value: Double): this.type = set(p, value)

  override protected def createTransformFunc: Vector => Vector = {
    val normalizer = new feature.Normalizer($(p))
    vector => normalizer.transform(OldVectors.fromML(vector)).asML
  }

  override protected def outputDataType: DataType = new VectorUDT()
}

@Since("1.6.0")
object Normalizer extends DefaultParamsReadable[Normalizer] {

  @Since("1.6.0")
  override def load(path: String): Normalizer = super.load(path)
}
Example 5
Source File: FillMissingWithMean.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.feature

import com.salesforce.op.UID
import com.salesforce.op.features.FeatureSparkTypes
import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.unary.{UnaryEstimator, UnaryModel}
import com.salesforce.op.utils.spark.RichRow._
import org.apache.spark.ml.param.DoubleParam
import org.apache.spark.sql.Dataset

import scala.reflect.runtime.universe._

class FillMissingWithMean[N, I <: OPNumeric[N]]
(
  uid: String = UID[FillMissingWithMean[_, _]]
)(implicit tti: TypeTag[I], ttiv: TypeTag[I#Value])
  extends UnaryEstimator[I, RealNN](operationName = "fillWithMean", uid = uid) {

  val defaultValue = new DoubleParam(this, "defaultValue", "default value to replace the missing ones")
  set(defaultValue, 0.0)

  def setDefaultValue(v: Double): this.type = set(defaultValue, v)

  private implicit val dEncoder = FeatureSparkTypes.featureTypeEncoder[Real]

  def fitFn(dataset: Dataset[Option[N]]): UnaryModel[I, RealNN] = {
    val grouped = dataset.map(v => iConvert.ftFactory.newInstance(v).toDouble).groupBy()
    val mean = grouped.mean().first().getOption[Double](0).getOrElse($(defaultValue))
    new FillMissingWithMeanModel[I](mean = mean, operationName = operationName, uid = uid)
  }
}

final class FillMissingWithMeanModel[I <: OPNumeric[_]] private[op]
(
  val mean: Double,
  operationName: String,
  uid: String
)(implicit tti: TypeTag[I])
  extends UnaryModel[I, RealNN](operationName = operationName, uid = uid) {

  def transformFn: I => RealNN = _.toDouble.getOrElse(mean).toRealNN
}
Example 6
Source File: RealVectorizer.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License
package com.salesforce.op.stages.impl.feature

import com.salesforce.op.UID
import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.sequence.{SequenceEstimator, SequenceModel}
import com.salesforce.op.utils.spark.SequenceAggregators
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.param.{BooleanParam, DoubleParam}
import org.apache.spark.sql.Dataset

import scala.reflect.runtime.universe.TypeTag

class RealVectorizer[T <: Real]
(
  uid: String = UID[RealVectorizer[_]],
  operationName: String = "vecReal"
)(implicit tti: TypeTag[T], ttiv: TypeTag[T#Value])
  extends SequenceEstimator[T, OPVector](operationName = operationName, uid = uid)
    with VectorizerDefaults with TrackNullsParam {

  final val fillValue = new DoubleParam(this, "fillValue", "default value for FillWithConstant")
  setDefault(fillValue, 0.0)

  final val withConstant = new BooleanParam(this, "fillWithConstant",
    "boolean to check if filling the nulls with a constant value")
  setDefault(withConstant, true)

  def setFillWithConstant(value: Double): this.type = {
    set(fillValue, value)
    set(withConstant, true)
  }

  def setFillWithMean: this.type = {
    set(withConstant, false)
  }

  private def constants(): Seq[Double] = {
    val size = getInputFeatures().length
    val defValue = $(fillValue)
    val constants = List.fill(size)(defValue)
    constants
  }

  private def means(dataset: Dataset[Seq[T#Value]]): Seq[Double] = {
    val size = getInputFeatures().length
    val means = dataset.select(SequenceAggregators.MeanSeqNullNum(size = size).toColumn).first()
    means
  }

  def fitFn(dataset: Dataset[Seq[T#Value]]): SequenceModel[T, OPVector] = {
    if ($(trackNulls)) setMetadata(vectorMetadataWithNullIndicators.toMetadata)

    val fillValues = if ($(withConstant)) constants() else means(dataset)

    new RealVectorizerModel[T](
      fillValues = fillValues, trackNulls = $(trackNulls), operationName = operationName, uid = uid)
  }
}

final class RealVectorizerModel[T <: Real] private[op]
(
  val fillValues: Seq[Double],
  val trackNulls: Boolean,
  operationName: String,
  uid: String
)(implicit tti: TypeTag[T])
  extends SequenceModel[T, OPVector](operationName = operationName, uid = uid)
    with VectorizerDefaults {

  def transformFn: Seq[T] => OPVector = row => {
    val replaced =
      if (!trackNulls) {
        row.zip(fillValues).
          map { case (r, m) => r.value.getOrElse(m) }
      }
      else {
        row.zip(fillValues).
          flatMap { case (r, m) => r.value.getOrElse(m) :: booleanToDouble(r.isEmpty) :: Nil }
      }
    Vectors.dense(replaced.toArray).toOPVector
  }
}
Example 7
Source File: LanguageDetectorTransformer.scala From pravda-ml with Apache License 2.0
package org.apache.spark.ml.odkl.texts

import com.google.common.base.Optional
import com.optimaize.langdetect.LanguageDetector
import com.optimaize.langdetect.i18n.LdLocale
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.Transformer
import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol}
import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap}
import org.apache.spark.ml.util.{Identifiable, SchemaUtils}
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.functions.udf
import org.apache.spark.sql.types.{StringType, StructType}

import scala.collection.Map

  def setOutputCol(value: String): this.type = set(outputCol, value)

  def this() = this(Identifiable.randomUID("languageDetector"))

  override def transform(dataset: Dataset[_]): DataFrame = {
    dataset.withColumn($(outputCol), languageDetection(dataset.col($(inputCol))))
  }

  override def copy(extra: ParamMap): Transformer = {
    defaultCopy(extra)
  }

  @DeveloperApi
  override def transformSchema(schema: StructType): StructType = {
    SchemaUtils.appendColumn(schema, $(outputCol), StringType)
  }

  @transient object languageDetectorWrapped extends Serializable {
    val languageDetector: LanguageDetector =
      LanguageDetectorUtils.buildLanguageDetector(
        LanguageDetectorUtils.readListLangsBuiltIn(),
        $(minimalConfidence),
        $(languagePriors).toMap)
  }
}
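The class header and parameter declarations are cut off in the excerpt above, which references $(minimalConfidence) and $(languagePriors). Below is a minimal sketch of how such parameters are typically declared; the param names come from the excerpt, while the doc strings, defaults, and the Map[String, Double] element type are assumptions rather than pravda-ml's actual code.

import org.apache.spark.ml.param.{DoubleParam, Param, Params}

// Hypothetical reconstruction for illustration only.
trait LanguageDetectorParamsSketch extends Params {

  // Confidence threshold below which a detected language would be discarded (doc string assumed).
  final val minimalConfidence = new DoubleParam(this, "minimalConfidence",
    "minimal confidence required to accept a detected language")

  // Prior probabilities per language code (element type assumed).
  final val languagePriors = new Param[Map[String, Double]](this, "languagePriors",
    "prior probabilities for language codes")

  setDefault(minimalConfidence -> 0.95, languagePriors -> Map.empty[String, Double])  // defaults assumed

  def setMinimalConfidence(value: Double): this.type = set(minimalConfidence, value)

  def setLanguagePriors(value: Map[String, Double]): this.type = set(languagePriors, value)
}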
Example 8
Source File: UnaryTransformerExample.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.DoubleParam
import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.{DataType, DataTypes}
import org.apache.spark.util.Utils
// $example off$

/**
 * An example demonstrating how to create a custom Transformer using the UnaryTransformer abstraction.
 */
object UnaryTransformerExample {

  // $example on$
  /**
   * Simple Transformer which adds a constant value to input Doubles.
   */
  class MyTransformer(override val uid: String)
    extends UnaryTransformer[Double, Double, MyTransformer] with DefaultParamsWritable {

    final val shift: DoubleParam = new DoubleParam(this, "shift", "Value added to input")

    def getShift: Double = $(shift)

    def setShift(value: Double): this.type = set(shift, value)

    def this() = this(Identifiable.randomUID("myT"))

    override protected def createTransformFunc: Double => Double = (input: Double) => {
      input + $(shift)
    }

    override protected def outputDataType: DataType = DataTypes.DoubleType

    override protected def validateInputType(inputType: DataType): Unit = {
      require(inputType == DataTypes.DoubleType, s"Bad input type: $inputType. Requires Double.")
    }
  }

  object MyTransformer extends DefaultParamsReadable[MyTransformer]
  // $example off$

  def main(args: Array[String]) {
    val spark = SparkSession
      .builder()
      .appName("UnaryTransformerExample")
      .getOrCreate()

    // $example on$
    val myTransformer = new MyTransformer()
      .setShift(0.5)
      .setInputCol("input")
      .setOutputCol("output")

    // Create data, transform, and display it.
    val data = spark.range(0, 5).toDF("input")
      .select(col("input").cast("double").as("input"))
    val result = myTransformer.transform(data)
    println("Transformed by adding constant value")
    result.show()

    // Save and load the Transformer.
    val tmpDir = Utils.createTempDir()
    val dirName = tmpDir.getCanonicalPath
    myTransformer.write.overwrite().save(dirName)
    val sameTransformer = MyTransformer.load(dirName)

    // Transform the data to show the results are identical.
    println("Same transform applied from loaded model")
    val sameResult = sameTransformer.transform(data)
    sameResult.show()

    Utils.deleteRecursively(tmpDir)
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 9
Source File: Normalizer.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.ml.feature

import org.apache.spark.annotation.Since
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.linalg.{Vector, VectorUDT}
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vectors => OldVectors}
import org.apache.spark.sql.types.DataType

  @Since("1.4.0")
  def setP(value: Double): this.type = set(p, value)

  override protected def createTransformFunc: Vector => Vector = {
    val normalizer = new feature.Normalizer($(p))
    vector => normalizer.transform(OldVectors.fromML(vector)).asML
  }

  override protected def outputDataType: DataType = new VectorUDT()
}

@Since("1.6.0")
object Normalizer extends DefaultParamsReadable[Normalizer] {

  @Since("1.6.0")
  override def load(path: String): Normalizer = super.load(path)
}
Example 10
Source File: Normalizer.scala From BigDatalog with Apache License 2.0
package org.apache.spark.ml.feature

import org.apache.spark.annotation.{Since, Experimental}
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.{DoubleParam, ParamValidators}
import org.apache.spark.ml.util._
import org.apache.spark.mllib.feature
import org.apache.spark.mllib.linalg.{Vector, VectorUDT}
import org.apache.spark.sql.types.DataType

/**
 * :: Experimental ::
 * Normalize a vector to have unit norm using the given p-norm.
 */
@Experimental
class Normalizer(override val uid: String)
  extends UnaryTransformer[Vector, Vector, Normalizer] with DefaultParamsWritable {

  def this() = this(Identifiable.randomUID("normalizer"))

  /** Normalization in L^p^ space. Must be >= 1. (default: p = 2) */
  val p = new DoubleParam(this, "p", "the p norm value", ParamValidators.gtEq(1))

  setDefault(p -> 2.0)

  /** @group getParam */
  def getP: Double = $(p)

  /** @group setParam */
  def setP(value: Double): this.type = set(p, value)

  override protected def createTransformFunc: Vector => Vector = {
    val normalizer = new feature.Normalizer($(p))
    normalizer.transform
  }

  override protected def outputDataType: DataType = new VectorUDT()
}

@Since("1.6.0")
object Normalizer extends DefaultParamsReadable[Normalizer] {

  @Since("1.6.0")
  override def load(path: String): Normalizer = super.load(path)
}