org.apache.spark.ml.feature.Binarizer Scala Examples
The following examples show how to use org.apache.spark.ml.feature.Binarizer.
The header above each example identifies the original project and source file.
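Before the project examples, here is a minimal, self-contained sketch of the Binarizer contract (the object, app, and column names are illustrative, not taken from any project below): values strictly greater than the threshold become 1.0, all other values become 0.0.

import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.sql.SparkSession

object BinarizerQuickStart {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder.appName("BinarizerQuickStart").master("local[*]").getOrCreate()
    import spark.implicits._

    // Values strictly above the 0.5 threshold become 1.0; everything else becomes 0.0.
    val df = Seq((0, 0.3), (1, 0.9)).toDF("id", "score")
    val binarized = new Binarizer()
      .setInputCol("score")
      .setOutputCol("label")
      .setThreshold(0.5)
      .transform(df)
    binarized.show() // id=0 -> label 0.0, id=1 -> label 1.0
    spark.stop()
  }
}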
Example 1
Source File: BinarizerExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.feature.Binarizer
// $example off$
import org.apache.spark.sql.SparkSession

object BinarizerExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("BinarizerExample")
      .getOrCreate()

    // $example on$
    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
    val dataFrame = spark.createDataFrame(data).toDF("id", "feature")

    val binarizer: Binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)

    val binarizedDataFrame = binarizer.transform(dataFrame)

    println(s"Binarizer output with Threshold = ${binarizer.getThreshold}")
    binarizedDataFrame.show()
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
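Since the transform is deterministic, running this example should print the threshold followed by a table like the one below: 0.1 and 0.2 fall at or below the 0.5 threshold, while 0.8 exceeds it.

Binarizer output with Threshold = 0.5
+---+-------+-----------------+
| id|feature|binarized_feature|
+---+-------+-----------------+
|  0|    0.1|              0.0|
|  1|    0.8|              1.0|
|  2|    0.2|              0.0|
+---+-------+-----------------+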
Example 2
Source File: LocalBinarizer.scala From spark-ml-serving with Apache License 2.0
package io.hydrosphere.spark_ml_serving.preprocessors

import io.hydrosphere.spark_ml_serving.TypedTransformerConverter
import io.hydrosphere.spark_ml_serving.common._
import org.apache.spark.ml.feature.Binarizer

class LocalBinarizer(override val sparkTransformer: Binarizer) extends LocalTransformer[Binarizer] {
  override def transform(localData: LocalData): LocalData = {
    localData.column(sparkTransformer.getInputCol) match {
      case Some(column) =>
        val threshold = sparkTransformer.getThreshold
        // Apply the same rule as Spark's Binarizer: strictly above the threshold -> 1.0, otherwise 0.0
        val newData = column.data.map { r =>
          if (r.asInstanceOf[Number].doubleValue() > threshold) 1.0 else 0.0
        }
        localData.withColumn(LocalDataColumn(sparkTransformer.getOutputCol, newData))
      case None => localData
    }
  }
}

object LocalBinarizer
  extends SimpleModelLoader[Binarizer]
  with TypedTransformerConverter[Binarizer] {

  // Rebuild the transformer from serialized metadata
  override def build(metadata: Metadata, data: LocalData): Binarizer = {
    new Binarizer(metadata.uid)
      .setInputCol(metadata.paramMap("inputCol").asInstanceOf[String])
      .setOutputCol(metadata.paramMap("outputCol").asInstanceOf[String])
      .setThreshold(metadata.paramMap("threshold").toString.toDouble)
  }

  override implicit def toLocal(transformer: Binarizer): LocalBinarizer = new LocalBinarizer(transformer)
}
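The serving-side transform must reproduce Spark's rule exactly: values strictly greater than the threshold map to 1.0, everything else to 0.0. A dependency-free sketch of just that rule (object and method names are illustrative, not part of spark-ml-serving):

object BinarizeRule {
  // Same per-value comparison LocalBinarizer applies above
  def binarize(values: Seq[Double], threshold: Double): Seq[Double] =
    values.map(v => if (v > threshold) 1.0 else 0.0)

  def main(args: Array[String]): Unit = {
    println(binarize(Seq(0.1, 0.8, 0.2), threshold = 0.5)) // List(0.0, 1.0, 0.0)
  }
}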
Example 3
Source File: BinarizerOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.feature

import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.OpModel
import org.apache.spark.ml.bundle._
import org.apache.spark.ml.feature.Binarizer
import org.apache.spark.sql.mleap.TypeConverters._
import ml.combust.mleap.runtime.types.BundleTypeConverters._

class BinarizerOp extends SimpleSparkOp[Binarizer] {
  override val Model: OpModel[SparkBundleContext, Binarizer] = new OpModel[SparkBundleContext, Binarizer] {
    override val klazz: Class[Binarizer] = classOf[Binarizer]

    override def opName: String = Bundle.BuiltinOps.feature.binarizer

    override def store(model: Model, obj: Binarizer)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      assert(context.context.dataset.isDefined, BundleHelper.sampleDataframeMessage(klazz))

      val dataset = context.context.dataset.get
      model.withValue("threshold", Value.double(obj.getThreshold))
        .withValue("input_shapes", Value.dataShape(sparkToMleapDataShape(dataset.schema(obj.getInputCol), dataset)))
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): Binarizer = {
      new Binarizer(uid = "").setThreshold(model.value("threshold").getDouble)
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: Binarizer): Binarizer = {
    new Binarizer(uid = uid).setThreshold(model.getThreshold)
  }

  override def sparkInputs(obj: Binarizer): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCol)
  }

  override def sparkOutputs(obj: Binarizer): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
}
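Note that `store` asserts a sample dataset is attached to the bundle context; it is needed to derive the input shape. A hypothetical export sketch following MLeap's documented bundle pattern (the path is illustrative, and imports and signatures may vary across MLeap versions):

import ml.combust.bundle.BundleFile
import ml.combust.mleap.spark.SparkSupport._
import org.apache.spark.ml.bundle.SparkBundleContext
import resource.managed

// Attach a transformed sample DataFrame so BinarizerOp.store can record input shapes.
implicit val sbc: SparkBundleContext = SparkBundleContext().withDataset(binarizer.transform(dataFrame))
for (bundle <- managed(BundleFile("jar:file:/tmp/binarizer-model.zip"))) {
  binarizer.writeBundle.save(bundle)(sbc)
}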
Example 4
Source File: BinarizerParitySpec.scala From mleap with Apache License 2.0
package org.apache.spark.ml.parity.feature

import org.apache.spark.ml.feature.{Binarizer, VectorAssembler}
import org.apache.spark.ml.{Pipeline, Transformer}
import org.apache.spark.ml.parity.SparkParityBase
import org.apache.spark.sql.DataFrame

class BinarizerParitySpec extends SparkParityBase {
  override val dataset: DataFrame = baseDataset.select("dti")
  // The pipeline exercises Binarizer on both input types it supports:
  // the raw double column "dti" and the assembled vector column "features".
  override val sparkTransformer: Transformer = new Pipeline().setStages(Array(
    new VectorAssembler()
      .setInputCols(Array("dti"))
      .setOutputCol("features"),
    new Binarizer()
      .setThreshold(0.12)
      .setInputCol("dti")
      .setOutputCol("thresholded_features_double"),
    new Binarizer()
      .setThreshold(0.12)
      .setInputCol("features")
      .setOutputCol("thresholded_features"))).fit(dataset)
}
Example 5
Source File: BinarizerExample.scala From sparkoscope with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.feature.Binarizer
// $example off$
import org.apache.spark.sql.SparkSession

object BinarizerExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("BinarizerExample")
      .getOrCreate()

    // $example on$
    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
    val dataFrame = spark.createDataFrame(data).toDF("id", "feature")

    val binarizer: Binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)

    val binarizedDataFrame = binarizer.transform(dataFrame)

    println(s"Binarizer output with Threshold = ${binarizer.getThreshold}")
    binarizedDataFrame.show()
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 6
Source File: BinarizerExample.scala From multi-tenancy-spark with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.feature.Binarizer
// $example off$
import org.apache.spark.sql.SparkSession

object BinarizerExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("BinarizerExample")
      .getOrCreate()

    // $example on$
    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
    val dataFrame = spark.createDataFrame(data).toDF("id", "feature")

    val binarizer: Binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)

    val binarizedDataFrame = binarizer.transform(dataFrame)

    println(s"Binarizer output with Threshold = ${binarizer.getThreshold}")
    binarizedDataFrame.show()
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 7
Source File: BinarizerExample.scala From Spark-2.3.1 with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.feature.Binarizer
// $example off$
import org.apache.spark.sql.SparkSession

object BinarizerExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder
      .appName("BinarizerExample")
      .getOrCreate()

    // $example on$
    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
    val dataFrame = spark.createDataFrame(data).toDF("id", "feature")

    val binarizer: Binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)

    val binarizedDataFrame = binarizer.transform(dataFrame)

    println(s"Binarizer output with Threshold = ${binarizer.getThreshold}")
    binarizedDataFrame.show()
    // $example off$

    spark.stop()
  }
}
// scalastyle:on println
Example 8
Source File: BinarizerExample.scala From BigDatalog with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

// $example on$
import org.apache.spark.ml.feature.Binarizer
// $example off$
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

object BinarizerExample {
  def main(args: Array[String]): Unit = {
    // This variant targets pre-2.0 Spark: it builds a SQLContext from a
    // SparkContext instead of using SparkSession.
    val conf = new SparkConf().setAppName("BinarizerExample")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    // $example on$
    val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
    val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature")

    val binarizer: Binarizer = new Binarizer()
      .setInputCol("feature")
      .setOutputCol("binarized_feature")
      .setThreshold(0.5)

    val binarizedDataFrame = binarizer.transform(dataFrame)
    val binarizedFeatures = binarizedDataFrame.select("binarized_feature")
    binarizedFeatures.collect().foreach(println)
    // $example off$

    sc.stop()
  }
}
// scalastyle:on println
Example 9
Source File: Binarizer.scala From aardpfark with Apache License 2.0
package com.ibm.aardpfark.spark.ml.feature

import com.ibm.aardpfark.pfa.dsl.StringExpr
import com.ibm.aardpfark.pfa.document.{PFABuilder, PFADocument}
import com.ibm.aardpfark.pfa.dsl._
import com.ibm.aardpfark.pfa.expression.PFAExpression
import com.ibm.aardpfark.spark.ml.PFATransformer
import org.apache.avro.SchemaBuilder
import org.apache.spark.ml.feature.Binarizer

class PFABinarizer(override val sparkTransformer: Binarizer) extends PFATransformer {

  private val inputCol = sparkTransformer.getInputCol
  private val outputCol = sparkTransformer.getOutputCol
  private val inputExpr = StringExpr(s"input.${inputCol}")

  // The input may be a single double or an array of doubles, hence the union schema
  override def inputSchema = {
    SchemaBuilder.record(withUid(inputBaseName)).fields()
      .name(inputCol).`type`().unionOf().array().items().doubleType()
      .and()
      .doubleType().endUnion()
      .noDefault()
      .endRecord()
  }

  override def outputSchema = {
    SchemaBuilder.record(withUid(outputBaseName)).fields()
      .name(outputCol).`type`().unionOf().array().items().doubleType()
      .and()
      .doubleType().endUnion()
      .noDefault()
      .endRecord()
  }

  private val th = sparkTransformer.getThreshold
  private val doubleBin = NamedFunctionDef("doubleBin", FunctionDef[Double, Double]("d",
    If (core.gt(StringExpr("d"), th)) Then 1.0 Else 0.0)
  )

  override def action: PFAExpression = {
    val asDouble = As[Double]("x", x => doubleBin.call(x))
    val asArray = As[Array[Double]]("x", x => a.map(x, doubleBin.ref))
    val cast = Cast(inputExpr, Seq(asDouble, asArray))
    NewRecord(outputSchema, Map(outputCol -> cast))
  }

  override def pfa: PFADocument = {
    PFABuilder()
      .withName(sparkTransformer.uid)
      .withMetadata(getMetadata)
      .withInput(inputSchema)
      .withOutput(outputSchema)
      .withFunction(doubleBin)
      .withAction(action)
      .pfa
  }
}
Example 10
Source File: BinarizerSuite.scala From aardpfark with Apache License 2.0
package com.ibm.aardpfark.spark.ml.feature

import com.ibm.aardpfark.pfa.{Result, SparkFeaturePFASuiteBase}
import org.apache.spark.ml.feature.Binarizer

class BinarizerSuite extends SparkFeaturePFASuiteBase[BinarizerResult] {

  val data = Array((0, 0.1), (1, 0.8), (2, 0.2))
  val dataFrame = spark.createDataFrame(data).toDF("id", "feature")
  val binarizer: Binarizer = new Binarizer()
    .setInputCol("feature")
    .setOutputCol("binarized_feature")
    .setThreshold(0.5)

  override val sparkTransformer = binarizer

  val result = binarizer.transform(dataFrame)

  // Need to specify the branch type since a union schema is expected
  override val input = Array(
    "{\"feature\":{\"double\":0.1}}",
    "{\"feature\":{\"double\":0.8}}",
    "{\"feature\":{\"double\":0.2}}")
  override val expectedOutput = result.select(binarizer.getOutputCol).toJSON.collect()
}

case class BinarizerResult(binarized_feature: Double) extends Result