org.apache.spark.ml.feature.MinMaxScaler Java Examples
The following examples show how to use
org.apache.spark.ml.feature.MinMaxScaler.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JavaMinMaxScalerExample.java From SparkDemo with MIT License | 5 votes |
public static void main(String[] args) { SparkSession spark = SparkSession .builder() .appName("JavaMinMaxScalerExample") .getOrCreate(); // $example on$ List<Row> data = Arrays.asList( RowFactory.create(0, Vectors.dense(1.0, 0.1, -1.0)), RowFactory.create(1, Vectors.dense(2.0, 1.1, 1.0)), RowFactory.create(2, Vectors.dense(3.0, 10.1, 3.0)) ); StructType schema = new StructType(new StructField[]{ new StructField("id", DataTypes.IntegerType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata.empty()) }); Dataset<Row> dataFrame = spark.createDataFrame(data, schema); MinMaxScaler scaler = new MinMaxScaler() .setInputCol("features") .setOutputCol("scaledFeatures"); // Compute summary statistics and generate MinMaxScalerModel MinMaxScalerModel scalerModel = scaler.fit(dataFrame); // rescale each feature to range [min, max]. Dataset<Row> scaledData = scalerModel.transform(dataFrame); System.out.println("Features scaled to range: [" + scaler.getMin() + ", " + scaler.getMax() + "]"); scaledData.select("features", "scaledFeatures").show(); // $example off$ spark.stop(); }
Example #2
Source File: DataPreview.java From StockPrediction with MIT License | 5 votes |
public static void main (String[] args) throws IOException { SparkSession spark = SparkSession.builder().master("local").appName("DataProcess").getOrCreate(); String filename = "prices-split-adjusted.csv"; String symbol = "GOOG"; // load data from csv file Dataset<Row> data = spark.read().format("csv").option("header", true) .load(new ClassPathResource(filename).getFile().getAbsolutePath()) //.filter(functions.col("symbol").equalTo(symbol)) //.drop("date").drop("symbol") .withColumn("openPrice", functions.col("open").cast("double")).drop("open") .withColumn("closePrice", functions.col("close").cast("double")).drop("close") .withColumn("lowPrice", functions.col("low").cast("double")).drop("low") .withColumn("highPrice", functions.col("high").cast("double")).drop("high") .withColumn("volumeTmp", functions.col("volume").cast("double")).drop("volume") .toDF("date", "symbol", "open", "close", "low", "high", "volume"); data.show(); Dataset<Row> symbols = data.select("date", "symbol").groupBy("symbol").agg(functions.count("date").as("count")); System.out.println("Number of Symbols: " + symbols.count()); symbols.show(); VectorAssembler assembler = new VectorAssembler() .setInputCols(new String[] {"open", "low", "high", "volume", "close"}) .setOutputCol("features"); data = assembler.transform(data).drop("open", "low", "high", "volume", "close"); data = new MinMaxScaler().setMin(0).setMax(1) .setInputCol("features").setOutputCol("normalizedFeatures") .fit(data).transform(data) .drop("features").toDF("features"); }
Example #3
Source File: MinMaxScalerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testMinMaxScaler() { //prepare data JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList( RowFactory.create(1.0, Vectors.dense(data[0])), RowFactory.create(2.0, Vectors.dense(data[1])), RowFactory.create(3.0, Vectors.dense(data[2])), RowFactory.create(4.0, Vectors.dense(data[3])) )); StructType schema = new StructType(new StructField[]{ new StructField("label", DataTypes.DoubleType, false, Metadata.empty()), new StructField("features", new VectorUDT(), false, Metadata.empty()) }); Dataset<Row> df = spark.createDataFrame(jrdd, schema); //train model in spark MinMaxScalerModel sparkModel = new MinMaxScaler() .setInputCol("features") .setOutputCol("scaled") .setMin(-5) .setMax(5) .fit(df); //Export model, import it back and get transformer byte[] exportedModel = ModelExporter.export(sparkModel); final Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions List<Row> sparkOutput = sparkModel.transform(df).orderBy("label").select("features", "scaled").collectAsList(); assertCorrectness(sparkOutput, expected, transformer); }
Example #4
Source File: MinMaxScalerBridgeTest.java From spark-transformers with Apache License 2.0 | 5 votes |
@Test public void testStandardScaler() { //prepare data List<LabeledPoint> localTraining = Arrays.asList( new LabeledPoint(1.0, Vectors.dense(data[0])), new LabeledPoint(2.0, Vectors.dense(data[1])), new LabeledPoint(3.0, Vectors.dense(data[2])), new LabeledPoint(3.0, Vectors.dense(data[3]))); DataFrame df = sqlContext.createDataFrame(sc.parallelize(localTraining), LabeledPoint.class); //train model in spark MinMaxScalerModel sparkModel = new MinMaxScaler() .setInputCol("features") .setOutputCol("scaled") .setMin(-5) .setMax(5) .fit(df); //Export model, import it back and get transformer byte[] exportedModel = ModelExporter.export(sparkModel, df); final Transformer transformer = ModelImporter.importAndGetTransformer(exportedModel); //compare predictions Row[] sparkOutput = sparkModel.transform(df).orderBy("label").select("features", "scaled").collect(); assertCorrectness(sparkOutput, expected, transformer); }