Java Code Examples for org.apache.spark.ml.linalg.Vectors#dense()
The following examples show how to use org.apache.spark.ml.linalg.Vectors#dense().
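Before the examples, a minimal sketch of the two dense() overloads exposed by org.apache.spark.ml.linalg.Vectors; the class name and values here are illustrative only:

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;

public class DenseSketch {
  public static void main(String[] args) {
    // Varargs overload: dense(double firstValue, double... otherValues)
    Vector v1 = Vectors.dense(1.0, 2.0, 3.0);
    // Array overload: dense(double[] values)
    Vector v2 = Vectors.dense(new double[] { 1.0, 2.0, 3.0 });
    // Both build the same DenseVector
    System.out.println(v1.equals(v2)); // true
  }
}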
Example 1
Source File: RDDConverterUtils.java From systemds with Apache License 2.0
private static Vector createVector(MatrixBlock row) {
	if( row.isEmptyBlock(false) ) //EMPTY SPARSE ROW
		return Vectors.sparse(row.getNumColumns(), new int[0], new double[0]);
	else if( row.isInSparseFormat() ) //SPARSE ROW
		return Vectors.sparse(row.getNumColumns(),
			row.getSparseBlock().indexes(0), row.getSparseBlock().values(0));
	else // DENSE ROW
		return Vectors.dense(row.getDenseBlockValues());
}
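The sparse/dense branching above only affects storage, not semantics: ml.linalg vectors compare equal by size and values regardless of representation, so callers of createVector() need not care which branch produced a row. A minimal check under that assumption (class name and values are illustrative):

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;

public class SparseDenseEquality {
  public static void main(String[] args) {
    // The same logical row, built once densely and once sparsely
    Vector dense = Vectors.dense(0.0, 5.0, 0.0, 0.0);
    Vector sparse = Vectors.sparse(4, new int[] { 1 }, new double[] { 5.0 });
    // Vector.equals() compares size and values, not the backing format
    System.out.println(dense.equals(sparse)); // true
  }
}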
Example 2
Source File: JavaElementwiseProductExample.java From SparkDemo with MIT License
public static void main(String[] args) {
  SparkSession spark = SparkSession
    .builder()
    .appName("JavaElementwiseProductExample")
    .getOrCreate();

  // $example on$
  // Create some vector data; also works for sparse vectors
  List<Row> data = Arrays.asList(
    RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
    RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
  );

  List<StructField> fields = new ArrayList<StructField>(2);
  fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
  fields.add(DataTypes.createStructField("vector", new VectorUDT(), false));

  StructType schema = DataTypes.createStructType(fields);

  Dataset<Row> dataFrame = spark.createDataFrame(data, schema);

  Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);

  ElementwiseProduct transformer = new ElementwiseProduct()
    .setScalingVec(transformingVector)
    .setInputCol("vector")
    .setOutputCol("transformedVector");

  // Batch transform the vectors to create new column:
  transformer.transform(dataFrame).show();
  // $example off$

  spark.stop();
}
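ElementwiseProduct multiplies each input vector by the scaling vector component-wise (a Hadamard product), so in the transformedVector column row "a" becomes (1.0, 2.0, 3.0) ∘ (0.0, 1.0, 2.0) = (0.0, 2.0, 6.0) and row "b" becomes (0.0, 5.0, 12.0).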
Example 3
Source File: SimplePredictionFromTextFile.java From net.jgp.labs.spark with Apache License 2.0
private void start() {
  SparkSession spark = SparkSession.builder()
      .appName("Simple prediction from Text File")
      .master("local")
      .getOrCreate();

  spark.udf().register("vectorBuilder", new VectorBuilder(), new VectorUDT());

  String filename = "data/tuple-data-file.csv";
  StructType schema = new StructType(new StructField[] {
      new StructField("_c0", DataTypes.DoubleType, false, Metadata.empty()),
      new StructField("_c1", DataTypes.DoubleType, false, Metadata.empty()),
      new StructField("features", new VectorUDT(), true, Metadata.empty()), });

  Dataset<Row> df = spark.read().format("csv").schema(schema)
      .option("header", "false").load(filename);
  df = df.withColumn("valuefeatures", df.col("_c0")).drop("_c0");
  df = df.withColumn("label", df.col("_c1")).drop("_c1");
  df.printSchema();
  df = df.withColumn("features",
      callUDF("vectorBuilder", df.col("valuefeatures")));
  df.printSchema();
  df.show();

  LinearRegression lr = new LinearRegression().setMaxIter(20);
  // .setRegParam(1).setElasticNetParam(1);

  // Fit the model to the data.
  LinearRegressionModel model = lr.fit(df);

  // Given a dataset, predict each point's label, and show the results.
  model.transform(df).show();

  LinearRegressionTrainingSummary trainingSummary = model.summary();
  System.out.println("numIterations: " + trainingSummary.totalIterations());
  System.out.println("objectiveHistory: "
      + Vectors.dense(trainingSummary.objectiveHistory()));
  trainingSummary.residuals().show();
  System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
  System.out.println("r2: " + trainingSummary.r2());

  double intercept = model.intercept();
  System.out.println("Intercept: " + intercept);
  double regParam = model.getRegParam();
  System.out.println("Regularization parameter: " + regParam);
  double tol = model.getTol();
  System.out.println("Tolerance: " + tol);

  Double feature = 7.0;
  Vector features = Vectors.dense(feature);
  double p = model.predict(features);
  System.out.println("Prediction for feature " + feature + " is " + p);
  System.out.println(8 * regParam + intercept);
}
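Note that model.predict() takes a feature Vector of the same dimension the model was trained on; with the single feature here, the prediction is simply coefficient × 7.0 + intercept, the same line that transform() evaluates row by row over the whole dataset.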
Example 4
Source File: JavaBucketedRandomProjectionLSHExample.java From SparkDemo with MIT License
public static void main(String[] args) {
  SparkSession spark = SparkSession
    .builder()
    .appName("JavaBucketedRandomProjectionLSHExample")
    .getOrCreate();

  // $example on$
  List<Row> dataA = Arrays.asList(
    RowFactory.create(0, Vectors.dense(1.0, 1.0)),
    RowFactory.create(1, Vectors.dense(1.0, -1.0)),
    RowFactory.create(2, Vectors.dense(-1.0, -1.0)),
    RowFactory.create(3, Vectors.dense(-1.0, 1.0))
  );

  List<Row> dataB = Arrays.asList(
    RowFactory.create(4, Vectors.dense(1.0, 0.0)),
    RowFactory.create(5, Vectors.dense(-1.0, 0.0)),
    RowFactory.create(6, Vectors.dense(0.0, 1.0)),
    RowFactory.create(7, Vectors.dense(0.0, -1.0))
  );

  StructType schema = new StructType(new StructField[]{
    new StructField("id", DataTypes.IntegerType, false, Metadata.empty()),
    new StructField("keys", new VectorUDT(), false, Metadata.empty())
  });
  Dataset<Row> dfA = spark.createDataFrame(dataA, schema);
  Dataset<Row> dfB = spark.createDataFrame(dataB, schema);

  Vector key = Vectors.dense(1.0, 0.0);

  BucketedRandomProjectionLSH mh = new BucketedRandomProjectionLSH()
    .setBucketLength(2.0)
    .setNumHashTables(3)
    .setInputCol("keys")
    .setOutputCol("values");
  BucketedRandomProjectionLSHModel model = mh.fit(dfA);

  // Feature Transformation
  model.transform(dfA).show();
  // Cache the transformed columns
  Dataset<Row> transformedA = model.transform(dfA).cache();
  Dataset<Row> transformedB = model.transform(dfB).cache();

  // Approximate similarity join
  model.approxSimilarityJoin(dfA, dfB, 1.5).show();
  model.approxSimilarityJoin(transformedA, transformedB, 1.5).show();
  // Self Join
  model.approxSimilarityJoin(dfA, dfA, 2.5).filter("datasetA.id < datasetB.id").show();

  // Approximate nearest neighbor search
  model.approxNearestNeighbors(dfA, key, 2).show();
  model.approxNearestNeighbors(transformedA, key, 2).show();
  // $example off$

  spark.stop();
}
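The third argument to approxSimilarityJoin() is a Euclidean distance threshold: each dfA point above has two dfB points at distance 1.0 and two at √5 ≈ 2.24, so a cutoff of 1.5 only admits the closer pairs (subject to the hash-based approximation). Passing the pre-transformed, cached datasets avoids re-hashing, and approxNearestNeighbors(dfA, key, 2) likewise returns the two dfA rows nearest to key.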
Example 5
Source File: VectorBuilder.java From net.jgp.labs.spark with Apache License 2.0
@Override
public Vector call(Double t1) throws Exception {
  return Vectors.dense(t1);
}
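This call() method is the body of the "vectorBuilder" UDF registered in Example 3. The enclosing class is not shown in this listing; a plausible reconstruction, assuming it implements Spark's UDF1 interface (the imports follow from the signature, but the reconstruction itself is an assumption):

import org.apache.spark.ml.linalg.Vector;
import org.apache.spark.ml.linalg.Vectors;
import org.apache.spark.sql.api.java.UDF1;

// Hypothetical enclosing class; only call() appears in the original listing.
public class VectorBuilder implements UDF1<Double, Vector> {
  private static final long serialVersionUID = 1L;

  @Override
  public Vector call(Double t1) throws Exception {
    // Wrap a single double as a 1-dimensional dense feature vector
    return Vectors.dense(t1);
  }
}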