Java Code Examples for org.apache.spark.api.java.JavaRDD#toRDD()
The following examples show how to use
org.apache.spark.api.java.JavaRDD#toRDD().
You can vote up the examples you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
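Before the project examples, here is a minimal, self-contained sketch of the call itself. JavaRDD.toRDD(javaRdd) is a static helper that unwraps a JavaRDD<T> into the underlying Scala RDD<T>, equivalent to calling javaRdd.rdd(); RDD.toJavaRDD() goes the other way. The class name and the local master setting below are illustrative assumptions, not part of any of the projects listed.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

public class JavaRDDToRDDSketch {
    public static void main(String[] args) {
        // Illustrative local context; any existing SparkContext works the same way.
        SparkConf conf = new SparkConf().setAppName("JavaRDDToRDDSketch").setMaster("local[*]");
        JavaSparkContext jsc = new JavaSparkContext(conf);

        JavaRDD<String> javaRDD = jsc.parallelize(Arrays.asList("a", "b", "c"));

        // Static conversion from the Java wrapper to the underlying Scala RDD.
        RDD<String> rdd = JavaRDD.toRDD(javaRDD);

        // Equivalent instance call, and the reverse conversion back to a JavaRDD.
        RDD<String> sameRdd = javaRDD.rdd();
        JavaRDD<String> roundTrip = rdd.toJavaRDD();

        System.out.println("count = " + roundTrip.count()); // prints count = 3
        jsc.stop();
    }
}

The conversion is cheap: JavaRDD is only a thin wrapper, so both objects refer to the same distributed dataset.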
Example 1
Source File: GraphXGraphGenerator.java From rya with Apache License 2.0 | 8 votes |
public Graph<RyaTypeWritable, RyaTypeWritable> createGraph(SparkContext sc, Configuration conf) throws IOException, AccumuloSecurityException {
    StorageLevel storageLvl1 = StorageLevel.MEMORY_ONLY();
    StorageLevel storageLvl2 = StorageLevel.MEMORY_ONLY();
    ClassTag<RyaTypeWritable> RTWTag = ClassTag$.MODULE$.apply(RyaTypeWritable.class);
    RyaTypeWritable rtw = null;
    RDD<Tuple2<Object, RyaTypeWritable>> vertexRDD = getVertexRDD(sc, conf);

    RDD<Tuple2<Object, Edge>> edgeRDD = getEdgeRDD(sc, conf);
    JavaRDD<Tuple2<Object, Edge>> jrddTuple = edgeRDD.toJavaRDD();
    JavaRDD<Edge<RyaTypeWritable>> jrdd = jrddTuple.map(tuple -> tuple._2);
    // Convert the Java wrapper back to the Scala RDD type expected by Graph.apply.
    RDD<Edge<RyaTypeWritable>> goodERDD = JavaRDD.toRDD(jrdd);

    return Graph.apply(vertexRDD, goodERDD, rtw, storageLvl1, storageLvl2, RTWTag, RTWTag);
}
Example 2
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testRDDSumIJVDML() {
    System.out.println("MLContextTest - RDD<String> IJV sum DML");

    List<String> list = new ArrayList<>();
    list.add("1 1 1");
    list.add("2 1 2");
    list.add("1 2 3");
    list.add("3 3 4");
    JavaRDD<String> javaRDD = sc.parallelize(list);
    RDD<String> rdd = JavaRDD.toRDD(javaRDD);

    MatrixMetadata mm = new MatrixMetadata(MatrixFormat.IJV, 3, 3);

    Script script = dml("print('sum: ' + sum(M));").in("M", rdd, mm);
    setExpectedStdOut("sum: 10.0");
    ml.execute(script);
}
Example 3
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testRDDGoodMetadataDML() {
    System.out.println("MLContextTest - RDD<String> good metadata DML");

    List<String> list = new ArrayList<>();
    list.add("1,1,1");
    list.add("2,2,2");
    list.add("3,3,3");
    JavaRDD<String> javaRDD = sc.parallelize(list);
    RDD<String> rdd = JavaRDD.toRDD(javaRDD);

    MatrixMetadata mm = new MatrixMetadata(3, 3, 9);

    Script script = dml("print('sum: ' + sum(M));").in("M", rdd, mm);
    setExpectedStdOut("sum: 18.0");
    ml.execute(script);
}
Example 4
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testRDDSumCSVDML() {
    System.out.println("MLContextTest - RDD<String> CSV sum DML");

    List<String> list = new ArrayList<>();
    list.add("1,1,1");
    list.add("2,2,2");
    list.add("3,3,3");
    JavaRDD<String> javaRDD = sc.parallelize(list);
    RDD<String> rdd = JavaRDD.toRDD(javaRDD);

    Script script = dml("print('sum: ' + sum(M));").in("M", rdd);
    setExpectedStdOut("sum: 18.0");
    ml.execute(script);
}
Example 5
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqNoMetadataDML() {
    System.out.println("MLContextTest - Tuple sequence no metadata DML");

    List<String> list1 = new ArrayList<>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);

    List<String> list2 = new ArrayList<>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);

    Tuple2 tuple1 = new Tuple2("m1", rdd1);
    Tuple2 tuple2 = new Tuple2("m2", rdd2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();

    Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
Example 6
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testInputTupleSeqWithMetadataDML() {
    System.out.println("MLContextTest - Tuple sequence with metadata DML");

    List<String> list1 = new ArrayList<>();
    list1.add("1,2");
    list1.add("3,4");
    JavaRDD<String> javaRDD1 = sc.parallelize(list1);
    RDD<String> rdd1 = JavaRDD.toRDD(javaRDD1);

    List<String> list2 = new ArrayList<>();
    list2.add("5,6");
    list2.add("7,8");
    JavaRDD<String> javaRDD2 = sc.parallelize(list2);
    RDD<String> rdd2 = JavaRDD.toRDD(javaRDD2);

    MatrixMetadata mm1 = new MatrixMetadata(2, 2);
    MatrixMetadata mm2 = new MatrixMetadata(2, 2);
    Tuple3 tuple1 = new Tuple3("m1", rdd1, mm1);
    Tuple3 tuple2 = new Tuple3("m2", rdd2, mm2);
    List tupleList = new ArrayList();
    tupleList.add(tuple1);
    tupleList.add(tuple2);
    Seq seq = JavaConversions.asScalaBuffer(tupleList).toSeq();

    Script script = dml("print('sums: ' + sum(m1) + ' ' + sum(m2));").in(seq);
    setExpectedStdOut("sums: 10.0 26.0");
    ml.execute(script);
}
Example 7
Source File: CollabFilterCassandra7.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 5 votes |
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
    // model.predict expects a Scala RDD of (user, product) pairs, so the mapped
    // CassandraJavaRDD is converted with JavaRDD.toRDD before prediction.
    RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd
            .map(new org.apache.spark.api.java.function.Function<CassandraRow, Tuple2<Object, Object>>() {
                @Override
                public Tuple2<Object, Object> call(CassandraRow validationRow) throws Exception {
                    return new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL),
                            validationRow.getInt(RatingDO.PRODUCT_COL));
                }
            }));
    JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
    return predictionJavaRdd;
}
Example 8
Source File: JavaSVMWithSGDExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaSVMWithSGDExample");
    SparkContext sc = new SparkContext(conf);
    // $example on$
    String path = "data/mllib/sample_libsvm_data.txt";
    JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(sc, path).toJavaRDD();

    // Split initial RDD into two... [60% training data, 40% testing data].
    JavaRDD<LabeledPoint> training = data.sample(false, 0.6, 11L);
    training.cache();
    JavaRDD<LabeledPoint> test = data.subtract(training);

    // Run training algorithm to build the model.
    int numIterations = 100;
    final SVMModel model = SVMWithSGD.train(training.rdd(), numIterations);

    // Clear the default threshold.
    model.clearThreshold();

    // Compute raw scores on the test set.
    JavaRDD<Tuple2<Object, Object>> scoreAndLabels = test.map(
            new Function<LabeledPoint, Tuple2<Object, Object>>() {
                public Tuple2<Object, Object> call(LabeledPoint p) {
                    Double score = model.predict(p.features());
                    return new Tuple2<Object, Object>(score, p.label());
                }
            });

    // Get evaluation metrics.
    BinaryClassificationMetrics metrics =
            new BinaryClassificationMetrics(JavaRDD.toRDD(scoreAndLabels));
    double auROC = metrics.areaUnderROC();

    System.out.println("Area under ROC = " + auROC);

    // Save and load model
    model.save(sc, "target/tmp/javaSVMWithSGDModel");
    SVMModel sameModel = SVMModel.load(sc, "target/tmp/javaSVMWithSGDModel");
    // $example off$

    sc.stop();
}
Example 9
Source File: CollabFilterCassandra8.java From Spark-Cassandra-Collabfiltering with Apache License 2.0 | 4 votes |
public JavaRDD<Rating> predict(MatrixFactorizationModel model, CassandraJavaRDD<CassandraRow> validationsCassRdd) {
    RDD<Tuple2<Object, Object>> validationsRdd = JavaRDD.toRDD(validationsCassRdd
            .map(validationRow -> new Tuple2<Object, Object>(validationRow.getInt(RatingDO.USER_COL),
                    validationRow.getInt(RatingDO.PRODUCT_COL))));
    JavaRDD<Rating> predictionJavaRdd = model.predict(validationsRdd).toJavaRDD();
    return predictionJavaRdd;
}