Java Code Examples for org.apache.spark.api.java.JavaRDD#map()
The following examples show how to use org.apache.spark.api.java.JavaRDD#map().
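Before the project-specific examples, here is a minimal, self-contained sketch of the basic JavaRDD#map() pattern they all build on. The class name, input values, and doubling lambda are illustrative placeholders, not taken from any of the projects below.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaRddMapSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JavaRddMapSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4));

            // map() applies the given function to every element and returns a new RDD;
            // it is a lazy transformation, so nothing runs until an action such as collect().
            JavaRDD<Integer> doubled = numbers.map(x -> x * 2);

            System.out.println(doubled.collect()); // [2, 4, 6, 8]
        }
    }
}

The same call also accepts an org.apache.spark.api.java.function.Function implementation instead of a lambda, which is the style several of the examples below use.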
Example 1
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testGetTuple1DML() {
    System.out.println("MLContextTest - Get Tuple1<Matrix> DML");
    JavaRDD<String> javaRddString = sc
            .parallelize(Stream.of("1,2,3", "4,5,6", "7,8,9").collect(Collectors.toList()));
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> df = spark.createDataFrame(javaRddRow, schema);

    Script script = dml("N=M*2").in("M", df).out("N");
    Tuple1<Matrix> tuple = ml.execute(script).getTuple("N");
    double[][] n = tuple._1().to2DDoubleArray();

    Assert.assertEquals(2.0, n[0][0], 0);
    Assert.assertEquals(4.0, n[0][1], 0);
    Assert.assertEquals(6.0, n[0][2], 0);
    Assert.assertEquals(8.0, n[1][0], 0);
    Assert.assertEquals(10.0, n[1][1], 0);
    Assert.assertEquals(12.0, n[1][2], 0);
    Assert.assertEquals(14.0, n[2][0], 0);
    Assert.assertEquals(16.0, n[2][1], 0);
    Assert.assertEquals(18.0, n[2][2], 0);
}
Example 2
Source File: SparkSharder.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private static <L extends Locatable, SB extends ShardBoundary> JavaRDD<Shard<L>> shard(JavaSparkContext ctx,
        JavaRDD<L> locatables, Class<L> locatableClass, SAMSequenceDictionary sequenceDictionary,
        JavaRDD<SB> intervals, int maxLocatableLength, boolean useShuffle) {

    JavaRDD<ShardBoundary> paddedIntervals = intervals.map(ShardBoundary::paddedShardBoundary);
    if (useShuffle) {
        throw new UnsupportedOperationException("Shuffle not supported when sharding an RDD of intervals.");
    }
    return joinOverlapping(ctx, locatables, locatableClass, sequenceDictionary, paddedIntervals, maxLocatableLength,
            new MapFunction<Tuple2<ShardBoundary, Iterable<L>>, Shard<L>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public Shard<L> call(Tuple2<ShardBoundary, Iterable<L>> value) {
                    return value._1().createShard(value._2());
                }
            });
}
Example 3
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testDataFrameSumDMLVectorWithNoIDColumnNoFormatSpecified() {
    System.out.println("MLContextTest - DataFrame sum DML, vector with no ID column, no format specified");

    List<Vector> list = new ArrayList<>();
    list.add(Vectors.dense(1.0, 2.0, 3.0));
    list.add(Vectors.dense(4.0, 5.0, 6.0));
    list.add(Vectors.dense(7.0, 8.0, 9.0));
    JavaRDD<Vector> javaRddVector = sc.parallelize(list);

    JavaRDD<Row> javaRddRow = javaRddVector.map(new VectorRow());
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);

    Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame);
    setExpectedStdOut("sum: 45.0");
    ml.execute(script);
}
Example 4
Source File: MarkDuplicatesSparkUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test
public void testReadsMissingReadGroups() {
    JavaSparkContext ctx = SparkContextFactory.getTestSparkContext();
    SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname,
            true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
    samRecordSetBuilder.addFrag("READ", 0, 10000, false);

    JavaRDD<GATKRead> reads = ctx.parallelize(Lists.newArrayList(samRecordSetBuilder.getRecords()), 2)
            .map(SAMRecordToGATKReadAdapter::new);
    reads = reads.map(r -> { r.setReadGroup(null); return r; });
    SAMFileHeader header = samRecordSetBuilder.getHeader();

    try {
        MarkDuplicatesSparkUtils.transformToDuplicateNames(header,
                MarkDuplicatesScoringStrategy.SUM_OF_BASE_QUALITIES, null, reads, 2, false).collect();
        Assert.fail("Should have thrown an exception");
    } catch (Exception e) {
        Assert.assertTrue(e instanceof SparkException);
        Assert.assertTrue(e.getCause() instanceof UserException.ReadMissingReadGroup);
    }
}
Example 5
Source File: MLContextTest.java From systemds with Apache License 2.0 | 6 votes |
@Test
public void testInputMatrixBlockDML() {
    System.out.println("MLContextTest - input MatrixBlock DML");

    List<String> list = new ArrayList<>();
    list.add("10,20,30");
    list.add("40,50,60");
    list.add("70,80,90");
    JavaRDD<String> javaRddString = sc.parallelize(list);

    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);

    Matrix m = new Matrix(dataFrame);
    MatrixBlock matrixBlock = m.toMatrixBlock();
    Script script = dml("avg = avg(M);").in("M", matrixBlock).out("avg");
    double avg = ml.execute(script).getDouble("avg");
    Assert.assertEquals(50.0, avg, 0.0);
}
Example 6
Source File: InMemoryHashIndex.java From hudi with Apache License 2.0 | 6 votes |
@Override
public JavaRDD<WriteStatus> updateLocation(JavaRDD<WriteStatus> writeStatusRDD, JavaSparkContext jsc,
        HoodieTable<T> hoodieTable) {
    return writeStatusRDD.map(new Function<WriteStatus, WriteStatus>() {
        @Override
        public WriteStatus call(WriteStatus writeStatus) {
            for (HoodieRecord record : writeStatus.getWrittenRecords()) {
                if (!writeStatus.isErrored(record.getKey())) {
                    HoodieKey key = record.getKey();
                    Option<HoodieRecordLocation> newLocation = record.getNewLocation();
                    if (newLocation.isPresent()) {
                        recordLocationMap.put(key, newLocation.get());
                    } else {
                        // Delete existing index for a deleted record
                        recordLocationMap.remove(key);
                    }
                }
            }
            return writeStatus;
        }
    });
}
Example 7
Source File: TestLineRecordReaderFunction.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testLineRecordReader() throws Exception {
    File dataFile = new ClassPathResource("iris.dat").getFile();
    List<String> lines = FileUtils.readLines(dataFile);

    JavaSparkContext sc = getContext();
    JavaRDD<String> linesRdd = sc.parallelize(lines);
    CSVRecordReader rr = new CSVRecordReader(0, ',');
    JavaRDD<List<Writable>> out = linesRdd.map(new LineRecordReaderFunction(rr));
    List<List<Writable>> outList = out.collect();

    CSVRecordReader rr2 = new CSVRecordReader(0, ',');
    rr2.initialize(new FileSplit(dataFile));
    Set<List<Writable>> expectedSet = new HashSet<>();
    int totalCount = 0;
    while (rr2.hasNext()) {
        expectedSet.add(rr2.next());
        totalCount++;
    }

    assertEquals(totalCount, outList.size());
    for (List<Writable> line : outList) {
        assertTrue(expectedSet.contains(line));
    }
}
Example 8
Source File: EntitySalienceFeatureExtractorSpark.java From ambiverse-nlu with Apache License 2.0 | 5 votes |
/**
 * Extract a DataFrame ready for training or testing.
 * @param jsc
 * @param documents
 * @param sqlContext
 * @return
 * @throws ResourceInitializationException
 */
public DataFrame extract(JavaSparkContext jsc, JavaRDD<SCAS> documents, SQLContext sqlContext)
        throws ResourceInitializationException {
    Accumulator<Integer> TOTAL_DOCS = jsc.accumulator(0, "TOTAL_DOCS");
    Accumulator<Integer> SALIENT_ENTITY_INSTANCES = jsc.accumulator(0, "SALIENT_ENTITY_INSTANCES");
    Accumulator<Integer> NON_SALIENT_ENTITY_INSTANCES = jsc.accumulator(0, "NON_SALIENT_ENTITY_INSTANCES");

    TrainingSettings trainingSettings = getTrainingSettings();

    FeatureExtractor fe = new NYTEntitySalienceFeatureExtractor();
    final int featureVectorSize = FeatureSetFactory.createFeatureSet(TrainingSettings.FeatureExtractor.ENTITY_SALIENCE).getFeatureVectorSize();

    JavaRDD<TrainingInstance> trainingInstances = documents.flatMap(s -> {
        TOTAL_DOCS.add(1);
        return fe.getTrainingInstances(s.getJCas(),
                trainingSettings.getFeatureExtractor(),
                trainingSettings.getPositiveInstanceScalingFactor());
    });

    StructType schema = new StructType(new StructField[]{
            new StructField("docId", DataTypes.StringType, false, Metadata.empty()),
            new StructField("entityId", DataTypes.StringType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });

    JavaRDD<Row> withFeatures = trainingInstances.map(ti -> {
        if (ti.getLabel() == 1.0) {
            SALIENT_ENTITY_INSTANCES.add(1);
        } else {
            NON_SALIENT_ENTITY_INSTANCES.add(1);
        }
        Vector vei = FeatureValueInstanceUtils.convertToSparkMLVector(ti, featureVectorSize);
        return RowFactory.create(ti.getDocId(), ti.getEntityId(), ti.getLabel(), vei);
    });

    return sqlContext.createDataFrame(withFeatures, schema);
}
Example 9
Source File: SparkExport.java From DataVec with Apache License 2.0 | 5 votes |
public static void exportCSVSpark(String directory, String delimiter, String quote, int outputSplits,
        JavaRDD<List<Writable>> data) {

    //NOTE: Order is probably not random here...
    JavaRDD<String> lines = data.map(new WritablesToStringFunction(delimiter, quote));
    lines = lines.coalesce(outputSplits);   // coalesce returns a new RDD; reassign so outputSplits takes effect
    lines.saveAsTextFile(directory);
}
Example 10
Source File: DataFrameOps.java From toolbox with Apache License 2.0 | 5 votes |
static JavaRDD<DataInstance> toDataInstanceRDD(DataFrame data, Attributes attributes) {

    JavaRDD<double[]> rawRDD = data.rdd()
            .toJavaRDD()
            .map(row -> transformRow2DataInstance(row, attributes));

    return rawRDD.map(v -> new DataInstanceFromDataRow(new DataRowSpark(v, attributes)));
}
Example 11
Source File: MLLibUtil.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Converts JavaRDD labeled points to JavaRDD DataSets.
 * @param data JavaRDD LabeledPoints
 * @param numPossibleLabels number of possible labels
 * @param preCache boolean pre-cache rdd before operation
 * @return
 */
public static JavaRDD<DataSet> fromLabeledPoint(JavaRDD<LabeledPoint> data, final long numPossibleLabels,
        boolean preCache) {
    if (preCache && !data.getStorageLevel().useMemory()) {
        data.cache();
    }
    return data.map(new Function<LabeledPoint, DataSet>() {
        @Override
        public DataSet call(LabeledPoint lp) {
            return fromLabeledPoint(lp, numPossibleLabels);
        }
    });
}
Example 12
Source File: MLContextTest.java From systemds with Apache License 2.0 | 5 votes |
@Test
public void testDataFrameToBinaryBlocks() {
    System.out.println("MLContextTest - DataFrame to binary blocks");

    List<String> list = new ArrayList<>();
    list.add("1,2,3");
    list.add("4,5,6");
    list.add("7,8,9");
    JavaRDD<String> javaRddString = sc.parallelize(list);

    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<>();
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);

    JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = MLContextConversionUtil
            .dataFrameToMatrixBinaryBlocks(dataFrame);
    Tuple2<MatrixIndexes, MatrixBlock> first = binaryBlocks.first();
    MatrixBlock mb = first._2();
    double[][] matrix = DataConverter.convertToDoubleMatrix(mb);

    Assert.assertArrayEquals(new double[] { 1.0, 2.0, 3.0 }, matrix[0], 0.0);
    Assert.assertArrayEquals(new double[] { 4.0, 5.0, 6.0 }, matrix[1], 0.0);
    Assert.assertArrayEquals(new double[] { 7.0, 8.0, 9.0 }, matrix[2], 0.0);
}
Example 13
Source File: BatchHeatMapProcessor.java From lambda-arch with Apache License 2.0 | 5 votes |
/**
 * Converts each row from the iotData RDD to a Measurement.
 *
 * @param iotData RDD of IoTData records parsed from the CSV file
 * @return an RDD containing all data from the CSV file as Measurements
 */
private JavaRDD<Measurement> csvToMeasurements(JavaRDD<IoTData> iotData) {
    JavaRDD<Measurement> map = iotData.map(row -> {
        Coordinate coordinate = new Coordinate(
                Double.valueOf(row.getLatitude()),
                Double.valueOf(row.getLongitude())
        );
        return new Measurement(coordinate, row.getTimestamp());
    });
    return map;
}
Example 14
Source File: PileupSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override
protected void processAlignments(JavaRDD<LocusWalkerContext> rdd, JavaSparkContext ctx) {
    JavaRDD<String> lines = rdd.map(pileupFunction(metadata, outputInsertLength, showVerbose));
    if (numReducers != 0) {
        lines = lines.coalesce(numReducers);
    }
    lines.saveAsTextFile(outputFile);
}
Example 15
Source File: AdaBoostMHLearner.java From sparkboost with Apache License 2.0 | 4 votes |
protected void updateDistributionMatrix(JavaSparkContext sc, JavaRDD<MultilabelPoint> docs, double[][] localDM,
        WeakHypothesis localWH) {
    Broadcast<WeakHypothesis> distWH = sc.broadcast(localWH);
    Broadcast<double[][]> distDM = sc.broadcast(localDM);
    JavaRDD<DMPartialResult> partialResults = docs.map(doc -> {
        int[] validFeatures = doc.getFeatures().indices();
        HashMap<Integer, Integer> dictFeatures = new HashMap<>();
        for (int featID : validFeatures)
            dictFeatures.put(featID, featID);
        HashMap<Integer, Integer> dictLabels = new HashMap<>();
        for (int idx = 0; idx < doc.getLabels().length; idx++)
            dictLabels.put(doc.getLabels()[idx], doc.getLabels()[idx]);

        double[][] dm = distDM.getValue();
        WeakHypothesis wh = distWH.getValue();
        double[] labelsRes = new double[dm.length];
        for (int labelID = 0; labelID < dm.length; labelID++) {
            float catValue = 1;
            if (dictLabels.containsKey(labelID)) {
                catValue = -1;
            }

            // Compute the weak hypothesis value.
            double value = 0;
            WeakHypothesis.WeakHypothesisData v = wh.getLabelData(labelID);
            int pivot = v.getFeatureID();
            if (dictFeatures.containsKey(pivot))
                value = v.getC1();
            else
                value = v.getC0();

            double partialRes = dm[labelID][doc.getPointID()] * Math.exp(catValue * value);
            labelsRes[labelID] = partialRes;
        }

        return new DMPartialResult(doc.getPointID(), labelsRes);
    });

    Iterator<DMPartialResult> itResults = partialResults.toLocalIterator();

    // Update partial results.
    double normalization = 0;
    while (itResults.hasNext()) {
        DMPartialResult r = itResults.next();
        for (int labelID = 0; labelID < localDM.length; labelID++) {
            localDM[labelID][r.docID] = r.labelsRes[labelID];
            normalization += localDM[labelID][r.docID];
        }
    }

    // Normalize all values.
    for (int labelID = 0; labelID < localDM.length; labelID++) {
        for (int docID = 0; docID < localDM[0].length; docID++) {
            localDM[labelID][docID] = localDM[labelID][docID] / normalization;
        }
    }
}
Example 16
Source File: EntitySalienceAnnotatorAndFeatureExtractorSpark.java From ambiverse-nlu with Apache License 2.0 | 4 votes |
/**
 * Extract a DataFrame ready for training or testing.
 * @param jsc
 * @param documents
 * @param sqlContext
 * @return
 * @throws ResourceInitializationException
 */
public DataFrame extract(JavaSparkContext jsc, JavaRDD<SCAS> documents, SQLContext sqlContext)
        throws ResourceInitializationException {
    Accumulator<Integer> TOTAL_DOCS = jsc.accumulator(0, "TOTAL_DOCS");
    Accumulator<Integer> SALIENT_ENTITY_INSTANCES = jsc.accumulator(0, "SALIENT_ENTITY_INSTANCES");
    Accumulator<Integer> NON_SALIENT_ENTITY_INSTANCES = jsc.accumulator(0, "NON_SALIENT_ENTITY_INSTANCES");

    TrainingSettings trainingSettings = getTrainingSettings();

    final SparkSerializableAnalysisEngine ae =
            EntitySalienceFactory.createEntitySalienceEntityAnnotator(trainingSettings.getEntitySalienceEntityAnnotator());
    FeatureExtractor fe = new NYTEntitySalienceFeatureExtractor();
    final int featureVectorSize = FeatureSetFactory.createFeatureSet(TrainingSettings.FeatureExtractor.ENTITY_SALIENCE).getFeatureVectorSize();

    JavaRDD<TrainingInstance> trainingInstances = documents
            .map(s -> {
                TOTAL_DOCS.add(1);
                Logger tmpLogger = LoggerFactory.getLogger(EntitySalienceFeatureExtractorSpark.class);
                String docId = JCasUtil.selectSingle(s.getJCas(), DocumentMetaData.class).getDocumentId();
                tmpLogger.info("Processing document {}.", docId);
                // Before processing the document through the Disambiguation Pipeline, add the AIDA settings
                // in each document.
                SparkUimaUtils.addSettingsToJCas(s.getJCas(),
                        trainingSettings.getDocumentCoherent(),
                        trainingSettings.getDocumentConfidenceThreshold());
                return ae.process(s);
            })
            .flatMap(s -> fe.getTrainingInstances(s.getJCas(),
                    trainingSettings.getFeatureExtractor(),
                    trainingSettings.getPositiveInstanceScalingFactor()));

    StructType schema = new StructType(new StructField[]{
            new StructField("docId", DataTypes.StringType, false, Metadata.empty()),
            new StructField("entity", DataTypes.StringType, false, Metadata.empty()),
            new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
            new StructField("features", new VectorUDT(), false, Metadata.empty())
    });

    JavaRDD<Row> withFeatures = trainingInstances.map(ti -> {
        if (ti.getLabel() == 1.0) {
            SALIENT_ENTITY_INSTANCES.add(1);
        } else {
            NON_SALIENT_ENTITY_INSTANCES.add(1);
        }
        Vector vei = FeatureValueInstanceUtils.convertToSparkMLVector(ti, featureVectorSize);
        return RowFactory.create(ti.getDocId(), ti.getEntityId(), ti.getLabel(), vei);
    });

    return sqlContext.createDataFrame(withFeatures, schema);
}
Example 17
Source File: JavaLatentDirichletAllocationExample.java From SparkDemo with MIT License | 4 votes |
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("JavaKLatentDirichletAllocationExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // Load and parse the data
    String path = "data/mllib/sample_lda_data.txt";
    JavaRDD<String> data = jsc.textFile(path);
    JavaRDD<Vector> parsedData = data.map(
        new Function<String, Vector>() {
            public Vector call(String s) {
                String[] sarray = s.trim().split(" ");
                double[] values = new double[sarray.length];
                for (int i = 0; i < sarray.length; i++) {
                    values[i] = Double.parseDouble(sarray[i]);
                }
                return Vectors.dense(values);
            }
        }
    );
    // Index documents with unique IDs
    JavaPairRDD<Long, Vector> corpus =
        JavaPairRDD.fromJavaRDD(parsedData.zipWithIndex().map(
            new Function<Tuple2<Vector, Long>, Tuple2<Long, Vector>>() {
                public Tuple2<Long, Vector> call(Tuple2<Vector, Long> doc_id) {
                    return doc_id.swap();
                }
            }
        ));
    corpus.cache();

    // Cluster the documents into three topics using LDA
    LDAModel ldaModel = new LDA().setK(3).run(corpus);

    // Output topics. Each is a distribution over words (matching word count vectors)
    System.out.println("Learned topics (as distributions over vocab of " + ldaModel.vocabSize() + " words):");
    Matrix topics = ldaModel.topicsMatrix();
    for (int topic = 0; topic < 3; topic++) {
        System.out.print("Topic " + topic + ":");
        for (int word = 0; word < ldaModel.vocabSize(); word++) {
            System.out.print(" " + topics.apply(word, topic));
        }
        System.out.println();
    }

    ldaModel.save(jsc.sc(), "target/org/apache/spark/JavaLatentDirichletAllocationExample/LDAModel");
    DistributedLDAModel sameModel = DistributedLDAModel.load(jsc.sc(),
        "target/org/apache/spark/JavaLatentDirichletAllocationExample/LDAModel");
    // $example off$

    jsc.stop();
}
Example 18
Source File: HierarchicalAlignmentClient.java From render with GNU General Public License v2.0 | 4 votes |
private void createWarpStackForTier() throws IOException {
    LOG.info("createWarpStackForTier: entry");

    final ProcessTimer timer = new ProcessTimer();

    final Set<StackId> existingRoughProjectStackIds = new HashSet<>(driverRoughRender.getProjectStacks());

    final StackId warpStackId = HierarchicalStack.deriveWarpStackIdForTier(roughTilesStackId, currentTier);

    boolean generateWarpStack = true;
    if (existingRoughProjectStackIds.contains(warpStackId) &&
        parameters.keepExisting(PipelineStep.WARP)) {
        generateWarpStack = false;
    }

    if (generateWarpStack) {

        // remove any existing warp stack results
        driverRoughRender.deleteStack(warpStackId.getStack(), null);

        final StackMetaData roughTilesStackMetaData = driverRoughRender.getStackMetaData(roughTilesStackId.getStack());
        driverRoughRender.setupDerivedStack(roughTilesStackMetaData, warpStackId.getStack());

        final String projectForTier = this.tierProject;

        final JavaRDD<Double> rddZValues = sparkContext.parallelize(zValues);
        final HierarchicalWarpFieldStackFunction warpFieldStackFunction =
                new HierarchicalWarpFieldStackFunction(parameters.renderWeb.baseDataUrl,
                                                       parameters.renderWeb.owner,
                                                       currentTier,
                                                       projectForTier,
                                                       tierParentStackId,
                                                       warpStackId.getStack(),
                                                       parameters.consensusBuildMethod);

        final JavaRDD<Integer> rddTileCounts = rddZValues.map(warpFieldStackFunction);

        final List<Integer> tileCountList = rddTileCounts.collect();

        LOG.info("createWarpStackForTier: counting results");

        long total = 0;
        for (final Integer tileCount : tileCountList) {
            total += tileCount;
        }

        LOG.info("createWarpStackForTier: added {} tile specs to {}", total, warpStackId);

        driverRoughRender.setStackState(warpStackId.getStack(), StackMetaData.StackState.COMPLETE);
    }

    LOG.info("createWarpStackForTier: exit, processing took {} seconds", timer.getElapsedSeconds());
}
Example 19
Source File: DataFrames.java From DataVec with Apache License 2.0 | 3 votes |
/**
 * Creates a data frame from a collection of writables
 * rdd given a schema
 *
 * @param schema the schema to use
 * @param data   the data to convert
 * @return the dataframe object
 */
public static DataRowsFacade toDataFrame(Schema schema, JavaRDD<List<Writable>> data) {
    JavaSparkContext sc = new JavaSparkContext(data.context());
    SQLContext sqlContext = new SQLContext(sc);
    JavaRDD<Row> rows = data.map(new ToRow(schema));
    return dataRows(sqlContext.createDataFrame(rows, fromSchema(schema)));
}
Example 20
Source File: DataFrameOps.java From toolbox with Apache License 2.0 | 2 votes |
static JavaRDD<Row> toRowRDD(JavaRDD<DataInstance> rawRDD, Attributes atts) {
    // FIXME: Categorical values should be inserted with their corresponding state name
    return rawRDD.map(v -> transformArray2RowAttributes(v, atts));
}