Java Code Examples for org.apache.spark.api.java.JavaRDD#id()
The following examples show how to use org.apache.spark.api.java.JavaRDD#id().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseTrainingMaster.java From deeplearning4j with Apache License 2.0 | 5 votes |
protected JavaRDD<String> exportIfRequired(JavaSparkContext sc, JavaRDD<DataSet> trainingData) { ExportSupport.assertExportSupported(sc); if (collectTrainingStats) stats.logExportStart(); //Two possibilities here: // 1. We've seen this RDD before (i.e., multiple epochs training case) // 2. We have not seen this RDD before // (a) And we haven't got any stored data -> simply export // (b) And we previously exported some data from a different RDD -> delete the last data int currentRDDUid = trainingData.id(); //Id is a "A unique ID for this RDD (within its SparkContext)." String baseDir; if (lastExportedRDDId == Integer.MIN_VALUE) { //Haven't seen a RDD<DataSet> yet in this training master -> export data baseDir = export(trainingData); } else { if (lastExportedRDDId == currentRDDUid) { //Use the already-exported data again for another epoch baseDir = getBaseDirForRDD(trainingData); } else { //The new RDD is different to the last one // Clean up the data for the last one, and export deleteTempDir(sc, lastRDDExportPath); baseDir = export(trainingData); } } if (collectTrainingStats) stats.logExportEnd(); return sc.textFile(baseDir + "paths/"); }
Example 2
Source File: BaseTrainingMaster.java From deeplearning4j with Apache License 2.0 | 5 votes |
protected JavaRDD<String> exportIfRequiredMDS(JavaSparkContext sc, JavaRDD<MultiDataSet> trainingData) { ExportSupport.assertExportSupported(sc); if (collectTrainingStats) stats.logExportStart(); //Two possibilities here: // 1. We've seen this RDD before (i.e., multiple epochs training case) // 2. We have not seen this RDD before // (a) And we haven't got any stored data -> simply export // (b) And we previously exported some data from a different RDD -> delete the last data int currentRDDUid = trainingData.id(); //Id is a "A unique ID for this RDD (within its SparkContext)." String baseDir; if (lastExportedRDDId == Integer.MIN_VALUE) { //Haven't seen a RDD<DataSet> yet in this training master -> export data baseDir = exportMDS(trainingData); } else { if (lastExportedRDDId == currentRDDUid) { //Use the already-exported data again for another epoch baseDir = getBaseDirForRDD(trainingData); } else { //The new RDD is different to the last one // Clean up the data for the last one, and export deleteTempDir(sc, lastRDDExportPath); baseDir = exportMDS(trainingData); } } if (collectTrainingStats) stats.logExportEnd(); return sc.textFile(baseDir + "paths/"); }
Example 3
Source File: BaseTrainingMaster.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Exports the given {@code DataSet} RDD beneath its base directory and records it as the most
 * recently exported RDD.
 *
 * <p>Each partition is passed through a {@code BatchAndExportDataSetsFunction} configured with
 * {@code batchSizePerWorker} and the {@code data/} sub-directory; the strings that function
 * returns are saved as text under the {@code paths/} sub-directory.
 *
 * @param trainingData the data to export
 * @return the base directory of the export (as produced by {@code getBaseDirForRDD})
 */
protected String export(JavaRDD<DataSet> trainingData) {
    final String exportRoot = getBaseDirForRDD(trainingData);
    log.info("Initiating RDD<DataSet> export at {}", exportRoot);

    trainingData
            .mapPartitionsWithIndex(
                    new BatchAndExportDataSetsFunction(batchSizePerWorker, exportRoot + "data/"), true)
            .saveAsTextFile(exportRoot + "paths/");
    log.info("RDD<DataSet> export complete at {}", exportRoot);

    // Remember what was exported so a later call can reuse or clean it up.
    lastExportedRDDId = trainingData.id();
    lastRDDExportPath = exportRoot;
    return exportRoot;
}
Example 4
Source File: BaseTrainingMaster.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Exports the given {@code MultiDataSet} RDD beneath its base directory and records it as the
 * most recently exported RDD.
 *
 * <p>Each partition is passed through a {@code BatchAndExportMultiDataSetsFunction} configured
 * with {@code batchSizePerWorker} and the {@code data/} sub-directory; the strings that function
 * returns are saved as text under the {@code paths/} sub-directory.
 *
 * @param trainingData the data to export
 * @return the base directory of the export (as produced by {@code getBaseDirForRDD})
 */
protected String exportMDS(JavaRDD<MultiDataSet> trainingData) {
    final String exportRoot = getBaseDirForRDD(trainingData);
    log.info("Initiating RDD<MultiDataSet> export at {}", exportRoot);

    trainingData
            .mapPartitionsWithIndex(
                    new BatchAndExportMultiDataSetsFunction(batchSizePerWorker, exportRoot + "data/"), true)
            .saveAsTextFile(exportRoot + "paths/");
    log.info("RDD<MultiDataSet> export complete at {}", exportRoot);

    // Remember what was exported so a later call can reuse or clean it up.
    lastExportedRDDId = trainingData.id();
    lastRDDExportPath = exportRoot;
    return exportRoot;
}
Example 5
Source File: BaseTrainingMaster.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Builds the export base directory for the given RDD, in the form
 * {@code <exportDirectory>/<trainingMasterUID>/<rdd id>/} (always ending with a slash).
 *
 * <p>If no export directory has been set yet, it is first initialised from
 * {@code getDefaultExportDirectory} using the RDD's context, and that value is kept for
 * subsequent calls.
 *
 * @param rdd the RDD whose id forms the last path segment
 * @return the base directory path for this RDD
 */
protected String getBaseDirForRDD(JavaRDD<?> rdd) {
    if (exportDirectory == null) {
        exportDirectory = getDefaultExportDirectory(rdd.context());
    }

    // Only add a separator when the configured directory does not already end with one.
    String separator = "/";
    if (exportDirectory.endsWith("/")) {
        separator = "";
    }
    return exportDirectory + separator + trainingMasterUID + "/" + rdd.id() + "/";
}