org.apache.spark.api.java.JavaFutureAction Java Examples
The following examples show how to use
org.apache.spark.api.java.JavaFutureAction.
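A JavaFutureAction is the handle returned by Spark's asynchronous actions (collectAsync, countAsync, takeAsync, foreachAsync, foreachPartitionAsync): it is a java.util.concurrent.Future that additionally exposes the IDs of the Spark jobs it triggers via jobIds(). As a rough orientation before the examples below, here is a minimal sketch of the typical submit/poll/get pattern; the class name, app name, data, and one-second poll interval are illustrative placeholders, not taken from any of the projects listed.

// Illustrative sketch only: submit an asynchronous action, poll the
// returned JavaFutureAction, then block for the final result.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaFutureActionSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("JavaFutureActionSketch");
    try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

      JavaFutureAction<Long> future = rdd.countAsync();  // returns immediately
      while (!future.isDone()) {
        Thread.sleep(1000);                              // arbitrary poll interval
        System.out.println("Spark jobs submitted so far: " + future.jobIds());
      }
      System.out.println("Count: " + future.get());      // standard Future contract
    }
  }
}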
Example #1
Source File: JavaStatusTrackerDemo.java From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  SparkSession spark = SparkSession
    .builder()
    .appName(APP_NAME)
    .getOrCreate();

  final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

  // Example of implementing a progress reporter for a simple job.
  JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(
      new IdentityWithDelay<Integer>());
  JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();

  while (!jobFuture.isDone()) {
    Thread.sleep(1000);  // 1 second
    List<Integer> jobIds = jobFuture.jobIds();
    if (jobIds.isEmpty()) {
      continue;
    }
    int currentJobId = jobIds.get(jobIds.size() - 1);
    SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
    SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
    System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() +
        " active, " + stageInfo.numCompletedTasks() + " complete");
  }

  System.out.println("Job results are: " + jobFuture.get());
  spark.stop();
}
Example #2
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #3
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #4
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #5
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #6
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #7
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync() {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #8
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync() {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #9
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #10
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #11
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #12
Source File: LocalSparkJobStatus.java From sqoop-on-spark with Apache License 2.0
public LocalSparkJobStatus(JavaSparkContext sparkContext, int jobId,
                           JavaFutureAction<Void> future) {
  this.sparkContext = sparkContext;
  this.jobId = jobId;
  this.future = future;
}
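The constructor above only stores the handle; the rest of the class (not shown) can answer status queries by combining the stored JavaFutureAction with the JavaSparkContext's statusTracker(). The following is a hypothetical sketch of that kind of delegation, not the actual sqoop-on-spark implementation; the class and method names are invented for illustration.

// Hypothetical sketch: a status wrapper built around a stored JavaFutureAction.
import org.apache.spark.SparkJobInfo;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaSparkContext;

class AsyncJobStatusSketch {
  private final JavaSparkContext sparkContext;
  private final int jobId;
  private final JavaFutureAction<Void> future;

  AsyncJobStatusSketch(JavaSparkContext sparkContext, int jobId, JavaFutureAction<Void> future) {
    this.sparkContext = sparkContext;
    this.jobId = jobId;
    this.future = future;
  }

  boolean isFinished() {
    return future.isDone();                  // plain java.util.concurrent.Future semantics
  }

  void cancel() {
    if (!future.isDone()) {
      future.cancel(true);                   // cancels the underlying Spark jobs
    }
  }

  int[] stageIds() {
    SparkJobInfo info = sparkContext.statusTracker().getJobInfo(jobId);
    return info == null ? new int[0] : info.stageIds();
  }
}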
Example #13
Source File: SparkTableChecker.java From spliceengine with GNU Affero General Public License v3.0
@Override
public List<String> checkIndex(PairDataSet index,
                               String indexName,
                               LeadingIndexColumnInfo leadingIndexColumnInfo,
                               long conglomerate,
                               DDLMessage.TentativeIndex tentativeIndex) throws Exception {
    this.indexName = indexName;
    this.conglomerate = conglomerate;
    this.tentativeIndex = tentativeIndex;
    List<String> messages = Lists.newLinkedList();

    // Count number of rows in base table and reuse it if the index does not exclude default or null keys
    JavaFutureAction<Long> tableCountFuture = null;
    filteredTable = baseTable.filter(new IndexFilter<>(leadingIndexColumnInfo)).index(new KeyByRowIdFunction<>());
    if (baseTableCount == 0 || leadingIndexColumnInfo != null) {
        SpliceSpark.pushScope(String.format("Count table %s.%s", schemaName, tableName));
        tableCountFuture = ((SparkPairDataSet) filteredTable).rdd.countAsync();
        SpliceSpark.popScope();
    }

    // Count number of rows in the index
    SpliceSpark.pushScope(String.format("Count index %s.%s", schemaName, indexName));
    JavaFutureAction<Long> indexCountFuture = ((SparkPairDataSet) index).rdd.countAsync();
    SpliceSpark.popScope();

    messages.addAll(checkMissingIndexes(filteredTable, index));

    if (tableCountFuture != null) {
        if (leadingIndexColumnInfo == null) {
            baseTableCount = tableCountFuture.get();
        } else {
            filteredTableCount = tableCountFuture.get();
        }
    }
    indexCount = indexCountFuture.get();
    long tableCount = leadingIndexColumnInfo != null ? filteredTableCount : baseTableCount;

    // If index and table count do not match, or there are rows not indexed, check for invalid indexes
    if (indexCount != tableCount || missingIndexCount != 0) {
        messages.addAll(checkInvalidIndexes(filteredTable, index));
    }

    if (indexCount - invalidIndexCount > tableCount - missingIndexCount) {
        messages.addAll(checkDuplicateIndexes(filteredTable, index));
    }
    return messages;
}
Example #14
Source File: CompactionJob.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        // the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }

        initializeJob();
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();
        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            // the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }

        // the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        concurrentCompactions.decrementAndGet();
    }
}