org.apache.spark.api.java.JavaFutureAction Java Examples
The following examples show how to use
org.apache.spark.api.java.JavaFutureAction.
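A JavaFutureAction is the handle returned by Spark's asynchronous actions (collectAsync, countAsync, takeAsync, foreachAsync, foreachPartitionAsync): it is a java.util.concurrent.Future that additionally exposes the IDs of the Spark jobs it triggers via jobIds(). As a rough orientation before the examples below, here is a minimal sketch of the typical submit/poll/get pattern; the class name, app name, data, and one-second poll interval are illustrative placeholders, not taken from any of the projects listed.

// Illustrative sketch only: submit an asynchronous action, poll the
// returned JavaFutureAction, then block for the final result.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class JavaFutureActionSketch {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("JavaFutureActionSketch");
    try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

      JavaFutureAction<Long> future = rdd.countAsync();  // returns immediately
      while (!future.isDone()) {
        Thread.sleep(1000);                              // arbitrary poll interval
        System.out.println("Spark jobs submitted so far: " + future.jobIds());
      }
      System.out.println("Count: " + future.get());      // standard Future contract
    }
  }
}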
Example #1
Source File: JavaStatusTrackerDemo.java From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  SparkSession spark = SparkSession
    .builder()
    .appName(APP_NAME)
    .getOrCreate();

  final JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

  // Example of implementing a progress reporter for a simple job.
  JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 5).map(
      new IdentityWithDelay<Integer>());
  JavaFutureAction<List<Integer>> jobFuture = rdd.collectAsync();

  while (!jobFuture.isDone()) {
    Thread.sleep(1000);  // 1 second
    List<Integer> jobIds = jobFuture.jobIds();
    if (jobIds.isEmpty()) {
      continue;
    }
    int currentJobId = jobIds.get(jobIds.size() - 1);
    SparkJobInfo jobInfo = jsc.statusTracker().getJobInfo(currentJobId);
    SparkStageInfo stageInfo = jsc.statusTracker().getStageInfo(jobInfo.stageIds()[0]);
    System.out.println(stageInfo.numTasks() + " tasks total: " + stageInfo.numActiveTasks() +
        " active, " + stageInfo.numCompletedTasks() + " complete");
  }

  System.out.println("Job results are: " + jobFuture.get());
  spark.stop();
}
Example #2
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #3
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync() {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #4
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #5
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #6
Source File: SparkJavaRDD.java From incubator-nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example #7
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> collectAsync() {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #8
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Long> countAsync() {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #9
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachAsync(final VoidFunction<T> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #10
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<Void> foreachPartitionAsync(final VoidFunction<Iterator<T>> f) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #11
Source File: JavaRDD.java From nemo with Apache License 2.0
@Override
public JavaFutureAction<List<T>> takeAsync(final int num) {
  throw new UnsupportedOperationException("Operation not yet implemented.");
}
Example #12
Source File: LocalSparkJobStatus.java From sqoop-on-spark with Apache License 2.0
public LocalSparkJobStatus(JavaSparkContext sparkContext, int jobId,
                           JavaFutureAction<Void> future) {
  this.sparkContext = sparkContext;
  this.jobId = jobId;
  this.future = future;
}
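The constructor above only stores the handle; the rest of the class (not shown) can answer status queries by combining the stored JavaFutureAction with the JavaSparkContext's statusTracker(). The following is a hypothetical sketch of that kind of delegation, not the actual sqoop-on-spark implementation; the class and method names are invented for illustration.

// Hypothetical sketch: a status wrapper built around a stored JavaFutureAction.
import org.apache.spark.SparkJobInfo;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaSparkContext;

class AsyncJobStatusSketch {
  private final JavaSparkContext sparkContext;
  private final int jobId;
  private final JavaFutureAction<Void> future;

  AsyncJobStatusSketch(JavaSparkContext sparkContext, int jobId, JavaFutureAction<Void> future) {
    this.sparkContext = sparkContext;
    this.jobId = jobId;
    this.future = future;
  }

  boolean isFinished() {
    return future.isDone();                  // plain java.util.concurrent.Future semantics
  }

  void cancel() {
    if (!future.isDone()) {
      future.cancel(true);                   // cancels the underlying Spark jobs
    }
  }

  int[] stageIds() {
    SparkJobInfo info = sparkContext.statusTracker().getJobInfo(jobId);
    return info == null ? new int[0] : info.stageIds();
  }
}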
Example #13
Source File: SparkTableChecker.java From spliceengine with GNU Affero General Public License v3.0
@Override
public List<String> checkIndex(PairDataSet index,
                               String indexName,
                               LeadingIndexColumnInfo leadingIndexColumnInfo,
                               long conglomerate,
                               DDLMessage.TentativeIndex tentativeIndex) throws Exception {
    this.indexName = indexName;
    this.conglomerate = conglomerate;
    this.tentativeIndex = tentativeIndex;
    List<String> messages = Lists.newLinkedList();

    // Count number of rows in base table and reuse it if the index does not exclude default or null keys
    JavaFutureAction<Long> tableCountFuture = null;
    filteredTable = baseTable.filter(new IndexFilter<>(leadingIndexColumnInfo)).index(new KeyByRowIdFunction<>());
    if (baseTableCount == 0 || leadingIndexColumnInfo != null) {
        SpliceSpark.pushScope(String.format("Count table %s.%s", schemaName, tableName));
        tableCountFuture = ((SparkPairDataSet) filteredTable).rdd.countAsync();
        SpliceSpark.popScope();
    }

    // Count number of rows in the index
    SpliceSpark.pushScope(String.format("Count index %s.%s", schemaName, indexName));
    JavaFutureAction<Long> indexCountFuture = ((SparkPairDataSet) index).rdd.countAsync();
    SpliceSpark.popScope();

    messages.addAll(checkMissingIndexes(filteredTable, index));

    if (tableCountFuture != null) {
        if (leadingIndexColumnInfo == null) {
            baseTableCount = tableCountFuture.get();
        } else {
            filteredTableCount = tableCountFuture.get();
        }
    }
    indexCount = indexCountFuture.get();
    long tableCount = leadingIndexColumnInfo != null ? filteredTableCount : baseTableCount;

    // If index and table count do not match, or there are rows not indexed, check for invalid indexes
    if (indexCount != tableCount || missingIndexCount != 0) {
        messages.addAll(checkInvalidIndexes(filteredTable, index));
    }

    if (indexCount - invalidIndexCount > tableCount - missingIndexCount) {
        messages.addAll(checkDuplicateIndexes(filteredTable, index));
    }
    return messages;
}
Example #14
Source File: CompactionJob.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        // the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }

        initializeJob();
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();
        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            // the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }

        // the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        concurrentCompactions.decrementAndGet();
    }
}