Java Code Examples for org.apache.spark.api.java.JavaRDD#setName()
The following examples show how to use
org.apache.spark.api.java.JavaRDD#setName().
Each example notes the open-source project, source file, and license it was taken from.
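Before the project-specific examples, here is a minimal, self-contained sketch of what setName() does: it assigns a human-readable label to an RDD, which then appears in the Spark UI and in toDebugString() output. The class name and application name below are hypothetical and chosen only for illustration.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SetNameExample {                        // hypothetical class name
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("setName-demo")          // hypothetical application name
                .setMaster("local[*]");              // local mode, for illustration only
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
            // setName() labels the RDD; the label shows up in the Spark UI and in toDebugString()
            numbers.setName("Demo Numbers");
            System.out.println(numbers.toDebugString());
        }
    }
}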
Example 1
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 6 votes |
@SuppressWarnings({ "unchecked", "rawtypes" }) @Override public DataSet< V> union(DataSet<V> dataSet, OperationContext operationContext, String name, boolean pushScope, String scopeDetail) { pushScopeIfNeeded((SpliceFunction)null, pushScope, scopeDetail); try { if (dataSet instanceof SparkDataSet) { JavaRDD rdd1 = rdd.union(((SparkDataSet) dataSet).rdd); rdd1.setName(name != null ? name : RDDName.UNION.displayName()); return new SparkDataSet<>(rdd1); } else { // Let the NativeSparkDataset perform the conversion return dataSet.union(this, operationContext); } } finally { if (pushScope) SpliceSpark.popScope(); } }
Example 2
Source File: SparkExecutionOperator.java From rheem with Apache License 2.0 | 5 votes |
/**
 * Utility method to name an RDD according to this instance's name.
 *
 * @param rdd that should be renamed
 * @see #getName()
 */
default void name(JavaRDD<?> rdd) {
    if (this.getName() != null) {
        rdd.setName(this.getName());
    } else {
        rdd.setName(this.toString());
    }
}
Example 3
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public <Op extends SpliceOperation> DataSet<V> take(TakeFunction<Op, V> takeFunction) {
    JavaRDD<V> rdd1 = rdd.mapPartitions(new SparkFlatMapFunction<>(takeFunction));
    rdd1.setName(takeFunction.getSparkName());
    return new SparkDataSet<>(rdd1);
}
Example 4
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public DataSet<V> coalesce(int numPartitions, boolean shuffle) { JavaRDD rdd1 = rdd.coalesce(numPartitions, shuffle); rdd1.setName(String.format("Coalesce %d partitions", numPartitions)); SparkUtils.setAncestorRDDNames(rdd1, 3, new String[]{"Coalesce Data", "Shuffle Data", "Map For Coalesce"}, null); return new SparkDataSet<>(rdd1); }
Example 5
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public DataSet<V> coalesce(int numPartitions, boolean shuffle, boolean isLast, OperationContext context, boolean pushScope, String scopeDetail) { pushScopeIfNeeded(context, pushScope, scopeDetail); try { JavaRDD rdd1 = rdd.coalesce(numPartitions, shuffle); rdd1.setName(String.format("Coalesce %d partitions", numPartitions)); SparkUtils.setAncestorRDDNames(rdd1, 3, new String[]{"Coalesce Data", "Shuffle Data", "Map For Coalesce"}, null); return new SparkDataSet<V>(rdd1); } finally { if (pushScope) context.popScope(); } }
Example 6
Source File: SparkDataSetProcessor.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "unchecked" }) @Override public <V> DataSet<V> singleRowDataSet(V value, Object caller) { String scope = StreamUtils.getScopeString(caller); SpliceSpark.pushScope(scope); try { JavaRDD rdd1 = SpliceSpark.getContext().parallelize(Collections.singletonList(value), 1); rdd1.setName(RDDName.SINGLE_ROW_DATA_SET.displayName()); return new SparkDataSet<>(rdd1); } finally { SpliceSpark.popScope(); } }
Example 7
Source File: SparkDataSetProcessor.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public <V> DataSet< V> createDataSet(Iterator<V> value, String name) { JavaRDD rdd1 = SpliceSpark.getContext().parallelize(Lists.newArrayList(value)); rdd1.setName(name); return new SparkDataSet(rdd1); }
Example 8
Source File: CompactionJob.java From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        //the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }

        initializeJob();
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();

        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            //the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }

        //the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        concurrentCompactions.decrementAndGet();
    }
}