Java Code Examples for org.apache.spark.api.java.JavaRDD#setName()
The following examples show how to use
org.apache.spark.api.java.JavaRDD#setName().
Each example notes the open-source project, source file, and license it was taken from.
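Before the project-specific examples, here is a minimal, self-contained sketch of what setName() does: it assigns a human-readable label to an RDD, which then appears in the Spark UI and in toDebugString() output. The class name and application name below are hypothetical and chosen only for illustration.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class SetNameExample {                        // hypothetical class name
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setAppName("setName-demo")          // hypothetical application name
                .setMaster("local[*]");              // local mode, for illustration only
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            JavaRDD<Integer> numbers = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
            // setName() labels the RDD; the label shows up in the Spark UI and in toDebugString()
            numbers.setName("Demo Numbers");
            System.out.println(numbers.toDebugString());
        }
    }
}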
Example 1
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 6 votes |
@SuppressWarnings({ "unchecked", "rawtypes" }) @Override public DataSet< V> union(DataSet<V> dataSet, OperationContext operationContext, String name, boolean pushScope, String scopeDetail) { pushScopeIfNeeded((SpliceFunction)null, pushScope, scopeDetail); try { if (dataSet instanceof SparkDataSet) { JavaRDD rdd1 = rdd.union(((SparkDataSet) dataSet).rdd); rdd1.setName(name != null ? name : RDDName.UNION.displayName()); return new SparkDataSet<>(rdd1); } else { // Let the NativeSparkDataset perform the conversion return dataSet.union(this, operationContext); } } finally { if (pushScope) SpliceSpark.popScope(); } }
Example 2
Source File: SparkExecutionOperator.java From rheem with Apache License 2.0 | 5 votes |
/**
 * Utility method to name an RDD according to this instance's name.
 *
 * @param rdd that should be renamed
 * @see #getName()
 */
default void name(JavaRDD<?> rdd) {
    if (this.getName() != null) {
        rdd.setName(this.getName());
    } else {
        rdd.setName(this.toString());
    }
}
Example 3
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@Override
public <Op extends SpliceOperation> DataSet<V> take(TakeFunction<Op, V> takeFunction) {
    JavaRDD<V> rdd1 = rdd.mapPartitions(new SparkFlatMapFunction<>(takeFunction));
    rdd1.setName(takeFunction.getSparkName());
    return new SparkDataSet<>(rdd1);
}
Example 4
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public DataSet<V> coalesce(int numPartitions, boolean shuffle) { JavaRDD rdd1 = rdd.coalesce(numPartitions, shuffle); rdd1.setName(String.format("Coalesce %d partitions", numPartitions)); SparkUtils.setAncestorRDDNames(rdd1, 3, new String[]{"Coalesce Data", "Shuffle Data", "Map For Coalesce"}, null); return new SparkDataSet<>(rdd1); }
Example 5
Source File: SparkDataSet.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public DataSet<V> coalesce(int numPartitions, boolean shuffle, boolean isLast, OperationContext context, boolean pushScope, String scopeDetail) { pushScopeIfNeeded(context, pushScope, scopeDetail); try { JavaRDD rdd1 = rdd.coalesce(numPartitions, shuffle); rdd1.setName(String.format("Coalesce %d partitions", numPartitions)); SparkUtils.setAncestorRDDNames(rdd1, 3, new String[]{"Coalesce Data", "Shuffle Data", "Map For Coalesce"}, null); return new SparkDataSet<V>(rdd1); } finally { if (pushScope) context.popScope(); } }
Example 6
Source File: SparkDataSetProcessor.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "unchecked" }) @Override public <V> DataSet<V> singleRowDataSet(V value, Object caller) { String scope = StreamUtils.getScopeString(caller); SpliceSpark.pushScope(scope); try { JavaRDD rdd1 = SpliceSpark.getContext().parallelize(Collections.singletonList(value), 1); rdd1.setName(RDDName.SINGLE_ROW_DATA_SET.displayName()); return new SparkDataSet<>(rdd1); } finally { SpliceSpark.popScope(); } }
Example 7
Source File: SparkDataSetProcessor.java From spliceengine with GNU Affero General Public License v3.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public <V> DataSet< V> createDataSet(Iterator<V> value, String name) { JavaRDD rdd1 = SpliceSpark.getContext().parallelize(Lists.newArrayList(value)); rdd1.setName(name); return new SparkDataSet(rdd1); }
Example 8
Source File: CompactionJob.java From spliceengine with GNU Affero General Public License v3.0 | 4 votes |
@Override
public Void call() throws Exception {
    if (!status.markRunning()) {
        //the client has already cancelled us or has died before we could get started, so stop now
        return null;
    }
    int order = concurrentCompactions.incrementAndGet();
    try {
        int maxConcurrentCompactions = HConfiguration.getConfiguration().getOlapCompactionMaximumConcurrent();
        if (order > maxConcurrentCompactions) {
            status.markCompleted(new FailedOlapResult(
                    new CancellationException("Maximum number of concurrent compactions already running")));
            return null;
        }

        initializeJob();
        Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
        if (LOG.isTraceEnabled()) {
            LOG.trace("regionLocation = " + compactionRequest.regionLocation);
        }
        conf.set(MRConstants.REGION_LOCATION, compactionRequest.regionLocation);
        conf.set(MRConstants.COMPACTION_FILES, getCompactionFilesBase64String());

        SpliceSpark.pushScope(compactionRequest.scope + ": Parallelize");
        //JavaRDD rdd1 = SpliceSpark.getContext().parallelize(files, 1);
        //ParallelCollectionRDD rdd1 = getCompactionRDD();

        JavaSparkContext context = SpliceSpark.getContext();
        JavaPairRDD<Integer, Iterator> rdd1 = context.newAPIHadoopRDD(conf,
                CompactionInputFormat.class,
                Integer.class,
                Iterator.class);
        rdd1.setName("Distribute Compaction Load");
        SpliceSpark.popScope();

        compactionRequest.compactionFunction.setContext(new SparkCompactionContext());
        SpliceSpark.pushScope(compactionRequest.scope + ": Compact files");
        JavaRDD<String> rdd2 = rdd1.mapPartitions(new SparkFlatMapFunction<>(compactionRequest.compactionFunction));
        rdd2.setName(compactionRequest.jobDetails);
        SpliceSpark.popScope();

        SpliceSpark.pushScope("Compaction");
        if (!status.isRunning()) {
            //the client timed out during our setup, so it's time to stop
            return null;
        }
        long startTime = clock.currentTimeMillis();
        JavaFutureAction<List<String>> collectFuture = rdd2.collectAsync();
        while (!collectFuture.isDone()) {
            try {
                collectFuture.get(tickTime, TimeUnit.MILLISECONDS);
            } catch (TimeoutException te) {
                /*
                 * A TimeoutException just means that tickTime expired. That's okay, we just stick our
                 * head up and make sure that the client is still operating
                 */
            }
            if (!status.isRunning()) {
                /*
                 * The client timed out, so cancel the compaction and terminate
                 */
                collectFuture.cancel(true);
                context.cancelJobGroup(compactionRequest.jobGroup);
                return null;
            }
            if (clock.currentTimeMillis() - startTime > compactionRequest.maxWait) {
                // Make sure compaction is scheduled in Spark and running, otherwise cancel it and fallback to in-HBase compaction
                if (!compactionRunning(collectFuture.jobIds())) {
                    collectFuture.cancel(true);
                    context.cancelJobGroup(compactionRequest.jobGroup);
                    status.markCompleted(new FailedOlapResult(
                            new RejectedExecutionException("No resources available for running compaction in Spark")));
                    return null;
                }
            }
        }

        //the compaction completed
        List<String> sPaths = collectFuture.get();
        status.markCompleted(new CompactionResult(sPaths));
        SpliceSpark.popScope();

        if (LOG.isTraceEnabled())
            SpliceLogUtils.trace(LOG, "Paths Returned: %s", sPaths);
        return null;
    } finally {
        concurrentCompactions.decrementAndGet();
    }
}