Java Code Examples for org.apache.spark.TaskContext#get()
The following examples show how to use org.apache.spark.TaskContext#get().
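A pattern common to the examples below: TaskContext.get() returns the context of the task running on the current executor thread, or null when called outside a task (for example on the driver), so callers typically null-check before reading stage/task identifiers or registering listeners. The following minimal sketch illustrates that pattern; the class name and the helper method are illustrative and not taken from any of the projects below.

import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

public class TaskContextUsage {

    // Illustrative helper: describe the current task, or fall back when not inside one.
    public static String describeCurrentTask() {
        TaskContext ctx = TaskContext.get();
        if (ctx == null) {
            // Not running inside a Spark task (e.g. driver-side code).
            return "no active task";
        }
        // Register cleanup to run when the task finishes, whether it succeeds or fails.
        ctx.addTaskCompletionListener((TaskCompletionListener) completed -> {
            // release per-task resources here
        });
        return String.format("stage=%d partition=%d attempt=%d taskAttemptId=%d",
                ctx.stageId(), ctx.partitionId(), ctx.attemptNumber(), ctx.taskAttemptId());
    }
}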
Example 1
Source File: SparkTableUtil.java From iceberg with Apache License 2.0
private static Iterator<ManifestFile> buildManifest(SerializableConfiguration conf, PartitionSpec spec,
                                                    String basePath, Iterator<Tuple2<String, DataFile>> fileTuples) {
  if (fileTuples.hasNext()) {
    FileIO io = new HadoopFileIO(conf.get());
    TaskContext ctx = TaskContext.get();
    String suffix = String.format("stage-%d-task-%d-manifest", ctx.stageId(), ctx.taskAttemptId());
    Path location = new Path(basePath, suffix);
    String outputPath = FileFormat.AVRO.addExtension(location.toString());
    OutputFile outputFile = io.newOutputFile(outputPath);
    ManifestWriter<DataFile> writer = ManifestFiles.write(spec, outputFile);

    try (ManifestWriter<DataFile> writerRef = writer) {
      fileTuples.forEachRemaining(fileTuple -> writerRef.add(fileTuple._2));
    } catch (IOException e) {
      throw SparkExceptionUtil.toUncheckedException(e, "Unable to close the manifest writer: %s", outputPath);
    }

    ManifestFile manifestFile = writer.toManifestFile();
    return ImmutableList.of(manifestFile).iterator();
  } else {
    return Collections.emptyIterator();
  }
}
Example 2
Source File: SparkAMDSI.java From deeplearning4j with Apache License 2.0
public SparkAMDSI(MultiDataSetIterator iterator, int queueSize, BlockingQueue<MultiDataSet> queue,
                  boolean useWorkspace, DataSetCallback callback, Integer deviceId) {
  this();

  if (queueSize < 2)
    queueSize = 2;

  this.callback = callback;
  this.buffer = queue;
  this.backedIterator = iterator;
  this.useWorkspaces = useWorkspace;
  this.prefetchSize = queueSize;
  this.workspaceId = "SAMDSI_ITER-" + java.util.UUID.randomUUID().toString();
  this.deviceId = deviceId;

  if (iterator.resetSupported())
    this.backedIterator.reset();

  this.thread = new SparkPrefetchThread(buffer, iterator, terminator,
      Nd4j.getAffinityManager().getDeviceForCurrentThread());

  context = TaskContext.get();

  thread.setDaemon(true);
  thread.start();
}
Example 3
Source File: NLJoinFunction.java From spliceengine with GNU Affero General Public License v3.0
protected void init(Iterator<ExecRow> from) throws StandardException {
  checkInit();
  taskContext = TaskContext.get();
  if (taskContext != null) {
    taskContext.addTaskCompletionListener((TaskCompletionListener) (t) -> close());
  }
  operationContext.getOperation().registerCloseable(this);
  SConfiguration configuration = EngineDriver.driver().getConfiguration();
  batchSize = configuration.getNestedLoopJoinBatchSize();
  nLeftRows = 0;
  leftSideIterator = from;
  executorService = SIDriver.driver().getExecutorService();
  firstBatch = new ArrayDeque<>(batchSize);
  initOperationContexts();
  loadBatch();
}
Example 4
Source File: SpliceOutputCommitter.java From spliceengine with GNU Affero General Public License v3.0
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "setupTask");

  // Create child additive transaction so we don't read rows inserted by ourselves in this operation
  TaskContext sparkTaskContext = TaskContext.get();
  TaskId taskId = null;
  if (sparkTaskContext != null) {
    int stageId = sparkTaskContext.stageId();
    int partitionId = sparkTaskContext.partitionId();
    int attemptNumber = sparkTaskContext.attemptNumber();
    taskId = new TaskId(stageId, partitionId, attemptNumber);
  }
  TxnView txn = SIDriver.driver().lifecycleManager()
      .beginChildTransaction(parentTxn, parentTxn.getIsolationLevel(), true, destinationTable, false, taskId);
  ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true,
      parentTxn.getIsolationLevel(), taskId);
  currentTxn.set(childTxn);
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "beginTxn=%s and destinationTable=%s", childTxn, destinationTable);
}
Example 5
Source File: SparkADSI.java From deeplearning4j with Apache License 2.0
public SparkADSI(DataSetIterator iterator, int queueSize, BlockingQueue<DataSet> queue, boolean useWorkspace,
                 DataSetCallback callback, Integer deviceId) {
  this();

  if (queueSize < 2)
    queueSize = 2;

  this.deviceId = deviceId;
  this.callback = callback;
  this.useWorkspace = useWorkspace;
  this.buffer = queue;
  this.prefetchSize = queueSize;
  this.backedIterator = iterator;
  this.workspaceId = "SADSI_ITER-" + java.util.UUID.randomUUID().toString();

  if (iterator.resetSupported())
    this.backedIterator.reset();

  context = TaskContext.get();

  this.thread = new SparkPrefetchThread(buffer, iterator, terminator, null,
      Nd4j.getAffinityManager().getDeviceForCurrentThread());

  /**
   * We want to ensure that the background thread will have the same thread->device affinity as the master thread.
   */
  thread.setDaemon(true);
  thread.start();
}
Example 6
Source File: IteratorUtils.java From spliceengine with GNU Affero General Public License v3.0
public static <E> Iterator<E> asInterruptibleIterator(Iterator<E> it) {
  TaskContext context = TaskContext.get();
  if (context != null) {
    return (Iterator<E>) JavaConverters.asJavaIteratorConverter(
        new InterruptibleIterator(context, JavaConverters.asScalaIteratorConverter(it).asScala())).asJava();
  } else
    return it;
}
Example 7
Source File: SparkLeanOperationContext.java From spliceengine with GNU Affero General Public License v3.0
@Override
@SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "intended")
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  if (in.readBoolean()) {
    SpliceClient.connectionString = in.readUTF();
    SpliceClient.setClient(HConfiguration.getConfiguration().getAuthenticationTokenEnabled(), SpliceClient.Mode.EXECUTOR);
  }
  badRecordsSeen = in.readLong();
  badRecordThreshold = in.readLong();
  permissive = in.readBoolean();
  SpliceSpark.setupSpliceStaticComponents();
  boolean isOp = in.readBoolean();
  if (isOp) {
    broadcastedActivation = (BroadcastedActivation) in.readObject();
    ActivationHolder ah = broadcastedActivation.getActivationHolder();
    op = (Op) ah.getOperationsMap().get(in.readInt());
    activation = ah.getActivation();
    TaskContext taskContext = TaskContext.get();
    if (taskContext != null) {
      taskContext.addTaskCompletionListener((TaskCompletionListener) (ctx) -> ah.close());
    }
  }
  badRecordsAccumulator = (Accumulable<BadRecordsRecorder, String>) in.readObject();
  importFileName = (String) in.readObject();
  rowsWritten = (LongAccumulator) in.readObject();
}
Example 8
Source File: KafkaStreamer.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Iterator<String> call(Integer partition, Iterator<T> locatedRowIterator) throws Exception {
  taskContext = TaskContext.get();

  if (taskContext != null && taskContext.attemptNumber() > 0) {
    LOG.trace("KS.c attempts " + taskContext.attemptNumber());
    long entriesInKafka = KafkaUtils.messageCount(bootstrapServers, topicName, partition);
    LOG.trace("KS.c entries " + entriesInKafka);
    for (long i = 0; i < entriesInKafka; ++i) {
      locatedRowIterator.next();
    }
  }

  Properties props = new Properties();
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  props.put(ProducerConfig.CLIENT_ID_CONFIG, "spark-producer-dss-ks-" + UUID.randomUUID());
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class.getName());
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ExternalizableSerializer.class.getName());

  KafkaProducer<Integer, Externalizable> producer = new KafkaProducer<>(props);
  int count = 0;
  while (locatedRowIterator.hasNext()) {
    T lr = locatedRowIterator.next();

    ProducerRecord<Integer, Externalizable> record = new ProducerRecord(topicName, count++, lr);
    producer.send(record);
    LOG.trace("KS.c sent " + partition.intValue() + " " + count + " " + lr);
  }
  LOG.trace("KS.c count " + partition.intValue() + " " + count);

  producer.close();

  // TODO Clean up
  return Arrays.asList("OK").iterator();
}
Example 9
Source File: RowDataRewriter.java From iceberg with Apache License 2.0
private TaskResult rewriteDataForTask(CombinedScanTask task) throws Exception {
  TaskContext context = TaskContext.get();
  int partitionId = context.partitionId();
  long taskId = context.taskAttemptId();

  RowDataReader dataReader = new RowDataReader(
      task, schema, schema, nameMapping, io.value(), encryptionManager.value(), caseSensitive);

  SparkAppenderFactory appenderFactory = new SparkAppenderFactory(
      properties, schema, SparkSchemaUtil.convert(schema));
  OutputFileFactory fileFactory = new OutputFileFactory(
      spec, format, locations, io.value(), encryptionManager.value(), partitionId, taskId);

  BaseWriter writer;
  if (spec.fields().isEmpty()) {
    writer = new UnpartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE);
  } else {
    writer = new PartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE, schema);
  }

  try {
    while (dataReader.next()) {
      InternalRow row = dataReader.get();
      writer.write(row);
    }

    dataReader.close();
    dataReader = null;
    return writer.complete();

  } catch (Throwable originalThrowable) {
    try {
      LOG.error("Aborting task", originalThrowable);
      context.markTaskFailed(originalThrowable);

      LOG.error("Aborting commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.attemptNumber(), context.stageId(), context.stageAttemptNumber());
      if (dataReader != null) {
        dataReader.close();
      }
      writer.abort();
      LOG.error("Aborted commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.taskAttemptId(), context.stageId(), context.stageAttemptNumber());

    } catch (Throwable inner) {
      if (originalThrowable != inner) {
        originalThrowable.addSuppressed(inner);
        LOG.warn("Suppressing exception in catch: {}", inner.getMessage(), inner);
      }
    }

    if (originalThrowable instanceof Exception) {
      throw originalThrowable;
    } else {
      throw new RuntimeException(originalThrowable);
    }
  }
}
Example 10
Source File: SparkBoundedInMemoryExecutor.java From hudi with Apache License 2.0
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
                                    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Option.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
Example 11
Source File: SMRecordReaderImpl.java From spliceengine with GNU Affero General Public License v3.0
public void init(Configuration config, InputSplit split) throws IOException, InterruptedException {
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "init");
  if (TaskContext.get() != null) {
    TaskContext.get().addTaskFailureListener(this);
  }
  String tableScannerAsString = config.get(MRConstants.SPLICE_SCAN_INFO);
  if (tableScannerAsString == null)
    throw new IOException("splice scan info was not serialized to task, failing");
  byte[] scanStartKey = null;
  byte[] scanStopKey = null;
  // declared outside the try block so it is visible to the catch block below
  DataScan scan = null;
  try {
    builder = TableScannerBuilder.getTableScannerBuilderFromBase64String(tableScannerAsString);
    if (LOG.isTraceEnabled())
      SpliceLogUtils.trace(LOG, "config loaded builder=%s", builder);
    TableSplit tSplit = ((SMSplit) split).getSplit();
    token = builder.getToken();
    scan = builder.getScan();
    scanStartKey = scan.getStartKey();
    scanStopKey = scan.getStopKey();
    if (Bytes.startComparator.compare(scanStartKey, tSplit.getStartRow()) < 0) {
      // the split itself is more restrictive
      scan.startKey(tSplit.getStartRow());
    }
    if (Bytes.endComparator.compare(scanStopKey, tSplit.getEndRow()) > 0) {
      // the split itself is more restrictive
      scan.stopKey(tSplit.getEndRow());
    }
    setScan(((HScan) scan).unwrapDelegate());
    // TODO (wjk): this seems weird (added with DB-4483)
    this.statisticsRun = AbstractSMInputFormat.oneSplitPerRegion(config);
    Double sampling = AbstractSMInputFormat.sampling(config);
    if (sampling != null) {
      this.sampling = true;
      this.samplingRate = sampling;
    }
    restart(scan.getStartKey());
  } catch (IOException ioe) {
    LOG.error(String.format("Received exception with scan %s, original start key %s, original stop key %s, split %s",
        scan, Bytes.toStringBinary(scanStartKey), Bytes.toStringBinary(scanStopKey), split), ioe);
    throw ioe;
  } catch (StandardException e) {
    throw new IOException(e);
  }
}