Java Code Examples for org.apache.spark.TaskContext#get()
The following examples show how to use org.apache.spark.TaskContext#get().
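A pattern common to the examples below: TaskContext.get() returns the context of the task running on the current executor thread, or null when called outside a task (for example on the driver), so callers typically null-check before reading stage/task identifiers or registering listeners. The following minimal sketch illustrates that pattern; the class name and the helper method are illustrative and not taken from any of the projects below.

import org.apache.spark.TaskContext;
import org.apache.spark.util.TaskCompletionListener;

public class TaskContextUsage {

    // Illustrative helper: describe the current task, or fall back when not inside one.
    public static String describeCurrentTask() {
        TaskContext ctx = TaskContext.get();
        if (ctx == null) {
            // Not running inside a Spark task (e.g. driver-side code).
            return "no active task";
        }
        // Register cleanup to run when the task finishes, whether it succeeds or fails.
        ctx.addTaskCompletionListener((TaskCompletionListener) completed -> {
            // release per-task resources here
        });
        return String.format("stage=%d partition=%d attempt=%d taskAttemptId=%d",
                ctx.stageId(), ctx.partitionId(), ctx.attemptNumber(), ctx.taskAttemptId());
    }
}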
Example 1
Source File: SparkTableUtil.java From iceberg with Apache License 2.0
private static Iterator<ManifestFile> buildManifest(SerializableConfiguration conf, PartitionSpec spec,
                                                    String basePath, Iterator<Tuple2<String, DataFile>> fileTuples) {
  if (fileTuples.hasNext()) {
    FileIO io = new HadoopFileIO(conf.get());
    TaskContext ctx = TaskContext.get();
    String suffix = String.format("stage-%d-task-%d-manifest", ctx.stageId(), ctx.taskAttemptId());
    Path location = new Path(basePath, suffix);
    String outputPath = FileFormat.AVRO.addExtension(location.toString());
    OutputFile outputFile = io.newOutputFile(outputPath);
    ManifestWriter<DataFile> writer = ManifestFiles.write(spec, outputFile);

    try (ManifestWriter<DataFile> writerRef = writer) {
      fileTuples.forEachRemaining(fileTuple -> writerRef.add(fileTuple._2));
    } catch (IOException e) {
      throw SparkExceptionUtil.toUncheckedException(e, "Unable to close the manifest writer: %s", outputPath);
    }

    ManifestFile manifestFile = writer.toManifestFile();
    return ImmutableList.of(manifestFile).iterator();
  } else {
    return Collections.emptyIterator();
  }
}
Example 2
Source File: SparkAMDSI.java From deeplearning4j with Apache License 2.0
public SparkAMDSI(MultiDataSetIterator iterator, int queueSize, BlockingQueue<MultiDataSet> queue,
                  boolean useWorkspace, DataSetCallback callback, Integer deviceId) {
  this();

  if (queueSize < 2)
    queueSize = 2;

  this.callback = callback;
  this.buffer = queue;
  this.backedIterator = iterator;
  this.useWorkspaces = useWorkspace;
  this.prefetchSize = queueSize;
  this.workspaceId = "SAMDSI_ITER-" + java.util.UUID.randomUUID().toString();
  this.deviceId = deviceId;

  if (iterator.resetSupported())
    this.backedIterator.reset();

  this.thread = new SparkPrefetchThread(buffer, iterator, terminator,
      Nd4j.getAffinityManager().getDeviceForCurrentThread());

  context = TaskContext.get();

  thread.setDaemon(true);
  thread.start();
}
Example 3
Source File: NLJoinFunction.java From spliceengine with GNU Affero General Public License v3.0
protected void init(Iterator<ExecRow> from) throws StandardException {
  checkInit();
  taskContext = TaskContext.get();
  if (taskContext != null) {
    taskContext.addTaskCompletionListener((TaskCompletionListener) (t) -> close());
  }
  operationContext.getOperation().registerCloseable(this);
  SConfiguration configuration = EngineDriver.driver().getConfiguration();
  batchSize = configuration.getNestedLoopJoinBatchSize();
  nLeftRows = 0;
  leftSideIterator = from;
  executorService = SIDriver.driver().getExecutorService();
  firstBatch = new ArrayDeque<>(batchSize);
  initOperationContexts();
  loadBatch();
}
Example 4
Source File: SpliceOutputCommitter.java From spliceengine with GNU Affero General Public License v3.0
@Override
public void setupTask(TaskAttemptContext taskContext) throws IOException {
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "setupTask");

  // Create child additive transaction so we don't read rows inserted by ourselves in this operation
  TaskContext sparkTaskContext = TaskContext.get();
  TaskId taskId = null;
  if (sparkTaskContext != null) {
    int stageId = sparkTaskContext.stageId();
    int partitionId = sparkTaskContext.partitionId();
    int attemptNumber = sparkTaskContext.attemptNumber();
    taskId = new TaskId(stageId, partitionId, attemptNumber);
  }
  TxnView txn = SIDriver.driver().lifecycleManager()
      .beginChildTransaction(parentTxn, parentTxn.getIsolationLevel(), true, destinationTable, false, taskId);
  ActiveWriteTxn childTxn = new ActiveWriteTxn(txn.getTxnId(), txn.getTxnId(), parentTxn, true,
      parentTxn.getIsolationLevel(), taskId);
  currentTxn.set(childTxn);
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "beginTxn=%s and destinationTable=%s", childTxn, destinationTable);
}
Example 5
Source File: SparkADSI.java From deeplearning4j with Apache License 2.0
public SparkADSI(DataSetIterator iterator, int queueSize, BlockingQueue<DataSet> queue, boolean useWorkspace,
                 DataSetCallback callback, Integer deviceId) {
  this();

  if (queueSize < 2)
    queueSize = 2;

  this.deviceId = deviceId;
  this.callback = callback;
  this.useWorkspace = useWorkspace;
  this.buffer = queue;
  this.prefetchSize = queueSize;
  this.backedIterator = iterator;
  this.workspaceId = "SADSI_ITER-" + java.util.UUID.randomUUID().toString();

  if (iterator.resetSupported())
    this.backedIterator.reset();

  context = TaskContext.get();

  this.thread = new SparkPrefetchThread(buffer, iterator, terminator, null,
      Nd4j.getAffinityManager().getDeviceForCurrentThread());

  /**
   * We want to ensure that the background thread will have the same thread->device affinity as the master thread.
   */
  thread.setDaemon(true);
  thread.start();
}
Example 6
Source File: IteratorUtils.java From spliceengine with GNU Affero General Public License v3.0
public static <E> Iterator<E> asInterruptibleIterator(Iterator<E> it) {
  TaskContext context = TaskContext.get();
  if (context != null) {
    return (Iterator<E>) JavaConverters.asJavaIteratorConverter(
        new InterruptibleIterator(context, JavaConverters.asScalaIteratorConverter(it).asScala())).asJava();
  } else
    return it;
}
Example 7
Source File: SparkLeanOperationContext.java From spliceengine with GNU Affero General Public License v3.0
@Override
@SuppressFBWarnings(value = "ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD", justification = "intended")
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  if (in.readBoolean()) {
    SpliceClient.connectionString = in.readUTF();
    SpliceClient.setClient(HConfiguration.getConfiguration().getAuthenticationTokenEnabled(), SpliceClient.Mode.EXECUTOR);
  }
  badRecordsSeen = in.readLong();
  badRecordThreshold = in.readLong();
  permissive = in.readBoolean();
  SpliceSpark.setupSpliceStaticComponents();
  boolean isOp = in.readBoolean();
  if (isOp) {
    broadcastedActivation = (BroadcastedActivation) in.readObject();
    ActivationHolder ah = broadcastedActivation.getActivationHolder();
    op = (Op) ah.getOperationsMap().get(in.readInt());
    activation = ah.getActivation();
    TaskContext taskContext = TaskContext.get();
    if (taskContext != null) {
      taskContext.addTaskCompletionListener((TaskCompletionListener) (ctx) -> ah.close());
    }
  }
  badRecordsAccumulator = (Accumulable<BadRecordsRecorder, String>) in.readObject();
  importFileName = (String) in.readObject();
  rowsWritten = (LongAccumulator) in.readObject();
}
Example 8
Source File: KafkaStreamer.java From spliceengine with GNU Affero General Public License v3.0
@Override
public Iterator<String> call(Integer partition, Iterator<T> locatedRowIterator) throws Exception {
  taskContext = TaskContext.get();

  if (taskContext != null && taskContext.attemptNumber() > 0) {
    LOG.trace("KS.c attempts " + taskContext.attemptNumber());
    long entriesInKafka = KafkaUtils.messageCount(bootstrapServers, topicName, partition);
    LOG.trace("KS.c entries " + entriesInKafka);
    for (long i = 0; i < entriesInKafka; ++i) {
      locatedRowIterator.next();
    }
  }

  Properties props = new Properties();
  props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
  props.put(ProducerConfig.CLIENT_ID_CONFIG, "spark-producer-dss-ks-" + UUID.randomUUID());
  props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, IntegerSerializer.class.getName());
  props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ExternalizableSerializer.class.getName());

  KafkaProducer<Integer, Externalizable> producer = new KafkaProducer<>(props);
  int count = 0;
  while (locatedRowIterator.hasNext()) {
    T lr = locatedRowIterator.next();

    ProducerRecord<Integer, Externalizable> record = new ProducerRecord(topicName, count++, lr);
    producer.send(record);
    LOG.trace("KS.c sent " + partition.intValue() + " " + count + " " + lr);
  }
  LOG.trace("KS.c count " + partition.intValue() + " " + count);

  producer.close();

  // TODO Clean up
  return Arrays.asList("OK").iterator();
}
Example 9
Source File: RowDataRewriter.java From iceberg with Apache License 2.0
private TaskResult rewriteDataForTask(CombinedScanTask task) throws Exception {
  TaskContext context = TaskContext.get();
  int partitionId = context.partitionId();
  long taskId = context.taskAttemptId();

  RowDataReader dataReader = new RowDataReader(
      task, schema, schema, nameMapping, io.value(), encryptionManager.value(), caseSensitive);

  SparkAppenderFactory appenderFactory = new SparkAppenderFactory(
      properties, schema, SparkSchemaUtil.convert(schema));
  OutputFileFactory fileFactory = new OutputFileFactory(
      spec, format, locations, io.value(), encryptionManager.value(), partitionId, taskId);

  BaseWriter writer;
  if (spec.fields().isEmpty()) {
    writer = new UnpartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE);
  } else {
    writer = new PartitionedWriter(spec, format, appenderFactory, fileFactory, io.value(), Long.MAX_VALUE, schema);
  }

  try {
    while (dataReader.next()) {
      InternalRow row = dataReader.get();
      writer.write(row);
    }

    dataReader.close();
    dataReader = null;
    return writer.complete();

  } catch (Throwable originalThrowable) {
    try {
      LOG.error("Aborting task", originalThrowable);
      context.markTaskFailed(originalThrowable);

      LOG.error("Aborting commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.attemptNumber(), context.stageId(), context.stageAttemptNumber());
      if (dataReader != null) {
        dataReader.close();
      }
      writer.abort();
      LOG.error("Aborted commit for partition {} (task {}, attempt {}, stage {}.{})",
          partitionId, taskId, context.taskAttemptId(), context.stageId(), context.stageAttemptNumber());

    } catch (Throwable inner) {
      if (originalThrowable != inner) {
        originalThrowable.addSuppressed(inner);
        LOG.warn("Suppressing exception in catch: {}", inner.getMessage(), inner);
      }
    }

    if (originalThrowable instanceof Exception) {
      throw originalThrowable;
    } else {
      throw new RuntimeException(originalThrowable);
    }
  }
}
Example 10
Source File: SparkBoundedInMemoryExecutor.java From hudi with Apache License 2.0
public SparkBoundedInMemoryExecutor(final HoodieWriteConfig hoodieConfig, BoundedInMemoryQueueProducer<I> producer,
                                    BoundedInMemoryQueueConsumer<O, E> consumer, Function<I, O> bufferedIteratorTransform) {
  super(hoodieConfig.getWriteBufferLimitBytes(), producer, Option.of(consumer), bufferedIteratorTransform);
  this.sparkThreadTaskContext = TaskContext.get();
}
Example 11
Source File: SMRecordReaderImpl.java From spliceengine with GNU Affero General Public License v3.0
public void init(Configuration config, InputSplit split) throws IOException, InterruptedException {
  if (LOG.isDebugEnabled())
    SpliceLogUtils.debug(LOG, "init");
  if (TaskContext.get() != null) {
    TaskContext.get().addTaskFailureListener(this);
  }
  String tableScannerAsString = config.get(MRConstants.SPLICE_SCAN_INFO);
  if (tableScannerAsString == null)
    throw new IOException("splice scan info was not serialized to task, failing");
  byte[] scanStartKey = null;
  byte[] scanStopKey = null;
  // declared outside the try block so it is visible to the catch block below
  DataScan scan = null;
  try {
    builder = TableScannerBuilder.getTableScannerBuilderFromBase64String(tableScannerAsString);
    if (LOG.isTraceEnabled())
      SpliceLogUtils.trace(LOG, "config loaded builder=%s", builder);
    TableSplit tSplit = ((SMSplit) split).getSplit();
    token = builder.getToken();
    scan = builder.getScan();
    scanStartKey = scan.getStartKey();
    scanStopKey = scan.getStopKey();
    if (Bytes.startComparator.compare(scanStartKey, tSplit.getStartRow()) < 0) {
      // the split itself is more restrictive
      scan.startKey(tSplit.getStartRow());
    }
    if (Bytes.endComparator.compare(scanStopKey, tSplit.getEndRow()) > 0) {
      // the split itself is more restrictive
      scan.stopKey(tSplit.getEndRow());
    }
    setScan(((HScan) scan).unwrapDelegate());
    // TODO (wjk): this seems weird (added with DB-4483)
    this.statisticsRun = AbstractSMInputFormat.oneSplitPerRegion(config);
    Double sampling = AbstractSMInputFormat.sampling(config);
    if (sampling != null) {
      this.sampling = true;
      this.samplingRate = sampling;
    }
    restart(scan.getStartKey());
  } catch (IOException ioe) {
    LOG.error(String.format("Received exception with scan %s, original start key %s, original stop key %s, split %s",
        scan, Bytes.toStringBinary(scanStartKey), Bytes.toStringBinary(scanStopKey), split), ioe);
    throw ioe;
  } catch (StandardException e) {
    throw new IOException(e);
  }
}