Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#CANCELING
The following examples show how to use
org.apache.flink.runtime.execution.ExecutionState#CANCELING.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ExecutionVertexInputConstraintTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Drives the given execution graph through a restart and waits until it runs again.
 *
 * <p>Steps: wait (via {@code waitForAllExecutionsPredicate}) until the predicate
 * "CANCELING or CANCELED or FAILED or FINISHED" holds, manually complete the cancellation of
 * every vertex whose current attempt is still CANCELING, then wait for
 * {@link JobStatus#RUNNING}.
 *
 * @param eg the execution graph under test
 * @throws Exception propagated from the wait helpers or from completing the cancellation
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
    // Same limit is handed to both wait helpers.
    final long maxWait = 2000L;

    waitForAllExecutionsPredicate(
        eg,
        isInExecutionState(ExecutionState.CANCELING)
            .or(isInExecutionState(ExecutionState.CANCELED))
            .or(isInExecutionState(ExecutionState.FAILED))
            .or(isInExecutionState(ExecutionState.FINISHED)),
        maxWait);

    for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
        if (vertex.getCurrentExecutionAttempt().getState() != ExecutionState.CANCELING) {
            continue;
        }
        // Attempts stuck in CANCELING are acknowledged by hand.
        vertex.getCurrentExecutionAttempt().completeCancelling();
    }

    waitUntilJobStatus(eg, JobStatus.RUNNING, maxWait);
}
Example 2
Source File: ExecutionVertexInputConstraintTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Drives the given execution graph through a restart and waits until it runs again.
 *
 * <p>Steps: wait (via {@code waitForAllExecutionsPredicate}) until the predicate
 * "CANCELING or CANCELED or FAILED or FINISHED" holds, manually complete the cancellation of
 * every vertex whose current attempt is still CANCELING, then wait for
 * {@link JobStatus#RUNNING}.
 *
 * @param eg the execution graph under test
 * @throws Exception propagated from the wait helpers or from completing the cancellation
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
    // Same limit is handed to both wait helpers.
    final long maxWait = 2000L;

    waitForAllExecutionsPredicate(
        eg,
        isInExecutionState(ExecutionState.CANCELING)
            .or(isInExecutionState(ExecutionState.CANCELED))
            .or(isInExecutionState(ExecutionState.FAILED))
            .or(isInExecutionState(ExecutionState.FINISHED)),
        maxWait);

    for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
        if (vertex.getCurrentExecutionAttempt().getState() != ExecutionState.CANCELING) {
            continue;
        }
        // Attempts stuck in CANCELING are acknowledged by hand.
        vertex.getCurrentExecutionAttempt().completeCancelling();
    }

    waitUntilJobStatus(eg, JobStatus.RUNNING, maxWait);
}
Example 3
Source File: DefaultScheduler.java From flink with Apache License 2.0 | 6 votes |
private void notifyCoordinatorOfCancellation(ExecutionVertex vertex) { // this method makes a best effort to filter out duplicate notifications, meaning cases where // the coordinator was already notified for that specific task // we don't notify if the task is already FAILED, CANCELLING, or CANCELED final ExecutionState currentState = vertex.getExecutionState(); if (currentState == ExecutionState.FAILED || currentState == ExecutionState.CANCELING || currentState == ExecutionState.CANCELED) { return; } for (OperatorCoordinator coordinator : vertex.getJobVertex().getOperatorCoordinators()) { coordinator.subtaskFailed(vertex.getParallelSubtaskIndex(), null); } }
Example 4
Source File: ExecutionVertexInputConstraintTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Drives the given execution graph through a restart and waits until it runs again.
 *
 * <p>Steps: wait (via {@code waitForAllExecutionsPredicate}) until the predicate
 * "CANCELING or CANCELED or FAILED or FINISHED" holds, manually complete the cancellation of
 * every vertex whose current attempt is still CANCELING, then wait for
 * {@link JobStatus#RUNNING}.
 *
 * @param eg the execution graph under test
 * @throws Exception propagated from the wait helpers or from completing the cancellation
 */
private void waitUntilJobRestarted(ExecutionGraph eg) throws Exception {
    // Same limit is handed to both wait helpers.
    final long maxWait = 2000L;

    waitForAllExecutionsPredicate(
        eg,
        isInExecutionState(ExecutionState.CANCELING)
            .or(isInExecutionState(ExecutionState.CANCELED))
            .or(isInExecutionState(ExecutionState.FAILED))
            .or(isInExecutionState(ExecutionState.FINISHED)),
        maxWait);

    for (ExecutionVertex vertex : eg.getAllExecutionVertices()) {
        if (vertex.getCurrentExecutionAttempt().getState() != ExecutionState.CANCELING) {
            continue;
        }
        // Attempts stuck in CANCELING are acknowledged by hand.
        vertex.getCurrentExecutionAttempt().completeCancelling();
    }

    waitUntilJobStatus(eg, JobStatus.RUNNING, maxWait);
}
Example 5
Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0 | 5 votes |
private void abortConsumptionOrIgnoreCheckResult(ResponseHandle responseHandle) { ExecutionState producerState = getProducerState(responseHandle); if (producerState == ExecutionState.CANCELING || producerState == ExecutionState.CANCELED || producerState == ExecutionState.FAILED) { // The producing execution has been canceled or failed. We // don't need to re-trigger the request since it cannot // succeed. if (LOG.isDebugEnabled()) { LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.", taskNameWithSubtask, resultPartitionId.getPartitionId(), resultPartitionId.getProducerId(), producerState); } responseHandle.cancelConsumption(); } else { // Any other execution state is unexpected. Currently, only // state CREATED is left out of the checked states. If we // see a producer in this state, something went wrong with // scheduling in topological order. final String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.", resultPartitionId.getProducerId(), resultPartitionId.getPartitionId(), producerState); responseHandle.failConsumption(new IllegalStateException(msg)); } }
Example 6
Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0 | 5 votes |
private void abortConsumptionOrIgnoreCheckResult(ResponseHandle responseHandle) { ExecutionState producerState = getProducerState(responseHandle); if (producerState == ExecutionState.CANCELING || producerState == ExecutionState.CANCELED || producerState == ExecutionState.FAILED) { // The producing execution has been canceled or failed. We // don't need to re-trigger the request since it cannot // succeed. if (LOG.isDebugEnabled()) { LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.", taskNameWithSubtask, resultPartitionId.getPartitionId(), resultPartitionId.getProducerId(), producerState); } responseHandle.cancelConsumption(); } else { // Any other execution state is unexpected. Currently, only // state CREATED is left out of the checked states. If we // see a producer in this state, something went wrong with // scheduling in topological order. final String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.", resultPartitionId.getProducerId(), resultPartitionId.getPartitionId(), producerState); responseHandle.failConsumption(new IllegalStateException(msg)); } }
Example 7
Source File: ExecutionJobVertex.java From flink with Apache License 2.0 | 5 votes |
/** * A utility function that computes an "aggregated" state for the vertex. * * <p>This state is not used anywhere in the coordination, but can be used for display * in dashboards to as a summary for how the particular parallel operation represented by * this ExecutionJobVertex is currently behaving. * * <p>For example, if at least one parallel task is failed, the aggregate state is failed. * If not, and at least one parallel task is cancelling (or cancelled), the aggregate state * is cancelling (or cancelled). If all tasks are finished, the aggregate state is finished, * and so on. * * @param verticesPerState The number of vertices in each state (indexed by the ordinal of * the ExecutionState values). * @param parallelism The parallelism of the ExecutionJobVertex * * @return The aggregate state of this ExecutionJobVertex. */ public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) { if (verticesPerState == null || verticesPerState.length != ExecutionState.values().length) { throw new IllegalArgumentException("Must provide an array as large as there are execution states."); } if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) { return ExecutionState.FAILED; } if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) { return ExecutionState.CANCELING; } else if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) { return ExecutionState.CANCELED; } else if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) { return ExecutionState.RUNNING; } else if (verticesPerState[ExecutionState.FINISHED.ordinal()] > 0) { return verticesPerState[ExecutionState.FINISHED.ordinal()] == parallelism ? ExecutionState.FINISHED : ExecutionState.RUNNING; } else { // all else collapses under created return ExecutionState.CREATED; } }
Example 8
Source File: InterruptSensitiveRestoreTest.java From flink with Apache License 2.0 | 5 votes |
private void testRestoreWithInterrupt(int mode) throws Exception { IN_RESTORE_LATCH.reset(); Configuration taskConfig = new Configuration(); StreamConfig cfg = new StreamConfig(taskConfig); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); switch (mode) { case OPERATOR_MANAGED: case OPERATOR_RAW: case KEYED_MANAGED: case KEYED_RAW: cfg.setStateKeySerializer(IntSerializer.INSTANCE); cfg.setStreamOperator(new StreamSource<>(new TestSource(mode))); break; default: throw new IllegalArgumentException(); } StreamStateHandle lockingHandle = new InterruptLockingStateHandle(); Task task = createTask(cfg, taskConfig, lockingHandle, mode); // start the task and wait until it is in "restore" task.startTaskThread(); IN_RESTORE_LATCH.await(); // trigger cancellation and signal to continue task.cancelExecution(); task.getExecutingThread().join(30000); if (task.getExecutionState() == ExecutionState.CANCELING) { fail("Task is stuck and not canceling"); } assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
Example 9
Source File: ExecutionJobVertex.java From flink with Apache License 2.0 | 5 votes |
/** * A utility function that computes an "aggregated" state for the vertex. * * <p>This state is not used anywhere in the coordination, but can be used for display * in dashboards to as a summary for how the particular parallel operation represented by * this ExecutionJobVertex is currently behaving. * * <p>For example, if at least one parallel task is failed, the aggregate state is failed. * If not, and at least one parallel task is cancelling (or cancelled), the aggregate state * is cancelling (or cancelled). If all tasks are finished, the aggregate state is finished, * and so on. * * @param verticesPerState The number of vertices in each state (indexed by the ordinal of * the ExecutionState values). * @param parallelism The parallelism of the ExecutionJobVertex * * @return The aggregate state of this ExecutionJobVertex. */ public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) { if (verticesPerState == null || verticesPerState.length != ExecutionState.values().length) { throw new IllegalArgumentException("Must provide an array as large as there are execution states."); } if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) { return ExecutionState.FAILED; } if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) { return ExecutionState.CANCELING; } else if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) { return ExecutionState.CANCELED; } else if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) { return ExecutionState.RUNNING; } else if (verticesPerState[ExecutionState.FINISHED.ordinal()] > 0) { return verticesPerState[ExecutionState.FINISHED.ordinal()] == parallelism ? ExecutionState.FINISHED : ExecutionState.RUNNING; } else { // all else collapses under created return ExecutionState.CREATED; } }
Example 10
Source File: InterruptSensitiveRestoreTest.java From flink with Apache License 2.0 | 5 votes |
private void testRestoreWithInterrupt(int mode) throws Exception { IN_RESTORE_LATCH.reset(); Configuration taskConfig = new Configuration(); StreamConfig cfg = new StreamConfig(taskConfig); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); switch (mode) { case OPERATOR_MANAGED: case OPERATOR_RAW: case KEYED_MANAGED: case KEYED_RAW: cfg.setStateKeySerializer(IntSerializer.INSTANCE); cfg.setStreamOperator(new StreamSource<>(new TestSource(mode))); break; default: throw new IllegalArgumentException(); } StreamStateHandle lockingHandle = new InterruptLockingStateHandle(); Task task = createTask(cfg, taskConfig, lockingHandle, mode); // start the task and wait until it is in "restore" task.startTaskThread(); IN_RESTORE_LATCH.await(); // trigger cancellation and signal to continue task.cancelExecution(); task.getExecutingThread().join(30000); if (task.getExecutionState() == ExecutionState.CANCELING) { fail("Task is stuck and not canceling"); } assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
Example 11
Source File: InterruptSensitiveRestoreTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void testRestoreWithInterrupt(int mode) throws Exception { IN_RESTORE_LATCH.reset(); Configuration taskConfig = new Configuration(); StreamConfig cfg = new StreamConfig(taskConfig); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); switch (mode) { case OPERATOR_MANAGED: case OPERATOR_RAW: case KEYED_MANAGED: case KEYED_RAW: cfg.setStateKeySerializer(IntSerializer.INSTANCE); cfg.setStreamOperator(new StreamSource<>(new TestSource(mode))); break; default: throw new IllegalArgumentException(); } StreamStateHandle lockingHandle = new InterruptLockingStateHandle(); Task task = createTask(cfg, taskConfig, lockingHandle, mode); // start the task and wait until it is in "restore" task.startTaskThread(); IN_RESTORE_LATCH.await(); // trigger cancellation and signal to continue task.cancelExecution(); task.getExecutingThread().join(30000); if (task.getExecutionState() == ExecutionState.CANCELING) { fail("Task is stuck and not canceling"); } assertEquals(ExecutionState.CANCELED, task.getExecutionState()); assertNull(task.getFailureCause()); }
Example 12
Source File: ExecutionJobVertex.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * A utility function that computes an "aggregated" state for the vertex. * * <p>This state is not used anywhere in the coordination, but can be used for display * in dashboards to as a summary for how the particular parallel operation represented by * this ExecutionJobVertex is currently behaving. * * <p>For example, if at least one parallel task is failed, the aggregate state is failed. * If not, and at least one parallel task is cancelling (or cancelled), the aggregate state * is cancelling (or cancelled). If all tasks are finished, the aggregate state is finished, * and so on. * * @param verticesPerState The number of vertices in each state (indexed by the ordinal of * the ExecutionState values). * @param parallelism The parallelism of the ExecutionJobVertex * * @return The aggregate state of this ExecutionJobVertex. */ public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) { if (verticesPerState == null || verticesPerState.length != ExecutionState.values().length) { throw new IllegalArgumentException("Must provide an array as large as there are execution states."); } if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) { return ExecutionState.FAILED; } if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) { return ExecutionState.CANCELING; } else if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) { return ExecutionState.CANCELED; } else if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) { return ExecutionState.RUNNING; } else if (verticesPerState[ExecutionState.FINISHED.ordinal()] > 0) { return verticesPerState[ExecutionState.FINISHED.ordinal()] == parallelism ? ExecutionState.FINISHED : ExecutionState.RUNNING; } else { // all else collapses under created return ExecutionState.CREATED; } }
Example 13
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Attempts to move this task into {@code targetState} and, when the task was RUNNING, starts
 * the helper threads that cancel the invokable.
 *
 * <p>The method retries in a loop until a state transition succeeds or the observed state
 * needs no action:
 * <ul>
 *   <li>terminal or CANCELING: logs and returns without changes;</li>
 *   <li>CREATED or DEPLOYING: transitions directly and records {@code cause};</li>
 *   <li>RUNNING: transitions and, at most once (guarded by {@code invokableHasBeenCanceled}),
 *       starts a canceler thread, optionally an interrupter thread, and optionally a
 *       watchdog thread;</li>
 *   <li>any other state: throws {@link IllegalStateException}.</li>
 * </ul>
 *
 * @param targetState the state to transition into
 * @param cause handed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is observed in an unexpected state
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
    while (true) {
        // Snapshot the state once per iteration so all checks below see the same value.
        ExecutionState current = executionState;

        // if the task is already canceled (or canceling) or finished or failed,
        // then we need not do anything
        if (current.isTerminal() || current == ExecutionState.CANCELING) {
            LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
            return;
        }

        if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
            // If the transition does not succeed, the loop re-reads the state and retries.
            if (transitionState(current, targetState, cause)) {
                // if we manage this state transition, then the invokable gets never called
                // we need not call cancel on it
                this.failureCause = cause;
                return;
            }
        } else if (current == ExecutionState.RUNNING) {
            if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
                // we are canceling / failing out of the running state
                // we need to cancel the invokable

                // copy reference to guard against concurrent null-ing out the reference
                final AbstractInvokable invokable = this.invokable;

                // compareAndSet ensures the cancellation threads are started only once.
                if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
                    this.failureCause = cause;

                    LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

                    // because the canceling may block on user code, we cancel from a separate thread
                    // we do not reuse the async call handler, because that one may be blocked, in which
                    // case the canceling could not continue

                    // The canceller calls cancel and interrupts the executing thread once
                    Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

                    Thread cancelThread = new Thread(
                        executingThread.getThreadGroup(),
                        canceler,
                        String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
                    cancelThread.setDaemon(true);
                    cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                    cancelThread.start();

                    // the periodic interrupting thread - a different thread than the canceller, in case
                    // the application code does blocking stuff in its cancellation paths.
                    if (invokable.shouldInterruptOnCancel()) {
                        Runnable interrupter = new TaskInterrupter(
                            LOG,
                            invokable,
                            executingThread,
                            taskNameWithSubtask,
                            taskCancellationInterval);

                        Thread interruptingThread = new Thread(
                            executingThread.getThreadGroup(),
                            interrupter,
                            String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
                        interruptingThread.setDaemon(true);
                        interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        interruptingThread.start();
                    }

                    // if a cancellation timeout is set, the watchdog thread kills the process
                    // if graceful cancellation does not succeed
                    if (taskCancellationTimeout > 0) {
                        Runnable cancelWatchdog = new TaskCancelerWatchDog(
                            executingThread,
                            taskManagerActions,
                            taskCancellationTimeout,
                            LOG);

                        Thread watchDogThread = new Thread(
                            executingThread.getThreadGroup(),
                            cancelWatchdog,
                            String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId));
                        watchDogThread.setDaemon(true);
                        watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        watchDogThread.start();
                    }
                }
                // The transition succeeded, so we are done even if no threads were started here.
                return;
            }
        } else {
            throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
                current, taskNameWithSubtask, executionId));
        }
    }
}
Example 14
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Checks whether the task has failed, is canceled, or is being canceled at the moment.
 *
 * <p>The state field is read exactly once so that the three comparisons are made against the
 * same value. Comparing against the field three times could, if the state changes between the
 * reads (for example RUNNING, then CANCELING, then CANCELED), miss every comparison and return
 * {@code false} for a task that is being canceled.
 * NOTE(review): assumes {@code executionState} can be updated by other threads - confirm
 * against the field declaration.
 *
 * @return {@code true} if the task is in state FAILED, CANCELING, or CANCELED, {@code false}
 *     otherwise.
 */
public boolean isCanceledOrFailed() {
    final ExecutionState current = executionState;
    return current == ExecutionState.CANCELING
        || current == ExecutionState.CANCELED
        || current == ExecutionState.FAILED;
}
Example 15
Source File: TaskDeploymentDescriptorFactory.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tells whether the given producer state is one of FAILED, CANCELED or CANCELING.
 *
 * @param producerState the producer's execution state; any other value, including
 *     {@code null}, yields {@code false}
 * @return {@code true} for FAILED, CANCELED or CANCELING
 */
private static boolean isProducerFailedOrCanceled(ExecutionState producerState) {
    if (producerState == ExecutionState.FAILED) {
        return true;
    }
    if (producerState == ExecutionState.CANCELED) {
        return true;
    }
    return producerState == ExecutionState.CANCELING;
}
Example 16
Source File: TaskDeploymentDescriptorFactory.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tells whether the given producer state is one of FAILED, CANCELED or CANCELING.
 *
 * @param producerState the producer's execution state; any other value, including
 *     {@code null}, yields {@code false}
 * @return {@code true} for FAILED, CANCELED or CANCELING
 */
private static boolean isProducerFailedOrCanceled(ExecutionState producerState) {
    if (producerState == ExecutionState.FAILED) {
        return true;
    }
    if (producerState == ExecutionState.CANCELED) {
        return true;
    }
    return producerState == ExecutionState.CANCELING;
}
Example 17
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks whether the task has failed, is canceled, or is being canceled at the moment.
 *
 * <p>The state field is read exactly once so that the three comparisons are made against the
 * same value. Comparing against the field three times could, if the state changes between the
 * reads (for example RUNNING, then CANCELING, then CANCELED), miss every comparison and return
 * {@code false} for a task that is being canceled.
 * NOTE(review): assumes {@code executionState} can be updated by other threads - confirm
 * against the field declaration.
 *
 * @return {@code true} if the task is in state FAILED, CANCELING, or CANCELED, {@code false}
 *     otherwise.
 */
public boolean isCanceledOrFailed() {
    final ExecutionState current = executionState;
    return current == ExecutionState.CANCELING
        || current == ExecutionState.CANCELED
        || current == ExecutionState.FAILED;
}
Example 18
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Attempts to move this task into {@code targetState} and, when the task was RUNNING, starts
 * the helper threads that cancel the invokable.
 *
 * <p>The method retries in a loop until a state transition succeeds or the observed state
 * needs no action:
 * <ul>
 *   <li>terminal or CANCELING: logs and returns without changes;</li>
 *   <li>CREATED or DEPLOYING: transitions directly and records {@code cause};</li>
 *   <li>RUNNING: transitions and, at most once (guarded by {@code invokableHasBeenCanceled}),
 *       starts a canceler thread, optionally an interrupter thread, and optionally a
 *       watchdog thread;</li>
 *   <li>any other state: throws {@link IllegalStateException}.</li>
 * </ul>
 *
 * @param targetState the state to transition into
 * @param cause handed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is observed in an unexpected state
 */
@VisibleForTesting
void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) {
    while (true) {
        // Snapshot the state once per iteration so all checks below see the same value.
        ExecutionState current = executionState;

        // if the task is already canceled (or canceling) or finished or failed,
        // then we need not do anything
        if (current.isTerminal() || current == ExecutionState.CANCELING) {
            LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
            return;
        }

        if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
            // If the transition does not succeed, the loop re-reads the state and retries.
            if (transitionState(current, targetState, cause)) {
                // if we manage this state transition, then the invokable gets never called
                // we need not call cancel on it
                this.failureCause = cause;
                return;
            }
        } else if (current == ExecutionState.RUNNING) {
            if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
                // we are canceling / failing out of the running state
                // we need to cancel the invokable

                // copy reference to guard against concurrent null-ing out the reference
                final AbstractInvokable invokable = this.invokable;

                // compareAndSet ensures the cancellation threads are started only once.
                if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
                    this.failureCause = cause;

                    LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

                    // because the canceling may block on user code, we cancel from a separate thread
                    // we do not reuse the async call handler, because that one may be blocked, in which
                    // case the canceling could not continue

                    // The canceller calls cancel and interrupts the executing thread once
                    Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

                    Thread cancelThread = new Thread(
                        executingThread.getThreadGroup(),
                        canceler,
                        String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
                    cancelThread.setDaemon(true);
                    cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                    cancelThread.start();

                    // the periodic interrupting thread - a different thread than the canceller, in case
                    // the application code does blocking stuff in its cancellation paths.
                    if (invokable.shouldInterruptOnCancel()) {
                        Runnable interrupter = new TaskInterrupter(
                            LOG,
                            invokable,
                            executingThread,
                            taskNameWithSubtask,
                            taskCancellationInterval);

                        Thread interruptingThread = new Thread(
                            executingThread.getThreadGroup(),
                            interrupter,
                            String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
                        interruptingThread.setDaemon(true);
                        interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        interruptingThread.start();
                    }

                    // if a cancellation timeout is set, the watchdog thread kills the process
                    // if graceful cancellation does not succeed
                    if (taskCancellationTimeout > 0) {
                        Runnable cancelWatchdog = new TaskCancelerWatchDog(
                            executingThread,
                            taskManagerActions,
                            taskCancellationTimeout);

                        Thread watchDogThread = new Thread(
                            executingThread.getThreadGroup(),
                            cancelWatchdog,
                            String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId));
                        watchDogThread.setDaemon(true);
                        watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        watchDogThread.start();
                    }
                }
                // The transition succeeded, so we are done even if no threads were started here.
                return;
            }
        } else {
            throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
                current, taskNameWithSubtask, executionId));
        }
    }
}
Example 19
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Answer to a partition state check issued after a failed partition request. */ @VisibleForTesting void onPartitionStateUpdate( IntermediateDataSetID intermediateDataSetId, ResultPartitionID resultPartitionId, ExecutionState producerState) throws IOException, InterruptedException { if (executionState == ExecutionState.RUNNING) { final SingleInputGate inputGate = inputGatesById.get(intermediateDataSetId); if (inputGate != null) { if (producerState == ExecutionState.SCHEDULED || producerState == ExecutionState.DEPLOYING || producerState == ExecutionState.RUNNING || producerState == ExecutionState.FINISHED) { // Retrigger the partition request inputGate.retriggerPartitionRequest(resultPartitionId.getPartitionId()); } else if (producerState == ExecutionState.CANCELING || producerState == ExecutionState.CANCELED || producerState == ExecutionState.FAILED) { // The producing execution has been canceled or failed. We // don't need to re-trigger the request since it cannot // succeed. if (LOG.isDebugEnabled()) { LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.", taskNameWithSubtask, resultPartitionId.getPartitionId(), resultPartitionId.getProducerId(), producerState); } cancelExecution(); } else { // Any other execution state is unexpected. Currently, only // state CREATED is left out of the checked states. If we // see a producer in this state, something went wrong with // scheduling in topological order. String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.", resultPartitionId.getProducerId(), resultPartitionId.getPartitionId(), producerState); failExternally(new IllegalStateException(msg)); } } else { failExternally(new IllegalStateException("Received partition producer state for " + "unknown input gate " + intermediateDataSetId + ".")); } } else { LOG.debug("Task {} ignored a partition producer state notification, because it's not running.", taskNameWithSubtask); } }
Example 20
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Attempts to move this task into {@code targetState} and, when the task was RUNNING, starts
 * the helper threads that cancel the invokable.
 *
 * <p>The method retries in a loop until a state transition succeeds or the observed state
 * needs no action:
 * <ul>
 *   <li>terminal or CANCELING: logs and returns without changes;</li>
 *   <li>CREATED or DEPLOYING: transitions directly and records {@code cause};</li>
 *   <li>RUNNING: transitions and, at most once (guarded by {@code invokableHasBeenCanceled}),
 *       starts a canceler thread, optionally an interrupter thread, and optionally a
 *       watchdog thread;</li>
 *   <li>any other state: throws {@link IllegalStateException}.</li>
 * </ul>
 *
 * @param targetState the state to transition into
 * @param cause handed to {@code transitionState} and stored in {@code failureCause}
 * @throws IllegalStateException if the task is observed in an unexpected state
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
    while (true) {
        // Snapshot the state once per iteration so all checks below see the same value.
        ExecutionState current = executionState;

        // if the task is already canceled (or canceling) or finished or failed,
        // then we need not do anything
        if (current.isTerminal() || current == ExecutionState.CANCELING) {
            LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
            return;
        }

        if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
            // If the transition does not succeed, the loop re-reads the state and retries.
            if (transitionState(current, targetState, cause)) {
                // if we manage this state transition, then the invokable gets never called
                // we need not call cancel on it
                this.failureCause = cause;
                return;
            }
        } else if (current == ExecutionState.RUNNING) {
            if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
                // we are canceling / failing out of the running state
                // we need to cancel the invokable

                // copy reference to guard against concurrent null-ing out the reference
                final AbstractInvokable invokable = this.invokable;

                // compareAndSet ensures the cancellation threads are started only once.
                if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
                    this.failureCause = cause;

                    LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

                    // because the canceling may block on user code, we cancel from a separate thread
                    // we do not reuse the async call handler, because that one may be blocked, in which
                    // case the canceling could not continue

                    // The canceller calls cancel and interrupts the executing thread once
                    Runnable canceler = new TaskCanceler(
                        LOG,
                        invokable,
                        executingThread,
                        taskNameWithSubtask,
                        producedPartitions,
                        inputGates);

                    Thread cancelThread = new Thread(
                        executingThread.getThreadGroup(),
                        canceler,
                        String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
                    cancelThread.setDaemon(true);
                    cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                    cancelThread.start();

                    // the periodic interrupting thread - a different thread than the canceller, in case
                    // the application code does blocking stuff in its cancellation paths.
                    if (invokable.shouldInterruptOnCancel()) {
                        Runnable interrupter = new TaskInterrupter(
                            LOG,
                            invokable,
                            executingThread,
                            taskNameWithSubtask,
                            taskCancellationInterval);

                        Thread interruptingThread = new Thread(
                            executingThread.getThreadGroup(),
                            interrupter,
                            String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
                        interruptingThread.setDaemon(true);
                        interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        interruptingThread.start();
                    }

                    // if a cancellation timeout is set, the watchdog thread kills the process
                    // if graceful cancellation does not succeed
                    if (taskCancellationTimeout > 0) {
                        Runnable cancelWatchdog = new TaskCancelerWatchDog(
                            executingThread,
                            taskManagerActions,
                            taskCancellationTimeout,
                            LOG);

                        Thread watchDogThread = new Thread(
                            executingThread.getThreadGroup(),
                            cancelWatchdog,
                            String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId));
                        watchDogThread.setDaemon(true);
                        watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        watchDogThread.start();
                    }
                }
                // The transition succeeded, so we are done even if no threads were started here.
                return;
            }
        } else {
            throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
                current, taskNameWithSubtask, executionId));
        }
    }
}