Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#RUNNING
The following examples show how to use org.apache.flink.runtime.execution.ExecutionState#RUNNING.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TaskExecutionStateTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Checks that two {@code TaskExecutionState} instances built from the same job ID,
 * execution attempt ID, state and error are equal and produce the same hash code.
 *
 * <p>Unexpected exceptions are left to propagate to the test runner so that the
 * complete stack trace ends up in the test report, instead of being printed to
 * stderr and reduced to a (possibly null) message.
 */
@Test
public void testEqualsHashCode() {
    final JobID jid = new JobID();
    final ExecutionAttemptID executionId = new ExecutionAttemptID();
    final ExecutionState state = ExecutionState.RUNNING;
    final Throwable error = new RuntimeException("some test error message");

    final TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
    final TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);

    assertEquals(s1.hashCode(), s2.hashCode());
    assertEquals(s1, s2);
}
Example 2
Source File: Task.java From flink with Apache License 2.0 | 6 votes |
/**
 * Hands an operator event over to the task's invokable.
 *
 * <p>If the invokable is not set or the task is not in state RUNNING, a
 * {@code TaskNotRunningException} is thrown right away. If dispatching throws while the
 * task is still RUNNING, the task is failed via {@code failExternally} and the wrapping
 * {@code FlinkException} is thrown to the caller, who may use it for error reporting but
 * does not need to fail the task again. If the task has left RUNNING by then, the
 * failure is not propagated.
 *
 * @param operator identifier passed through to the invokable together with the event
 * @param evt the serialized operator event to dispatch
 * @throws FlinkException describing why the delivery did not succeed
 */
public void deliverOperatorEvent(OperatorID operator, SerializedValue<OperatorEvent> evt) throws FlinkException {
    final AbstractInvokable target = this.invokable;

    final boolean deliverable = target != null && executionState == ExecutionState.RUNNING;
    if (!deliverable) {
        throw new TaskNotRunningException("Task is not yet running.");
    }

    try {
        target.dispatchOperatorEvent(operator, evt);
    } catch (Throwable failure) {
        ExceptionUtils.rethrowIfFatalErrorOrOOM(failure);

        if (getExecutionState() != ExecutionState.RUNNING) {
            // the task is no longer running, so the failure is not reported
            return;
        }

        final FlinkException wrapped = new FlinkException("Error while handling operator event", failure);
        failExternally(wrapped);
        throw wrapped;
    }
}
Example 3
Source File: Task.java From flink with Apache License 2.0 | 5 votes |
@Override public void notifyCheckpointComplete(final long checkpointID) { final AbstractInvokable invokable = this.invokable; if (executionState == ExecutionState.RUNNING && invokable != null) { try { invokable.notifyCheckpointCompleteAsync(checkpointID); } catch (RejectedExecutionException ex) { // This may happen if the mailbox is closed. It means that the task is shutting down, so we just ignore it. LOG.debug( "Notify checkpoint complete {} for {} ({}) was rejected by the mailbox", checkpointID, taskNameWithSubtask, executionId); } catch (Throwable t) { if (getExecutionState() == ExecutionState.RUNNING) { // fail task if checkpoint confirmation failed. failExternally(new RuntimeException( "Error while confirming checkpoint", t)); } } } else { LOG.debug("Ignoring checkpoint commit notification for non-running task {}.", taskNameWithSubtask); } }
Example 4
Source File: ExecutionJobVertex.java From flink with Apache License 2.0 | 5 votes |
/** * A utility function that computes an "aggregated" state for the vertex. * * <p>This state is not used anywhere in the coordination, but can be used for display * in dashboards to as a summary for how the particular parallel operation represented by * this ExecutionJobVertex is currently behaving. * * <p>For example, if at least one parallel task is failed, the aggregate state is failed. * If not, and at least one parallel task is cancelling (or cancelled), the aggregate state * is cancelling (or cancelled). If all tasks are finished, the aggregate state is finished, * and so on. * * @param verticesPerState The number of vertices in each state (indexed by the ordinal of * the ExecutionState values). * @param parallelism The parallelism of the ExecutionJobVertex * * @return The aggregate state of this ExecutionJobVertex. */ public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) { if (verticesPerState == null || verticesPerState.length != ExecutionState.values().length) { throw new IllegalArgumentException("Must provide an array as large as there are execution states."); } if (verticesPerState[ExecutionState.FAILED.ordinal()] > 0) { return ExecutionState.FAILED; } if (verticesPerState[ExecutionState.CANCELING.ordinal()] > 0) { return ExecutionState.CANCELING; } else if (verticesPerState[ExecutionState.CANCELED.ordinal()] > 0) { return ExecutionState.CANCELED; } else if (verticesPerState[ExecutionState.RUNNING.ordinal()] > 0) { return ExecutionState.RUNNING; } else if (verticesPerState[ExecutionState.FINISHED.ordinal()] > 0) { return verticesPerState[ExecutionState.FINISHED.ordinal()] == parallelism ? ExecutionState.FINISHED : ExecutionState.RUNNING; } else { // all else collapses under created return ExecutionState.CREATED; } }
Example 5
Source File: Task.java From flink with Apache License 2.0 | 5 votes |
/** * Calls the invokable to trigger a checkpoint. * * @param checkpointID The ID identifying the checkpoint. * @param checkpointTimestamp The timestamp associated with the checkpoint. * @param checkpointOptions Options for performing this checkpoint. * @param advanceToEndOfEventTime Flag indicating if the source should inject a {@code MAX_WATERMARK} in the pipeline * to fire any registered event-time timers. */ public void triggerCheckpointBarrier( final long checkpointID, final long checkpointTimestamp, final CheckpointOptions checkpointOptions, final boolean advanceToEndOfEventTime) { final AbstractInvokable invokable = this.invokable; final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointID, checkpointTimestamp); if (executionState == ExecutionState.RUNNING && invokable != null) { try { invokable.triggerCheckpointAsync(checkpointMetaData, checkpointOptions, advanceToEndOfEventTime); } catch (RejectedExecutionException ex) { // This may happen if the mailbox is closed. It means that the task is shutting down, so we just ignore it. LOG.debug( "Triggering checkpoint {} for {} ({}) was rejected by the mailbox", checkpointID, taskNameWithSubtask, executionId); } catch (Throwable t) { if (getExecutionState() == ExecutionState.RUNNING) { failExternally(new Exception( "Error while triggering checkpoint " + checkpointID + " for " + taskNameWithSubtask, t)); } else { LOG.debug("Encountered error while triggering checkpoint {} for " + "{} ({}) while being not in state running.", checkpointID, taskNameWithSubtask, executionId, t); } } } else { LOG.debug("Declining checkpoint request for non-running task {} ({}).", taskNameWithSubtask, executionId); // send back a message that we did not do the checkpoint checkpointResponder.declineCheckpoint(jobId, executionId, checkpointID, new CheckpointException("Task name with subtask : " + taskNameWithSubtask, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY)); } }
Example 6
Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tells whether the producer state obtained from the response handle is one of
 * SCHEDULED, DEPLOYING, RUNNING or FINISHED.
 *
 * @param responseHandle handle from which the producer state is looked up
 * @return {@code true} if the producer is in one of the four states listed above
 */
private boolean isProducerConsumerReady(ResponseHandle responseHandle) {
    final ExecutionState state = getProducerState(responseHandle);

    if (state == ExecutionState.SCHEDULED || state == ExecutionState.DEPLOYING) {
        return true;
    }
    return state == ExecutionState.RUNNING || state == ExecutionState.FINISHED;
}
Example 7
Source File: StreamTaskTest.java From flink with Apache License 2.0 | 5 votes |
/** * This test checks that cancel calls that are issued before the operator is * instantiated still lead to proper canceling. */ @Test public void testEarlyCanceling() throws Exception { final StreamConfig cfg = new StreamConfig(new Configuration()); cfg.setOperatorID(new OperatorID(4711L, 42L)); cfg.setStreamOperator(new SlowlyDeserializingOperator()); cfg.setTimeCharacteristic(TimeCharacteristic.ProcessingTime); final TaskManagerActions taskManagerActions = spy(new NoOpTaskManagerActions()); final Task task = createTask(SourceStreamTask.class, cfg, new Configuration(), taskManagerActions); final TaskExecutionState state = new TaskExecutionState( task.getJobID(), task.getExecutionId(), ExecutionState.RUNNING); task.startTaskThread(); verify(taskManagerActions, timeout(2000L)).updateTaskExecutionState(eq(state)); // send a cancel. because the operator takes a long time to deserialize, this should // hit the task before the operator is deserialized task.cancelExecution(); task.getExecutingThread().join(); assertFalse("Task did not cancel", task.getExecutingThread().isAlive()); assertEquals(ExecutionState.CANCELED, task.getExecutionState()); }
Example 8
Source File: CheckpointCoordinator.java From flink with Apache License 2.0 | 5 votes |
/**
 * Collects the current execution attempt of every task that has to be triggered and
 * makes sure each of them is RUNNING. The checkpoint is aborted otherwise.
 *
 * @return the executions that need to be triggered, in the order of {@code tasksToTrigger}
 * @throws CheckpointException if a task has no current execution attempt or its
 *                             execution is not in state RUNNING
 */
private Execution[] getTriggerExecutions() throws CheckpointException {
    final Execution[] result = new Execution[tasksToTrigger.length];

    for (int idx = 0; idx < tasksToTrigger.length; idx++) {
        final Execution attempt = tasksToTrigger[idx].getCurrentExecutionAttempt();

        if (attempt == null) {
            LOG.info(
                "Checkpoint triggering task {} of job {} is not being executed at the moment. Aborting checkpoint.",
                tasksToTrigger[idx].getTaskNameWithSubtaskIndex(),
                job);
            throw new CheckpointException(CheckpointFailureReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
        }

        if (attempt.getState() != ExecutionState.RUNNING) {
            LOG.info(
                "Checkpoint triggering task {} of job {} is not in state {} but {} instead. Aborting checkpoint.",
                tasksToTrigger[idx].getTaskNameWithSubtaskIndex(),
                job,
                ExecutionState.RUNNING,
                attempt.getState());
            throw new CheckpointException(CheckpointFailureReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
        }

        result[idx] = attempt;
    }

    return result;
}
Example 9
Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reports whether the producer state looked up via the response handle is SCHEDULED,
 * DEPLOYING, RUNNING or FINISHED.
 *
 * @param responseHandle handle used to look up the producer state
 * @return {@code true} for the four states above, {@code false} for any other state
 *         (including a missing one)
 */
private boolean isProducerConsumerReady(ResponseHandle responseHandle) {
    final ExecutionState state = getProducerState(responseHandle);

    // switch would throw on null, whereas the comparison-based check simply yields false
    if (state == null) {
        return false;
    }

    switch (state) {
        case SCHEDULED:
        case DEPLOYING:
        case RUNNING:
        case FINISHED:
            return true;
        default:
            return false;
    }
}
Example 10
Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks whether the consumer is in a state in which consumption is considered valid,
 * i.e. RUNNING or DEPLOYING.
 *
 * @param consumerExecutionState the consumer's execution state
 * @return {@code true} if the state is RUNNING or DEPLOYING
 */
private static boolean isConsumerStateValidForConsumption(ExecutionState consumerExecutionState) {
    if (consumerExecutionState == ExecutionState.RUNNING) {
        return true;
    }
    return consumerExecutionState == ExecutionState.DEPLOYING;
}
Example 11
Source File: YARNHighAvailabilityITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a predicate that matches exactly the RUNNING execution state.
 *
 * @return predicate that is {@code true} only for {@code ExecutionState.RUNNING}
 */
private static Predicate<ExecutionState> isRunning() {
    // Enum equality is identity, so the bound method reference matches the == comparison
    return ExecutionState.RUNNING::equals;
}
Example 12
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/** * Calls the invokable to trigger a checkpoint. * * @param checkpointID The ID identifying the checkpoint. * @param checkpointTimestamp The timestamp associated with the checkpoint. * @param checkpointOptions Options for performing this checkpoint. * @param advanceToEndOfEventTime Flag indicating if the source should inject a {@code MAX_WATERMARK} in the pipeline * to fire any registered event-time timers. */ public void triggerCheckpointBarrier( final long checkpointID, final long checkpointTimestamp, final CheckpointOptions checkpointOptions, final boolean advanceToEndOfEventTime) { final AbstractInvokable invokable = this.invokable; final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointID, checkpointTimestamp); if (executionState == ExecutionState.RUNNING && invokable != null) { // build a local closure final String taskName = taskNameWithSubtask; final SafetyNetCloseableRegistry safetyNetCloseableRegistry = FileSystemSafetyNet.getSafetyNetCloseableRegistryForThread(); Runnable runnable = new Runnable() { @Override public void run() { // set safety net from the task's context for checkpointing thread LOG.debug("Creating FileSystem stream leak safety net for {}", Thread.currentThread().getName()); FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(safetyNetCloseableRegistry); try { boolean success = invokable.triggerCheckpoint(checkpointMetaData, checkpointOptions, advanceToEndOfEventTime); if (!success) { checkpointResponder.declineCheckpoint( getJobID(), getExecutionId(), checkpointID, new CheckpointException("Task Name" + taskName, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY)); } } catch (Throwable t) { if (getExecutionState() == ExecutionState.RUNNING) { failExternally(new Exception( "Error while triggering checkpoint " + checkpointID + " for " + taskNameWithSubtask, t)); } else { LOG.debug("Encountered error while triggering checkpoint {} for " + "{} ({}) while being not in state running.", checkpointID, 
taskNameWithSubtask, executionId, t); } } finally { FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(null); } } }; executeAsyncCallRunnable( runnable, String.format("Checkpoint Trigger for %s (%s).", taskNameWithSubtask, executionId)); } else { LOG.debug("Declining checkpoint request for non-running task {} ({}).", taskNameWithSubtask, executionId); // send back a message that we did not do the checkpoint checkpointResponder.declineCheckpoint(jobId, executionId, checkpointID, new CheckpointException("Task name with subtask : " + taskNameWithSubtask, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY)); } }
Example 13
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Moves the task into {@code targetState} and, if the task was already running, starts
 * the cancellation of the invokable.
 *
 * <p>The method loops until one of the following happens: the task is found in a terminal
 * state or in CANCELING (nothing to do), a state transition out of DEPLOYING/CREATED or
 * RUNNING succeeds, or an unexpected state is observed. A failed {@code transitionState}
 * call simply causes another iteration with a freshly read state.
 *
 * @param targetState the state to transition into
 * @param cause the cause passed to the state transition and stored as failure cause
 * @throws IllegalStateException if the current state is none of the handled states
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
    while (true) {
        // read the state once per iteration; all checks below work on this snapshot
        ExecutionState current = executionState;

        // if the task is already canceled (or canceling) or finished or failed,
        // then we need not do anything
        if (current.isTerminal() || current == ExecutionState.CANCELING) {
            LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
            return;
        }

        if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
            if (transitionState(current, targetState, cause)) {
                // if we manage this state transition, then the invokable gets never called
                // we need not call cancel on it
                this.failureCause = cause;
                return;
            }
        } else if (current == ExecutionState.RUNNING) {
            if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
                // we are canceling / failing out of the running state
                // we need to cancel the invokable

                // copy reference to guard against concurrent null-ing out the reference
                final AbstractInvokable invokable = this.invokable;

                // compareAndSet lets only the first caller start the cancellation threads
                if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
                    this.failureCause = cause;

                    LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

                    // because the canceling may block on user code, we cancel from a separate thread
                    // we do not reuse the async call handler, because that one may be blocked, in which
                    // case the canceling could not continue

                    // The canceller calls cancel and interrupts the executing thread once
                    Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

                    Thread cancelThread = new Thread(
                        executingThread.getThreadGroup(),
                        canceler,
                        String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
                    cancelThread.setDaemon(true);
                    cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                    cancelThread.start();

                    // the periodic interrupting thread - a different thread than the canceller, in case
                    // the application code does blocking stuff in its cancellation paths.
                    if (invokable.shouldInterruptOnCancel()) {
                        Runnable interrupter = new TaskInterrupter(
                            LOG,
                            invokable,
                            executingThread,
                            taskNameWithSubtask,
                            taskCancellationInterval);

                        Thread interruptingThread = new Thread(
                            executingThread.getThreadGroup(),
                            interrupter,
                            String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
                        interruptingThread.setDaemon(true);
                        interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        interruptingThread.start();
                    }

                    // if a cancellation timeout is set, the watchdog thread kills the process
                    // if graceful cancellation does not succeed
                    if (taskCancellationTimeout > 0) {
                        Runnable cancelWatchdog = new TaskCancelerWatchDog(
                            executingThread,
                            taskManagerActions,
                            taskCancellationTimeout,
                            LOG);

                        Thread watchDogThread = new Thread(
                            executingThread.getThreadGroup(),
                            cancelWatchdog,
                            String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId));
                        watchDogThread.setDaemon(true);
                        watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        watchDogThread.start();
                    }
                }
                // the transition succeeded, so we are done even if no threads were started
                return;
            }
        } else {
            throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
                current, taskNameWithSubtask, executionId));
        }
    }
}
Example 14
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Reports whether the task's execution state is currently RUNNING.
 *
 * @return {@code true} if the execution state equals {@code ExecutionState.RUNNING}
 */
@Override
public boolean isRunning() {
    final ExecutionState current = executionState;
    return ExecutionState.RUNNING == current;
}
Example 15
Source File: TaskDeploymentDescriptorFactory.java From flink with Apache License 2.0 | 4 votes |
/**
 * Decides whether a producer in the given state counts as available, which is the case
 * for RUNNING, FINISHED, SCHEDULED and DEPLOYING.
 *
 * @param producerState the producer's execution state
 * @return {@code true} if the state is one of the four states above
 */
private static boolean isProducerAvailable(ExecutionState producerState) {
    final boolean runningOrFinished =
        producerState == ExecutionState.RUNNING || producerState == ExecutionState.FINISHED;
    final boolean scheduledOrDeploying =
        producerState == ExecutionState.SCHEDULED || producerState == ExecutionState.DEPLOYING;

    return runningOrFinished || scheduledOrDeploying;
}
Example 16
Source File: InputChannelDeploymentDescriptorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests the deployment descriptors for local, remote, and unknown partition * locations (with lazy deployment allowed and all execution states for the * producers). */ @Test public void testMixedLocalRemoteUnknownDeployment() throws Exception { boolean allowLazyDeployment = true; ResourceID consumerResourceId = ResourceID.generate(); ExecutionVertex consumer = mock(ExecutionVertex.class); LogicalSlot consumerSlot = mockSlot(consumerResourceId); // Local and remote channel are only allowed for certain execution // states. for (ExecutionState state : ExecutionState.values()) { // Local partition ExecutionVertex localProducer = mockExecutionVertex(state, consumerResourceId); IntermediateResultPartition localPartition = mockPartition(localProducer); ResultPartitionID localPartitionId = new ResultPartitionID(localPartition.getPartitionId(), localProducer.getCurrentExecutionAttempt().getAttemptId()); ExecutionEdge localEdge = new ExecutionEdge(localPartition, consumer, 0); // Remote partition ExecutionVertex remoteProducer = mockExecutionVertex(state, ResourceID.generate()); // new resource ID IntermediateResultPartition remotePartition = mockPartition(remoteProducer); ResultPartitionID remotePartitionId = new ResultPartitionID(remotePartition.getPartitionId(), remoteProducer.getCurrentExecutionAttempt().getAttemptId()); ConnectionID remoteConnectionId = new ConnectionID(remoteProducer.getCurrentAssignedResource().getTaskManagerLocation(), 0); ExecutionEdge remoteEdge = new ExecutionEdge(remotePartition, consumer, 1); // Unknown partition ExecutionVertex unknownProducer = mockExecutionVertex(state, null); // no assigned resource IntermediateResultPartition unknownPartition = mockPartition(unknownProducer); ResultPartitionID unknownPartitionId = new ResultPartitionID(unknownPartition.getPartitionId(), unknownProducer.getCurrentExecutionAttempt().getAttemptId()); ExecutionEdge unknownEdge = new ExecutionEdge(unknownPartition, consumer, 2); 
InputChannelDeploymentDescriptor[] desc = InputChannelDeploymentDescriptor.fromEdges( new ExecutionEdge[]{localEdge, remoteEdge, unknownEdge}, consumerSlot.getTaskManagerLocation().getResourceID(), allowLazyDeployment); assertEquals(3, desc.length); // These states are allowed if (state == ExecutionState.RUNNING || state == ExecutionState.FINISHED || state == ExecutionState.SCHEDULED || state == ExecutionState.DEPLOYING) { // Create local or remote channels assertEquals(localPartitionId, desc[0].getConsumedPartitionId()); assertTrue(desc[0].getConsumedPartitionLocation().isLocal()); assertNull(desc[0].getConsumedPartitionLocation().getConnectionId()); assertEquals(remotePartitionId, desc[1].getConsumedPartitionId()); assertTrue(desc[1].getConsumedPartitionLocation().isRemote()); assertEquals(remoteConnectionId, desc[1].getConsumedPartitionLocation().getConnectionId()); } else { // Unknown (lazy deployment allowed) assertEquals(localPartitionId, desc[0].getConsumedPartitionId()); assertTrue(desc[0].getConsumedPartitionLocation().isUnknown()); assertNull(desc[0].getConsumedPartitionLocation().getConnectionId()); assertEquals(remotePartitionId, desc[1].getConsumedPartitionId()); assertTrue(desc[1].getConsumedPartitionLocation().isUnknown()); assertNull(desc[1].getConsumedPartitionLocation().getConnectionId()); } assertEquals(unknownPartitionId, desc[2].getConsumedPartitionId()); assertTrue(desc[2].getConsumedPartitionLocation().isUnknown()); assertNull(desc[2].getConsumedPartitionLocation().getConnectionId()); } }
Example 17
Source File: BackPressureRequestCoordinator.java From flink with Apache License 2.0 | 4 votes |
/** * Triggers a task back pressure stats request to all tasks. * * @param tasks Tasks to request. * @return A future of the completed task back pressure stats. */ CompletableFuture<BackPressureStats> triggerBackPressureRequest(ExecutionVertex[] tasks) { checkNotNull(tasks, "Tasks to request must not be null."); checkArgument(tasks.length >= 1, "No tasks to request."); // Execution IDs of running tasks ExecutionAttemptID[] triggerIds = new ExecutionAttemptID[tasks.length]; Execution[] executions = new Execution[tasks.length]; // Check that all tasks are RUNNING before triggering anything. The // triggering can still fail. for (int i = 0; i < triggerIds.length; i++) { Execution execution = tasks[i].getCurrentExecutionAttempt(); if (execution != null && execution.getState() == ExecutionState.RUNNING) { executions[i] = execution; triggerIds[i] = execution.getAttemptId(); } else { return FutureUtils.completedExceptionally(new IllegalStateException("Task " + tasks[i] .getTaskNameWithSubtaskIndex() + " is not running.")); } } synchronized (lock) { if (isShutDown) { return FutureUtils.completedExceptionally(new IllegalStateException("Shut down.")); } int requestId = requestIdCounter++; LOG.debug("Triggering task back pressure request {}.", requestId); PendingBackPressureRequest pending = new PendingBackPressureRequest(requestId, triggerIds); // Add the pending request before scheduling the discard task to // prevent races with removing it again. pendingRequests.put(requestId, pending); requestBackPressure(executions, requestId); return pending.getBackPressureStatsFuture(); } }
Example 18
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Utility method to dispatch an asynchronous call on the invokable.
 *
 * <p>The call is silently dropped when the task is not in state RUNNING. The executor
 * used for the calls is created on first use while holding the monitor of this object;
 * if the task leaves RUNNING while the executor is being created, the executor is shut
 * down again and the call is dropped.
 *
 * @param runnable The async call runnable.
 * @param callName The name of the call, for logging purposes.
 * @throws RuntimeException if the executor rejects the runnable although the task is
 *                          still in state RUNNING
 */
private void executeAsyncCallRunnable(Runnable runnable, String callName) {
    // make sure the executor is initialized. lock against concurrent calls to this function
    synchronized (this) {
        // nothing is dispatched for a task that is not running
        if (executionState != ExecutionState.RUNNING) {
            return;
        }

        // get ourselves a reference on the stack that cannot be concurrently modified
        ExecutorService executor = this.asyncCallDispatcher;
        if (executor == null) {
            // first time use, initialize
            checkState(userCodeClassLoader != null, "userCodeClassLoader must not be null");
            executor = Executors.newSingleThreadExecutor(
                new DispatcherThreadFactory(
                    TASK_THREADS_GROUP,
                    "Async calls on " + taskNameWithSubtask,
                    userCodeClassLoader));
            this.asyncCallDispatcher = executor;

            // double-check for execution state, and make sure we clean up after ourselves
            // if we created the dispatcher while the task was concurrently canceled
            if (executionState != ExecutionState.RUNNING) {
                executor.shutdown();
                asyncCallDispatcher = null;
                return;
            }
        }

        LOG.debug("Invoking async call {} on task {}", callName, taskNameWithSubtask);

        try {
            executor.submit(runnable);
        }
        catch (RejectedExecutionException e) {
            // may be that we are concurrently finished or canceled.
            // if not, report that something is fishy
            if (executionState == ExecutionState.RUNNING) {
                throw new RuntimeException("Async call was rejected, even though the task is running.", e);
            }
        }
    }
}
Example 19
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Answer to a partition state check issued after a failed partition request. */ @VisibleForTesting void onPartitionStateUpdate( IntermediateDataSetID intermediateDataSetId, ResultPartitionID resultPartitionId, ExecutionState producerState) throws IOException, InterruptedException { if (executionState == ExecutionState.RUNNING) { final SingleInputGate inputGate = inputGatesById.get(intermediateDataSetId); if (inputGate != null) { if (producerState == ExecutionState.SCHEDULED || producerState == ExecutionState.DEPLOYING || producerState == ExecutionState.RUNNING || producerState == ExecutionState.FINISHED) { // Retrigger the partition request inputGate.retriggerPartitionRequest(resultPartitionId.getPartitionId()); } else if (producerState == ExecutionState.CANCELING || producerState == ExecutionState.CANCELED || producerState == ExecutionState.FAILED) { // The producing execution has been canceled or failed. We // don't need to re-trigger the request since it cannot // succeed. if (LOG.isDebugEnabled()) { LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.", taskNameWithSubtask, resultPartitionId.getPartitionId(), resultPartitionId.getProducerId(), producerState); } cancelExecution(); } else { // Any other execution state is unexpected. Currently, only // state CREATED is left out of the checked states. If we // see a producer in this state, something went wrong with // scheduling in topological order. String msg = String.format("Producer with attempt ID %s of partition %s in unexpected state %s.", resultPartitionId.getProducerId(), resultPartitionId.getPartitionId(), producerState); failExternally(new IllegalStateException(msg)); } } else { failExternally(new IllegalStateException("Received partition producer state for " + "unknown input gate " + intermediateDataSetId + ".")); } } else { LOG.debug("Task {} ignored a partition producer state notification, because it's not running.", taskNameWithSubtask); } }
Example 20
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
/**
 * Moves the task into {@code targetState} and, if the task was already running, starts
 * the cancellation of the invokable.
 *
 * <p>The method loops until one of the following happens: the task is found in a terminal
 * state or in CANCELING (nothing to do), a state transition out of DEPLOYING/CREATED or
 * RUNNING succeeds, or an unexpected state is observed. A failed {@code transitionState}
 * call simply causes another iteration with a freshly read state.
 *
 * @param targetState the state to transition into
 * @param cause the cause passed to the state transition and stored as failure cause
 * @throws IllegalStateException if the current state is none of the handled states
 */
@VisibleForTesting
void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) {
    while (true) {
        // read the state once per iteration; all checks below work on this snapshot
        ExecutionState current = executionState;

        // if the task is already canceled (or canceling) or finished or failed,
        // then we need not do anything
        if (current.isTerminal() || current == ExecutionState.CANCELING) {
            LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
            return;
        }

        if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
            if (transitionState(current, targetState, cause)) {
                // if we manage this state transition, then the invokable gets never called
                // we need not call cancel on it
                this.failureCause = cause;
                return;
            }
        } else if (current == ExecutionState.RUNNING) {
            if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
                // we are canceling / failing out of the running state
                // we need to cancel the invokable

                // copy reference to guard against concurrent null-ing out the reference
                final AbstractInvokable invokable = this.invokable;

                // compareAndSet lets only the first caller start the cancellation threads
                if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
                    this.failureCause = cause;

                    LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

                    // because the canceling may block on user code, we cancel from a separate thread
                    // we do not reuse the async call handler, because that one may be blocked, in which
                    // case the canceling could not continue

                    // The canceller calls cancel and interrupts the executing thread once
                    Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

                    Thread cancelThread = new Thread(
                        executingThread.getThreadGroup(),
                        canceler,
                        String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
                    cancelThread.setDaemon(true);
                    cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                    cancelThread.start();

                    // the periodic interrupting thread - a different thread than the canceller, in case
                    // the application code does blocking stuff in its cancellation paths.
                    if (invokable.shouldInterruptOnCancel()) {
                        Runnable interrupter = new TaskInterrupter(
                            LOG,
                            invokable,
                            executingThread,
                            taskNameWithSubtask,
                            taskCancellationInterval);

                        Thread interruptingThread = new Thread(
                            executingThread.getThreadGroup(),
                            interrupter,
                            String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
                        interruptingThread.setDaemon(true);
                        interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        interruptingThread.start();
                    }

                    // if a cancellation timeout is set, the watchdog thread kills the process
                    // if graceful cancellation does not succeed
                    if (taskCancellationTimeout > 0) {
                        Runnable cancelWatchdog = new TaskCancelerWatchDog(
                            executingThread,
                            taskManagerActions,
                            taskCancellationTimeout);

                        Thread watchDogThread = new Thread(
                            executingThread.getThreadGroup(),
                            cancelWatchdog,
                            String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId));
                        watchDogThread.setDaemon(true);
                        watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
                        watchDogThread.start();
                    }
                }
                // the transition succeeded, so we are done even if no threads were started
                return;
            }
        } else {
            throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
                current, taskNameWithSubtask, executionId));
        }
    }
}