org.apache.flink.runtime.execution.ExecutionState#RUNNING

Source File: TaskExecutionStateTest.java From flink with Apache License 2.0

6 votes

/**
 * Verifies that two {@code TaskExecutionState} instances created from the same job ID,
 * execution attempt ID, execution state and error are equal and share the same hash code.
 *
 * <p>Unexpected exceptions are left to propagate so that the test report shows the
 * original exception type and stack trace instead of only its message.
 */
@Test
public void testEqualsHashCode() throws Exception {
	final JobID jid = new JobID();
	final ExecutionAttemptID executionId = new ExecutionAttemptID();
	final ExecutionState state = ExecutionState.RUNNING;
	final Throwable error = new RuntimeException("some test error message");

	final TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
	final TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);

	assertEquals(s1.hashCode(), s2.hashCode());
	assertEquals(s1, s2);
}

Source File: Task.java From flink with Apache License 2.0

6 votes

/**
 * Forwards an operator event to the task's invokable.
 *
 * <p>If dispatching fails while the task is still in state RUNNING, this method fails the
 * task itself and then throws. Callers may use the exception for error reporting, but do
 * not have to fail the task on their own.
 *
 * @param operator The ID of the operator the event is addressed to.
 * @param evt The serialized operator event.
 * @throws FlinkException Indicates why delivery did not succeed: a
 *         {@code TaskNotRunningException} when there is no invokable or the task is not in
 *         state RUNNING, otherwise a wrapper around the failure raised during dispatch.
 */
public void deliverOperatorEvent(OperatorID operator, SerializedValue<OperatorEvent> evt) throws FlinkException {
	// copy the reference so that the check and the call below see the same object
	final AbstractInvokable target = this.invokable;

	final boolean deliverable = target != null && executionState == ExecutionState.RUNNING;
	if (!deliverable) {
		throw new TaskNotRunningException("Task is not yet running.");
	}

	try {
		target.dispatchOperatorEvent(operator, evt);
	}
	catch (Throwable failure) {
		ExceptionUtils.rethrowIfFatalErrorOrOOM(failure);

		// a failure observed after the task has left RUNNING is not reported
		if (getExecutionState() != ExecutionState.RUNNING) {
			return;
		}

		final FlinkException wrapped = new FlinkException("Error while handling operator event", failure);
		failExternally(wrapped);
		throw wrapped;
	}
}

Source File: Task.java From flink with Apache License 2.0

5 votes

/**
 * Relays the completion of a checkpoint to the invokable, provided the task is in state
 * RUNNING and has an invokable; otherwise the notification is only logged and dropped.
 *
 * <p>A rejection by the mailbox is logged and ignored. Any other failure fails the task,
 * as long as it is still in state RUNNING at that point.
 *
 * @param checkpointID The ID of the checkpoint that has been completed.
 */
@Override
public void notifyCheckpointComplete(final long checkpointID) {
	final AbstractInvokable target = this.invokable;

	if (executionState != ExecutionState.RUNNING || target == null) {
		LOG.debug("Ignoring checkpoint commit notification for non-running task {}.", taskNameWithSubtask);
		return;
	}

	try {
		target.notifyCheckpointCompleteAsync(checkpointID);
	}
	catch (RejectedExecutionException rejected) {
		// The mailbox may already be closed, which means the task is shutting down; nothing to do.
		LOG.debug(
			"Notify checkpoint complete {} for {} ({}) was rejected by the mailbox",
			checkpointID, taskNameWithSubtask, executionId);
	}
	catch (Throwable failure) {
		// a failed checkpoint confirmation fails the task, unless it already left RUNNING
		if (getExecutionState() == ExecutionState.RUNNING) {
			failExternally(new RuntimeException("Error while confirming checkpoint", failure));
		}
	}
}

Source File: ExecutionJobVertex.java From flink with Apache License 2.0

5 votes

/**
 * Derives a single "aggregated" state for the vertex from its per-state task counts.
 *
 * <p>This state is not used anywhere in the coordination, but can be used for display in
 * dashboards as a summary of how the parallel operation represented by this
 * ExecutionJobVertex is currently behaving.
 *
 * <p>The states are checked in order of precedence: one failed task makes the aggregate
 * FAILED; otherwise one canceling task makes it CANCELING; otherwise one canceled task
 * makes it CANCELED; otherwise one running task makes it RUNNING. If none of these apply
 * and some tasks are finished, the aggregate is FINISHED when all {@code parallelism}
 * tasks are finished and RUNNING when only part of them are. Everything else is CREATED.
 *
 * @param verticesPerState The number of vertices in each state (indexed by the ordinal of
 *                         the ExecutionState values).
 * @param parallelism The parallelism of the ExecutionJobVertex.
 *
 * @return The aggregate state of this ExecutionJobVertex.
 * @throws IllegalArgumentException If the array is null or its length differs from the
 *                                  number of ExecutionState values.
 */
public static ExecutionState getAggregateJobVertexState(int[] verticesPerState, int parallelism) {
	final boolean wellFormed =
		verticesPerState != null && verticesPerState.length == ExecutionState.values().length;
	if (!wellFormed) {
		throw new IllegalArgumentException("Must provide an array as large as there are execution states.");
	}

	// states that decide the result as soon as a single task is in them, highest precedence first
	final ExecutionState[] dominantStates = {
		ExecutionState.FAILED,
		ExecutionState.CANCELING,
		ExecutionState.CANCELED,
		ExecutionState.RUNNING
	};
	for (ExecutionState candidate : dominantStates) {
		if (verticesPerState[candidate.ordinal()] > 0) {
			return candidate;
		}
	}

	final int finishedCount = verticesPerState[ExecutionState.FINISHED.ordinal()];
	if (finishedCount > 0) {
		// partially finished vertices are still reported as running
		return finishedCount == parallelism ? ExecutionState.FINISHED : ExecutionState.RUNNING;
	}

	// all else collapses under created
	return ExecutionState.CREATED;
}

Source File: Task.java From flink with Apache License 2.0

5 votes

/**
 * Asks the invokable to trigger a checkpoint.
 *
 * <p>If the task is not in state RUNNING or has no invokable, the checkpoint is declined
 * through the checkpoint responder. A rejection by the mailbox is logged and ignored. Any
 * other failure fails the task if it is still RUNNING, and is only logged otherwise.
 *
 * @param checkpointID The ID identifying the checkpoint.
 * @param checkpointTimestamp The timestamp associated with the checkpoint.
 * @param checkpointOptions Options for performing this checkpoint.
 * @param advanceToEndOfEventTime Flag indicating if the source should inject a {@code MAX_WATERMARK} in the pipeline
 *                           to fire any registered event-time timers.
 */
public void triggerCheckpointBarrier(
		final long checkpointID,
		final long checkpointTimestamp,
		final CheckpointOptions checkpointOptions,
		final boolean advanceToEndOfEventTime) {

	final AbstractInvokable target = this.invokable;
	final CheckpointMetaData metaData = new CheckpointMetaData(checkpointID, checkpointTimestamp);

	if (executionState != ExecutionState.RUNNING || target == null) {
		LOG.debug("Declining checkpoint request for non-running task {} ({}).", taskNameWithSubtask, executionId);

		// report back that this task did not perform the checkpoint
		checkpointResponder.declineCheckpoint(jobId, executionId, checkpointID,
				new CheckpointException("Task name with subtask : " + taskNameWithSubtask, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY));
		return;
	}

	try {
		target.triggerCheckpointAsync(metaData, checkpointOptions, advanceToEndOfEventTime);
	}
	catch (RejectedExecutionException rejected) {
		// The mailbox may already be closed, which means the task is shutting down; nothing to do.
		LOG.debug(
			"Triggering checkpoint {} for {} ({}) was rejected by the mailbox",
			checkpointID, taskNameWithSubtask, executionId);
	}
	catch (Throwable failure) {
		if (getExecutionState() != ExecutionState.RUNNING) {
			LOG.debug(
				"Encountered error while triggering checkpoint {} for {} ({}) while being not in state running.",
				checkpointID, taskNameWithSubtask, executionId, failure);
			return;
		}

		failExternally(new Exception(
			"Error while triggering checkpoint " + checkpointID + " for " + taskNameWithSubtask,
			failure));
	}
}

Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0

5 votes

/**
 * Checks whether the producer state obtained from the given response handle is one of
 * SCHEDULED, DEPLOYING, RUNNING or FINISHED.
 *
 * @param responseHandle The handle the producer state is read from.
 * @return {@code true} if the producer is in one of the four states above.
 */
private boolean isProducerConsumerReady(ResponseHandle responseHandle) {
	final ExecutionState state = getProducerState(responseHandle);

	if (state == ExecutionState.SCHEDULED || state == ExecutionState.DEPLOYING) {
		return true;
	}
	return state == ExecutionState.RUNNING || state == ExecutionState.FINISHED;
}

Source File: StreamTaskTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks that a cancel call issued before the operator has been instantiated still
 * cancels the task properly.
 */
@Test
public void testEarlyCanceling() throws Exception {
	final StreamConfig streamConfig = new StreamConfig(new Configuration());
	streamConfig.setOperatorID(new OperatorID(4711L, 42L));
	streamConfig.setStreamOperator(new SlowlyDeserializingOperator());
	streamConfig.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	final TaskManagerActions actionsSpy = spy(new NoOpTaskManagerActions());
	final Task task = createTask(SourceStreamTask.class, streamConfig, new Configuration(), actionsSpy);

	// the state update that is expected once the task thread has been started
	final TaskExecutionState expectedRunningUpdate =
		new TaskExecutionState(task.getJobID(), task.getExecutionId(), ExecutionState.RUNNING);

	task.startTaskThread();

	verify(actionsSpy, timeout(2000L)).updateTaskExecutionState(eq(expectedRunningUpdate));

	// The operator is slow to deserialize, so this cancel should reach the task
	// before deserialization of the operator has completed.
	task.cancelExecution();

	task.getExecutingThread().join();

	assertFalse("Task did not cancel", task.getExecutingThread().isAlive());
	assertEquals(ExecutionState.CANCELED, task.getExecutionState());
}

Source File: CheckpointCoordinator.java From flink with Apache License 2.0

5 votes

/**
 * Checks that all tasks that need to be triggered are running. If not, the checkpoint is
 * aborted.
 *
 * <p>The state of each execution is read exactly once, so the state reported in the log
 * message is the same state that made the check fail.
 *
 * @return The current execution attempts of the tasks to trigger, in the order of
 *         {@code tasksToTrigger}.
 * @throws CheckpointException If a task has no current execution attempt or its execution
 *                             is not in state RUNNING.
 */
private Execution[] getTriggerExecutions() throws CheckpointException {
	final Execution[] executions = new Execution[tasksToTrigger.length];
	for (int i = 0; i < tasksToTrigger.length; i++) {
		final Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
		if (ee == null) {
			LOG.info(
				"Checkpoint triggering task {} of job {} is not being executed at the moment. Aborting checkpoint.",
				tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
				job);
			throw new CheckpointException(
				CheckpointFailureReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
		}

		// snapshot the state: the execution may change state concurrently, and a second read
		// for the log message could otherwise report a state that differs from the checked one
		final ExecutionState currentState = ee.getState();
		if (currentState != ExecutionState.RUNNING) {
			LOG.info(
				"Checkpoint triggering task {} of job {} is not in state {} but {} instead. Aborting checkpoint.",
				tasksToTrigger[i].getTaskNameWithSubtaskIndex(),
				job,
				ExecutionState.RUNNING,
				currentState);
			throw new CheckpointException(
				CheckpointFailureReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
		}

		executions[i] = ee;
	}
	return executions;
}

Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0

5 votes

/**
 * Checks whether the producer state obtained from the given response handle is one of
 * SCHEDULED, DEPLOYING, RUNNING or FINISHED.
 *
 * @param responseHandle The handle the producer state is read from.
 * @return {@code true} if the producer is in one of the four states above.
 */
private boolean isProducerConsumerReady(ResponseHandle responseHandle) {
	final ExecutionState state = getProducerState(responseHandle);

	if (state == ExecutionState.SCHEDULED || state == ExecutionState.DEPLOYING) {
		return true;
	}
	return state == ExecutionState.RUNNING || state == ExecutionState.FINISHED;
}

Source File: RemoteChannelStateChecker.java From flink with Apache License 2.0

4 votes

/**
 * Checks whether the given consumer state is RUNNING or DEPLOYING.
 *
 * @param consumerExecutionState The execution state of the consumer.
 * @return {@code true} if the state is RUNNING or DEPLOYING, {@code false} otherwise.
 */
private static boolean isConsumerStateValidForConsumption(
		ExecutionState consumerExecutionState) {
	if (consumerExecutionState == ExecutionState.RUNNING) {
		return true;
	}
	return consumerExecutionState == ExecutionState.DEPLOYING;
}

Source File: YARNHighAvailabilityITCase.java From flink with Apache License 2.0

4 votes

/**
 * Creates a predicate that accepts exactly the {@code RUNNING} execution state.
 *
 * @return A predicate that is {@code true} only for {@code ExecutionState.RUNNING}.
 */
private static Predicate<ExecutionState> isRunning() {
	// equals on an enum constant is an identity comparison, so this matches "== RUNNING"
	return ExecutionState.RUNNING::equals;
}

Source File: Task.java From flink with Apache License 2.0

4 votes

/**
 * Calls the invokable to trigger a checkpoint.
 *
 * <p>If the task is in state RUNNING and has an invokable, the trigger call is wrapped in a
 * runnable and handed to {@code executeAsyncCallRunnable}. If the invokable reports that
 * the checkpoint was not triggered, or if the task is not running in the first place, the
 * checkpoint is declined through the checkpoint responder.
 *
 * @param checkpointID The ID identifying the checkpoint.
 * @param checkpointTimestamp The timestamp associated with the checkpoint.
 * @param checkpointOptions Options for performing this checkpoint.
 * @param advanceToEndOfEventTime Flag indicating if the source should inject a {@code MAX_WATERMARK} in the pipeline
 *                           to fire any registered event-time timers.
 */
public void triggerCheckpointBarrier(
		final long checkpointID,
		final long checkpointTimestamp,
		final CheckpointOptions checkpointOptions,
		final boolean advanceToEndOfEventTime) {

	final AbstractInvokable invokable = this.invokable;
	final CheckpointMetaData checkpointMetaData = new CheckpointMetaData(checkpointID, checkpointTimestamp);

	if (executionState == ExecutionState.RUNNING && invokable != null) {

		// build a local closure
		final String taskName = taskNameWithSubtask;
		// captured on the calling thread and installed on the thread that runs the checkpoint
		final SafetyNetCloseableRegistry safetyNetCloseableRegistry =
			FileSystemSafetyNet.getSafetyNetCloseableRegistryForThread();

		Runnable runnable = new Runnable() {
			@Override
			public void run() {
				// set safety net from the task's context for checkpointing thread
				LOG.debug("Creating FileSystem stream leak safety net for {}", Thread.currentThread().getName());
				FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(safetyNetCloseableRegistry);

				try {
					boolean success = invokable.triggerCheckpoint(checkpointMetaData, checkpointOptions, advanceToEndOfEventTime);
					if (!success) {
						// same wording as the decline message for non-running tasks below;
						// the previous text was missing the separator before the task name
						checkpointResponder.declineCheckpoint(
								getJobID(), getExecutionId(), checkpointID,
								new CheckpointException("Task name with subtask : " + taskName, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY));
					}
				}
				catch (Throwable t) {
					if (getExecutionState() == ExecutionState.RUNNING) {
						failExternally(new Exception(
							"Error while triggering checkpoint " + checkpointID + " for " +
								taskNameWithSubtask, t));
					} else {
						LOG.debug("Encountered error while triggering checkpoint {} for " +
							"{} ({}) while being not in state running.", checkpointID,
							taskNameWithSubtask, executionId, t);
					}
				} finally {
					// always detach the registry from the thread again
					FileSystemSafetyNet.setSafetyNetCloseableRegistryForThread(null);
				}
			}
		};
		executeAsyncCallRunnable(
				runnable,
				String.format("Checkpoint Trigger for %s (%s).", taskNameWithSubtask, executionId));
	}
	else {
		LOG.debug("Declining checkpoint request for non-running task {} ({}).", taskNameWithSubtask, executionId);

		// send back a message that we did not do the checkpoint
		checkpointResponder.declineCheckpoint(jobId, executionId, checkpointID,
				new CheckpointException("Task name with subtask : " + taskNameWithSubtask, CheckpointFailureReason.CHECKPOINT_DECLINED_TASK_NOT_READY));
	}
}

Source File: Task.java From flink with Apache License 2.0

4 votes

/**
 * Moves this task to {@code targetState} and, when the task leaves the RUNNING state,
 * starts the cancellation of the invokable.
 *
 * <p>The method loops until a state transition succeeds, or until the task is observed in
 * a terminal state or in CANCELING, in which case nothing is done. When the transition
 * starts from DEPLOYING or CREATED, only the failure cause is recorded. When it starts from
 * RUNNING, and an invokable exists that has not been canceled yet, the method starts daemon
 * threads: one running a {@code TaskCanceler}, one running a {@code TaskInterrupter} if the
 * invokable asks to be interrupted on cancel, and one running a
 * {@code TaskCancelerWatchDog} if the cancellation timeout is positive.
 *
 * @param targetState The state to transition into.
 * @param cause The cause handed to the state transition and recorded as failure cause.
 * @throws IllegalStateException If the current state is none of the states handled here.
 */
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) {
	// retry until a transition succeeds; a failed transition means the state changed concurrently
	while (true) {
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compare-and-set ensures the cancellation threads are started at most once
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout,
								LOG);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				return;
			}
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}

Source File: Task.java From flink with Apache License 2.0

4 votes

/**
 * Reports whether the execution state is currently {@code RUNNING}.
 *
 * @return {@code true} if the execution state is RUNNING, {@code false} otherwise.
 */
@Override
public boolean isRunning() {
	final ExecutionState current = executionState;
	return ExecutionState.RUNNING == current;
}

Source File: TaskDeploymentDescriptorFactory.java From flink with Apache License 2.0

4 votes

/**
 * Checks whether the given producer state is one of RUNNING, FINISHED, SCHEDULED or
 * DEPLOYING.
 *
 * @param producerState The execution state of the producer.
 * @return {@code true} if the producer is in one of the four states above.
 */
private static boolean isProducerAvailable(ExecutionState producerState) {
	if (producerState == ExecutionState.RUNNING || producerState == ExecutionState.FINISHED) {
		return true;
	}
	return producerState == ExecutionState.SCHEDULED || producerState == ExecutionState.DEPLOYING;
}

Source File: InputChannelDeploymentDescriptorTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Tests the deployment descriptors for local, remote, and unknown partition
 * locations (with lazy deployment allowed and all execution states for the
 * producers).
 *
 * <p>For every {@code ExecutionState}, three producers are mocked: one on the consumer's
 * resource, one on a freshly generated resource, and one without an assigned resource.
 * The descriptors created from the three edges are then checked: for producers in state
 * RUNNING, FINISHED, SCHEDULED or DEPLOYING the first two locations must be local and
 * remote respectively, for all other states they must be unknown. The third location
 * must always be unknown.
 */
@Test
public void testMixedLocalRemoteUnknownDeployment() throws Exception {
	boolean allowLazyDeployment = true;

	ResourceID consumerResourceId = ResourceID.generate();
	ExecutionVertex consumer = mock(ExecutionVertex.class);
	LogicalSlot consumerSlot = mockSlot(consumerResourceId);

	// Local and remote channel are only allowed for certain execution
	// states.
	for (ExecutionState state : ExecutionState.values()) {
		// Local partition
		ExecutionVertex localProducer = mockExecutionVertex(state, consumerResourceId);
		IntermediateResultPartition localPartition = mockPartition(localProducer);
		ResultPartitionID localPartitionId = new ResultPartitionID(localPartition.getPartitionId(), localProducer.getCurrentExecutionAttempt().getAttemptId());
		ExecutionEdge localEdge = new ExecutionEdge(localPartition, consumer, 0);

		// Remote partition
		ExecutionVertex remoteProducer = mockExecutionVertex(state, ResourceID.generate()); // new resource ID
		IntermediateResultPartition remotePartition = mockPartition(remoteProducer);
		ResultPartitionID remotePartitionId = new ResultPartitionID(remotePartition.getPartitionId(), remoteProducer.getCurrentExecutionAttempt().getAttemptId());
		ConnectionID remoteConnectionId = new ConnectionID(remoteProducer.getCurrentAssignedResource().getTaskManagerLocation(), 0);
		ExecutionEdge remoteEdge = new ExecutionEdge(remotePartition, consumer, 1);

		// Unknown partition
		ExecutionVertex unknownProducer = mockExecutionVertex(state, null); // no assigned resource
		IntermediateResultPartition unknownPartition = mockPartition(unknownProducer);
		ResultPartitionID unknownPartitionId = new ResultPartitionID(unknownPartition.getPartitionId(), unknownProducer.getCurrentExecutionAttempt().getAttemptId());
		ExecutionEdge unknownEdge = new ExecutionEdge(unknownPartition, consumer, 2);

		// descriptors are expected in the order of the edges: local, remote, unknown
		InputChannelDeploymentDescriptor[] desc = InputChannelDeploymentDescriptor.fromEdges(
			new ExecutionEdge[]{localEdge, remoteEdge, unknownEdge},
			consumerSlot.getTaskManagerLocation().getResourceID(),
			allowLazyDeployment);

		assertEquals(3, desc.length);

		// These states are allowed
		if (state == ExecutionState.RUNNING || state == ExecutionState.FINISHED ||
			state == ExecutionState.SCHEDULED || state == ExecutionState.DEPLOYING) {

			// Create local or remote channels
			assertEquals(localPartitionId, desc[0].getConsumedPartitionId());
			assertTrue(desc[0].getConsumedPartitionLocation().isLocal());
			assertNull(desc[0].getConsumedPartitionLocation().getConnectionId());

			assertEquals(remotePartitionId, desc[1].getConsumedPartitionId());
			assertTrue(desc[1].getConsumedPartitionLocation().isRemote());
			assertEquals(remoteConnectionId, desc[1].getConsumedPartitionLocation().getConnectionId());
		} else {
			// Unknown (lazy deployment allowed)
			assertEquals(localPartitionId, desc[0].getConsumedPartitionId());
			assertTrue(desc[0].getConsumedPartitionLocation().isUnknown());
			assertNull(desc[0].getConsumedPartitionLocation().getConnectionId());

			assertEquals(remotePartitionId, desc[1].getConsumedPartitionId());
			assertTrue(desc[1].getConsumedPartitionLocation().isUnknown());
			assertNull(desc[1].getConsumedPartitionLocation().getConnectionId());
		}

		// the producer without an assigned resource is unknown regardless of its state
		assertEquals(unknownPartitionId, desc[2].getConsumedPartitionId());
		assertTrue(desc[2].getConsumedPartitionLocation().isUnknown());
		assertNull(desc[2].getConsumedPartitionLocation().getConnectionId());
	}
}

Source File: BackPressureRequestCoordinator.java From flink with Apache License 2.0

4 votes

/**
 * Triggers a back pressure stats request for all given tasks.
 *
 * <p>The request is only issued if every task has a current execution attempt in state
 * RUNNING and the coordinator has not been shut down; otherwise an exceptionally
 * completed future is returned.
 *
 * @param tasks Tasks to request.
 * @return A future of the completed task back pressure stats.
 */
CompletableFuture<BackPressureStats> triggerBackPressureRequest(ExecutionVertex[] tasks) {
	checkNotNull(tasks, "Tasks to request must not be null.");
	checkArgument(tasks.length >= 1, "No tasks to request.");

	final int taskCount = tasks.length;

	// attempt IDs and executions of the tasks, filled in while checking that they are running
	final ExecutionAttemptID[] attemptIds = new ExecutionAttemptID[taskCount];
	final Execution[] runningExecutions = new Execution[taskCount];

	// Every task has to be RUNNING before anything is triggered.
	// The triggering itself can still fail afterwards.
	for (int idx = 0; idx < taskCount; idx++) {
		final Execution attempt = tasks[idx].getCurrentExecutionAttempt();
		if (attempt == null || attempt.getState() != ExecutionState.RUNNING) {
			return FutureUtils.completedExceptionally(new IllegalStateException("Task " + tasks[idx]
				.getTaskNameWithSubtaskIndex() + " is not running."));
		}

		runningExecutions[idx] = attempt;
		attemptIds[idx] = attempt.getAttemptId();
	}

	synchronized (lock) {
		if (isShutDown) {
			return FutureUtils.completedExceptionally(new IllegalStateException("Shut down."));
		}

		final int requestId = requestIdCounter++;

		LOG.debug("Triggering task back pressure request {}.", requestId);

		final PendingBackPressureRequest pendingRequest = new PendingBackPressureRequest(requestId, attemptIds);

		// Register the pending request before the request is sent out,
		// to prevent races with removing it again.
		pendingRequests.put(requestId, pendingRequest);

		requestBackPressure(runningExecutions, requestId);

		return pendingRequest.getBackPressureStatsFuture();
	}
}

Source File: Task.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Utility method to dispatch an asynchronous call on the invokable.
 *
 * <p>The call is silently dropped if the task is not in state RUNNING. The executor that
 * runs the calls is a single-thread executor, created on first use while holding the lock
 * on this object.
 *
 * @param runnable The async call runnable.
 * @param callName The name of the call, for logging purposes.
 * @throws RuntimeException If the executor rejects the call although the task is still
 *                          in state RUNNING.
 */
private void executeAsyncCallRunnable(Runnable runnable, String callName) {
	// make sure the executor is initialized. lock against concurrent calls to this function
	synchronized (this) {
		// calls for a task that is not running are dropped without notice
		if (executionState != ExecutionState.RUNNING) {
			return;
		}

		// get ourselves a reference on the stack that cannot be concurrently modified
		ExecutorService executor = this.asyncCallDispatcher;
		if (executor == null) {
			// first time use, initialize
			checkState(userCodeClassLoader != null, "userCodeClassLoader must not be null");
			executor = Executors.newSingleThreadExecutor(
					new DispatcherThreadFactory(
						TASK_THREADS_GROUP,
						"Async calls on " + taskNameWithSubtask,
						userCodeClassLoader));
			this.asyncCallDispatcher = executor;

			// double-check for execution state, and make sure we clean up after ourselves
			// if we created the dispatcher while the task was concurrently canceled
			if (executionState != ExecutionState.RUNNING) {
				executor.shutdown();
				asyncCallDispatcher = null;
				return;
			}
		}

		LOG.debug("Invoking async call {} on task {}", callName, taskNameWithSubtask);

		try {
			executor.submit(runnable);
		}
		catch (RejectedExecutionException e) {
			// may be that we are concurrently finished or canceled.
			// if not, report that something is fishy
			if (executionState == ExecutionState.RUNNING) {
				throw new RuntimeException("Async call was rejected, even though the task is running.", e);
			}
		}
	}
}

Source File: Task.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Handles the answer to a partition state check that was issued after a failed partition
 * request.
 *
 * <p>The notification is ignored unless this task is in state RUNNING. For a known input
 * gate, the partition request is re-triggered if the producer is SCHEDULED, DEPLOYING,
 * RUNNING or FINISHED, and this task is canceled if the producer is CANCELING, CANCELED or
 * FAILED. Any other producer state, as well as an unknown input gate, fails this task.
 *
 * @param intermediateDataSetId ID used to look up the input gate.
 * @param resultPartitionId ID of the partition whose producer state was checked.
 * @param producerState The reported execution state of the producer.
 */
@VisibleForTesting
void onPartitionStateUpdate(
		IntermediateDataSetID intermediateDataSetId,
		ResultPartitionID resultPartitionId,
		ExecutionState producerState) throws IOException, InterruptedException {

	if (executionState != ExecutionState.RUNNING) {
		LOG.debug("Task {} ignored a partition producer state notification, because it's not running.", taskNameWithSubtask);
		return;
	}

	final SingleInputGate gate = inputGatesById.get(intermediateDataSetId);
	if (gate == null) {
		failExternally(new IllegalStateException(
			"Received partition producer state for unknown input gate " + intermediateDataSetId + "."));
		return;
	}

	final boolean producerCanServe =
		producerState == ExecutionState.SCHEDULED
			|| producerState == ExecutionState.DEPLOYING
			|| producerState == ExecutionState.RUNNING
			|| producerState == ExecutionState.FINISHED;

	final boolean producerIsGone =
		producerState == ExecutionState.CANCELING
			|| producerState == ExecutionState.CANCELED
			|| producerState == ExecutionState.FAILED;

	if (producerCanServe) {
		// Retrigger the partition request
		gate.retriggerPartitionRequest(resultPartitionId.getPartitionId());
	}
	else if (producerIsGone) {
		// The producing execution has been canceled or failed, so a
		// re-triggered request could not succeed.
		if (LOG.isDebugEnabled()) {
			LOG.debug("Cancelling task {} after the producer of partition {} with attempt ID {} has entered state {}.",
				taskNameWithSubtask,
				resultPartitionId.getPartitionId(),
				resultPartitionId.getProducerId(),
				producerState);
		}

		cancelExecution();
	}
	else {
		// Any other execution state is unexpected. Currently, only state
		// CREATED is left out of the checked states. A producer in this
		// state means that something went wrong with scheduling in
		// topological order.
		final String unexpectedStateMessage = String.format(
			"Producer with attempt ID %s of partition %s in unexpected state %s.",
			resultPartitionId.getProducerId(),
			resultPartitionId.getPartitionId(),
			producerState);

		failExternally(new IllegalStateException(unexpectedStateMessage));
	}
}

Source File: Task.java From flink with Apache License 2.0

4 votes

/**
 * Moves this task to {@code targetState} and, when the task leaves the RUNNING state,
 * starts the cancellation of the invokable.
 *
 * <p>The method loops until a state transition succeeds, or until the task is observed in
 * a terminal state or in CANCELING, in which case nothing is done. When the transition
 * starts from DEPLOYING or CREATED, only the failure cause is recorded. When it starts from
 * RUNNING, and an invokable exists that has not been canceled yet, the method starts daemon
 * threads: one running a {@code TaskCanceler}, one running a {@code TaskInterrupter} if the
 * invokable asks to be interrupted on cancel, and one running a
 * {@code TaskCancelerWatchDog} if the cancellation timeout is positive.
 *
 * @param targetState The state to transition into.
 * @param cause The cause handed to the state transition and recorded as failure cause.
 * @throws IllegalStateException If the current state is none of the states handled here.
 */
@VisibleForTesting
void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) {
	// retry until a transition succeeds; a failed transition means the state changed concurrently
	while (true) {
		ExecutionState current = executionState;

		// if the task is already canceled (or canceling) or finished or failed,
		// then we need not do anything
		if (current.isTerminal() || current == ExecutionState.CANCELING) {
			LOG.info("Task {} is already in state {}", taskNameWithSubtask, current);
			return;
		}

		if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) {
			if (transitionState(current, targetState, cause)) {
				// if we manage this state transition, then the invokable gets never called
				// we need not call cancel on it
				this.failureCause = cause;
				return;
			}
		}
		else if (current == ExecutionState.RUNNING) {
			if (transitionState(ExecutionState.RUNNING, targetState, cause)) {
				// we are canceling / failing out of the running state
				// we need to cancel the invokable

				// copy reference to guard against concurrent null-ing out the reference
				final AbstractInvokable invokable = this.invokable;

				// the compare-and-set ensures the cancellation threads are started at most once
				if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) {
					this.failureCause = cause;

					LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId);

					// because the canceling may block on user code, we cancel from a separate thread
					// we do not reuse the async call handler, because that one may be blocked, in which
					// case the canceling could not continue

					// The canceller calls cancel and interrupts the executing thread once
					Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask);

					Thread cancelThread = new Thread(
							executingThread.getThreadGroup(),
							canceler,
							String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId));
					cancelThread.setDaemon(true);
					cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
					cancelThread.start();

					// the periodic interrupting thread - a different thread than the canceller, in case
					// the application code does blocking stuff in its cancellation paths.
					if (invokable.shouldInterruptOnCancel()) {
						Runnable interrupter = new TaskInterrupter(
								LOG,
								invokable,
								executingThread,
								taskNameWithSubtask,
								taskCancellationInterval);

						Thread interruptingThread = new Thread(
								executingThread.getThreadGroup(),
								interrupter,
								String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId));
						interruptingThread.setDaemon(true);
						interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						interruptingThread.start();
					}

					// if a cancellation timeout is set, the watchdog thread kills the process
					// if graceful cancellation does not succeed
					if (taskCancellationTimeout > 0) {
						Runnable cancelWatchdog = new TaskCancelerWatchDog(
								executingThread,
								taskManagerActions,
								taskCancellationTimeout);

						Thread watchDogThread = new Thread(
								executingThread.getThreadGroup(),
								cancelWatchdog,
								String.format("Cancellation Watchdog for %s (%s).",
										taskNameWithSubtask, executionId));
						watchDogThread.setDaemon(true);
						watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE);
						watchDogThread.start();
					}
				}
				return;
			}
		}
		else {
			throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).",
				current, taskNameWithSubtask, executionId));
		}
	}
}

Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#RUNNING