Java Code Examples for org.apache.flink.runtime.execution.ExecutionState#isTerminal()
The following examples show how to use
org.apache.flink.runtime.execution.ExecutionState#isTerminal().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SubtaskExecutionAttemptDetailsInfo.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds the REST details for a single execution attempt of a subtask.
 *
 * <p>The start time is the attempt's DEPLOYING timestamp (reported as -1 when that timestamp
 * is 0). The end time is the timestamp of the current state when that state is terminal,
 * otherwise -1. The duration runs from the start time to the end time, or to the current
 * wall-clock time when there is no end time yet; it is -1 when there is no start time.
 *
 * @param execution the execution attempt to describe
 * @param ioMetrics the I/O counters reported alongside the attempt
 * @return the details object for the given attempt
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, MutableIOMetrics ioMetrics) {
    final ExecutionState currentState = execution.getState();
    final long snapshotMillis = System.currentTimeMillis();

    final TaskManagerLocation assignedLocation = execution.getAssignedResourceLocation();
    final String hostLabel;
    if (assignedLocation == null) {
        hostLabel = "(unassigned)";
    } else {
        hostLabel = assignedLocation.getHostname();
    }

    // a DEPLOYING timestamp of 0 is reported as -1
    final long deployingMillis = execution.getStateTimestamp(ExecutionState.DEPLOYING);
    final long startTime = deployingMillis == 0 ? -1 : deployingMillis;

    final long endTime;
    if (currentState.isTerminal()) {
        endTime = execution.getStateTimestamp(currentState);
    } else {
        endTime = -1;
    }

    final long duration;
    if (startTime > 0) {
        // without an end timestamp the duration is measured up to "now"
        final long effectiveEnd = endTime > 0 ? endTime : snapshotMillis;
        duration = effectiveEnd - startTime;
    } else {
        duration = -1;
    }

    // local and remote input bytes are reported as a single combined figure
    final long bytesIn = ioMetrics.getNumBytesInLocal() + ioMetrics.getNumBytesInRemote();
    final boolean bytesInComplete =
        ioMetrics.isNumBytesInLocalComplete() && ioMetrics.isNumBytesInRemoteComplete();

    final IOMetricsInfo metricsInfo = new IOMetricsInfo(
        bytesIn,
        bytesInComplete,
        ioMetrics.getNumBytesOut(),
        ioMetrics.isNumBytesOutComplete(),
        ioMetrics.getNumRecordsIn(),
        ioMetrics.isNumRecordsInComplete(),
        ioMetrics.getNumRecordsOut(),
        ioMetrics.isNumRecordsOutComplete());

    return new SubtaskExecutionAttemptDetailsInfo(
        execution.getParallelSubtaskIndex(),
        currentState,
        execution.getAttemptNumber(),
        hostLabel,
        startTime,
        endTime,
        duration,
        metricsInfo);
}
Example 2
Source File: SubtasksTimesHandler.java From flink with Apache License 2.0 | 5 votes |
/**
 * Collects the per-state timestamps and the duration of every subtask of a job vertex.
 *
 * <p>For each subtask the duration runs from its SCHEDULED timestamp to the timestamp of its
 * current state when that state is terminal, otherwise to the current wall-clock time. A
 * subtask without a positive SCHEDULED timestamp reports a duration of -1.
 *
 * @param jobVertex the job vertex whose subtasks are described
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
    final String vertexId = jobVertex.getJobVertexId().toString();
    final String vertexName = jobVertex.getName();
    final long snapshotMillis = System.currentTimeMillis();

    final List<SubtasksTimesInfo.SubtaskTimeInfo> perSubtask = new ArrayList<>();

    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        // the timestamp array is indexed by the ordinal of the execution state
        final long[] stateMillis = subtask.getCurrentExecutionAttempt().getStateTimestamps();
        final ExecutionState currentState = subtask.getExecutionState();

        final long scheduledMillis = stateMillis[ExecutionState.SCHEDULED.ordinal()];

        final long endMillis;
        if (currentState.isTerminal()) {
            endMillis = stateMillis[currentState.ordinal()];
        } else {
            endMillis = snapshotMillis;
        }

        final long duration;
        if (scheduledMillis > 0) {
            duration = endMillis - scheduledMillis;
        } else {
            duration = -1L;
        }

        final TaskManagerLocation assignedLocation = subtask.getCurrentAssignedResourceLocation();
        final String hostLabel;
        if (assignedLocation == null) {
            hostLabel = "(unassigned)";
        } else {
            hostLabel = assignedLocation.getHostname();
        }

        final ExecutionState[] allStates = ExecutionState.values();
        final Map<ExecutionState, Long> millisByState = new HashMap<>(allStates.length);
        for (ExecutionState state : allStates) {
            millisByState.put(state, stateMillis[state.ordinal()]);
        }

        perSubtask.add(new SubtasksTimesInfo.SubtaskTimeInfo(
            subtaskIndex,
            hostLabel,
            duration,
            millisByState));
        subtaskIndex++;
    }

    return new SubtasksTimesInfo(vertexId, vertexName, snapshotMillis, perSubtask);
}
Example 3
Source File: Execution.java From flink with Apache License 2.0 | 5 votes |
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) { // sanity check if (currentState.isTerminal()) { throw new IllegalStateException("Cannot leave terminal state " + currentState + " to transition to " + targetState + '.'); } if (STATE_UPDATER.compareAndSet(this, currentState, targetState)) { markTimestamp(targetState); if (error == null) { LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState); } else { LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState, error); } if (targetState.isTerminal()) { // complete the terminal state future terminalStateFuture.complete(targetState); } // make sure that the state transition completes normally. // potential errors (in listeners may not affect the main logic) try { vertex.notifyStateTransition(this, targetState, error); } catch (Throwable t) { LOG.error("Error while notifying execution graph of execution state transition.", t); } return true; } else { return false; } }
Example 4
Source File: SubtasksTimesHandler.java From flink with Apache License 2.0 | 5 votes |
/**
 * Collects the per-state timestamps and the duration of every subtask of a job vertex.
 *
 * <p>For each subtask the duration runs from its SCHEDULED timestamp to the timestamp of its
 * current state when that state is terminal, otherwise to the current wall-clock time. A
 * subtask without a positive SCHEDULED timestamp reports a duration of -1.
 *
 * @param jobVertex the job vertex whose subtasks are described
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
    final String vertexId = jobVertex.getJobVertexId().toString();
    final String vertexName = jobVertex.getName();
    final long snapshotMillis = System.currentTimeMillis();

    final List<SubtasksTimesInfo.SubtaskTimeInfo> perSubtask = new ArrayList<>();

    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        // the timestamp array is indexed by the ordinal of the execution state
        final long[] stateMillis = subtask.getCurrentExecutionAttempt().getStateTimestamps();
        final ExecutionState currentState = subtask.getExecutionState();

        final long scheduledMillis = stateMillis[ExecutionState.SCHEDULED.ordinal()];

        final long endMillis;
        if (currentState.isTerminal()) {
            endMillis = stateMillis[currentState.ordinal()];
        } else {
            endMillis = snapshotMillis;
        }

        final long duration;
        if (scheduledMillis > 0) {
            duration = endMillis - scheduledMillis;
        } else {
            duration = -1L;
        }

        final TaskManagerLocation assignedLocation = subtask.getCurrentAssignedResourceLocation();
        final String hostLabel;
        if (assignedLocation == null) {
            hostLabel = "(unassigned)";
        } else {
            hostLabel = assignedLocation.getHostname();
        }

        final ExecutionState[] allStates = ExecutionState.values();
        final Map<ExecutionState, Long> millisByState = new HashMap<>(allStates.length);
        for (ExecutionState state : allStates) {
            millisByState.put(state, stateMillis[state.ordinal()]);
        }

        perSubtask.add(new SubtasksTimesInfo.SubtaskTimeInfo(
            subtaskIndex,
            hostLabel,
            duration,
            millisByState));
        subtaskIndex++;
    }

    return new SubtasksTimesInfo(vertexId, vertexName, snapshotMillis, perSubtask);
}
Example 5
Source File: SubtaskExecutionAttemptDetailsInfo.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds the REST details for a single execution attempt of a subtask.
 *
 * <p>The start time is the attempt's DEPLOYING timestamp (reported as -1 when that timestamp
 * is 0). The end time is the timestamp of the current state when that state is terminal,
 * otherwise -1. The duration runs from the start time to the end time, or to the current
 * wall-clock time when there is no end time yet; it is -1 when there is no start time.
 *
 * @param execution the execution attempt to describe
 * @param ioMetrics the I/O counters reported alongside the attempt
 * @return the details object for the given attempt
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, MutableIOMetrics ioMetrics) {
    final ExecutionState currentState = execution.getState();
    final long snapshotMillis = System.currentTimeMillis();

    final TaskManagerLocation assignedLocation = execution.getAssignedResourceLocation();
    final String hostLabel;
    if (assignedLocation == null) {
        hostLabel = "(unassigned)";
    } else {
        hostLabel = assignedLocation.getHostname();
    }

    // a DEPLOYING timestamp of 0 is reported as -1
    final long deployingMillis = execution.getStateTimestamp(ExecutionState.DEPLOYING);
    final long startTime = deployingMillis == 0 ? -1 : deployingMillis;

    final long endTime;
    if (currentState.isTerminal()) {
        endTime = execution.getStateTimestamp(currentState);
    } else {
        endTime = -1;
    }

    final long duration;
    if (startTime > 0) {
        // without an end timestamp the duration is measured up to "now"
        final long effectiveEnd = endTime > 0 ? endTime : snapshotMillis;
        duration = effectiveEnd - startTime;
    } else {
        duration = -1;
    }

    final IOMetricsInfo metricsInfo = new IOMetricsInfo(
        ioMetrics.getNumBytesIn(),
        ioMetrics.isNumBytesInComplete(),
        ioMetrics.getNumBytesOut(),
        ioMetrics.isNumBytesOutComplete(),
        ioMetrics.getNumRecordsIn(),
        ioMetrics.isNumRecordsInComplete(),
        ioMetrics.getNumRecordsOut(),
        ioMetrics.isNumRecordsOutComplete());

    return new SubtaskExecutionAttemptDetailsInfo(
        execution.getParallelSubtaskIndex(),
        currentState,
        execution.getAttemptNumber(),
        hostLabel,
        startTime,
        endTime,
        duration,
        metricsInfo);
}
Example 6
Source File: Execution.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) { // sanity check if (currentState.isTerminal()) { throw new IllegalStateException("Cannot leave terminal state " + currentState + " to transition to " + targetState + '.'); } if (STATE_UPDATER.compareAndSet(this, currentState, targetState)) { markTimestamp(targetState); if (error == null) { LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState); } else { LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState, error); } if (targetState.isTerminal()) { // complete the terminal state future terminalStateFuture.complete(targetState); } // make sure that the state transition completes normally. // potential errors (in listeners may not affect the main logic) try { vertex.notifyStateTransition(this, targetState, error); } catch (Throwable t) { LOG.error("Error while notifying execution graph of execution state transition.", t); } return true; } else { return false; } }
Example 7
Source File: SubtasksTimesHandler.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Collects the per-state timestamps and the duration of every subtask of a job vertex.
 *
 * <p>For each subtask the duration runs from its SCHEDULED timestamp to the timestamp of its
 * current state when that state is terminal, otherwise to the current wall-clock time. A
 * subtask without a positive SCHEDULED timestamp reports a duration of -1.
 *
 * @param jobVertex the job vertex whose subtasks are described
 * @return the timing information for all subtasks of the vertex
 */
private static SubtasksTimesInfo createSubtaskTimesInfo(AccessExecutionJobVertex jobVertex) {
    final String vertexId = jobVertex.getJobVertexId().toString();
    final String vertexName = jobVertex.getName();
    final long snapshotMillis = System.currentTimeMillis();

    final List<SubtasksTimesInfo.SubtaskTimeInfo> perSubtask = new ArrayList<>();

    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        // the timestamp array is indexed by the ordinal of the execution state
        final long[] stateMillis = subtask.getCurrentExecutionAttempt().getStateTimestamps();
        final ExecutionState currentState = subtask.getExecutionState();

        final long scheduledMillis = stateMillis[ExecutionState.SCHEDULED.ordinal()];

        final long endMillis;
        if (currentState.isTerminal()) {
            endMillis = stateMillis[currentState.ordinal()];
        } else {
            endMillis = snapshotMillis;
        }

        final long duration;
        if (scheduledMillis > 0) {
            duration = endMillis - scheduledMillis;
        } else {
            duration = -1L;
        }

        final TaskManagerLocation assignedLocation = subtask.getCurrentAssignedResourceLocation();
        final String hostLabel;
        if (assignedLocation == null) {
            hostLabel = "(unassigned)";
        } else {
            hostLabel = assignedLocation.getHostname();
        }

        final ExecutionState[] allStates = ExecutionState.values();
        final Map<ExecutionState, Long> millisByState = new HashMap<>(allStates.length);
        for (ExecutionState state : allStates) {
            millisByState.put(state, stateMillis[state.ordinal()]);
        }

        perSubtask.add(new SubtasksTimesInfo.SubtaskTimeInfo(
            subtaskIndex,
            hostLabel,
            duration,
            millisByState));
        subtaskIndex++;
    }

    return new SubtasksTimesInfo(vertexId, vertexName, snapshotMillis, perSubtask);
}
Example 8
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) { while (true) { ExecutionState current = executionState; // if the task is already canceled (or canceling) or finished or failed, // then we need not do anything if (current.isTerminal() || current == ExecutionState.CANCELING) { LOG.info("Task {} is already in state {}", taskNameWithSubtask, current); return; } if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) { if (transitionState(current, targetState, cause)) { // if we manage this state transition, then the invokable gets never called // we need not call cancel on it this.failureCause = cause; return; } } else if (current == ExecutionState.RUNNING) { if (transitionState(ExecutionState.RUNNING, targetState, cause)) { // we are canceling / failing out of the running state // we need to cancel the invokable // copy reference to guard against concurrent null-ing out the reference final AbstractInvokable invokable = this.invokable; if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) { this.failureCause = cause; LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId); // because the canceling may block on user code, we cancel from a separate thread // we do not reuse the async call handler, because that one may be blocked, in which // case the canceling could not continue // The canceller calls cancel and interrupts the executing thread once Runnable canceler = new TaskCanceler(LOG, this :: closeNetworkResources, invokable, executingThread, taskNameWithSubtask); Thread cancelThread = new Thread( executingThread.getThreadGroup(), canceler, String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId)); cancelThread.setDaemon(true); cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); cancelThread.start(); // the periodic interrupting thread - a different thread than the canceller, in case // the application 
code does blocking stuff in its cancellation paths. if (invokable.shouldInterruptOnCancel()) { Runnable interrupter = new TaskInterrupter( LOG, invokable, executingThread, taskNameWithSubtask, taskCancellationInterval); Thread interruptingThread = new Thread( executingThread.getThreadGroup(), interrupter, String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId)); interruptingThread.setDaemon(true); interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); interruptingThread.start(); } // if a cancellation timeout is set, the watchdog thread kills the process // if graceful cancellation does not succeed if (taskCancellationTimeout > 0) { Runnable cancelWatchdog = new TaskCancelerWatchDog( executingThread, taskManagerActions, taskCancellationTimeout, LOG); Thread watchDogThread = new Thread( executingThread.getThreadGroup(), cancelWatchdog, String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId)); watchDogThread.setDaemon(true); watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); watchDogThread.start(); } } return; } } else { throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).", current, taskNameWithSubtask, executionId)); } } }
Example 9
Source File: ExecutionVertex.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Archives the current Execution and creates a new Execution for this vertex. * * <p>This method atomically checks if the ExecutionGraph is still of an expected * global mod. version and replaces the execution if that is the case. If the ExecutionGraph * has increased its global mod. version in the meantime, this operation fails. * * <p>This mechanism can be used to prevent conflicts between various concurrent recovery and * reconfiguration actions in a similar way as "optimistic concurrency control". * * @param timestamp * The creation timestamp for the new Execution * @param originatingGlobalModVersion * * @return Returns the new created Execution. * * @throws GlobalModVersionMismatch Thrown, if the execution graph has a new global mod * version than the one passed to this message. */ public Execution resetForNewExecution(final long timestamp, final long originatingGlobalModVersion) throws GlobalModVersionMismatch { LOG.debug("Resetting execution vertex {} for new execution.", getTaskNameWithSubtaskIndex()); synchronized (priorExecutions) { // check if another global modification has been triggered since the // action that originally caused this reset/restart happened final long actualModVersion = getExecutionGraph().getGlobalModVersion(); if (actualModVersion > originatingGlobalModVersion) { // global change happened since, reject this action throw new GlobalModVersionMismatch(originatingGlobalModVersion, actualModVersion); } final Execution oldExecution = currentExecution; final ExecutionState oldState = oldExecution.getState(); if (oldState.isTerminal()) { priorExecutions.add(oldExecution.archive()); final Execution newExecution = new Execution( getExecutionGraph().getFutureExecutor(), this, oldExecution.getAttemptNumber() + 1, originatingGlobalModVersion, timestamp, timeout); this.currentExecution = newExecution; CoLocationGroup grp = jobVertex.getCoLocationGroup(); if (grp != null) { this.locationConstraint = grp.getLocationConstraint(subTaskIndex); } // 
register this execution at the execution graph, to receive call backs getExecutionGraph().registerExecution(newExecution); // if the execution was 'FINISHED' before, tell the ExecutionGraph that // we take one step back on the road to reaching global FINISHED if (oldState == FINISHED) { getExecutionGraph().vertexUnFinished(); } return newExecution; } else { throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + oldState); } } }
Example 10
Source File: JobVertexDetailsHandler.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Builds the REST details of a job vertex, with one entry per subtask.
 *
 * <p>For each subtask the start time is its DEPLOYING timestamp (reported as -1 when that
 * timestamp is 0). The end time is the timestamp of the current state when that state is
 * terminal, otherwise -1. The duration runs from the start time to the end time, or to the
 * current wall-clock time when there is no end time yet; it is -1 when there is no start time.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the ID of the job the vertex belongs to
 * @param metricFetcher the fetcher handed to the I/O metrics collection; may be {@code null}
 * @return the details of the job vertex and its subtasks
 */
private static JobVertexDetailsInfo createJobVertexDetailsInfo(AccessExecutionJobVertex jobVertex, JobID jobID, @Nullable MetricFetcher metricFetcher) {
    final List<JobVertexDetailsInfo.VertexTaskDetail> taskDetails = new ArrayList<>();
    final long snapshotMillis = System.currentTimeMillis();

    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        final ExecutionState currentState = subtask.getExecutionState();

        final TaskManagerLocation assignedLocation = subtask.getCurrentAssignedResourceLocation();
        final String locationLabel;
        if (assignedLocation == null) {
            locationLabel = "(unassigned)";
        } else {
            locationLabel = assignedLocation.getHostname() + ":" + assignedLocation.dataPort();
        }

        // a DEPLOYING timestamp of 0 is reported as -1
        final long deployingMillis = subtask.getStateTimestamp(ExecutionState.DEPLOYING);
        final long startTime = deployingMillis == 0 ? -1 : deployingMillis;

        final long endTime;
        if (currentState.isTerminal()) {
            endTime = subtask.getStateTimestamp(currentState);
        } else {
            endTime = -1;
        }

        final long duration;
        if (startTime > 0) {
            // without an end timestamp the duration is measured up to "now"
            final long effectiveEnd = endTime > 0 ? endTime : snapshotMillis;
            duration = effectiveEnd - startTime;
        } else {
            duration = -1;
        }

        final MutableIOMetrics ioCounts = new MutableIOMetrics();
        ioCounts.addIOMetrics(
            subtask.getCurrentExecutionAttempt(),
            metricFetcher,
            jobID.toString(),
            jobVertex.getJobVertexId().toString());

        final int attemptNumber = subtask.getCurrentExecutionAttempt().getAttemptNumber();

        // local and remote input bytes are reported as a single combined figure
        final IOMetricsInfo metricsInfo = new IOMetricsInfo(
            ioCounts.getNumBytesInLocal() + ioCounts.getNumBytesInRemote(),
            ioCounts.isNumBytesInLocalComplete() && ioCounts.isNumBytesInRemoteComplete(),
            ioCounts.getNumBytesOut(),
            ioCounts.isNumBytesOutComplete(),
            ioCounts.getNumRecordsIn(),
            ioCounts.isNumRecordsInComplete(),
            ioCounts.getNumRecordsOut(),
            ioCounts.isNumRecordsOutComplete());

        taskDetails.add(new JobVertexDetailsInfo.VertexTaskDetail(
            subtaskIndex,
            currentState,
            attemptNumber,
            locationLabel,
            startTime,
            endTime,
            duration,
            metricsInfo));

        subtaskIndex++;
    }

    return new JobVertexDetailsInfo(
        jobVertex.getJobVertexId(),
        jobVertex.getName(),
        jobVertex.getParallelism(),
        snapshotMillis,
        taskDetails);
}
Example 11
Source File: JobVertexDetailsHandler.java From flink with Apache License 2.0 | 4 votes |
/**
 * Builds the REST details of a job vertex, with one entry per subtask.
 *
 * <p>For each subtask the start time is its DEPLOYING timestamp (reported as -1 when that
 * timestamp is 0). The end time is the timestamp of the current state when that state is
 * terminal, otherwise -1. The duration runs from the start time to the end time, or to the
 * current wall-clock time when there is no end time yet; it is -1 when there is no start time.
 *
 * @param jobVertex the job vertex to describe
 * @param jobID the ID of the job the vertex belongs to
 * @param metricFetcher the fetcher handed to the I/O metrics collection; may be {@code null}
 * @return the details of the job vertex and its subtasks
 */
private static JobVertexDetailsInfo createJobVertexDetailsInfo(AccessExecutionJobVertex jobVertex, JobID jobID, @Nullable MetricFetcher metricFetcher) {
    final List<JobVertexDetailsInfo.VertexTaskDetail> taskDetails = new ArrayList<>();
    final long snapshotMillis = System.currentTimeMillis();

    int subtaskIndex = 0;
    for (AccessExecutionVertex subtask : jobVertex.getTaskVertices()) {
        final ExecutionState currentState = subtask.getExecutionState();

        final TaskManagerLocation assignedLocation = subtask.getCurrentAssignedResourceLocation();
        final String locationLabel;
        if (assignedLocation == null) {
            locationLabel = "(unassigned)";
        } else {
            locationLabel = assignedLocation.getHostname() + ":" + assignedLocation.dataPort();
        }

        // a DEPLOYING timestamp of 0 is reported as -1
        final long deployingMillis = subtask.getStateTimestamp(ExecutionState.DEPLOYING);
        final long startTime = deployingMillis == 0 ? -1 : deployingMillis;

        final long endTime;
        if (currentState.isTerminal()) {
            endTime = subtask.getStateTimestamp(currentState);
        } else {
            endTime = -1;
        }

        final long duration;
        if (startTime > 0) {
            // without an end timestamp the duration is measured up to "now"
            final long effectiveEnd = endTime > 0 ? endTime : snapshotMillis;
            duration = effectiveEnd - startTime;
        } else {
            duration = -1;
        }

        final MutableIOMetrics ioCounts = new MutableIOMetrics();
        ioCounts.addIOMetrics(
            subtask.getCurrentExecutionAttempt(),
            metricFetcher,
            jobID.toString(),
            jobVertex.getJobVertexId().toString());

        final int attemptNumber = subtask.getCurrentExecutionAttempt().getAttemptNumber();

        final IOMetricsInfo metricsInfo = new IOMetricsInfo(
            ioCounts.getNumBytesIn(),
            ioCounts.isNumBytesInComplete(),
            ioCounts.getNumBytesOut(),
            ioCounts.isNumBytesOutComplete(),
            ioCounts.getNumRecordsIn(),
            ioCounts.isNumRecordsInComplete(),
            ioCounts.getNumRecordsOut(),
            ioCounts.isNumRecordsOutComplete());

        taskDetails.add(new JobVertexDetailsInfo.VertexTaskDetail(
            subtaskIndex,
            currentState,
            attemptNumber,
            locationLabel,
            startTime,
            endTime,
            duration,
            metricsInfo));

        subtaskIndex++;
    }

    return new JobVertexDetailsInfo(
        jobVertex.getJobVertexId(),
        jobVertex.getName(),
        jobVertex.getParallelism(),
        snapshotMillis,
        taskDetails);
}
Example 12
Source File: ExecutionVertex.java From flink with Apache License 2.0 | 4 votes |
/** * Archives the current Execution and creates a new Execution for this vertex. * * <p>This method atomically checks if the ExecutionGraph is still of an expected * global mod. version and replaces the execution if that is the case. If the ExecutionGraph * has increased its global mod. version in the meantime, this operation fails. * * <p>This mechanism can be used to prevent conflicts between various concurrent recovery and * reconfiguration actions in a similar way as "optimistic concurrency control". * * @param timestamp * The creation timestamp for the new Execution * @param originatingGlobalModVersion * * @return Returns the new created Execution. * * @throws GlobalModVersionMismatch Thrown, if the execution graph has a new global mod * version than the one passed to this message. */ public Execution resetForNewExecution(final long timestamp, final long originatingGlobalModVersion) throws GlobalModVersionMismatch { LOG.debug("Resetting execution vertex {} for new execution.", getTaskNameWithSubtaskIndex()); synchronized (priorExecutions) { // check if another global modification has been triggered since the // action that originally caused this reset/restart happened final long actualModVersion = getExecutionGraph().getGlobalModVersion(); if (actualModVersion > originatingGlobalModVersion) { // global change happened since, reject this action throw new GlobalModVersionMismatch(originatingGlobalModVersion, actualModVersion); } final Execution oldExecution = currentExecution; final ExecutionState oldState = oldExecution.getState(); if (oldState.isTerminal()) { if (oldState == FINISHED) { // pipelined partitions are released in Execution#cancel(), covering both job failures and vertex resets // do not release pipelined partitions here to save RPC calls oldExecution.handlePartitionCleanup(false, true); getExecutionGraph().getPartitionReleaseStrategy().vertexUnfinished(executionVertexId); } priorExecutions.add(oldExecution.archive()); final Execution newExecution 
= new Execution( getExecutionGraph().getFutureExecutor(), this, oldExecution.getAttemptNumber() + 1, originatingGlobalModVersion, timestamp, timeout); currentExecution = newExecution; synchronized (inputSplits) { InputSplitAssigner assigner = jobVertex.getSplitAssigner(); if (assigner != null) { assigner.returnInputSplit(inputSplits, getParallelSubtaskIndex()); inputSplits.clear(); } } CoLocationGroup grp = jobVertex.getCoLocationGroup(); if (grp != null) { locationConstraint = grp.getLocationConstraint(subTaskIndex); } // register this execution at the execution graph, to receive call backs getExecutionGraph().registerExecution(newExecution); // if the execution was 'FINISHED' before, tell the ExecutionGraph that // we take one step back on the road to reaching global FINISHED if (oldState == FINISHED) { getExecutionGraph().vertexUnFinished(); } // reset the intermediate results for (IntermediateResultPartition resultPartition : resultPartitions.values()) { resultPartition.resetForNewExecution(); } return newExecution; } else { throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + oldState); } } }
Example 13
Source File: Task.java From flink with Apache License 2.0 | 4 votes |
@VisibleForTesting void cancelOrFailAndCancelInvokableInternal(ExecutionState targetState, Throwable cause) { while (true) { ExecutionState current = executionState; // if the task is already canceled (or canceling) or finished or failed, // then we need not do anything if (current.isTerminal() || current == ExecutionState.CANCELING) { LOG.info("Task {} is already in state {}", taskNameWithSubtask, current); return; } if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) { if (transitionState(current, targetState, cause)) { // if we manage this state transition, then the invokable gets never called // we need not call cancel on it this.failureCause = cause; return; } } else if (current == ExecutionState.RUNNING) { if (transitionState(ExecutionState.RUNNING, targetState, cause)) { // we are canceling / failing out of the running state // we need to cancel the invokable // copy reference to guard against concurrent null-ing out the reference final AbstractInvokable invokable = this.invokable; if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) { this.failureCause = cause; LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId); // because the canceling may block on user code, we cancel from a separate thread // we do not reuse the async call handler, because that one may be blocked, in which // case the canceling could not continue // The canceller calls cancel and interrupts the executing thread once Runnable canceler = new TaskCanceler(LOG, this::closeNetworkResources, invokable, executingThread, taskNameWithSubtask); Thread cancelThread = new Thread( executingThread.getThreadGroup(), canceler, String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId)); cancelThread.setDaemon(true); cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); cancelThread.start(); // the periodic interrupting thread - a different thread than the canceller, in case 
// the application code does blocking stuff in its cancellation paths. if (invokable.shouldInterruptOnCancel()) { Runnable interrupter = new TaskInterrupter( LOG, invokable, executingThread, taskNameWithSubtask, taskCancellationInterval); Thread interruptingThread = new Thread( executingThread.getThreadGroup(), interrupter, String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId)); interruptingThread.setDaemon(true); interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); interruptingThread.start(); } // if a cancellation timeout is set, the watchdog thread kills the process // if graceful cancellation does not succeed if (taskCancellationTimeout > 0) { Runnable cancelWatchdog = new TaskCancelerWatchDog( executingThread, taskManagerActions, taskCancellationTimeout); Thread watchDogThread = new Thread( executingThread.getThreadGroup(), cancelWatchdog, String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId)); watchDogThread.setDaemon(true); watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); watchDogThread.start(); } } return; } } else { throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).", current, taskNameWithSubtask, executionId)); } } }
Example 14
Source File: SubtaskExecutionAttemptDetailsInfo.java From flink with Apache License 2.0 | 4 votes |
/**
 * Builds the REST details for a single execution attempt of a subtask, collecting its I/O
 * metrics along the way.
 *
 * <p>The start time is the attempt's DEPLOYING timestamp (reported as -1 when that timestamp
 * is 0). The end time is the timestamp of the current state when that state is terminal,
 * otherwise -1. The duration runs from the start time to the end time, or to the current
 * wall-clock time when there is no end time yet; it is -1 when there is no start time.
 *
 * @param execution the execution attempt to describe
 * @param metricFetcher the fetcher handed to the I/O metrics collection; may be {@code null}
 * @param jobID the ID of the job the attempt belongs to
 * @param jobVertexID the ID of the job vertex the attempt belongs to
 * @return the details object for the given attempt
 */
public static SubtaskExecutionAttemptDetailsInfo create(AccessExecution execution, @Nullable MetricFetcher metricFetcher, JobID jobID, JobVertexID jobVertexID) {
    final ExecutionState currentState = execution.getState();
    final long snapshotMillis = System.currentTimeMillis();

    final TaskManagerLocation assignedLocation = execution.getAssignedResourceLocation();
    final String hostLabel;
    final String taskManagerLabel;
    if (assignedLocation == null) {
        hostLabel = "(unassigned)";
        taskManagerLabel = "(unassigned)";
    } else {
        hostLabel = assignedLocation.getHostname();
        taskManagerLabel = assignedLocation.getResourceID().toString();
    }

    // a DEPLOYING timestamp of 0 is reported as -1
    final long deployingMillis = execution.getStateTimestamp(ExecutionState.DEPLOYING);
    final long startTime = deployingMillis == 0 ? -1 : deployingMillis;

    final long endTime;
    if (currentState.isTerminal()) {
        endTime = execution.getStateTimestamp(currentState);
    } else {
        endTime = -1;
    }

    final long duration;
    if (startTime > 0) {
        // without an end timestamp the duration is measured up to "now"
        final long effectiveEnd = endTime > 0 ? endTime : snapshotMillis;
        duration = effectiveEnd - startTime;
    } else {
        duration = -1;
    }

    final MutableIOMetrics ioCounts = new MutableIOMetrics();
    ioCounts.addIOMetrics(
        execution,
        metricFetcher,
        jobID.toString(),
        jobVertexID.toString());

    final IOMetricsInfo metricsInfo = new IOMetricsInfo(
        ioCounts.getNumBytesIn(),
        ioCounts.isNumBytesInComplete(),
        ioCounts.getNumBytesOut(),
        ioCounts.isNumBytesOutComplete(),
        ioCounts.getNumRecordsIn(),
        ioCounts.isNumRecordsInComplete(),
        ioCounts.getNumRecordsOut(),
        ioCounts.isNumRecordsOutComplete());

    return new SubtaskExecutionAttemptDetailsInfo(
        execution.getParallelSubtaskIndex(),
        currentState,
        execution.getAttemptNumber(),
        hostLabel,
        startTime,
        endTime,
        duration,
        metricsInfo,
        taskManagerLabel);
}
Example 15
Source File: Task.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private void cancelOrFailAndCancelInvokable(ExecutionState targetState, Throwable cause) { while (true) { ExecutionState current = executionState; // if the task is already canceled (or canceling) or finished or failed, // then we need not do anything if (current.isTerminal() || current == ExecutionState.CANCELING) { LOG.info("Task {} is already in state {}", taskNameWithSubtask, current); return; } if (current == ExecutionState.DEPLOYING || current == ExecutionState.CREATED) { if (transitionState(current, targetState, cause)) { // if we manage this state transition, then the invokable gets never called // we need not call cancel on it this.failureCause = cause; return; } } else if (current == ExecutionState.RUNNING) { if (transitionState(ExecutionState.RUNNING, targetState, cause)) { // we are canceling / failing out of the running state // we need to cancel the invokable // copy reference to guard against concurrent null-ing out the reference final AbstractInvokable invokable = this.invokable; if (invokable != null && invokableHasBeenCanceled.compareAndSet(false, true)) { this.failureCause = cause; LOG.info("Triggering cancellation of task code {} ({}).", taskNameWithSubtask, executionId); // because the canceling may block on user code, we cancel from a separate thread // we do not reuse the async call handler, because that one may be blocked, in which // case the canceling could not continue // The canceller calls cancel and interrupts the executing thread once Runnable canceler = new TaskCanceler( LOG, invokable, executingThread, taskNameWithSubtask, producedPartitions, inputGates); Thread cancelThread = new Thread( executingThread.getThreadGroup(), canceler, String.format("Canceler for %s (%s).", taskNameWithSubtask, executionId)); cancelThread.setDaemon(true); cancelThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); cancelThread.start(); // the periodic interrupting thread - a different thread than the canceller, in case // the 
application code does blocking stuff in its cancellation paths. if (invokable.shouldInterruptOnCancel()) { Runnable interrupter = new TaskInterrupter( LOG, invokable, executingThread, taskNameWithSubtask, taskCancellationInterval); Thread interruptingThread = new Thread( executingThread.getThreadGroup(), interrupter, String.format("Canceler/Interrupts for %s (%s).", taskNameWithSubtask, executionId)); interruptingThread.setDaemon(true); interruptingThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); interruptingThread.start(); } // if a cancellation timeout is set, the watchdog thread kills the process // if graceful cancellation does not succeed if (taskCancellationTimeout > 0) { Runnable cancelWatchdog = new TaskCancelerWatchDog( executingThread, taskManagerActions, taskCancellationTimeout, LOG); Thread watchDogThread = new Thread( executingThread.getThreadGroup(), cancelWatchdog, String.format("Cancellation Watchdog for %s (%s).", taskNameWithSubtask, executionId)); watchDogThread.setDaemon(true); watchDogThread.setUncaughtExceptionHandler(FatalExitExceptionHandler.INSTANCE); watchDogThread.start(); } } return; } } else { throw new IllegalStateException(String.format("Unexpected state: %s of task %s (%s).", current, taskNameWithSubtask, executionId)); } } }
Example 16
Source File: ExecutionVertex.java From flink with Apache License 2.0 | 4 votes |
private Execution resetForNewExecutionInternal(final long timestamp, final long originatingGlobalModVersion) { final Execution oldExecution = currentExecution; final ExecutionState oldState = oldExecution.getState(); if (oldState.isTerminal()) { if (oldState == FINISHED) { // pipelined partitions are released in Execution#cancel(), covering both job failures and vertex resets // do not release pipelined partitions here to save RPC calls oldExecution.handlePartitionCleanup(false, true); getExecutionGraph().getPartitionReleaseStrategy().vertexUnfinished(executionVertexId); } priorExecutions.add(oldExecution.archive()); final Execution newExecution = new Execution( getExecutionGraph().getFutureExecutor(), this, oldExecution.getAttemptNumber() + 1, originatingGlobalModVersion, timestamp, timeout); currentExecution = newExecution; synchronized (inputSplits) { InputSplitAssigner assigner = jobVertex.getSplitAssigner(); if (assigner != null) { assigner.returnInputSplit(inputSplits, getParallelSubtaskIndex()); inputSplits.clear(); } } CoLocationGroup grp = jobVertex.getCoLocationGroup(); if (grp != null) { locationConstraint = grp.getLocationConstraint(subTaskIndex); } // register this execution at the execution graph, to receive call backs getExecutionGraph().registerExecution(newExecution); // if the execution was 'FINISHED' before, tell the ExecutionGraph that // we take one step back on the road to reaching global FINISHED if (oldState == FINISHED) { getExecutionGraph().vertexUnFinished(); } // reset the intermediate results for (IntermediateResultPartition resultPartition : resultPartitions.values()) { resultPartition.resetForNewExecution(); } return newExecution; } else { throw new IllegalStateException("Cannot reset a vertex that is in non-terminal state " + oldState); } }
Example 17
Source File: Execution.java From flink with Apache License 2.0 | 4 votes |
private boolean transitionState(ExecutionState currentState, ExecutionState targetState, Throwable error) { // sanity check if (currentState.isTerminal()) { throw new IllegalStateException("Cannot leave terminal state " + currentState + " to transition to " + targetState + '.'); } if (state == currentState) { state = targetState; markTimestamp(targetState); if (error == null) { LOG.info("{} ({}) switched from {} to {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState); } else { if (LOG.isInfoEnabled()) { final String locationInformation = getAssignedResource() != null ? getAssignedResource().toString() : "not deployed"; LOG.info( "{} ({}) switched from {} to {} on {}.", getVertex().getTaskNameWithSubtaskIndex(), getAttemptId(), currentState, targetState, locationInformation, error); } } if (targetState.isTerminal()) { // complete the terminal state future terminalStateFuture.complete(targetState); } // make sure that the state transition completes normally. // potential errors (in listeners may not affect the main logic) try { vertex.notifyStateTransition(this, targetState, error); } catch (Throwable t) { LOG.error("Error while notifying execution graph of execution state transition.", t); } return true; } else { return false; } }