org.apache.flink.runtime.execution.SuppressRestartsException Java Examples
The following examples show how to use
org.apache.flink.runtime.execution.SuppressRestartsException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionGraphVariousFailuesTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests that a {@link SuppressRestartsException} in state RESTARTING stops the restarting * immediately and sets the execution graph's state to FAILED. */ @Test public void testSuppressRestartFailureWhileRestarting() throws Exception { final ExecutionGraph eg = ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10)); eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread()); eg.scheduleForExecution(); assertEquals(JobStatus.RUNNING, eg.getState()); ExecutionGraphTestUtils.switchAllVerticesToRunning(eg); eg.failGlobal(new Exception("test")); assertEquals(JobStatus.FAILING, eg.getState()); ExecutionGraphTestUtils.completeCancellingForAllVertices(eg); assertEquals(JobStatus.RESTARTING, eg.getState()); // suppress a possible restart eg.failGlobal(new SuppressRestartsException(new Exception("Test"))); assertEquals(JobStatus.FAILED, eg.getState()); }
Example #2
Source File: ExecutionGraphRestartTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testNoRestartOnSuppressException() throws Exception { final ExecutionGraph eg = createExecutionGraph(new FixedDelayRestartStrategy(Integer.MAX_VALUE, 0)).f0; // Fail with unrecoverable Exception eg.getAllExecutionVertices().iterator().next().fail( new SuppressRestartsException(new Exception("Test Exception"))); assertEquals(JobStatus.FAILING, eg.getState()); completeCanceling(eg); eg.waitUntilTerminal(); assertEquals(JobStatus.FAILED, eg.getState()); RestartStrategy restartStrategy = eg.getRestartStrategy(); assertTrue(restartStrategy instanceof FixedDelayRestartStrategy); assertEquals(0, ((FixedDelayRestartStrategy) restartStrategy).getCurrentRestartAttempt()); }
Example #3
Source File: ExecutionGraphVariousFailuesTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Tests that a {@link SuppressRestartsException} in state RESTARTING stops the restarting * immediately and sets the execution graph's state to FAILED. */ @Test public void testSuppressRestartFailureWhileRestarting() throws Exception { final ExecutionGraph eg = ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10)); eg.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread()); eg.scheduleForExecution(); assertEquals(JobStatus.RUNNING, eg.getState()); ExecutionGraphTestUtils.switchAllVerticesToRunning(eg); eg.failGlobal(new Exception("test")); assertEquals(JobStatus.FAILING, eg.getState()); ExecutionGraphTestUtils.completeCancellingForAllVertices(eg); assertEquals(JobStatus.RESTARTING, eg.getState()); // suppress a possible restart eg.failGlobal(new SuppressRestartsException(new Exception("Test"))); assertEquals(JobStatus.FAILED, eg.getState()); }
Example #4
Source File: ExecutionGraphRestartTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testNoRestartOnSuppressException() throws Exception { try (SlotPool slotPool = createSlotPoolImpl()) { ExecutionGraph eg = TestingExecutionGraphBuilder.newBuilder() .setRestartStrategy(new FixedDelayRestartStrategy(Integer.MAX_VALUE, 0)) .buildAndScheduleForExecution(slotPool); // Fail with unrecoverable Exception eg.getAllExecutionVertices().iterator().next().fail( new SuppressRestartsException(new Exception("Test Exception"))); assertEquals(JobStatus.FAILING, eg.getState()); completeCanceling(eg); eg.waitUntilTerminal(); assertEquals(JobStatus.FAILED, eg.getState()); RestartStrategy restartStrategy = eg.getRestartStrategy(); assertTrue(restartStrategy instanceof FixedDelayRestartStrategy); assertEquals(0, ((FixedDelayRestartStrategy) restartStrategy).getCurrentRestartAttempt()); } }
Example #5
Source File: ExecutionGraphVariousFailuesTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests that a {@link SuppressRestartsException} in state RESTARTING stops the restarting * immediately and sets the execution graph's state to FAILED. */ @Test public void testSuppressRestartFailureWhileRestarting() throws Exception { final ExecutionGraph eg = ExecutionGraphTestUtils.createSimpleTestGraph(new InfiniteDelayRestartStrategy(10)); eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread()); eg.scheduleForExecution(); assertEquals(JobStatus.RUNNING, eg.getState()); ExecutionGraphTestUtils.switchAllVerticesToRunning(eg); eg.failGlobal(new Exception("test")); assertEquals(JobStatus.FAILING, eg.getState()); ExecutionGraphTestUtils.completeCancellingForAllVertices(eg); assertEquals(JobStatus.RESTARTING, eg.getState()); // suppress a possible restart eg.failGlobal(new SuppressRestartsException(new Exception("Test"))); assertEquals(JobStatus.FAILED, eg.getState()); }
Example #6
Source File: ExecutionFailureHandlerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests the check for unrecoverable error. */ @Test public void testUnrecoverableErrorCheck() { // normal error assertFalse(ExecutionFailureHandler.isUnrecoverableError(new Exception())); // direct unrecoverable error assertTrue(ExecutionFailureHandler.isUnrecoverableError(new SuppressRestartsException(new Exception()))); // nested unrecoverable error assertTrue(ExecutionFailureHandler.isUnrecoverableError( new Exception(new SuppressRestartsException(new Exception())))); }
Example #7
Source File: ExecutionGraphRestartTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testNoRestartOnSuppressException() throws Exception { try (SlotPool slotPool = createSlotPoolImpl()) { ExecutionGraph eg = TestingExecutionGraphBuilder .newBuilder() .setJobGraph(createJobGraph()) .setRestartStrategy(new FixedDelayRestartStrategy(Integer.MAX_VALUE, 0)) .setSlotProvider(createSchedulerWithSlots(slotPool)) .build(); startAndScheduleExecutionGraph(eg); // Fail with unrecoverable Exception eg.getAllExecutionVertices().iterator().next().fail( new SuppressRestartsException(new Exception("Test Exception"))); assertEquals(JobStatus.FAILING, eg.getState()); completeCanceling(eg); eg.waitUntilTerminal(); assertEquals(JobStatus.FAILED, eg.getState()); RestartStrategy restartStrategy = eg.getRestartStrategy(); assertTrue(restartStrategy instanceof FixedDelayRestartStrategy); assertEquals(0, ((FixedDelayRestartStrategy) restartStrategy).getCurrentRestartAttempt()); } }
Example #8
Source File: ExecutionGraphRestartTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testFailWhileRestarting() throws Exception { try (SlotPool slotPool = createSlotPoolImpl()) { TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); final ExecutionGraph executionGraph = TestingExecutionGraphBuilder .newBuilder() .setJobGraph(createJobGraph()) .setRestartStrategy(new InfiniteDelayRestartStrategy()) .setSlotProvider(createSchedulerWithSlots(slotPool, taskManagerLocation)) .build(); startAndScheduleExecutionGraph(executionGraph); // Release the TaskManager and wait for the job to restart slotPool.releaseTaskManager(taskManagerLocation.getResourceID(), new Exception("Test Exception")); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); // If we fail when being in RESTARTING, then we should try to restart again final long globalModVersion = executionGraph.getGlobalModVersion(); final Exception testException = new Exception("Test exception"); executionGraph.failGlobal(testException); assertNotEquals(globalModVersion, executionGraph.getGlobalModVersion()); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); assertEquals(testException, executionGraph.getFailureCause()); // we should have updated the failure cause // but it should fail when sending a SuppressRestartsException executionGraph.failGlobal(new SuppressRestartsException(new Exception("Suppress restart exception"))); assertEquals(JobStatus.FAILED, executionGraph.getState()); // The restart has been aborted executionGraph.restart(executionGraph.getGlobalModVersion()); assertEquals(JobStatus.FAILED, executionGraph.getState()); } }
Example #9
Source File: ContinuousFileProcessingCheckpointITCase.java From flink with Apache License 2.0 | 5 votes |
@Override public void invoke(String value) throws Exception { int fileIdx = getFileIdx(value); Set<String> content = actualContent.get(fileIdx); if (content == null) { content = new HashSet<>(); actualContent.put(fileIdx, content); } // detect duplicate lines. if (!content.add(value + "\n")) { fail("Duplicate line: " + value); System.exit(0); } elementCounter++; // this is termination if (elementCounter >= NO_OF_FILES * LINES_PER_FILE) { actualCollectedContent = actualContent; throw new SuppressRestartsException(new SuccessException()); } // add some latency so that we have at least two checkpoint in if (!hasRestoredAfterFailure && successfulCheckpoints < 2) { Thread.sleep(5); } // simulate a node failure if (!hasRestoredAfterFailure && successfulCheckpoints >= 2 && elementCounter >= elementsToFailure) { throw new Exception("Task Failure @ elem: " + elementCounter + " / " + elementsToFailure); } }
Example #10
Source File: ThrowableClassifierTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code ThrowableClassifier} reports {@code NonRecoverableError} for a
 * {@link SuppressRestartsException} and for a {@code NoResourceAvailableException}.
 */
@Test
public void testThrowableType_NonRecoverable() {
    final SuppressRestartsException suppressingFailure =
        new SuppressRestartsException(new Exception(""));
    assertEquals(
        ThrowableType.NonRecoverableError,
        ThrowableClassifier.getThrowableType(suppressingFailure));

    final NoResourceAvailableException missingResources = new NoResourceAvailableException();
    assertEquals(
        ThrowableType.NonRecoverableError,
        ThrowableClassifier.getThrowableType(missingResources));
}
Example #11
Source File: ExecutionFailureHandlerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests the check for unrecoverable error. */ @Test public void testUnrecoverableErrorCheck() { // normal error assertFalse(ExecutionFailureHandler.isUnrecoverableError(new Exception())); // direct unrecoverable error assertTrue(ExecutionFailureHandler.isUnrecoverableError(new SuppressRestartsException(new Exception()))); // nested unrecoverable error assertTrue(ExecutionFailureHandler.isUnrecoverableError( new Exception(new SuppressRestartsException(new Exception())))); }
Example #12
Source File: ExecutionGraphRestartTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testFailWhileRestarting() throws Exception { try (SlotPool slotPool = createSlotPoolImpl()) { TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); final ExecutionGraph executionGraph = TestingExecutionGraphBuilder.newBuilder() .setRestartStrategy(new InfiniteDelayRestartStrategy()) .setTaskManagerLocation(taskManagerLocation) .buildAndScheduleForExecution(slotPool); // Release the TaskManager and wait for the job to restart slotPool.releaseTaskManager(taskManagerLocation.getResourceID(), new Exception("Test Exception")); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); // If we fail when being in RESTARTING, then we should try to restart again final long globalModVersion = executionGraph.getGlobalModVersion(); final Exception testException = new Exception("Test exception"); executionGraph.failGlobal(testException); assertNotEquals(globalModVersion, executionGraph.getGlobalModVersion()); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); assertEquals(testException, executionGraph.getFailureCause()); // we should have updated the failure cause // but it should fail when sending a SuppressRestartsException executionGraph.failGlobal(new SuppressRestartsException(new Exception("Suppress restart exception"))); assertEquals(JobStatus.FAILED, executionGraph.getState()); // The restart has been aborted executionGraph.restart(executionGraph.getGlobalModVersion()); assertEquals(JobStatus.FAILED, executionGraph.getState()); } }
Example #13
Source File: ThrowableClassifierTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Asserts that both a {@link SuppressRestartsException} and a
 * {@code NoResourceAvailableException} are classified as {@code NonRecoverableError}.
 */
@Test
public void testThrowableType_NonRecoverable() {
    final ThrowableType expected = ThrowableType.NonRecoverableError;

    final ThrowableType typeOfSuppressing =
        ThrowableClassifier.getThrowableType(new SuppressRestartsException(new Exception("")));
    assertEquals(expected, typeOfSuppressing);

    final ThrowableType typeOfNoResource =
        ThrowableClassifier.getThrowableType(new NoResourceAvailableException());
    assertEquals(expected, typeOfNoResource);
}
Example #14
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a terminal global failure concurrent to a local failover * leads to a properly failed state. */ @Test public void testGlobalFailureConcurrentToLocalFailover() throws Exception { // the logic in this test is as follows: // - start a job // - cause a task failure and delay the local recovery action via the manual executor // - cause a global failure // - resume in local recovery action // - validate that this does in fact not start a new task, because the graph as a // whole should now be terminally failed already final JobID jid = new JobID(); final int parallelism = 2; final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism); final ExecutionGraph graph = createSampleGraph( jid, TestRestartPipelinedRegionStrategy::new, TestRestartStrategy.directExecuting(), slotProvider, parallelism); graph.start(mainThreadExecutor); TestRestartPipelinedRegionStrategy strategy = (TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy(); // This future is used to block the failover strategy execution until we complete it final CompletableFuture<?> blocker = new CompletableFuture<>(); strategy.setBlockerFuture(blocker); final ExecutionJobVertex ejv = graph.getVerticesTopologically().iterator().next(); final ExecutionVertex vertex1 = ejv.getTaskVertices()[0]; final ExecutionVertex vertex2 = ejv.getTaskVertices()[1]; graph.scheduleForExecution(); assertEquals(JobStatus.RUNNING, graph.getState()); // let one of the vertices fail - that triggers a local recovery action vertex1.getCurrentExecutionAttempt().fail(new Exception("test failure")); assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState()); // graph should still be running and the failover recovery action should be queued assertEquals(JobStatus.RUNNING, graph.getState()); // now cancel the job graph.failGlobal(new SuppressRestartsException(new Exception("test exception"))); assertEquals(JobStatus.FAILING, graph.getState()); 
assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState()); assertEquals(ExecutionState.CANCELING, vertex2.getCurrentExecutionAttempt().getState()); // let the recovery action continue blocker.complete(null); // now report that cancelling is complete for the other vertex vertex2.getCurrentExecutionAttempt().completeCancelling(); assertEquals(JobStatus.FAILED, graph.getState()); assertTrue(vertex1.getCurrentExecutionAttempt().getState().isTerminal()); assertTrue(vertex2.getCurrentExecutionAttempt().getState().isTerminal()); // make sure all slots are recycled assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots()); }
Example #15
Source File: ExecutionGraph.java From flink with Apache License 2.0 | 4 votes |
/** * Try to restart the job. If we cannot restart the job (e.g. no more restarts allowed), then * try to fail the job. This operation is only permitted if the current state is FAILING or * RESTARTING. * * @return true if the operation could be executed; false if a concurrent job status change occurred */ private boolean tryRestartOrFail(long globalModVersionForRestart) { JobStatus currentState = state; if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) { final Throwable failureCause = this.failureCause; synchronized (progressLock) { if (LOG.isDebugEnabled()) { LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause); } else { LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID()); } final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException); final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart(); boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart; if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) { LOG.info("Restarting the job {} ({}).", getJobName(), getJobID()); RestartCallback restarter = new ExecutionGraphRestartCallback(this, globalModVersionForRestart); FutureUtils.assertNoException( restartStrategy .restart(restarter, getJobMasterMainThreadExecutor()) .exceptionally((throwable) -> { failGlobal(throwable); return null; })); return true; } else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) { final String cause1 = isFailureCauseAllowingRestart ? null : "a type of SuppressRestartsException was thrown"; final String cause2 = isRestartStrategyAllowingRestart ? 
null : "the restart strategy prevented it"; LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(), StringUtils.concatenateWithAnd(cause1, cause2), failureCause); onTerminalState(JobStatus.FAILED); return true; } else { // we must have changed the state concurrently, thus we cannot complete this operation return false; } } } else { // this operation is only allowed in the state FAILING or RESTARTING return false; } }
Example #16
Source File: ExecutionGraph.java From flink with Apache License 2.0 | 4 votes |
/** * Try to restart the job. If we cannot restart the job (e.g. no more restarts allowed), then * try to fail the job. This operation is only permitted if the current state is FAILING or * RESTARTING. * * @return true if the operation could be executed; false if a concurrent job status change occurred */ @Deprecated private boolean tryRestartOrFail(long globalModVersionForRestart) { if (!isLegacyScheduling()) { return true; } JobStatus currentState = state; if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) { final Throwable failureCause = this.failureCause; if (LOG.isDebugEnabled()) { LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause); } else { LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID()); } final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException); final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart(); boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart; if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) { LOG.info("Restarting the job {} ({}).", getJobName(), getJobID()); RestartCallback restarter = new ExecutionGraphRestartCallback(this, globalModVersionForRestart); FutureUtils.assertNoException( restartStrategy .restart(restarter, getJobMasterMainThreadExecutor()) .exceptionally((throwable) -> { failGlobal(throwable); return null; })); return true; } else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) { final String cause1 = isFailureCauseAllowingRestart ? null : "a type of SuppressRestartsException was thrown"; final String cause2 = isRestartStrategyAllowingRestart ? 
null : "the restart strategy prevented it"; LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(), StringUtils.concatenateWithAnd(cause1, cause2), failureCause); onTerminalState(JobStatus.FAILED); return true; } else { // we must have changed the state concurrently, thus we cannot complete this operation return false; } } else { // this operation is only allowed in the state FAILING or RESTARTING return false; } }
Example #17
Source File: ThrowableClassifierTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Asserts that a {@link SuppressRestartsException} is classified as
 * {@code NonRecoverableError}.
 */
@Test
public void testThrowableType_NonRecoverable() {
    final SuppressRestartsException suppressingFailure =
        new SuppressRestartsException(new Exception(""));

    final ThrowableType classified = ThrowableClassifier.getThrowableType(suppressingFailure);

    assertEquals(ThrowableType.NonRecoverableError, classified);
}
Example #18
Source File: ExecutionGraphRestartTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testFailWhileRestarting() throws Exception { Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext()); Instance instance = ExecutionGraphTestUtils.getInstance( new ActorTaskManagerGateway( new SimpleActorGateway(TestingUtils.directExecutionContext())), NUM_TASKS); scheduler.newInstanceAvailable(instance); // Blocking program ExecutionGraph executionGraph = new ExecutionGraph( TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), new JobID(), "TestJob", new Configuration(), new SerializedValue<>(new ExecutionConfig()), AkkaUtils.getDefaultTimeout(), // We want to manually control the restart and delay new InfiniteDelayRestartStrategy(), scheduler); executionGraph.start(TestingComponentMainThreadExecutorServiceAdapter.forMainThread()); JobVertex jobVertex = new JobVertex("NoOpInvokable"); jobVertex.setInvokableClass(NoOpInvokable.class); jobVertex.setParallelism(NUM_TASKS); JobGraph jobGraph = new JobGraph("TestJob", jobVertex); executionGraph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources()); assertEquals(JobStatus.CREATED, executionGraph.getState()); executionGraph.scheduleForExecution(); assertEquals(JobStatus.RUNNING, executionGraph.getState()); // Kill the instance and wait for the job to restart instance.markDead(); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); // If we fail when being in RESTARTING, then we should try to restart again final long globalModVersion = executionGraph.getGlobalModVersion(); final Exception testException = new Exception("Test exception"); executionGraph.failGlobal(testException); assertNotEquals(globalModVersion, executionGraph.getGlobalModVersion()); assertEquals(JobStatus.RESTARTING, executionGraph.getState()); assertEquals(testException, executionGraph.getFailureCause()); // we should have updated the failure cause // but it should fail when sending a SuppressRestartsException executionGraph.failGlobal(new SuppressRestartsException(new 
Exception("Suppress restart exception"))); assertEquals(JobStatus.FAILED, executionGraph.getState()); // The restart has been aborted executionGraph.restart(executionGraph.getGlobalModVersion()); assertEquals(JobStatus.FAILED, executionGraph.getState()); }
Example #19
Source File: ConcurrentFailoverStrategyExecutionGraphTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a terminal global failure concurrent to a local failover * leads to a properly failed state. */ @Test public void testGlobalFailureConcurrentToLocalFailover() throws Exception { // the logic in this test is as follows: // - start a job // - cause a task failure and delay the local recovery action via the manual executor // - cause a global failure // - resume in local recovery action // - validate that this does in fact not start a new task, because the graph as a // whole should now be terminally failed already final JobID jid = new JobID(); final int parallelism = 2; final SimpleSlotProvider slotProvider = new SimpleSlotProvider(jid, parallelism); final ExecutionGraph graph = createSampleGraph( jid, TestRestartPipelinedRegionStrategy::new, TestRestartStrategy.directExecuting(), slotProvider, parallelism); graph.start(mainThreadExecutor); TestRestartPipelinedRegionStrategy strategy = (TestRestartPipelinedRegionStrategy) graph.getFailoverStrategy(); // This future is used to block the failover strategy execution until we complete it final CompletableFuture<?> blocker = new CompletableFuture<>(); strategy.setBlockerFuture(blocker); final ExecutionJobVertex ejv = graph.getVerticesTopologically().iterator().next(); final ExecutionVertex vertex1 = ejv.getTaskVertices()[0]; final ExecutionVertex vertex2 = ejv.getTaskVertices()[1]; graph.scheduleForExecution(); assertEquals(JobStatus.RUNNING, graph.getState()); // let one of the vertices fail - that triggers a local recovery action vertex1.getCurrentExecutionAttempt().fail(new Exception("test failure")); assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState()); // graph should still be running and the failover recovery action should be queued assertEquals(JobStatus.RUNNING, graph.getState()); // now cancel the job graph.failGlobal(new SuppressRestartsException(new Exception("test exception"))); assertEquals(JobStatus.FAILING, graph.getState()); 
assertEquals(ExecutionState.FAILED, vertex1.getCurrentExecutionAttempt().getState()); assertEquals(ExecutionState.CANCELING, vertex2.getCurrentExecutionAttempt().getState()); // let the recovery action continue blocker.complete(null); // now report that cancelling is complete for the other vertex vertex2.getCurrentExecutionAttempt().completeCancelling(); assertEquals(JobStatus.FAILED, graph.getState()); assertTrue(vertex1.getCurrentExecutionAttempt().getState().isTerminal()); assertTrue(vertex2.getCurrentExecutionAttempt().getState().isTerminal()); // make sure all slots are recycled assertEquals(parallelism, slotProvider.getNumberOfAvailableSlots()); }
Example #20
Source File: ExecutionGraph.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Try to restart the job. If we cannot restart the job (e.g. no more restarts allowed), then * try to fail the job. This operation is only permitted if the current state is FAILING or * RESTARTING. * * @return true if the operation could be executed; false if a concurrent job status change occurred */ private boolean tryRestartOrFail(long globalModVersionForRestart) { JobStatus currentState = state; if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) { final Throwable failureCause = this.failureCause; synchronized (progressLock) { if (LOG.isDebugEnabled()) { LOG.debug("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID(), failureCause); } else { LOG.info("Try to restart or fail the job {} ({}) if no longer possible.", getJobName(), getJobID()); } final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException); final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart(); boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart; if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) { LOG.info("Restarting the job {} ({}).", getJobName(), getJobID()); RestartCallback restarter = new ExecutionGraphRestartCallback(this, globalModVersionForRestart); restartStrategy.restart(restarter, getJobMasterMainThreadExecutor()); return true; } else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) { final String cause1 = isFailureCauseAllowingRestart ? null : "a type of SuppressRestartsException was thrown"; final String cause2 = isRestartStrategyAllowingRestart ? 
null : "the restart strategy prevented it"; LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(), StringUtils.concatenateWithAnd(cause1, cause2), failureCause); onTerminalState(JobStatus.FAILED); return true; } else { // we must have changed the state concurrently, thus we cannot complete this operation return false; } } } else { // this operation is only allowed in the state FAILING or RESTARTING return false; } }