org.apache.flink.runtime.checkpoint.CompletedCheckpoint Java Examples
The following examples show how to use
org.apache.flink.runtime.checkpoint.CompletedCheckpoint.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0 | 6 votes |
private long takeCompleteCheckpoint( DefaultScheduler scheduler, TestingOperatorCoordinator testingOperatorCoordinator, byte[] coordinatorState) throws Exception { final CompletableFuture<CompletedCheckpoint> checkpointFuture = triggerCheckpoint(scheduler); testingOperatorCoordinator.getLastTriggeredCheckpoint().complete(coordinatorState); acknowledgeCurrentCheckpoint(scheduler); // wait until checkpoint has completed final long checkpointId = checkpointFuture.get().getCheckpointID(); // now wait until it has been acknowledged while (!testingOperatorCoordinator.hasCompleteCheckpoint()) { executor.triggerAll(); Thread.sleep(1); } return checkpointId; }
Example #2
Source File: ZooKeeperUtils.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The store's ZooKeeper path is the configured checkpoints path with the job-specific
 * path appended.
 *
 * @param client the {@link CuratorFramework} ZooKeeper client to use
 * @param configuration the {@link Configuration} to read the checkpoints path from; must not be null
 * @param jobId ID of the job the store is created for
 * @param maxNumberOfCheckpointsToRetain the maximum number of checkpoints to retain
 * @param executor executor used to run ZooKeeper callbacks
 * @return the newly created {@link ZooKeeperCompletedCheckpointStore}
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
        CuratorFramework client,
        Configuration configuration,
        JobID jobId,
        int maxNumberOfCheckpointsToRetain,
        Executor executor) throws Exception {

    checkNotNull(configuration, "Configuration");

    final String checkpointsRoot =
        configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

    final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
        createFileSystemStateStorage(configuration, "completedCheckpoint");

    // append the job-specific path to the configured checkpoints path
    final String jobCheckpointsPath =
        checkpointsRoot + ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);

    final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
        maxNumberOfCheckpointsToRetain,
        createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
        executor);

    LOG.info(
        "Initialized {} in '{}'.",
        ZooKeeperCompletedCheckpointStore.class.getSimpleName(),
        jobCheckpointsPath);

    return store;
}
Example #3
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 5 votes |
/**
 * Shuts the store down for the given job status.
 *
 * <p>For a globally terminal status both {@code checkpoints} and {@code suspended} end up
 * empty. Otherwise the current {@code checkpoints} replace the contents of {@code suspended}
 * and {@code checkpoints} is emptied.
 *
 * @param jobStatus the status the job is in at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
    // evaluate the status first so nothing is modified if this call fails
    final boolean moveToSuspended = !jobStatus.isGloballyTerminalState();

    // previously suspended checkpoints are dropped in either case
    suspended.clear();
    if (moveToSuspended) {
        checkpoints.forEach(suspended::add);
    }
    checkpoints.clear();
}
Example #4
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 5 votes |
/**
 * Appends the checkpoint to {@code checkpoints} and calls {@link #removeOldestCheckpoint()}
 * when the size then exceeds {@code maxRetainedCheckpoints}.
 *
 * @param checkpoint the completed checkpoint to add
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
    checkpoints.addLast(checkpoint);

    final boolean overLimit = maxRetainedCheckpoints < checkpoints.size();
    if (overLimit) {
        removeOldestCheckpoint();
    }
}
Example #5
Source File: SchedulerTestingUtils.java From flink with Apache License 2.0 | 5 votes |
/**
 * Triggers a checkpoint on the scheduler's checkpoint coordinator, acknowledges the single
 * pending checkpoint, and returns the resulting completed checkpoint.
 *
 * <p>Asserts that exactly one checkpoint is pending after triggering and that the
 * checkpoint's completion future already holds a value after acknowledging.
 *
 * @param scheduler the scheduler whose checkpoint coordinator is used
 * @return the completed checkpoint
 * @throws Exception if triggering or acknowledging the checkpoint fails
 */
public static CompletedCheckpoint takeCheckpoint(DefaultScheduler scheduler) throws Exception {
    final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
    coordinator.triggerCheckpoint(false);

    assertEquals(
        "test setup inconsistent",
        1,
        coordinator.getNumberOfPendingCheckpoints());

    // exactly one checkpoint is pending, so the first entry is the one just triggered
    final PendingCheckpoint pending =
        coordinator.getPendingCheckpoints().values().iterator().next();
    final CompletableFuture<CompletedCheckpoint> completion = pending.getCompletionFuture();

    acknowledgePendingCheckpoint(scheduler, pending.getCheckpointId());

    // getNow(null) yields null if the future is not complete yet
    final CompletedCheckpoint result = completion.getNow(null);
    assertNotNull("checkpoint not complete", result);
    return result;
}
Example #6
Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0 | 5 votes |
private CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception { final CompletableFuture<CompletedCheckpoint> future = SchedulerTestingUtils.triggerCheckpoint(scheduler); final TestingOperatorCoordinator coordinator = getCoordinator(scheduler); // the Checkpoint Coordinator executes parts of the logic in its timer thread, and delegates some calls // to the scheduler executor. so we need to do a mix of waiting for the timer thread and working off // tasks in the scheduler executor. // we can drop this here once the CheckpointCoordinator also runs in a 'main thread executor'. while (!(coordinator.hasTriggeredCheckpoint() || future.isDone())) { executor.triggerAll(); Thread.sleep(1); } return future; }
Example #7
Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that the bytes used to complete the coordinator's checkpoint are the bytes stored
 * as coordinator state in the completed checkpoint.
 */
@Test
public void testTakeCheckpoint() throws Exception {
    final byte[] expectedBytes = new byte[656];
    final Random random = new Random();
    random.nextBytes(expectedBytes);

    final DefaultScheduler scheduler = createSchedulerAndDeployTasks();
    final TestingOperatorCoordinator testingCoordinator = getCoordinator(scheduler);

    final CompletableFuture<CompletedCheckpoint> pending = triggerCheckpoint(scheduler);
    testingCoordinator.getLastTriggeredCheckpoint().complete(expectedBytes);
    acknowledgeCurrentCheckpoint(scheduler);

    final CompletedCheckpoint completed = pending.get();
    final OperatorState operatorState = completed.getOperatorStates().get(testOperatorId);

    assertArrayEquals(expectedBytes, getStateHandleContents(operatorState.getCoordinatorState()));
}
Example #8
Source File: JobMasterTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that a JobMaster will restore the given JobGraph from its savepoint upon * initial submission. */ @Test public void testRestoringFromSavepoint() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(false); assertThat(savepointCheckpoint, Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #9
Source File: ZooKeeperUtils.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The store's ZooKeeper path is the configured checkpoints path with the job-specific
 * path appended.
 *
 * @param client the {@link CuratorFramework} ZooKeeper client to use
 * @param configuration the {@link Configuration} to read the checkpoints path from; must not be null
 * @param jobId ID of the job the store is created for
 * @param maxNumberOfCheckpointsToRetain the maximum number of checkpoints to retain
 * @param executor executor used to run ZooKeeper callbacks
 * @return the newly created {@link ZooKeeperCompletedCheckpointStore}
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
        CuratorFramework client,
        Configuration configuration,
        JobID jobId,
        int maxNumberOfCheckpointsToRetain,
        Executor executor) throws Exception {

    checkNotNull(configuration, "Configuration");

    final String checkpointsRoot =
        configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

    final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
        createFileSystemStateStorage(configuration, HA_STORAGE_COMPLETED_CHECKPOINT);

    // append the job-specific path to the configured checkpoints path
    final String jobCheckpointsPath =
        checkpointsRoot + ZooKeeperJobGraphStore.getPathForJob(jobId);

    final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
        maxNumberOfCheckpointsToRetain,
        createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
        executor);

    LOG.info(
        "Initialized {} in '{}'.",
        ZooKeeperCompletedCheckpointStore.class.getSimpleName(),
        jobCheckpointsPath);

    return store;
}
Example #10
Source File: SchedulerBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>With {@code cancelJob} set, the checkpoint scheduler is stopped before the savepoint is
 * triggered and started again if the savepoint fails.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *     has a default savepoint location
 * @param cancelJob whether to cancel the job after the savepoint was stored
 * @return a future holding the external pointer of the savepoint
 * @throws IllegalStateException if the execution graph has no checkpoint coordinator, or if
 *     neither a target directory nor a default savepoint location is available
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
    mainThreadExecutor.assertRunningInMainThread();

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        throw new IllegalStateException(
            String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
    }

    if (targetDirectory == null && !coordinator.getCheckpointStorage().hasDefaultSavepointLocation()) {
        log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

        throw new IllegalStateException(
            "No savepoint directory configured. You can either specify a directory " +
                "while cancelling via -s :targetDirectory or configure a cluster-wide " +
                "default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
    }

    log.info("Triggering {}savepoint for job {}.", cancelJob ? "cancel-with-" : "", jobGraph.getJobID());

    if (cancelJob) {
        coordinator.stopCheckpointScheduler();
    }

    return coordinator
        .triggerSavepoint(targetDirectory)
        .thenApply(CompletedCheckpoint::getExternalPointer)
        .handleAsync((path, throwable) -> {
            if (throwable == null) {
                if (cancelJob) {
                    log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
                    cancel();
                }
                return path;
            }

            // savepoint failed: resume periodic checkpointing that was stopped above
            if (cancelJob) {
                startCheckpointScheduler(coordinator);
            }
            throw new CompletionException(throwable);
        }, mainThreadExecutor);
}
Example #11
Source File: RegionFailoverITCase.java From flink with Apache License 2.0 | 5 votes |
@Override public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception { super.addCheckpoint(checkpoint); // we record the information when adding completed checkpoint instead of 'notifyCheckpointComplete' invoked // on task side to avoid race condition. See FLINK-13601. lastCompletedCheckpointId.set(checkpoint.getCheckpointID()); numCompletedCheckpoints.incrementAndGet(); }
Example #12
Source File: NotifyCheckpointAbortedITCase.java From flink with Apache License 2.0 | 5 votes |
@Override public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception { if (abortCheckpointLatch.isTriggered()) { super.addCheckpoint(checkpoint); } else { // tell main thread that all checkpoints on task side have been finished. addCheckpointLatch.trigger(); // wait for the main thread to throw exception so that the checkpoint would be notified as aborted. abortCheckpointLatch.await(); throw new ExpectedTestException(); } }
Example #13
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 5 votes |
/**
 * Shuts the store down for the given job status.
 *
 * <p>For a globally terminal status both {@code checkpoints} and {@code suspended} end up
 * empty. Otherwise the current {@code checkpoints} replace the contents of {@code suspended}
 * and {@code checkpoints} is emptied.
 *
 * @param jobStatus the status the job is in at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
    // evaluate the status first so nothing is modified if this call fails
    final boolean moveToSuspended = !jobStatus.isGloballyTerminalState();

    // previously suspended checkpoints are dropped in either case
    suspended.clear();
    if (moveToSuspended) {
        checkpoints.forEach(suspended::add);
    }
    checkpoints.clear();
}
Example #14
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 5 votes |
/**
 * Appends the checkpoint to {@code checkpoints} and calls {@link #removeOldestCheckpoint()}
 * when the size then exceeds {@code maxRetainedCheckpoints}.
 *
 * @param checkpoint the completed checkpoint to add
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
    checkpoints.addLast(checkpoint);

    final boolean overLimit = maxRetainedCheckpoints < checkpoints.size();
    if (overLimit) {
        removeOldestCheckpoint();
    }
}
Example #15
Source File: JobMasterTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that a JobMaster will restore the given JobGraph from its savepoint upon * initial submission. */ @Test public void testRestoringFromSavepoint() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(false); assertThat(savepointCheckpoint, Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #16
Source File: JobMaster.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>With {@code cancelJob} set, the checkpoint scheduler is stopped before the savepoint is
 * triggered and started again if the savepoint fails. Precondition failures are reported
 * through an exceptionally completed future rather than thrown.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *     has a default savepoint location
 * @param cancelJob whether to cancel the job after the savepoint was stored
 * @param timeout timeout passed on to the cancel call
 * @return a future holding the external pointer of the savepoint
 */
@Override
public CompletableFuture<String> triggerSavepoint(
        @Nullable final String targetDirectory,
        final boolean cancelJob,
        final Time timeout) {

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        return FutureUtils.completedExceptionally(new IllegalStateException(
            String.format("Job %s is not a streaming job.", jobGraph.getJobID())));
    }

    if (targetDirectory == null && !coordinator.getCheckpointStorage().hasDefaultSavepointLocation()) {
        log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

        return FutureUtils.completedExceptionally(new IllegalStateException(
            "No savepoint directory configured. You can either specify a directory " +
                "while cancelling via -s :targetDirectory or configure a cluster-wide " +
                "default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'."));
    }

    if (cancelJob) {
        coordinator.stopCheckpointScheduler();
    }

    return coordinator
        .triggerSavepoint(System.currentTimeMillis(), targetDirectory)
        .thenApply(CompletedCheckpoint::getExternalPointer)
        .handleAsync((path, throwable) -> {
            if (throwable == null) {
                if (cancelJob) {
                    log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
                    cancel(timeout);
                }
                return path;
            }

            // savepoint failed: resume periodic checkpointing that was stopped above
            if (cancelJob) {
                startCheckpointScheduler(coordinator);
            }
            throw new CompletionException(throwable);
        }, getMainThreadExecutor());
}
Example #17
Source File: ZooKeeperUtils.java From flink with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The store's ZooKeeper path is the configured checkpoints path with the job-specific
 * path appended.
 *
 * @param client the {@link CuratorFramework} ZooKeeper client to use
 * @param configuration the {@link Configuration} to read the checkpoints path from; must not be null
 * @param jobId ID of the job the store is created for
 * @param maxNumberOfCheckpointsToRetain the maximum number of checkpoints to retain
 * @param executor executor used to run ZooKeeper callbacks
 * @return the newly created {@link ZooKeeperCompletedCheckpointStore}
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
        CuratorFramework client,
        Configuration configuration,
        JobID jobId,
        int maxNumberOfCheckpointsToRetain,
        Executor executor) throws Exception {

    checkNotNull(configuration, "Configuration");

    final String checkpointsRoot =
        configuration.getString(HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

    final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
        createFileSystemStateStorage(configuration, "completedCheckpoint");

    // append the job-specific path to the configured checkpoints path
    final String jobCheckpointsPath =
        checkpointsRoot + ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);

    final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
        maxNumberOfCheckpointsToRetain,
        createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
        executor);

    LOG.info(
        "Initialized {} in '{}'.",
        ZooKeeperCompletedCheckpointStore.class.getSimpleName(),
        jobCheckpointsPath);

    return store;
}
Example #18
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that a JobMaster will restore the given JobGraph from its savepoint upon * initial submission. */ @Test public void testRestoringFromSavepoint() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(); assertThat(savepointCheckpoint, Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #19
Source File: LegacyScheduler.java From flink with Apache License 2.0 | 5 votes |
/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>With {@code cancelJob} set, the checkpoint scheduler is stopped before the savepoint is
 * triggered and started again if the savepoint fails.
 *
 * @param targetDirectory directory for the savepoint; may be null if the checkpoint storage
 *     has a default savepoint location
 * @param cancelJob whether to cancel the job after the savepoint was stored
 * @return a future holding the external pointer of the savepoint
 * @throws IllegalStateException if the execution graph has no checkpoint coordinator, or if
 *     neither a target directory nor a default savepoint location is available
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
    mainThreadExecutor.assertRunningInMainThread();

    final CheckpointCoordinator coordinator = executionGraph.getCheckpointCoordinator();

    if (coordinator == null) {
        throw new IllegalStateException(
            String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
    }

    if (targetDirectory == null && !coordinator.getCheckpointStorage().hasDefaultSavepointLocation()) {
        log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

        throw new IllegalStateException(
            "No savepoint directory configured. You can either specify a directory " +
                "while cancelling via -s :targetDirectory or configure a cluster-wide " +
                "default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
    }

    if (cancelJob) {
        coordinator.stopCheckpointScheduler();
    }

    return coordinator
        .triggerSavepoint(System.currentTimeMillis(), targetDirectory)
        .thenApply(CompletedCheckpoint::getExternalPointer)
        .handleAsync((path, throwable) -> {
            if (throwable == null) {
                if (cancelJob) {
                    log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
                    cancel();
                }
                return path;
            }

            // savepoint failed: resume periodic checkpointing that was stopped above
            if (cancelJob) {
                startCheckpointScheduler(coordinator);
            }
            throw new CompletionException(throwable);
        }, mainThreadExecutor);
}
Example #20
Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Shuts the store down for the given job status.
 *
 * <p>For a globally terminal status both {@code checkpoints} and {@code suspended} end up
 * empty. Otherwise the current {@code checkpoints} replace the contents of {@code suspended}
 * and {@code checkpoints} is emptied.
 *
 * @param jobStatus the status the job is in at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
    // evaluate the status first so nothing is modified if this call fails
    final boolean moveToSuspended = !jobStatus.isGloballyTerminalState();

    // previously suspended checkpoints are dropped in either case
    suspended.clear();
    if (moveToSuspended) {
        checkpoints.forEach(suspended::add);
    }
    checkpoints.clear();
}
Example #21
Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Appends the checkpoint to {@code checkpoints} and calls {@link #removeOldestCheckpoint()}
 * when the size then exceeds {@code maxRetainedCheckpoints}.
 *
 * @param checkpoint the completed checkpoint to add
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
    checkpoints.addLast(checkpoint);

    final boolean overLimit = maxRetainedCheckpoints < checkpoints.size();
    if (overLimit) {
        removeOldestCheckpoint();
    }
}
Example #22
Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Returns the last element of {@code checkpoints}.
 *
 * @return the last checkpoint, or {@code null} if {@code checkpoints} is empty
 */
@Override
public CompletedCheckpoint getLatestCheckpoint() throws Exception {
    if (checkpoints.isEmpty()) {
        return null;
    }
    return checkpoints.getLast();
}
Example #23
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 4 votes |
/**
 * Returns the current checkpoints as a new list.
 *
 * @return a new {@link ArrayList} holding the elements of {@code checkpoints}
 */
@Override
public List<CompletedCheckpoint> getAllCheckpoints() throws Exception {
    // copy, so the caller does not receive the internal collection itself
    final List<CompletedCheckpoint> snapshot = new ArrayList<>(checkpoints);
    return snapshot;
}
Example #24
Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0 | 4 votes |
/**
 * Removes the first element of {@code checkpoints} and calls {@code discardOnSubsume()} on it.
 *
 * @throws Exception if discarding the removed checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
    checkpoints.removeFirst().discardOnSubsume();
}
Example #25
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a JobMaster will only restore a modified JobGraph if non * restored state is allowed. */ @Test public void testRestoringModifiedJobFromSavepoint() throws Exception { // create savepoint data final long savepointId = 42L; final OperatorID operatorID = new OperatorID(); final File savepointFile = createSavepointWithOperatorState(savepointId, operatorID); // set savepoint settings which don't allow non restored state final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), false); // create a new operator final JobVertex jobVertex = new JobVertex("New operator"); jobVertex.setInvokableClass(NoOpInvokable.class); final JobGraph jobGraphWithNewOperator = createJobGraphFromJobVerticesWithCheckpointing(savepointRestoreSettings, jobVertex); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); try { createJobMaster( configuration, jobGraphWithNewOperator, haServices, new TestingJobManagerSharedServicesBuilder().build()); fail("Should fail because we cannot resume the changed JobGraph from the savepoint."); } catch (IllegalStateException expected) { // that was expected :-) } // allow for non restored state jobGraphWithNewOperator.setSavepointRestoreSettings( SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true)); final JobMaster jobMaster = createJobMaster( configuration, jobGraphWithNewOperator, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(); assertThat(savepointCheckpoint, 
Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #26
Source File: SchedulerTestingUtils.java From flink with Apache License 2.0 | 4 votes |
/**
 * Triggers a checkpoint on the checkpoint coordinator of the given scheduler.
 *
 * @param scheduler the scheduler whose checkpoint coordinator is used
 * @return the future returned by the coordinator's {@code triggerCheckpoint(false)} call
 * @throws Exception if obtaining the coordinator or triggering the checkpoint fails
 */
public static CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception {
    return getCheckpointCoordinator(scheduler).triggerCheckpoint(false);
}
Example #27
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that an existing checkpoint will have precedence over an savepoint. */ @Test public void testCheckpointPrecedesSavepointRecovery() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath("" + savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final long checkpointId = 1L; final CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint( jobGraph.getJobID(), checkpointId, 1L, 1L, Collections.emptyMap(), null, CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new DummyCheckpointStorageLocation()); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); completedCheckpointStore.addCheckpoint(completedCheckpoint); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(); assertThat(savepointCheckpoint, Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(checkpointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #28
Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Removes the first element of {@code checkpoints} and calls {@code discardOnSubsume()} on it.
 *
 * @throws Exception if discarding the removed checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
    checkpoints.removeFirst().discardOnSubsume();
}
Example #29
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that an existing checkpoint will have precedence over an savepoint. */ @Test public void testCheckpointPrecedesSavepointRecovery() throws Exception { // create savepoint data final long savepointId = 42L; final File savepointFile = createSavepoint(savepointId); // set savepoint settings final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath("" + savepointFile.getAbsolutePath(), true); final JobGraph jobGraph = createJobGraphWithCheckpointing(savepointRestoreSettings); final long checkpointId = 1L; final CompletedCheckpoint completedCheckpoint = new CompletedCheckpoint( jobGraph.getJobID(), checkpointId, 1L, 1L, Collections.emptyMap(), null, CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION), new DummyCheckpointStorageLocation()); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); completedCheckpointStore.addCheckpoint(completedCheckpoint); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(false); assertThat(savepointCheckpoint, Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(checkpointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example #30
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a JobMaster will only restore a modified JobGraph if non * restored state is allowed. */ @Test public void testRestoringModifiedJobFromSavepoint() throws Exception { // create savepoint data final long savepointId = 42L; final OperatorID operatorID = new OperatorID(); final File savepointFile = createSavepointWithOperatorState(savepointId, operatorID); // set savepoint settings which don't allow non restored state final SavepointRestoreSettings savepointRestoreSettings = SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), false); // create a new operator final JobVertex jobVertex = new JobVertex("New operator"); jobVertex.setInvokableClass(NoOpInvokable.class); final JobGraph jobGraphWithNewOperator = createJobGraphFromJobVerticesWithCheckpointing(savepointRestoreSettings, jobVertex); final StandaloneCompletedCheckpointStore completedCheckpointStore = new StandaloneCompletedCheckpointStore(1); final TestingCheckpointRecoveryFactory testingCheckpointRecoveryFactory = new TestingCheckpointRecoveryFactory(completedCheckpointStore, new StandaloneCheckpointIDCounter()); haServices.setCheckpointRecoveryFactory(testingCheckpointRecoveryFactory); try { createJobMaster( configuration, jobGraphWithNewOperator, haServices, new TestingJobManagerSharedServicesBuilder().build()); fail("Should fail because we cannot resume the changed JobGraph from the savepoint."); } catch (IllegalStateException expected) { // that was expected :-) } // allow for non restored state jobGraphWithNewOperator.setSavepointRestoreSettings( SavepointRestoreSettings.forPath( savepointFile.getAbsolutePath(), true)); final JobMaster jobMaster = createJobMaster( configuration, jobGraphWithNewOperator, haServices, new TestingJobManagerSharedServicesBuilder().build()); try { // starting the JobMaster should have read the savepoint final CompletedCheckpoint savepointCheckpoint = completedCheckpointStore.getLatestCheckpoint(false); assertThat(savepointCheckpoint, 
Matchers.notNullValue()); assertThat(savepointCheckpoint.getCheckpointID(), is(savepointId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }