org.apache.flink.runtime.checkpoint.CompletedCheckpoint Java Examples

Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0

6 votes

/**
 * Triggers a checkpoint, completes the coordinator's pending checkpoint future with the
 * given state, acknowledges the current checkpoint, and then waits until the coordinator
 * reports a complete checkpoint.
 *
 * @param scheduler scheduler on which the checkpoint is triggered and acknowledged
 * @param testingOperatorCoordinator coordinator whose last triggered checkpoint is completed
 * @param coordinatorState bytes used to complete the coordinator's checkpoint future
 * @return the ID of the completed checkpoint
 * @throws Exception if triggering or waiting for the checkpoint fails
 */
private long takeCompleteCheckpoint(
		DefaultScheduler scheduler,
		TestingOperatorCoordinator testingOperatorCoordinator,
		byte[] coordinatorState) throws Exception {

	final CompletableFuture<CompletedCheckpoint> completion = triggerCheckpoint(scheduler);

	// hand over the coordinator state first, then acknowledge the checkpoint
	testingOperatorCoordinator.getLastTriggeredCheckpoint().complete(coordinatorState);
	acknowledgeCurrentCheckpoint(scheduler);

	// blocks until the checkpoint itself has completed
	final CompletedCheckpoint completed = completion.get();
	final long id = completed.getCheckpointID();

	// keep working off executor tasks until the coordinator has a complete checkpoint
	for (;;) {
		if (testingOperatorCoordinator.hasCompleteCheckpoint()) {
			return id;
		}
		executor.triggerAll();
		Thread.sleep(1);
	}
}

Source File: ZooKeeperUtils.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The ZooKeeper path of the store is the value configured under
 * {@link HighAvailabilityOptions#HA_ZOOKEEPER_CHECKPOINTS_PATH} followed by the
 * job-specific path.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String checkpointsRoot = configuration.getString(
		HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
		createFileSystemStateStorage(configuration, "completedCheckpoint");

	// the job-specific suffix is resolved only after the state storage has been created
	final String jobCheckpointsPath = checkpointsRoot + ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);

	final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), jobCheckpointsPath);
	return store;
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

5 votes

/**
 * Shuts the store down depending on the final job status.
 *
 * <p>For a globally terminal status both the active and the suspended checkpoints are
 * dropped. For any other status the active checkpoints replace the previously suspended
 * ones, and the active collection is emptied.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	final boolean globallyTerminal = jobStatus.isGloballyTerminalState();

	// whatever was suspended before is dropped in both cases
	suspended.clear();

	if (!globallyTerminal) {
		// move the active checkpoints over, keeping their iteration order
		checkpoints.forEach(suspended::add);
	}

	checkpoints.clear();
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

5 votes

/**
 * Appends the checkpoint as the newest entry and removes the oldest entry if the store
 * then holds more than {@code maxRetainedCheckpoints} checkpoints.
 *
 * @param checkpoint completed checkpoint to append
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	final boolean withinRetention = checkpoints.size() <= maxRetainedCheckpoints;
	if (withinRetention) {
		return;
	}

	// over the retention limit: drop the oldest entry
	removeOldestCheckpoint();
}

Source File: SchedulerTestingUtils.java From flink with Apache License 2.0

5 votes

/**
 * Triggers a checkpoint on the scheduler's checkpoint coordinator, acknowledges the single
 * pending checkpoint, and returns the completed checkpoint.
 *
 * <p>Fails with an assertion error if there is not exactly one pending checkpoint after
 * triggering, or if the checkpoint is not complete right after the acknowledgement.
 *
 * @param scheduler scheduler whose checkpoint coordinator is used
 * @return the completed checkpoint
 */
public static CompletedCheckpoint takeCheckpoint(DefaultScheduler scheduler) throws Exception {
	final CheckpointCoordinator coordinator = getCheckpointCoordinator(scheduler);
	coordinator.triggerCheckpoint(false);

	assertEquals("test setup inconsistent", 1, coordinator.getNumberOfPendingCheckpoints());

	// exactly one checkpoint is pending, so the first map value is the one just triggered
	final PendingCheckpoint pending = coordinator.getPendingCheckpoints().values().iterator().next();
	final CompletableFuture<CompletedCheckpoint> completion = pending.getCompletionFuture();

	acknowledgePendingCheckpoint(scheduler, pending.getCheckpointId());

	// non-blocking read: the future must already be complete at this point
	final CompletedCheckpoint result = completion.getNow(null);
	assertNotNull("checkpoint not complete", result);
	return result;
}

Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0

5 votes

/**
 * Triggers a checkpoint and works off scheduler-executor tasks until either the operator
 * coordinator has seen the checkpoint trigger or the checkpoint future is done.
 *
 * @param scheduler scheduler on which the checkpoint is triggered
 * @return the future obtained from {@code SchedulerTestingUtils.triggerCheckpoint}
 */
private CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception {
	final CompletableFuture<CompletedCheckpoint> completion = SchedulerTestingUtils.triggerCheckpoint(scheduler);
	final TestingOperatorCoordinator operatorCoordinator = getCoordinator(scheduler);

	// Part of the Checkpoint Coordinator logic runs in its timer thread, other parts are
	// delegated to the scheduler executor. Hence the mix of sleeping (for the timer thread)
	// and draining the scheduler executor. This can go away once the CheckpointCoordinator
	// also runs in a 'main thread executor'.
	while (!operatorCoordinator.hasTriggeredCheckpoint() && !completion.isDone()) {
		executor.triggerAll();
		Thread.sleep(1);
	}

	return completion;
}

Source File: OperatorCoordinatorSchedulerTest.java From flink with Apache License 2.0

5 votes

/**
 * Verifies that the bytes used to complete the coordinator's checkpoint future show up as
 * the coordinator state of the test operator in the completed checkpoint.
 */
@Test
public void testTakeCheckpoint() throws Exception {
	final byte[] expectedState = new byte[656];
	new Random().nextBytes(expectedState);

	final DefaultScheduler scheduler = createSchedulerAndDeployTasks();
	final TestingOperatorCoordinator coordinator = getCoordinator(scheduler);

	final CompletableFuture<CompletedCheckpoint> completion = triggerCheckpoint(scheduler);
	coordinator.getLastTriggeredCheckpoint().complete(expectedState);
	acknowledgeCurrentCheckpoint(scheduler);

	// read the coordinator state back out of the completed checkpoint
	final CompletedCheckpoint completed = completion.get();
	final OperatorState operatorState = completed.getOperatorStates().get(testOperatorId);
	final byte[] actualState = getStateHandleContents(operatorState.getCoordinatorState());

	assertArrayEquals(expectedState, actualState);
}

Source File: JobMasterTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks that creating a JobMaster for a JobGraph with savepoint restore settings puts
 * the savepoint into the completed checkpoint store.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final JobGraph jobGraph = createJobGraphWithCheckpointing(
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true));

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store is expected to contain the savepoint at this point
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint(false);

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: ZooKeeperUtils.java From flink with Apache License 2.0

5 votes

/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The ZooKeeper path of the store is the value configured under
 * {@link HighAvailabilityOptions#HA_ZOOKEEPER_CHECKPOINTS_PATH} followed by the
 * job-specific path.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String checkpointsRoot = configuration.getString(
		HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
		createFileSystemStateStorage(configuration, HA_STORAGE_COMPLETED_CHECKPOINT);

	// the job-specific suffix is resolved only after the state storage has been created
	final String jobCheckpointsPath = checkpointsRoot + ZooKeeperJobGraphStore.getPathForJob(jobId);

	final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), jobCheckpointsPath);
	return store;
}

Source File: SchedulerBase.java From flink with Apache License 2.0

5 votes

/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>Asserts that it runs in the main thread. With {@code cancelJob} set, the checkpoint
 * scheduler is stopped before the savepoint is triggered; if the savepoint then fails,
 * {@code startCheckpointScheduler} is called before the failure is propagated.
 *
 * @param targetDirectory directory for the savepoint; may be {@code null} only if the
 *                        checkpoint storage has a default savepoint location
 * @param cancelJob       whether to cancel the job after the savepoint has been stored
 * @return future holding the external pointer of the savepoint
 * @throws IllegalStateException if the execution graph has no checkpoint coordinator, or
 *                               if neither a target directory nor a default location exists
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
	mainThreadExecutor.assertRunningInMainThread();

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
	if (checkpointCoordinator == null) {
		throw new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
	}

	final boolean noSavepointLocation =
		targetDirectory == null && !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation();
	if (noSavepointLocation) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		throw new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
	}

	final String savepointKind = cancelJob ? "cancel-with-" : "";
	log.info("Triggering {}savepoint for job {}.", savepointKind, jobGraph.getJobID());

	if (cancelJob) {
		// no periodic checkpoints while the cancel-with-savepoint is in progress
		checkpointCoordinator.stopCheckpointScheduler();
	}

	return checkpointCoordinator
		.triggerSavepoint(targetDirectory)
		.thenApply(CompletedCheckpoint::getExternalPointer)
		.handleAsync((path, throwable) -> {
			if (throwable == null) {
				if (cancelJob) {
					log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
					cancel();
				}
				return path;
			}

			// savepoint failed: undo the scheduler stop before propagating the failure
			if (cancelJob) {
				startCheckpointScheduler(checkpointCoordinator);
			}
			throw new CompletionException(throwable);
		}, mainThreadExecutor);
}

Source File: RegionFailoverITCase.java From flink with Apache License 2.0

5 votes

/**
 * Delegates to the parent store and afterwards records the ID of the added checkpoint
 * and bumps the count of completed checkpoints.
 *
 * @param checkpoint completed checkpoint being added
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	super.addCheckpoint(checkpoint);

	// The bookkeeping happens here, when the completed checkpoint is added, rather than in
	// 'notifyCheckpointComplete' on the task side, to avoid a race condition (FLINK-13601).
	final long completedId = checkpoint.getCheckpointID();
	lastCompletedCheckpointId.set(completedId);
	numCompletedCheckpoints.incrementAndGet();
}

Source File: NotifyCheckpointAbortedITCase.java From flink with Apache License 2.0

5 votes

/**
 * Adds the checkpoint normally once the abort latch has been triggered. Before that, it
 * triggers the add-checkpoint latch, waits for the abort latch and then fails with an
 * {@code ExpectedTestException}.
 *
 * @param checkpoint completed checkpoint being added
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	if (!abortCheckpointLatch.isTriggered()) {
		// signal the main thread that all checkpoints on the task side have finished
		addCheckpointLatch.trigger();
		// block until the main thread is ready, then fail so the checkpoint gets
		// notified as aborted
		abortCheckpointLatch.await();
		throw new ExpectedTestException();
	}

	super.addCheckpoint(checkpoint);
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

5 votes

/**
 * Shuts the store down depending on the final job status.
 *
 * <p>For a globally terminal status both the active and the suspended checkpoints are
 * dropped. For any other status the active checkpoints replace the previously suspended
 * ones, and the active collection is emptied.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	final boolean globallyTerminal = jobStatus.isGloballyTerminalState();

	// whatever was suspended before is dropped in both cases
	suspended.clear();

	if (!globallyTerminal) {
		// move the active checkpoints over, keeping their iteration order
		checkpoints.forEach(suspended::add);
	}

	checkpoints.clear();
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

5 votes

/**
 * Appends the checkpoint as the newest entry and removes the oldest entry if the store
 * then holds more than {@code maxRetainedCheckpoints} checkpoints.
 *
 * @param checkpoint completed checkpoint to append
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	final boolean withinRetention = checkpoints.size() <= maxRetainedCheckpoints;
	if (withinRetention) {
		return;
	}

	// over the retention limit: drop the oldest entry
	removeOldestCheckpoint();
}

Source File: JobMasterTest.java From flink with Apache License 2.0

5 votes

/**
 * Checks that creating a JobMaster for a JobGraph with savepoint restore settings puts
 * the savepoint into the completed checkpoint store.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final JobGraph jobGraph = createJobGraphWithCheckpointing(
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true));

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store is expected to contain the savepoint at this point
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint(false);

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: JobMaster.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>Precondition failures are reported through an exceptionally completed future rather
 * than thrown. With {@code cancelJob} set, the checkpoint scheduler is stopped before the
 * savepoint is triggered; if the savepoint then fails, {@code startCheckpointScheduler}
 * is called before the failure is propagated.
 *
 * @param targetDirectory directory for the savepoint; may be {@code null} only if the
 *                        checkpoint storage has a default savepoint location
 * @param cancelJob       whether to cancel the job after the savepoint has been stored
 * @param timeout         timeout passed on to {@code cancel}
 * @return future holding the external pointer of the savepoint
 */
@Override
public CompletableFuture<String> triggerSavepoint(
		@Nullable final String targetDirectory,
		final boolean cancelJob,
		final Time timeout) {

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
	if (checkpointCoordinator == null) {
		return FutureUtils.completedExceptionally(new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID())));
	}

	final boolean noSavepointLocation =
		targetDirectory == null && !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation();
	if (noSavepointLocation) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		return FutureUtils.completedExceptionally(new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'."));
	}

	if (cancelJob) {
		// no periodic checkpoints while the cancel-with-savepoint is in progress
		checkpointCoordinator.stopCheckpointScheduler();
	}

	return checkpointCoordinator
		.triggerSavepoint(System.currentTimeMillis(), targetDirectory)
		.thenApply(CompletedCheckpoint::getExternalPointer)
		.handleAsync((path, throwable) -> {
			if (throwable == null) {
				if (cancelJob) {
					log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
					cancel(timeout);
				}
				return path;
			}

			// savepoint failed: undo the scheduler stop before propagating the failure
			if (cancelJob) {
				startCheckpointScheduler(checkpointCoordinator);
			}
			throw new CompletionException(throwable);
		}, getMainThreadExecutor());
}

Source File: ZooKeeperUtils.java From flink with Apache License 2.0

5 votes

/**
 * Builds a {@link ZooKeeperCompletedCheckpointStore} for a single job.
 *
 * <p>The ZooKeeper path of the store is the value configured under
 * {@link HighAvailabilityOptions#HA_ZOOKEEPER_CHECKPOINTS_PATH} followed by the
 * job-specific path.
 *
 * @param client                         The {@link CuratorFramework} ZooKeeper client to use
 * @param configuration                  {@link Configuration} object, must not be null
 * @param jobId                          ID of job to create the instance for
 * @param maxNumberOfCheckpointsToRetain The maximum number of checkpoints to retain
 * @param executor                       to run ZooKeeper callbacks
 * @return {@link ZooKeeperCompletedCheckpointStore} instance
 * @throws Exception if the completed checkpoint store cannot be created
 */
public static CompletedCheckpointStore createCompletedCheckpoints(
		CuratorFramework client,
		Configuration configuration,
		JobID jobId,
		int maxNumberOfCheckpointsToRetain,
		Executor executor) throws Exception {

	checkNotNull(configuration, "Configuration");

	final String checkpointsRoot = configuration.getString(
		HighAvailabilityOptions.HA_ZOOKEEPER_CHECKPOINTS_PATH);

	final RetrievableStateStorageHelper<CompletedCheckpoint> storageHelper =
		createFileSystemStateStorage(configuration, "completedCheckpoint");

	// the job-specific suffix is resolved only after the state storage has been created
	final String jobCheckpointsPath = checkpointsRoot + ZooKeeperSubmittedJobGraphStore.getPathForJob(jobId);

	final ZooKeeperCompletedCheckpointStore store = new ZooKeeperCompletedCheckpointStore(
		maxNumberOfCheckpointsToRetain,
		createZooKeeperStateHandleStore(client, jobCheckpointsPath, storageHelper),
		executor);

	LOG.info("Initialized {} in '{}'.", ZooKeeperCompletedCheckpointStore.class.getSimpleName(), jobCheckpointsPath);
	return store;
}

Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Checks that creating a JobMaster for a JobGraph with savepoint restore settings puts
 * the savepoint into the completed checkpoint store.
 */
@Test
public void testRestoringFromSavepoint() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final JobGraph jobGraph = createJobGraphWithCheckpointing(
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true));

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store is expected to contain the savepoint at this point
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: LegacyScheduler.java From flink with Apache License 2.0

5 votes

/**
 * Triggers a savepoint and, if requested, cancels the job once the savepoint is stored.
 *
 * <p>Asserts that it runs in the main thread. With {@code cancelJob} set, the checkpoint
 * scheduler is stopped before the savepoint is triggered; if the savepoint then fails,
 * {@code startCheckpointScheduler} is called before the failure is propagated.
 *
 * @param targetDirectory directory for the savepoint; may be {@code null} only if the
 *                        checkpoint storage has a default savepoint location
 * @param cancelJob       whether to cancel the job after the savepoint has been stored
 * @return future holding the external pointer of the savepoint
 * @throws IllegalStateException if the execution graph has no checkpoint coordinator, or
 *                               if neither a target directory nor a default location exists
 */
@Override
public CompletableFuture<String> triggerSavepoint(final String targetDirectory, final boolean cancelJob) {
	mainThreadExecutor.assertRunningInMainThread();

	final CheckpointCoordinator checkpointCoordinator = executionGraph.getCheckpointCoordinator();
	if (checkpointCoordinator == null) {
		throw new IllegalStateException(
			String.format("Job %s is not a streaming job.", jobGraph.getJobID()));
	}

	final boolean noSavepointLocation =
		targetDirectory == null && !checkpointCoordinator.getCheckpointStorage().hasDefaultSavepointLocation();
	if (noSavepointLocation) {
		log.info("Trying to cancel job {} with savepoint, but no savepoint directory configured.", jobGraph.getJobID());

		throw new IllegalStateException(
			"No savepoint directory configured. You can either specify a directory " +
				"while cancelling via -s :targetDirectory or configure a cluster-wide " +
				"default via key '" + CheckpointingOptions.SAVEPOINT_DIRECTORY.key() + "'.");
	}

	if (cancelJob) {
		// no periodic checkpoints while the cancel-with-savepoint is in progress
		checkpointCoordinator.stopCheckpointScheduler();
	}

	return checkpointCoordinator
		.triggerSavepoint(System.currentTimeMillis(), targetDirectory)
		.thenApply(CompletedCheckpoint::getExternalPointer)
		.handleAsync((path, throwable) -> {
			if (throwable == null) {
				if (cancelJob) {
					log.info("Savepoint stored in {}. Now cancelling {}.", path, jobGraph.getJobID());
					cancel();
				}
				return path;
			}

			// savepoint failed: undo the scheduler stop before propagating the failure
			if (cancelJob) {
				startCheckpointScheduler(checkpointCoordinator);
			}
			throw new CompletionException(throwable);
		}, mainThreadExecutor);
}

Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Shuts the store down depending on the final job status.
 *
 * <p>For a globally terminal status both the active and the suspended checkpoints are
 * dropped. For any other status the active checkpoints replace the previously suspended
 * ones, and the active collection is emptied.
 *
 * @param jobStatus status of the job at shutdown
 */
@Override
public void shutdown(JobStatus jobStatus) throws Exception {
	final boolean globallyTerminal = jobStatus.isGloballyTerminalState();

	// whatever was suspended before is dropped in both cases
	suspended.clear();

	if (!globallyTerminal) {
		// move the active checkpoints over, keeping their iteration order
		checkpoints.forEach(suspended::add);
	}

	checkpoints.clear();
}

Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Appends the checkpoint as the newest entry and removes the oldest entry if the store
 * then holds more than {@code maxRetainedCheckpoints} checkpoints.
 *
 * @param checkpoint completed checkpoint to append
 */
@Override
public void addCheckpoint(CompletedCheckpoint checkpoint) throws Exception {
	checkpoints.addLast(checkpoint);

	final boolean withinRetention = checkpoints.size() <= maxRetainedCheckpoints;
	if (withinRetention) {
		return;
	}

	// over the retention limit: drop the oldest entry
	removeOldestCheckpoint();
}

Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Returns the most recently added checkpoint.
 *
 * @return the last element of the checkpoint collection, or {@code null} if it is empty
 */
@Override
public CompletedCheckpoint getLatestCheckpoint() throws Exception {
	if (checkpoints.isEmpty()) {
		return null;
	}
	return checkpoints.getLast();
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

4 votes

/**
 * Returns a snapshot of all checkpoints currently held by this store.
 *
 * @return a new list with the current checkpoints; changing it does not affect the store
 */
@Override
public List<CompletedCheckpoint> getAllCheckpoints() throws Exception {
	final List<CompletedCheckpoint> snapshot = new ArrayList<>(checkpoints);
	return snapshot;
}

Source File: RecoverableCompletedCheckpointStore.java From flink with Apache License 2.0

4 votes

/**
 * Removes the first (oldest) checkpoint from the store and calls
 * {@code discardOnSubsume()} on it.
 *
 * @throws Exception if discarding the removed checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
	checkpoints.removeFirst().discardOnSubsume();
}

Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that a JobMaster restores a modified JobGraph from a savepoint only when
 * non-restored state is allowed.
 */
@Test
public void testRestoringModifiedJobFromSavepoint() throws Exception {

	// write a savepoint that carries state for one operator
	final long savepointId = 42L;
	final OperatorID operatorID = new OperatorID();
	final File savepoint = createSavepointWithOperatorState(savepointId, operatorID);
	final String savepointPath = savepoint.getAbsolutePath();

	// first attempt: non restored state is NOT allowed
	final SavepointRestoreSettings strictSettings = SavepointRestoreSettings.forPath(savepointPath, false);

	// a job graph containing only a brand-new operator
	final JobVertex newOperator = new JobVertex("New operator");
	newOperator.setInvokableClass(NoOpInvokable.class);
	final JobGraph modifiedJobGraph = createJobGraphFromJobVerticesWithCheckpointing(strictSettings, newOperator);

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	try {
		createJobMaster(
			configuration,
			modifiedJobGraph,
			haServices,
			new TestingJobManagerSharedServicesBuilder().build());
		fail("Should fail because we cannot resume the changed JobGraph from the savepoint.");
	} catch (IllegalStateException expected) {
		// the strict settings are expected to be rejected
	}

	// second attempt: same savepoint, but non restored state is allowed now
	modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, true));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		modifiedJobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store is expected to contain the savepoint at this point
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint();

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: SchedulerTestingUtils.java From flink with Apache License 2.0

4 votes

/**
 * Triggers a checkpoint on the checkpoint coordinator of the given scheduler.
 *
 * @param scheduler scheduler whose checkpoint coordinator is used
 * @return the future returned by the coordinator's {@code triggerCheckpoint(false)} call
 */
public static CompletableFuture<CompletedCheckpoint> triggerCheckpoint(DefaultScheduler scheduler) throws Exception {
	return getCheckpointCoordinator(scheduler).triggerCheckpoint(false);
}

Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Checks that a checkpoint already present in the completed checkpoint store takes
 * precedence over a savepoint configured in the restore settings.
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final JobGraph jobGraph = createJobGraphWithCheckpointing(
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true));

	// a regular checkpoint with a different ID, put into the store up front
	final long checkpointId = 1L;
	final CompletedCheckpoint existingCheckpoint = new CompletedCheckpoint(
		jobGraph.getJobID(),
		checkpointId,
		1L,
		1L,
		Collections.emptyMap(),
		null,
		CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
		new DummyCheckpointStorageLocation());

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	checkpointStore.addCheckpoint(existingCheckpoint);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the latest entry must still be the pre-existing checkpoint, not the savepoint
		final CompletedCheckpoint latest = checkpointStore.getLatestCheckpoint();

		assertThat(latest, Matchers.notNullValue());
		assertThat(latest.getCheckpointID(), is(checkpointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: RecoverableCompletedCheckpointStore.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Removes the first (oldest) checkpoint from the store and calls
 * {@code discardOnSubsume()} on it.
 *
 * @throws Exception if discarding the removed checkpoint fails
 */
public void removeOldestCheckpoint() throws Exception {
	checkpoints.removeFirst().discardOnSubsume();
}

Source File: JobMasterTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a checkpoint already present in the completed checkpoint store takes
 * precedence over a savepoint configured in the restore settings.
 */
@Test
public void testCheckpointPrecedesSavepointRecovery() throws Exception {

	// write a savepoint with a known ID
	final long savepointId = 42L;
	final File savepoint = createSavepoint(savepointId);

	// point the job graph at that savepoint
	final JobGraph jobGraph = createJobGraphWithCheckpointing(
		SavepointRestoreSettings.forPath(savepoint.getAbsolutePath(), true));

	// a regular checkpoint with a different ID, put into the store up front
	final long checkpointId = 1L;
	final CompletedCheckpoint existingCheckpoint = new CompletedCheckpoint(
		jobGraph.getJobID(),
		checkpointId,
		1L,
		1L,
		Collections.emptyMap(),
		null,
		CheckpointProperties.forCheckpoint(CheckpointRetentionPolicy.NEVER_RETAIN_AFTER_TERMINATION),
		new DummyCheckpointStorageLocation());

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	checkpointStore.addCheckpoint(existingCheckpoint);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		jobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the latest entry must still be the pre-existing checkpoint, not the savepoint
		final CompletedCheckpoint latest = checkpointStore.getLatestCheckpoint(false);

		assertThat(latest, Matchers.notNullValue());
		assertThat(latest.getCheckpointID(), is(checkpointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

Source File: JobMasterTest.java From flink with Apache License 2.0

4 votes

/**
 * Checks that a JobMaster restores a modified JobGraph from a savepoint only when
 * non-restored state is allowed.
 */
@Test
public void testRestoringModifiedJobFromSavepoint() throws Exception {

	// write a savepoint that carries state for one operator
	final long savepointId = 42L;
	final OperatorID operatorID = new OperatorID();
	final File savepoint = createSavepointWithOperatorState(savepointId, operatorID);
	final String savepointPath = savepoint.getAbsolutePath();

	// first attempt: non restored state is NOT allowed
	final SavepointRestoreSettings strictSettings = SavepointRestoreSettings.forPath(savepointPath, false);

	// a job graph containing only a brand-new operator
	final JobVertex newOperator = new JobVertex("New operator");
	newOperator.setInvokableClass(NoOpInvokable.class);
	final JobGraph modifiedJobGraph = createJobGraphFromJobVerticesWithCheckpointing(strictSettings, newOperator);

	final StandaloneCompletedCheckpointStore checkpointStore = new StandaloneCompletedCheckpointStore(1);
	haServices.setCheckpointRecoveryFactory(
		new TestingCheckpointRecoveryFactory(checkpointStore, new StandaloneCheckpointIDCounter()));

	try {
		createJobMaster(
			configuration,
			modifiedJobGraph,
			haServices,
			new TestingJobManagerSharedServicesBuilder().build());
		fail("Should fail because we cannot resume the changed JobGraph from the savepoint.");
	} catch (IllegalStateException expected) {
		// the strict settings are expected to be rejected
	}

	// second attempt: same savepoint, but non restored state is allowed now
	modifiedJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath, true));

	final JobMaster jobMaster = createJobMaster(
		configuration,
		modifiedJobGraph,
		haServices,
		new TestingJobManagerSharedServicesBuilder().build());

	try {
		// the store is expected to contain the savepoint at this point
		final CompletedCheckpoint restored = checkpointStore.getLatestCheckpoint(false);

		assertThat(restored, Matchers.notNullValue());
		assertThat(restored.getCheckpointID(), is(savepointId));
	} finally {
		RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
	}
}

org.apache.flink.runtime.checkpoint.CompletedCheckpoint Java Examples