Java Code Examples for org.apache.flink.client.program.ClusterClient#triggerSavepoint()
The following examples show how to use
org.apache.flink.client.program.ClusterClient#triggerSavepoint().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CliFrontend.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Asks the cluster to take a savepoint of the given job and blocks until the
 * savepoint path is available.
 *
 * @param clusterClient client on which {@code triggerSavepoint} is invoked
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory handed to the client as-is
 * @return the savepoint path produced by the returned future
 * @throws FlinkException if waiting for the savepoint path fails; the cause is the caught
 *     exception after it was passed through {@code ExceptionUtils.stripExecutionException}
 */
private String triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
    logAndSysout("Triggering savepoint for job " + jobId + '.');

    CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

    logAndSysout("Waiting for response...");

    final String savepointPath;
    try {
        savepointPath = savepointPathFuture.get();
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Future#get clears the interrupt status when it throws; restore it so that
            // code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        Throwable cause = ExceptionUtils.stripExecutionException(e);
        throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
    }

    logAndSysout("Savepoint completed. Path: " + savepointPath);
    logAndSysout("You can resume your program from this savepoint with the run command.");

    return savepointPath;
}
Example 2
Source File: CliFrontend.java From flink with Apache License 2.0 | 6 votes |
/**
 * Triggers a savepoint for the given job through the cluster client and waits,
 * without a timeout, for the resulting savepoint path.
 *
 * @param clusterClient client on which {@code triggerSavepoint} is invoked
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory handed to the client as-is
 * @return the savepoint path produced by the returned future
 * @throws FlinkException if waiting for the savepoint path fails; the cause is the caught
 *     exception after it was passed through {@code ExceptionUtils.stripExecutionException}
 */
private String triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
    logAndSysout("Triggering savepoint for job " + jobId + '.');

    CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

    logAndSysout("Waiting for response...");

    final String savepointPath;
    try {
        savepointPath = savepointPathFuture.get();
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // The broad catch also receives InterruptedException, which clears the thread's
            // interrupt status; set it again before converting to a FlinkException.
            Thread.currentThread().interrupt();
        }
        Throwable cause = ExceptionUtils.stripExecutionException(e);
        throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
    }

    logAndSysout("Savepoint completed. Path: " + savepointPath);
    logAndSysout("You can resume your program from this savepoint with the run command.");

    return savepointPath;
}
Example 3
Source File: CliFrontend.java From flink with Apache License 2.0 | 6 votes |
/**
 * Triggers a savepoint for the given job and waits up to {@code clientTimeout} for the
 * savepoint path, which is then reported via {@code logAndSysout}.
 *
 * @param clusterClient client on which {@code triggerSavepoint} is invoked
 * @param jobId ID of the job to take a savepoint of
 * @param savepointDirectory target directory handed to the client as-is
 * @throws FlinkException if obtaining or reporting the savepoint path fails (including a
 *     timeout); the cause is the caught exception after it was passed through
 *     {@code ExceptionUtils.stripExecutionException}
 */
private void triggerSavepoint(ClusterClient<?> clusterClient, JobID jobId, String savepointDirectory) throws FlinkException {
    logAndSysout("Triggering savepoint for job " + jobId + '.');

    CompletableFuture<String> savepointPathFuture = clusterClient.triggerSavepoint(jobId, savepointDirectory);

    logAndSysout("Waiting for response...");

    try {
        final String savepointPath = savepointPathFuture.get(clientTimeout.toMillis(), TimeUnit.MILLISECONDS);

        logAndSysout("Savepoint completed. Path: " + savepointPath);
        logAndSysout("You can resume your program from this savepoint with the run command.");
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Future#get clears the interrupt status when it throws; restore it so the
            // interruption is not lost once the exception is wrapped below.
            Thread.currentThread().interrupt();
        }
        Throwable cause = ExceptionUtils.stripExecutionException(e);
        throw new FlinkException("Triggering a savepoint for the job " + jobId + " failed.", cause);
    }
}
Example 4
Source File: SavepointReaderKeyedStateITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Submits the job graph in detached mode, waits (for at most five minutes overall) until
 * {@code SavepointSource.isFinished()} reports true, then triggers a savepoint into a fresh
 * temp directory and returns its path. The job is cancelled in all cases before returning.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path produced by the cluster client
 * @throws Exception if submission, waiting, or the savepoint itself fails, or if the
 *     thread is interrupted while polling
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
    SavepointSource.initializeForTest();

    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // The original issued this identical call twice in a row; one call is kept.
    client.setDetached(true);

    JobID jobId = jobGraph.getJobID();

    Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

    String dirPath = getTempDirPath(new AbstractID().toHexString());

    try {
        JobSubmissionResult result = client.submitJob(jobGraph, getClass().getClassLoader());

        boolean finished = false;
        while (deadline.hasTimeLeft()) {
            if (SavepointSource.isFinished()) {
                finished = true;
                break;
            }

            // Pause briefly between polls so the wait does not spin a CPU core at 100%
            // for up to the whole deadline.
            Thread.sleep(2L);
        }

        if (!finished) {
            Assert.fail("Failed to initialize state within deadline");
        }

        CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
        // Only the time remaining on the shared deadline is granted to the savepoint.
        return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        client.cancel(jobId);
    }
}
Example 5
Source File: AbstractFlinkClient.java From alchemy with Apache License 2.0 | 5 votes |
/**
 * Triggers a savepoint for the job named in the request and waits for its path.
 *
 * <p>When {@code StringUtils.isEmpty} reports the request's job ID as empty, no savepoint
 * is triggered and a response carrying only a message is returned instead.
 *
 * @param clusterClient client on which {@code triggerSavepoint} is invoked
 * @param request supplies the hex job ID and the savepoint directory
 * @return a response built from {@code true} and the savepoint path, or a message-only
 *     response when the job ID is empty
 * @throws Exception if parsing the job ID, triggering, or waiting for the savepoint fails
 */
public SavepointResponse savepoint(ClusterClient clusterClient, SavepointFlinkRequest request) throws Exception {
    if (!StringUtils.isEmpty(request.getJobID())) {
        JobID targetJob = JobID.fromHexString(request.getJobID());
        String targetDirectory = request.getSavepointDirectory();

        CompletableFuture<String> pendingPath = clusterClient.triggerSavepoint(targetJob, targetDirectory);
        // Blocks without a timeout until the savepoint path is available.
        return new SavepointResponse(true, pendingPath.get());
    }

    return new SavepointResponse("the job is not submit yet");
}
Example 6
Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Submits the job graph, waits (for at most five minutes overall) until
 * {@code SavepointSource.isFinished()} reports true, then triggers a savepoint into a fresh
 * temp directory and returns its path. The job is cancelled, and the cancellation awaited,
 * in all cases before returning.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path produced by the cluster client
 * @throws Exception if submission, waiting, the savepoint, or the cancellation fails, or
 *     if the thread is interrupted while polling
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
    SavepointSource.initializeForTest();

    ClusterClient<?> client = miniClusterResource.getClusterClient();
    JobID jobId = jobGraph.getJobID();

    Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

    String dirPath = getTempDirPath(new AbstractID().toHexString());

    try {
        JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph);

        boolean finished = false;
        while (deadline.hasTimeLeft()) {
            if (SavepointSource.isFinished()) {
                finished = true;
                break;
            }

            // Let InterruptedException propagate. Catching it, re-setting the flag and
            // looping on made every later sleep throw immediately, turning the wait into
            // a busy spin until the deadline expired.
            Thread.sleep(2L);
        }

        if (!finished) {
            Assert.fail("Failed to initialize state within deadline");
        }

        CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
        // Only the time remaining on the shared deadline is granted to the savepoint.
        return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        client.cancel(jobId).get();
    }
}
Example 7
Source File: SavepointTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Triggers a savepoint of the given job into a newly named temp directory.
 *
 * @param client client on which {@code triggerSavepoint} is invoked
 * @param jobID ID of the job to take a savepoint of
 * @return the future returned by the client; it is not awaited here
 * @throws RuntimeException wrapping any {@link IOException} raised inside the try block
 */
private CompletableFuture<String> triggerSavepoint(ClusterClient<?> client, JobID jobID) throws RuntimeException {
    try {
        final String directoryName = new AbstractID().toHexString();
        final String savepointDirectory = getTempDirPath(directoryName);

        return client.triggerSavepoint(jobID, savepointDirectory);
    } catch (IOException ioFailure) {
        // Callers get an unchecked exception so they need not declare IOException.
        throw new RuntimeException(ioFailure);
    }
}
Example 8
Source File: RescalingITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a job cannot be restarted from a savepoint with a different parallelism if the * rescaled operator has non-partitioned state. * * @throws Exception */ @Test public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception { final int parallelism = numSlots / 2; final int parallelism2 = numSlots; final int maxParallelism = 13; Duration timeout = Duration.ofMinutes(3); Deadline deadline = Deadline.now().plus(timeout); ClusterClient<?> client = cluster.getClusterClient(); try { JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); final JobID jobID = jobGraph.getJobID(); client.setDetached(true); client.submitJob(jobGraph, RescalingITCase.class.getClassLoader()); // wait until the operator is started StateSourceBase.workStartedLatch.await(); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobID, null); final String savepointPath = savepointPathFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); client.cancel(jobID); while (!getRunningJobs(client).isEmpty()) { Thread.sleep(50); } // job successfully removed JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); client.setDetached(false); client.submitJob(scaledJobGraph, RescalingITCase.class.getClassLoader()); } catch (JobExecutionException exception) { if (exception.getCause() instanceof IllegalStateException) { // we expect a IllegalStateException wrapped // in a JobExecutionException, because the job containing non-partitioned state // is being rescaled } else { throw exception; } } }
Example 9
Source File: SavepointMigrationTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@SafeVarargs protected final void executeAndSavepoint( StreamExecutionEnvironment env, String savepointPath, Tuple2<String, Integer>... expectedAccumulators) throws Exception { ClusterClient<?> client = miniClusterResource.getClusterClient(); client.setDetached(true); // Submit the job JobGraph jobGraph = env.getStreamGraph().getJobGraph(); JobSubmissionResult jobSubmissionResult = client.submitJob(jobGraph, SavepointMigrationTestBase.class.getClassLoader()); LOG.info("Submitted job {} and waiting...", jobSubmissionResult.getJobID()); boolean done = false; while (DEADLINE.hasTimeLeft()) { Thread.sleep(100); Map<String, OptionalFailure<Object>> accumulators = client.getAccumulators(jobSubmissionResult.getJobID()); boolean allDone = true; for (Tuple2<String, Integer> acc : expectedAccumulators) { OptionalFailure<Object> accumOpt = accumulators.get(acc.f0); if (accumOpt == null) { allDone = false; break; } Integer numFinished = (Integer) accumOpt.get(); if (numFinished == null) { allDone = false; break; } if (!numFinished.equals(acc.f1)) { allDone = false; break; } } if (allDone) { done = true; break; } } if (!done) { fail("Did not see the expected accumulator results within time limit."); } LOG.info("Triggering savepoint."); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobSubmissionResult.getJobID(), null); String jobmanagerSavepointPath = savepointPathFuture.get(DEADLINE.timeLeft().toMillis(), TimeUnit.MILLISECONDS); File jobManagerSavepoint = new File(new URI(jobmanagerSavepointPath).getPath()); // savepoints were changed to be directories in Flink 1.3 if (jobManagerSavepoint.isDirectory()) { FileUtils.moveDirectory(jobManagerSavepoint, new File(savepointPath)); } else { FileUtils.moveFile(jobManagerSavepoint, new File(savepointPath)); } }
Example 10
Source File: RescalingITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job cannot be restarted from a savepoint with a different parallelism if the * rescaled operator has non-partitioned state. * * @throws Exception */ @Test public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception { final int parallelism = numSlots / 2; final int parallelism2 = numSlots; final int maxParallelism = 13; Duration timeout = Duration.ofMinutes(3); Deadline deadline = Deadline.now().plus(timeout); ClusterClient<?> client = cluster.getClusterClient(); try { JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); final JobID jobID = jobGraph.getJobID(); client.setDetached(true); client.submitJob(jobGraph, RescalingITCase.class.getClassLoader()); // wait until the operator is started StateSourceBase.workStartedLatch.await(); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobID, null); final String savepointPath = savepointPathFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); client.cancel(jobID); while (!getRunningJobs(client).isEmpty()) { Thread.sleep(50); } // job successfully removed JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); client.setDetached(false); client.submitJob(scaledJobGraph, RescalingITCase.class.getClassLoader()); } catch (JobExecutionException exception) { if (exception.getCause() instanceof IllegalStateException) { // we expect a IllegalStateException wrapped // in a JobExecutionException, because the job containing non-partitioned state // is being rescaled } else { throw exception; } } }
Example 11
Source File: SavepointMigrationTestBase.java From flink with Apache License 2.0 | 4 votes |
@SafeVarargs protected final void executeAndSavepoint( StreamExecutionEnvironment env, String savepointPath, Tuple2<String, Integer>... expectedAccumulators) throws Exception { ClusterClient<?> client = miniClusterResource.getClusterClient(); client.setDetached(true); // Submit the job JobGraph jobGraph = env.getStreamGraph().getJobGraph(); JobSubmissionResult jobSubmissionResult = client.submitJob(jobGraph, SavepointMigrationTestBase.class.getClassLoader()); LOG.info("Submitted job {} and waiting...", jobSubmissionResult.getJobID()); boolean done = false; while (DEADLINE.hasTimeLeft()) { Thread.sleep(100); Map<String, OptionalFailure<Object>> accumulators = client.getAccumulators(jobSubmissionResult.getJobID()); boolean allDone = true; for (Tuple2<String, Integer> acc : expectedAccumulators) { OptionalFailure<Object> accumOpt = accumulators.get(acc.f0); if (accumOpt == null) { allDone = false; break; } Integer numFinished = (Integer) accumOpt.get(); if (numFinished == null) { allDone = false; break; } if (!numFinished.equals(acc.f1)) { allDone = false; break; } } if (allDone) { done = true; break; } } if (!done) { fail("Did not see the expected accumulator results within time limit."); } LOG.info("Triggering savepoint."); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobSubmissionResult.getJobID(), null); String jobmanagerSavepointPath = savepointPathFuture.get(DEADLINE.timeLeft().toMillis(), TimeUnit.MILLISECONDS); File jobManagerSavepoint = new File(new URI(jobmanagerSavepointPath).getPath()); // savepoints were changed to be directories in Flink 1.3 if (jobManagerSavepoint.isDirectory()) { FileUtils.moveDirectory(jobManagerSavepoint, new File(savepointPath)); } else { FileUtils.moveFile(jobManagerSavepoint, new File(savepointPath)); } }
Example 12
Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Submits the job graph in detached mode, waits (for at most five minutes overall) until
 * {@code SavepointSource.isFinished()} reports true, then triggers a savepoint into a fresh
 * temp directory and returns its path. The job is cancelled in all cases before returning.
 *
 * @param jobGraph the job to run and take a savepoint of
 * @return the savepoint path produced by the cluster client
 * @throws Exception if submission, waiting, or the savepoint itself fails, or if the
 *     thread is interrupted while polling
 */
private String takeSavepoint(JobGraph jobGraph) throws Exception {
    SavepointSource.initializeForTest();

    ClusterClient<?> client = miniClusterResource.getClusterClient();
    // The original issued this identical call twice in a row; one call is kept.
    client.setDetached(true);

    JobID jobId = jobGraph.getJobID();

    Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5));

    String dirPath = getTempDirPath(new AbstractID().toHexString());

    try {
        JobSubmissionResult result = client.submitJob(jobGraph, SavepointReaderITCase.class.getClassLoader());

        boolean finished = false;
        while (deadline.hasTimeLeft()) {
            if (SavepointSource.isFinished()) {
                finished = true;
                break;
            }

            // Let InterruptedException propagate. Catching it, re-setting the flag and
            // looping on made every later sleep throw immediately, turning the wait into
            // a busy spin until the deadline expired.
            Thread.sleep(2L);
        }

        if (!finished) {
            Assert.fail("Failed to initialize state within deadline");
        }

        CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath);
        // Only the time remaining on the shared deadline is granted to the savepoint.
        return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS);
    } finally {
        client.cancel(jobId);
    }
}
Example 13
Source File: RescalingITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job cannot be restarted from a savepoint with a different parallelism if the * rescaled operator has non-partitioned state. * * @throws Exception */ @Test public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception { final int parallelism = numSlots / 2; final int parallelism2 = numSlots; final int maxParallelism = 13; Duration timeout = Duration.ofMinutes(3); Deadline deadline = Deadline.now().plus(timeout); ClusterClient<?> client = cluster.getClusterClient(); try { JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); final JobID jobID = jobGraph.getJobID(); ClientUtils.submitJob(client, jobGraph); // wait until the operator is started StateSourceBase.workStartedLatch.await(); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobID, null); final String savepointPath = savepointPathFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); client.cancel(jobID).get(); while (!getRunningJobs(client).isEmpty()) { Thread.sleep(50); } // job successfully removed JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); ClientUtils.submitJobAndWaitForResult(client, scaledJobGraph, RescalingITCase.class.getClassLoader()); } catch (JobExecutionException exception) { if (exception.getCause() instanceof IllegalStateException) { // we expect a IllegalStateException wrapped // in a JobExecutionException, because the job containing non-partitioned state // is being rescaled } else { throw exception; } } }
Example 14
Source File: SavepointMigrationTestBase.java From flink with Apache License 2.0 | 4 votes |
@SafeVarargs protected final void executeAndSavepoint( StreamExecutionEnvironment env, String savepointPath, Tuple2<String, Integer>... expectedAccumulators) throws Exception { final Deadline deadLine = Deadline.fromNow(Duration.ofMinutes(5)); ClusterClient<?> client = miniClusterResource.getClusterClient(); // Submit the job JobGraph jobGraph = env.getStreamGraph().getJobGraph(); JobSubmissionResult jobSubmissionResult = ClientUtils.submitJob(client, jobGraph); LOG.info("Submitted job {} and waiting...", jobSubmissionResult.getJobID()); boolean done = false; while (deadLine.hasTimeLeft()) { Thread.sleep(100); Map<String, Object> accumulators = client.getAccumulators(jobSubmissionResult.getJobID()).get(); boolean allDone = true; for (Tuple2<String, Integer> acc : expectedAccumulators) { Object accumOpt = accumulators.get(acc.f0); if (accumOpt == null) { allDone = false; break; } Integer numFinished = (Integer) accumOpt; if (!numFinished.equals(acc.f1)) { allDone = false; break; } } if (allDone) { done = true; break; } } if (!done) { fail("Did not see the expected accumulator results within time limit."); } LOG.info("Triggering savepoint."); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobSubmissionResult.getJobID(), null); String jobmanagerSavepointPath = savepointPathFuture.get(deadLine.timeLeft().toMillis(), TimeUnit.MILLISECONDS); File jobManagerSavepoint = new File(new URI(jobmanagerSavepointPath).getPath()); // savepoints were changed to be directories in Flink 1.3 if (jobManagerSavepoint.isDirectory()) { FileUtils.moveDirectory(jobManagerSavepoint, new File(savepointPath)); } else { FileUtils.moveFile(jobManagerSavepoint, new File(savepointPath)); } }