Java Code Examples for org.apache.flink.client.program.ClusterClient#cancel()
The following examples show how to use
org.apache.flink.client.program.ClusterClient#cancel() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SavepointTestBase.java From flink with Apache License 2.0 | 6 votes |
public <T> String takeSavepoint(Collection<T> data, Function<SourceFunction<T>, StreamExecutionEnvironment> jobGraphFactory) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableClosureCleaner(); WaitingSource<T> waitingSource = createSource(data); JobGraph jobGraph = jobGraphFactory.apply(waitingSource).getStreamGraph().getJobGraph(); JobID jobId = jobGraph.getJobID(); ClusterClient<?> client = miniClusterResource.getClusterClient(); try { JobSubmissionResult result = ClientUtils.submitJob(client, jobGraph); return CompletableFuture .runAsync(waitingSource::awaitSource) .thenCompose(ignore -> triggerSavepoint(client, result.getJobID())) .get(5, TimeUnit.MINUTES); } catch (Exception e) { throw new RuntimeException("Failed to take savepoint", e); } finally { client.cancel(jobId); } }
Example 2
Source File: CancelingTestBase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
protected void runAndCancelJob(Plan plan, final int msecsTillCanceling, int maxTimeTillCanceled) throws Exception { // submit job final JobGraph jobGraph = getJobGraph(plan); ClusterClient<?> client = CLUSTER.getClusterClient(); client.setDetached(true); JobSubmissionResult jobSubmissionResult = client.submitJob(jobGraph, CancelingTestBase.class.getClassLoader()); Deadline submissionDeadLine = new FiniteDuration(2, TimeUnit.MINUTES).fromNow(); JobStatus jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); while (jobStatus != JobStatus.RUNNING && submissionDeadLine.hasTimeLeft()) { Thread.sleep(50); jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); } if (jobStatus != JobStatus.RUNNING) { Assert.fail("Job not in state RUNNING."); } Thread.sleep(msecsTillCanceling); client.cancel(jobSubmissionResult.getJobID()); Deadline cancelDeadline = new FiniteDuration(maxTimeTillCanceled, TimeUnit.MILLISECONDS).fromNow(); JobStatus jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); while (jobStatusAfterCancel != JobStatus.CANCELED && cancelDeadline.hasTimeLeft()) { Thread.sleep(50); jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); } if (jobStatusAfterCancel != JobStatus.CANCELED) { Assert.fail("Failed to cancel job with ID " + jobSubmissionResult.getJobID() + '.'); } }
Example 3
Source File: SavepointITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void restoreJobAndVerifyState(String savepointPath, MiniClusterResourceFactory clusterFactory, int parallelism) throws Exception { final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000); jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); final JobID jobId = jobGraph.getJobID(); StatefulCounter.resetForTest(parallelism); MiniClusterWithClientResource cluster = clusterFactory.get(); cluster.before(); ClusterClient<?> client = cluster.getClusterClient(); try { client.setDetached(true); client.submitJob(jobGraph, SavepointITCase.class.getClassLoader()); // Await state is restored StatefulCounter.getRestoreLatch().await(); // Await some progress after restore StatefulCounter.getProgressLatch().await(); client.cancel(jobId); FutureUtils.retrySuccessfulWithDelay( () -> client.getJobStatus(jobId), Time.milliseconds(50), Deadline.now().plus(Duration.ofSeconds(30)), status -> status == JobStatus.CANCELED, TestingUtils.defaultScheduledExecutor() ); client.disposeSavepoint(savepointPath) .get(); assertFalse("Savepoint not properly cleaned up.", new File(savepointPath).exists()); } finally { cluster.after(); StatefulCounter.resetForTest(parallelism); } }
Example 4
Source File: ResumeCheckpointManuallyITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static String runJobAndGetExternalizedCheckpoint(StateBackend backend, File checkpointDir, @Nullable String externalCheckpoint, ClusterClient<?> client) throws Exception { JobGraph initialJobGraph = getJobGraph(backend, externalCheckpoint); NotifyingInfiniteTupleSource.countDownLatch = new CountDownLatch(PARALLELISM); client.submitJob(initialJobGraph, ResumeCheckpointManuallyITCase.class.getClassLoader()); // wait until all sources have been started NotifyingInfiniteTupleSource.countDownLatch.await(); waitUntilExternalizedCheckpointCreated(checkpointDir, initialJobGraph.getJobID()); client.cancel(initialJobGraph.getJobID()); waitUntilCanceled(initialJobGraph.getJobID(), client); return getExternalizedCheckpointCheckpointPath(checkpointDir, initialJobGraph.getJobID()); }
Example 5
Source File: CancelingTestBase.java From flink with Apache License 2.0 | 5 votes |
protected void runAndCancelJob(Plan plan, final int msecsTillCanceling, int maxTimeTillCanceled) throws Exception { // submit job final JobGraph jobGraph = getJobGraph(plan); ClusterClient<?> client = CLUSTER.getClusterClient(); client.setDetached(true); JobSubmissionResult jobSubmissionResult = client.submitJob(jobGraph, CancelingTestBase.class.getClassLoader()); Deadline submissionDeadLine = new FiniteDuration(2, TimeUnit.MINUTES).fromNow(); JobStatus jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); while (jobStatus != JobStatus.RUNNING && submissionDeadLine.hasTimeLeft()) { Thread.sleep(50); jobStatus = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); } if (jobStatus != JobStatus.RUNNING) { Assert.fail("Job not in state RUNNING."); } Thread.sleep(msecsTillCanceling); client.cancel(jobSubmissionResult.getJobID()); Deadline cancelDeadline = new FiniteDuration(maxTimeTillCanceled, TimeUnit.MILLISECONDS).fromNow(); JobStatus jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); while (jobStatusAfterCancel != JobStatus.CANCELED && cancelDeadline.hasTimeLeft()) { Thread.sleep(50); jobStatusAfterCancel = client.getJobStatus(jobSubmissionResult.getJobID()).get(GET_FUTURE_TIMEOUT, TimeUnit.MILLISECONDS); } if (jobStatusAfterCancel != JobStatus.CANCELED) { Assert.fail("Failed to cancel job with ID " + jobSubmissionResult.getJobID() + '.'); } }
Example 6
Source File: SavepointITCase.java From flink with Apache License 2.0 | 5 votes |
private void restoreJobAndVerifyState(String savepointPath, MiniClusterResourceFactory clusterFactory, int parallelism) throws Exception { final JobGraph jobGraph = createJobGraph(parallelism, 0, 1000); jobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); final JobID jobId = jobGraph.getJobID(); StatefulCounter.resetForTest(parallelism); MiniClusterWithClientResource cluster = clusterFactory.get(); cluster.before(); ClusterClient<?> client = cluster.getClusterClient(); try { client.setDetached(true); client.submitJob(jobGraph, SavepointITCase.class.getClassLoader()); // Await state is restored StatefulCounter.getRestoreLatch().await(); // Await some progress after restore StatefulCounter.getProgressLatch().await(); client.cancel(jobId); FutureUtils.retrySuccessfulWithDelay( () -> client.getJobStatus(jobId), Time.milliseconds(50), Deadline.now().plus(Duration.ofSeconds(30)), status -> status == JobStatus.CANCELED, TestingUtils.defaultScheduledExecutor() ); client.disposeSavepoint(savepointPath) .get(); assertFalse("Savepoint not properly cleaned up.", new File(savepointPath).exists()); } finally { cluster.after(); StatefulCounter.resetForTest(parallelism); } }
Example 7
Source File: ResumeCheckpointManuallyITCase.java From flink with Apache License 2.0 | 5 votes |
private static String runJobAndGetExternalizedCheckpoint(StateBackend backend, File checkpointDir, @Nullable String externalCheckpoint, ClusterClient<?> client) throws Exception { JobGraph initialJobGraph = getJobGraph(backend, externalCheckpoint); NotifyingInfiniteTupleSource.countDownLatch = new CountDownLatch(PARALLELISM); client.submitJob(initialJobGraph, ResumeCheckpointManuallyITCase.class.getClassLoader()); // wait until all sources have been started NotifyingInfiniteTupleSource.countDownLatch.await(); waitUntilExternalizedCheckpointCreated(checkpointDir, initialJobGraph.getJobID()); client.cancel(initialJobGraph.getJobID()); waitUntilCanceled(initialJobGraph.getJobID(), client); return getExternalizedCheckpointCheckpointPath(checkpointDir, initialJobGraph.getJobID()); }
Example 8
Source File: SavepointReaderKeyedStateITCase.java From flink with Apache License 2.0 | 5 votes |
private String takeSavepoint(JobGraph jobGraph) throws Exception { SavepointSource.initializeForTest(); ClusterClient<?> client = miniClusterResource.getClusterClient(); client.setDetached(true); JobID jobId = jobGraph.getJobID(); Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5)); String dirPath = getTempDirPath(new AbstractID().toHexString()); try { client.setDetached(true); JobSubmissionResult result = client.submitJob(jobGraph, getClass().getClassLoader()); boolean finished = false; while (deadline.hasTimeLeft()) { if (SavepointSource.isFinished()) { finished = true; break; } } if (!finished) { Assert.fail("Failed to initialize state within deadline"); } CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath); return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); } finally { client.cancel(jobId); } }
Example 9
Source File: AbstractFlinkClient.java From alchemy with Apache License 2.0 | 5 votes |
public SavepointResponse cancel(ClusterClient clusterClient, CancelFlinkRequest request) throws Exception { if (StringUtils.isEmpty(request.getJobID())) { return new SavepointResponse("the job is not submit yet"); } boolean savePoint = request.getSavePoint() != null && request.getSavePoint().booleanValue(); if (savePoint) { String path = clusterClient.cancelWithSavepoint(JobID.fromHexString(request.getJobID()), request.getSavepointDirectory()); return new SavepointResponse(true, path); } else { clusterClient.cancel(JobID.fromHexString(request.getJobID())); return new SavepointResponse(true); } }
Example 10
Source File: ClassLoaderITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests disposal of a savepoint, which contains custom user code KvState. */ @Test public void testDisposeSavepointWithCustomKvState() throws Exception { ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster()); Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow(); File checkpointDir = FOLDER.newFolder(); File outputDir = FOLDER.newFolder(); final PackagedProgram program = new PackagedProgram( new File(CUSTOM_KV_STATE_JAR_PATH), new String[] { String.valueOf(parallelism), checkpointDir.toURI().toString(), "5000", outputDir.toURI().toString() }); TestStreamEnvironment.setAsContext( miniClusterResource.getMiniCluster(), parallelism, Collections.singleton(new Path(CUSTOM_KV_STATE_JAR_PATH)), Collections.<URL>emptyList() ); // Execute detached Thread invokeThread = new Thread(new Runnable() { @Override public void run() { try { program.invokeInteractiveModeForExecution(); } catch (ProgramInvocationException ignored) { if (ignored.getCause() == null || !(ignored.getCause() instanceof JobCancellationException)) { ignored.printStackTrace(); } } } }); LOG.info("Starting program invoke thread"); invokeThread.start(); // The job ID JobID jobId = null; LOG.info("Waiting for job status running."); // Wait for running job while (jobId == null && deadline.hasTimeLeft()) { Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); for (JobStatusMessage job : jobs) { if (job.getJobState() == JobStatus.RUNNING) { jobId = job.getJobId(); LOG.info("Job running. ID: " + jobId); break; } } // Retry if job is not available yet if (jobId == null) { Thread.sleep(100L); } } // Trigger savepoint String savepointPath = null; for (int i = 0; i < 20; i++) { LOG.info("Triggering savepoint (" + (i + 1) + "/20)."); try { savepointPath = clusterClient.triggerSavepoint(jobId, null) .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); } catch (Exception cause) { LOG.info("Failed to trigger savepoint. Retrying...", cause); // This can fail if the operators are not opened yet Thread.sleep(500); } } assertNotNull("Failed to trigger savepoint", savepointPath); clusterClient.disposeSavepoint(savepointPath).get(); clusterClient.cancel(jobId); // make sure, the execution is finished to not influence other test methods invokeThread.join(deadline.timeLeft().toMillis()); assertFalse("Program invoke thread still running", invokeThread.isAlive()); }
Example 11
Source File: RescalingITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a job cannot be restarted from a savepoint with a different parallelism if the * rescaled operator has non-partitioned state. * * @throws Exception */ @Test public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception { final int parallelism = numSlots / 2; final int parallelism2 = numSlots; final int maxParallelism = 13; Duration timeout = Duration.ofMinutes(3); Deadline deadline = Deadline.now().plus(timeout); ClusterClient<?> client = cluster.getClusterClient(); try { JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); final JobID jobID = jobGraph.getJobID(); client.setDetached(true); client.submitJob(jobGraph, RescalingITCase.class.getClassLoader()); // wait until the operator is started StateSourceBase.workStartedLatch.await(); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobID, null); final String savepointPath = savepointPathFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); client.cancel(jobID); while (!getRunningJobs(client).isEmpty()) { Thread.sleep(50); } // job successfully removed JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); client.setDetached(false); client.submitJob(scaledJobGraph, RescalingITCase.class.getClassLoader()); } catch (JobExecutionException exception) { if (exception.getCause() instanceof IllegalStateException) { // we expect a IllegalStateException wrapped // in a JobExecutionException, because the job containing non-partitioned state // is being rescaled } else { throw exception; } } }
Example 12
Source File: PulsarTestBaseWithFlink.java From pulsar-flink with Apache License 2.0 | 4 votes |
public static void cancelRunningJobs(ClusterClient<?> client) throws Exception { List<JobID> runningJobs = getRunningJobs(client); for (JobID runningJob : runningJobs) { client.cancel(runningJob); } }
Example 13
Source File: ClassLoaderITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Tests disposal of a savepoint, which contains custom user code KvState. */ @Test public void testDisposeSavepointWithCustomKvState() throws Exception { ClusterClient<?> clusterClient = new MiniClusterClient(new Configuration(), miniClusterResource.getMiniCluster()); Deadline deadline = new FiniteDuration(100, TimeUnit.SECONDS).fromNow(); File checkpointDir = FOLDER.newFolder(); File outputDir = FOLDER.newFolder(); final PackagedProgram program = new PackagedProgram( new File(CUSTOM_KV_STATE_JAR_PATH), new String[] { String.valueOf(parallelism), checkpointDir.toURI().toString(), "5000", outputDir.toURI().toString() }); TestStreamEnvironment.setAsContext( miniClusterResource.getMiniCluster(), parallelism, Collections.singleton(new Path(CUSTOM_KV_STATE_JAR_PATH)), Collections.<URL>emptyList() ); // Execute detached Thread invokeThread = new Thread(new Runnable() { @Override public void run() { try { program.invokeInteractiveModeForExecution(); } catch (ProgramInvocationException ignored) { if (ignored.getCause() == null || !(ignored.getCause() instanceof JobCancellationException)) { ignored.printStackTrace(); } } } }); LOG.info("Starting program invoke thread"); invokeThread.start(); // The job ID JobID jobId = null; LOG.info("Waiting for job status running."); // Wait for running job while (jobId == null && deadline.hasTimeLeft()) { Collection<JobStatusMessage> jobs = clusterClient.listJobs().get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); for (JobStatusMessage job : jobs) { if (job.getJobState() == JobStatus.RUNNING) { jobId = job.getJobId(); LOG.info("Job running. ID: " + jobId); break; } } // Retry if job is not available yet if (jobId == null) { Thread.sleep(100L); } } // Trigger savepoint String savepointPath = null; for (int i = 0; i < 20; i++) { LOG.info("Triggering savepoint (" + (i + 1) + "/20)."); try { savepointPath = clusterClient.triggerSavepoint(jobId, null) .get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); } catch (Exception cause) { LOG.info("Failed to trigger savepoint. Retrying...", cause); // This can fail if the operators are not opened yet Thread.sleep(500); } } assertNotNull("Failed to trigger savepoint", savepointPath); clusterClient.disposeSavepoint(savepointPath).get(); clusterClient.cancel(jobId); // make sure, the execution is finished to not influence other test methods invokeThread.join(deadline.timeLeft().toMillis()); assertFalse("Program invoke thread still running", invokeThread.isAlive()); }
Example 14
Source File: RescalingITCase.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job cannot be restarted from a savepoint with a different parallelism if the * rescaled operator has non-partitioned state. * * @throws Exception */ @Test public void testSavepointRescalingNonPartitionedStateCausesException() throws Exception { final int parallelism = numSlots / 2; final int parallelism2 = numSlots; final int maxParallelism = 13; Duration timeout = Duration.ofMinutes(3); Deadline deadline = Deadline.now().plus(timeout); ClusterClient<?> client = cluster.getClusterClient(); try { JobGraph jobGraph = createJobGraphWithOperatorState(parallelism, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); final JobID jobID = jobGraph.getJobID(); client.setDetached(true); client.submitJob(jobGraph, RescalingITCase.class.getClassLoader()); // wait until the operator is started StateSourceBase.workStartedLatch.await(); CompletableFuture<String> savepointPathFuture = client.triggerSavepoint(jobID, null); final String savepointPath = savepointPathFuture.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); client.cancel(jobID); while (!getRunningJobs(client).isEmpty()) { Thread.sleep(50); } // job successfully removed JobGraph scaledJobGraph = createJobGraphWithOperatorState(parallelism2, maxParallelism, OperatorCheckpointMethod.NON_PARTITIONED); scaledJobGraph.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(savepointPath)); client.setDetached(false); client.submitJob(scaledJobGraph, RescalingITCase.class.getClassLoader()); } catch (JobExecutionException exception) { if (exception.getCause() instanceof IllegalStateException) { // we expect a IllegalStateException wrapped // in a JobExecutionException, because the job containing non-partitioned state // is being rescaled } else { throw exception; } } }
Example 15
Source File: SavepointReaderITTestBase.java From flink with Apache License 2.0 | 4 votes |
private String takeSavepoint(JobGraph jobGraph) throws Exception { SavepointSource.initializeForTest(); ClusterClient<?> client = miniClusterResource.getClusterClient(); client.setDetached(true); JobID jobId = jobGraph.getJobID(); Deadline deadline = Deadline.fromNow(Duration.ofMinutes(5)); String dirPath = getTempDirPath(new AbstractID().toHexString()); try { client.setDetached(true); JobSubmissionResult result = client.submitJob(jobGraph, SavepointReaderITCase.class.getClassLoader()); boolean finished = false; while (deadline.hasTimeLeft()) { if (SavepointSource.isFinished()) { finished = true; break; } try { Thread.sleep(2L); } catch (InterruptedException ignored) { Thread.currentThread().interrupt(); } } if (!finished) { Assert.fail("Failed to initialize state within deadline"); } CompletableFuture<String> path = client.triggerSavepoint(result.getJobID(), dirPath); return path.get(deadline.timeLeft().toMillis(), TimeUnit.MILLISECONDS); } finally { client.cancel(jobId); } }
Example 16
Source File: CancelJob.java From bravo with Apache License 2.0 | 4 votes |
@Override public void onceExecuteClusterAction(ClusterClient<?> client, JobID id) throws Exception { client.cancel(id); }