org.apache.flink.runtime.highavailability.RunningJobsRegistry Java Examples
The following examples show how to use
org.apache.flink.runtime.highavailability.RunningJobsRegistry.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
private CompletableFuture<Boolean> tryRunRecoveredJobGraph(JobGraph jobGraph, DispatcherId dispatcherId) throws Exception { if (leaderElectionService.hasLeadership(dispatcherId.toUUID())) { final JobID jobId = jobGraph.getJobID(); if (jobManagerRunnerFutures.containsKey(jobId)) { // we must not release the job graph lock since it can only be locked once and // is currently being executed. Once we support multiple locks, we must release // the JobGraph here log.debug("Ignore added JobGraph because the job {} is already running.", jobId); return CompletableFuture.completedFuture(true); } else if (runningJobsRegistry.getJobSchedulingStatus(jobId) != RunningJobsRegistry.JobSchedulingStatus.DONE) { return waitForTerminatingJobManager(jobId, jobGraph, this::runJob).thenApply(ignored -> true); } else { log.debug("Ignore added JobGraph because the job {} has already been completed.", jobId); } } return CompletableFuture.completedFuture(false); }
Example #2
Source File: Dispatcher.java From flink with Apache License 2.0 | 6 votes |
private CompletableFuture<Boolean> tryRunRecoveredJobGraph(JobGraph jobGraph, DispatcherId dispatcherId) throws Exception { if (leaderElectionService.hasLeadership(dispatcherId.toUUID())) { final JobID jobId = jobGraph.getJobID(); if (jobManagerRunnerFutures.containsKey(jobId)) { // we must not release the job graph lock since it can only be locked once and // is currently being executed. Once we support multiple locks, we must release // the JobGraph here log.debug("Ignore added JobGraph because the job {} is already running.", jobId); return CompletableFuture.completedFuture(true); } else if (runningJobsRegistry.getJobSchedulingStatus(jobId) != RunningJobsRegistry.JobSchedulingStatus.DONE) { return waitForTerminatingJobManager(jobId, jobGraph, this::runJob).thenApply(ignored -> true); } else { log.debug("Ignore added JobGraph because the job {} has already been completed.", jobId); } } return CompletableFuture.completedFuture(false); }
Example #3
Source File: ZooKeeperHaServicesTest.java From flink with Apache License 2.0 | 5 votes |
private void runCleanupTest( Configuration configuration, TestingBlobStoreService blobStoreService, ThrowingConsumer<ZooKeeperHaServices, Exception> zooKeeperHaServicesConsumer) throws Exception { try (ZooKeeperHaServices zooKeeperHaServices = new ZooKeeperHaServices( ZooKeeperUtils.startCuratorFramework(configuration), Executors.directExecutor(), configuration, blobStoreService)) { // create some Zk services to trigger the generation of paths final LeaderRetrievalService resourceManagerLeaderRetriever = zooKeeperHaServices.getResourceManagerLeaderRetriever(); final LeaderElectionService resourceManagerLeaderElectionService = zooKeeperHaServices.getResourceManagerLeaderElectionService(); final RunningJobsRegistry runningJobsRegistry = zooKeeperHaServices.getRunningJobsRegistry(); final TestingListener listener = new TestingListener(); resourceManagerLeaderRetriever.start(listener); resourceManagerLeaderElectionService.start(new TestingContender("foobar", resourceManagerLeaderElectionService)); final JobID jobId = new JobID(); runningJobsRegistry.setJobRunning(jobId); listener.waitForNewLeader(2000L); resourceManagerLeaderRetriever.stop(); resourceManagerLeaderElectionService.stop(); runningJobsRegistry.clearJob(jobId); zooKeeperHaServicesConsumer.accept(zooKeeperHaServices); } }
Example #4
Source File: AbstractYarnNonHaServices.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override public RunningJobsRegistry getRunningJobsRegistry() throws IOException { enter(); try { // IMPORTANT: The registry must NOT place its data in a directory that is // cleaned up by these services. return new FsNegativeRunningJobsRegistry(flinkFileSystem, workingDirectory); } finally { exit(); } }
Example #5
Source File: ZooKeeperRegistryTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests the job status life cycle of the ZooKeeper based {@link RunningJobsRegistry}:
 * {@code setJobRunning()}, {@code setJobFinished()} and {@code clearJob()}, each verified
 * through {@code getJobSchedulingStatus()}.
 */
@Test
public void testZooKeeperRegistry() throws Exception {
	Configuration configuration = new Configuration();
	configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
	configuration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");

	// try-with-resources: the services are closed even if obtaining the registry fails,
	// and a failure in close() no longer masks an assertion failure from the body
	try (ZooKeeperHaServices zkHaService = new ZooKeeperHaServices(
			ZooKeeperUtils.startCuratorFramework(configuration),
			Executors.directExecutor(),
			configuration,
			new VoidBlobStore())) {

		final RunningJobsRegistry zkRegistry = zkHaService.getRunningJobsRegistry();
		final JobID jobID = JobID.generate();

		// a job the registry has never seen is reported as PENDING
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobRunning(jobID);
		assertEquals(JobSchedulingStatus.RUNNING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobFinished(jobID);
		assertEquals(JobSchedulingStatus.DONE, zkRegistry.getJobSchedulingStatus(jobID));

		// clearing the entry brings the job back to PENDING
		zkRegistry.clearJob(jobID);
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));
	}
}
Example #6
Source File: AbstractNonHaServices.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns the running jobs registry held by these services. The shutdown check and the
 * field read both happen while holding {@code lock}.
 *
 * @return the registry stored in {@code runningJobsRegistry}
 * @throws Exception declared by the original signature
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() throws Exception {
	final RunningJobsRegistry registry;
	synchronized (lock) {
		checkNotShutdown();
		registry = runningJobsRegistry;
	}
	return registry;
}
Example #7
Source File: Dispatcher.java From flink with Apache License 2.0 | 5 votes |
/**
 * Determines whether a job with the given id counts as a duplicate, i.e. the registry
 * reports it as DONE or it is currently tracked in {@code jobManagerRunnerFutures}.
 *
 * @param jobId identifying the submitted job
 * @return true if the job has already been submitted (is running) or has been executed
 * @throws FlinkException if the job scheduling status cannot be retrieved
 */
private boolean isDuplicateJob(JobID jobId) throws FlinkException {
	final RunningJobsRegistry.JobSchedulingStatus status;

	try {
		status = runningJobsRegistry.getJobSchedulingStatus(jobId);
	} catch (IOException ioe) {
		final String message = String.format("Failed to retrieve job scheduling status for job %s.", jobId);
		throw new FlinkException(message, ioe);
	}

	if (status == RunningJobsRegistry.JobSchedulingStatus.DONE) {
		return true;
	}
	return jobManagerRunnerFutures.containsKey(jobId);
}
Example #8
Source File: ZooKeeperHaServicesTest.java From flink with Apache License 2.0 | 5 votes |
private void runCleanupTest( Configuration configuration, TestingBlobStoreService blobStoreService, ThrowingConsumer<ZooKeeperHaServices, Exception> zooKeeperHaServicesConsumer) throws Exception { try (ZooKeeperHaServices zooKeeperHaServices = new ZooKeeperHaServices( ZooKeeperUtils.startCuratorFramework(configuration), Executors.directExecutor(), configuration, blobStoreService)) { // create some Zk services to trigger the generation of paths final LeaderRetrievalService resourceManagerLeaderRetriever = zooKeeperHaServices.getResourceManagerLeaderRetriever(); final LeaderElectionService resourceManagerLeaderElectionService = zooKeeperHaServices.getResourceManagerLeaderElectionService(); final RunningJobsRegistry runningJobsRegistry = zooKeeperHaServices.getRunningJobsRegistry(); final TestingListener listener = new TestingListener(); resourceManagerLeaderRetriever.start(listener); resourceManagerLeaderElectionService.start(new TestingContender("foobar", resourceManagerLeaderElectionService)); final JobID jobId = new JobID(); runningJobsRegistry.setJobRunning(jobId); listener.waitForNewLeader(2000L); resourceManagerLeaderRetriever.stop(); resourceManagerLeaderElectionService.stop(); runningJobsRegistry.clearJob(jobId); zooKeeperHaServicesConsumer.accept(zooKeeperHaServices); } }
Example #9
Source File: ZooKeeperRegistryTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests the job status life cycle of the ZooKeeper based {@link RunningJobsRegistry}:
 * {@code setJobRunning()}, {@code setJobFinished()} and {@code clearJob()}, each verified
 * through {@code getJobSchedulingStatus()}.
 */
@Test
public void testZooKeeperRegistry() throws Exception {
	Configuration configuration = new Configuration();
	configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
	configuration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");

	// try-with-resources: the services are closed even if obtaining the registry fails,
	// and a failure in close() no longer masks an assertion failure from the body
	try (ZooKeeperHaServices zkHaService = new ZooKeeperHaServices(
			ZooKeeperUtils.startCuratorFramework(configuration),
			Executors.directExecutor(),
			configuration,
			new VoidBlobStore())) {

		final RunningJobsRegistry zkRegistry = zkHaService.getRunningJobsRegistry();
		final JobID jobID = JobID.generate();

		// a job the registry has never seen is reported as PENDING
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobRunning(jobID);
		assertEquals(JobSchedulingStatus.RUNNING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobFinished(jobID);
		assertEquals(JobSchedulingStatus.DONE, zkRegistry.getJobSchedulingStatus(jobID));

		// clearing the entry brings the job back to PENDING
		zkRegistry.clearJob(jobID);
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));
	}
}
Example #10
Source File: DispatcherTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that a reelected Dispatcher can recover jobs. */ @Test public void testJobRecovery() throws Exception { dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(TEST_JOB_ID, createdJobManagerRunnerLatch)); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); // elect the initial dispatcher as the leader dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get(); // submit the job to the current leader dispatcherGateway.submitJob(jobGraph, TIMEOUT).get(); // check that the job has been persisted assertThat(submittedJobGraphStore.getJobIds(), contains(jobGraph.getJobID())); jobMasterLeaderElectionService.isLeader(UUID.randomUUID()).get(); assertThat(runningJobsRegistry.getJobSchedulingStatus(jobGraph.getJobID()), is(RunningJobsRegistry.JobSchedulingStatus.RUNNING)); // revoke the leadership which will stop all currently running jobs dispatcherLeaderElectionService.notLeader(); // re-grant the leadership, this should trigger the job recovery dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get(); // wait until we have recovered the job createdJobManagerRunnerLatch.await(); // check whether the job has been recovered final Collection<JobID> jobIds = dispatcherGateway.listJobs(TIMEOUT).get(); assertThat(jobIds, hasSize(1)); assertThat(jobIds, contains(jobGraph.getJobID())); }
Example #11
Source File: AbstractNonHaServices.java From flink with Apache License 2.0 | 5 votes |
/**
 * Returns the running jobs registry held by these services. The shutdown check and the
 * field read both happen while holding {@code lock}.
 *
 * @return the registry stored in {@code runningJobsRegistry}
 * @throws Exception declared by the original signature
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() throws Exception {
	final RunningJobsRegistry registry;
	synchronized (lock) {
		checkNotShutdown();
		registry = runningJobsRegistry;
	}
	return registry;
}
Example #12
Source File: Dispatcher.java From flink with Apache License 2.0 | 5 votes |
/**
 * Determines whether a job with the given id counts as a duplicate, i.e. the registry
 * reports it as DONE or it is currently tracked in {@code jobManagerRunnerFutures}.
 *
 * @param jobId identifying the submitted job
 * @return true if the job has already been submitted (is running) or has been executed
 * @throws FlinkException if the job scheduling status cannot be retrieved
 */
private boolean isDuplicateJob(JobID jobId) throws FlinkException {
	final RunningJobsRegistry.JobSchedulingStatus status;

	try {
		status = runningJobsRegistry.getJobSchedulingStatus(jobId);
	} catch (IOException ioe) {
		final String message = String.format("Failed to retrieve job scheduling status for job %s.", jobId);
		throw new FlinkException(message, ioe);
	}

	if (status == RunningJobsRegistry.JobSchedulingStatus.DONE) {
		return true;
	}
	return jobManagerRunnerFutures.containsKey(jobId);
}
Example #13
Source File: AbstractYarnNonHaServices.java From flink with Apache License 2.0 | 5 votes |
@Override public RunningJobsRegistry getRunningJobsRegistry() throws IOException { enter(); try { // IMPORTANT: The registry must NOT place its data in a directory that is // cleaned up by these services. return new FsNegativeRunningJobsRegistry(flinkFileSystem, workingDirectory); } finally { exit(); } }
Example #14
Source File: ZooKeeperHaServicesTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void runCleanupTest( Configuration configuration, TestingBlobStoreService blobStoreService, ThrowingConsumer<ZooKeeperHaServices, Exception> zooKeeperHaServicesConsumer) throws Exception { try (ZooKeeperHaServices zooKeeperHaServices = new ZooKeeperHaServices( ZooKeeperUtils.startCuratorFramework(configuration), Executors.directExecutor(), configuration, blobStoreService)) { // create some Zk services to trigger the generation of paths final LeaderRetrievalService resourceManagerLeaderRetriever = zooKeeperHaServices.getResourceManagerLeaderRetriever(); final LeaderElectionService resourceManagerLeaderElectionService = zooKeeperHaServices.getResourceManagerLeaderElectionService(); final RunningJobsRegistry runningJobsRegistry = zooKeeperHaServices.getRunningJobsRegistry(); final TestingListener listener = new TestingListener(); resourceManagerLeaderRetriever.start(listener); resourceManagerLeaderElectionService.start(new TestingContender("foobar", resourceManagerLeaderElectionService)); final JobID jobId = new JobID(); runningJobsRegistry.setJobRunning(jobId); listener.waitForNewLeader(2000L); resourceManagerLeaderRetriever.stop(); resourceManagerLeaderElectionService.stop(); runningJobsRegistry.clearJob(jobId); zooKeeperHaServicesConsumer.accept(zooKeeperHaServices); } }
Example #15
Source File: ZooKeeperRegistryTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Tests the job status life cycle of the ZooKeeper based {@link RunningJobsRegistry}:
 * {@code setJobRunning()}, {@code setJobFinished()} and {@code clearJob()}, each verified
 * through {@code getJobSchedulingStatus()}.
 */
@Test
public void testZooKeeperRegistry() throws Exception {
	Configuration configuration = new Configuration();
	configuration.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, testingServer.getConnectString());
	configuration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");

	// try-with-resources: the services are closed even if obtaining the registry fails,
	// and a failure in close() no longer masks an assertion failure from the body
	try (ZooKeeperHaServices zkHaService = new ZooKeeperHaServices(
			ZooKeeperUtils.startCuratorFramework(configuration),
			Executors.directExecutor(),
			configuration,
			new VoidBlobStore())) {

		final RunningJobsRegistry zkRegistry = zkHaService.getRunningJobsRegistry();
		final JobID jobID = JobID.generate();

		// a job the registry has never seen is reported as PENDING
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobRunning(jobID);
		assertEquals(JobSchedulingStatus.RUNNING, zkRegistry.getJobSchedulingStatus(jobID));

		zkRegistry.setJobFinished(jobID);
		assertEquals(JobSchedulingStatus.DONE, zkRegistry.getJobSchedulingStatus(jobID));

		// clearing the entry brings the job back to PENDING
		zkRegistry.clearJob(jobID);
		assertEquals(JobSchedulingStatus.PENDING, zkRegistry.getJobSchedulingStatus(jobID));
	}
}
Example #16
Source File: DispatcherTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that a reelected Dispatcher can recover jobs. */ @Test public void testJobRecovery() throws Exception { dispatcher = createAndStartDispatcher(heartbeatServices, haServices, new ExpectedJobIdJobManagerRunnerFactory(TEST_JOB_ID, createdJobManagerRunnerLatch)); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); // elect the initial dispatcher as the leader dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get(); // submit the job to the current leader dispatcherGateway.submitJob(jobGraph, TIMEOUT).get(); // check that the job has been persisted assertThat(submittedJobGraphStore.getJobIds(), contains(jobGraph.getJobID())); jobMasterLeaderElectionService.isLeader(UUID.randomUUID()).get(); assertThat(runningJobsRegistry.getJobSchedulingStatus(jobGraph.getJobID()), is(RunningJobsRegistry.JobSchedulingStatus.RUNNING)); // revoke the leadership which will stop all currently running jobs dispatcherLeaderElectionService.notLeader(); // re-grant the leadership, this should trigger the job recovery dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get(); // wait until we have recovered the job createdJobManagerRunnerLatch.await(); // check whether the job has been recovered final Collection<JobID> jobIds = dispatcherGateway.listJobs(TIMEOUT).get(); assertThat(jobIds, hasSize(1)); assertThat(jobIds, contains(jobGraph.getJobID())); }
Example #17
Source File: AbstractNonHaServices.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Returns the running jobs registry held by these services. The shutdown check and the
 * field read both happen while holding {@code lock}.
 *
 * @return the registry stored in {@code runningJobsRegistry}
 * @throws Exception declared by the original signature
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() throws Exception {
	final RunningJobsRegistry registry;
	synchronized (lock) {
		checkNotShutdown();
		registry = runningJobsRegistry;
	}
	return registry;
}
Example #18
Source File: Dispatcher.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Determines whether a job with the given id counts as a duplicate, i.e. the registry
 * reports it as DONE or it is currently tracked in {@code jobManagerRunnerFutures}.
 *
 * @param jobId identifying the submitted job
 * @return true if the job has already been submitted (is running) or has been executed
 * @throws FlinkException if the job scheduling status cannot be retrieved
 */
private boolean isDuplicateJob(JobID jobId) throws FlinkException {
	final RunningJobsRegistry.JobSchedulingStatus status;

	try {
		status = runningJobsRegistry.getJobSchedulingStatus(jobId);
	} catch (IOException ioe) {
		final String message = String.format("Failed to retrieve job scheduling status for job %s.", jobId);
		throw new FlinkException(message, ioe);
	}

	if (status == RunningJobsRegistry.JobSchedulingStatus.DONE) {
		return true;
	}
	return jobManagerRunnerFutures.containsKey(jobId);
}
Example #19
Source File: ZooKeeperHaServices.java From flink with Apache License 2.0 | 4 votes |
/**
 * Returns the running jobs registry held by these services.
 *
 * @return the value of the {@code runningJobsRegistry} field
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() {
	return this.runningJobsRegistry;
}
Example #20
Source File: ZooKeeperHaServices.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Returns the running jobs registry held by these services.
 *
 * @return the value of the {@code runningJobsRegistry} field
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() {
	return this.runningJobsRegistry;
}
Example #21
Source File: ZooKeeperHaServices.java From flink with Apache License 2.0 | 4 votes |
/**
 * Returns the running jobs registry held by these services.
 *
 * @return the value of the {@code runningJobsRegistry} field
 */
@Override
public RunningJobsRegistry getRunningJobsRegistry() {
	return this.runningJobsRegistry;
}