org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder Java Examples
The following examples show how to use
org.apache.flink.runtime.highavailability.TestingHighAvailabilityServicesBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DispatcherHATest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that a failure during job recovery is surfaced through the fatal error handler.
 */
@Test
public void testFailingRecoveryIsAFatalError() throws Exception {
    final String expectedMessage = "Job recovery test failure.";
    final Supplier<Exception> failureSupplier = () -> new FlinkException(expectedMessage);

    // A job graph store constructed with the failure supplier stands in for a broken recovery.
    final FailingSubmittedJobGraphStore failingStore = new FailingSubmittedJobGraphStore(failureSupplier);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder()
        .setSubmittedJobGraphStore(failingStore)
        .build();

    final HATestingDispatcher dispatcher = createDispatcher(haServices);
    dispatcher.start();

    // Block until the fatal error handler has been notified.
    final Throwable reportedError = testingFatalErrorHandler.getErrorFuture().get();
    final boolean carriesExpectedMessage =
        ExceptionUtils.findThrowableWithMessage(reportedError, expectedMessage).isPresent();
    assertThat(carriesExpectedMessage, is(true));

    // The error was expected, so reset the handler.
    testingFatalErrorHandler.clearError();
}
Example #2
Source File: DispatcherHATest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that a failure during job recovery is surfaced through the fatal error handler.
 */
@Test
public void testFailingRecoveryIsAFatalError() throws Exception {
    final String expectedMessage = "Job recovery test failure.";
    final Supplier<Exception> failureSupplier = () -> new FlinkException(expectedMessage);

    // A job graph store constructed with the failure supplier stands in for a broken recovery.
    final FailingSubmittedJobGraphStore failingStore = new FailingSubmittedJobGraphStore(failureSupplier);
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder()
        .setSubmittedJobGraphStore(failingStore)
        .build();

    final HATestingDispatcher dispatcher = createDispatcher(haServices);
    dispatcher.start();

    // Block until the fatal error handler has been notified.
    final Throwable reportedError = testingFatalErrorHandler.getErrorFuture().get();
    final boolean carriesExpectedMessage =
        ExceptionUtils.findThrowableWithMessage(reportedError, expectedMessage).isPresent();
    assertThat(carriesExpectedMessage, is(true));

    // The error was expected, so reset the handler.
    testingFatalErrorHandler.clearError();
}
Example #3
Source File: DefaultDispatcherRunnerITCase.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creates the fixtures used by the tests: a session dispatcher runner factory, a job graph,
 * a testing leader election service, a fatal error handler, a job graph store built from
 * builder defaults and the partial dispatcher services.
 */
@Before
public void setup() {
    dispatcherRunnerFactory =
        DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE);
    jobGraph = createJobGraph();
    dispatcherLeaderElectionService = new TestingLeaderElectionService();
    fatalErrorHandler = new TestingFatalErrorHandler();
    jobGraphStore = TestingJobGraphStore.newBuilder().build();

    final Configuration freshConfiguration = new Configuration();
    partialDispatcherServices = new PartialDispatcherServices(
        freshConfiguration,
        new TestingHighAvailabilityServicesBuilder().build(),
        CompletableFuture::new,
        blobServerResource.getBlobServer(),
        new TestingHeartbeatServices(),
        UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup,
        new MemoryArchivedExecutionGraphStore(),
        fatalErrorHandler,
        VoidHistoryServerArchivist.INSTANCE,
        null);
}
Example #4
Source File: ResourceManagerJobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Before public void setup() throws Exception { rpcService = new TestingRpcService(); jobId = new JobID(); createAndRegisterJobMasterGateway(); jobMasterResourceId = ResourceID.generate(); jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService( jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); resourceManagerLeaderElectionService = new TestingLeaderElectionService(); haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(requestedJobId -> { if (requestedJobId.equals(jobId)) { return jobMasterLeaderRetrievalService; } else { throw new FlinkRuntimeException(String.format("Unknown job id %s", jobId)); } }) .setResourceManagerLeaderElectionService(resourceManagerLeaderElectionService) .build(); testingFatalErrorHandler = new TestingFatalErrorHandler(); resourceManager = createAndStartResourceManager(); // wait until the leader election has been completed resourceManagerLeaderElectionService.isLeader(UUID.randomUUID()).get(); resourceManagerGateway = resourceManager.getSelfGateway(ResourceManagerGateway.class); }
Example #5
Source File: ResourceManagerJobMasterTest.java From flink with Apache License 2.0 | 5 votes |
@Before public void setup() throws Exception { rpcService = new TestingRpcService(); jobId = new JobID(); createAndRegisterJobMasterGateway(); jobMasterResourceId = ResourceID.generate(); jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService( jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); resourceManagerLeaderElectionService = new TestingLeaderElectionService(); haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(requestedJobId -> { if (requestedJobId.equals(jobId)) { return jobMasterLeaderRetrievalService; } else { throw new FlinkRuntimeException(String.format("Unknown job id %s", jobId)); } }) .setResourceManagerLeaderElectionService(resourceManagerLeaderElectionService) .build(); testingFatalErrorHandler = new TestingFatalErrorHandler(); resourceManager = createAndStartResourceManager(); // wait until the leader election has been completed resourceManagerLeaderElectionService.isLeader(UUID.randomUUID()).get(); resourceManagerGateway = resourceManager.getSelfGateway(ResourceManagerGateway.class); }
Example #6
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the JobLeaderService won't try to reconnect to JobMaster after it * has lost the leadership. See FLINK-16836. */ @Test public void doesNotReconnectAfterTargetLostLeadership() throws Exception { final JobID jobId = new JobID(); final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(); final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService) .build(); final TestingJobMasterGateway jobMasterGateway = registerJobMaster(); final OneShotLatch jobManagerGainedLeadership = new OneShotLatch(); final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(ignored -> jobManagerGainedLeadership.trigger()); final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener); try { jobLeaderService.addJob(jobId, jobMasterGateway.getAddress()); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), UUID.randomUUID()); jobManagerGainedLeadership.await(); // revoke the leadership leaderRetrievalService.notifyListener(null, null); testingJobLeaderListener.waitUntilJobManagerLostLeadership(); jobLeaderService.reconnect(jobId); } finally { jobLeaderService.stop(); } }
Example #7
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the JobLeaderService can reconnect to an old leader which seemed * to have lost the leadership in between. See FLINK-14316. */ @Test public void canReconnectToOldLeaderWithSameLeaderAddress() throws Exception { final JobID jobId = new JobID(); final SettableLeaderRetrievalService leaderRetrievalService = new SettableLeaderRetrievalService(); final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(ignored -> leaderRetrievalService) .build(); final TestingJobMasterGateway jobMasterGateway = registerJobMaster(); final BlockingQueue<JobID> leadershipQueue = new ArrayBlockingQueue<>(1); final TestingJobLeaderListener testingJobLeaderListener = new TestingJobLeaderListener(leadershipQueue::offer); final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, testingJobLeaderListener); try { jobLeaderService.addJob(jobId, jobMasterGateway.getAddress()); final UUID leaderSessionId = UUID.randomUUID(); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId); // wait for the first leadership assertThat(leadershipQueue.take(), is(jobId)); // revoke the leadership leaderRetrievalService.notifyListener(null, null); testingJobLeaderListener.waitUntilJobManagerLostLeadership(); leaderRetrievalService.notifyListener(jobMasterGateway.getAddress(), leaderSessionId); // check that we obtain the leadership a second time assertThat(leadershipQueue.take(), is(jobId)); } finally { jobLeaderService.stop(); } }
Example #8
Source File: DefaultJobLeaderServiceTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that, after a job has been removed, leader notifications for it no longer reach
 * the job leader listener. The test uses a {@code FailingSettableLeaderRetrievalService}
 * as the leader retrieval service.
 */
@Test
public void removeJobWithFailingLeaderRetrievalServiceStopWillStopListeningToLeaderNotifications() throws Exception {
    final FailingSettableLeaderRetrievalService retrievalService = new FailingSettableLeaderRetrievalService();
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServicesBuilder()
        .setJobMasterLeaderRetrieverFunction(ignored -> retrievalService)
        .build();

    final JobID jobId = new JobID();

    // Completed by the listener as soon as it is told about a new leader.
    final CompletableFuture<JobID> leaderNotification = new CompletableFuture<>();
    final TestingJobLeaderListener leaderListener = new TestingJobLeaderListener(leaderNotification::complete);

    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder().build();
    rpcServiceResource.getTestingRpcService().registerGateway(jobMasterGateway.getAddress(), jobMasterGateway);

    final JobLeaderService jobLeaderService = createAndStartJobLeaderService(haServices, leaderListener);

    try {
        jobLeaderService.addJob(jobId, "foobar");
        jobLeaderService.removeJob(jobId);

        retrievalService.notifyListener(
            jobMasterGateway.getAddress(),
            jobMasterGateway.getFencingToken().toUUID());

        try {
            leaderNotification.get(10, TimeUnit.MILLISECONDS);
            fail("The leader future should not be completed.");
        } catch (TimeoutException expected) {
            // the timeout is the expected outcome: no notification must arrive
        }
    } finally {
        jobLeaderService.stop();
    }
}
Example #9
Source File: ResourceManagerJobMasterTest.java From flink with Apache License 2.0 | 5 votes |
@Before public void setup() throws Exception { rpcService = new TestingRpcService(); jobId = new JobID(); createAndRegisterJobMasterGateway(); jobMasterResourceId = ResourceID.generate(); jobMasterLeaderRetrievalService = new SettableLeaderRetrievalService( jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); resourceManagerLeaderElectionService = new TestingLeaderElectionService(); haServices = new TestingHighAvailabilityServicesBuilder() .setJobMasterLeaderRetrieverFunction(requestedJobId -> { if (requestedJobId.equals(jobId)) { return jobMasterLeaderRetrievalService; } else { throw new FlinkRuntimeException(String.format("Unknown job id %s", jobId)); } }) .setResourceManagerLeaderElectionService(resourceManagerLeaderElectionService) .build(); testingFatalErrorHandler = new TestingFatalErrorHandler(); resourceManager = createAndStartResourceManager(); // wait until the leader election has been completed resourceManagerLeaderElectionService.isLeader(UUID.randomUUID()).get(); resourceManagerGateway = resourceManager.getSelfGateway(ResourceManagerGateway.class); }
Example #10
Source File: DispatcherHATest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that all JobManagerRunner are terminated if the leadership of the * Dispatcher is revoked. */ @Test public void testRevokeLeadershipTerminatesJobManagerRunners() throws Exception { final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService(); final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder() .setDispatcherLeaderElectionService(leaderElectionService) .build(); final ArrayBlockingQueue<DispatcherId> fencingTokens = new ArrayBlockingQueue<>(2); final HATestingDispatcher dispatcher = createDispatcherWithObservableFencingTokens( highAvailabilityServices, fencingTokens); dispatcher.start(); try { // grant leadership and submit a single job final DispatcherId expectedDispatcherId = DispatcherId.generate(); leaderElectionService.isLeader(expectedDispatcherId.toUUID()).get(); assertThat(fencingTokens.take(), is(equalTo(expectedDispatcherId))); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); final CompletableFuture<Acknowledge> submissionFuture = dispatcherGateway.submitJob(createNonEmptyJobGraph(), timeout); submissionFuture.get(); assertThat(dispatcher.getNumberJobs(timeout).get(), is(1)); // revoke the leadership --> this should stop all running JobManagerRunners leaderElectionService.notLeader(); assertThat(fencingTokens.take(), is(equalTo(NULL_FENCING_TOKEN))); assertThat(dispatcher.getNumberJobs(timeout).get(), is(0)); } finally { RpcUtils.terminateRpcEndpoint(dispatcher, timeout); } }
Example #11
Source File: DispatcherHATest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a Dispatcher does not remove the JobGraph from the submitted job graph store * when losing leadership and recovers it when regaining leadership. */ @Test public void testJobRecoveryWhenChangingLeadership() throws Exception { final InMemorySubmittedJobGraphStore submittedJobGraphStore = new InMemorySubmittedJobGraphStore(); final CompletableFuture<JobID> recoveredJobFuture = new CompletableFuture<>(); submittedJobGraphStore.setRecoverJobGraphFunction((jobID, jobIDSubmittedJobGraphMap) -> { recoveredJobFuture.complete(jobID); return jobIDSubmittedJobGraphMap.get(jobID); }); final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService(); final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder() .setSubmittedJobGraphStore(submittedJobGraphStore) .setDispatcherLeaderElectionService(leaderElectionService) .build(); final ArrayBlockingQueue<DispatcherId> fencingTokens = new ArrayBlockingQueue<>(2); final HATestingDispatcher dispatcher = createDispatcherWithObservableFencingTokens( highAvailabilityServices, fencingTokens); dispatcher.start(); try { // grant leadership and submit a single job final DispatcherId expectedDispatcherId = DispatcherId.generate(); leaderElectionService.isLeader(expectedDispatcherId.toUUID()).get(); assertThat(fencingTokens.take(), is(equalTo(expectedDispatcherId))); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); final JobGraph jobGraph = createNonEmptyJobGraph(); final CompletableFuture<Acknowledge> submissionFuture = dispatcherGateway.submitJob(jobGraph, timeout); submissionFuture.get(); final JobID jobId = jobGraph.getJobID(); assertThat(submittedJobGraphStore.contains(jobId), is(true)); // revoke the leadership --> this should stop all running JobManagerRunners leaderElectionService.notLeader(); assertThat(fencingTokens.take(), is(equalTo(NULL_FENCING_TOKEN))); 
assertThat(submittedJobGraphStore.contains(jobId), is(true)); assertThat(recoveredJobFuture.isDone(), is(false)); // re-grant leadership leaderElectionService.isLeader(DispatcherId.generate().toUUID()); assertThat(recoveredJobFuture.get(), is(equalTo(jobId))); } finally { RpcUtils.terminateRpcEndpoint(dispatcher, timeout); } }
Example #12
Source File: DispatcherHATest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that all JobManagerRunner are terminated if the leadership of the * Dispatcher is revoked. */ @Test public void testRevokeLeadershipTerminatesJobManagerRunners() throws Exception { final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService(); final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder() .setDispatcherLeaderElectionService(leaderElectionService) .build(); final ArrayBlockingQueue<DispatcherId> fencingTokens = new ArrayBlockingQueue<>(2); final HATestingDispatcher dispatcher = createDispatcherWithObservableFencingTokens( highAvailabilityServices, fencingTokens); dispatcher.start(); try { // grant leadership and submit a single job final DispatcherId expectedDispatcherId = DispatcherId.generate(); leaderElectionService.isLeader(expectedDispatcherId.toUUID()).get(); assertThat(fencingTokens.take(), is(equalTo(expectedDispatcherId))); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); final CompletableFuture<Acknowledge> submissionFuture = dispatcherGateway.submitJob(createNonEmptyJobGraph(), timeout); submissionFuture.get(); assertThat(dispatcher.getNumberJobs(timeout).get(), is(1)); // revoke the leadership --> this should stop all running JobManagerRunners leaderElectionService.notLeader(); assertThat(fencingTokens.take(), is(equalTo(NULL_FENCING_TOKEN))); assertThat(dispatcher.getNumberJobs(timeout).get(), is(0)); } finally { RpcUtils.terminateRpcEndpoint(dispatcher, timeout); } }
Example #13
Source File: DispatcherHATest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a Dispatcher does not remove the JobGraph from the submitted job graph store * when losing leadership and recovers it when regaining leadership. */ @Test public void testJobRecoveryWhenChangingLeadership() throws Exception { final InMemorySubmittedJobGraphStore submittedJobGraphStore = new InMemorySubmittedJobGraphStore(); final CompletableFuture<JobID> recoveredJobFuture = new CompletableFuture<>(); submittedJobGraphStore.setRecoverJobGraphFunction((jobID, jobIDSubmittedJobGraphMap) -> { recoveredJobFuture.complete(jobID); return jobIDSubmittedJobGraphMap.get(jobID); }); final TestingLeaderElectionService leaderElectionService = new TestingLeaderElectionService(); final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder() .setSubmittedJobGraphStore(submittedJobGraphStore) .setDispatcherLeaderElectionService(leaderElectionService) .build(); final ArrayBlockingQueue<DispatcherId> fencingTokens = new ArrayBlockingQueue<>(2); final HATestingDispatcher dispatcher = createDispatcherWithObservableFencingTokens( highAvailabilityServices, fencingTokens); dispatcher.start(); try { // grant leadership and submit a single job final DispatcherId expectedDispatcherId = DispatcherId.generate(); leaderElectionService.isLeader(expectedDispatcherId.toUUID()).get(); assertThat(fencingTokens.take(), is(equalTo(expectedDispatcherId))); final DispatcherGateway dispatcherGateway = dispatcher.getSelfGateway(DispatcherGateway.class); final JobGraph jobGraph = createNonEmptyJobGraph(); final CompletableFuture<Acknowledge> submissionFuture = dispatcherGateway.submitJob(jobGraph, timeout); submissionFuture.get(); final JobID jobId = jobGraph.getJobID(); assertThat(submittedJobGraphStore.contains(jobId), is(true)); // revoke the leadership --> this should stop all running JobManagerRunners leaderElectionService.notLeader(); assertThat(fencingTokens.take(), is(equalTo(NULL_FENCING_TOKEN))); 
assertThat(submittedJobGraphStore.contains(jobId), is(true)); assertThat(recoveredJobFuture.isDone(), is(false)); // re-grant leadership leaderElectionService.isLeader(DispatcherId.generate().toUUID()); assertThat(recoveredJobFuture.get(), is(equalTo(jobId))); } finally { RpcUtils.terminateRpcEndpoint(dispatcher, timeout); } }
Example #14
Source File: MiniDispatcherTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates high availability services from builder defaults and a fresh testing
 * job manager runner factory before each test.
 */
@Before
public void setup() throws Exception {
    final TestingHighAvailabilityServicesBuilder haServicesBuilder = new TestingHighAvailabilityServicesBuilder();
    highAvailabilityServices = haServicesBuilder.build();

    testingJobManagerRunnerFactory = new TestingJobManagerRunnerFactory();
}
Example #15
Source File: ZooKeeperDefaultDispatcherRunnerTest.java From flink with Apache License 2.0 | 4 votes |
/** * See FLINK-11665. */ @Test public void testResourceCleanupUnderLeadershipChange() throws Exception { final TestingRpcService rpcService = testingRpcServiceResource.getTestingRpcService(); final TestingLeaderElectionService dispatcherLeaderElectionService = new TestingLeaderElectionService(); final CuratorFramework client = ZooKeeperUtils.startCuratorFramework(configuration); try (final TestingHighAvailabilityServices highAvailabilityServices = new TestingHighAvailabilityServicesBuilder() .setRunningJobsRegistry(new ZooKeeperRunningJobsRegistry(client, configuration)) .setDispatcherLeaderElectionService(dispatcherLeaderElectionService) .setJobMasterLeaderRetrieverFunction(jobId -> ZooKeeperUtils.createLeaderRetrievalService(client, configuration)) .build()) { final PartialDispatcherServices partialDispatcherServices = new PartialDispatcherServices( configuration, highAvailabilityServices, CompletableFuture::new, blobServer, new TestingHeartbeatServices(), UnregisteredMetricGroups::createUnregisteredJobManagerMetricGroup, new MemoryArchivedExecutionGraphStore(), fatalErrorHandler, VoidHistoryServerArchivist.INSTANCE, null); final JobGraph jobGraph = createJobGraphWithBlobs(); final DefaultDispatcherRunnerFactory defaultDispatcherRunnerFactory = DefaultDispatcherRunnerFactory.createSessionRunner(SessionDispatcherFactory.INSTANCE); try (final DispatcherRunner dispatcherRunner = createDispatcherRunner( rpcService, dispatcherLeaderElectionService, () -> createZooKeeperJobGraphStore(client), partialDispatcherServices, defaultDispatcherRunnerFactory)) { // initial run DispatcherGateway dispatcherGateway = grantLeadership(dispatcherLeaderElectionService); LOG.info("Initial job submission {}.", jobGraph.getJobID()); dispatcherGateway.submitJob(jobGraph, TESTING_TIMEOUT).get(); dispatcherLeaderElectionService.notLeader(); // recovering submitted jobs LOG.info("Re-grant leadership first time."); dispatcherGateway = grantLeadership(dispatcherLeaderElectionService); 
LOG.info("Cancel recovered job {}.", jobGraph.getJobID()); // cancellation of the job should remove everything final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), TESTING_TIMEOUT); dispatcherGateway.cancelJob(jobGraph.getJobID(), TESTING_TIMEOUT).get(); // a successful cancellation should eventually remove all job information final JobResult jobResult = jobResultFuture.get(); assertThat(jobResult.getApplicationStatus(), is(ApplicationStatus.CANCELED)); dispatcherLeaderElectionService.notLeader(); // check that the job has been removed from ZooKeeper final ZooKeeperJobGraphStore submittedJobGraphStore = createZooKeeperJobGraphStore(client); CommonTestUtils.waitUntilCondition(() -> submittedJobGraphStore.getJobIds().isEmpty(), Deadline.fromNow(VERIFICATION_TIMEOUT), 20L); } } // check resource clean up assertThat(clusterHaStorageDir.listFiles(), is(emptyArray())); }