Java Code Examples for org.apache.flink.runtime.rpc.RpcUtils#terminateRpcEndpoint()
The following examples show how to use
org.apache.flink.runtime.rpc.RpcUtils#terminateRpcEndpoint().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DispatcherTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Surfaces any recorded fatal error and releases the per-test resources.
 *
 * <p>The cleanup steps are chained through nested {@code finally} blocks so that a
 * failure in an earlier step (a rethrown fatal error, or a failing dispatcher
 * termination) no longer prevents the HA services and the blob server from being
 * cleaned up. The steps still run in the same order as before.
 *
 * @throws Exception the fatal error recorded by the handler, or a failure raised by one
 *     of the cleanup steps; if several steps fail, the last failure propagates
 */
@After
public void tearDown() throws Exception {
    try {
        fatalErrorHandler.rethrowError();
    } finally {
        try {
            if (dispatcher != null) {
                RpcUtils.terminateRpcEndpoint(dispatcher, TIMEOUT);
            }
        } finally {
            try {
                if (haServices != null) {
                    haServices.closeAndCleanupAllData();
                }
            } finally {
                if (blobServer != null) {
                    blobServer.close();
                }
            }
        }
    }
}
Example 2
Source File: AkkaRpcActorTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests that the {@link AkkaRpcActor} only completes after the asynchronous * post stop action has completed. */ @Test public void testActorTerminationWithAsynchronousOnStopAction() throws Exception { final CompletableFuture<Void> onStopFuture = new CompletableFuture<>(); final AsynchronousOnStopEndpoint endpoint = new AsynchronousOnStopEndpoint(akkaRpcService, onStopFuture); try { endpoint.start(); final CompletableFuture<Void> terminationFuture = endpoint.closeAsync(); assertFalse(terminationFuture.isDone()); onStopFuture.complete(null); // the onStopFuture completion should allow the endpoint to terminate terminationFuture.get(); } finally { RpcUtils.terminateRpcEndpoint(endpoint, timeout); } }
Example 3
Source File: AkkaRpcActorHandshakeTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Checks that a gateway obtained through {@code akkaRpcService2} can call an endpoint
 * created on {@code akkaRpcService1} and gets back the value stored in that endpoint.
 */
@Test
public void testVersionMatchBetweenRpcComponents() throws Exception {
    final int expectedFoobar = 42;

    AkkaRpcActorTest.DummyRpcEndpoint endpoint = new AkkaRpcActorTest.DummyRpcEndpoint(akkaRpcService1);
    endpoint.setFoobar(expectedFoobar);
    endpoint.start();

    try {
        // connect from the second service to the endpoint started on the first one
        final AkkaRpcActorTest.DummyRpcGateway remoteGateway = akkaRpcService2
            .connect(endpoint.getAddress(), AkkaRpcActorTest.DummyRpcGateway.class)
            .get();

        assertThat(remoteGateway.foobar().get(), equalTo(expectedFoobar));
    } finally {
        RpcUtils.terminateRpcEndpoint(endpoint, timeout);
    }
}
Example 4
Source File: AkkaRpcActorTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the {@link AkkaRpcActor} discards messages until the corresponding * {@link RpcEndpoint} has been started. */ @Test public void testMessageDiscarding() throws Exception { int expectedValue = 1337; DummyRpcEndpoint rpcEndpoint = new DummyRpcEndpoint(akkaRpcService); DummyRpcGateway rpcGateway = rpcEndpoint.getSelfGateway(DummyRpcGateway.class); // this message should be discarded and completed with an AkkaRpcException CompletableFuture<Integer> result = rpcGateway.foobar(); try { result.get(timeout.getSize(), timeout.getUnit()); fail("Expected an AkkaRpcException."); } catch (ExecutionException ee) { // expected this exception, because the endpoint has not been started assertTrue(ee.getCause() instanceof AkkaRpcException); } // set a new value which we expect to be returned rpcEndpoint.setFoobar(expectedValue); // start the endpoint so that it can process messages rpcEndpoint.start(); try { // send the rpc again result = rpcGateway.foobar(); // now we should receive a result :-) Integer actualValue = result.get(timeout.getSize(), timeout.getUnit()); assertThat("The new foobar value should have been returned.", actualValue, Is.is(expectedValue)); } finally { RpcUtils.terminateRpcEndpoint(rpcEndpoint, timeout); } }
Example 5
Source File: JobMasterPartitionReleaseTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Terminates the job master, if one was created, and always deletes the temporary
 * folder afterwards.
 *
 * @throws Exception if terminating the job master fails
 */
public void close() throws Exception {
    try {
        if (jobMaster == null) {
            return;
        }
        RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout);
    } finally {
        // runs on every exit path, including the early return above
        temporaryFolder.delete();
    }
}
Example 6
Source File: AkkaRpcActorTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that multiple termination calls won't trigger the onStop action multiple times. * Note that this test is a probabilistic test which only fails sometimes without the fix. * See FLINK-16703. */ @Test public void callsOnStopOnlyOnce() throws Exception { final CompletableFuture<Void> onStopFuture = new CompletableFuture<>(); final OnStopCountingRpcEndpoint endpoint = new OnStopCountingRpcEndpoint(akkaRpcService, onStopFuture); try { endpoint.start(); final AkkaBasedEndpoint selfGateway = endpoint.getSelfGateway(AkkaBasedEndpoint.class); // try to terminate the actor twice selfGateway.getActorRef().tell(ControlMessages.TERMINATE, ActorRef.noSender()); selfGateway.getActorRef().tell(ControlMessages.TERMINATE, ActorRef.noSender()); endpoint.waitUntilOnStopHasBeenCalled(); onStopFuture.complete(null); endpoint.getTerminationFuture().get(); assertThat(endpoint.getNumOnStopCalls(), is(1)); } finally { onStopFuture.complete(null); RpcUtils.terminateRpcEndpoint(endpoint, timeout); } }
Example 7
Source File: MiniDispatcherTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that the {@link MiniDispatcher} only terminates in {@link ClusterEntrypoint.ExecutionMode#NORMAL} * after it has served the {@link org.apache.flink.runtime.jobmaster.JobResult} once. */ @Test public void testJobResultRetrieval() throws Exception { final MiniDispatcher miniDispatcher = createMiniDispatcher(ClusterEntrypoint.ExecutionMode.NORMAL); miniDispatcher.start(); try { // wait until the Dispatcher is the leader dispatcherLeaderElectionService.isLeader(UUID.randomUUID()).get(); // wait until we have submitted the job jobGraphFuture.get(); resultFuture.complete(archivedExecutionGraph); assertFalse(miniDispatcher.getTerminationFuture().isDone()); final DispatcherGateway dispatcherGateway = miniDispatcher.getSelfGateway(DispatcherGateway.class); final CompletableFuture<JobResult> jobResultFuture = dispatcherGateway.requestJobResult(jobGraph.getJobID(), timeout); final JobResult jobResult = jobResultFuture.get(); assertThat(jobResult.getJobId(), is(jobGraph.getJobID())); } finally { RpcUtils.terminateRpcEndpoint(miniDispatcher, timeout); } }
Example 8
Source File: DispatcherTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Releases the per-test resources: the dispatcher endpoint, the HA services and the
 * blob server, in that order.
 *
 * <p>The steps are chained through nested {@code finally} blocks so that a failure
 * while terminating the dispatcher (or while cleaning up the HA services) no longer
 * skips the remaining cleanup steps.
 *
 * @throws Exception a failure raised by one of the cleanup steps; if several steps
 *     fail, the last failure propagates
 */
@After
public void tearDown() throws Exception {
    try {
        if (dispatcher != null) {
            RpcUtils.terminateRpcEndpoint(dispatcher, TIMEOUT);
        }
    } finally {
        try {
            if (haServices != null) {
                haServices.closeAndCleanupAllData();
            }
        } finally {
            if (blobServer != null) {
                blobServer.close();
            }
        }
    }
}
Example 9
Source File: ResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Terminates the resource manager, cleans up the HA services and finally rethrows any
 * fatal error recorded during the test.
 *
 * <p>The steps are chained through nested {@code finally} blocks so that a failing
 * termination or HA cleanup can no longer skip the later steps; in particular, a
 * recorded fatal error is always checked.
 *
 * @throws Exception the recorded fatal error, or a failure raised by one of the
 *     cleanup steps; if several steps fail, the last failure propagates
 */
@After
public void after() throws Exception {
    try {
        if (resourceManager != null) {
            RpcUtils.terminateRpcEndpoint(resourceManager, TIMEOUT);
        }
    } finally {
        try {
            if (highAvailabilityServices != null) {
                highAvailabilityServices.closeAndCleanupAllData();
            }
        } finally {
            if (testingFatalErrorHandler.hasExceptionOccurred()) {
                testingFatalErrorHandler.rethrowError();
            }
        }
    }
}
Example 10
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job is removed from the JobLeaderService once a TaskExecutor has * no more slots assigned to this job. * * <p>See FLINK-8504 */ @Test public void testRemoveJobFromJobLeaderService() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices); try { final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReport.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID()); final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>(); final CompletableFuture<Void> stopFuture = new CompletableFuture<>(); final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService( startFuture, stopFuture); haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever); taskExecutor.start(); taskExecutor.waitUntilStarted(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final SlotID slotId = new 
SlotID(taskManagerLocation.getResourceID(), 0); final AllocationID allocationId = new AllocationID(); assertThat(startFuture.isDone(), is(false)); final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService(); assertThat(jobLeaderService.containsJob(jobId), is(false)); // wait for the initial slot report initialSlotReport.get(); taskExecutorGateway.requestSlot( slotId, jobId, allocationId, "foobar", resourceManagerId, timeout).get(); // wait until the job leader retrieval service for jobId is started startFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(true)); taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get(); // wait that the job leader retrieval service for jobId stopped becaue it should get removed stopFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(false)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 11
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testDuplicatedKvStateRegistrationsFailTask() throws Exception { final JobGraph graph = createKvJobGraph(); final List<JobVertex> jobVertices = graph.getVerticesSortedTopologicallyFromSources(); final JobVertex vertex1 = jobVertices.get(0); final JobVertex vertex2 = jobVertices.get(1); final JobMaster jobMaster = createJobMaster( configuration, graph, haServices, new TestingJobManagerSharedServicesBuilder().build(), heartbeatServices); CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId); final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class); try { // wait for the start to complete startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); // duplicate registration fails task // register a KvState final String registrationName = "duplicate-me"; final KvStateID kvStateID = new KvStateID(); final KeyGroupRange keyGroupRange = new KeyGroupRange(0, 0); final InetSocketAddress address = new InetSocketAddress(InetAddress.getLocalHost(), 4396); jobMasterGateway.notifyKvStateRegistered( graph.getJobID(), vertex1.getID(), keyGroupRange, registrationName, kvStateID, address).get(); try { jobMasterGateway.notifyKvStateRegistered( graph.getJobID(), vertex2.getID(), // <--- different operator, but... keyGroupRange, registrationName, // ...same name kvStateID, address).get(); fail("Expected to fail because of clashing registration message."); } catch (Exception e) { assertTrue(ExceptionUtils.findThrowableWithMessage(e, "Registration name clash").isPresent()); assertEquals(JobStatus.FAILED, jobMasterGateway.requestJobStatus(testingTimeout).get()); } } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example 12
Source File: JobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testHeartbeatTimeoutWithResourceManager() throws Exception { final String resourceManagerAddress = "rm"; final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceID rmResourceId = new ResourceID(resourceManagerAddress); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway( resourceManagerId, rmResourceId, resourceManagerAddress, "localhost"); final CompletableFuture<Tuple3<JobMasterId, ResourceID, JobID>> jobManagerRegistrationFuture = new CompletableFuture<>(); final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>(); final CountDownLatch registrationAttempts = new CountDownLatch(2); resourceManagerGateway.setRegisterJobManagerConsumer(tuple -> { jobManagerRegistrationFuture.complete( Tuple3.of( tuple.f0, tuple.f1, tuple.f3)); registrationAttempts.countDown(); }); resourceManagerGateway.setDisconnectJobManagerConsumer(tuple -> disconnectedJobManagerFuture.complete(tuple.f0)); rpcService.registerGateway(resourceManagerAddress, resourceManagerGateway); final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build(); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, jobManagerSharedServices); CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId); try { // wait for the start operation to complete startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); // define a leader and see that a registration happens rmLeaderRetrievalService.notifyListener(resourceManagerAddress, resourceManagerId.toUUID()); // register job manager success will trigger monitor heartbeat target between jm and rm final Tuple3<JobMasterId, ResourceID, JobID> registrationInformation = jobManagerRegistrationFuture.get( testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); assertThat(registrationInformation.f0, Matchers.equalTo(jobMasterId)); 
assertThat(registrationInformation.f1, Matchers.equalTo(jmResourceId)); assertThat(registrationInformation.f2, Matchers.equalTo(jobGraph.getJobID())); final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); // heartbeat timeout should trigger disconnect JobManager from ResourceManager assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID())); // the JobMaster should try to reconnect to the RM registrationAttempts.await(); } finally { jobManagerSharedServices.shutdown(); RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example 13
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests the updateGlobalAggregate functionality. */ @Test public void testJobMasterAggregatesValuesCorrectly() throws Exception { final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build(), heartbeatServices); CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId); final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class); try { // wait for the start to complete startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); CompletableFuture<Object> updateAggregateFuture; AggregateFunction<Integer, Integer, Integer> aggregateFunction = createAggregateFunction(); ClosureCleaner.clean(aggregateFunction, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); byte[] serializedAggregateFunction = InstantiationUtil.serializeObject(aggregateFunction); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg1", 1, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(1)); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg1", 2, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(3)); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg1", 3, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(6)); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg1", 4, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(10)); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg2", 10, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(10)); updateAggregateFuture = jobMasterGateway.updateGlobalAggregate("agg2", 23, serializedAggregateFunction); assertThat(updateAggregateFuture.get(), equalTo(33)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example 14
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testHeartbeatTimeoutWithTaskManager() throws Exception { final CompletableFuture<ResourceID> heartbeatResourceIdFuture = new CompletableFuture<>(); final CompletableFuture<JobID> disconnectedJobManagerFuture = new CompletableFuture<>(); final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation(); final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setHeartbeatJobManagerConsumer((taskManagerId, ignored) -> heartbeatResourceIdFuture.complete(taskManagerId)) .setDisconnectJobManagerConsumer((jobId, throwable) -> disconnectedJobManagerFuture.complete(jobId)) .createTestingTaskExecutorGateway(); rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway); final JobManagerSharedServices jobManagerSharedServices = new TestingJobManagerSharedServicesBuilder().build(); final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, jobManagerSharedServices); CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId); try { // wait for the start to complete startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class); // register task manager will trigger monitor heartbeat target, schedule heartbeat request at interval time CompletableFuture<RegistrationResponse> registrationResponse = jobMasterGateway.registerTaskManager( taskExecutorGateway.getAddress(), unresolvedTaskManagerLocation, testingTimeout); // wait for the completion of the registration registrationResponse.get(); final JobID disconnectedJobManager = disconnectedJobManagerFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); assertThat(disconnectedJobManager, Matchers.equalTo(jobGraph.getJobID())); final ResourceID heartbeatResourceId = heartbeatResourceIdFuture.getNow(null); assertThat(heartbeatResourceId, anyOf(nullValue(), 
equalTo(jmResourceId))); } finally { jobManagerSharedServices.shutdown(); RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example 15
Source File: JobMasterTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that we continue reconnecting to the latest known RM after a disconnection * message. */ @Test public void testReconnectionAfterDisconnect() throws Exception { final JobMaster jobMaster = createJobMaster( configuration, jobGraph, haServices, new TestingJobManagerSharedServicesBuilder().build(), heartbeatServices); final JobMasterGateway jobMasterGateway = jobMaster.getSelfGateway(JobMasterGateway.class); CompletableFuture<Acknowledge> startFuture = jobMaster.start(jobMasterId); try { // wait for the start to complete startFuture.get(testingTimeout.toMilliseconds(), TimeUnit.MILLISECONDS); final TestingResourceManagerGateway testingResourceManagerGateway = createAndRegisterTestingResourceManagerGateway(); final BlockingQueue<JobMasterId> registrationsQueue = new ArrayBlockingQueue<>(1); testingResourceManagerGateway.setRegisterJobManagerFunction((jobMasterId, resourceID, s, jobID) -> { registrationsQueue.offer(jobMasterId); return CompletableFuture.completedFuture(testingResourceManagerGateway.getJobMasterRegistrationSuccess()); }); final ResourceManagerId resourceManagerId = testingResourceManagerGateway.getFencingToken(); notifyResourceManagerLeaderListeners(testingResourceManagerGateway); // wait for first registration attempt final JobMasterId firstRegistrationAttempt = registrationsQueue.take(); assertThat(firstRegistrationAttempt, equalTo(jobMasterId)); assertThat(registrationsQueue.isEmpty(), is(true)); jobMasterGateway.disconnectResourceManager(resourceManagerId, new FlinkException("Test exception")); // wait for the second registration attempt after the disconnect call assertThat(registrationsQueue.take(), equalTo(jobMasterId)); } finally { RpcUtils.terminateRpcEndpoint(jobMaster, testingTimeout); } }
Example 16
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN), timerService); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, 
jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 17
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN), timerService); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, 
jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 18
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Lets the first task executor registration succeed and declines every later one, then
 * expects the fatal error handler to receive a {@link RegistrationTimeoutException}.
 */
@Test
public void testMaximumRegistrationDurationAfterConnectionLoss() throws Exception {
    configuration.setString(TaskManagerOptions.REGISTRATION_TIMEOUT, "100 ms");

    final TaskSlotTable slotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);

    final long heartbeatInterval = 10L;

    final TaskManagerServices services = new TaskManagerServicesBuilder().setTaskSlotTable(slotTable).build();

    final TaskExecutor executorUnderTest = new TaskExecutor(
        rpc,
        TaskManagerConfiguration.fromConfiguration(configuration),
        haServices,
        services,
        new HeartbeatServices(heartbeatInterval, 10L),
        UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(),
        null,
        dummyBlobCacheService,
        testingFatalErrorHandler);

    executorUnderTest.start();

    final CompletableFuture<ResourceID> firstRegistration = new CompletableFuture<>();
    final OneShotLatch laterRegistration = new OneShotLatch();

    try {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

        rmGateway.setRegisterTaskExecutorFunction(
            tuple -> {
                if (!firstRegistration.complete(tuple.f1)) {
                    // the future was already completed, so this is a repeated registration
                    laterRegistration.trigger();
                    return CompletableFuture.completedFuture(
                        new RegistrationResponse.Decline("Only the first registration should succeed."));
                }

                return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess(
                    new InstanceID(),
                    rmGateway.getOwnResourceId(),
                    new ClusterInformation("localhost", 1234)));
            }
        );

        rpc.registerGateway(rmGateway.getAddress(), rmGateway);
        resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), UUID.randomUUID());

        final ResourceID registeredId = firstRegistration.get();

        assertThat(registeredId, equalTo(services.getTaskManagerLocation().getResourceID()));

        laterRegistration.await();

        final Throwable fatalError = testingFatalErrorHandler.getErrorFuture().get();

        assertThat(fatalError, is(notNullValue()));
        assertThat(ExceptionUtils.stripExecutionException(fatalError), instanceOf(RegistrationTimeoutException.class));

        // the error was expected, so clear it from the handler
        testingFatalErrorHandler.clearError();
    } finally {
        RpcUtils.terminateRpcEndpoint(executorUnderTest, timeout);
    }
}
Example 19
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests that the TaskExecutor registers again with a ResourceManager after it was
 * explicitly disconnected from it.
 */
@Test
public void testReconnectionAttemptIfExplicitlyDisconnected() throws Exception {
    final TaskSlotTable<Task> slotTable = TaskSlotUtils.createTaskSlotTable(1);
    final UnresolvedTaskManagerLocation tmLocation = new LocalUnresolvedTaskManagerLocation();

    final TaskExecutor executorUnderTest = createTaskExecutor(new TaskManagerServicesBuilder()
        .setTaskSlotTable(slotTable)
        .setUnresolvedTaskManagerLocation(tmLocation)
        .build());

    executorUnderTest.start();

    try {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();
        final ClusterInformation clusterInfo = new ClusterInformation("foobar", 1234);

        // every registration call is answered with this same, already successful response
        final CompletableFuture<RegistrationResponse> successResponse = CompletableFuture.completedFuture(
            new TaskExecutorRegistrationSuccess(new InstanceID(), ResourceID.generate(), clusterInfo));

        // records the resource id passed with every registration call
        final BlockingQueue<ResourceID> seenRegistrations = new ArrayBlockingQueue<>(1);

        rmGateway.setRegisterTaskExecutorFunction(registration -> {
            seenRegistrations.offer(registration.getResourceId());
            return successResponse;
        });

        rpc.registerGateway(rmGateway.getAddress(), rmGateway);
        resourceManagerLeaderRetriever.notifyListener(rmGateway.getAddress(), rmGateway.getFencingToken().toUUID());

        final ResourceID firstAttempt = seenRegistrations.take();
        assertThat(firstAttempt, equalTo(tmLocation.getResourceID()));

        final TaskExecutorGateway executorGateway = executorUnderTest.getSelfGateway(TaskExecutorGateway.class);

        assertThat(seenRegistrations, is(empty()));

        executorGateway.disconnectResourceManager(new FlinkException("Test exception"));

        // the explicit disconnect is expected to be followed by another registration
        final ResourceID secondAttempt = seenRegistrations.take();
        assertThat(secondAttempt, equalTo(tmLocation.getResourceID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(executorUnderTest, timeout);
    }
}
Example 20
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final 
TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, ResourceProfile.ZERO, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }