Java Code Examples for org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setSendSlotReportFunction()
The following examples show how to use
org.apache.flink.runtime.resourcemanager.utils.TestingResourceManagerGateway#setSendSlotReportFunction().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link TestingResourceManagerGateway} and installs three hooks on it:
 * a task executor registration function that answers with a
 * {@link TaskExecutorRegistrationSuccess} carrying {@code registrationId}, a slot-available
 * consumer that completes {@code availableSlotFuture}, and a slot report function that
 * triggers {@code taskExecutorIsRegistered} before acknowledging.
 *
 * <p>The resource manager leader retriever is notified about the gateway before the hooks
 * are installed.
 *
 * @param registrationId instance id handed out in the registration response
 * @param taskExecutorIsRegistered latch triggered whenever a slot report is received
 * @param availableSlotFuture future completed with the first slot-available notification
 * @return the configured gateway
 */
private TestingResourceManagerGateway createRmWithTmRegisterAndNotifySlotHooks(
        InstanceID registrationId,
        OneShotLatch taskExecutorIsRegistered,
        CompletableFuture<Tuple3<InstanceID, SlotID, AllocationID>> availableSlotFuture) {

    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    resourceManagerLeaderRetriever.notifyListener(
        rmGateway.getAddress(),
        rmGateway.getFencingToken().toUUID());

    // A fresh success response is built for every registration attempt.
    rmGateway.setRegisterTaskExecutorFunction(registration -> {
        final TaskExecutorRegistrationSuccess success = new TaskExecutorRegistrationSuccess(
            registrationId,
            rmGateway.getOwnResourceId(),
            new ClusterInformation("localhost", 1234));
        return CompletableFuture.completedFuture(success);
    });

    rmGateway.setNotifySlotAvailableConsumer(availableSlotFuture::complete);

    rmGateway.setSendSlotReportFunction(slotReport -> {
        taskExecutorIsRegistered.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });

    return rmGateway;
}
Example 2
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that the {@link TaskExecutor} delivers its initial slot report once it has
 * registered at the ResourceManager.
 */
@Test
public void testInitialSlotReport() throws Exception {
    final TaskExecutor taskExecutor = createTaskExecutor(1);

    taskExecutor.start();

    try {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

        // Captures the resource id (field f0) carried by the first slot report.
        final CompletableFuture<ResourceID> reportingResourceId = new CompletableFuture<>();
        rmGateway.setSendSlotReportFunction(slotReportTuple -> {
            reportingResourceId.complete(slotReportTuple.f0);
            return CompletableFuture.completedFuture(Acknowledge.get());
        });

        final String rmAddress = rmGateway.getAddress();
        rpc.registerGateway(rmAddress, rmGateway);
        resourceManagerLeaderRetriever.notifyListener(rmAddress, rmGateway.getFencingToken().toUUID());

        final ResourceID reportedBy = reportingResourceId.get();
        assertThat(reportedBy, equalTo(taskExecutor.getResourceID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example 3
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Verifies that the {@link TaskExecutor} delivers its initial slot report once it has
 * registered at the ResourceManager.
 */
@Test
public void testInitialSlotReport() throws Exception {
    final TaskSlotTable slotTable =
        new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
    final TaskManagerLocation location = new LocalTaskManagerLocation();

    final TaskManagerServicesBuilder servicesBuilder = new TaskManagerServicesBuilder();
    servicesBuilder.setTaskSlotTable(slotTable);
    servicesBuilder.setTaskManagerLocation(location);
    final TaskManagerServices services = servicesBuilder.build();

    final TaskExecutor taskExecutor = createTaskExecutor(services);

    taskExecutor.start();

    try {
        final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

        // Captures the resource id (field f0) carried by the first slot report.
        final CompletableFuture<ResourceID> reportingResourceId = new CompletableFuture<>();
        rmGateway.setSendSlotReportFunction(slotReportTuple -> {
            reportingResourceId.complete(slotReportTuple.f0);
            return CompletableFuture.completedFuture(Acknowledge.get());
        });

        final String rmAddress = rmGateway.getAddress();
        rpc.registerGateway(rmAddress, rmGateway);
        resourceManagerLeaderRetriever.notifyListener(rmAddress, rmGateway.getFencingToken().toUUID());

        final ResourceID reportedBy = reportingResourceId.get();
        assertThat(reportedBy, equalTo(location.getResourceID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example 4
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that after registering at the ResourceManager the {@link TaskExecutor}
 * transmits an initial slot report identifying itself.
 */
@Test
public void testInitialSlotReport() throws Exception {
    final TaskSlotTable slots =
        new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService);
    final TaskManagerLocation tmLocation = new LocalTaskManagerLocation();
    final TaskManagerServices tmServices = new TaskManagerServicesBuilder()
        .setTaskSlotTable(slots)
        .setTaskManagerLocation(tmLocation)
        .build();

    final TaskExecutor taskExecutor = createTaskExecutor(tmServices);

    taskExecutor.start();

    try {
        final TestingResourceManagerGateway gateway = new TestingResourceManagerGateway();
        final CompletableFuture<ResourceID> firstReportSender = new CompletableFuture<>();

        // Remember which resource id (field f0) sent the slot report, then acknowledge it.
        gateway.setSendSlotReportFunction(report -> {
            firstReportSender.complete(report.f0);
            return CompletableFuture.completedFuture(Acknowledge.get());
        });

        final String gatewayAddress = gateway.getAddress();
        rpc.registerGateway(gatewayAddress, gateway);
        resourceManagerLeaderRetriever.notifyListener(
            gatewayAddress,
            gateway.getFencingToken().toUUID());

        assertThat(firstReportSender.get(), equalTo(tmLocation.getResourceID()));
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example 5
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report * fails. */ @Test public void testInitialSlotReportFailure() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); taskExecutor.start(); try { final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2); testingResourceManagerGateway.setSendSlotReportFunction( resourceIDInstanceIDSlotReportTuple3 -> { try { return responseQueue.take(); } catch (InterruptedException e) { return FutureUtils.completedExceptionally(e); } }); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234))); final CountDownLatch numberRegistrations = new CountDownLatch(2); testingResourceManagerGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> { numberRegistrations.countDown(); return registrationResponse; }); responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception"))); responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get())); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); //wait for the second registration 
attempt numberRegistrations.await(); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 6
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); final 
TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example 7
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the correct partition/slot report is sent as part of the heartbeat response. */ @Test public void testHeartbeatReporting() throws Exception { final String rmAddress = "rm"; final UUID rmLeaderId = UUID.randomUUID(); // register the mock resource manager gateway final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final ResourceID rmResourceId = rmGateway.getOwnResourceId(); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234))); rmGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> { taskExecutorRegistrationFuture.complete(taskExecutorRegistration.getResourceId()); return registrationResponse; }); final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>(); rmGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<TaskExecutorHeartbeatPayload> heartbeatPayloadCompletableFuture = new CompletableFuture<>(); rmGateway.setTaskExecutorHeartbeatConsumer((resourceID, heartbeatPayload) -> heartbeatPayloadCompletableFuture.complete(heartbeatPayload)); rpc.registerGateway(rmAddress, rmGateway); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final ResourceProfile resourceProfile = ResourceProfile.fromResources(1.0, 1); final SlotReport slotReport1 = new SlotReport( new SlotStatus( slotId, resourceProfile)); final SlotReport slotReport2 = new SlotReport( new SlotStatus( slotId, resourceProfile, new JobID(), new AllocationID())); final Queue<SlotReport> reports = new ArrayDeque<>(Arrays.asList(slotReport1, 
slotReport2)); final TaskSlotTable<Task> taskSlotTable = TestingTaskSlotTable .<Task>newBuilder() .createSlotReportSupplier(reports::poll) .closeAsyncReturns(CompletableFuture.completedFuture(null)) .build(); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutorPartitionTracker partitionTracker = createPartitionTrackerWithFixedPartitionReport(taskManagerServices.getShuffleEnvironment()); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices, HEARTBEAT_SERVICES, partitionTracker); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(), equalTo(unresolvedTaskManagerLocation.getResourceID())); assertThat(initialSlotReportFuture.get(), equalTo(slotReport1)); TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // trigger the heartbeat asynchronously taskExecutorGateway.heartbeatFromResourceManager(rmResourceId); // wait for heartbeat response SlotReport actualSlotReport = heartbeatPayloadCompletableFuture.get().getSlotReport(); // the new slot report should be reported assertEquals(slotReport2, actualSlotReport); ClusterPartitionReport actualClusterPartitionReport = heartbeatPayloadCompletableFuture.get().getClusterPartitionReport(); assertEquals(partitionTracker.createClusterPartitionReport(), actualClusterPartitionReport); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example 8
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the {@link SlotReport} sent to the RM does not contain * out dated/stale information as slots are being requested from the * TM. * * <p>This is a probabilistic test case and needs to be executed * several times to produce a failure without the fix for FLINK-12865. */ @Test public void testSlotReportDoesNotContainStaleInformation() throws Exception { final OneShotLatch receivedSlotRequest = new OneShotLatch(); final CompletableFuture<Void> verifySlotReportFuture = new CompletableFuture<>(); final OneShotLatch terminateSlotReportVerification = new OneShotLatch(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); // Assertions for this test testingResourceManagerGateway.setTaskExecutorHeartbeatConsumer((ignored, slotReport) -> { try { final ArrayList<SlotStatus> slots = Lists.newArrayList(slotReport); assertThat(slots, hasSize(1)); final SlotStatus slotStatus = slots.get(0); log.info("Received SlotStatus: {}", slotStatus); if (receivedSlotRequest.isTriggered()) { assertThat(slotStatus.getAllocationID(), is(notNullValue())); } else { assertThat(slotStatus.getAllocationID(), is(nullValue())); } } catch (AssertionError e) { verifySlotReportFuture.completeExceptionally(e); } if (terminateSlotReportVerification.isTriggered()) { verifySlotReportFuture.complete(null); } }); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); testingResourceManagerGateway.setSendSlotReportFunction(ignored -> { taskExecutorRegistrationFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(new 
AllocateSlotNotifyingTaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService, receivedSlotRequest)) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final ResourceID taskExecutorResourceId = taskManagerServices.getTaskManagerLocation().getResourceID(); taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final ScheduledExecutorService heartbeatExecutor = java.util.concurrent.Executors.newSingleThreadScheduledExecutor(); try { taskExecutorRegistrationFuture.get(); final OneShotLatch scheduleFirstHeartbeat = new OneShotLatch(); final ResourceID resourceManagerResourceId = testingResourceManagerGateway.getOwnResourceId(); final long heartbeatInterval = 5L; heartbeatExecutor.scheduleWithFixedDelay( () -> { scheduleFirstHeartbeat.trigger(); taskExecutorGateway.heartbeatFromResourceManager(resourceManagerResourceId); }, 0L, heartbeatInterval, TimeUnit.MILLISECONDS); scheduleFirstHeartbeat.await(); SlotID slotId = new SlotID(taskExecutorResourceId, 0); final CompletableFuture<Acknowledge> requestSlotFuture = taskExecutorGateway.requestSlot( slotId, jobId, new AllocationID(), "foobar", testingResourceManagerGateway.getFencingToken(), timeout); requestSlotFuture.get(); terminateSlotReportVerification.trigger(); verifySlotReportFuture.get(); } finally { ExecutorUtils.gracefulShutdown(timeout.toMilliseconds(), TimeUnit.MILLISECONDS, heartbeatExecutor); RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 9
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Verifies that the TaskExecutor reconciles its view of the slots with the JobMaster's view,
 * using the AllocatedSlotReport delivered by the heartbeat (see FLINK-11059).
 *
 * <p>One allocation is known to both sides, one only to the JobMaster and one only to the
 * TaskExecutor. The JobMaster-only allocation is expected to be failed and the
 * TaskExecutor-only allocation is expected to be reported as free.
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    final CountDownLatch slotsActivated = new CountDownLatch(2);
    final TaskSlotTable slotTable = new ActivateSlotNotifyingTaskSlotTable(
        Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
        timerService,
        slotsActivated);
    final TaskManagerServices services = new TaskManagerServicesBuilder()
        .setTaskSlotTable(slotTable)
        .build();

    final TaskExecutor taskExecutor = createTaskExecutor(services);

    final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway();

    final BlockingQueue<AllocationID> freedAllocations = new ArrayBlockingQueue<>(2);

    final OneShotLatch initialReportSent = new OneShotLatch();
    rmGateway.setSendSlotReportFunction(slotReportTuple -> {
        initialReportSent.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    // Field f2 of the notification tuple is the allocation id of the freed slot.
    rmGateway.setNotifySlotAvailableConsumer(
        availableSlotTuple -> freedAllocations.offer(availableSlotTuple.f2));

    final String rmAddress = rmGateway.getAddress();
    rpc.registerGateway(rmAddress, rmGateway);
    resourceManagerLeaderRetriever.notifyListener(rmAddress, rmGateway.getFencingToken().toUUID());

    final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
    final ResourceID jobManagerResourceId = ResourceID.generate();
    final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder()
        .setFailSlotConsumer((resourceID, allocationID, throwable) -> failedAllocations.offer(allocationID))
        .setOfferSlotsFunction((resourceID, slotOffers) -> CompletableFuture.completedFuture(new ArrayList<>(slotOffers)))
        .setRegisterTaskManagerFunction((ignoredA, ignoredB) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobManagerResourceId)))
        .build();

    final String jmAddress = jobMasterGateway.getAddress();
    rpc.registerGateway(jmAddress, jobMasterGateway);
    jobManagerLeaderRetriever.notifyListener(jmAddress, jobMasterGateway.getFencingToken().toUUID());

    taskExecutor.start();

    try {
        final TaskExecutorGateway tmGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class);

        initialReportSent.await();

        final SlotID firstSlot = new SlotID(taskExecutor.getResourceID(), 0);
        final SlotID secondSlot = new SlotID(taskExecutor.getResourceID(), 1);

        final AllocationID knownToBoth = new AllocationID();
        final AllocationID knownToJmOnly = new AllocationID();
        final AllocationID knownToTmOnly = new AllocationID();

        tmGateway.requestSlot(firstSlot, jobId, knownToBoth, "foobar", rmGateway.getFencingToken(), timeout);
        tmGateway.requestSlot(secondSlot, jobId, knownToTmOnly, "foobar", rmGateway.getFencingToken(), timeout);

        slotsActivated.await();

        // The JobMaster's report deliberately disagrees with the TaskExecutor on slot 1.
        final List<AllocatedSlotInfo> jmSlotInfos = Arrays.asList(
            new AllocatedSlotInfo(0, knownToBoth),
            new AllocatedSlotInfo(1, knownToJmOnly));
        final AllocatedSlotReport jmReport = new AllocatedSlotReport(jobId, jmSlotInfos);
        tmGateway.heartbeatFromJobManager(jobManagerResourceId, jmReport);

        assertThat(failedAllocations.take(), is(knownToJmOnly));
        assertThat(freedAllocations.take(), is(knownToTmOnly));
        assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(freedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(taskExecutor, timeout);
    }
}
Example 10
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job is removed from the JobLeaderService once a TaskExecutor has * no more slots assigned to this job. * * <p>See FLINK-8504 */ @Test public void testRemoveJobFromJobLeaderService() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices); try { final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReport.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID()); final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>(); final CompletableFuture<Void> stopFuture = new CompletableFuture<>(); final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService( startFuture, stopFuture); haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever); taskExecutor.start(); taskExecutor.waitUntilStarted(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final SlotID slotId = new 
SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final AllocationID allocationId = new AllocationID(); assertThat(startFuture.isDone(), is(false)); final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService(); assertThat(jobLeaderService.containsJob(jobId), is(false)); // wait for the initial slot report initialSlotReport.get(); taskExecutorGateway.requestSlot( slotId, jobId, allocationId, ResourceProfile.ZERO, "foobar", resourceManagerId, timeout).get(); // wait until the job leader retrieval service for jobId is started startFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(true)); taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get(); // wait that the job leader retrieval service for jobId stopped becaue it should get removed stopFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(false)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 11
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report * fails. */ @Test public void testInitialSlotReportFailure() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .setTaskManagerLocation(taskManagerLocation) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); taskExecutor.start(); try { final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2); testingResourceManagerGateway.setSendSlotReportFunction( resourceIDInstanceIDSlotReportTuple3 -> { try { return responseQueue.take(); } catch (InterruptedException e) { return FutureUtils.completedExceptionally(e); } }); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234))); final CountDownLatch numberRegistrations = new CountDownLatch(2); testingResourceManagerGateway.setRegisterTaskExecutorFunction(new Function<Tuple4<String, ResourceID, Integer, HardwareDescription>, CompletableFuture<RegistrationResponse>>() { @Override public CompletableFuture<RegistrationResponse> apply(Tuple4<String, ResourceID, Integer, HardwareDescription> stringResourceIDIntegerHardwareDescriptionTuple4) { numberRegistrations.countDown(); return registrationResponse; } }); responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception"))); responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get())); 
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); //wait for the second registration attempt numberRegistrations.await(); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 12
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the correct slot report is sent as part of the heartbeat response. */ @Test public void testHeartbeatSlotReporting() throws Exception { final String rmAddress = "rm"; final UUID rmLeaderId = UUID.randomUUID(); // register the mock resource manager gateway final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final ResourceID rmResourceId = rmGateway.getOwnResourceId(); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234))); rmGateway.setRegisterTaskExecutorFunction(stringResourceIDIntegerHardwareDescriptionTuple4 -> { taskExecutorRegistrationFuture.complete(stringResourceIDIntegerHardwareDescriptionTuple4.f1); return registrationResponse; }); final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>(); rmGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<SlotReport> heartbeatSlotReportFuture = new CompletableFuture<>(); rmGateway.setTaskExecutorHeartbeatConsumer((resourceID, slotReport) -> heartbeatSlotReportFuture.complete(slotReport)); rpc.registerGateway(rmAddress, rmGateway); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1); final SlotReport slotReport1 = new SlotReport( new SlotStatus( slotId, resourceProfile)); final SlotReport slotReport2 = new SlotReport( new SlotStatus( slotId, resourceProfile, new JobID(), new AllocationID())); final TestingTaskSlotTable taskSlotTable = new TestingTaskSlotTable(new ArrayDeque<>(Arrays.asList(slotReport1, 
slotReport2))); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = new TaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, HEARTBEAT_SERVICES, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(), equalTo(taskManagerLocation.getResourceID())); assertThat(initialSlotReportFuture.get(), equalTo(slotReport1)); TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // trigger the heartbeat asynchronously taskExecutorGateway.heartbeatFromResourceManager(rmResourceId); // wait for heartbeat response SlotReport actualSlotReport = heartbeatSlotReportFuture.get(); // the new slot report should be reported assertEquals(slotReport2, actualSlotReport); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example 13
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobManagerTable jobManagerTable = new JobManagerTable(); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobManagerTable(jobManagerTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = 
createTaskExecutor(taskManagerServices); try { taskManager.start(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example 14
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the correct slot report is sent as part of the heartbeat response. */ @Test public void testHeartbeatSlotReporting() throws Exception { final String rmAddress = "rm"; final UUID rmLeaderId = UUID.randomUUID(); // register the mock resource manager gateway final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final ResourceID rmResourceId = rmGateway.getOwnResourceId(); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234))); rmGateway.setRegisterTaskExecutorFunction(stringResourceIDIntegerHardwareDescriptionTuple4 -> { taskExecutorRegistrationFuture.complete(stringResourceIDIntegerHardwareDescriptionTuple4.f1); return registrationResponse; }); final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>(); rmGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<SlotReport> heartbeatSlotReportFuture = new CompletableFuture<>(); rmGateway.setTaskExecutorHeartbeatConsumer((resourceID, slotReport) -> heartbeatSlotReportFuture.complete(slotReport)); rpc.registerGateway(rmAddress, rmGateway); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final ResourceProfile resourceProfile = new ResourceProfile(1.0, 1); final SlotReport slotReport1 = new SlotReport( new SlotStatus( slotId, resourceProfile)); final SlotReport slotReport2 = new SlotReport( new SlotStatus( slotId, resourceProfile, new JobID(), new AllocationID())); final TestingTaskSlotTable taskSlotTable = new TestingTaskSlotTable(new ArrayDeque<>(Arrays.asList(slotReport1, 
slotReport2))); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(), equalTo(taskManagerLocation.getResourceID())); assertThat(initialSlotReportFuture.get(), equalTo(slotReport1)); TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // trigger the heartbeat asynchronously taskExecutorGateway.heartbeatFromResourceManager(rmResourceId); // wait for heartbeat response SlotReport actualSlotReport = heartbeatSlotReportFuture.get(); // the new slot report should be reported assertEquals(slotReport2, actualSlotReport); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example 15
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the {@link SlotReport} sent to the RM does not contain * out dated/stale information as slots are being requested from the * TM. * * <p>This is a probabilistic test case and needs to be executed * several times to produce a failure without the fix for FLINK-12865. */ @Test public void testSlotReportDoesNotContainStaleInformation() throws Exception { final OneShotLatch receivedSlotRequest = new OneShotLatch(); final CompletableFuture<Void> verifySlotReportFuture = new CompletableFuture<>(); final OneShotLatch terminateSlotReportVerification = new OneShotLatch(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); // Assertions for this test testingResourceManagerGateway.setTaskExecutorHeartbeatConsumer((ignored, slotReport) -> { try { final ArrayList<SlotStatus> slots = Lists.newArrayList(slotReport); assertThat(slots, hasSize(1)); final SlotStatus slotStatus = slots.get(0); log.info("Received SlotStatus: {}", slotStatus); if (receivedSlotRequest.isTriggered()) { assertThat(slotStatus.getAllocationID(), is(notNullValue())); } else { assertThat(slotStatus.getAllocationID(), is(nullValue())); } } catch (AssertionError e) { verifySlotReportFuture.completeExceptionally(e); } if (terminateSlotReportVerification.isTriggered()) { verifySlotReportFuture.complete(null); } }); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); testingResourceManagerGateway.setSendSlotReportFunction(ignored -> { taskExecutorRegistrationFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(new 
AllocateSlotNotifyingTaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService, receivedSlotRequest)) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final ResourceID taskExecutorResourceId = taskManagerServices.getTaskManagerLocation().getResourceID(); taskExecutor.start(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final ScheduledExecutorService heartbeatExecutor = java.util.concurrent.Executors.newSingleThreadScheduledExecutor(); try { taskExecutorRegistrationFuture.get(); final OneShotLatch scheduleFirstHeartbeat = new OneShotLatch(); final ResourceID resourceManagerResourceId = testingResourceManagerGateway.getOwnResourceId(); final long heartbeatInterval = 5L; heartbeatExecutor.scheduleWithFixedDelay( () -> { scheduleFirstHeartbeat.trigger(); taskExecutorGateway.heartbeatFromResourceManager(resourceManagerResourceId); }, 0L, heartbeatInterval, TimeUnit.MILLISECONDS); scheduleFirstHeartbeat.await(); SlotID slotId = new SlotID(taskExecutorResourceId, 0); final CompletableFuture<Acknowledge> requestSlotFuture = taskExecutorGateway.requestSlot( slotId, jobId, new AllocationID(), "foobar", testingResourceManagerGateway.getFencingToken(), timeout); requestSlotFuture.get(); terminateSlotReportVerification.trigger(); verifySlotReportFuture.get(); } finally { ExecutorUtils.gracefulShutdown(timeout.toMilliseconds(), TimeUnit.MILLISECONDS, heartbeatExecutor); RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 16
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Checks that a job master heartbeat carrying an {@link AllocatedSlotReport} reconciles the
 * slot views of both sides (see FLINK-11059): an allocation that only the job master reports
 * is failed towards the job master, and an allocation that only the task executor holds is
 * announced as available to the resource manager.
 */
@Test
public void testSyncSlotsWithJobMasterByHeartbeat() throws Exception {
    // The latch is handed to the slot table and awaited below before the heartbeat is sent.
    final CountDownLatch slotsActivated = new CountDownLatch(2);
    final TaskSlotTable slotTable = new ActivateSlotNotifyingTaskSlotTable(
        Arrays.asList(ResourceProfile.UNKNOWN, ResourceProfile.UNKNOWN),
        timerService,
        slotsActivated);
    final TaskManagerServices services = new TaskManagerServicesBuilder()
        .setTaskSlotTable(slotTable)
        .build();

    final TaskExecutor executorUnderTest = createTaskExecutor(services);

    // Resource manager: signals the initial slot report and collects freed allocations.
    final TestingResourceManagerGateway resourceManager = new TestingResourceManagerGateway();
    final BlockingQueue<AllocationID> freedAllocations = new ArrayBlockingQueue<>(2);
    final OneShotLatch slotReportReceived = new OneShotLatch();

    resourceManager.setSendSlotReportFunction(ignored -> {
        slotReportReceived.trigger();
        return CompletableFuture.completedFuture(Acknowledge.get());
    });
    resourceManager.setNotifySlotAvailableConsumer(
        availableSlot -> freedAllocations.offer(availableSlot.f2));

    rpc.registerGateway(resourceManager.getAddress(), resourceManager);
    resourceManagerLeaderRetriever.notifyListener(
        resourceManager.getAddress(),
        resourceManager.getFencingToken().toUUID());

    // Job master: collects failed allocations and echoes back every slot offer.
    final BlockingQueue<AllocationID> failedAllocations = new ArrayBlockingQueue<>(2);
    final ResourceID jobMasterResourceId = ResourceID.generate();
    final TestingJobMasterGateway jobMaster = new TestingJobMasterGatewayBuilder()
        .setFailSlotConsumer(
            (ignoredResourceId, failedAllocation, ignoredCause) -> failedAllocations.offer(failedAllocation))
        .setOfferSlotsFunction(
            (ignoredResourceId, offers) -> CompletableFuture.completedFuture(new ArrayList<>(offers)))
        .setRegisterTaskManagerFunction(
            (unusedFirst, unusedSecond) -> CompletableFuture.completedFuture(new JMTMRegistrationSuccess(jobMasterResourceId)))
        .build();
    final String jobMasterAddress = jobMaster.getAddress();
    rpc.registerGateway(jobMasterAddress, jobMaster);
    jobManagerLeaderRetriever.notifyListener(jobMasterAddress, jobMaster.getFencingToken().toUUID());

    executorUnderTest.start();

    try {
        final TaskExecutorGateway selfGateway = executorUnderTest.getSelfGateway(TaskExecutorGateway.class);

        slotReportReceived.await();

        final SlotID firstSlot = new SlotID(executorUnderTest.getResourceID(), 0);
        final SlotID secondSlot = new SlotID(executorUnderTest.getResourceID(), 1);

        final AllocationID knownToBoth = new AllocationID();
        final AllocationID knownOnlyToJobMaster = new AllocationID();
        final AllocationID knownOnlyToTaskExecutor = new AllocationID();

        // The task executor's view: slot 0 -> knownToBoth, slot 1 -> knownOnlyToTaskExecutor.
        selfGateway.requestSlot(
            firstSlot, jobId, knownToBoth, "foobar", resourceManager.getFencingToken(), timeout);
        selfGateway.requestSlot(
            secondSlot, jobId, knownOnlyToTaskExecutor, "foobar", resourceManager.getFencingToken(), timeout);

        slotsActivated.await();

        // The job master's view differs for slot 1.
        final List<AllocatedSlotInfo> jobMasterSlots = Arrays.asList(
            new AllocatedSlotInfo(0, knownToBoth),
            new AllocatedSlotInfo(1, knownOnlyToJobMaster));
        final AllocatedSlotReport jobMasterView = new AllocatedSlotReport(jobId, jobMasterSlots);
        selfGateway.heartbeatFromJobManager(jobMasterResourceId, jobMasterView);

        assertThat(failedAllocations.take(), is(knownOnlyToJobMaster));
        assertThat(freedAllocations.take(), is(knownOnlyToTaskExecutor));

        // Nothing else may be failed or freed within a short grace period.
        assertThat(failedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
        assertThat(freedAllocations.poll(5L, TimeUnit.MILLISECONDS), nullValue());
    } finally {
        RpcUtils.terminateRpcEndpoint(executorUnderTest, timeout);
    }
}
Example 17
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that offers slots to job master timeout and retry. */ @Test public void testOfferSlotToJobMasterAfterTimeout() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(2); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); final AllocationID allocationId = new AllocationID(); final CompletableFuture<ResourceID> initialSlotReportFuture = new CompletableFuture<>(); final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); testingResourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); final CountDownLatch slotOfferings = new CountDownLatch(3); final CompletableFuture<AllocationID> offeredSlotFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { assertThat(slotOffers.size(), is(1)); slotOfferings.countDown(); if (slotOfferings.getCount() == 0) { offeredSlotFuture.complete(slotOffers.iterator().next().getAllocationId()); return CompletableFuture.completedFuture(slotOffers); } else { return FutureUtils.completedExceptionally(new TimeoutException()); } }) .build(); final String jobManagerAddress = jobMasterGateway.getAddress(); rpc.registerGateway(jobManagerAddress, jobMasterGateway); jobManagerLeaderRetriever.notifyListener(jobManagerAddress, jobMasterGateway.getFencingToken().toUUID()); try { taskExecutor.start(); final 
TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); // wait for the connection to the ResourceManager initialSlotReportFuture.get(); taskExecutorGateway.requestSlot( new SlotID(taskExecutor.getResourceID(), 0), jobId, allocationId, ResourceProfile.ZERO, jobManagerAddress, testingResourceManagerGateway.getFencingToken(), timeout).get(); slotOfferings.await(); assertThat(offeredSlotFuture.get(), is(allocationId)); assertTrue(taskSlotTable.isSlotFree(1)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 18
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the {@link TaskExecutor} tries to reconnect if the initial slot report * fails. */ @Test public void testInitialSlotReportFailure() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskManagerLocation taskManagerLocation = new LocalTaskManagerLocation(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .setTaskManagerLocation(taskManagerLocation) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices); taskExecutor.start(); try { final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(2); testingResourceManagerGateway.setSendSlotReportFunction( resourceIDInstanceIDSlotReportTuple3 -> { try { return responseQueue.take(); } catch (InterruptedException e) { return FutureUtils.completedExceptionally(e); } }); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), testingResourceManagerGateway.getOwnResourceId(), new ClusterInformation("foobar", 1234))); final CountDownLatch numberRegistrations = new CountDownLatch(2); testingResourceManagerGateway.setRegisterTaskExecutorFunction(new Function<Tuple4<String, ResourceID, Integer, HardwareDescription>, CompletableFuture<RegistrationResponse>>() { @Override public CompletableFuture<RegistrationResponse> apply(Tuple4<String, ResourceID, Integer, HardwareDescription> stringResourceIDIntegerHardwareDescriptionTuple4) { numberRegistrations.countDown(); return registrationResponse; } }); responseQueue.offer(FutureUtils.completedExceptionally(new FlinkException("Test exception"))); responseQueue.offer(CompletableFuture.completedFuture(Acknowledge.get())); 
rpc.registerGateway(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(testingResourceManagerGateway.getAddress(), testingResourceManagerGateway.getFencingToken().toUUID()); //wait for the second registration attempt numberRegistrations.await(); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 19
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a job is removed from the JobLeaderService once a TaskExecutor has * no more slots assigned to this job. * * <p>See FLINK-8504 */ @Test public void testRemoveJobFromJobLeaderService() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = new TestingTaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, HEARTBEAT_SERVICES, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReport.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID()); final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>(); final CompletableFuture<Void> stopFuture = new CompletableFuture<>(); final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService( startFuture, stopFuture); haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever); taskExecutor.start(); 
taskExecutor.waitUntilStarted(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final AllocationID allocationId = new AllocationID(); assertThat(startFuture.isDone(), is(false)); final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService(); assertThat(jobLeaderService.containsJob(jobId), is(false)); // wait for the initial slot report initialSlotReport.get(); taskExecutorGateway.requestSlot( slotId, jobId, allocationId, "foobar", resourceManagerId, timeout).get(); // wait until the job leader retrieval service for jobId is started startFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(true)); taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get(); // wait that the job leader retrieval service for jobId stopped becaue it should get removed stopFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(false)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example 20
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobManagerTable jobManagerTable = new JobManagerTable(); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobManagerTable(jobManagerTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = 
new TaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, HEARTBEAT_SERVICES, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { taskManager.start(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }