org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager Java Examples
The following examples show how to use
org.apache.flink.runtime.state.TaskExecutorLocalStateStoresManager.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 6 votes |
private TaskExecutorTestingContext createTaskExecutorTestingContext(final TaskSlotTable<Task> taskSlotTable) throws IOException { final OneShotLatch offerSlotsLatch = new OneShotLatch(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offerSlotsLatch.trigger(); return CompletableFuture.completedFuture(slotOffers); }).build(); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final JobLeaderService jobLeaderService = new DefaultJobLeaderService( unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); TaskExecutorLocalStateStoresManager stateStoresManager = createTaskExecutorLocalStateStoresManager(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(new TaskManagerServicesBuilder() .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(stateStoresManager) .build()); jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); return new TaskExecutorTestingContext(jobMasterGateway, taskSlotTable, taskExecutor); }
Example #2
Source File: TaskManagerServicesBuilder.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public TaskManagerServicesBuilder() { taskManagerLocation = new LocalTaskManagerLocation(); memoryManager = new MemoryManager( MemoryManager.MIN_PAGE_SIZE, 1, MemoryManager.MIN_PAGE_SIZE, MemoryType.HEAP, false); ioManager = mock(IOManager.class); networkEnvironment = mock(NetworkEnvironment.class); broadcastVariableManager = new BroadcastVariableManager(); taskSlotTable = mock(TaskSlotTable.class); jobManagerTable = new JobManagerTable(); jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); taskStateManager = mock(TaskExecutorLocalStateStoresManager.class); }
Example #3
Source File: TaskManagerServices.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
TaskManagerServices( TaskManagerLocation taskManagerLocation, MemoryManager memoryManager, IOManager ioManager, NetworkEnvironment networkEnvironment, BroadcastVariableManager broadcastVariableManager, TaskSlotTable taskSlotTable, JobManagerTable jobManagerTable, JobLeaderService jobLeaderService, TaskExecutorLocalStateStoresManager taskManagerStateStore) { this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation); this.memoryManager = Preconditions.checkNotNull(memoryManager); this.ioManager = Preconditions.checkNotNull(ioManager); this.networkEnvironment = Preconditions.checkNotNull(networkEnvironment); this.broadcastVariableManager = Preconditions.checkNotNull(broadcastVariableManager); this.taskSlotTable = Preconditions.checkNotNull(taskSlotTable); this.jobManagerTable = Preconditions.checkNotNull(jobManagerTable); this.jobLeaderService = Preconditions.checkNotNull(jobLeaderService); this.taskManagerStateStore = Preconditions.checkNotNull(taskManagerStateStore); }
Example #4
Source File: TaskManagerServicesBuilder.java From flink with Apache License 2.0 | 6 votes |
public TaskManagerServicesBuilder() { taskManagerLocation = new LocalTaskManagerLocation(); memoryManager = new MemoryManager( MemoryManager.MIN_PAGE_SIZE, 1, MemoryManager.MIN_PAGE_SIZE, MemoryType.HEAP, false); ioManager = mock(IOManager.class); shuffleEnvironment = mock(ShuffleEnvironment.class); kvStateService = new KvStateService(new KvStateRegistry(), null, null); broadcastVariableManager = new BroadcastVariableManager(); taskEventDispatcher = new TaskEventDispatcher(); taskSlotTable = mock(TaskSlotTable.class); jobManagerTable = new JobManagerTable(); jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); taskStateManager = mock(TaskExecutorLocalStateStoresManager.class); }
Example #5
Source File: TaskManagerServices.java From flink with Apache License 2.0 | 6 votes |
TaskManagerServices( TaskManagerLocation taskManagerLocation, MemoryManager memoryManager, IOManager ioManager, ShuffleEnvironment<?, ?> shuffleEnvironment, KvStateService kvStateService, BroadcastVariableManager broadcastVariableManager, TaskSlotTable taskSlotTable, JobManagerTable jobManagerTable, JobLeaderService jobLeaderService, TaskExecutorLocalStateStoresManager taskManagerStateStore, TaskEventDispatcher taskEventDispatcher) { this.taskManagerLocation = Preconditions.checkNotNull(taskManagerLocation); this.memoryManager = Preconditions.checkNotNull(memoryManager); this.ioManager = Preconditions.checkNotNull(ioManager); this.shuffleEnvironment = Preconditions.checkNotNull(shuffleEnvironment); this.kvStateService = Preconditions.checkNotNull(kvStateService); this.broadcastVariableManager = Preconditions.checkNotNull(broadcastVariableManager); this.taskSlotTable = Preconditions.checkNotNull(taskSlotTable); this.jobManagerTable = Preconditions.checkNotNull(jobManagerTable); this.jobLeaderService = Preconditions.checkNotNull(jobLeaderService); this.taskManagerStateStore = Preconditions.checkNotNull(taskManagerStateStore); this.taskEventDispatcher = Preconditions.checkNotNull(taskEventDispatcher); }
Example #6
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception { final String resourceManagerAddress = "/resource/manager/address/one"; final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final CountDownLatch taskManagerRegisteredLatch = new CountDownLatch(1); testingResourceManagerGateway.setRegisterTaskExecutorFunction(FunctionUtils.uncheckedFunction( ignored -> { taskManagerRegisteredLatch.countDown(); return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess( new InstanceID(), new ResourceID(resourceManagerAddress), new ClusterInformation("localhost", 1234))); } )); rpc.registerGateway(resourceManagerAddress, testingResourceManagerGateway); final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); resourceManagerLeaderRetriever.notifyListener(resourceManagerAddress, UUID.randomUUID()); assertTrue(taskManagerRegisteredLatch.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #7
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception { final String resourceManagerAddress = "/resource/manager/address/one"; final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final CountDownLatch taskManagerRegisteredLatch = new CountDownLatch(1); testingResourceManagerGateway.setRegisterTaskExecutorFunction(FunctionUtils.uncheckedFunction( ignored -> { taskManagerRegisteredLatch.countDown(); return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess( new InstanceID(), new ResourceID(resourceManagerAddress), new ClusterInformation("localhost", 1234))); } )); rpc.registerGateway(resourceManagerAddress, testingResourceManagerGateway); final TaskSlotTable taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); resourceManagerLeaderRetriever.notifyListener(resourceManagerAddress, UUID.randomUUID()); assertTrue(taskManagerRegisteredLatch.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #8
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testShouldShutDownTaskManagerServicesInPostStop() throws Exception { final TaskSlotTableImpl<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final IOManager ioManager = new IOManagerAsync(tmp.newFolder().getAbsolutePath()); final TaskExecutorLocalStateStoresManager localStateStoresManager = new TaskExecutorLocalStateStoresManager( false, ioManager.getSpillingDirectories(), Executors.directExecutor()); nettyShuffleEnvironment.start(); final KvStateService kvStateService = new KvStateService(new KvStateRegistry(), null, null); kvStateService.start(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setIoManager(ioManager) .setShuffleEnvironment(nettyShuffleEnvironment) .setKvStateService(kvStateService) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } assertThat(taskSlotTable.isClosed(), is(true)); assertThat(nettyShuffleEnvironment.isClosed(), is(true)); assertThat(kvStateService.isShutdown(), is(true)); }
Example #9
Source File: TaskManagerServicesBuilder.java From flink with Apache License 2.0 | 5 votes |
public TaskManagerServicesBuilder() { unresolvedTaskManagerLocation = new LocalUnresolvedTaskManagerLocation(); ioManager = mock(IOManager.class); shuffleEnvironment = mock(ShuffleEnvironment.class); kvStateService = new KvStateService(new KvStateRegistry(), null, null); broadcastVariableManager = new BroadcastVariableManager(); taskEventDispatcher = new TaskEventDispatcher(); taskSlotTable = TestingTaskSlotTable.<Task>newBuilder().closeAsyncReturns(CompletableFuture.completedFuture(null)).build(); jobTable = DefaultJobTable.create(); jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); taskStateManager = mock(TaskExecutorLocalStateStoresManager.class); ioExecutor = TestingUtils.defaultExecutor(); libraryCacheManager = TestingLibraryCacheManager.newBuilder().build(); }
Example #10
Source File: TaskManagerServices.java From flink with Apache License 2.0 | 5 votes |
TaskManagerServices( UnresolvedTaskManagerLocation unresolvedTaskManagerLocation, long managedMemorySize, IOManager ioManager, ShuffleEnvironment<?, ?> shuffleEnvironment, KvStateService kvStateService, BroadcastVariableManager broadcastVariableManager, TaskSlotTable<Task> taskSlotTable, JobTable jobTable, JobLeaderService jobLeaderService, TaskExecutorLocalStateStoresManager taskManagerStateStore, TaskEventDispatcher taskEventDispatcher, ExecutorService ioExecutor, LibraryCacheManager libraryCacheManager) { this.unresolvedTaskManagerLocation = Preconditions.checkNotNull(unresolvedTaskManagerLocation); this.managedMemorySize = managedMemorySize; this.ioManager = Preconditions.checkNotNull(ioManager); this.shuffleEnvironment = Preconditions.checkNotNull(shuffleEnvironment); this.kvStateService = Preconditions.checkNotNull(kvStateService); this.broadcastVariableManager = Preconditions.checkNotNull(broadcastVariableManager); this.taskSlotTable = Preconditions.checkNotNull(taskSlotTable); this.jobTable = Preconditions.checkNotNull(jobTable); this.jobLeaderService = Preconditions.checkNotNull(jobLeaderService); this.taskManagerStateStore = Preconditions.checkNotNull(taskManagerStateStore); this.taskEventDispatcher = Preconditions.checkNotNull(taskEventDispatcher); this.ioExecutor = Preconditions.checkNotNull(ioExecutor); this.libraryCacheManager = Preconditions.checkNotNull(libraryCacheManager); }
Example #11
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testImmediatelyRegistersIfLeaderIsKnown() throws Exception { final String resourceManagerAddress = "/resource/manager/address/one"; final TestingResourceManagerGateway testingResourceManagerGateway = new TestingResourceManagerGateway(); final CountDownLatch taskManagerRegisteredLatch = new CountDownLatch(1); testingResourceManagerGateway.setRegisterTaskExecutorFunction(FunctionUtils.uncheckedFunction( ignored -> { taskManagerRegisteredLatch.countDown(); return CompletableFuture.completedFuture(new TaskExecutorRegistrationSuccess( new InstanceID(), new ResourceID(resourceManagerAddress), new ClusterInformation("localhost", 1234))); } )); rpc.registerGateway(resourceManagerAddress, testingResourceManagerGateway); final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); resourceManagerLeaderRetriever.notifyListener(resourceManagerAddress, UUID.randomUUID()); assertTrue(taskManagerRegisteredLatch.await(timeout.toMilliseconds(), TimeUnit.MILLISECONDS)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #12
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the correct partition/slot report is sent as part of the heartbeat response. */ @Test public void testHeartbeatReporting() throws Exception { final String rmAddress = "rm"; final UUID rmLeaderId = UUID.randomUUID(); // register the mock resource manager gateway final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway(); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final ResourceID rmResourceId = rmGateway.getOwnResourceId(); final CompletableFuture<RegistrationResponse> registrationResponse = CompletableFuture.completedFuture( new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234))); rmGateway.setRegisterTaskExecutorFunction(taskExecutorRegistration -> { taskExecutorRegistrationFuture.complete(taskExecutorRegistration.getResourceId()); return registrationResponse; }); final CompletableFuture<SlotReport> initialSlotReportFuture = new CompletableFuture<>(); rmGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(resourceIDInstanceIDSlotReportTuple3.f2); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<TaskExecutorHeartbeatPayload> heartbeatPayloadCompletableFuture = new CompletableFuture<>(); rmGateway.setTaskExecutorHeartbeatConsumer((resourceID, heartbeatPayload) -> heartbeatPayloadCompletableFuture.complete(heartbeatPayload)); rpc.registerGateway(rmAddress, rmGateway); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final ResourceProfile resourceProfile = ResourceProfile.fromResources(1.0, 1); final SlotReport slotReport1 = new SlotReport( new SlotStatus( slotId, resourceProfile)); final SlotReport slotReport2 = new SlotReport( new SlotStatus( slotId, resourceProfile, new JobID(), new AllocationID())); final Queue<SlotReport> reports = new ArrayDeque<>(Arrays.asList(slotReport1, slotReport2)); final TaskSlotTable<Task> taskSlotTable = TestingTaskSlotTable .<Task>newBuilder() .createSlotReportSupplier(reports::poll) .closeAsyncReturns(CompletableFuture.completedFuture(null)) .build(); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutorPartitionTracker partitionTracker = createPartitionTrackerWithFixedPartitionReport(taskManagerServices.getShuffleEnvironment()); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices, HEARTBEAT_SERVICES, partitionTracker); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(), equalTo(unresolvedTaskManagerLocation.getResourceID())); assertThat(initialSlotReportFuture.get(), equalTo(slotReport1)); TaskExecutorGateway taskExecutorGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // trigger the heartbeat asynchronously taskExecutorGateway.heartbeatFromResourceManager(rmResourceId); // wait for heartbeat response SlotReport actualSlotReport = heartbeatPayloadCompletableFuture.get().getSlotReport(); // the new slot report should be reported assertEquals(slotReport2, actualSlotReport); ClusterPartitionReport actualClusterPartitionReport = heartbeatPayloadCompletableFuture.get().getClusterPartitionReport(); assertEquals(partitionTracker.createClusterPartitionReport(), actualClusterPartitionReport); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #13
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job is removed from the JobLeaderService once a TaskExecutor has * no more slots assigned to this job. * * <p>See FLINK-8504 */ @Test public void testRemoveJobFromJobLeaderService() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable( Collections.singleton(ResourceProfile.UNKNOWN), timerService); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices); try { final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReport.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID()); final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>(); final CompletableFuture<Void> stopFuture = new CompletableFuture<>(); final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService( startFuture, stopFuture); haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever); taskExecutor.start(); taskExecutor.waitUntilStarted(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final SlotID slotId = new SlotID(taskManagerLocation.getResourceID(), 0); final AllocationID allocationId = new AllocationID(); assertThat(startFuture.isDone(), is(false)); final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService(); assertThat(jobLeaderService.containsJob(jobId), is(false)); // wait for the initial slot report initialSlotReport.get(); taskExecutorGateway.requestSlot( slotId, jobId, allocationId, "foobar", resourceManagerId, timeout).get(); // wait until the job leader retrieval service for jobId is started startFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(true)); taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get(); // wait that the job leader retrieval service for jobId stopped becaue it should get removed stopFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(false)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #14
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
private TaskExecutorLocalStateStoresManager createTaskExecutorLocalStateStoresManager() throws IOException { return new TaskExecutorLocalStateStoresManager( false, new File[]{tmp.newFolder()}, Executors.directExecutor()); }
Example #15
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testHeartbeatTimeoutWithResourceManager() throws Exception { final String rmAddress = "rm"; final ResourceID rmResourceId = new ResourceID(rmAddress); final long heartbeatInterval = 1L; final long heartbeatTimeout = 3L; final ResourceManagerId rmLeaderId = ResourceManagerId.generate(); TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway( rmLeaderId, rmResourceId, rmAddress, rmAddress); final TaskExecutorRegistrationSuccess registrationResponse = new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234)); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final CountDownLatch registrationAttempts = new CountDownLatch(2); rmGateway.setRegisterTaskExecutorFunction( registration -> { taskExecutorRegistrationFuture.complete(registration.f1); registrationAttempts.countDown(); return CompletableFuture.completedFuture(registrationResponse); }); final CompletableFuture<ResourceID> taskExecutorDisconnectFuture = new CompletableFuture<>(); rmGateway.setDisconnectTaskExecutorConsumer( disconnectInfo -> taskExecutorDisconnectFuture.complete(disconnectInfo.f0)); rpc.registerGateway(rmAddress, rmGateway); final TaskSlotTable taskSlotTable = mock(TaskSlotTable.class); final SlotReport slotReport = new SlotReport(); when(taskSlotTable.createSlotReport(any(ResourceID.class))).thenReturn(slotReport); HeartbeatServices heartbeatServices = new HeartbeatServices(heartbeatInterval, heartbeatTimeout); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = new TaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, heartbeatServices, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId.toUUID()); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS), equalTo(taskManagerLocation.getResourceID())); // heartbeat timeout should trigger disconnect TaskManager from ResourceManager assertThat(taskExecutorDisconnectFuture.get(heartbeatTimeout * 50L, TimeUnit.MILLISECONDS), equalTo(taskManagerLocation.getResourceID())); assertTrue( "The TaskExecutor should try to reconnect to the RM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.SECONDS)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #16
Source File: TaskManagerServices.java From flink with Apache License 2.0 | 4 votes |
public TaskExecutorLocalStateStoresManager getTaskManagerStateStore() { return taskManagerStateStore; }
Example #17
Source File: TaskManagerServices.java From flink with Apache License 2.0 | 4 votes |
/** * Creates and returns the task manager services. * * @param taskManagerServicesConfiguration task manager configuration * @param permanentBlobService permanentBlobService used by the services * @param taskManagerMetricGroup metric group of the task manager * @param ioExecutor executor for async IO operations * @param fatalErrorHandler to handle class loading OOMs * @return task manager components * @throws Exception */ public static TaskManagerServices fromConfiguration( TaskManagerServicesConfiguration taskManagerServicesConfiguration, PermanentBlobService permanentBlobService, MetricGroup taskManagerMetricGroup, ExecutorService ioExecutor, FatalErrorHandler fatalErrorHandler) throws Exception { // pre-start checks checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths()); final TaskEventDispatcher taskEventDispatcher = new TaskEventDispatcher(); // start the I/O manager, it will create some temp directories. final IOManager ioManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths()); final ShuffleEnvironment<?, ?> shuffleEnvironment = createShuffleEnvironment( taskManagerServicesConfiguration, taskEventDispatcher, taskManagerMetricGroup, ioExecutor); final int listeningDataPort = shuffleEnvironment.start(); final KvStateService kvStateService = KvStateService.fromConfiguration(taskManagerServicesConfiguration); kvStateService.start(); final UnresolvedTaskManagerLocation unresolvedTaskManagerLocation = new UnresolvedTaskManagerLocation( taskManagerServicesConfiguration.getResourceID(), taskManagerServicesConfiguration.getExternalAddress(), // we expose the task manager location with the listening port // iff the external data port is not explicitly defined taskManagerServicesConfiguration.getExternalDataPort() > 0 ? taskManagerServicesConfiguration.getExternalDataPort() : listeningDataPort); final BroadcastVariableManager broadcastVariableManager = new BroadcastVariableManager(); final TaskSlotTable<Task> taskSlotTable = createTaskSlotTable( taskManagerServicesConfiguration.getNumberOfSlots(), taskManagerServicesConfiguration.getTaskExecutorResourceSpec(), taskManagerServicesConfiguration.getTimerServiceShutdownTimeout(), taskManagerServicesConfiguration.getPageSize()); final JobTable jobTable = DefaultJobTable.create(); final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getRetryingRegistrationConfiguration()); final String[] stateRootDirectoryStrings = taskManagerServicesConfiguration.getLocalRecoveryStateRootDirectories(); final File[] stateRootDirectoryFiles = new File[stateRootDirectoryStrings.length]; for (int i = 0; i < stateRootDirectoryStrings.length; ++i) { stateRootDirectoryFiles[i] = new File(stateRootDirectoryStrings[i], LOCAL_STATE_SUB_DIRECTORY_ROOT); } final TaskExecutorLocalStateStoresManager taskStateManager = new TaskExecutorLocalStateStoresManager( taskManagerServicesConfiguration.isLocalRecoveryEnabled(), stateRootDirectoryFiles, ioExecutor); final boolean failOnJvmMetaspaceOomError = taskManagerServicesConfiguration.getConfiguration().getBoolean(CoreOptions.FAIL_ON_USER_CLASS_LOADING_METASPACE_OOM); final LibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager( permanentBlobService, BlobLibraryCacheManager.defaultClassLoaderFactory( taskManagerServicesConfiguration.getClassLoaderResolveOrder(), taskManagerServicesConfiguration.getAlwaysParentFirstLoaderPatterns(), failOnJvmMetaspaceOomError ? fatalErrorHandler : null)); return new TaskManagerServices( unresolvedTaskManagerLocation, taskManagerServicesConfiguration.getManagedMemorySize().getBytes(), ioManager, shuffleEnvironment, kvStateService, broadcastVariableManager, taskSlotTable, jobTable, jobLeaderService, taskStateManager, taskEventDispatcher, ioExecutor, libraryCacheManager); }
Example #18
Source File: TaskExecutorTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testShouldShutDownTaskManagerServicesInPostStop() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final IOManager ioManager = new IOManagerAsync(tmp.newFolder().getAbsolutePath()); final TaskExecutorLocalStateStoresManager localStateStoresManager = new TaskExecutorLocalStateStoresManager( false, ioManager.getSpillingDirectories(), Executors.directExecutor()); final MemoryManager memoryManager = new MemoryManager( 4096, 1, 4096, MemoryType.HEAP, false); final NetworkEnvironment networkEnvironment = new NetworkEnvironment( 1, 1, 0, 0, 2, 8, true); networkEnvironment.start(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setMemoryManager(memoryManager) .setIoManager(ioManager) .setNetworkEnvironment(networkEnvironment) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); final long heartbeatInterval = 1000L; final long heartbeatTimeout = 1000L; final HeartbeatServices heartbeatServices = new HeartbeatServices(heartbeatInterval, heartbeatTimeout); final TaskExecutor taskManager = new TaskExecutor( rpc, taskManagerConfiguration, haServices, taskManagerServices, heartbeatServices, UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup(), null, dummyBlobCacheService, testingFatalErrorHandler); try { taskManager.start(); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } assertThat(memoryManager.isShutdown(), is(true)); assertThat(networkEnvironment.isShutdown(), is(true)); assertThat(ioManager.isProperlyShutDown(), is(true)); }
Example #19
Source File: TaskManagerServicesBuilder.java From flink with Apache License 2.0 | 4 votes |
public TaskManagerServicesBuilder setTaskStateManager(TaskExecutorLocalStateStoresManager taskStateManager) { this.taskStateManager = taskStateManager; return this; }
Example #20
Source File: TaskManagerServicesBuilder.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public TaskManagerServicesBuilder setTaskStateManager(TaskExecutorLocalStateStoresManager taskStateManager) { this.taskStateManager = taskStateManager; return this; }
Example #21
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testHeartbeatTimeoutWithResourceManager() throws Exception { final String rmAddress = "rm"; final ResourceID rmResourceId = new ResourceID(rmAddress); final long heartbeatInterval = 1L; final long heartbeatTimeout = 3L; final ResourceManagerId rmLeaderId = ResourceManagerId.generate(); TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway( rmLeaderId, rmResourceId, rmAddress, rmAddress); final TaskExecutorRegistrationSuccess registrationResponse = new TaskExecutorRegistrationSuccess( new InstanceID(), rmResourceId, new ClusterInformation("localhost", 1234)); final CompletableFuture<ResourceID> taskExecutorRegistrationFuture = new CompletableFuture<>(); final CountDownLatch registrationAttempts = new CountDownLatch(2); rmGateway.setRegisterTaskExecutorFunction( registration -> { taskExecutorRegistrationFuture.complete(registration.getResourceId()); registrationAttempts.countDown(); return CompletableFuture.completedFuture(registrationResponse); }); final CompletableFuture<ResourceID> taskExecutorDisconnectFuture = new CompletableFuture<>(); rmGateway.setDisconnectTaskExecutorConsumer( disconnectInfo -> taskExecutorDisconnectFuture.complete(disconnectInfo.f0)); rpc.registerGateway(rmAddress, rmGateway); final TaskSlotTable<Task> taskSlotTable = TestingTaskSlotTable .<Task>newBuilder() .createSlotReportSupplier(SlotReport::new) .closeAsyncReturns(CompletableFuture.completedFuture(null)) .build(); HeartbeatServices heartbeatServices = new HeartbeatServices(heartbeatInterval, heartbeatTimeout); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices, heartbeatServices); try { taskManager.start(); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmAddress, rmLeaderId.toUUID()); // register resource manager success will trigger monitoring heartbeat target between tm and rm assertThat(taskExecutorRegistrationFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS), equalTo(unresolvedTaskManagerLocation.getResourceID())); // heartbeat timeout should trigger disconnect TaskManager from ResourceManager assertThat(taskExecutorDisconnectFuture.get(timeout.toMilliseconds(), TimeUnit.MILLISECONDS), equalTo(unresolvedTaskManagerLocation.getResourceID())); assertTrue( "The TaskExecutor should try to reconnect to the RM", registrationAttempts.await(timeout.toMilliseconds(), TimeUnit.SECONDS)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #22
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * This tests makes sure that duplicate JobMaster gained leadership messages are filtered out * by the TaskExecutor. See FLINK-7526. */ @Test public void testFilterOutDuplicateJobMasterRegistrations() throws Exception { final long verificationTimeout = 500L; final JobLeaderService jobLeaderService = mock(JobLeaderService.class); final JobMasterGateway jobMasterGateway = mock(JobMasterGateway.class); when(jobMasterGateway.getHostname()).thenReturn("localhost"); final JMTMRegistrationSuccess registrationMessage = new JMTMRegistrationSuccess(ResourceID.generate()); final JobManagerTable jobManagerTableMock = spy(new JobManagerTable()); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setJobManagerTable(jobManagerTableMock) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices); try { taskExecutor.start(); taskExecutor.waitUntilStarted(); ArgumentCaptor<JobLeaderListener> jobLeaderListenerArgumentCaptor = ArgumentCaptor.forClass(JobLeaderListener.class); verify(jobLeaderService).start(anyString(), any(RpcService.class), any(HighAvailabilityServices.class), jobLeaderListenerArgumentCaptor.capture()); JobLeaderListener taskExecutorListener = jobLeaderListenerArgumentCaptor.getValue(); taskExecutorListener.jobManagerGainedLeadership(jobId, jobMasterGateway, registrationMessage); // duplicate job manager gained leadership message taskExecutorListener.jobManagerGainedLeadership(jobId, jobMasterGateway, registrationMessage); ArgumentCaptor<JobManagerConnection> jobManagerConnectionArgumentCaptor = ArgumentCaptor.forClass(JobManagerConnection.class); verify(jobManagerTableMock, Mockito.timeout(verificationTimeout).times(1)).put(eq(jobId), jobManagerConnectionArgumentCaptor.capture()); JobManagerConnection jobManagerConnection = jobManagerConnectionArgumentCaptor.getValue(); assertEquals(jobMasterGateway, jobManagerConnection.getJobManagerGateway()); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #23
Source File: TaskManagerServices.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Creates and returns the task manager services. * * @param resourceID resource ID of the task manager * @param taskManagerServicesConfiguration task manager configuration * @param taskIOExecutor executor for async IO operations. * @param freeHeapMemoryWithDefrag an estimate of the size of the free heap memory * @param maxJvmHeapMemory the maximum JVM heap size * @return task manager components * @throws Exception */ public static TaskManagerServices fromConfiguration( TaskManagerServicesConfiguration taskManagerServicesConfiguration, ResourceID resourceID, Executor taskIOExecutor, long freeHeapMemoryWithDefrag, long maxJvmHeapMemory) throws Exception { // pre-start checks checkTempDirs(taskManagerServicesConfiguration.getTmpDirPaths()); final NetworkEnvironment network = createNetworkEnvironment(taskManagerServicesConfiguration, maxJvmHeapMemory); network.start(); final TaskManagerLocation taskManagerLocation = new TaskManagerLocation( resourceID, taskManagerServicesConfiguration.getTaskManagerAddress(), network.getConnectionManager().getDataPort()); // this call has to happen strictly after the network stack has been initialized final MemoryManager memoryManager = createMemoryManager(taskManagerServicesConfiguration, freeHeapMemoryWithDefrag, maxJvmHeapMemory); // start the I/O manager, it will create some temp directories. final IOManager ioManager = new IOManagerAsync(taskManagerServicesConfiguration.getTmpDirPaths()); final BroadcastVariableManager broadcastVariableManager = new BroadcastVariableManager(); final List<ResourceProfile> resourceProfiles = new ArrayList<>(taskManagerServicesConfiguration.getNumberOfSlots()); for (int i = 0; i < taskManagerServicesConfiguration.getNumberOfSlots(); i++) { resourceProfiles.add(ResourceProfile.ANY); } final TimerService<AllocationID> timerService = new TimerService<>( new ScheduledThreadPoolExecutor(1), taskManagerServicesConfiguration.getTimerServiceShutdownTimeout()); final TaskSlotTable taskSlotTable = new TaskSlotTable(resourceProfiles, timerService); final JobManagerTable jobManagerTable = new JobManagerTable(); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, taskManagerServicesConfiguration.getRetryingRegistrationConfiguration()); final String[] stateRootDirectoryStrings = taskManagerServicesConfiguration.getLocalRecoveryStateRootDirectories(); final File[] stateRootDirectoryFiles = new File[stateRootDirectoryStrings.length]; for (int i = 0; i < stateRootDirectoryStrings.length; ++i) { stateRootDirectoryFiles[i] = new File(stateRootDirectoryStrings[i], LOCAL_STATE_SUB_DIRECTORY_ROOT); } final TaskExecutorLocalStateStoresManager taskStateManager = new TaskExecutorLocalStateStoresManager( taskManagerServicesConfiguration.isLocalRecoveryEnabled(), stateRootDirectoryFiles, taskIOExecutor); return new TaskManagerServices( taskManagerLocation, memoryManager, ioManager, network, broadcastVariableManager, taskSlotTable, jobManagerTable, jobLeaderService, taskStateManager); }
Example #24
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testTriggerRegistrationOnLeaderChange() throws Exception { final UUID leaderId1 = UUID.randomUUID(); final UUID leaderId2 = UUID.randomUUID(); // register the mock resource manager gateways final CompletableFuture<TaskExecutorRegistration> rmGateway1TaskExecutorRegistration = new CompletableFuture<>(); TestingResourceManagerGateway rmGateway1 = new TestingResourceManagerGateway(); rmGateway1.setRegisterTaskExecutorFunction( taskExecutorRegistration -> { rmGateway1TaskExecutorRegistration.complete(taskExecutorRegistration); return createRegistrationResponse(rmGateway1); }); final CompletableFuture<TaskExecutorRegistration> rmGateway2TaskExecutorRegistration = new CompletableFuture<>(); TestingResourceManagerGateway rmGateway2 = new TestingResourceManagerGateway(); rmGateway2.setRegisterTaskExecutorFunction( taskExecutorRegistration -> { rmGateway2TaskExecutorRegistration.complete(taskExecutorRegistration); return createRegistrationResponse(rmGateway2); }); rpc.registerGateway(rmGateway1.getAddress(), rmGateway1); rpc.registerGateway(rmGateway2.getAddress(), rmGateway2); final TaskSlotTable<Task> taskSlotTable = TestingTaskSlotTable .<Task>newBuilder() .createSlotReportSupplier(SlotReport::new) .closeAsyncReturns(CompletableFuture.completedFuture(null)) .build(); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); String taskManagerAddress = taskManager.getAddress(); // no connection initially, since there is no leader assertNull(taskManager.getResourceManagerConnection()); // define a leader and see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmGateway1.getAddress(), leaderId1); final TaskExecutorRegistration taskExecutorRegistration1 = rmGateway1TaskExecutorRegistration.join(); assertThat(taskExecutorRegistration1.getTaskExecutorAddress(), is(taskManagerAddress)); assertThat(taskExecutorRegistration1.getResourceId(), is(unresolvedTaskManagerLocation.getResourceID())); assertNotNull(taskManager.getResourceManagerConnection()); // cancel the leader resourceManagerLeaderRetriever.notifyListener(null, null); // set a new leader, see that a registration happens resourceManagerLeaderRetriever.notifyListener(rmGateway2.getAddress(), leaderId2); final TaskExecutorRegistration taskExecutorRegistration2 = rmGateway2TaskExecutorRegistration.join(); assertThat(taskExecutorRegistration2.getTaskExecutorAddress(), is(taskManagerAddress)); assertThat(taskExecutorRegistration2.getResourceId(), is(unresolvedTaskManagerLocation.getResourceID())); assertNotNull(taskManager.getResourceManagerConnection()); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #25
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a TaskManager detects a job leader for which it has reserved slots. Upon detecting * the job leader, it will offer all reserved slots to the JobManager. */ @Test public void testJobLeaderDetection() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final JobLeaderService jobLeaderService = new DefaultJobLeaderService(unresolvedTaskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); CompletableFuture<Void> initialSlotReportFuture = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReportFuture.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final CompletableFuture<Collection<SlotOffer>> offeredSlotsFuture = new CompletableFuture<>(); final TestingJobMasterGateway jobMasterGateway = new TestingJobMasterGatewayBuilder() .setOfferSlotsFunction((resourceID, slotOffers) -> { offeredSlotsFuture.complete(new ArrayList<>(slotOffers)); return CompletableFuture.completedFuture(slotOffers); }) .build(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); rpc.registerGateway(jobMasterGateway.getAddress(), jobMasterGateway); final AllocationID allocationId = new AllocationID(); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); final TaskExecutorGateway tmGateway = taskManager.getSelfGateway(TaskExecutorGateway.class); // tell the task manager about the rm leader resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerGateway.getFencingToken().toUUID()); // wait for the initial slot report initialSlotReportFuture.get(); // request slots from the task manager under the given allocation id CompletableFuture<Acknowledge> slotRequestAck = tmGateway.requestSlot( slotId, jobId, allocationId, ResourceProfile.ZERO, jobMasterGateway.getAddress(), resourceManagerGateway.getFencingToken(), timeout); slotRequestAck.get(); // now inform the task manager about the new job leader jobManagerLeaderRetriever.notifyListener(jobMasterGateway.getAddress(), jobMasterGateway.getFencingToken().toUUID()); final Collection<SlotOffer> offeredSlots = offeredSlotsFuture.get(); final Collection<AllocationID> allocationIds = offeredSlots.stream().map(SlotOffer::getAllocationId).collect(Collectors.toList()); assertThat(allocationIds, containsInAnyOrder(allocationId)); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } }
Example #26
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the heartbeat is stopped once the TaskExecutor detects that the RM is no longer leader. * * <p>See FLINK-8462 */ @Test public void testRMHeartbeatStopWhenLeadershipRevoked() throws Exception { final long heartbeatInterval = 1L; final long heartbeatTimeout = 10000L; final long pollTimeout = 1000L; final RecordingHeartbeatServices heartbeatServices = new RecordingHeartbeatServices(heartbeatInterval, heartbeatTimeout); final ResourceID rmResourceID = ResourceID.generate(); final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final String rmAddress = "rm"; final TestingResourceManagerGateway rmGateway = new TestingResourceManagerGateway( ResourceManagerId.generate(), rmResourceID, rmAddress, rmAddress); rpc.registerGateway(rmAddress, rmGateway); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskExecutor = createTaskExecutor(taskManagerServices, heartbeatServices); try { taskExecutor.start(); final BlockingQueue<ResourceID> unmonitoredTargets = heartbeatServices.getUnmonitoredTargets(); final BlockingQueue<ResourceID> monitoredTargets = heartbeatServices.getMonitoredTargets(); resourceManagerLeaderRetriever.notifyListener(rmAddress, rmGateway.getFencingToken().toUUID()); // wait for TM registration by checking the registered heartbeat targets assertThat( monitoredTargets.poll(pollTimeout, TimeUnit.MILLISECONDS), equalTo(rmResourceID)); // let RM lose leadership resourceManagerLeaderRetriever.notifyListener(null, null); // the timeout should not have triggered since it is much higher assertThat(unmonitoredTargets.poll(pollTimeout, TimeUnit.MILLISECONDS), equalTo(rmResourceID)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #27
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a job is removed from the JobLeaderService once a TaskExecutor has * no more slots assigned to this job. * * <p>See FLINK-8504 */ @Test public void testRemoveJobFromJobLeaderService() throws Exception { final TaskSlotTable<Task> taskSlotTable = TaskSlotUtils.createTaskSlotTable(1); final TaskExecutorLocalStateStoresManager localStateStoresManager = createTaskExecutorLocalStateStoresManager(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setUnresolvedTaskManagerLocation(unresolvedTaskManagerLocation) .setTaskSlotTable(taskSlotTable) .setTaskStateManager(localStateStoresManager) .build(); final TestingTaskExecutor taskExecutor = createTestingTaskExecutor(taskManagerServices); try { final TestingResourceManagerGateway resourceManagerGateway = new TestingResourceManagerGateway(); final CompletableFuture<Void> initialSlotReport = new CompletableFuture<>(); resourceManagerGateway.setSendSlotReportFunction(resourceIDInstanceIDSlotReportTuple3 -> { initialSlotReport.complete(null); return CompletableFuture.completedFuture(Acknowledge.get()); }); final ResourceManagerId resourceManagerId = resourceManagerGateway.getFencingToken(); rpc.registerGateway(resourceManagerGateway.getAddress(), resourceManagerGateway); resourceManagerLeaderRetriever.notifyListener(resourceManagerGateway.getAddress(), resourceManagerId.toUUID()); final CompletableFuture<LeaderRetrievalListener> startFuture = new CompletableFuture<>(); final CompletableFuture<Void> stopFuture = new CompletableFuture<>(); final StartStopNotifyingLeaderRetrievalService jobMasterLeaderRetriever = new StartStopNotifyingLeaderRetrievalService( startFuture, stopFuture); haServices.setJobMasterLeaderRetriever(jobId, jobMasterLeaderRetriever); taskExecutor.start(); taskExecutor.waitUntilStarted(); final TaskExecutorGateway taskExecutorGateway = taskExecutor.getSelfGateway(TaskExecutorGateway.class); final SlotID slotId = new SlotID(unresolvedTaskManagerLocation.getResourceID(), 0); final AllocationID allocationId = new AllocationID(); assertThat(startFuture.isDone(), is(false)); final JobLeaderService jobLeaderService = taskManagerServices.getJobLeaderService(); assertThat(jobLeaderService.containsJob(jobId), is(false)); // wait for the initial slot report initialSlotReport.get(); taskExecutorGateway.requestSlot( slotId, jobId, allocationId, ResourceProfile.ZERO, "foobar", resourceManagerId, timeout).get(); // wait until the job leader retrieval service for jobId is started startFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(true)); taskExecutorGateway.freeSlot(allocationId, new FlinkException("Test exception"), timeout).get(); // wait that the job leader retrieval service for jobId stopped becaue it should get removed stopFuture.get(); assertThat(jobLeaderService.containsJob(jobId), is(false)); } finally { RpcUtils.terminateRpcEndpoint(taskExecutor, timeout); } }
Example #28
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
private TaskExecutorLocalStateStoresManager createTaskExecutorLocalStateStoresManager() throws IOException { return new TaskExecutorLocalStateStoresManager( false, new File[]{tmp.newFolder()}, Executors.directExecutor()); }
Example #29
Source File: TaskManagerServices.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public TaskExecutorLocalStateStoresManager getTaskManagerStateStore() { return taskManagerStateStore; }
Example #30
Source File: TaskExecutorTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testShouldShutDownTaskManagerServicesInPostStop() throws Exception { final TaskSlotTable taskSlotTable = new TaskSlotTable(Collections.singleton(ResourceProfile.UNKNOWN), timerService); final JobLeaderService jobLeaderService = new JobLeaderService(taskManagerLocation, RetryingRegistrationConfiguration.defaultConfiguration()); final IOManager ioManager = new IOManagerAsync(tmp.newFolder().getAbsolutePath()); final TaskExecutorLocalStateStoresManager localStateStoresManager = new TaskExecutorLocalStateStoresManager( false, ioManager.getSpillingDirectories(), Executors.directExecutor()); final MemoryManager memoryManager = new MemoryManager( 4096, 1, 4096, MemoryType.HEAP, false); nettyShuffleEnvironment.start(); final KvStateService kvStateService = new KvStateService(new KvStateRegistry(), null, null); kvStateService.start(); final TaskManagerServices taskManagerServices = new TaskManagerServicesBuilder() .setTaskManagerLocation(taskManagerLocation) .setMemoryManager(memoryManager) .setIoManager(ioManager) .setShuffleEnvironment(nettyShuffleEnvironment) .setKvStateService(kvStateService) .setTaskSlotTable(taskSlotTable) .setJobLeaderService(jobLeaderService) .setTaskStateManager(localStateStoresManager) .build(); final TaskExecutor taskManager = createTaskExecutor(taskManagerServices); try { taskManager.start(); } finally { RpcUtils.terminateRpcEndpoint(taskManager, timeout); } assertThat(memoryManager.isShutdown(), is(true)); assertThat(nettyShuffleEnvironment.isClosed(), is(true)); assertThat(kvStateService.isShutdown(), is(true)); }