org.apache.flink.runtime.clusterframework.types.ResourceID Java Examples
The following examples show how to use
org.apache.flink.runtime.clusterframework.types.ResourceID.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ExecutionVertexLocalityTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * This test validates that vertices with too many input streams do not have a location * preference any more. */ @Test public void testNoLocalityInputLargeAllToAll() throws Exception { final int parallelism = 100; final ExecutionGraph graph = createTestGraph(parallelism, true); // set the location for all sources to a distinct location for (int i = 0; i < parallelism; i++) { ExecutionVertex source = graph.getAllVertices().get(sourceVertexId).getTaskVertices()[i]; TaskManagerLocation location = new TaskManagerLocation( ResourceID.generate(), InetAddress.getLoopbackAddress(), 10000 + i); initializeLocation(source, location); } // validate that the target vertices have no location preference for (int i = 0; i < parallelism; i++) { ExecutionVertex target = graph.getAllVertices().get(targetVertexId).getTaskVertices()[i]; Iterator<CompletableFuture<TaskManagerLocation>> preference = target.getPreferredLocations().iterator(); assertFalse(preference.hasNext()); } }
Example #2
Source File: YarnResourceManagerTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Create mock RM dependencies. */ Context() throws Exception { rpcService = new TestingRpcService(); rmServices = new MockResourceManagerRuntimeServices(); // resource manager rmResourceID = ResourceID.generate(); resourceManager = new TestingYarnResourceManager( rpcService, RM_ADDRESS, rmResourceID, flinkConfig, env, rmServices.highAvailabilityServices, rmServices.heartbeatServices, rmServices.slotManager, rmServices.metricRegistry, rmServices.jobLeaderIdService, new ClusterInformation("localhost", 1234), testingFatalErrorHandler, null, mockResourceManagerClient, mockNMClient, mockJMMetricGroup); }
Example #3
Source File: TaskManagerInfo.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * JSON creator for the task manager information.
 *
 * @param resourceId resource id of the task manager; must not be null
 * @param address address of the task manager; must not be null
 * @param dataPort data port value
 * @param lastHeartbeat last heartbeat value
 * @param numberSlots number of slots
 * @param numberAvailableSlots number of available slots
 * @param hardwareDescription hardware description; must not be null
 */
@JsonCreator
public TaskManagerInfo(
        @JsonDeserialize(using = ResourceIDDeserializer.class) @JsonProperty(FIELD_NAME_RESOURCE_ID) ResourceID resourceId,
        @JsonProperty(FIELD_NAME_ADDRESS) String address,
        @JsonProperty(FIELD_NAME_DATA_PORT) int dataPort,
        @JsonProperty(FIELD_NAME_LAST_HEARTBEAT) long lastHeartbeat,
        @JsonProperty(FIELD_NAME_NUMBER_SLOTS) int numberSlots,
        @JsonProperty(FIELD_NAME_NUMBER_AVAILABLE_SLOTS) int numberAvailableSlots,
        @JsonProperty(FIELD_NAME_HARDWARE) HardwareDescription hardwareDescription) {
    // plain values need no validation
    this.dataPort = dataPort;
    this.lastHeartbeat = lastHeartbeat;
    this.numberSlots = numberSlots;
    this.numberAvailableSlots = numberAvailableSlots;

    // reference values are null-checked in declaration order
    this.resourceId = Preconditions.checkNotNull(resourceId);
    this.address = Preconditions.checkNotNull(address);
    this.hardwareDescription = Preconditions.checkNotNull(hardwareDescription);
}
Example #4
Source File: HeartbeatManagerSenderImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Convenience constructor that forwards to the full constructor, supplying a
 * new {@code HeartbeatMonitorImpl.Factory} as the heartbeat monitor factory.
 */
HeartbeatManagerSenderImpl(
        long heartbeatPeriod,
        long heartbeatTimeout,
        ResourceID ownResourceID,
        HeartbeatListener<I, O> heartbeatListener,
        ScheduledExecutor mainThreadExecutor,
        Logger log) {
    this(heartbeatPeriod, heartbeatTimeout, ownResourceID, heartbeatListener, mainThreadExecutor, log,
        new HeartbeatMonitorImpl.Factory<>());
}
Example #5
Source File: ResourceManager.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds one {@link TaskManagerInfo} per registered task executor and returns
 * the collection as an already completed future.
 *
 * @param timeout not used by this implementation
 * @return completed future holding the info of every entry in {@code taskExecutors}
 */
@Override
public CompletableFuture<Collection<TaskManagerInfo>> requestTaskManagerInfo(Time timeout) {
    final Collection<TaskManagerInfo> infos = new ArrayList<>(taskExecutors.size());

    taskExecutors.forEach((resourceId, registration) ->
        infos.add(
            new TaskManagerInfo(
                resourceId,
                registration.getTaskExecutorGateway().getAddress(),
                registration.getDataPort(),
                taskManagerHeartbeatManager.getLastHeartbeatFrom(resourceId),
                slotManager.getNumberRegisteredSlotsOf(registration.getInstanceID()),
                slotManager.getNumberFreeSlotsOf(registration.getInstanceID()),
                slotManager.getRegisteredResourceOf(registration.getInstanceID()),
                slotManager.getFreeResourceOf(registration.getInstanceID()),
                registration.getHardwareDescription())));

    return CompletableFuture.completedFuture(infos);
}
Example #6
Source File: ResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testHeartbeatTimeoutWithTaskExecutor() throws Exception { final ResourceID taskExecutorId = ResourceID.generate(); final CompletableFuture<ResourceID> heartbeatRequestFuture = new CompletableFuture<>(); final CompletableFuture<Exception> disconnectFuture = new CompletableFuture<>(); final TaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setDisconnectResourceManagerConsumer(disconnectFuture::complete) .setHeartbeatResourceManagerConsumer(heartbeatRequestFuture::complete) .createTestingTaskExecutorGateway(); rpcService.registerGateway(taskExecutorGateway.getAddress(), taskExecutorGateway); runHeartbeatTimeoutTest( resourceManagerGateway -> { registerTaskExecutor(resourceManagerGateway, taskExecutorId, taskExecutorGateway.getAddress()); }, resourceManagerResourceId -> { // might have been completed or not depending whether the timeout was triggered first final ResourceID optionalHeartbeatRequestOrigin = heartbeatRequestFuture.getNow(null); assertThat(optionalHeartbeatRequestOrigin, anyOf(is(resourceManagerResourceId), is(nullValue()))); assertThat(disconnectFuture.get(), instanceOf(TimeoutException.class)); } ); }
Example #7
Source File: YarnResourceManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public void onContainersCompleted(final List<ContainerStatus> statuses) { runAsync(() -> { log.debug("YARN ResourceManager reported the following containers completed: {}.", statuses); for (final ContainerStatus containerStatus : statuses) { final ResourceID resourceId = new ResourceID(containerStatus.getContainerId().toString()); final YarnWorkerNode yarnWorkerNode = workerNodeMap.remove(resourceId); if (yarnWorkerNode != null) { // Container completed unexpectedly ~> start a new one requestYarnContainerIfRequired(); } // Eagerly close the connection with task manager. closeTaskManagerConnection(resourceId, new Exception(containerStatus.getDiagnostics())); } } ); }
Example #8
Source File: SlotSharingGroupAssignment.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Gets the number of shared slots into which the given group can place subtasks or * nested task groups. * * @param groupId The ID of the group. * @return The number of shared slots available to the given job vertex. */ public int getNumberOfAvailableSlotsForGroup(AbstractID groupId) { synchronized (lock) { Map<ResourceID, List<SharedSlot>> available = availableSlotsPerJid.get(groupId); if (available != null) { Set<SharedSlot> set = new HashSet<SharedSlot>(); for (List<SharedSlot> list : available.values()) { for (SharedSlot slot : list) { set.add(slot); } } return set.size(); } else { // if no entry exists for a JobVertexID so far, then the vertex with that ID can // add a subtask into each shared slot of this group. Consequently, all // of them are available for that JobVertexID. return allSlots.size(); } } }
Example #9
Source File: TaskExecutor.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Reacts to a heartbeat timeout of the JobManager with the given id: if a
 * connection is registered for it, the connection is closed with a
 * {@link TimeoutException} and a reconnect is requested for its job.
 *
 * @param resourceID id of the JobManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceID) {
    validateRunsInMainThread();
    log.info("The heartbeat of JobManager with id {} timed out.", resourceID);

    // a missing key and a null value are handled the same way: nothing to close
    final JobManagerConnection connection = jobManagerConnections.get(resourceID);
    if (connection == null) {
        return;
    }

    closeJobManagerConnection(
        connection.getJobID(),
        new TimeoutException("The heartbeat of JobManager with id " + resourceID + " timed out."));
    jobLeaderService.reconnect(connection.getJobID());
}
Example #10
Source File: StandaloneResourceManagerFactoryTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Creating a standalone resource manager must succeed even when the configured
 * task manager heap memory (128 MiB) is lower than the configured containerized
 * heap cutoff minimum (600).
 */
@Test
public void createResourceManager_WithLessMemoryThanContainerizedHeapCutoffMin_ShouldSucceed() throws Exception {
    final StandaloneResourceManagerFactory factory = StandaloneResourceManagerFactory.INSTANCE;
    final TestingRpcService rpcService = new TestingRpcService();

    try {
        final Configuration config = new Configuration();
        final MemorySize heapMemory = new MemorySize(128 * 1024 * 1024);
        config.setString(TaskManagerOptions.TASK_MANAGER_HEAP_MEMORY, heapMemory.toString());
        config.setInteger(ResourceManagerOptions.CONTAINERIZED_HEAP_CUTOFF_MIN, 600);

        // only the successful creation matters; the instance itself is not used
        final ResourceManager<ResourceID> ignored =
            factory.createResourceManager(
                config,
                ResourceID.generate(),
                rpcService,
                new TestingHighAvailabilityServices(),
                new TestingHeartbeatServices(),
                NoOpMetricRegistry.INSTANCE,
                new TestingFatalErrorHandler(),
                new ClusterInformation("foobar", 1234),
                null,
                UnregisteredMetricGroups.createUnregisteredJobManagerMetricGroup());
    } finally {
        RpcUtils.terminateRpcService(rpcService, Time.seconds(10L));
    }
}
Example #11
Source File: KubernetesResourceManagerTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testTaskManagerPodErrorAfterRegistration() throws Exception { new Context() {{ runTest(() -> { registerSlotRequest(); final Pod pod = kubeClient.pods().list().getItems().get(0); resourceManager.onAdded(Collections.singletonList(new KubernetesPod(pod))); registerTaskExecutor(new ResourceID(pod.getMetadata().getName())); // Error happens in the pod. Should not request a new pod. terminatePod(pod); resourceManager.onError(Collections.singletonList(new KubernetesPod(pod))); assertEquals(0, kubeClient.pods().list().getItems().size()); }); }}; }
Example #12
Source File: HeartbeatManagerSenderImpl.java From flink with Apache License 2.0 | 6 votes |
HeartbeatManagerSenderImpl( long heartbeatPeriod, long heartbeatTimeout, ResourceID ownResourceID, HeartbeatListener<I, O> heartbeatListener, ScheduledExecutor mainThreadExecutor, Logger log, HeartbeatMonitor.Factory<O> heartbeatMonitorFactory) { super( heartbeatTimeout, ownResourceID, heartbeatListener, mainThreadExecutor, log, heartbeatMonitorFactory); this.heartbeatPeriod = heartbeatPeriod; mainThreadExecutor.schedule(this, 0L, TimeUnit.MILLISECONDS); }
Example #13
Source File: MetricRegistryImplTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Starts the metric query service of a {@link MetricRegistryImpl}, registers a
 * single counter and polls the query service gateway (at most 10 attempts,
 * sleeping 50 ms after an unsuccessful one) until the dump reports exactly one
 * counter.
 */
@Test
public void testMetricQueryServiceSetup() throws Exception {
    MetricRegistryImpl metricRegistry = new MetricRegistryImpl(MetricRegistryConfiguration.defaultMetricRegistryConfiguration());

    // before the query service is started there is no gateway address
    Assert.assertNull(metricRegistry.getMetricQueryServiceGatewayRpcAddress());

    metricRegistry.startQueryService(new TestingRpcService(), new ResourceID("mqs"));

    MetricQueryServiceGateway metricQueryServiceGateway = metricRegistry.getMetricQueryServiceGateway();
    Assert.assertNotNull(metricQueryServiceGateway);

    metricRegistry.register(new SimpleCounter(), "counter", UnregisteredMetricGroups.createUnregisteredTaskManagerMetricGroup());

    boolean metricsSuccessfullyQueried = false;
    for (int x = 0; x < 10; x++) {
        MetricDumpSerialization.MetricSerializationResult metricSerializationResult =
            metricQueryServiceGateway.queryMetrics(Time.seconds(5)).get(5, TimeUnit.SECONDS);

        if (metricSerializationResult.numCounters == 1) {
            metricsSuccessfullyQueried = true;
            // the expected result was observed; further queries cannot change the outcome
            break;
        } else {
            Thread.sleep(50);
        }
    }

    Assert.assertTrue("metrics query did not return expected result", metricsSuccessfullyQueried);
}
Example #14
Source File: NettyShuffleEnvironment.java From flink with Apache License 2.0 | 6 votes |
/**
 * Stores the supplied components, creates an empty concurrent map for the
 * input gates (initial capacity 10) and marks the environment as not closed.
 */
NettyShuffleEnvironment(
        ResourceID taskExecutorResourceId,
        NettyShuffleEnvironmentConfiguration config,
        NetworkBufferPool networkBufferPool,
        ConnectionManager connectionManager,
        ResultPartitionManager resultPartitionManager,
        FileChannelManager fileChannelManager,
        ResultPartitionFactory resultPartitionFactory,
        SingleInputGateFactory singleInputGateFactory,
        Executor ioExecutor) {
    // components handed in by the caller
    this.taskExecutorResourceId = taskExecutorResourceId;
    this.config = config;
    this.networkBufferPool = networkBufferPool;
    this.connectionManager = connectionManager;
    this.resultPartitionManager = resultPartitionManager;
    this.fileChannelManager = fileChannelManager;
    this.resultPartitionFactory = resultPartitionFactory;
    this.singleInputGateFactory = singleInputGateFactory;
    this.ioExecutor = ioExecutor;

    // state owned by this instance
    this.inputGatesById = new ConcurrentHashMap<>(10);
    this.isClosed = false;
}
Example #15
Source File: JobMaster.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Disconnects the task manager with the given id: stops monitoring its
 * heartbeat, releases it from the slot pool and, if it was registered,
 * tells its gateway to disconnect from this job.
 *
 * @param resourceID id of the task manager to disconnect
 * @param cause reason for the disconnect
 * @return an already completed acknowledge future
 */
@Override
public CompletableFuture<Acknowledge> disconnectTaskManager(final ResourceID resourceID, final Exception cause) {
    log.debug("Disconnect TaskExecutor {} because: {}", resourceID, cause.getMessage());

    taskManagerHeartbeatManager.unmonitorTarget(resourceID);
    slotPool.releaseTaskManager(resourceID, cause);

    final Tuple2<TaskManagerLocation, TaskExecutorGateway> registration = registeredTaskManagers.remove(resourceID);
    if (registration != null) {
        final TaskExecutorGateway gateway = registration.f1;
        gateway.disconnectJobManager(jobGraph.getJobID(), cause);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
Example #16
Source File: ResourceManagerPartitionTrackerImpl.java From flink with Apache License 2.0 | 5 votes |
/**
 * Lists every data set that has a metadata entry, together with the number of
 * partitions currently tracked for it (summed over all task executors) and its
 * total number of partitions.
 *
 * @return data set id mapped to its meta info
 * @throws IllegalStateException if a metadata entry exists for a data set that
 *     has no tracked partitions
 */
@Override
public Map<IntermediateDataSetID, DataSetMetaInfo> listDataSets() {
    return dataSetMetaInfo.entrySet().stream()
        .collect(Collectors.toMap(
            Map.Entry::getKey,
            metaInfoEntry -> {
                final Map<ResourceID, Set<ResultPartitionID>> partitionsByTaskExecutor =
                    dataSetToTaskExecutors.get(metaInfoEntry.getKey());
                Preconditions.checkState(
                    partitionsByTaskExecutor != null,
                    "Have metadata entry for dataset %s, but no partition is tracked.",
                    metaInfoEntry.getKey());

                final int numTrackedPartitions = partitionsByTaskExecutor.values().stream()
                    .mapToInt(Set::size)
                    .sum();

                return DataSetMetaInfo.withNumRegisteredPartitions(
                    numTrackedPartitions,
                    metaInfoEntry.getValue().getNumTotalPartitions());
            }));
}
Example #17
Source File: TestingResourceManagerGateway.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Delegates to {@code requestTaskManagerFileUploadFunction} when one is set;
 * otherwise answers with a completed future holding a new {@link TransientBlobKey}.
 *
 * @param taskManagerId id of the task manager
 * @param fileType type of the requested file
 * @param timeout not used by this implementation
 */
@Override
public CompletableFuture<TransientBlobKey> requestTaskManagerFileUpload(ResourceID taskManagerId, FileType fileType, Time timeout) {
    // read the field once so the null check and the call see the same function
    final Function<Tuple2<ResourceID, FileType>, CompletableFuture<TransientBlobKey>> uploadFunction =
        requestTaskManagerFileUploadFunction;

    if (uploadFunction == null) {
        return CompletableFuture.completedFuture(new TransientBlobKey());
    }
    return uploadFunction.apply(Tuple2.of(taskManagerId, fileType));
}
Example #18
Source File: InstanceTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Allocates all three slots of an instance, marks the instance dead and
 * verifies that no slots remain allocated or available and that every
 * previously allocated slot is canceled.
 */
@Test
public void testInstanceDies() {
    try {
        ResourceID resourceID = ResourceID.generate();
        HardwareDescription hardwareDescription = new HardwareDescription(4, 2L*1024*1024*1024, 1024*1024*1024, 512*1024*1024);
        InetAddress address = InetAddress.getByName("127.0.0.1");
        TaskManagerLocation connection = new TaskManagerLocation(resourceID, address, 10001);

        Instance instance = new Instance(
            new ActorTaskManagerGateway(DummyActorGateway.INSTANCE),
            connection,
            new InstanceID(),
            hardwareDescription,
            3);

        assertEquals(3, instance.getNumberOfAvailableSlots());

        SimpleSlot slot1 = instance.allocateSimpleSlot();
        SimpleSlot slot2 = instance.allocateSimpleSlot();
        SimpleSlot slot3 = instance.allocateSimpleSlot();

        instance.markDead();

        assertEquals(0, instance.getNumberOfAllocatedSlots());
        assertEquals(0, instance.getNumberOfAvailableSlots());

        assertTrue(slot1.isCanceled());
        assertTrue(slot2.isCanceled());
        assertTrue(slot3.isCanceled());
    } catch (Exception e) {
        // keep the original exception as the cause so the test report shows the
        // full stack trace instead of only a (possibly null) message
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Example #19
Source File: ResourceManagerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Registers a task executor at the given resource manager gateway and asserts
 * that the registration response is a {@code RegistrationResponse.Success}.
 *
 * @param resourceManagerGateway gateway to register at
 * @param taskExecutorId resource id of the task executor
 * @param taskExecutorAddress address of the task executor
 */
private void registerTaskExecutor(ResourceManagerGateway resourceManagerGateway, ResourceID taskExecutorId, String taskExecutorAddress) throws Exception {
    final RegistrationResponse response = resourceManagerGateway
        .registerTaskExecutor(
            taskExecutorAddress,
            taskExecutorId,
            dataPort,
            hardwareDescription,
            TestingUtils.TIMEOUT())
        .get();

    assertThat(response, instanceOf(RegistrationResponse.Success.class));
}
Example #20
Source File: JobMasterPartitionTrackerImpl.java From flink with Apache License 2.0 | 5 votes |
/**
 * Stops tracking all partitions of the given task executor and passes the
 * meta info of the previously tracked entries on to
 * {@code internalReleaseOrPromotePartitions}.
 *
 * @param producingTaskExecutorId id of the producing task executor; must not be null
 */
@Override
public void stopTrackingAndReleaseOrPromotePartitionsFor(ResourceID producingTaskExecutorId) {
    Preconditions.checkNotNull(producingTaskExecutorId);

    // project the no-longer-tracked entries onto their deployment descriptors
    final Collection<ResultPartitionDeploymentDescriptor> untrackedPartitionDescriptors =
        CollectionUtil.project(
            stopTrackingPartitionsFor(producingTaskExecutorId),
            PartitionTrackerEntry::getMetaInfo);

    internalReleaseOrPromotePartitions(producingTaskExecutorId, untrackedPartitionDescriptors);
}
Example #21
Source File: DefaultJobTableTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Connecting a job that has already been closed is expected to fail with an
 * {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void connectJob_AfterBeingClosed_WillFail() {
    final JobTable.Job closedJob = jobTable.getOrCreateJob(jobId, DEFAULT_JOB_SERVICES_SUPPLIER);
    closedJob.close();

    final ResourceID anyResourceId = ResourceID.generate();
    connectJob(closedJob, anyResourceId);
}
Example #22
Source File: SimpleSlotTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Creates a single-slot {@code Instance} located at 127.0.0.1:10001 and
 * returns a simple slot allocated from it.
 *
 * @return the simple slot allocated from the new instance
 */
public static SimpleSlot getSlot() throws Exception {
    final HardwareDescription hardware =
        new HardwareDescription(4, 2L*1024*1024*1024, 1024*1024*1024, 512*1024*1024);
    final TaskManagerLocation location =
        new TaskManagerLocation(ResourceID.generate(), InetAddress.getByName("127.0.0.1"), 10001);

    final Instance singleSlotInstance = new Instance(
        new ActorTaskManagerGateway(DummyActorGateway.INSTANCE),
        location,
        new InstanceID(),
        hardware,
        1);

    return singleSlotInstance.allocateSimpleSlot();
}
Example #23
Source File: SlotManagerImplTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that a task manager timeout does not remove the slots from the SlotManager. * A timeout should only trigger the {@link ResourceActions#releaseResource(InstanceID, Exception)} * callback. The receiver of the callback can then decide what to do with the TaskManager. * * <p>See FLINK-7793 */ @Test public void testTaskManagerTimeoutDoesNotRemoveSlots() throws Exception { final Time taskManagerTimeout = Time.milliseconds(10L); final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceID resourceID = ResourceID.generate(); final CompletableFuture<InstanceID> releaseResourceFuture = new CompletableFuture<>(); final ResourceActions resourceActions = new TestingResourceActionsBuilder() .setReleaseResourceConsumer((instanceId, ignored) -> releaseResourceFuture.complete(instanceId)) .build(); final TaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder().createTestingTaskExecutorGateway(); final TaskExecutorConnection taskExecutorConnection = new TaskExecutorConnection(resourceID, taskExecutorGateway); final SlotStatus slotStatus = createEmptySlotStatus(new SlotID(resourceID, 0), ResourceProfile.fromResources(1.0, 1)); final SlotReport initialSlotReport = new SlotReport(slotStatus); try (final SlotManager slotManager = createSlotManagerBuilder() .setTaskManagerTimeout(taskManagerTimeout) .buildAndStartWithDirectExec(resourceManagerId, resourceActions)) { slotManager.registerTaskManager(taskExecutorConnection, initialSlotReport); assertEquals(1, slotManager.getNumberRegisteredSlots()); // wait for the timeout call to happen assertThat(releaseResourceFuture.get(), is(taskExecutorConnection.getInstanceID())); assertEquals(1, slotManager.getNumberRegisteredSlots()); slotManager.unregisterTaskManager(taskExecutorConnection.getInstanceID(), TEST_EXCEPTION); assertEquals(0, slotManager.getNumberRegisteredSlots()); } }
Example #24
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reacts to a heartbeat timeout of the TaskManager with the given id by
 * closing its connection with a {@link TimeoutException}.
 *
 * @param resourceID id of the TaskManager whose heartbeat timed out
 */
@Override
public void notifyHeartbeatTimeout(final ResourceID resourceID) {
    validateRunsInMainThread();
    log.info("The heartbeat of TaskManager with id {} timed out.", resourceID);

    final TimeoutException timeoutCause =
        new TimeoutException("The heartbeat of TaskManager with id " + resourceID + " timed out.");
    closeTaskManagerConnection(resourceID, timeoutCause);
}
Example #25
Source File: SlotManagerTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Tests that a task manager timeout does not remove the slots from the SlotManager. * A timeout should only trigger the {@link ResourceActions#releaseResource(InstanceID, Exception)} * callback. The receiver of the callback can then decide what to do with the TaskManager. * * <p>See FLINK-7793 */ @Test public void testTaskManagerTimeoutDoesNotRemoveSlots() throws Exception { final Time taskManagerTimeout = Time.milliseconds(10L); final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceID resourceID = ResourceID.generate(); final ResourceActions resourceActions = mock(ResourceActions.class); final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class); when(taskExecutorGateway.canBeReleased()).thenReturn(CompletableFuture.completedFuture(true)); final TaskExecutorConnection taskExecutorConnection = new TaskExecutorConnection(resourceID, taskExecutorGateway); final SlotStatus slotStatus = new SlotStatus( new SlotID(resourceID, 0), new ResourceProfile(1.0, 1)); final SlotReport initialSlotReport = new SlotReport(slotStatus); try (final SlotManager slotManager = SlotManagerBuilder.newBuilder() .setTaskManagerTimeout(taskManagerTimeout) .build()) { slotManager.start(resourceManagerId, Executors.directExecutor(), resourceActions); slotManager.registerTaskManager(taskExecutorConnection, initialSlotReport); assertEquals(1, slotManager.getNumberRegisteredSlots()); // wait for the timeout call to happen verify(resourceActions, timeout(taskManagerTimeout.toMilliseconds() * 20L).atLeast(1)).releaseResource(eq(taskExecutorConnection.getInstanceID()), any(Exception.class)); assertEquals(1, slotManager.getNumberRegisteredSlots()); slotManager.unregisterTaskManager(taskExecutorConnection.getInstanceID()); assertEquals(0, slotManager.getNumberRegisteredSlots()); } }
Example #26
Source File: ExecutionPartitionLifecycleTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Starts a job with a blocking result partition, checks that tracking was
 * started for the expected task executor and descriptor, applies the given
 * state transition to the execution and finally verifies which of the
 * partition tracker callbacks were invoked.
 *
 * @param stateTransition transition applied to the execution under test
 * @param partitionReleaseResult which tracker callback is expected afterwards
 */
private void testPartitionTrackingForStateTransition(final Consumer<Execution> stateTransition, final PartitionReleaseResult partitionReleaseResult) throws Exception {
    final CompletableFuture<Tuple2<ResourceID, ResultPartitionDeploymentDescriptor>> startTrackingFuture = new CompletableFuture<>();
    final CompletableFuture<Collection<ResultPartitionID>> stopTrackingFuture = new CompletableFuture<>();
    final CompletableFuture<Collection<ResultPartitionID>> stopTrackingAndReleaseFuture = new CompletableFuture<>();

    // record every tracker callback in its own future
    final TestingPartitionTracker partitionTracker = new TestingPartitionTracker();
    partitionTracker.setStartTrackingPartitionsConsumer(
        (resourceID, resultPartitionDeploymentDescriptor) ->
            startTrackingFuture.complete(Tuple2.of(resourceID, resultPartitionDeploymentDescriptor))
    );
    partitionTracker.setStopTrackingPartitionsConsumer(stopTrackingFuture::complete);
    partitionTracker.setStopTrackingAndReleasePartitionsConsumer(stopTrackingAndReleaseFuture::complete);

    setupExecutionGraphAndStartRunningJob(
        ResultPartitionType.BLOCKING,
        partitionTracker,
        new SimpleAckingTaskManagerGateway(),
        NettyShuffleMaster.INSTANCE);

    final Tuple2<ResourceID, ResultPartitionDeploymentDescriptor> startTrackingCall = startTrackingFuture.get();
    assertThat(startTrackingCall.f0, equalTo(taskExecutorResourceId));
    assertThat(startTrackingCall.f1, equalTo(descriptor));

    stateTransition.accept(execution);

    switch (partitionReleaseResult) {
        case NONE:
            assertFalse(stopTrackingFuture.isDone());
            assertFalse(stopTrackingAndReleaseFuture.isDone());
            break;
        case STOP_TRACKING:
            assertTrue(stopTrackingFuture.isDone());
            assertFalse(stopTrackingAndReleaseFuture.isDone());
            final Collection<ResultPartitionID> stoppedPartitions = stopTrackingFuture.get();
            assertEquals(
                Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()),
                stoppedPartitions);
            break;
        case STOP_TRACKING_AND_RELEASE:
            assertFalse(stopTrackingFuture.isDone());
            assertTrue(stopTrackingAndReleaseFuture.isDone());
            final Collection<ResultPartitionID> releasedPartitions = stopTrackingAndReleaseFuture.get();
            assertEquals(
                Collections.singletonList(descriptor.getShuffleDescriptor().getResultPartitionID()),
                releasedPartitions);
            break;
    }
}
Example #27
Source File: YarnResourceManager.java From flink with Apache License 2.0 | 5 votes |
private void releaseFailedContainerAndRequestNewContainerIfRequired(ContainerId containerId, Throwable throwable) { validateRunsInMainThread(); log.error("Could not start TaskManager in container {}.", containerId, throwable); final ResourceID resourceId = new ResourceID(containerId.toString()); // release the failed container workerNodeMap.remove(resourceId); resourceManagerClient.releaseAssignedContainer(containerId); notifyAllocatedWorkerStopped(resourceId); // and ask for a new one requestYarnContainerIfRequired(); }
Example #28
Source File: HeartbeatManagerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests that the heartbeat monitors are updated when receiving a new heartbeat signal.
 */
@Test
public void testHeartbeatMonitorUpdate() {
    final long heartbeatTimeout = 1000L;
    final ResourceID ownResourceID = new ResourceID("foobar");
    final ResourceID targetResourceID = new ResourceID("barfoo");

    @SuppressWarnings("unchecked")
    final HeartbeatListener<Object, Object> heartbeatListener = mock(HeartbeatListener.class);

    // every schedule(...) call on the mocked executor hands back the same mocked future
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final ScheduledFuture<?> scheduledFuture = mock(ScheduledFuture.class);
    doReturn(scheduledFuture).when(scheduledExecutor).schedule(any(Runnable.class), anyLong(), any(TimeUnit.class));

    final Object payload = new Object();
    when(heartbeatListener.retrievePayload(any(ResourceID.class))).thenReturn(CompletableFuture.completedFuture(payload));

    final HeartbeatManagerImpl<Object, Object> heartbeatManager =
        new HeartbeatManagerImpl<>(heartbeatTimeout, ownResourceID, heartbeatListener, scheduledExecutor, LOG);

    @SuppressWarnings("unchecked")
    final HeartbeatTarget<Object> heartbeatTarget = mock(HeartbeatTarget.class);

    heartbeatManager.monitorTarget(targetResourceID, heartbeatTarget);
    heartbeatManager.receiveHeartbeat(targetResourceID, payload);

    // expected: one cancel of the scheduled future and two schedule calls with the heartbeat timeout
    verify(scheduledFuture, times(1)).cancel(true);
    verify(scheduledExecutor, times(2)).schedule(any(Runnable.class), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
}
Example #29
Source File: JobManagerRegistration.java From flink with Apache License 2.0 | 5 votes |
public JobManagerRegistration( JobID jobID, ResourceID jobManagerResourceID, JobMasterGateway jobManagerGateway) { this.jobID = Preconditions.checkNotNull(jobID); this.jobManagerResourceID = Preconditions.checkNotNull(jobManagerResourceID); this.jobManagerGateway = Preconditions.checkNotNull(jobManagerGateway); }
Example #30
Source File: SlotManagerImplTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that slots are updated with respect to the latest incoming slot report. This means that * slots for which a report was received are updated accordingly. */ @Test public void testUpdateSlotReport() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceActions resourceManagerActions = new TestingResourceActionsBuilder().build(); final JobID jobId = new JobID(); final AllocationID allocationId = new AllocationID(); final TaskExecutorConnection taskManagerConnection = createTaskExecutorConnection(); final ResourceID resourceId = taskManagerConnection.getResourceID(); final SlotID slotId1 = new SlotID(resourceId, 0); final SlotID slotId2 = new SlotID(resourceId, 1); final ResourceProfile resourceProfile = ResourceProfile.fromResources(1.0, 1); final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile); final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile); final SlotStatus newSlotStatus2 = new SlotStatus(slotId2, resourceProfile, jobId, allocationId); final SlotReport slotReport1 = new SlotReport(Arrays.asList(slotStatus1, slotStatus2)); final SlotReport slotReport2 = new SlotReport(Arrays.asList(newSlotStatus2, slotStatus1)); try (SlotManagerImpl slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { // check that we don't have any slots registered assertTrue(0 == slotManager.getNumberRegisteredSlots()); slotManager.registerTaskManager(taskManagerConnection, slotReport1); TaskManagerSlot slot1 = slotManager.getSlot(slotId1); TaskManagerSlot slot2 = slotManager.getSlot(slotId2); assertTrue(2 == slotManager.getNumberRegisteredSlots()); assertTrue(slot1.getState() == TaskManagerSlot.State.FREE); assertTrue(slot2.getState() == TaskManagerSlot.State.FREE); assertTrue(slotManager.reportSlotStatus(taskManagerConnection.getInstanceID(), slotReport2)); assertTrue(2 == slotManager.getNumberRegisteredSlots()); assertNotNull(slotManager.getSlot(slotId1)); 
assertNotNull(slotManager.getSlot(slotId2)); // slotId2 should have been allocated for allocationId assertEquals(allocationId, slotManager.getSlot(slotId2).getAllocationId()); } }