org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException Java Examples
The following examples show how to use
org.apache.flink.runtime.resourcemanager.exceptions.ResourceManagerException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Reacts to a slot request whose allocation on a slot has failed by submitting the
 * still-pending request to {@code internalRequestSlot} once more.
 *
 * @param slotId identifies the slot the request had been assigned to before it failed
 * @param allocationId identifies the failed slot request
 * @param cause the reason for the failure; only logged here
 */
private void handleFailedSlotRequest(SlotID slotId, AllocationID allocationId, Throwable cause) {
    final PendingSlotRequest failedRequest = pendingSlotRequests.get(allocationId);

    LOG.debug("Slot request with allocation id {} failed for slot {}.", allocationId, slotId, cause);

    if (failedRequest == null) {
        LOG.debug("There was not pending slot request with allocation id {}. Probably the request has been fulfilled or cancelled.", allocationId);
        return;
    }

    // Clear the request future before the request is retried.
    failedRequest.setRequestFuture(null);

    try {
        internalRequestSlot(failedRequest);
    } catch (ResourceManagerException retryFailure) {
        // The retry failed too: forget the request and pass the failure on to resourceActions.
        pendingSlotRequests.remove(allocationId);
        resourceActions.notifyAllocationFailure(failedRequest.getJobId(), allocationId, retryFailure);
    }
}
Example #2
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Fetches framework/worker information persisted by a prior incarnation of the RM. */ private CompletableFuture<List<MesosWorkerStore.Worker>> getWorkersAsync() { // if this resource manager is recovering from failure, // then some worker tasks are most likely still alive and we can re-obtain them return CompletableFuture.supplyAsync(() -> { try { final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts = workerStore.recoverWorkers(); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { if (worker.state() == MesosWorkerStore.WorkerState.New) { // remove new workers because allocation requests are transient workerStore.removeWorker(worker.taskID()); } } return tasksFromPreviousAttempts; } catch (final Exception e) { throw new CompletionException(new ResourceManagerException(e)); } }, getRpcService().getExecutor()); }
Example #3
Source File: SlotManagerFailUnfulfillableTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testUnfulfillableRequestsFailWhenOn() { // setup final ResourceProfile availableProfile = ResourceProfile.fromResources(2.0, 100); final ResourceProfile unfulfillableProfile = ResourceProfile.fromResources(2.0, 200); final List<Tuple3<JobID, AllocationID, Exception>> notifiedAllocationFailures = new ArrayList<>(); final SlotManager slotManager = createSlotManagerNotStartingNewTMs(notifiedAllocationFailures); registerFreeSlot(slotManager, availableProfile); // test try { slotManager.registerSlotRequest(slotRequest(unfulfillableProfile)); fail("this should cause an exception"); } catch (ResourceManagerException exception) { assertTrue(ExceptionUtils.findThrowable(exception, UnfulfillableSlotRequestException.class).isPresent()); } // assert assertEquals(0, notifiedAllocationFailures.size()); assertEquals(0, slotManager.getNumberPendingSlotRequests()); }
Example #4
Source File: ResourceManagerJobMasterTest.java From flink with Apache License 2.0 | 6 votes |
/** * Check and verify return RegistrationResponse. Decline when failed to start a * job master Leader retrieval listener. */ @Test public void testRegisterJobMasterWithFailureLeaderListener() throws Exception { JobID unknownJobIDToHAServices = new JobID(); // this should fail because we try to register a job leader listener for an unknown job id CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), unknownJobIDToHAServices, TIMEOUT); try { registrationFuture.get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS); fail("Expected to fail with a ResourceManagerException."); } catch (ExecutionException e) { assertTrue(ExceptionUtils.stripExecutionException(e) instanceof ResourceManagerException); } // ignore the reported error testingFatalErrorHandler.clearError(); }
Example #5
Source File: MesosResourceManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public Collection<ResourceProfile> startNewWorker(ResourceProfile resourceProfile) { LOG.info("Starting a new worker."); try { // generate new workers into persistent state and launch associated actors MesosWorkerStore.Worker worker = MesosWorkerStore.Worker.newWorker(workerStore.newTaskID(), resourceProfile); workerStore.putWorker(worker); workersInNew.put(extractResourceID(worker.taskID()), worker); LaunchableMesosWorker launchable = createLaunchableMesosWorker(worker.taskID()); LOG.info("Scheduling Mesos task {} with ({} MB, {} cpus).", launchable.taskID().getValue(), launchable.taskRequest().getMemory(), launchable.taskRequest().getCPUs()); // tell the task monitor about the new plans taskMonitor.tell(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker)), selfActor); // tell the launch coordinator to launch the new tasks launchCoordinator.tell(new LaunchCoordinator.Launch(Collections.singletonList(launchable)), selfActor); return slotsPerWorker; } catch (Exception ex) { onFatalError(new ResourceManagerException("Unable to request new workers.", ex)); return Collections.emptyList(); } }
Example #6
Source File: SlotManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Tries to serve the given slot request.
 *
 * <p>A matching slot is allocated directly if one is found. Otherwise a free matching pending
 * task manager slot is looked up, and if there is none, more resources are requested via
 * {@code allocateResource}. When either step yields a pending slot, the request is assigned
 * to it; when neither does, nothing further happens here.
 *
 * @param pendingSlotRequest to allocate a slot for
 * @throws ResourceManagerException if the resource manager cannot allocate more resource
 */
private void internalRequestSlot(PendingSlotRequest pendingSlotRequest) throws ResourceManagerException {
    final ResourceProfile requestedProfile = pendingSlotRequest.getResourceProfile();

    final TaskManagerSlot matchingSlot = findMatchingSlot(requestedProfile);
    if (matchingSlot != null) {
        allocateSlot(matchingSlot, pendingSlotRequest);
        return;
    }

    Optional<PendingTaskManagerSlot> pendingSlot = findFreeMatchingPendingTaskManagerSlot(requestedProfile);
    if (!pendingSlot.isPresent()) {
        pendingSlot = allocateResource(requestedProfile);
    }

    if (pendingSlot.isPresent()) {
        assignPendingTaskManagerSlot(pendingSlotRequest, pendingSlot.get());
    }
}
Example #7
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 6 votes |
/** * Fetches framework/worker information persisted by a prior incarnation of the RM. */ private CompletableFuture<List<MesosWorkerStore.Worker>> getWorkersAsync() { // if this resource manager is recovering from failure, // then some worker tasks are most likely still alive and we can re-obtain them return CompletableFuture.supplyAsync(() -> { try { final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts = workerStore.recoverWorkers(); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { if (worker.state() == MesosWorkerStore.WorkerState.New) { // remove new workers because allocation requests are transient workerStore.removeWorker(worker.taskID()); } } return tasksFromPreviousAttempts; } catch (final Exception e) { throw new CompletionException(new ResourceManagerException(e)); } }, getRpcService().getExecutor()); }
Example #8
Source File: SlotManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Asks {@code resourceActions} to allocate resources for the given profile and records one
 * pending task manager slot in {@code pendingSlots} for every profile that comes back.
 *
 * @param resourceProfile profile for which resources are requested
 * @return the pending slot created for the first returned profile, or an empty optional if
 *     no profile was returned
 * @throws ResourceManagerException declared for failures of the underlying allocation call
 */
private Optional<PendingTaskManagerSlot> allocateResource(ResourceProfile resourceProfile) throws ResourceManagerException {
    final Collection<ResourceProfile> requestedSlots = resourceActions.allocateResource(resourceProfile);

    if (requestedSlots.isEmpty()) {
        return Optional.empty();
    }

    // Every returned profile becomes a pending slot; only the first one is handed to the caller.
    PendingTaskManagerSlot firstPendingSlot = null;
    for (ResourceProfile slotProfile : requestedSlots) {
        final PendingTaskManagerSlot pendingSlot = new PendingTaskManagerSlot(slotProfile);
        pendingSlots.put(pendingSlot.getTaskManagerSlotId(), pendingSlot);

        if (firstPendingSlot == null) {
            firstPendingSlot = pendingSlot;
        }
    }

    return Optional.of(firstPendingSlot);
}
Example #9
Source File: SlotManager.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Reacts to a slot request whose allocation on a slot has failed by submitting the
 * still-pending request to {@code internalRequestSlot} once more.
 *
 * @param slotId identifies the slot the request had been assigned to before it failed
 * @param allocationId identifies the failed slot request
 * @param cause the reason for the failure; only logged here
 */
private void handleFailedSlotRequest(SlotID slotId, AllocationID allocationId, Throwable cause) {
    final PendingSlotRequest failedRequest = pendingSlotRequests.get(allocationId);

    LOG.debug("Slot request with allocation id {} failed for slot {}.", allocationId, slotId, cause);

    if (failedRequest == null) {
        LOG.debug("There was not pending slot request with allocation id {}. Probably the request has been fulfilled or cancelled.", allocationId);
        return;
    }

    // Clear the request future before the request is retried.
    failedRequest.setRequestFuture(null);

    try {
        internalRequestSlot(failedRequest);
    } catch (ResourceManagerException retryFailure) {
        // The retry failed too: forget the request and pass the failure on to resourceActions.
        pendingSlotRequests.remove(allocationId);
        resourceActions.notifyAllocationFailure(failedRequest.getJobId(), allocationId, retryFailure);
    }
}
Example #10
Source File: ResourceManagerJobMasterTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/** * Check and verify return RegistrationResponse. Decline when failed to start a * job master Leader retrieval listener. */ @Test public void testRegisterJobMasterWithFailureLeaderListener() throws Exception { JobID unknownJobIDToHAServices = new JobID(); // this should fail because we try to register a job leader listener for an unknown job id CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), unknownJobIDToHAServices, TIMEOUT); try { registrationFuture.get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS); fail("Expected to fail with a ResourceManagerException."); } catch (ExecutionException e) { assertTrue(ExceptionUtils.stripExecutionException(e) instanceof ResourceManagerException); } // ignore the reported error testingFatalErrorHandler.clearError(); }
Example #11
Source File: SlotManagerFailUnfulfillableTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testUnfulfillableRequestsFailWhenOn() { // setup final ResourceProfile availableProfile = new ResourceProfile(2.0, 100); final ResourceProfile unfulfillableProfile = new ResourceProfile(2.0, 200); final List<Tuple3<JobID, AllocationID, Exception>> notifiedAllocationFailures = new ArrayList<>(); final SlotManager slotManager = createSlotManagerNotStartingNewTMs(notifiedAllocationFailures); registerFreeSlot(slotManager, availableProfile); // test try { slotManager.registerSlotRequest(slotRequest(unfulfillableProfile)); fail("this should cause an exception"); } catch (ResourceManagerException exception) { assertTrue(ExceptionUtils.findThrowable(exception, UnfulfillableSlotRequestException.class).isPresent()); } // assert assertEquals(0, notifiedAllocationFailures.size()); assertEquals(0, slotManager.getNumberPendingSlotRequests()); }
Example #12
Source File: ResourceManagerJobMasterTest.java From flink with Apache License 2.0 | 6 votes |
/** * Check and verify return RegistrationResponse. Decline when failed to start a * job master Leader retrieval listener. */ @Test public void testRegisterJobMasterWithFailureLeaderListener() throws Exception { JobID unknownJobIDToHAServices = new JobID(); // this should fail because we try to register a job leader listener for an unknown job id CompletableFuture<RegistrationResponse> registrationFuture = resourceManagerGateway.registerJobManager( jobMasterGateway.getFencingToken(), jobMasterResourceId, jobMasterGateway.getAddress(), unknownJobIDToHAServices, TIMEOUT); try { registrationFuture.get(TIMEOUT.toMilliseconds(), TimeUnit.MILLISECONDS); fail("Expected to fail with a ResourceManagerException."); } catch (ExecutionException e) { assertTrue(ExceptionUtils.stripExecutionException(e) instanceof ResourceManagerException); } // ignore the reported error testingFatalErrorHandler.clearError(); }
Example #13
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Reacts to a slot request whose allocation on a slot has failed by submitting the
 * still-pending request to {@code internalRequestSlot} once more.
 *
 * @param slotId identifies the slot the request had been assigned to before it failed
 * @param allocationId identifies the failed slot request
 * @param cause the reason for the failure; only logged here
 */
private void handleFailedSlotRequest(SlotID slotId, AllocationID allocationId, Throwable cause) {
    final PendingSlotRequest failedRequest = pendingSlotRequests.get(allocationId);

    LOG.debug("Slot request with allocation id {} failed for slot {}.", allocationId, slotId, cause);

    if (failedRequest == null) {
        LOG.debug("There was not pending slot request with allocation id {}. Probably the request has been fulfilled or cancelled.", allocationId);
        return;
    }

    // Clear the request future before the request is retried.
    failedRequest.setRequestFuture(null);

    try {
        internalRequestSlot(failedRequest);
    } catch (ResourceManagerException retryFailure) {
        // The retry failed too: forget the request and pass the failure on to resourceActions.
        pendingSlotRequests.remove(allocationId);
        resourceActions.notifyAllocationFailure(failedRequest.getJobId(), allocationId, retryFailure);
    }
}
Example #14
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 6 votes |
/** * Fetches framework/worker information persisted by a prior incarnation of the RM. */ private CompletableFuture<List<MesosWorkerStore.Worker>> getWorkersAsync() { // if this resource manager is recovering from failure, // then some worker tasks are most likely still alive and we can re-obtain them return CompletableFuture.supplyAsync(() -> { try { final List<MesosWorkerStore.Worker> tasksFromPreviousAttempts = workerStore.recoverWorkers(); for (final MesosWorkerStore.Worker worker : tasksFromPreviousAttempts) { if (worker.state() == MesosWorkerStore.WorkerState.New) { // remove new workers because allocation requests are transient workerStore.removeWorker(worker.taskID()); } } return tasksFromPreviousAttempts; } catch (final Exception e) { throw new CompletionException(new ResourceManagerException(e)); } }, getRpcService().getExecutor()); }
Example #15
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 6 votes |
private void fulfillPendingSlotRequestWithPendingTaskManagerSlot(PendingSlotRequest pendingSlotRequest) throws ResourceManagerException { ResourceProfile resourceProfile = pendingSlotRequest.getResourceProfile(); Optional<PendingTaskManagerSlot> pendingTaskManagerSlotOptional = findFreeMatchingPendingTaskManagerSlot(resourceProfile); if (!pendingTaskManagerSlotOptional.isPresent()) { pendingTaskManagerSlotOptional = allocateResource(resourceProfile); } OptionalConsumer.of(pendingTaskManagerSlotOptional) .ifPresent(pendingTaskManagerSlot -> assignPendingTaskManagerSlot(pendingSlotRequest, pendingTaskManagerSlot)) .ifNotPresent(() -> { // request can not be fulfilled by any free slot or pending slot that can be allocated, // check whether it can be fulfilled by allocated slots if (failUnfulfillableRequest && !isFulfillableByRegisteredOrPendingSlots(pendingSlotRequest.getResourceProfile())) { throw new UnfulfillableSlotRequestException(pendingSlotRequest.getAllocationId(), pendingSlotRequest.getResourceProfile()); } }); }
Example #16
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 6 votes |
/** * Tries to allocate a slot for the given slot request. If there is no slot available, the * resource manager is informed to allocate more resources and a timeout for the request is * registered. * * @param pendingSlotRequest to allocate a slot for * @throws ResourceManagerException if the slot request failed or is unfulfillable */ private void internalRequestSlot(PendingSlotRequest pendingSlotRequest) throws ResourceManagerException { final ResourceProfile resourceProfile = pendingSlotRequest.getResourceProfile(); TaskManagerSlot taskManagerSlot = findMatchingSlot(resourceProfile); if (taskManagerSlot != null) { allocateSlot(taskManagerSlot, pendingSlotRequest); } else { Optional<PendingTaskManagerSlot> pendingTaskManagerSlotOptional = findFreeMatchingPendingTaskManagerSlot(resourceProfile); if (!pendingTaskManagerSlotOptional.isPresent()) { pendingTaskManagerSlotOptional = allocateResource(resourceProfile); } if (pendingTaskManagerSlotOptional.isPresent()) { assignPendingTaskManagerSlot(pendingSlotRequest, pendingTaskManagerSlotOptional.get()); } else { // request can not be fulfilled by any free slot or pending slot that can be allocated, // check whether it can be fulfilled by allocated slots if (failUnfulfillableRequest && !isFulfillableByRegisteredSlots(pendingSlotRequest.getResourceProfile())) { throw new UnfulfillableSlotRequestException(pendingSlotRequest.getAllocationId(), pendingSlotRequest.getResourceProfile()); } } } }
Example #17
Source File: SlotManagerImplTest.java From flink with Apache License 2.0 | 6 votes |
/** * Tests that the slot request fails if we cannot allocate more resources. */ @Test public void testSlotRequestWithResourceAllocationFailure() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceProfile resourceProfile = ResourceProfile.fromResources(42.0, 1337); final SlotRequest slotRequest = new SlotRequest( new JobID(), new AllocationID(), resourceProfile, "localhost"); ResourceActions resourceManagerActions = new TestingResourceActionsBuilder() .setAllocateResourceFunction(value -> false) .build(); try (SlotManager slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { slotManager.registerSlotRequest(slotRequest); fail("The slot request should have failed with a ResourceManagerException."); } catch (ResourceManagerException e) { // expected exception } }
Example #18
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Deregisters the application through {@code internalDeregisterApplication}.
 *
 * <p>A {@link ResourceManagerException} from the deregistration is logged as a warning and not
 * propagated, so the returned future is completed with an acknowledgement in either case.
 *
 * @param finalStatus of the Flink application
 * @param diagnostics diagnostics message for the Flink application or {@code null}
 * @return an already completed future holding {@code Acknowledge.get()}
 */
@Override
public CompletableFuture<Acknowledge> deregisterApplication(
        final ApplicationStatus finalStatus,
        @Nullable final String diagnostics) {
    log.info("Shut down cluster because application is in {}, diagnostics {}.", finalStatus, diagnostics);

    try {
        internalDeregisterApplication(finalStatus, diagnostics);
    } catch (ResourceManagerException deregistrationFailure) {
        // Not rethrown: the caller is acknowledged even if deregistration failed.
        log.warn("Could not properly shutdown the application.", deregistrationFailure);
    }

    final Acknowledge acknowledgement = Acknowledge.get();
    return CompletableFuture.completedFuture(acknowledgement);
}
Example #19
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Accept offers as advised by the launch coordinator. * * <p>Acceptance is routed through the RM to update the persistent state before * forwarding the message to Mesos. */ public void acceptOffers(AcceptOffers msg) { try { List<TaskMonitor.TaskGoalStateUpdated> toMonitor = new ArrayList<>(msg.operations().size()); // transition the persistent state of some tasks to Launched for (Protos.Offer.Operation op : msg.operations()) { if (op.getType() == Protos.Offer.Operation.Type.LAUNCH) { for (Protos.TaskInfo info : op.getLaunch().getTaskInfosList()) { MesosWorkerStore.Worker worker = workersInNew.remove(extractResourceID(info.getTaskId())); assert (worker != null); worker = worker.launchWorker(info.getSlaveId(), msg.hostname()); workerStore.putWorker(worker); workersInLaunch.put(extractResourceID(worker.taskID()), worker); LOG.info("Launching Mesos task {} on host {}.", worker.taskID().getValue(), worker.hostname().get()); toMonitor.add(new TaskMonitor.TaskGoalStateUpdated(extractGoalState(worker))); } } } // tell the task monitor about the new plans for (TaskMonitor.TaskGoalStateUpdated update : toMonitor) { taskMonitor.tell(update, selfActor); } // send the acceptance message to Mesos schedulerDriver.acceptOffers(msg.offerIds(), msg.operations(), msg.filters()); } catch (Exception ex) { onFatalError(new ResourceManagerException("unable to accept offers", ex)); } }
Example #20
Source File: KubernetesResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adds a worker node to {@code workerNodes} for every pod carrying this cluster's TaskManager
 * labels and advances {@code currentMaxAttemptId}.
 *
 * <p>After the call, {@code currentMaxAttemptId} is one greater than the larger of its previous
 * value and the highest attempt found among the listed pods.
 *
 * @throws ResourceManagerException declared by the signature; no statement in this body
 *     throws it directly
 */
private void recoverWorkerNodesFromPreviousAttempts() throws ResourceManagerException {
    final List<KubernetesPod> podList = kubeClient.getPodsWithLabels(KubernetesUtils.getTaskManagerLabels(clusterId));

    for (KubernetesPod pod : podList) {
        final KubernetesWorkerNode worker = new KubernetesWorkerNode(new ResourceID(pod.getName()));
        workerNodes.put(worker.getResourceID(), worker);

        final long attempt = worker.getAttempt();
        if (attempt > currentMaxAttemptId) {
            currentMaxAttemptId = attempt;
        }
    }

    // Move on to the next attempt id. This used to be a "++" buried in the log call's argument
    // list, where editing or removing the log line would silently drop the state change.
    currentMaxAttemptId++;

    log.info("Recovered {} pods from previous attempts, current attempt id is {}.",
        workerNodes.size(),
        currentMaxAttemptId);
}
Example #21
Source File: MesosResourceManager.java From flink with Apache License 2.0 | 5 votes |
/** * Handles a termination notification from a task monitor. */ public void taskTerminated(TaskMonitor.TaskTerminated message) { Protos.TaskID taskID = message.taskID(); Protos.TaskStatus status = message.status(); // note: this callback occurs for failed containers and for released containers alike final ResourceID id = extractResourceID(taskID); boolean existed; try { existed = workerStore.removeWorker(taskID); } catch (Exception ex) { onFatalError(new ResourceManagerException("unable to remove worker", ex)); return; } if (!existed) { LOG.info("Received a termination notice for an unrecognized worker: {}", id); return; } // check if this is a failed task or a released task assert(!workersInNew.containsKey(id)); if (workersBeingReturned.remove(id) != null) { // regular finished worker that we released LOG.info("Worker {} finished successfully with message: {}", id, status.getMessage()); } else { // failed worker, either at startup, or running final MesosWorkerStore.Worker launched = workersInLaunch.remove(id); assert(launched != null); LOG.info("Worker {} failed with status: {}, reason: {}, message: {}.", id, status.getState(), status.getReason(), status.getMessage()); startNewWorker(launched.workerResourceSpec()); } closeTaskManagerConnection(id, new Exception(status.getMessage())); }
Example #22
Source File: SlotManagerTest.java From flink with Apache License 2.0 | 5 votes |
/** * Tests that the slot request fails if we cannot allocate more resources. */ @Test public void testSlotRequestWithResourceAllocationFailure() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceProfile resourceProfile = new ResourceProfile(42.0, 1337); final SlotRequest slotRequest = new SlotRequest( new JobID(), new AllocationID(), resourceProfile, "localhost"); ResourceActions resourceManagerActions = new TestingResourceActionsBuilder() .setAllocateResourceFunction(value -> { throw new ResourceManagerException("Test exception"); }) .build(); try (SlotManager slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { slotManager.registerSlotRequest(slotRequest); fail("The slot request should have failed with a ResourceManagerException."); } catch (ResourceManagerException e) { // expected exception } }
Example #23
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Hands the slot report of a TaskManager to the slot manager.
 *
 * <p>The report is accepted only if a registration exists in {@code taskExecutors} for the
 * given resource id and its instance id equals the supplied registration id. If the slot
 * manager then reports a successful registration, {@code onTaskManagerRegistration} is invoked.
 *
 * @param taskManagerResourceId resource id of the reporting TaskManager
 * @param taskManagerRegistrationId registration id the TaskManager reports under
 * @param slotReport slot report passed on to the slot manager
 * @param timeout not used by this method
 * @return a future completed with an acknowledgement when the report was accepted, or
 *     completed exceptionally with a {@link ResourceManagerException} when the TaskManager
 *     is not registered or the registration id does not match
 */
@Override
public CompletableFuture<Acknowledge> sendSlotReport(ResourceID taskManagerResourceId, InstanceID taskManagerRegistrationId, SlotReport slotReport, Time timeout) {
    final WorkerRegistration<WorkerType> workerTypeWorkerRegistration = taskExecutors.get(taskManagerResourceId);

    // A TaskManager without a registration (never registered, or already removed) used to
    // trigger a NullPointerException on the dereference below; fail the future instead.
    if (workerTypeWorkerRegistration == null) {
        return FutureUtils.completedExceptionally(new ResourceManagerException(String.format("Unknown TaskManager resource id %s.", taskManagerResourceId)));
    }

    if (!workerTypeWorkerRegistration.getInstanceID().equals(taskManagerRegistrationId)) {
        return FutureUtils.completedExceptionally(new ResourceManagerException(String.format("Unknown TaskManager registration id %s.", taskManagerRegistrationId)));
    }

    if (slotManager.registerTaskManager(workerTypeWorkerRegistration, slotReport)) {
        onTaskManagerRegistration(workerTypeWorkerRegistration);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
Example #24
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Registers a slot request with the slot manager on behalf of a job master.
 *
 * <p>The request is rejected if no job manager is registered for the request's job, or if the
 * given job master id differs from the one stored in that registration.
 *
 * @param jobMasterId id of the job master issuing the request
 * @param slotRequest the slot request to register
 * @param timeout not used by this method
 * @return a future completed with an acknowledgement once the request is registered, or
 *     completed exceptionally with a {@link ResourceManagerException} otherwise
 */
@Override
public CompletableFuture<Acknowledge> requestSlot(
        JobMasterId jobMasterId,
        SlotRequest slotRequest,
        final Time timeout) {

    final JobID jobId = slotRequest.getJobId();
    final JobManagerRegistration registration = jobManagerRegistrations.get(jobId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("Could not find registered job manager for job " + jobId + '.'));
    }

    if (!Objects.equals(jobMasterId, registration.getJobMasterId())) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("The job leader's id " +
            registration.getJobMasterId() + " does not match the received id " + jobMasterId + '.'));
    }

    log.info("Request slot with profile {} for job {} with allocation id {}.",
        slotRequest.getResourceProfile(),
        slotRequest.getJobId(),
        slotRequest.getAllocationId());

    try {
        slotManager.registerSlotRequest(slotRequest);
    } catch (ResourceManagerException registrationFailure) {
        return FutureUtils.completedExceptionally(registrationFailure);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
Example #25
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Clears the job manager registrations, the job master resource id registrations and the
 * task executors, then clears the job leader id service.
 *
 * <p>An exception from the job leader id service is reported through {@code onFatalError}.
 * The last step calls {@code clearStateAsync()} and stores its result in
 * {@code clearStateFuture}.
 */
private void clearStateInternal() {
    jobManagerRegistrations.clear();
    jmResourceIdRegistrations.clear();
    taskExecutors.clear();

    try {
        jobLeaderIdService.clear();
    } catch (Exception clearFailure) {
        final ResourceManagerException fatalError =
            new ResourceManagerException("Could not properly clear the job leader id service.", clearFailure);
        onFatalError(fatalError);
    }

    clearStateFuture = clearStateAsync();
}
Example #26
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Clears the job manager registrations, the job master resource id registrations and the
 * task executors, then clears the job leader id service.
 *
 * <p>An exception from the job leader id service is reported through {@code onFatalError}.
 * The last step calls {@code clearStateAsync()} and stores its result in
 * {@code clearStateFuture}.
 */
private void clearStateInternal() {
    jobManagerRegistrations.clear();
    jmResourceIdRegistrations.clear();
    taskExecutors.clear();

    try {
        jobLeaderIdService.clear();
    } catch (Exception clearFailure) {
        final ResourceManagerException fatalError =
            new ResourceManagerException("Could not properly clear the job leader id service.", clearFailure);
        onFatalError(fatalError);
    }

    clearStateFuture = clearStateAsync();
}
Example #27
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Deregisters the application through {@code internalDeregisterApplication}.
 *
 * <p>A {@link ResourceManagerException} from the deregistration is logged as a warning and not
 * propagated, so the returned future is completed with an acknowledgement in either case.
 *
 * @param finalStatus of the Flink application
 * @param diagnostics diagnostics message for the Flink application or {@code null}
 * @return an already completed future holding {@code Acknowledge.get()}
 */
@Override
public CompletableFuture<Acknowledge> deregisterApplication(
        final ApplicationStatus finalStatus,
        @Nullable final String diagnostics) {
    log.info("Shut down cluster because application is in {}, diagnostics {}.", finalStatus, diagnostics);

    try {
        internalDeregisterApplication(finalStatus, diagnostics);
    } catch (ResourceManagerException deregistrationFailure) {
        // Not rethrown: the caller is acknowledged even if deregistration failed.
        log.warn("Could not properly shutdown the application.", deregistrationFailure);
    }

    final Acknowledge acknowledgement = Acknowledge.get();
    return CompletableFuture.completedFuture(acknowledgement);
}
Example #28
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Registers a slot request with the slot manager on behalf of a job master.
 *
 * <p>The request is rejected if no job manager is registered for the request's job, or if the
 * given job master id differs from the one stored in that registration.
 *
 * @param jobMasterId id of the job master issuing the request
 * @param slotRequest the slot request to register
 * @param timeout not used by this method
 * @return a future completed with an acknowledgement once the request is registered, or
 *     completed exceptionally with a {@link ResourceManagerException} otherwise
 */
@Override
public CompletableFuture<Acknowledge> requestSlot(
        JobMasterId jobMasterId,
        SlotRequest slotRequest,
        final Time timeout) {

    final JobID jobId = slotRequest.getJobId();
    final JobManagerRegistration registration = jobManagerRegistrations.get(jobId);

    if (registration == null) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("Could not find registered job manager for job " + jobId + '.'));
    }

    if (!Objects.equals(jobMasterId, registration.getJobMasterId())) {
        return FutureUtils.completedExceptionally(new ResourceManagerException("The job leader's id " +
            registration.getJobMasterId() + " does not match the received id " + jobMasterId + '.'));
    }

    log.info("Request slot with profile {} for job {} with allocation id {}.",
        slotRequest.getResourceProfile(),
        slotRequest.getJobId(),
        slotRequest.getAllocationId());

    try {
        slotManager.registerSlotRequest(slotRequest);
    } catch (ResourceManagerException registrationFailure) {
        return FutureUtils.completedExceptionally(registrationFailure);
    }

    return CompletableFuture.completedFuture(Acknowledge.get());
}
Example #29
Source File: ResourceManager.java From flink with Apache License 2.0 | 5 votes |
/**
 * Starts the resource manager services.
 *
 * <p>If starting them fails, the failure is wrapped in a {@link ResourceManagerException},
 * reported through {@code onFatalError}, and then thrown to the caller.
 *
 * @throws Exception the wrapping {@link ResourceManagerException} if the services could not
 *     be started
 */
@Override
public void onStart() throws Exception {
    try {
        startResourceManagerServices();
    } catch (Exception startupFailure) {
        final String failureMessage = String.format("Could not start the ResourceManager %s", getAddress());
        final ResourceManagerException wrappedFailure = new ResourceManagerException(failureMessage, startupFailure);

        onFatalError(wrappedFailure);
        throw wrappedFailure;
    }
}
Example #30
Source File: SlotManagerTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Adapts a function that yields a number of slots into one that yields a collection of
 * resource profiles.
 *
 * <p>The returned function calls {@code function} with the requested profile and returns a
 * collection containing that same profile once per reported slot.
 *
 * @param function function yielding the number of slots for a resource profile
 * @return function yielding the requested profile repeated that many times
 */
private static FunctionWithException<ResourceProfile, Collection<ResourceProfile>, ResourceManagerException> convert(FunctionWithException<ResourceProfile, Integer, ResourceManagerException> function) {
    return requestedProfile -> {
        final int slotCount = function.apply(requestedProfile);

        final Collection<ResourceProfile> repeatedProfiles = new ArrayList<>(slotCount);
        int remaining = slotCount;
        while (remaining > 0) {
            repeatedProfiles.add(requestedProfile);
            remaining--;
        }

        return repeatedProfiles;
    };
}