org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException Java Examples
The following examples show how to use
org.apache.flink.runtime.taskexecutor.exceptions.SlotAllocationException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskExecutor.java From flink with Apache License 2.0 | 6 votes |
/**
 * Allocates the slot identified by {@code slotId} for the given job and allocation.
 *
 * <p>If the slot is free, an allocation is attempted through the task slot table. If the slot
 * is not free but is already allocated for exactly this job and allocation id, the method
 * returns without doing anything.
 *
 * @param slotId identifies the slot to allocate
 * @param jobId job for which the slot is requested
 * @param allocationId allocation id under which the slot is requested
 * @param resourceProfile resource profile handed to the task slot table
 * @throws SlotAllocationException if the free slot could not be allocated
 * @throws SlotOccupiedException if the slot is neither free nor allocated for this job and
 *     allocation id
 */
private void allocateSlot(
        SlotID slotId,
        JobID jobId,
        AllocationID allocationId,
        ResourceProfile resourceProfile) throws SlotAllocationException {
    final int slotIndex = slotId.getSlotNumber();

    if (taskSlotTable.isSlotFree(slotIndex)) {
        final boolean allocated = taskSlotTable.allocateSlot(
            slotIndex,
            jobId,
            allocationId,
            resourceProfile,
            taskManagerConfiguration.getTimeout());

        if (!allocated) {
            log.info("Could not allocate slot for {}.", allocationId);
            throw new SlotAllocationException("Could not allocate slot.");
        }

        log.info("Allocated slot for {}.", allocationId);
        return;
    }

    // The slot is taken; that is fine as long as it is held by this very request.
    if (taskSlotTable.isAllocated(slotIndex, jobId, allocationId)) {
        return;
    }

    final String message = "The slot " + slotId + " has already been allocated for a different job.";
    log.info(message);

    // Report who currently holds the slot so that the caller can react to it.
    final AllocationID currentAllocation = taskSlotTable.getCurrentAllocation(slotIndex);
    throw new SlotOccupiedException(
        message,
        currentAllocation,
        taskSlotTable.getOwningJob(currentAllocation));
}
Example #2
Source File: SlotManager.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Fails the request future of the given pending slot request with a
 * {@link SlotAllocationException} that wraps the given cause.
 *
 * <p>If the pending request has no request future, only a debug message is logged.
 *
 * @param pendingSlotRequest the pending slot request to reject
 * @param cause the reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> requestFuture = pendingSlotRequest.getRequestFuture();

    if (requestFuture == null) {
        LOG.debug(
            "Cannot reject pending slot request {}, since no request has been sent.",
            pendingSlotRequest.getAllocationId());
        return;
    }

    requestFuture.completeExceptionally(new SlotAllocationException(cause));
}
Example #3
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 5 votes |
/**
 * Rejects a pending slot request: its request future is completed exceptionally with a
 * {@link SlotAllocationException} carrying the given cause.
 *
 * <p>When no request future is present, nothing is failed and a debug message is logged.
 *
 * @param pendingSlotRequest the pending slot request to reject
 * @param cause the reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> pendingFuture = pendingSlotRequest.getRequestFuture();

    if (pendingFuture == null) {
        LOG.debug(
            "Cannot reject pending slot request {}, since no request has been sent.",
            pendingSlotRequest.getAllocationId());
        return;
    }

    final SlotAllocationException rejection = new SlotAllocationException(cause);
    pendingFuture.completeExceptionally(rejection);
}
Example #4
Source File: SlotManagerImpl.java From flink with Apache License 2.0 | 5 votes |
/**
 * Completes the request future of the given pending slot request exceptionally, using a
 * {@link SlotAllocationException} built from the given cause.
 *
 * <p>A pending request without a request future is only reported on debug level.
 *
 * @param pendingSlotRequest the pending slot request to reject
 * @param cause the reason for the rejection
 */
private void rejectPendingSlotRequest(PendingSlotRequest pendingSlotRequest, Exception cause) {
    final CompletableFuture<Acknowledge> future = pendingSlotRequest.getRequestFuture();
    final boolean requestWasSent = future != null;

    if (!requestWasSent) {
        LOG.debug(
            "Cannot reject pending slot request {}, since no request has been sent.",
            pendingSlotRequest.getAllocationId());
        return;
    }

    future.completeExceptionally(new SlotAllocationException(cause));
}
Example #5
Source File: SlotManagerTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that a slot request is retried if it times out on the task manager side. */ @Test @SuppressWarnings("unchecked") public void testTaskManagerSlotRequestTimeoutHandling() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceActions resourceManagerActions = mock(ResourceActions.class); final JobID jobId = new JobID(); final AllocationID allocationId = new AllocationID(); final ResourceProfile resourceProfile = new ResourceProfile(42.0, 1337); final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile, "foobar"); final CompletableFuture<Acknowledge> slotRequestFuture1 = new CompletableFuture<>(); final CompletableFuture<Acknowledge> slotRequestFuture2 = new CompletableFuture<>(); final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class); when(taskExecutorGateway.requestSlot( any(SlotID.class), any(JobID.class), eq(allocationId), anyString(), any(ResourceManagerId.class), any(Time.class))).thenReturn(slotRequestFuture1, slotRequestFuture2); final ResourceID resourceId = ResourceID.generate(); final TaskExecutorConnection taskManagerConnection = new TaskExecutorConnection(resourceId, taskExecutorGateway); final SlotID slotId1 = new SlotID(resourceId, 0); final SlotID slotId2 = new SlotID(resourceId, 1); final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile); final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile); final SlotReport slotReport = new SlotReport(Arrays.asList(slotStatus1, slotStatus2)); try (SlotManager slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { slotManager.registerTaskManager(taskManagerConnection, slotReport); slotManager.registerSlotRequest(slotRequest); ArgumentCaptor<SlotID> slotIdCaptor = ArgumentCaptor.forClass(SlotID.class); verify(taskExecutorGateway, times(1)).requestSlot( slotIdCaptor.capture(), eq(jobId), eq(allocationId), anyString(), eq(resourceManagerId), 
any(Time.class)); TaskManagerSlot failedSlot = slotManager.getSlot(slotIdCaptor.getValue()); // let the first attempt fail --> this should trigger a second attempt slotRequestFuture1.completeExceptionally(new SlotAllocationException("Test exception.")); verify(taskExecutorGateway, times(2)).requestSlot( slotIdCaptor.capture(), eq(jobId), eq(allocationId), anyString(), eq(resourceManagerId), any(Time.class)); // the second attempt succeeds slotRequestFuture2.complete(Acknowledge.get()); TaskManagerSlot slot = slotManager.getSlot(slotIdCaptor.getValue()); assertTrue(slot.getState() == TaskManagerSlot.State.ALLOCATED); assertEquals(allocationId, slot.getAllocationId()); if (!failedSlot.getSlotId().equals(slot.getSlotId())) { assertTrue(failedSlot.getState() == TaskManagerSlot.State.FREE); } } }
Example #6
Source File: SlotManagerTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call * fails. */ @Test public void testSlotRequestFailure() throws Exception { try (final SlotManager slotManager = createSlotManager(ResourceManagerId.generate(), new TestingResourceActionsBuilder().build())) { final SlotRequest slotRequest = new SlotRequest(new JobID(), new AllocationID(), ResourceProfile.UNKNOWN, "foobar"); slotManager.registerSlotRequest(slotRequest); final BlockingQueue<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1); final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setRequestSlotFunction(slotIDJobIDAllocationIDStringResourceManagerIdTuple5 -> { requestSlotQueue.offer(slotIDJobIDAllocationIDStringResourceManagerIdTuple5); try { return responseQueue.take(); } catch (InterruptedException ignored) { return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.")); } }) .createTestingTaskExecutorGateway(); final ResourceID taskExecutorResourceId = ResourceID.generate(); final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway); final SlotReport slotReport = new SlotReport(new SlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.UNKNOWN)); final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>(); responseQueue.offer(firstManualSlotRequestResponse); slotManager.registerTaskManager(taskExecutionConnection, slotReport); final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> firstRequest = requestSlotQueue.take(); final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>(); responseQueue.offer(secondManualSlotRequestResponse); // fail first 
request firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception")); final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> secondRequest = requestSlotQueue.take(); assertThat(secondRequest.f2, equalTo(firstRequest.f2)); assertThat(secondRequest.f0, equalTo(firstRequest.f0)); secondManualSlotRequestResponse.complete(Acknowledge.get()); final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0); assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED)); assertThat(slot.getAllocationId(), equalTo(secondRequest.f2)); } }
Example #7
Source File: SlotManagerTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a slot request is retried if it times out on the task manager side. */ @Test @SuppressWarnings("unchecked") public void testTaskManagerSlotRequestTimeoutHandling() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceActions resourceManagerActions = mock(ResourceActions.class); final JobID jobId = new JobID(); final AllocationID allocationId = new AllocationID(); final ResourceProfile resourceProfile = new ResourceProfile(42.0, 1337); final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile, "foobar"); final CompletableFuture<Acknowledge> slotRequestFuture1 = new CompletableFuture<>(); final CompletableFuture<Acknowledge> slotRequestFuture2 = new CompletableFuture<>(); final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class); when(taskExecutorGateway.requestSlot( any(SlotID.class), any(JobID.class), eq(allocationId), anyString(), any(ResourceManagerId.class), any(Time.class))).thenReturn(slotRequestFuture1, slotRequestFuture2); final ResourceID resourceId = ResourceID.generate(); final TaskExecutorConnection taskManagerConnection = new TaskExecutorConnection(resourceId, taskExecutorGateway); final SlotID slotId1 = new SlotID(resourceId, 0); final SlotID slotId2 = new SlotID(resourceId, 1); final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile); final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile); final SlotReport slotReport = new SlotReport(Arrays.asList(slotStatus1, slotStatus2)); try (SlotManagerImpl slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { slotManager.registerTaskManager(taskManagerConnection, slotReport); slotManager.registerSlotRequest(slotRequest); ArgumentCaptor<SlotID> slotIdCaptor = ArgumentCaptor.forClass(SlotID.class); verify(taskExecutorGateway, times(1)).requestSlot( slotIdCaptor.capture(), eq(jobId), eq(allocationId), anyString(), eq(resourceManagerId), 
any(Time.class)); TaskManagerSlot failedSlot = slotManager.getSlot(slotIdCaptor.getValue()); // let the first attempt fail --> this should trigger a second attempt slotRequestFuture1.completeExceptionally(new SlotAllocationException("Test exception.")); verify(taskExecutorGateway, times(2)).requestSlot( slotIdCaptor.capture(), eq(jobId), eq(allocationId), anyString(), eq(resourceManagerId), any(Time.class)); // the second attempt succeeds slotRequestFuture2.complete(Acknowledge.get()); TaskManagerSlot slot = slotManager.getSlot(slotIdCaptor.getValue()); assertTrue(slot.getState() == TaskManagerSlot.State.ALLOCATED); assertEquals(allocationId, slot.getAllocationId()); if (!failedSlot.getSlotId().equals(slot.getSlotId())) { assertTrue(failedSlot.getState() == TaskManagerSlot.State.FREE); } } }
Example #8
Source File: SlotManagerTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call * fails. */ @Test public void testSlotRequestFailure() throws Exception { try (final SlotManagerImpl slotManager = createSlotManager(ResourceManagerId.generate(), new TestingResourceActionsBuilder().build())) { final SlotRequest slotRequest = new SlotRequest(new JobID(), new AllocationID(), ResourceProfile.UNKNOWN, "foobar"); slotManager.registerSlotRequest(slotRequest); final BlockingQueue<Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1); final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setRequestSlotFunction(slotIDJobIDAllocationIDStringResourceManagerIdTuple5 -> { requestSlotQueue.offer(slotIDJobIDAllocationIDStringResourceManagerIdTuple5); try { return responseQueue.take(); } catch (InterruptedException ignored) { return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.")); } }) .createTestingTaskExecutorGateway(); final ResourceID taskExecutorResourceId = ResourceID.generate(); final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway); final SlotReport slotReport = new SlotReport(new SlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.UNKNOWN)); final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>(); responseQueue.offer(firstManualSlotRequestResponse); slotManager.registerTaskManager(taskExecutionConnection, slotReport); final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> firstRequest = requestSlotQueue.take(); final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>(); responseQueue.offer(secondManualSlotRequestResponse); // fail first 
request firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception")); final Tuple5<SlotID, JobID, AllocationID, String, ResourceManagerId> secondRequest = requestSlotQueue.take(); assertThat(secondRequest.f2, equalTo(firstRequest.f2)); assertThat(secondRequest.f0, equalTo(firstRequest.f0)); secondManualSlotRequestResponse.complete(Acknowledge.get()); final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0); assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED)); assertThat(slot.getAllocationId(), equalTo(secondRequest.f2)); } }
Example #9
Source File: TaskExecutor.java From flink with Apache License 2.0 | 4 votes |
@Override public CompletableFuture<Acknowledge> requestSlot( final SlotID slotId, final JobID jobId, final AllocationID allocationId, final ResourceProfile resourceProfile, final String targetAddress, final ResourceManagerId resourceManagerId, final Time timeout) { // TODO: Filter invalid requests from the resource manager by using the instance/registration Id log.info("Receive slot request {} for job {} from resource manager with leader id {}.", allocationId, jobId, resourceManagerId); if (!isConnectedToResourceManager(resourceManagerId)) { final String message = String.format("TaskManager is not connected to the resource manager %s.", resourceManagerId); log.debug(message); return FutureUtils.completedExceptionally(new TaskManagerException(message)); } try { allocateSlot( slotId, jobId, allocationId, resourceProfile); } catch (SlotAllocationException sae) { return FutureUtils.completedExceptionally(sae); } final JobTable.Job job; try { job = jobTable.getOrCreateJob(jobId, () -> registerNewJobAndCreateServices(jobId, targetAddress)); } catch (Exception e) { // free the allocated slot try { taskSlotTable.freeSlot(allocationId); } catch (SlotNotFoundException slotNotFoundException) { // slot no longer existent, this should actually never happen, because we've // just allocated the slot. So let's fail hard in this case! onFatalError(slotNotFoundException); } // release local state under the allocation id. localStateStoresManager.releaseLocalStateForAllocationId(allocationId); // sanity check if (!taskSlotTable.isSlotFree(slotId.getSlotNumber())) { onFatalError(new Exception("Could not free slot " + slotId)); } return FutureUtils.completedExceptionally(new SlotAllocationException("Could not create new job.", e)); } if (job.isConnected()) { offerSlotsToJobManager(jobId); } return CompletableFuture.completedFuture(Acknowledge.get()); }
Example #10
Source File: SlotManagerImplTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that a slot request is retried if it times out on the task manager side. */ @Test public void testTaskManagerSlotRequestTimeoutHandling() throws Exception { final ResourceManagerId resourceManagerId = ResourceManagerId.generate(); final ResourceActions resourceManagerActions = new TestingResourceActionsBuilder().build(); final JobID jobId = new JobID(); final AllocationID allocationId = new AllocationID(); final ResourceProfile resourceProfile = ResourceProfile.fromResources(42.0, 1337); final SlotRequest slotRequest = new SlotRequest(jobId, allocationId, resourceProfile, "foobar"); final CompletableFuture<Acknowledge> slotRequestFuture1 = new CompletableFuture<>(); final CompletableFuture<Acknowledge> slotRequestFuture2 = new CompletableFuture<>(); final Iterator<CompletableFuture<Acknowledge>> slotRequestFutureIterator = Arrays.asList(slotRequestFuture1, slotRequestFuture2).iterator(); final ArrayBlockingQueue<SlotID> slotIds = new ArrayBlockingQueue<>(2); final TestingTaskExecutorGateway taskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setRequestSlotFunction(FunctionUtils.uncheckedFunction( requestSlotParameters -> { slotIds.put(requestSlotParameters.f0); return slotRequestFutureIterator.next(); })) .createTestingTaskExecutorGateway(); final ResourceID resourceId = ResourceID.generate(); final TaskExecutorConnection taskManagerConnection = new TaskExecutorConnection(resourceId, taskExecutorGateway); final SlotID slotId1 = new SlotID(resourceId, 0); final SlotID slotId2 = new SlotID(resourceId, 1); final SlotStatus slotStatus1 = new SlotStatus(slotId1, resourceProfile); final SlotStatus slotStatus2 = new SlotStatus(slotId2, resourceProfile); final SlotReport slotReport = new SlotReport(Arrays.asList(slotStatus1, slotStatus2)); try (SlotManagerImpl slotManager = createSlotManager(resourceManagerId, resourceManagerActions)) { slotManager.registerTaskManager(taskManagerConnection, slotReport); slotManager.registerSlotRequest(slotRequest); 
final SlotID firstSlotId = slotIds.take(); assertThat(slotIds, is(empty())); TaskManagerSlot failedSlot = slotManager.getSlot(firstSlotId); // let the first attempt fail --> this should trigger a second attempt slotRequestFuture1.completeExceptionally(new SlotAllocationException("Test exception.")); // the second attempt succeeds slotRequestFuture2.complete(Acknowledge.get()); final SlotID secondSlotId = slotIds.take(); assertThat(slotIds, is(empty())); TaskManagerSlot slot = slotManager.getSlot(secondSlotId); assertTrue(slot.getState() == TaskManagerSlot.State.ALLOCATED); assertEquals(allocationId, slot.getAllocationId()); if (!failedSlot.getSlotId().equals(slot.getSlotId())) { assertTrue(failedSlot.getState() == TaskManagerSlot.State.FREE); } } }
Example #11
Source File: SlotManagerImplTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the SlotManager retries allocating a slot if the TaskExecutor#requestSlot call * fails. */ @Test public void testSlotRequestFailure() throws Exception { try (final SlotManagerImpl slotManager = createSlotManager(ResourceManagerId.generate(), new TestingResourceActionsBuilder().build())) { final SlotRequest slotRequest = new SlotRequest(new JobID(), new AllocationID(), ResourceProfile.UNKNOWN, "foobar"); slotManager.registerSlotRequest(slotRequest); final BlockingQueue<Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId>> requestSlotQueue = new ArrayBlockingQueue<>(1); final BlockingQueue<CompletableFuture<Acknowledge>> responseQueue = new ArrayBlockingQueue<>(1); final TestingTaskExecutorGateway testingTaskExecutorGateway = new TestingTaskExecutorGatewayBuilder() .setRequestSlotFunction(slotIDJobIDAllocationIDStringResourceManagerIdTuple6 -> { requestSlotQueue.offer(slotIDJobIDAllocationIDStringResourceManagerIdTuple6); try { return responseQueue.take(); } catch (InterruptedException ignored) { return FutureUtils.completedExceptionally(new FlinkException("Response queue was interrupted.")); } }) .createTestingTaskExecutorGateway(); final ResourceID taskExecutorResourceId = ResourceID.generate(); final TaskExecutorConnection taskExecutionConnection = new TaskExecutorConnection(taskExecutorResourceId, testingTaskExecutorGateway); final SlotReport slotReport = new SlotReport(createEmptySlotStatus(new SlotID(taskExecutorResourceId, 0), ResourceProfile.ANY)); final CompletableFuture<Acknowledge> firstManualSlotRequestResponse = new CompletableFuture<>(); responseQueue.offer(firstManualSlotRequestResponse); slotManager.registerTaskManager(taskExecutionConnection, slotReport); final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> firstRequest = requestSlotQueue.take(); final CompletableFuture<Acknowledge> secondManualSlotRequestResponse = new CompletableFuture<>(); 
responseQueue.offer(secondManualSlotRequestResponse); // fail first request firstManualSlotRequestResponse.completeExceptionally(new SlotAllocationException("Test exception")); final Tuple6<SlotID, JobID, AllocationID, ResourceProfile, String, ResourceManagerId> secondRequest = requestSlotQueue.take(); assertThat(secondRequest.f2, equalTo(firstRequest.f2)); assertThat(secondRequest.f0, equalTo(firstRequest.f0)); secondManualSlotRequestResponse.complete(Acknowledge.get()); final TaskManagerSlot slot = slotManager.getSlot(secondRequest.f0); assertThat(slot.getState(), equalTo(TaskManagerSlot.State.ALLOCATED)); assertThat(slot.getAllocationId(), equalTo(secondRequest.f2)); } }