org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException Java Examples
The following examples show how to use
org.apache.flink.runtime.jobmanager.scheduler.NoResourceAvailableException.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SimpleSlotProvider.java From Flink-CEPplus with Apache License 2.0 | 7 votes |
/**
 * Hands out the next free slot, if there is one, as an already completed future.
 *
 * <p>Under {@code lock}, the head of {@code slots} is removed, wrapped into a {@link SimpleSlot}
 * and remembered in {@code allocatedSlots} under the given request id. If no slot is left, the
 * returned future is completed exceptionally with a {@link NoResourceAvailableException}.
 *
 * <p>{@code task}, {@code slotProfile}, {@code allowQueued} and {@code allocationTimeout} are not
 * consulted by this implementation.
 *
 * @param slotRequestId id under which the handed-out slot is registered
 * @return a completed future holding the slot, or an exceptionally completed future
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
        SlotRequestId slotRequestId,
        ScheduledUnit task,
        SlotProfile slotProfile,
        boolean allowQueued,
        Time allocationTimeout) {
    synchronized (lock) {
        final SlotContext nextFreeSlot = slots.isEmpty() ? null : slots.removeFirst();
        if (nextFreeSlot == null) {
            return FutureUtils.completedExceptionally(new NoResourceAvailableException());
        }

        SimpleSlot logicalSlot = new SimpleSlot(nextFreeSlot, this, 0);
        allocatedSlots.put(slotRequestId, nextFreeSlot);
        return CompletableFuture.completedFuture(logicalSlot);
    }
}
Example #2
Source File: RestartIndividualStrategy.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Override public void onTaskFailure(Execution taskExecution, Throwable cause) { executionGraph.getJobMasterMainThreadExecutor().assertRunningInMainThread(); // to better handle the lack of resources (potentially by a scale-in), we // make failures due to missing resources global failures if (cause instanceof NoResourceAvailableException) { LOG.info("Not enough resources to schedule {} - triggering full recovery.", taskExecution); executionGraph.failGlobal(cause); return; } LOG.info("Recovering task failure for {} (#{}) via individual restart.", taskExecution.getVertex().getTaskNameWithSubtaskIndex(), taskExecution.getAttemptNumber()); numTaskFailures.inc(); // trigger the restart once the task has reached its terminal state // Note: currently all tasks passed here are already in their terminal state, // so we could actually avoid the future. We use it anyways because it is cheap and // it helps to support better testing final CompletableFuture<ExecutionState> terminationFuture = taskExecution.getTerminalStateFuture(); terminationFuture.thenRun( () -> performExecutionVertexRestart(taskExecution.getVertex(), taskExecution.getGlobalModVersion())); }
Example #3
Source File: DefaultSchedulerTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies that the job ends up {@link JobStatus#FAILED} when pending slot requests time out
 * while restarts are disabled, and that the reported failure contains a
 * {@link NoResourceAvailableException} carrying the slot-request-timeout message.
 */
@Test
public void failJobIfNotEnoughResources() throws Exception {
    final JobGraph jobGraph = singleNonParallelJobVertexJobGraph();
    testRestartBackoffTimeStrategy.setCanRestart(false);
    // keep the slot requests pending so that they can be timed out explicitly below
    testExecutionSlotAllocator.disableAutoCompletePendingRequests();

    final DefaultScheduler scheduler = createSchedulerAndStartScheduling(jobGraph);

    testExecutionSlotAllocator.timeoutPendingRequests();

    waitForTermination(scheduler);
    final JobStatus jobStatus = scheduler.requestJobStatus();
    // asserted once: jobStatus is an immutable local, re-checking it later adds nothing
    assertThat(jobStatus, is(equalTo(JobStatus.FAILED)));

    Throwable failureCause = scheduler.requestJob()
        .getFailureInfo()
        .getException()
        .deserializeError(DefaultSchedulerTest.class.getClassLoader());
    assertTrue(findThrowable(failureCause, NoResourceAvailableException.class).isPresent());
    assertTrue(
        findThrowableWithMessage(
            failureCause,
            "Could not allocate the required slot within slot request timeout.").isPresent());
}
Example #4
Source File: MiniClusterITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception { try { setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES); fail("Job should fail."); } catch (JobExecutionException e) { assertTrue(findThrowableWithMessage(e, "Job execution failed.").isPresent()); assertTrue(findThrowable(e, NoResourceAvailableException.class).isPresent()); //TODO: remove the legacy scheduler message check once legacy scheduler is removed final String legacySchedulerErrorMessage = "Could not allocate enough slots"; final String ngSchedulerErrorMessage = "Could not allocate the required slot within slot request timeout"; assertTrue(findThrowableWithMessage(e, legacySchedulerErrorMessage).isPresent() || findThrowableWithMessage(e, ngSchedulerErrorMessage).isPresent()); } }
Example #5
Source File: MiniClusterITCase.java From flink with Apache License 2.0 | 6 votes |
@Test public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception { try { setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER); fail("Job should fail."); } catch (JobExecutionException e) { assertTrue(findThrowableWithMessage(e, "Job execution failed.").isPresent()); assertTrue(findThrowable(e, NoResourceAvailableException.class).isPresent()); //TODO: remove the legacy scheduler message check once legacy scheduler is removed final String legacySchedulerErrorMessage = "Slots required: 2, slots allocated: 1"; final String ngSchedulerErrorMessage = "Could not allocate the required slot within slot request timeout"; assertTrue(findThrowableWithMessage(e, legacySchedulerErrorMessage).isPresent() || findThrowableWithMessage(e, ngSchedulerErrorMessage).isPresent()); } }
Example #6
Source File: SlotPoolImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Handles a slot request to the ResourceManager that has failed.
 *
 * <p>If the request is no longer registered, the failure is only logged on debug level. A failure
 * that {@code isBatchRequestAndFailureCanBeIgnored} accepts is ignored as well. Otherwise the
 * pending request is removed and its slot future is completed exceptionally with a
 * {@link NoResourceAvailableException} that wraps the failure.
 *
 * @param slotRequestID id of the failed slot request
 * @param failure reason why the request failed
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
    final PendingRequest pendingRequest = pendingRequests.getValueByKeyA(slotRequestID);

    if (pendingRequest == null) {
        if (log.isDebugEnabled()) {
            log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
        }
        return;
    }

    if (isBatchRequestAndFailureCanBeIgnored(pendingRequest, failure)) {
        log.debug("Ignoring failed request to the resource manager for a batch slot request.");
        return;
    }

    removePendingRequest(slotRequestID);
    pendingRequest.getAllocatedSlotFuture().completeExceptionally(
        new NoResourceAvailableException(
            "No pooled slot available and request to ResourceManager for new slot failed",
            failure));
}
Example #7
Source File: SlotPoolImpl.java From flink with Apache License 2.0 | 6 votes |
/**
 * Handles a slot request to the ResourceManager that has failed.
 *
 * <p>If no pending request is found for the id, the failure is only logged on debug level. A
 * failure that {@code isBatchRequestAndFailureCanBeIgnored} accepts is ignored as well. Otherwise
 * the entry is removed from {@code pendingRequests} and the request's slot future is completed
 * exceptionally with a {@link NoResourceAvailableException} that wraps the failure.
 *
 * @param slotRequestID id of the failed slot request
 * @param failure reason why the request failed
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
    final PendingRequest pendingRequest = pendingRequests.getKeyA(slotRequestID);

    if (pendingRequest == null) {
        if (log.isDebugEnabled()) {
            log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
        }
        return;
    }

    if (isBatchRequestAndFailureCanBeIgnored(pendingRequest, failure)) {
        log.debug("Ignoring failed request to the resource manager for a batch slot request.");
        return;
    }

    pendingRequests.removeKeyA(slotRequestID);
    pendingRequest.getAllocatedSlotFuture().completeExceptionally(
        new NoResourceAvailableException(
            "No pooled slot available and request to ResourceManager for new slot failed",
            failure));
}
Example #8
Source File: RestartIndividualStrategy.java From flink with Apache License 2.0 | 6 votes |
@Override public void onTaskFailure(Execution taskExecution, Throwable cause) { executionGraph.getJobMasterMainThreadExecutor().assertRunningInMainThread(); // to better handle the lack of resources (potentially by a scale-in), we // make failures due to missing resources global failures if (cause instanceof NoResourceAvailableException) { LOG.info("Not enough resources to schedule {} - triggering full recovery.", taskExecution); executionGraph.failGlobal(cause); return; } LOG.info("Recovering task failure for {} (#{}) via individual restart.", taskExecution.getVertex().getTaskNameWithSubtaskIndex(), taskExecution.getAttemptNumber()); numTaskFailures.inc(); // trigger the restart once the task has reached its terminal state // Note: currently all tasks passed here are already in their terminal state, // so we could actually avoid the future. We use it anyways because it is cheap and // it helps to support better testing final CompletableFuture<ExecutionState> terminationFuture = taskExecution.getTerminalStateFuture(); terminationFuture.thenRun( () -> performExecutionVertexRestart(taskExecution.getVertex(), taskExecution.getGlobalModVersion())); }
Example #9
Source File: MiniClusterITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Runs the not-enough-slots scenario with {@link ScheduleMode#LAZY_FROM_SOURCES} and expects a
 * {@link JobExecutionException} whose cause chain contains a
 * {@link NoResourceAvailableException} and the "Could not allocate enough slots" message.
 */
@Test
public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception {
    try {
        setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES);
        fail("Job should fail.");
    } catch (JobExecutionException e) {
        final boolean reportsJobFailure =
            findThrowableWithMessage(e, "Job execution failed.").isPresent();
        assertTrue(reportsJobFailure);

        final boolean causedByMissingResources =
            findThrowable(e, NoResourceAvailableException.class).isPresent();
        assertTrue(causedByMissingResources);

        final boolean reportsMissingSlots =
            findThrowableWithMessage(e, "Could not allocate enough slots").isPresent();
        assertTrue(reportsMissingSlots);
    }
}
Example #10
Source File: SimpleSlotProvider.java From flink with Apache License 2.0 | 5 votes |
/**
 * Hands out the next free slot, if there is one, as an already completed future.
 *
 * <p>Under {@code lock}, the head of {@code slots} is removed and exposed as a
 * {@link TestingLogicalSlot} that copies the location, gateway, slot number and allocation id of
 * the underlying slot and uses this provider as slot owner. The underlying slot is remembered in
 * {@code allocatedSlots} under the given request id. If no slot is left, the returned future is
 * completed exceptionally with a {@link NoResourceAvailableException}.
 *
 * <p>{@code slotProfile} and {@code allocationTimeout} are not consulted by this implementation.
 *
 * @param slotRequestId id under which the handed-out slot is registered
 * @param task scheduled unit whose slot sharing group id is put on the logical slot
 * @return a completed future holding the slot, or an exceptionally completed future
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
        SlotRequestId slotRequestId,
        ScheduledUnit task,
        SlotProfile slotProfile,
        Time allocationTimeout) {
    synchronized (lock) {
        final SlotContext nextFreeSlot = slots.isEmpty() ? null : slots.removeFirst();
        if (nextFreeSlot == null) {
            return FutureUtils.completedExceptionally(new NoResourceAvailableException());
        }

        TestingLogicalSlot logicalSlot = new TestingLogicalSlotBuilder()
            .setTaskManagerLocation(nextFreeSlot.getTaskManagerLocation())
            .setTaskManagerGateway(nextFreeSlot.getTaskManagerGateway())
            .setSlotNumber(nextFreeSlot.getPhysicalSlotNumber())
            .setAllocationId(nextFreeSlot.getAllocationId())
            .setSlotRequestId(slotRequestId)
            .setSlotSharingGroupId(task.getSlotSharingGroupId())
            .setSlotOwner(this)
            .createTestingLogicalSlot();

        allocatedSlots.put(slotRequestId, nextFreeSlot);
        return CompletableFuture.completedFuture(logicalSlot);
    }
}
Example #11
Source File: DefaultScheduler.java From flink with Apache License 2.0 | 5 votes |
/**
 * Turns a slot request timeout into a {@link NoResourceAvailableException}.
 *
 * <p>The failure is inspected after stripping a wrapping completion exception. If the stripped
 * throwable is a {@link TimeoutException}, a new {@link NoResourceAvailableException} is returned
 * whose cause is the original (unstripped) failure. Every other failure is returned unchanged.
 *
 * @param failure the failure reported for a slot allocation
 * @return the wrapped failure for timeouts, otherwise {@code failure} itself
 */
private static Throwable maybeWrapWithNoResourceAvailableException(final Throwable failure) {
    final boolean isTimeout =
        ExceptionUtils.stripCompletionException(failure) instanceof TimeoutException;
    if (!isTimeout) {
        return failure;
    }

    return new NoResourceAvailableException(
        "Could not allocate the required slot within slot request timeout. " +
            "Please make sure that the cluster has enough resources.",
        failure);
}
Example #12
Source File: ThrowableClassifierTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Checks that {@link ThrowableClassifier} reports {@link ThrowableType#NonRecoverableError} for
 * both a {@link SuppressRestartsException} and a {@link NoResourceAvailableException}.
 */
@Test
public void testThrowableType_NonRecoverable() {
    final SuppressRestartsException suppressedRestarts =
        new SuppressRestartsException(new Exception(""));
    assertEquals(
        ThrowableType.NonRecoverableError,
        ThrowableClassifier.getThrowableType(suppressedRestarts));

    final NoResourceAvailableException missingResources = new NoResourceAvailableException();
    assertEquals(
        ThrowableType.NonRecoverableError,
        ThrowableClassifier.getThrowableType(missingResources));
}
Example #13
Source File: SimpleSlotProvider.java From flink with Apache License 2.0 | 5 votes |
/**
 * Hands out the next free slot, if there is one, as an already completed future.
 *
 * <p>Under {@code lock}, the head of {@code slots} is removed and exposed as a
 * {@link TestingLogicalSlot} that copies the location, gateway, slot number and allocation id of
 * the underlying slot and uses this provider as slot owner. The underlying slot is remembered in
 * {@code allocatedSlots} under the given request id. If no slot is left, the returned future is
 * completed exceptionally with a {@link NoResourceAvailableException}.
 *
 * <p>{@code task}, {@code slotProfile}, {@code allowQueued} and {@code allocationTimeout} are not
 * consulted by this implementation.
 *
 * @param slotRequestId id under which the handed-out slot is registered
 * @return a completed future holding the slot, or an exceptionally completed future
 */
@Override
public CompletableFuture<LogicalSlot> allocateSlot(
        SlotRequestId slotRequestId,
        ScheduledUnit task,
        SlotProfile slotProfile,
        boolean allowQueued,
        Time allocationTimeout) {
    synchronized (lock) {
        final SlotContext nextFreeSlot = slots.isEmpty() ? null : slots.removeFirst();
        if (nextFreeSlot == null) {
            return FutureUtils.completedExceptionally(new NoResourceAvailableException());
        }

        TestingLogicalSlot logicalSlot = new TestingLogicalSlotBuilder()
            .setTaskManagerLocation(nextFreeSlot.getTaskManagerLocation())
            .setTaskManagerGateway(nextFreeSlot.getTaskManagerGateway())
            .setSlotNumber(nextFreeSlot.getPhysicalSlotNumber())
            .setAllocationId(nextFreeSlot.getAllocationId())
            .setSlotRequestId(slotRequestId)
            .setSlotOwner(this)
            .createTestingLogicalSlot();

        allocatedSlots.put(slotRequestId, nextFreeSlot);
        return CompletableFuture.completedFuture(logicalSlot);
    }
}
Example #14
Source File: MiniClusterITCase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Runs the not-enough-slots scenario with {@link ScheduleMode#EAGER} and expects a
 * {@link JobExecutionException} whose cause chain contains a
 * {@link NoResourceAvailableException} and the "Slots required: 2, slots allocated: 1" message.
 */
@Test
public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception {
    try {
        setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER);
        fail("Job should fail.");
    } catch (JobExecutionException e) {
        final boolean reportsJobFailure =
            findThrowableWithMessage(e, "Job execution failed.").isPresent();
        assertTrue(reportsJobFailure);

        final boolean causedByMissingResources =
            findThrowable(e, NoResourceAvailableException.class).isPresent();
        assertTrue(causedByMissingResources);

        final boolean reportsSlotCounts =
            findThrowableWithMessage(e, "Slots required: 2, slots allocated: 1").isPresent();
        assertTrue(reportsSlotCounts);
    }
}
Example #15
Source File: ThrowableClassifierTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Checks that {@link ThrowableClassifier} classifies a {@link SuppressRestartsException} and a
 * {@link NoResourceAvailableException} as {@link ThrowableType#NonRecoverableError}.
 */
@Test
public void testThrowableType_NonRecoverable() {
    final ThrowableType expectedType = ThrowableType.NonRecoverableError;

    final SuppressRestartsException suppressedRestarts =
        new SuppressRestartsException(new Exception(""));
    assertEquals(expectedType, ThrowableClassifier.getThrowableType(suppressedRestarts));

    final NoResourceAvailableException missingResources = new NoResourceAvailableException();
    assertEquals(expectedType, ThrowableClassifier.getThrowableType(missingResources));
}
Example #16
Source File: MiniClusterITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Runs the not-enough-slots scenario with {@link ScheduleMode#LAZY_FROM_SOURCES} and expects a
 * {@link JobExecutionException} whose cause chain contains a
 * {@link NoResourceAvailableException} and the "Could not allocate enough slots" message.
 */
@Test
public void testHandleBatchJobsWhenNotEnoughSlot() throws Exception {
    try {
        setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.LAZY_FROM_SOURCES);
        fail("Job should fail.");
    } catch (JobExecutionException e) {
        final boolean reportsJobFailure =
            findThrowableWithMessage(e, "Job execution failed.").isPresent();
        assertTrue(reportsJobFailure);

        final boolean causedByMissingResources =
            findThrowable(e, NoResourceAvailableException.class).isPresent();
        assertTrue(causedByMissingResources);

        final boolean reportsMissingSlots =
            findThrowableWithMessage(e, "Could not allocate enough slots").isPresent();
        assertTrue(reportsMissingSlots);
    }
}
Example #17
Source File: MiniClusterITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Runs the not-enough-slots scenario with {@link ScheduleMode#EAGER} and expects a
 * {@link JobExecutionException} whose cause chain contains a
 * {@link NoResourceAvailableException} and the "Slots required: 2, slots allocated: 1" message.
 */
@Test
public void testHandleStreamingJobsWhenNotEnoughSlot() throws Exception {
    try {
        setupAndRunHandleJobsWhenNotEnoughSlots(ScheduleMode.EAGER);
        fail("Job should fail.");
    } catch (JobExecutionException e) {
        final boolean reportsJobFailure =
            findThrowableWithMessage(e, "Job execution failed.").isPresent();
        assertTrue(reportsJobFailure);

        final boolean causedByMissingResources =
            findThrowable(e, NoResourceAvailableException.class).isPresent();
        assertTrue(causedByMissingResources);

        final boolean reportsSlotCounts =
            findThrowableWithMessage(e, "Slots required: 2, slots allocated: 1").isPresent();
        assertTrue(reportsSlotCounts);
    }
}
Example #18
Source File: SlotPoolImpl.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Handles a slot request to the ResourceManager that has failed.
 *
 * <p>The request is removed from {@code pendingRequests}. If it was still registered, its slot
 * future is completed exceptionally with a {@link NoResourceAvailableException} wrapping the
 * failure; otherwise the failure is only logged on debug level.
 *
 * @param slotRequestID id of the failed slot request
 * @param failure reason why the request failed
 */
private void slotRequestToResourceManagerFailed(SlotRequestId slotRequestID, Throwable failure) {
    final PendingRequest removedRequest = pendingRequests.removeKeyA(slotRequestID);

    if (removedRequest == null) {
        if (log.isDebugEnabled()) {
            log.debug("Unregistered slot request [{}] failed.", slotRequestID, failure);
        }
        return;
    }

    removedRequest.getAllocatedSlotFuture().completeExceptionally(
        new NoResourceAvailableException(
            "No pooled slot available and request to ResourceManager for new slot failed",
            failure));
}
Example #19
Source File: ExecutionGraphRestartTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception { // this test is inconclusive if not used with a proper multi-threaded executor assertTrue("test assumptions violated", ((ThreadPoolExecutor) executor).getCorePoolSize() > 1); final int numRestarts = 10; final int parallelism = 20; try (SlotPool slotPool = createSlotPoolImpl()) { final Scheduler scheduler = createSchedulerWithSlots( parallelism - 1, slotPool, new LocalTaskManagerLocation()); final SlotSharingGroup sharingGroup = new SlotSharingGroup(); final JobVertex source = new JobVertex("source"); source.setInvokableClass(NoOpInvokable.class); source.setParallelism(parallelism); source.setSlotSharingGroup(sharingGroup); final JobVertex sink = new JobVertex("sink"); sink.setInvokableClass(NoOpInvokable.class); sink.setParallelism(parallelism); sink.setSlotSharingGroup(sharingGroup); sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED); TestRestartStrategy restartStrategy = new TestRestartStrategy(numRestarts, false); final ExecutionGraph eg = new ExecutionGraphTestUtils.TestingExecutionGraphBuilder(TEST_JOB_ID, source, sink) .setSlotProvider(scheduler) .setRestartStrategy(restartStrategy) .setIoExecutor(executor) .setFutureExecutor(executor) .setScheduleMode(ScheduleMode.EAGER) .build(); eg.start(mainThreadExecutor); eg.scheduleForExecution(); // wait until no more changes happen while (eg.getNumberOfFullRestarts() < numRestarts) { Thread.sleep(1); } assertEquals(JobStatus.FAILED, eg.getState()); final Throwable t = eg.getFailureCause(); if (!(t instanceof NoResourceAvailableException)) { ExceptionUtils.rethrowException(t, t.getMessage()); } } }
Example #20
Source File: SchedulerImpl.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private CompletableFuture<LogicalSlot> allocateSharedSlot( SlotRequestId slotRequestId, ScheduledUnit scheduledUnit, SlotProfile slotProfile, boolean allowQueuedScheduling, Time allocationTimeout) { // allocate slot with slot sharing final SlotSharingManager multiTaskSlotManager = slotSharingManagers.computeIfAbsent( scheduledUnit.getSlotSharingGroupId(), id -> new SlotSharingManager( id, slotPool, this)); final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality; try { if (scheduledUnit.getCoLocationConstraint() != null) { multiTaskSlotLocality = allocateCoLocatedMultiTaskSlot( scheduledUnit.getCoLocationConstraint(), multiTaskSlotManager, slotProfile, allowQueuedScheduling, allocationTimeout); } else { multiTaskSlotLocality = allocateMultiTaskSlot( scheduledUnit.getJobVertexId(), multiTaskSlotManager, slotProfile, allowQueuedScheduling, allocationTimeout); } } catch (NoResourceAvailableException noResourceException) { return FutureUtils.completedExceptionally(noResourceException); } // sanity check Preconditions.checkState(!multiTaskSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId())); final SlotSharingManager.SingleTaskSlot leaf = multiTaskSlotLocality.getMultiTaskSlot().allocateSingleTaskSlot( slotRequestId, scheduledUnit.getJobVertexId(), multiTaskSlotLocality.getLocality()); return leaf.getLogicalSlotFuture(); }
Example #21
Source File: SchedulerImpl.java From flink with Apache License 2.0 | 4 votes |
private CompletableFuture<LogicalSlot> allocateSharedSlot( SlotRequestId slotRequestId, ScheduledUnit scheduledUnit, SlotProfile slotProfile, @Nullable Time allocationTimeout) { // allocate slot with slot sharing final SlotSharingManager multiTaskSlotManager = slotSharingManagers.computeIfAbsent( scheduledUnit.getSlotSharingGroupId(), id -> new SlotSharingManager( id, slotPool, this)); final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality; try { if (scheduledUnit.getCoLocationConstraint() != null) { multiTaskSlotLocality = allocateCoLocatedMultiTaskSlot( scheduledUnit.getCoLocationConstraint(), multiTaskSlotManager, slotProfile, allocationTimeout); } else { multiTaskSlotLocality = allocateMultiTaskSlot( scheduledUnit.getJobVertexId(), multiTaskSlotManager, slotProfile, allocationTimeout); } } catch (NoResourceAvailableException noResourceException) { return FutureUtils.completedExceptionally(noResourceException); } // sanity check Preconditions.checkState(!multiTaskSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId())); final SlotSharingManager.SingleTaskSlot leaf = multiTaskSlotLocality.getMultiTaskSlot().allocateSingleTaskSlot( slotRequestId, slotProfile.getTaskResourceProfile(), scheduledUnit.getJobVertexId(), multiTaskSlotLocality.getLocality()); return leaf.getLogicalSlotFuture(); }
Example #22
Source File: SchedulerImpl.java From flink with Apache License 2.0 | 4 votes |
/** * Allocates a co-located {@link SlotSharingManager.MultiTaskSlot} for the given {@link CoLocationConstraint}. * * <p>The returned {@link SlotSharingManager.MultiTaskSlot} can be uncompleted. * * @param coLocationConstraint for which to allocate a {@link SlotSharingManager.MultiTaskSlot} * @param multiTaskSlotManager responsible for the slot sharing group for which to allocate the slot * @param slotProfile specifying the requirements for the requested slot * @param allocationTimeout timeout before the slot allocation times out * @return A {@link SlotAndLocality} which contains the allocated{@link SlotSharingManager.MultiTaskSlot} * and its locality wrt the given location preferences */ private SlotSharingManager.MultiTaskSlotLocality allocateCoLocatedMultiTaskSlot( CoLocationConstraint coLocationConstraint, SlotSharingManager multiTaskSlotManager, SlotProfile slotProfile, @Nullable Time allocationTimeout) throws NoResourceAvailableException { final SlotRequestId coLocationSlotRequestId = coLocationConstraint.getSlotRequestId(); if (coLocationSlotRequestId != null) { // we have a slot assigned --> try to retrieve it final SlotSharingManager.TaskSlot taskSlot = multiTaskSlotManager.getTaskSlot(coLocationSlotRequestId); if (taskSlot != null) { Preconditions.checkState(taskSlot instanceof SlotSharingManager.MultiTaskSlot); SlotSharingManager.MultiTaskSlot multiTaskSlot = (SlotSharingManager.MultiTaskSlot) taskSlot; if (multiTaskSlot.mayHaveEnoughResourcesToFulfill(slotProfile.getTaskResourceProfile())) { return SlotSharingManager.MultiTaskSlotLocality.of(multiTaskSlot, Locality.LOCAL); } throw new NoResourceAvailableException("Not enough resources in the slot for all co-located tasks."); } else { // the slot may have been cancelled in the mean time coLocationConstraint.setSlotRequestId(null); } } if (coLocationConstraint.isAssigned()) { // refine the preferred locations of the slot profile slotProfile = SlotProfile.priorAllocation( 
slotProfile.getTaskResourceProfile(), slotProfile.getPhysicalSlotResourceProfile(), Collections.singleton(coLocationConstraint.getLocation()), slotProfile.getPreferredAllocations(), slotProfile.getPreviousExecutionGraphAllocations()); } // get a new multi task slot SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality = allocateMultiTaskSlot( coLocationConstraint.getGroupId(), multiTaskSlotManager, slotProfile, allocationTimeout); // check whether we fulfill the co-location constraint if (coLocationConstraint.isAssigned() && multiTaskSlotLocality.getLocality() != Locality.LOCAL) { multiTaskSlotLocality.getMultiTaskSlot().release( new FlinkException("Multi task slot is not local and, thus, does not fulfill the co-location constraint.")); throw new NoResourceAvailableException("Could not allocate a local multi task slot for the " + "co location constraint " + coLocationConstraint + '.'); } final SlotRequestId slotRequestId = new SlotRequestId(); final SlotSharingManager.MultiTaskSlot coLocationSlot = multiTaskSlotLocality.getMultiTaskSlot().allocateMultiTaskSlot( slotRequestId, coLocationConstraint.getGroupId()); // mark the requested slot as co-located slot for other co-located tasks coLocationConstraint.setSlotRequestId(slotRequestId); // lock the co-location constraint once we have obtained the allocated slot coLocationSlot.getSlotContextFuture().whenComplete( (SlotContext slotContext, Throwable throwable) -> { if (throwable == null) { // check whether we are still assigned to the co-location constraint if (Objects.equals(coLocationConstraint.getSlotRequestId(), slotRequestId)) { coLocationConstraint.lockLocation(slotContext.getTaskManagerLocation()); } else { log.debug("Failed to lock colocation constraint {} because assigned slot " + "request {} differs from fulfilled slot request {}.", coLocationConstraint.getGroupId(), coLocationConstraint.getSlotRequestId(), slotRequestId); } } else { log.debug("Failed to lock colocation constraint {} because the 
slot " + "allocation for slot request {} failed.", coLocationConstraint.getGroupId(), coLocationConstraint.getSlotRequestId(), throwable); } }); return SlotSharingManager.MultiTaskSlotLocality.of(coLocationSlot, multiTaskSlotLocality.getLocality()); }
Example #23
Source File: SchedulerImpl.java From flink with Apache License 2.0 | 4 votes |
private CompletableFuture<LogicalSlot> allocateSharedSlot( SlotRequestId slotRequestId, ScheduledUnit scheduledUnit, SlotProfile slotProfile, boolean allowQueuedScheduling, @Nullable Time allocationTimeout) { // allocate slot with slot sharing final SlotSharingManager multiTaskSlotManager = slotSharingManagers.computeIfAbsent( scheduledUnit.getSlotSharingGroupId(), id -> new SlotSharingManager( id, slotPool, this)); final SlotSharingManager.MultiTaskSlotLocality multiTaskSlotLocality; try { if (scheduledUnit.getCoLocationConstraint() != null) { multiTaskSlotLocality = allocateCoLocatedMultiTaskSlot( scheduledUnit.getCoLocationConstraint(), multiTaskSlotManager, slotProfile, allowQueuedScheduling, allocationTimeout); } else { multiTaskSlotLocality = allocateMultiTaskSlot( scheduledUnit.getJobVertexId(), multiTaskSlotManager, slotProfile, allowQueuedScheduling, allocationTimeout); } } catch (NoResourceAvailableException noResourceException) { return FutureUtils.completedExceptionally(noResourceException); } // sanity check Preconditions.checkState(!multiTaskSlotLocality.getMultiTaskSlot().contains(scheduledUnit.getJobVertexId())); final SlotSharingManager.SingleTaskSlot leaf = multiTaskSlotLocality.getMultiTaskSlot().allocateSingleTaskSlot( slotRequestId, slotProfile.getResourceProfile(), scheduledUnit.getJobVertexId(), multiTaskSlotLocality.getLocality()); return leaf.getLogicalSlotFuture(); }
Example #24
Source File: ExecutionGraphRestartTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception { // this test is inconclusive if not used with a proper multi-threaded executor assertTrue("test assumptions violated", ((ThreadPoolExecutor) executor).getCorePoolSize() > 1); final int numRestarts = 10; final int parallelism = 20; TaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway(); final Scheduler scheduler = createSchedulerWithInstances(parallelism - 1, taskManagerGateway); final SlotSharingGroup sharingGroup = new SlotSharingGroup(); final JobVertex source = new JobVertex("source"); source.setInvokableClass(NoOpInvokable.class); source.setParallelism(parallelism); source.setSlotSharingGroup(sharingGroup); final JobVertex sink = new JobVertex("sink"); sink.setInvokableClass(NoOpInvokable.class); sink.setParallelism(parallelism); sink.setSlotSharingGroup(sharingGroup); sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED); TestRestartStrategy restartStrategy = new TestRestartStrategy(numRestarts, false); final ExecutionGraph eg = ExecutionGraphTestUtils.createExecutionGraph( new JobID(), scheduler, restartStrategy, executor, source, sink); eg.start(mainThreadExecutor); eg.setScheduleMode(ScheduleMode.EAGER); eg.scheduleForExecution(); // wait until no more changes happen while (eg.getNumberOfFullRestarts() < numRestarts) { Thread.sleep(1); } assertEquals(JobStatus.FAILED, eg.getState()); final Throwable t = eg.getFailureCause(); if (!(t instanceof NoResourceAvailableException)) { ExceptionUtils.rethrowException(t, t.getMessage()); } }
Example #25
Source File: ExecutionGraphNotEnoughResourceTest.java From flink with Apache License 2.0 | 4 votes |
@Test public void testRestartWithSlotSharingAndNotEnoughResources() throws Exception { final int numRestarts = 10; final int parallelism = 20; SlotPool slotPool = null; try { slotPool = new TestingSlotPoolImpl(TEST_JOB_ID); final Scheduler scheduler = createSchedulerWithSlots( parallelism - 1, slotPool, new LocalTaskManagerLocation()); final SlotSharingGroup sharingGroup = new SlotSharingGroup(); final JobVertex source = new JobVertex("source"); source.setInvokableClass(NoOpInvokable.class); source.setParallelism(parallelism); source.setSlotSharingGroup(sharingGroup); final JobVertex sink = new JobVertex("sink"); sink.setInvokableClass(NoOpInvokable.class); sink.setParallelism(parallelism); sink.setSlotSharingGroup(sharingGroup); sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED_BOUNDED); final JobGraph jobGraph = new JobGraph(TEST_JOB_ID, "Test Job", source, sink); jobGraph.setScheduleMode(ScheduleMode.EAGER); TestRestartStrategy restartStrategy = new TestRestartStrategy(numRestarts, false); final ExecutionGraph eg = TestingExecutionGraphBuilder .newBuilder() .setJobGraph(jobGraph) .setSlotProvider(scheduler) .setRestartStrategy(restartStrategy) .setAllocationTimeout(Time.milliseconds(1L)) .build(); eg.start(mainThreadExecutor); mainThreadExecutor.execute(ThrowingRunnable.unchecked(eg::scheduleForExecution)); CommonTestUtils.waitUntilCondition( () -> CompletableFuture.supplyAsync(eg::getState, mainThreadExecutor).join() == JobStatus.FAILED, Deadline.fromNow(Duration.ofMillis(2000))); // the last suppressed restart is also counted assertEquals(numRestarts + 1, CompletableFuture.supplyAsync(eg::getNumberOfRestarts, mainThreadExecutor).join().longValue()); final Throwable t = CompletableFuture.supplyAsync(eg::getFailureCause, mainThreadExecutor).join(); if (!(t instanceof NoResourceAvailableException)) { ExceptionUtils.rethrowException(t, t.getMessage()); } } finally { if (slotPool != null) { 
CompletableFuture.runAsync(slotPool::close, mainThreadExecutor).join(); } } }