org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils Java Examples
The following examples show how to use
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RMServerUtils.java From big-c with Apache License 2.0 | 6 votes |
/** * Utility method to validate a list resource requests, by insuring that the * requested memory/vcore is non-negative and not greater than max */ public static void normalizeAndValidateRequests(List<ResourceRequest> ask, Resource maximumResource, String queueName, YarnScheduler scheduler, RMContext rmContext) throws InvalidResourceRequestException { QueueInfo queueInfo = null; try { queueInfo = scheduler.getQueueInfo(queueName, false, false); } catch (IOException e) { } for (ResourceRequest resReq : ask) { SchedulerUtils.normalizeAndvalidateRequest(resReq, maximumResource, queueName, scheduler, rmContext, queueInfo); } }
Example #2
Source File: TestRMAppAttemptTransitions.java From big-c with Apache License 2.0 | 6 votes |
@Test public void testAMCrashAtScheduled() { // This is to test sending CONTAINER_FINISHED event at SCHEDULED state. // Verify the state transition is correct. scheduleApplicationAttempt(); ContainerStatus cs = SchedulerUtils.createAbnormalContainerStatus( BuilderUtils.newContainerId( applicationAttempt.getAppAttemptId(), 1), SchedulerUtils.LOST_CONTAINER); // send CONTAINER_FINISHED event at SCHEDULED state, // The state should be FINAL_SAVING with previous state SCHEDULED NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), cs, anyNodeId)); // createApplicationAttemptState will return previous state (SCHEDULED), // if the current state is FINAL_SAVING. assertEquals(YarnApplicationAttemptState.SCHEDULED, applicationAttempt.createApplicationAttemptState()); // send ATTEMPT_UPDATE_SAVED event, // verify the state is changed to state FAILED. sendAttemptUpdateSavedEvent(applicationAttempt); assertEquals(RMAppAttemptState.FAILED, applicationAttempt.getAppAttemptState()); verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); }
Example #3
Source File: FifoScheduler.java From big-c with Apache License 2.0 | 6 votes |
private synchronized void removeNode(RMNode nodeInfo) { FiCaSchedulerNode node = getNode(nodeInfo.getNodeID()); if (node == null) { return; } // Kill running containers for(RMContainer container : node.getRunningContainers()) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } //Remove the node this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); // Update cluster metrics Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability()); }
Example #4
Source File: RMServerUtils.java From hadoop with Apache License 2.0 | 6 votes |
/** * Utility method to validate a list resource requests, by insuring that the * requested memory/vcore is non-negative and not greater than max */ public static void normalizeAndValidateRequests(List<ResourceRequest> ask, Resource maximumResource, String queueName, YarnScheduler scheduler, RMContext rmContext) throws InvalidResourceRequestException { QueueInfo queueInfo = null; try { queueInfo = scheduler.getQueueInfo(queueName, false, false); } catch (IOException e) { } for (ResourceRequest resReq : ask) { SchedulerUtils.normalizeAndvalidateRequest(resReq, maximumResource, queueName, scheduler, rmContext, queueInfo); } }
Example #5
Source File: TestRMAppAttemptTransitions.java From hadoop with Apache License 2.0 | 6 votes |
@Test public void testAMCrashAtScheduled() { // This is to test sending CONTAINER_FINISHED event at SCHEDULED state. // Verify the state transition is correct. scheduleApplicationAttempt(); ContainerStatus cs = SchedulerUtils.createAbnormalContainerStatus( BuilderUtils.newContainerId( applicationAttempt.getAppAttemptId(), 1), SchedulerUtils.LOST_CONTAINER); // send CONTAINER_FINISHED event at SCHEDULED state, // The state should be FINAL_SAVING with previous state SCHEDULED NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), cs, anyNodeId)); // createApplicationAttemptState will return previous state (SCHEDULED), // if the current state is FINAL_SAVING. assertEquals(YarnApplicationAttemptState.SCHEDULED, applicationAttempt.createApplicationAttemptState()); // send ATTEMPT_UPDATE_SAVED event, // verify the state is changed to state FAILED. sendAttemptUpdateSavedEvent(applicationAttempt); assertEquals(RMAppAttemptState.FAILED, applicationAttempt.getAppAttemptState()); verifyApplicationAttemptFinished(RMAppAttemptState.FAILED); }
Example #6
Source File: FifoScheduler.java From hadoop with Apache License 2.0 | 6 votes |
private synchronized void removeNode(RMNode nodeInfo) { FiCaSchedulerNode node = getNode(nodeInfo.getNodeID()); if (node == null) { return; } // Kill running containers for(RMContainer container : node.getRunningContainers()) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } //Remove the node this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); // Update cluster metrics Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability()); }
Example #7
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID()); // This can occur when an UNHEALTHY node reconnects if (node == null) { return; } Resources.subtractFrom(clusterResource, rmNode.getTotalCapability()); updateRootQueueMetrics(); // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); for (RMContainer container : runningContainers) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } // Remove reservations, if any RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus( reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } nodes.remove(rmNode.getNodeID()); queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); updateMaximumAllocation(node, false); LOG.info("Removed node " + rmNode.getNodeAddress() + " cluster capacity: " + clusterResource); }
Example #8
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
@Override public void killContainer(RMContainer cont) { if (LOG.isDebugEnabled()) { LOG.debug("KILL_CONTAINER: container" + cont.toString()); } recoverResourceRequestForContainer(cont); completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL); }
Example #9
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
@Override public void dropContainerReservation(RMContainer container) { if(LOG.isDebugEnabled()){ LOG.debug("DROP_RESERVATION:" + container.toString()); } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER), RMContainerEventType.KILL); }
Example #10
Source File: CapacitySchedulerConfiguration.java From big-c with Apache License 2.0 | 5 votes |
public Map<AccessType, AccessControlList> getAcls(String queue) { Map<AccessType, AccessControlList> acls = new HashMap<AccessType, AccessControlList>(); for (QueueACL acl : QueueACL.values()) { acls.put(SchedulerUtils.toAccessType(acl), getAcl(queue, acl)); } return acls; }
Example #11
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID()); // This can occur when an UNHEALTHY node reconnects if (node == null) { return; } Resources.subtractFrom(clusterResource, rmNode.getTotalCapability()); updateRootQueueMetrics(); // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); for (RMContainer container : runningContainers) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } // Remove reservations, if any RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus( reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } nodes.remove(rmNode.getNodeID()); queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); updateMaximumAllocation(node, false); LOG.info("Removed node " + rmNode.getNodeAddress() + " cluster capacity: " + clusterResource); }
Example #12
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
protected void warnOrKillContainer(RMContainer container) { ApplicationAttemptId appAttemptId = container.getApplicationAttemptId(); FSAppAttempt app = getSchedulerApp(appAttemptId); FSLeafQueue queue = app.getQueue(); LOG.info("Preempting container (prio=" + container.getContainer().getPriority() + "res=" + container.getContainer().getResource() + ") from queue " + queue.getName()); Long time = app.getContainerPreemptionTime(container); if (time != null) { // if we asked for preemption more than maxWaitTimeBeforeKill ms ago, // proceed with kill if (time + waitTimeBeforeKill < getClock().getTime()) { ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); recoverResourceRequestForContainer(container); // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). completedContainer(container, status, RMContainerEventType.KILL); LOG.info("Killing container" + container + " (after waiting for premption for " + (getClock().getTime() - time) + "ms)"); } } else { // track the request in the FSAppAttempt itself app.addPreemption(container, getClock().getTime()); } }
Example #13
Source File: FifoScheduler.java From hadoop with Apache License 2.0 | 5 votes |
private synchronized void doneApplicationAttempt( ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) throws IOException { FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId); SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationAttemptId.getApplicationId()); if (application == null || attempt == null) { throw new IOException("Unknown application " + applicationAttemptId + " has completed!"); } // Kill all 'live' containers for (RMContainer container : attempt.getLiveContainers()) { if (keepContainers && container.getState().equals(RMContainerState.RUNNING)) { // do not kill the running container in the case of work-preserving AM // restart. LOG.info("Skip killing " + container.getContainerId()); continue; } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL); } // Clean up pending requests, metrics etc. attempt.stop(rmAppAttemptFinalState); }
Example #14
Source File: FifoScheduler.java From big-c with Apache License 2.0 | 5 votes |
private synchronized void doneApplicationAttempt( ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) throws IOException { FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId); SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationAttemptId.getApplicationId()); if (application == null || attempt == null) { throw new IOException("Unknown application " + applicationAttemptId + " has completed!"); } // Kill all 'live' containers for (RMContainer container : attempt.getLiveContainers()) { if (keepContainers && container.getState().equals(RMContainerState.RUNNING)) { // do not kill the running container in the case of work-preserving AM // restart. LOG.info("Skip killing " + container.getContainerId()); continue; } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL); } // Clean up pending requests, metrics etc. attempt.stop(rmAppAttemptFinalState); }
Example #15
Source File: CapacityScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@Override public void killContainer(RMContainer cont) { if (LOG.isDebugEnabled()) { LOG.debug("KILL_CONTAINER: container" + cont.toString()); } recoverResourceRequestForContainer(cont); completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL); }
Example #16
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
protected void warnOrKillContainer(RMContainer container) { ApplicationAttemptId appAttemptId = container.getApplicationAttemptId(); FSAppAttempt app = getSchedulerApp(appAttemptId); FSLeafQueue queue = app.getQueue(); LOG.info("Preempting container (prio=" + container.getContainer().getPriority() + "res=" + container.getContainer().getResource() + ") from queue " + queue.getName()); Long time = app.getContainerPreemptionTime(container); if (time != null) { // if we asked for preemption more than maxWaitTimeBeforeKill ms ago, // proceed with kill if (time + waitTimeBeforeKill < getClock().getTime()) { ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); recoverResourceRequestForContainer(container); // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). completedContainer(container, status, RMContainerEventType.KILL); LOG.info("Killing container" + container + " (after waiting for premption for " + (getClock().getTime() - time) + "ms)"); } } else { // track the request in the FSAppAttempt itself app.addPreemption(container, getClock().getTime()); } }
Example #17
Source File: CapacitySchedulerConfiguration.java From hadoop with Apache License 2.0 | 5 votes |
public Map<AccessType, AccessControlList> getAcls(String queue) { Map<AccessType, AccessControlList> acls = new HashMap<AccessType, AccessControlList>(); for (QueueACL acl : QueueACL.values()) { acls.put(SchedulerUtils.toAccessType(acl), getAcl(queue, acl)); } return acls; }
Example #18
Source File: CapacityScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@Override public void dropContainerReservation(RMContainer container) { if(LOG.isDebugEnabled()){ LOG.debug("DROP_RESERVATION:" + container.toString()); } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER), RMContainerEventType.KILL); }
Example #19
Source File: FairScheduler.java From big-c with Apache License 2.0 | 4 votes |
@Override public Allocation allocate(ApplicationAttemptId appAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) { // Make sure this application exists FSAppAttempt application = getSchedulerApp(appAttemptId); if (application == null) { LOG.info("Calling allocate on removed " + "or non existant application " + appAttemptId); return EMPTY_ALLOCATION; } // Sanity check SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR, clusterResource, minimumAllocation, getMaximumResourceCapability(), incrAllocation); // Set amResource for this app if (!application.getUnmanagedAM() && ask.size() == 1 && application.getLiveContainers().isEmpty()) { application.setAMResource(ask.get(0).getCapability()); } // Release containers releaseContainers(release, application); synchronized (application) { if (!ask.isEmpty()) { if (LOG.isDebugEnabled()) { LOG.debug("allocate: pre-update" + " applicationAttemptId=" + appAttemptId + " application=" + application.getApplicationId()); } application.showRequests(); // Update application requests application.updateResourceRequests(ask); application.showRequests(); } if (LOG.isDebugEnabled()) { LOG.debug("allocate: post-update" + " applicationAttemptId=" + appAttemptId + " #ask=" + ask.size() + " reservation= " + application.getCurrentReservation()); LOG.debug("Preempting " + application.getPreemptionContainers().size() + " container(s)"); } Set<ContainerId> preemptionContainerIds = new HashSet<ContainerId>(); for (RMContainer container : application.getPreemptionContainers()) { preemptionContainerIds.add(container.getContainerId()); } application.updateBlacklist(blacklistAdditions, blacklistRemovals); ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); return new Allocation(allocation.getContainerList(), headroom, preemptionContainerIds, null, null, allocation.getNMTokenList()); } }
Example #20
Source File: FifoScheduler.java From hadoop with Apache License 2.0 | 4 votes |
@Override public Allocation allocate( ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { LOG.error("Calling allocate on removed " + "or non existant application " + applicationAttemptId); return EMPTY_ALLOCATION; } // Sanity check SchedulerUtils.normalizeRequests(ask, resourceCalculator, clusterResource, minimumAllocation, getMaximumResourceCapability()); // Release containers releaseContainers(release, application); synchronized (application) { // make sure we aren't stopping/removing the application // when the allocate comes in if (application.isStopped()) { LOG.info("Calling allocate on a stopped " + "application " + applicationAttemptId); return EMPTY_ALLOCATION; } if (!ask.isEmpty()) { LOG.debug("allocate: pre-update" + " applicationId=" + applicationAttemptId + " application=" + application); application.showRequests(); // Update application requests application.updateResourceRequests(ask); LOG.debug("allocate: post-update" + " applicationId=" + applicationAttemptId + " application=" + application); application.showRequests(); LOG.debug("allocate:" + " applicationId=" + applicationAttemptId + " #ask=" + ask.size()); } application.updateBlacklist(blacklistAdditions, blacklistRemovals); ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); return new Allocation(allocation.getContainerList(), headroom, null, null, null, allocation.getNMTokenList()); } }
Example #21
Source File: TestRMContainerImpl.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testExpireWhileRunning() { DrainDispatcher drainDispatcher = new DrainDispatcher(); EventHandler<RMAppAttemptEvent> appAttemptEventHandler = mock(EventHandler.class); EventHandler generic = mock(EventHandler.class); drainDispatcher.register(RMAppAttemptEventType.class, appAttemptEventHandler); drainDispatcher.register(RMNodeEventType.class, generic); drainDispatcher.init(new YarnConfiguration()); drainDispatcher.start(); NodeId nodeId = BuilderUtils.newNodeId("host", 3425); ApplicationId appId = BuilderUtils.newApplicationId(1, 1); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( appId, 1); ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); Resource resource = BuilderUtils.newResource(512, 1); Priority priority = BuilderUtils.newPriority(5); Container container = BuilderUtils.newContainer(containerId, nodeId, "host:3465", resource, priority, null); RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); RMContext rmContext = mock(RMContext.class); when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, nodeId, "user", rmContext); assertEquals(RMContainerState.NEW, rmContainer.getState()); assertEquals(resource, rmContainer.getAllocatedResource()); assertEquals(nodeId, rmContainer.getAllocatedNode()); assertEquals(priority, rmContainer.getAllocatedPriority()); verify(writer).containerStarted(any(RMContainer.class)); verify(publisher).containerCreated(any(RMContainer.class), anyLong()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.START)); drainDispatcher.await(); assertEquals(RMContainerState.ALLOCATED, rmContainer.getState()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.ACQUIRED)); drainDispatcher.await(); assertEquals(RMContainerState.ACQUIRED, rmContainer.getState()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); drainDispatcher.await(); assertEquals(RMContainerState.RUNNING, rmContainer.getState()); assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user", rmContainer.getLogURL()); // In RUNNING state. Verify EXPIRE and associated actions. reset(appAttemptEventHandler); ContainerStatus containerStatus = SchedulerUtils .createAbnormalContainerStatus(containerId, SchedulerUtils.EXPIRED_CONTAINER); rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, RMContainerEventType.EXPIRE)); drainDispatcher.await(); assertEquals(RMContainerState.RUNNING, rmContainer.getState()); verify(writer, never()).containerFinished(any(RMContainer.class)); verify(publisher, never()).containerFinished(any(RMContainer.class), anyLong()); }
Example #22
Source File: TestRMContainerImpl.java From big-c with Apache License 2.0 | 4 votes |
@Test public void testReleaseWhileRunning() { DrainDispatcher drainDispatcher = new DrainDispatcher(); EventHandler<RMAppAttemptEvent> appAttemptEventHandler = mock(EventHandler.class); EventHandler generic = mock(EventHandler.class); drainDispatcher.register(RMAppAttemptEventType.class, appAttemptEventHandler); drainDispatcher.register(RMNodeEventType.class, generic); drainDispatcher.init(new YarnConfiguration()); drainDispatcher.start(); NodeId nodeId = BuilderUtils.newNodeId("host", 3425); ApplicationId appId = BuilderUtils.newApplicationId(1, 1); ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( appId, 1); ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); Resource resource = BuilderUtils.newResource(512, 1); Priority priority = BuilderUtils.newPriority(5); Container container = BuilderUtils.newContainer(containerId, nodeId, "host:3465", resource, priority, null); ConcurrentMap<ApplicationId, RMApp> rmApps = spy(new ConcurrentHashMap<ApplicationId, RMApp>()); RMApp rmApp = mock(RMApp.class); when(rmApp.getRMAppAttempt((ApplicationAttemptId)Matchers.any())).thenReturn(null); Mockito.doReturn(rmApp).when(rmApps).get((ApplicationId)Matchers.any()); RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); RMContext rmContext = mock(RMContext.class); when(rmContext.getDispatcher()).thenReturn(drainDispatcher); when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer); when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); when(rmContext.getRMApps()).thenReturn(rmApps); when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, nodeId, "user", rmContext); assertEquals(RMContainerState.NEW, rmContainer.getState()); assertEquals(resource, rmContainer.getAllocatedResource()); assertEquals(nodeId, rmContainer.getAllocatedNode()); assertEquals(priority, rmContainer.getAllocatedPriority()); verify(writer).containerStarted(any(RMContainer.class)); verify(publisher).containerCreated(any(RMContainer.class), anyLong()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.START)); drainDispatcher.await(); assertEquals(RMContainerState.ALLOCATED, rmContainer.getState()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.ACQUIRED)); drainDispatcher.await(); assertEquals(RMContainerState.ACQUIRED, rmContainer.getState()); rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); drainDispatcher.await(); assertEquals(RMContainerState.RUNNING, rmContainer.getState()); assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user", rmContainer.getLogURL()); // In RUNNING state. Verify RELEASED and associated actions. reset(appAttemptEventHandler); ContainerStatus containerStatus = SchedulerUtils .createAbnormalContainerStatus(containerId, SchedulerUtils.RELEASED_CONTAINER); rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, RMContainerEventType.RELEASED)); drainDispatcher.await(); assertEquals(RMContainerState.RELEASED, rmContainer.getState()); assertEquals(SchedulerUtils.RELEASED_CONTAINER, rmContainer.getDiagnosticsInfo()); assertEquals(ContainerExitStatus.ABORTED, rmContainer.getContainerExitStatus()); assertEquals(ContainerState.COMPLETE, rmContainer.getContainerState()); verify(writer).containerFinished(any(RMContainer.class)); verify(publisher).containerFinished(any(RMContainer.class), anyLong()); ArgumentCaptor<RMAppAttemptContainerFinishedEvent> captor = ArgumentCaptor .forClass(RMAppAttemptContainerFinishedEvent.class); verify(appAttemptEventHandler).handle(captor.capture()); RMAppAttemptContainerFinishedEvent cfEvent = captor.getValue(); assertEquals(appAttemptId, cfEvent.getApplicationAttemptId()); assertEquals(containerStatus, cfEvent.getContainerStatus()); assertEquals(RMAppAttemptEventType.CONTAINER_FINISHED, cfEvent.getType()); // In RELEASED state. A FINIHSED event may come in. rmContainer.handle(new RMContainerFinishedEvent(containerId, SchedulerUtils .createAbnormalContainerStatus(containerId, "FinishedContainer"), RMContainerEventType.FINISHED)); assertEquals(RMContainerState.RELEASED, rmContainer.getState()); }
Example #23
Source File: RMAppManager.java From big-c with Apache License 2.0 | 4 votes |
private ResourceRequest validateAndCreateResourceRequest( ApplicationSubmissionContext submissionContext, boolean isRecovery) throws InvalidResourceRequestException { // Validation of the ApplicationSubmissionContext needs to be completed // here. Only those fields that are dependent on RM's configuration are // checked here as they have to be validated whether they are part of new // submission or just being recovered. // Check whether AM resource requirements are within required limits if (!submissionContext.getUnmanagedAM()) { ResourceRequest amReq = submissionContext.getAMContainerResourceRequest(); if (amReq == null) { amReq = BuilderUtils .newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, submissionContext.getResource(), 1); } // set label expression for AM container if (null == amReq.getNodeLabelExpression()) { amReq.setNodeLabelExpression(submissionContext .getNodeLabelExpression()); } try { SchedulerUtils.normalizeAndValidateRequest(amReq, scheduler.getMaximumResourceCapability(), submissionContext.getQueue(), scheduler, isRecovery, rmContext); } catch (InvalidResourceRequestException e) { LOG.warn("RM app submission failed in validating AM resource request" + " for application " + submissionContext.getApplicationId(), e); throw e; } SchedulerUtils.normalizeRequest(amReq, scheduler.getResourceCalculator(), scheduler.getClusterResource(), scheduler.getMinimumResourceCapability(), scheduler.getMaximumResourceCapability(), scheduler.getMinimumResourceCapability()); return amReq; } return null; }
Example #24
Source File: LeafQueue.java From big-c with Apache License 2.0 | 4 votes |
@Private protected boolean findNodeToUnreserve(Resource clusterResource, FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, Resource minimumUnreservedResource) { // need to unreserve some other container first NodeId idToUnreserve = application.getNodeIdToUnreserve(priority, minimumUnreservedResource, resourceCalculator, clusterResource); if (idToUnreserve == null) { if (LOG.isDebugEnabled()) { LOG.debug("checked to see if could unreserve for app but nothing " + "reserved that matches for this app"); } return false; } FiCaSchedulerNode nodeToUnreserve = scheduler.getNode(idToUnreserve); if (nodeToUnreserve == null) { LOG.error("node to unreserve doesn't exist, nodeid: " + idToUnreserve); return false; } { LOG.info("unreserving for app: " + application.getApplicationId() + " on nodeId: " + idToUnreserve + " in order to replace reserved application and place it on node: " + node.getNodeID() + " needing: " + minimumUnreservedResource); } // headroom Resources.addTo(application.getHeadroom(), nodeToUnreserve .getReservedContainer().getReservedResource()); // Make sure to not have completedContainers sort the queues here since // we are already inside an iterator loop for the queues and this would // cause an concurrent modification exception. completedContainer(clusterResource, application, nodeToUnreserve, nodeToUnreserve.getReservedContainer(), SchedulerUtils.createAbnormalContainerStatus(nodeToUnreserve .getReservedContainer().getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER), RMContainerEventType.RELEASED, null, false); return true; }
Example #25
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 4 votes |
@Override public void suspendContianer(RMContainer cont, Resource toPreempt) { // TODO Auto-generated method stub if (LOG.isDebugEnabled()) { LOG.debug("SUSPEND_CONTAINER: container" + cont.toString()); } LOG.info("capacity scheduler try to preempt "+cont.getContainerId()+" resource: "+toPreempt); if(toPreempt == null){ LOG.info("preempted resource can not be null"); return; } if(!Resources.greaterThan(getResourceCalculator(), clusterResource, toPreempt,Resources.none())){ LOG.info("preempted resource is none"); return; } //set preempted resource cont.addPreemptedResource(toPreempt); //mark this container to be preempted completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.SUSPEND); //send this resource update info to NodeManager NodeId nodeId = cont.getContainer().getNodeId(); ContainerId containerId = cont.getContainerId(); //get current resource after preemption Resource currentResource = cont.getCurrentUsedResource(); NodeContainerUpdate nodeContainerUpdate= NodeContainerUpdate.newInstance(containerId, currentResource.getMemory(), currentResource.getVirtualCores(),true,false); LOG.info("get container "+containerId+" to suspend "+" on host "+nodeId.getHost()+" currentresource: "+currentResource); if(nodeContainerUpdateMap.get(nodeId) == null){ ConcurrentLinkedQueue<NodeContainerUpdate> listNodeContainerUpdate = new ConcurrentLinkedQueue<NodeContainerUpdate>(); listNodeContainerUpdate.add(nodeContainerUpdate); nodeContainerUpdateMap.put(nodeId, listNodeContainerUpdate); }else{ nodeContainerUpdateMap.get(nodeId).add(nodeContainerUpdate); } }
Example #26
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 4 votes |
@Override public Allocation allocate(ApplicationAttemptId appAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) { // Make sure this application exists FSAppAttempt application = getSchedulerApp(appAttemptId); if (application == null) { LOG.info("Calling allocate on removed " + "or non existant application " + appAttemptId); return EMPTY_ALLOCATION; } // Sanity check SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR, clusterResource, minimumAllocation, getMaximumResourceCapability(), incrAllocation); // Set amResource for this app if (!application.getUnmanagedAM() && ask.size() == 1 && application.getLiveContainers().isEmpty()) { application.setAMResource(ask.get(0).getCapability()); } // Release containers releaseContainers(release, application); synchronized (application) { if (!ask.isEmpty()) { if (LOG.isDebugEnabled()) { LOG.debug("allocate: pre-update" + " applicationAttemptId=" + appAttemptId + " application=" + application.getApplicationId()); } application.showRequests(); // Update application requests application.updateResourceRequests(ask); application.showRequests(); } if (LOG.isDebugEnabled()) { LOG.debug("allocate: post-update" + " applicationAttemptId=" + appAttemptId + " #ask=" + ask.size() + " reservation= " + application.getCurrentReservation()); LOG.debug("Preempting " + application.getPreemptionContainers().size() + " container(s)"); } Set<ContainerId> preemptionContainerIds = new HashSet<ContainerId>(); for (RMContainer container : application.getPreemptionContainers()) { preemptionContainerIds.add(container.getContainerId()); } application.updateBlacklist(blacklistAdditions, blacklistRemovals); ContainersAndNMTokensAllocation allocation = application.pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); return new Allocation(allocation.getContainerList(), headroom, preemptionContainerIds, null, null, allocation.getNMTokenList()); } }
Example #27
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 4 votes |
private synchronized void removeNode(RMNode nodeInfo) { // update this node to node label manager if (labelManager != null) { labelManager.deactivateNode(nodeInfo.getNodeID()); } FiCaSchedulerNode node = nodes.get(nodeInfo.getNodeID()); if (node == null) { return; } Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability()); root.updateClusterResource(clusterResource, new ResourceLimits( clusterResource)); int numNodes = numNodeManagers.decrementAndGet(); if (scheduleAsynchronously && numNodes == 0) { asyncSchedulerThread.suspendSchedule(); } // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); for (RMContainer container : runningContainers) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } // Remove reservations, if any RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus( reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); LOG.info("Removed node " + nodeInfo.getNodeAddress() + " clusterResource: " + clusterResource); }
Example #28
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 4 votes |
@Override @Lock(Lock.NoLock.class) public Allocation allocate(ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask, List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { LOG.info("Calling allocate on removed " + "or non existant application " + applicationAttemptId); return EMPTY_ALLOCATION; } // Sanity check SchedulerUtils.normalizeRequests( ask, getResourceCalculator(), getClusterResource(), getMinimumResourceCapability(), getMaximumResourceCapability()); // Release containers releaseContainers(release, application); synchronized (application) { // make sure we aren't stopping/removing the application // when the allocate comes in if (application.isStopped()) { LOG.info("Calling allocate on a stopped " + "application " + applicationAttemptId); return EMPTY_ALLOCATION; } if (!ask.isEmpty()) { if(LOG.isDebugEnabled()) { LOG.debug("allocate: pre-update" + " applicationAttemptId=" + applicationAttemptId + " application=" + application); } application.showRequests(); // Update application requests application.updateResourceRequests(ask); LOG.debug("allocate: post-update"); application.showRequests(); } if(LOG.isDebugEnabled()) { LOG.debug("allocate:" + " applicationAttemptId=" + applicationAttemptId + " #ask=" + ask.size()); } application.updateBlacklist(blacklistAdditions, blacklistRemovals); return application.getAllocation(getResourceCalculator(), clusterResource, getMinimumResourceCapability()); } }
Example #29
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 4 votes |
@Override public void handle(SchedulerEvent event) { switch (event.getType()) { case NODE_ADDED: if (!(event instanceof NodeAddedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event; addNode(nodeAddedEvent.getAddedRMNode()); recoverContainersOnNode(nodeAddedEvent.getContainerReports(), nodeAddedEvent.getAddedRMNode()); break; case NODE_REMOVED: if (!(event instanceof NodeRemovedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event; removeNode(nodeRemovedEvent.getRemovedRMNode()); break; case NODE_UPDATE: if (!(event instanceof NodeUpdateSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event; nodeUpdate(nodeUpdatedEvent.getRMNode()); break; case APP_ADDED: if (!(event instanceof AppAddedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; String queueName = resolveReservationQueueName(appAddedEvent.getQueue(), appAddedEvent.getApplicationId(), appAddedEvent.getReservationID()); if (queueName != null) { addApplication(appAddedEvent.getApplicationId(), queueName, appAddedEvent.getUser(), appAddedEvent.getIsAppRecovering()); } break; case APP_REMOVED: if (!(event instanceof AppRemovedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event; removeApplication(appRemovedEvent.getApplicationID(), appRemovedEvent.getFinalState()); break; case NODE_RESOURCE_UPDATE: if (!(event instanceof NodeResourceUpdateSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent = (NodeResourceUpdateSchedulerEvent)event; updateNodeResource(nodeResourceUpdatedEvent.getRMNode(), nodeResourceUpdatedEvent.getResourceOption()); break; case APP_ATTEMPT_ADDED: if (!(event instanceof AppAttemptAddedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } AppAttemptAddedSchedulerEvent appAttemptAddedEvent = (AppAttemptAddedSchedulerEvent) event; addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(), appAttemptAddedEvent.getTransferStateFromPreviousAttempt(), appAttemptAddedEvent.getIsAttemptRecovering()); break; case APP_ATTEMPT_REMOVED: if (!(event instanceof AppAttemptRemovedSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent = (AppAttemptRemovedSchedulerEvent) event; removeApplicationAttempt( appAttemptRemovedEvent.getApplicationAttemptID(), appAttemptRemovedEvent.getFinalAttemptState(), appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts()); break; case CONTAINER_EXPIRED: if (!(event instanceof ContainerExpiredSchedulerEvent)) { throw new RuntimeException("Unexpected event type: " + event); } ContainerExpiredSchedulerEvent containerExpiredEvent = (ContainerExpiredSchedulerEvent)event; ContainerId containerId = containerExpiredEvent.getContainerId(); completedContainer(getRMContainer(containerId), SchedulerUtils.createAbnormalContainerStatus( containerId, SchedulerUtils.EXPIRED_CONTAINER), RMContainerEventType.EXPIRE); break; default: LOG.error("Unknown event arrived at FairScheduler: " + event.toString()); } }
Example #30
Source File: AbstractCSQueue.java From big-c with Apache License 2.0 | 4 votes |
@Override public boolean hasAccess(QueueACL acl, UserGroupInformation user) { return authorizer.checkPermission(SchedulerUtils.toAccessType(acl), queueEntity, user); }