org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType Java Examples
The following examples show how to use
org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractYarnScheduler.java From hadoop with Apache License 2.0 | 6 votes |
protected void releaseContainers(List<ContainerId> containers, SchedulerApplicationAttempt attempt) { for (ContainerId containerId : containers) { RMContainer rmContainer = getRMContainer(containerId); if (rmContainer == null) { if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp() < nmExpireInterval) { LOG.info(containerId + " doesn't exist. Add the container" + " to the release request cache as it maybe on recovery."); synchronized (attempt) { attempt.getPendingRelease().add(containerId); } } else { RMAuditLogger.logFailure(attempt.getUser(), AuditConstants.RELEASE_CONTAINER, "Unauthorized access or invalid container", "Scheduler", "Trying to release container not owned by app or with invalid id.", attempt.getApplicationId(), containerId); } } completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(containerId, SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED); } }
Example #2
Source File: AbstractYarnScheduler.java From big-c with Apache License 2.0 | 6 votes |
public synchronized void adjusctContianerResouce(ContainerId contianerId, Resource resource){ RMContainer container = getRMContainer(contianerId); //get original resource Resource allocatedResource = container.getAllocatedResource(); Resource marginalResource=Resources.subtract(allocatedResource, resource); if(marginalResource.getMemory() > 0 ){ LOG.info("increase memory on continaer"+contianerId.getContainerId()+"by"+marginalResource.getMemory()); }else{ LOG.info("decrease memory on continaer"+contianerId.getContainerId()+"by"+marginalResource.getMemory()); } if(marginalResource.getVirtualCores() > 0){ LOG.info("increase cores on contianer"+contianerId.getContainerId()+"by"+marginalResource.getVirtualCores()); }else{ LOG.info("dncrease cores on contianer"+contianerId.getContainerId()+"by"+marginalResource.getVirtualCores()); } SchedulerNode schedulerNode = getSchedulerNode(container.getAllocatedNode()); //now inform the schedulerNode to adjust resurce assumption schedulerNode.addAvailableResource(marginalResource); //now inform RMContinaer to adjuct its allocatedResource container.handle(new RMContainerResourceUpdateEvent(contianerId, RMContainerEventType.RESOURCE_UPDATE, resource)); }
Example #3
Source File: FifoScheduler.java From hadoop with Apache License 2.0 | 6 votes |
private synchronized void removeNode(RMNode nodeInfo) { FiCaSchedulerNode node = getNode(nodeInfo.getNodeID()); if (node == null) { return; } // Kill running containers for(RMContainer container : node.getRunningContainers()) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } //Remove the node this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); // Update cluster metrics Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability()); }
Example #4
Source File: FifoScheduler.java From big-c with Apache License 2.0 | 6 votes |
private synchronized void removeNode(RMNode nodeInfo) { FiCaSchedulerNode node = getNode(nodeInfo.getNodeID()); if (node == null) { return; } // Kill running containers for(RMContainer container : node.getRunningContainers()) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } //Remove the node this.nodes.remove(nodeInfo.getNodeID()); updateMaximumAllocation(node, false); // Update cluster metrics Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability()); }
Example #5
Source File: FSAppAttempt.java From hadoop with Apache License 2.0 | 5 votes |
synchronized public void containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { Container container = rmContainer.getContainer(); ContainerId containerId = container.getId(); // Remove from the list of newly allocated containers if found newlyAllocatedContainers.remove(rmContainer); // Inform the container rmContainer.handle( new RMContainerFinishedEvent( containerId, containerStatus, event) ); LOG.info("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event); // Remove from the list of containers liveContainers.remove(rmContainer.getContainerId()); RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId); // Update usage metrics Resource containerResource = rmContainer.getContainer().getResource(); queue.getMetrics().releaseResources(getUser(), 1, containerResource); this.attemptResourceUsage.decUsed(containerResource); // remove from preemption map if it is completed preemptionMap.remove(rmContainer); // Clear resource utilization metrics cache. lastMemoryAggregateAllocationUpdateTime = -1; }
Example #6
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
@Lock(CapacityScheduler.class) @Override protected synchronized void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { if (rmContainer == null) { LOG.info("Null container completed..."); return; } Container container = rmContainer.getContainer(); // Get the application for the finished container FiCaSchedulerApp application = getCurrentAttemptForContainer(container.getId()); ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId(); if (application == null) { LOG.info("Container " + container + " of" + " unknown application " + appId + " completed or suspended with event " + event); return; } // Get the node on which the container was allocated FiCaSchedulerNode node = getNode(container.getNodeId()); // Inform the queue LeafQueue queue = (LeafQueue)application.getQueue(); queue.completedContainer(clusterResource, application, node, rmContainer, containerStatus, event, null, true); LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event); }
Example #7
Source File: CapacityScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@Override public void killContainer(RMContainer cont) { if (LOG.isDebugEnabled()) { LOG.debug("KILL_CONTAINER: container" + cont.toString()); } recoverResourceRequestForContainer(cont); completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL); }
Example #8
Source File: CapacityScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@Override public void dropContainerReservation(RMContainer container) { if(LOG.isDebugEnabled()){ LOG.debug("DROP_RESERVATION:" + container.toString()); } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER), RMContainerEventType.KILL); }
Example #9
Source File: CapacityScheduler.java From hadoop with Apache License 2.0 | 5 votes |
@Lock(CapacityScheduler.class) @Override protected synchronized void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { if (rmContainer == null) { LOG.info("Null container completed..."); return; } Container container = rmContainer.getContainer(); // Get the application for the finished container FiCaSchedulerApp application = getCurrentAttemptForContainer(container.getId()); ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId(); if (application == null) { LOG.info("Container " + container + " of" + " unknown application " + appId + " completed with event " + event); return; } // Get the node on which the container was allocated FiCaSchedulerNode node = getNode(container.getNodeId()); // Inform the queue LeafQueue queue = (LeafQueue)application.getQueue(); queue.completedContainer(clusterResource, application, node, rmContainer, containerStatus, event, null, true); LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event); }
Example #10
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
@Override public void killContainer(RMContainer cont) { if (LOG.isDebugEnabled()) { LOG.debug("KILL_CONTAINER: container" + cont.toString()); } recoverResourceRequestForContainer(cont); completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus( cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER), RMContainerEventType.KILL); }
Example #11
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
protected void warnOrKillContainer(RMContainer container) { ApplicationAttemptId appAttemptId = container.getApplicationAttemptId(); FSAppAttempt app = getSchedulerApp(appAttemptId); FSLeafQueue queue = app.getQueue(); LOG.info("Preempting container (prio=" + container.getContainer().getPriority() + "res=" + container.getContainer().getResource() + ") from queue " + queue.getName()); Long time = app.getContainerPreemptionTime(container); if (time != null) { // if we asked for preemption more than maxWaitTimeBeforeKill ms ago, // proceed with kill if (time + waitTimeBeforeKill < getClock().getTime()) { ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); recoverResourceRequestForContainer(container); // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). completedContainer(container, status, RMContainerEventType.KILL); LOG.info("Killing container" + container + " (after waiting for premption for " + (getClock().getTime() - time) + "ms)"); } } else { // track the request in the FSAppAttempt itself app.addPreemption(container, getClock().getTime()); } }
Example #12
Source File: TestLeafQueue.java From hadoop with Apache License 2.0 | 5 votes |
static LeafQueue stubLeafQueue(LeafQueue queue) { // Mock some methods for ease in these unit tests // 1. LeafQueue.createContainer to return dummy containers doAnswer( new Answer<Container>() { @Override public Container answer(InvocationOnMock invocation) throws Throwable { final FiCaSchedulerApp application = (FiCaSchedulerApp)(invocation.getArguments()[0]); final ContainerId containerId = TestUtils.getMockContainerId(application); Container container = TestUtils.getMockContainer( containerId, ((FiCaSchedulerNode)(invocation.getArguments()[1])).getNodeID(), (Resource)(invocation.getArguments()[2]), ((Priority)invocation.getArguments()[3])); return container; } } ). when(queue).createContainer( any(FiCaSchedulerApp.class), any(FiCaSchedulerNode.class), any(Resource.class), any(Priority.class) ); // 2. Stub out LeafQueue.parent.completedContainer CSQueue parent = queue.getParent(); doNothing().when(parent).completedContainer( any(Resource.class), any(FiCaSchedulerApp.class), any(FiCaSchedulerNode.class), any(RMContainer.class), any(ContainerStatus.class), any(RMContainerEventType.class), any(CSQueue.class), anyBoolean()); return queue; }
Example #13
Source File: YarnNodeCapacityManager.java From incubator-myriad with Apache License 2.0 | 5 votes |
@Override public void beforeCompletedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType type) { if (type.equals(RMContainerEventType.KILL) || type.equals(RMContainerEventType.RELEASED)) { LOGGER.info("{} completed with exit status {}, killing cooresponding mesos task.", rmContainer.getContainerId().toString(), type); removeYarnTask(rmContainer); } }
Example #14
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
/** * Process a heartbeat update from a node. */ private synchronized void nodeUpdate(RMNode nm) { long start = getClock().getTime(); if (LOG.isDebugEnabled()) { LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterResource); } eventLog.log("HEARTBEAT", nm.getHostName()); FSSchedulerNode node = getFSSchedulerNode(nm.getNodeID()); List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates(); List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>(); List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>(); for(UpdatedContainerInfo containerInfo : containerInfoList) { newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers()); completedContainers.addAll(containerInfo.getCompletedContainers()); } // Processing the newly launched containers for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer.getContainerId(), node); } // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); LOG.debug("Container FINISHED: " + containerId); completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); } if (continuousSchedulingEnabled) { if (!completedContainers.isEmpty()) { attemptScheduling(node); } } else { attemptScheduling(node); } long duration = getClock().getTime() - start; fsOpDurations.addNodeUpdateDuration(duration); }
Example #15
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID()); // This can occur when an UNHEALTHY node reconnects if (node == null) { return; } Resources.subtractFrom(clusterResource, rmNode.getTotalCapability()); updateRootQueueMetrics(); // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); for (RMContainer container : runningContainers) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } // Remove reservations, if any RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus( reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } nodes.remove(rmNode.getNodeID()); queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); updateMaximumAllocation(node, false); LOG.info("Removed node " + rmNode.getNodeAddress() + " cluster capacity: " + clusterResource); }
Example #16
Source File: FairScheduler.java From hadoop with Apache License 2.0 | 5 votes |
/** * Clean up a completed container. */ @Override protected synchronized void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { if (rmContainer == null) { LOG.info("Null container completed..."); return; } Container container = rmContainer.getContainer(); // Get the application for the finished container FSAppAttempt application = getCurrentAttemptForContainer(container.getId()); ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId(); if (application == null) { LOG.info("Container " + container + " of" + " unknown application attempt " + appId + " completed with event " + event); return; } // Get the node on which the container was allocated FSSchedulerNode node = getFSSchedulerNode(container.getNodeId()); if (rmContainer.getState() == RMContainerState.RESERVED) { application.unreserve(rmContainer.getReservedPriority(), node); } else { application.containerCompleted(rmContainer, containerStatus, event); node.releaseContainer(container); updateRootQueueMetrics(); } LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event); }
Example #17
Source File: SchedulerApplicationAttempt.java From hadoop with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") public synchronized void containerLaunchedOnNode(ContainerId containerId, NodeId nodeId) { // Inform the container RMContainer rmContainer = getRMContainer(containerId); if (rmContainer == null) { // Some unknown container sneaked into the system. Kill it. rmContext.getDispatcher().getEventHandler() .handle(new RMNodeCleanContainerEvent(nodeId, containerId)); return; } rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); }
Example #18
Source File: TestReservations.java From hadoop with Apache License 2.0 | 5 votes |
static LeafQueue stubLeafQueue(LeafQueue queue) { // Mock some methods for ease in these unit tests // 1. LeafQueue.createContainer to return dummy containers doAnswer(new Answer<Container>() { @Override public Container answer(InvocationOnMock invocation) throws Throwable { final FiCaSchedulerApp application = (FiCaSchedulerApp) (invocation .getArguments()[0]); final ContainerId containerId = TestUtils .getMockContainerId(application); Container container = TestUtils.getMockContainer(containerId, ((FiCaSchedulerNode) (invocation.getArguments()[1])).getNodeID(), (Resource) (invocation.getArguments()[2]), ((Priority) invocation.getArguments()[3])); return container; } }).when(queue).createContainer(any(FiCaSchedulerApp.class), any(FiCaSchedulerNode.class), any(Resource.class), any(Priority.class)); // 2. Stub out LeafQueue.parent.completedContainer CSQueue parent = queue.getParent(); doNothing().when(parent).completedContainer(any(Resource.class), any(FiCaSchedulerApp.class), any(FiCaSchedulerNode.class), any(RMContainer.class), any(ContainerStatus.class), any(RMContainerEventType.class), any(CSQueue.class), anyBoolean()); return queue; }
Example #19
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
@Override public void dropContainerReservation(RMContainer container) { if(LOG.isDebugEnabled()){ LOG.debug("DROP_RESERVATION:" + container.toString()); } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER), RMContainerEventType.KILL); }
Example #20
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
/** * Process node labels update on a node. * * TODO: Currently capacity scheduler will kill containers on a node when * labels on the node changed. It is a simply solution to ensure guaranteed * capacity on labels of queues. When YARN-2498 completed, we can let * preemption policy to decide if such containers need to be killed or just * keep them running. */ private synchronized void updateLabelsOnNode(NodeId nodeId, Set<String> newLabels) { FiCaSchedulerNode node = nodes.get(nodeId); if (null == node) { return; } // labels is same, we don't need do update if (node.getLabels().size() == newLabels.size() && node.getLabels().containsAll(newLabels)) { return; } // Kill running containers since label is changed for (RMContainer rmContainer : node.getRunningContainers()) { ContainerId containerId = rmContainer.getContainerId(); completedContainer(rmContainer, ContainerStatus.newInstance(containerId, ContainerState.COMPLETE, String.format( "Container=%s killed since labels on the node=%s changed", containerId.toString(), nodeId.toString()), ContainerExitStatus.KILLED_BY_RESOURCEMANAGER), RMContainerEventType.KILL); } // Unreserve container on this node RMContainer reservedContainer = node.getReservedContainer(); if (null != reservedContainer) { dropContainerReservation(reservedContainer); } // Update node labels after we've done this node.updateLabels(newLabels); }
Example #21
Source File: CapacityScheduler.java From big-c with Apache License 2.0 | 5 votes |
private synchronized void nodeUpdate(RMNode nm) { if (LOG.isDebugEnabled()) { LOG.debug("nodeUpdate: " + nm + " clusterResources: " + clusterResource); } FiCaSchedulerNode node = getNode(nm.getNodeID()); List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates(); List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>(); List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>(); for(UpdatedContainerInfo containerInfo : containerInfoList) { newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers()); completedContainers.addAll(containerInfo.getCompletedContainers()); } // Processing the newly launched containers for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer, node); LOG.info("Container LAUNCHED:"+launchedContainer.getContainerId()); } // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); LOG.info("Container FINISHED: " + containerId); completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); } // Now node data structures are up to date and ready for scheduling. if(LOG.isDebugEnabled()) { LOG.info("Node being looked for scheduling " + nm + " availableResource: " + node.getAvailableResource()); } }
Example #22
Source File: FifoScheduler.java From big-c with Apache License 2.0 | 5 votes |
private synchronized void doneApplicationAttempt( ApplicationAttemptId applicationAttemptId, RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers) throws IOException { FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId); SchedulerApplication<FiCaSchedulerApp> application = applications.get(applicationAttemptId.getApplicationId()); if (application == null || attempt == null) { throw new IOException("Unknown application " + applicationAttemptId + " has completed!"); } // Kill all 'live' containers for (RMContainer container : attempt.getLiveContainers()) { if (keepContainers && container.getState().equals(RMContainerState.RUNNING)) { // do not kill the running container in the case of work-preserving AM // restart. LOG.info("Skip killing " + container.getContainerId()); continue; } completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION), RMContainerEventType.KILL); } // Clean up pending requests, metrics etc. attempt.stop(rmAppAttemptFinalState); }
Example #23
Source File: SchedulerApplicationAttempt.java From big-c with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") public synchronized void containerLaunchedOnNode(ContainerId containerId, NodeId nodeId) { // Inform the container RMContainer rmContainer = getRMContainer(containerId); if (rmContainer == null) { // Some unknown container sneaked into the system. Kill it. rmContext.getDispatcher().getEventHandler() .handle(new RMNodeCleanContainerEvent(nodeId, containerId)); return; } rmContainer.handle(new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); }
Example #24
Source File: SchedulerApplicationAttempt.java From big-c with Apache License 2.0 | 5 votes |
public synchronized ContainersAndNMTokensAllocation pullNewlyAllocatedContainersAndNMTokens() { List<Container> returnContainerList = new ArrayList<Container>(newlyAllocatedContainers.size()); List<NMToken> nmTokens = new ArrayList<NMToken>(); for (Iterator<RMContainer> i = newlyAllocatedContainers.iterator(); i .hasNext();) { RMContainer rmContainer = i.next(); Container container = rmContainer.getContainer(); try { // create container token and NMToken altogether. container.setContainerToken(rmContext.getContainerTokenSecretManager() .createContainerToken(container.getId(), container.getNodeId(), getUser(), container.getResource(), container.getPriority(), rmContainer.getCreationTime(), this.logAggregationContext)); NMToken nmToken = rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), getApplicationAttemptId(), container); if (nmToken != null) { nmTokens.add(nmToken); } } catch (IllegalArgumentException e) { // DNS might be down, skip returning this container. LOG.error("Error trying to assign container token and NM token to" + " an allocated container " + container.getId(), e); continue; } returnContainerList.add(container); i.remove(); rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), RMContainerEventType.ACQUIRED)); } return new ContainersAndNMTokensAllocation(returnContainerList, nmTokens); }
Example #25
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
protected void warnOrKillContainer(RMContainer container) { ApplicationAttemptId appAttemptId = container.getApplicationAttemptId(); FSAppAttempt app = getSchedulerApp(appAttemptId); FSLeafQueue queue = app.getQueue(); LOG.info("Preempting container (prio=" + container.getContainer().getPriority() + "res=" + container.getContainer().getResource() + ") from queue " + queue.getName()); Long time = app.getContainerPreemptionTime(container); if (time != null) { // if we asked for preemption more than maxWaitTimeBeforeKill ms ago, // proceed with kill if (time + waitTimeBeforeKill < getClock().getTime()) { ContainerStatus status = SchedulerUtils.createPreemptedContainerStatus( container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER); recoverResourceRequestForContainer(container); // TODO: Not sure if this ever actually adds this to the list of cleanup // containers on the RMNode (see SchedulerNode.releaseContainer()). completedContainer(container, status, RMContainerEventType.KILL); LOG.info("Killing container" + container + " (after waiting for premption for " + (getClock().getTime() - time) + "ms)"); } } else { // track the request in the FSAppAttempt itself app.addPreemption(container, getClock().getTime()); } }
Example #26
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
/** * Clean up a completed container. */ @Override protected synchronized void completedContainer(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { if (rmContainer == null) { LOG.info("Null container completed..."); return; } Container container = rmContainer.getContainer(); // Get the application for the finished container FSAppAttempt application = getCurrentAttemptForContainer(container.getId()); ApplicationId appId = container.getId().getApplicationAttemptId().getApplicationId(); if (application == null) { LOG.info("Container " + container + " of" + " unknown application attempt " + appId + " completed with event " + event); return; } // Get the node on which the container was allocated FSSchedulerNode node = getFSSchedulerNode(container.getNodeId()); if (rmContainer.getState() == RMContainerState.RESERVED) { application.unreserve(rmContainer.getReservedPriority(), node); } else { application.containerCompleted(rmContainer, containerStatus, event); node.releaseContainer(container,container.getResource()); updateRootQueueMetrics(); } LOG.info("Application attempt " + application.getApplicationAttemptId() + " released container " + container.getId() + " on node: " + node + " with event: " + event); }
Example #27
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
private synchronized void removeNode(RMNode rmNode) { FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID()); // This can occur when an UNHEALTHY node reconnects if (node == null) { return; } Resources.subtractFrom(clusterResource, rmNode.getTotalCapability()); updateRootQueueMetrics(); // Remove running containers List<RMContainer> runningContainers = node.getRunningContainers(); for (RMContainer container : runningContainers) { completedContainer(container, SchedulerUtils.createAbnormalContainerStatus( container.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } // Remove reservations, if any RMContainer reservedContainer = node.getReservedContainer(); if (reservedContainer != null) { completedContainer(reservedContainer, SchedulerUtils.createAbnormalContainerStatus( reservedContainer.getContainerId(), SchedulerUtils.LOST_CONTAINER), RMContainerEventType.KILL); } nodes.remove(rmNode.getNodeID()); queueMgr.getRootQueue().setSteadyFairShare(clusterResource); queueMgr.getRootQueue().recomputeSteadyShares(); updateMaximumAllocation(node, false); LOG.info("Removed node " + rmNode.getNodeAddress() + " cluster capacity: " + clusterResource); }
Example #28
Source File: FairScheduler.java From big-c with Apache License 2.0 | 5 votes |
/** * Process a heartbeat update from a node. */ private synchronized void nodeUpdate(RMNode nm) { long start = getClock().getTime(); if (LOG.isDebugEnabled()) { LOG.debug("nodeUpdate: " + nm + " cluster capacity: " + clusterResource); } eventLog.log("HEARTBEAT", nm.getHostName()); FSSchedulerNode node = getFSSchedulerNode(nm.getNodeID()); List<UpdatedContainerInfo> containerInfoList = nm.pullContainerUpdates(); List<ContainerStatus> newlyLaunchedContainers = new ArrayList<ContainerStatus>(); List<ContainerStatus> completedContainers = new ArrayList<ContainerStatus>(); for(UpdatedContainerInfo containerInfo : containerInfoList) { newlyLaunchedContainers.addAll(containerInfo.getNewlyLaunchedContainers()); completedContainers.addAll(containerInfo.getCompletedContainers()); } // Processing the newly launched containers for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer, node); } // Process completed containers for (ContainerStatus completedContainer : completedContainers) { ContainerId containerId = completedContainer.getContainerId(); LOG.debug("Container FINISHED: " + containerId); completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); } if (continuousSchedulingEnabled) { if (!completedContainers.isEmpty()) { attemptScheduling(node); } } else { attemptScheduling(node); } long duration = getClock().getTime() - start; fsOpDurations.addNodeUpdateDuration(duration); }
Example #29
Source File: FSAppAttempt.java From big-c with Apache License 2.0 | 5 votes |
synchronized public void containerCompleted(RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { Container container = rmContainer.getContainer(); ContainerId containerId = container.getId(); // Remove from the list of newly allocated containers if found newlyAllocatedContainers.remove(rmContainer); // Inform the container rmContainer.handle( new RMContainerFinishedEvent( containerId, containerStatus, event) ); LOG.info("Completed container: " + rmContainer.getContainerId() + " in state: " + rmContainer.getState() + " event:" + event); // Remove from the list of containers liveContainers.remove(rmContainer.getContainerId()); RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, "SchedulerApp", getApplicationId(), containerId); // Update usage metrics Resource containerResource = rmContainer.getContainer().getResource(); queue.getMetrics().releaseResources(getUser(), 1, containerResource); Resources.subtractFrom(currentConsumption, containerResource); // remove from preemption map if it is completed preemptionMap.remove(rmContainer); // Clear resource utilization metrics cache. lastMemoryAggregateAllocationUpdateTime = -1; }
Example #30
Source File: AbstractYarnScheduler.java From big-c with Apache License 2.0 | 5 votes |
protected void releaseContainers(List<ContainerId> containers, SchedulerApplicationAttempt attempt) { for (ContainerId containerId : containers) { RMContainer rmContainer = getRMContainer(containerId); if (rmContainer == null) { if (System.currentTimeMillis() - ResourceManager.getClusterTimeStamp() < nmExpireInterval) { LOG.info(containerId + " doesn't exist. Add the container" + " to the release request cache as it maybe on recovery."); synchronized (attempt) { attempt.getPendingRelease().add(containerId); } } else { RMAuditLogger.logFailure(attempt.getUser(), AuditConstants.RELEASE_CONTAINER, "Unauthorized access or invalid container", "Scheduler", "Trying to release container not owned by app or with invalid id.", attempt.getApplicationId(), containerId); } } LOG.info("Container FINISHED by AbstractYarnScheduler by releaseContainers: " + containerId); completedContainer(rmContainer, SchedulerUtils.createAbnormalContainerStatus(containerId, SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED); } }