Java Code Examples for org.apache.hadoop.mapreduce.v2.app.job.Job#getTask()
The following examples show how to use
org.apache.hadoop.mapreduce.v2.app.job.Job#getTask() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TaskAttemptListenerImpl.java From hadoop with Apache License 2.0 | 6 votes |
/** * Child checking whether it can commit. * * <br> * Commit is a two-phased protocol. First the attempt informs the * ApplicationMaster that it is * {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls * the ApplicationMaster whether it {@link #canCommit(TaskAttemptID)} This is * a legacy from the centralized commit protocol handling by the JobTracker. */ @Override public boolean canCommit(TaskAttemptID taskAttemptID) throws IOException { LOG.info("Commit go/no-go request from " + taskAttemptID.toString()); // An attempt is asking if it can commit its output. This can be decided // only by the task which is managing the multiple attempts. So redirect the // request there. org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); taskHeartbeatHandler.progressing(attemptID); // tell task to retry later if AM has not heard from RM within the commit // window to help avoid double-committing in a split-brain situation long now = context.getClock().getTime(); if (now - rmHeartbeatHandler.getLastHeartbeatTime() > commitWindowMs) { return false; } Job job = context.getJob(attemptID.getTaskId().getJobId()); Task task = job.getTask(attemptID.getTaskId()); return task.canCommit(attemptID); }
Example 2
Source File: LegacyTaskRuntimeEstimator.java From big-c with Apache License 2.0 | 6 votes |
private long storedPerAttemptValue (Map<TaskAttempt, AtomicLong> data, TaskAttemptId attemptID) { TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); Task task = job.getTask(taskID); if (task == null) { return -1L; } TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt == null) { return -1L; } AtomicLong estimate = data.get(taskAttempt); return estimate == null ? -1L : estimate.get(); }
Example 3
Source File: HsWebServices.java From big-c with Apache License 2.0 | 6 votes |
@GET @Path("/mapreduce/jobs/{jobid}/tasks/{taskid}/counters") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public JobTaskCounterInfo getSingleTaskCounters( @Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid) { init(); Job job = AMWebServices.getJobFromJobIdString(jid, ctx); checkAccess(job, hsr); TaskId taskID = MRApps.toTaskID(tid); if (taskID == null) { throw new NotFoundException("taskid " + tid + " not found or invalid"); } Task task = job.getTask(taskID); if (task == null) { throw new NotFoundException("task not found with id " + tid); } return new JobTaskCounterInfo(task); }
Example 4
Source File: StartEndTimesBase.java From hadoop with Apache License 2.0 | 6 votes |
protected DataStatistics dataStatisticsForTask(TaskId taskID) { JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); if (job == null) { return null; } Task task = job.getTask(taskID); if (task == null) { return null; } return task.getType() == TaskType.MAP ? mapperStatistics.get(job) : task.getType() == TaskType.REDUCE ? reducerStatistics.get(job) : null; }
Example 5
Source File: TestAMWebApp.java From big-c with Apache License 2.0 | 6 votes |
@Test public void testSingleTaskCounterView() { AppContext appContext = new MockAppContext(0, 1, 1, 2); Map<String, String> params = getTaskParams(appContext); params.put(AMParams.COUNTER_GROUP, "org.apache.hadoop.mapreduce.FileSystemCounter"); params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); // remove counters from one task attempt // to test handling of missing counters TaskId taskID = MRApps.toTaskID(params.get(AMParams.TASK_ID)); Job job = appContext.getJob(taskID.getJobId()); Task task = job.getTask(taskID); TaskAttempt attempt = task.getAttempts().values().iterator().next(); attempt.getReport().setCounters(null); WebAppTests.testPage(SingleCounterPage.class, AppContext.class, appContext, params); }
Example 6
Source File: StartEndTimesBase.java From big-c with Apache License 2.0 | 6 votes |
protected DataStatistics dataStatisticsForTask(TaskId taskID) { JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); if (job == null) { return null; } Task task = job.getTask(taskID); if (task == null) { return null; } return task.getType() == TaskType.MAP ? mapperStatistics.get(job) : task.getType() == TaskType.REDUCE ? reducerStatistics.get(job) : null; }
Example 7
Source File: LegacyTaskRuntimeEstimator.java From hadoop with Apache License 2.0 | 6 votes |
private long storedPerAttemptValue (Map<TaskAttempt, AtomicLong> data, TaskAttemptId attemptID) { TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); Task task = job.getTask(taskID); if (task == null) { return -1L; } TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt == null) { return -1L; } AtomicLong estimate = data.get(taskAttempt); return estimate == null ? -1L : estimate.get(); }
Example 8
Source File: TestAMWebApp.java From hadoop with Apache License 2.0 | 6 votes |
@Test public void testSingleTaskCounterView() { AppContext appContext = new MockAppContext(0, 1, 1, 2); Map<String, String> params = getTaskParams(appContext); params.put(AMParams.COUNTER_GROUP, "org.apache.hadoop.mapreduce.FileSystemCounter"); params.put(AMParams.COUNTER_NAME, "HDFS_WRITE_OPS"); // remove counters from one task attempt // to test handling of missing counters TaskId taskID = MRApps.toTaskID(params.get(AMParams.TASK_ID)); Job job = appContext.getJob(taskID.getJobId()); Task task = job.getTask(taskID); TaskAttempt attempt = task.getAttempts().values().iterator().next(); attempt.getReport().setCounters(null); WebAppTests.testPage(SingleCounterPage.class, AppContext.class, appContext, params); }
Example 9
Source File: HsWebServices.java From hadoop with Apache License 2.0 | 6 votes |
@GET @Path("/mapreduce/jobs/{jobid}/tasks/{taskid}/counters") @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) public JobTaskCounterInfo getSingleTaskCounters( @Context HttpServletRequest hsr, @PathParam("jobid") String jid, @PathParam("taskid") String tid) { init(); Job job = AMWebServices.getJobFromJobIdString(jid, ctx); checkAccess(job, hsr); TaskId taskID = MRApps.toTaskID(tid); if (taskID == null) { throw new NotFoundException("taskid " + tid + " not found or invalid"); } Task task = job.getTask(taskID); if (task == null) { throw new NotFoundException("task not found with id " + tid); } return new JobTaskCounterInfo(task); }
Example 10
Source File: TaskAttemptListenerImpl.java From big-c with Apache License 2.0 | 6 votes |
/** * Child checking whether it can commit. * * <br> * Commit is a two-phased protocol. First the attempt informs the * ApplicationMaster that it is * {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls * the ApplicationMaster whether it {@link #canCommit(TaskAttemptID)} This is * a legacy from the centralized commit protocol handling by the JobTracker. */ @Override public boolean canCommit(TaskAttemptID taskAttemptID) throws IOException { LOG.info("Commit go/no-go request from " + taskAttemptID.toString()); // An attempt is asking if it can commit its output. This can be decided // only by the task which is managing the multiple attempts. So redirect the // request there. org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); taskHeartbeatHandler.progressing(attemptID); // tell task to retry later if AM has not heard from RM within the commit // window to help avoid double-committing in a split-brain situation long now = context.getClock().getTime(); if (now - rmHeartbeatHandler.getLastHeartbeatTime() > commitWindowMs) { return false; } Job job = context.getJob(attemptID.getTaskId().getJobId()); Task task = job.getTask(attemptID.getTaskId()); return task.canCommit(attemptID); }
Example 11
Source File: MRAppMaster.java From big-c with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Override public void handle(TaskAttemptEvent event) { Job job = context.getJob(event.getTaskAttemptID().getTaskId().getJobId()); Task task = job.getTask(event.getTaskAttemptID().getTaskId()); TaskAttempt attempt = task.getAttempt(event.getTaskAttemptID()); ((EventHandler<TaskAttemptEvent>) attempt).handle(event); }
Example 12
Source File: TaskSpeculationPredicate.java From big-c with Apache License 2.0 | 5 votes |
boolean canSpeculate(AppContext context, TaskId taskID) { // This class rejects speculating any task that already has speculations, // or isn't running. // Subclasses should call TaskSpeculationPredicate.canSpeculate(...) , but // can be even more restrictive. JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); Task task = job.getTask(taskID); return task.getAttempts().size() == 1; }
Example 13
Source File: DefaultSpeculator.java From big-c with Apache License 2.0 | 5 votes |
/** * Absorbs one TaskAttemptStatus * * @param reportedStatus the status report that we got from a task attempt * that we want to fold into the speculation data for this job * @param timestamp the time this status corresponds to. This matters * because statuses contain progress. */ protected void statusUpdate(TaskAttemptStatus reportedStatus, long timestamp) { String stateString = reportedStatus.taskState.toString(); TaskAttemptId attemptID = reportedStatus.id; TaskId taskID = attemptID.getTaskId(); Job job = context.getJob(taskID.getJobId()); if (job == null) { return; } Task task = job.getTask(taskID); if (task == null) { return; } estimator.updateAttempt(reportedStatus, timestamp); if (stateString.equals(TaskAttemptState.RUNNING.name())) { runningTasks.putIfAbsent(taskID, Boolean.TRUE); } else { runningTasks.remove(taskID, Boolean.TRUE); if (!stateString.equals(TaskAttemptState.STARTING.name())) { runningTaskAttemptStatistics.remove(attemptID); } } }
Example 14
Source File: MRAppMaster.java From hadoop with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked") @Override public void handle(TaskAttemptEvent event) { Job job = context.getJob(event.getTaskAttemptID().getTaskId().getJobId()); Task task = job.getTask(event.getTaskAttemptID().getTaskId()); TaskAttempt attempt = task.getAttempt(event.getTaskAttemptID()); ((EventHandler<TaskAttemptEvent>) attempt).handle(event); }
Example 15
Source File: DefaultSpeculator.java From hadoop with Apache License 2.0 | 5 votes |
/** * Absorbs one TaskAttemptStatus * * @param reportedStatus the status report that we got from a task attempt * that we want to fold into the speculation data for this job * @param timestamp the time this status corresponds to. This matters * because statuses contain progress. */ protected void statusUpdate(TaskAttemptStatus reportedStatus, long timestamp) { String stateString = reportedStatus.taskState.toString(); TaskAttemptId attemptID = reportedStatus.id; TaskId taskID = attemptID.getTaskId(); Job job = context.getJob(taskID.getJobId()); if (job == null) { return; } Task task = job.getTask(taskID); if (task == null) { return; } estimator.updateAttempt(reportedStatus, timestamp); if (stateString.equals(TaskAttemptState.RUNNING.name())) { runningTasks.putIfAbsent(taskID, Boolean.TRUE); } else { runningTasks.remove(taskID, Boolean.TRUE); if (!stateString.equals(TaskAttemptState.STARTING.name())) { runningTaskAttemptStatistics.remove(attemptID); } } }
Example 16
Source File: DefaultSpeculator.java From big-c with Apache License 2.0 | 4 votes |
private long speculationValue(TaskId taskID, long now) { Job job = context.getJob(taskID.getJobId()); Task task = job.getTask(taskID); Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts(); long acceptableRuntime = Long.MIN_VALUE; long result = Long.MIN_VALUE; if (!mayHaveSpeculated.contains(taskID)) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; } } TaskAttemptId runningTaskAttemptID = null; int numberRunningAttempts = 0; for (TaskAttempt taskAttempt : attempts.values()) { if (taskAttempt.getState() == TaskAttemptState.RUNNING || taskAttempt.getState() == TaskAttemptState.STARTING) { if (++numberRunningAttempts > 1) { return ALREADY_SPECULATING; } runningTaskAttemptID = taskAttempt.getID(); long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID); long taskAttemptStartTime = estimator.attemptEnrolledTime(runningTaskAttemptID); if (taskAttemptStartTime > now) { // This background process ran before we could process the task // attempt status change that chronicles the attempt start return TOO_NEW; } long estimatedEndTime = estimatedRunTime + taskAttemptStartTime; long estimatedReplacementEndTime = now + estimator.estimatedNewAttemptRuntime(taskID); float progress = taskAttempt.getProgress(); TaskAttemptHistoryStatistics data = runningTaskAttemptStatistics.get(runningTaskAttemptID); if (data == null) { runningTaskAttemptStatistics.put(runningTaskAttemptID, new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now)); } else { if (estimatedRunTime == data.getEstimatedRunTime() && progress == data.getProgress()) { // Previous stats are same as same stats if (data.notHeartbeatedInAWhile(now)) { // Stats have stagnated for a while, simulate heart-beat. TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus(); taskAttemptStatus.id = runningTaskAttemptID; taskAttemptStatus.progress = progress; taskAttemptStatus.taskState = taskAttempt.getState(); // Now simulate the heart-beat handleAttempt(taskAttemptStatus); } } else { // Stats have changed - update our data structure data.setEstimatedRunTime(estimatedRunTime); data.setProgress(progress); data.resetHeartBeatTime(now); } } if (estimatedEndTime < now) { return PROGRESS_IS_GOOD; } if (estimatedReplacementEndTime >= estimatedEndTime) { return TOO_LATE_TO_SPECULATE; } result = estimatedEndTime - estimatedReplacementEndTime; } } // If we are here, there's at most one task attempt. if (numberRunningAttempts == 0) { return NOT_RUNNING; } if (acceptableRuntime == Long.MIN_VALUE) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; } } return result; }
Example 17
Source File: LegacyTaskRuntimeEstimator.java From hadoop with Apache License 2.0 | 4 votes |
@Override public void updateAttempt(TaskAttemptStatus status, long timestamp) { super.updateAttempt(status, timestamp); TaskAttemptId attemptID = status.id; TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); if (job == null) { return; } Task task = job.getTask(taskID); if (task == null) { return; } TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt == null) { return; } Long boxedStart = startTimes.get(attemptID); long start = boxedStart == null ? Long.MIN_VALUE : boxedStart; // We need to do two things. // 1: If this is a completion, we accumulate statistics in the superclass // 2: If this is not a completion, we learn more about it. // This is not a completion, but we're cooking. // if (taskAttempt.getState() == TaskAttemptState.RUNNING) { // See if this task is already in the registry AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt); AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt); if (estimateContainer == null) { if (attemptRuntimeEstimates.get(taskAttempt) == null) { attemptRuntimeEstimates.put(taskAttempt, new AtomicLong()); estimateContainer = attemptRuntimeEstimates.get(taskAttempt); } } if (estimateVarianceContainer == null) { attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong()); estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt); } long estimate = -1; long varianceEstimate = -1; // This code assumes that we'll never consider starting a third // speculative task attempt if two are already running for this task if (start > 0 && timestamp > start) { estimate = (long) ((timestamp - start) / Math.max(0.0001, status.progress)); varianceEstimate = (long) (estimate * status.progress / 10); } if (estimateContainer != null) { estimateContainer.set(estimate); } if (estimateVarianceContainer != null) { estimateVarianceContainer.set(varianceEstimate); } } }
Example 18
Source File: StartEndTimesBase.java From big-c with Apache License 2.0 | 4 votes |
@Override public void updateAttempt(TaskAttemptStatus status, long timestamp) { TaskAttemptId attemptID = status.id; TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); if (job == null) { return; } Task task = job.getTask(taskID); if (task == null) { return; } Long boxedStart = startTimes.get(attemptID); long start = boxedStart == null ? Long.MIN_VALUE : boxedStart; TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt.getState() == TaskAttemptState.SUCCEEDED) { boolean isNew = false; // is this a new success? synchronized (doneTasks) { if (!doneTasks.contains(task)) { doneTasks.add(task); isNew = true; } } // It's a new completion // Note that if a task completes twice [because of a previous speculation // and a race, or a success followed by loss of the machine with the // local data] we only count the first one. if (isNew) { long finish = timestamp; if (start > 1L && finish > 1L && start <= finish) { long duration = finish - start; DataStatistics statistics = dataStatisticsForTask(taskID); if (statistics != null) { statistics.add(duration); } } } } }
Example 19
Source File: LegacyTaskRuntimeEstimator.java From big-c with Apache License 2.0 | 4 votes |
@Override public void updateAttempt(TaskAttemptStatus status, long timestamp) { super.updateAttempt(status, timestamp); TaskAttemptId attemptID = status.id; TaskId taskID = attemptID.getTaskId(); JobId jobID = taskID.getJobId(); Job job = context.getJob(jobID); if (job == null) { return; } Task task = job.getTask(taskID); if (task == null) { return; } TaskAttempt taskAttempt = task.getAttempt(attemptID); if (taskAttempt == null) { return; } Long boxedStart = startTimes.get(attemptID); long start = boxedStart == null ? Long.MIN_VALUE : boxedStart; // We need to do two things. // 1: If this is a completion, we accumulate statistics in the superclass // 2: If this is not a completion, we learn more about it. // This is not a completion, but we're cooking. // if (taskAttempt.getState() == TaskAttemptState.RUNNING) { // See if this task is already in the registry AtomicLong estimateContainer = attemptRuntimeEstimates.get(taskAttempt); AtomicLong estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt); if (estimateContainer == null) { if (attemptRuntimeEstimates.get(taskAttempt) == null) { attemptRuntimeEstimates.put(taskAttempt, new AtomicLong()); estimateContainer = attemptRuntimeEstimates.get(taskAttempt); } } if (estimateVarianceContainer == null) { attemptRuntimeEstimateVariances.putIfAbsent(taskAttempt, new AtomicLong()); estimateVarianceContainer = attemptRuntimeEstimateVariances.get(taskAttempt); } long estimate = -1; long varianceEstimate = -1; // This code assumes that we'll never consider starting a third // speculative task attempt if two are already running for this task if (start > 0 && timestamp > start) { estimate = (long) ((timestamp - start) / Math.max(0.0001, status.progress)); varianceEstimate = (long) (estimate * status.progress / 10); } if (estimateContainer != null) { estimateContainer.set(estimate); } if (estimateVarianceContainer != null) { estimateVarianceContainer.set(varianceEstimate); } } }
Example 20
Source File: DefaultSpeculator.java From hadoop with Apache License 2.0 | 4 votes |
private long speculationValue(TaskId taskID, long now) { Job job = context.getJob(taskID.getJobId()); Task task = job.getTask(taskID); Map<TaskAttemptId, TaskAttempt> attempts = task.getAttempts(); long acceptableRuntime = Long.MIN_VALUE; long result = Long.MIN_VALUE; if (!mayHaveSpeculated.contains(taskID)) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; } } TaskAttemptId runningTaskAttemptID = null; int numberRunningAttempts = 0; for (TaskAttempt taskAttempt : attempts.values()) { if (taskAttempt.getState() == TaskAttemptState.RUNNING || taskAttempt.getState() == TaskAttemptState.STARTING) { if (++numberRunningAttempts > 1) { return ALREADY_SPECULATING; } runningTaskAttemptID = taskAttempt.getID(); long estimatedRunTime = estimator.estimatedRuntime(runningTaskAttemptID); long taskAttemptStartTime = estimator.attemptEnrolledTime(runningTaskAttemptID); if (taskAttemptStartTime > now) { // This background process ran before we could process the task // attempt status change that chronicles the attempt start return TOO_NEW; } long estimatedEndTime = estimatedRunTime + taskAttemptStartTime; long estimatedReplacementEndTime = now + estimator.estimatedNewAttemptRuntime(taskID); float progress = taskAttempt.getProgress(); TaskAttemptHistoryStatistics data = runningTaskAttemptStatistics.get(runningTaskAttemptID); if (data == null) { runningTaskAttemptStatistics.put(runningTaskAttemptID, new TaskAttemptHistoryStatistics(estimatedRunTime, progress, now)); } else { if (estimatedRunTime == data.getEstimatedRunTime() && progress == data.getProgress()) { // Previous stats are same as same stats if (data.notHeartbeatedInAWhile(now)) { // Stats have stagnated for a while, simulate heart-beat. TaskAttemptStatus taskAttemptStatus = new TaskAttemptStatus(); taskAttemptStatus.id = runningTaskAttemptID; taskAttemptStatus.progress = progress; taskAttemptStatus.taskState = taskAttempt.getState(); // Now simulate the heart-beat handleAttempt(taskAttemptStatus); } } else { // Stats have changed - update our data structure data.setEstimatedRunTime(estimatedRunTime); data.setProgress(progress); data.resetHeartBeatTime(now); } } if (estimatedEndTime < now) { return PROGRESS_IS_GOOD; } if (estimatedReplacementEndTime >= estimatedEndTime) { return TOO_LATE_TO_SPECULATE; } result = estimatedEndTime - estimatedReplacementEndTime; } } // If we are here, there's at most one task attempt. if (numberRunningAttempts == 0) { return NOT_RUNNING; } if (acceptableRuntime == Long.MIN_VALUE) { acceptableRuntime = estimator.thresholdRuntime(taskID); if (acceptableRuntime == Long.MAX_VALUE) { return ON_SCHEDULE; } } return result; }