org.apache.hadoop.mapreduce.TaskCounter Java Examples
The following examples show how to use org.apache.hadoop.mapreduce.TaskCounter. The source file, originating project, and license are noted above each example.
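All of the examples below share one pattern: obtain a Counters object (from a completed Job, a TaskReporter inside a running task, or a Tez TaskContext) and look up a TaskCounter constant with findCounter(). As a minimal sketch of that pattern for the common case of a finished job — the class and method names TaskCounterDemo and printTaskCounters are illustrative only, and the job is assumed to have already completed (e.g. after job.waitForCompletion(true)):

import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class TaskCounterDemo {

  /**
   * Print a few common TaskCounter values for a job that has already
   * completed. Counter names here are standard TaskCounter constants.
   */
  static void printTaskCounters(Job job) throws Exception {
    Counters counters = job.getCounters();
    long mapIn   = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
    long mapOut  = counters.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getValue();
    long spilled = counters.findCounter(TaskCounter.SPILLED_RECORDS).getValue();
    long redIn   = counters.findCounter(TaskCounter.REDUCE_INPUT_RECORDS).getValue();
    System.out.println("map input records:    " + mapIn);
    System.out.println("map output records:   " + mapOut);
    System.out.println("spilled records:      " + spilled);
    System.out.println("reduce input records: " + redIn);
  }
}

The same findCounter() lookup appears in the task-side examples below, where the Counters object comes from a TaskReporter or from the task's own counters field.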
Example #1
Source File: Task.java From big-c with Apache License 2.0

public Task(String jobFile, TaskAttemptID taskId, int partition,
            int numSlotsRequired) {
  this.jobFile = jobFile;
  this.taskId = taskId;
  this.partition = partition;
  this.numSlotsRequired = numSlotsRequired;
  this.taskStatus = TaskStatus.createTaskStatus(isMapTask(), this.taskId,
      0.0f, numSlotsRequired, TaskStatus.State.UNASSIGNED, "", "", "",
      isMapTask() ? TaskStatus.Phase.MAP : TaskStatus.Phase.SHUFFLE,
      counters);
  spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  failedShuffleCounter = counters.findCounter(TaskCounter.FAILED_SHUFFLE);
  mergedMapOutputsCounter =
      counters.findCounter(TaskCounter.MERGED_MAP_OUTPUTS);
  gcUpdater = new GcTimeUpdater();
}
Example #2
Source File: TestReduceFetch.java From hadoop with Apache License 2.0

/**
 * Verify that all segments are read from disk
 * @throws Exception might be thrown
 */
public void testReduceFromDisk() throws Exception {
  final int MAP_TASKS = 8;
  JobConf job = mrCluster.createJobConf();
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "0.0");
  job.setNumMapTasks(MAP_TASKS);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.05");
  job.setInt(JobContext.IO_SORT_FACTOR, 2);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 4);
  Counters c = runJob(job);
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  assertTrue("Expected all records spilled during reduce (" + spill + ")",
      spill >= 2 * out); // all records spill at map, reduce
  assertTrue("Expected intermediate merges (" + spill + ")",
      spill >= 2 * out + (out / MAP_TASKS)); // some records hit twice
}
Example #3
Source File: TestTableInputFormatScanBase.java From hbase with Apache License 2.0

/**
 * Run an MR job to check that the number of mappers equals expectedNumOfSplits
 */
protected void testNumOfSplitsMR(int splitsPerRegion, int expectedNumOfSplits)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "TestJobForNumOfSplits-MR";
  LOG.info("Before map/reduce startup - job " + jobName);
  JobConf c = new JobConf(TEST_UTIL.getConfiguration());
  Scan scan = new Scan();
  scan.addFamily(INPUT_FAMILYS[0]);
  scan.addFamily(INPUT_FAMILYS[1]);
  c.setInt("hbase.mapreduce.tableinput.mappers.per.region", splitsPerRegion);
  c.set(KEY_STARTROW, "");
  c.set(KEY_LASTROW, "");
  Job job = Job.getInstance(c, jobName);
  TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(), scan,
      ScanMapper.class, ImmutableBytesWritable.class,
      ImmutableBytesWritable.class, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(NullOutputFormat.class);
  assertTrue("job failed!", job.waitForCompletion(true));
  // for some reason, hbase does not expose JobCounter.TOTAL_LAUNCHED_MAPS,
  // we use TaskCounter.SHUFFLED_MAPS to get total launched maps
  assertEquals("Saw the wrong count of mappers per region",
      expectedNumOfSplits,
      job.getCounters().findCounter(TaskCounter.SHUFFLED_MAPS).getValue());
}
Example #4
Source File: MapTask.java From hadoop with Apache License 2.0

TrackedRecordReader(TaskReporter reporter, JobConf job) throws IOException {
  inputRecordCounter = reporter.getCounter(TaskCounter.MAP_INPUT_RECORDS);
  fileInputByteCounter =
      reporter.getCounter(FileInputFormatCounter.BYTES_READ);
  this.reporter = reporter;

  List<Statistics> matchedStats = null;
  if (this.reporter.getInputSplit() instanceof FileSplit) {
    matchedStats = getFsStatistics(((FileSplit) this.reporter
        .getInputSplit()).getPath(), job);
  }
  fsStats = matchedStats;

  bytesInPrev = getInputBytes(fsStats);
  rawIn = job.getInputFormat().getRecordReader(reporter.getInputSplit(),
      job, reporter);
  bytesInCurr = getInputBytes(fsStats);
  fileInputByteCounter.increment(bytesInCurr - bytesInPrev);
}
Example #5
Source File: Task.java From hadoop with Apache License 2.0

public Task(String jobFile, TaskAttemptID taskId, int partition,
            int numSlotsRequired) {
  this.jobFile = jobFile;
  this.taskId = taskId;
  this.partition = partition;
  this.numSlotsRequired = numSlotsRequired;
  this.taskStatus = TaskStatus.createTaskStatus(isMapTask(), this.taskId,
      0.0f, numSlotsRequired, TaskStatus.State.UNASSIGNED, "", "", "",
      isMapTask() ? TaskStatus.Phase.MAP : TaskStatus.Phase.SHUFFLE,
      counters);
  spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
  failedShuffleCounter = counters.findCounter(TaskCounter.FAILED_SHUFFLE);
  mergedMapOutputsCounter =
      counters.findCounter(TaskCounter.MERGED_MAP_OUTPUTS);
  gcUpdater = new GcTimeUpdater();
}
Example #6
Source File: TestMRCombiner.java From tez with Apache License 2.0

@Test
public void testTop2RunNewCombiner() throws IOException, InterruptedException {
  TezConfiguration conf = new TezConfiguration();
  setKeyAndValueClassTypes(conf);
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setClass(MRJobConfig.COMBINE_CLASS_ATTR, Top2NewReducer.class,
      Object.class);
  TaskContext taskContext = getTaskContext(conf);
  MRCombiner combiner = new MRCombiner(taskContext);
  Writer writer = Mockito.mock(Writer.class);
  combiner.combine(new TezRawKeyValueIteratorTest(), writer);
  long inputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue();
  long outputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue();
  assertEquals(6, inputRecords);
  assertEquals(5, outputRecords);
}
Example #7
Source File: TestReduceFetchFromPartialMem.java From hadoop with Apache License 2.0

/** Verify that at least one segment does not hit disk */
public void testReduceFromPartialMem() throws Exception {
  final int MAP_TASKS = 7;
  JobConf job = mrCluster.createJobConf();
  job.setNumMapTasks(MAP_TASKS);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 0);
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "1.0");
  job.setInt(JobContext.SHUFFLE_PARALLEL_COPIES, 1);
  job.setInt(JobContext.IO_SORT_MB, 10);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.14");
  job.set(JobContext.SHUFFLE_MERGE_PERCENT, "1.0");
  Counters c = runJob(job);
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  assertTrue("Expected some records not spilled during reduce (" + spill + ")",
      spill < 2 * out); // spilled map records, some records at the reduce
}
Example #8
Source File: TestJobCounters.java From hadoop with Apache License 2.0

@SuppressWarnings("deprecation")
private long getTaskCounterUsage(JobClient client, JobID id, int numReports,
    int taskId, TaskType type) throws Exception {
  TaskReport[] reports = null;
  if (TaskType.MAP.equals(type)) {
    reports = client.getMapTaskReports(id);
  } else if (TaskType.REDUCE.equals(type)) {
    reports = client.getReduceTaskReports(id);
  }

  assertNotNull("No reports found for task type '" + type.name()
      + "' in job " + id, reports);
  // make sure that the total number of reports match the expected
  assertEquals("Mismatch in task id", numReports, reports.length);

  Counters counters = reports[taskId].getCounters();
  return counters.getCounter(TaskCounter.COMMITTED_HEAP_BYTES);
}
Example #9
Source File: TestCounters.java From big-c with Apache License 2.0

@SuppressWarnings("deprecation")
private void checkLegacyNames(Counters counters) {
  assertEquals("New name", 1, counters.findCounter(
      TaskCounter.class.getName(), "MAP_INPUT_RECORDS").getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "org.apache.hadoop.mapred.Task$Counter",
      "MAP_INPUT_RECORDS").getValue());
  assertEquals("Legacy enum", 1,
      counters.findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue());

  assertEquals("New name", 1, counters.findCounter(
      JobCounter.class.getName(), "DATA_LOCAL_MAPS").getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "org.apache.hadoop.mapred.JobInProgress$Counter",
      "DATA_LOCAL_MAPS").getValue());
  assertEquals("Legacy enum", 1,
      counters.findCounter(JobInProgress.Counter.DATA_LOCAL_MAPS).getValue());

  assertEquals("New name", 1, counters.findCounter(
      FileSystemCounter.class.getName(), "FILE_BYTES_READ").getValue());
  assertEquals("New name and method", 1, counters.findCounter("file",
      FileSystemCounter.BYTES_READ).getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "FileSystemCounters", "FILE_BYTES_READ").getValue());
}
Example #10
Source File: TestMRCombiner.java From tez with Apache License 2.0

@Test
public void testRunOldCombiner() throws IOException, InterruptedException {
  TezConfiguration conf = new TezConfiguration();
  setKeyAndValueClassTypes(conf);
  conf.setClass("mapred.combiner.class", OldReducer.class, Object.class);
  TaskContext taskContext = getTaskContext(conf);
  MRCombiner combiner = new MRCombiner(taskContext);
  Writer writer = Mockito.mock(Writer.class);
  combiner.combine(new TezRawKeyValueIteratorTest(), writer);
  long inputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue();
  long outputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue();
  assertEquals(6, inputRecords);
  assertEquals(3, outputRecords);
  // verify combiner output keys and values
  verifyKeyAndValues(writer);
}
Example #11
Source File: TestMRCombiner.java From tez with Apache License 2.0

@Test
public void testRunNewCombiner() throws IOException, InterruptedException {
  TezConfiguration conf = new TezConfiguration();
  setKeyAndValueClassTypes(conf);
  conf.setBoolean("mapred.mapper.new-api", true);
  conf.setClass(MRJobConfig.COMBINE_CLASS_ATTR, NewReducer.class,
      Object.class);
  TaskContext taskContext = getTaskContext(conf);
  MRCombiner combiner = new MRCombiner(taskContext);
  Writer writer = Mockito.mock(Writer.class);
  combiner.combine(new TezRawKeyValueIteratorTest(), writer);
  long inputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_INPUT_RECORDS).getValue();
  long outputRecords = taskContext.getCounters()
      .findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS).getValue();
  assertEquals(6, inputRecords);
  assertEquals(3, outputRecords);
  // verify combiner output keys and values
  verifyKeyAndValues(writer);
}
Example #12
Source File: TestJobCounters.java From hadoop with Apache License 2.0

private void validateFileCounters(Counters counter, long fileBytesRead,
    long fileBytesWritten, long mapOutputBytes,
    long mapOutputMaterializedBytes) {
  assertTrue(counter.findCounter(FileInputFormatCounter.BYTES_READ)
      .getValue() != 0);
  assertEquals(fileBytesRead,
      counter.findCounter(FileInputFormatCounter.BYTES_READ).getValue());
  assertTrue(counter.findCounter(FileOutputFormatCounter.BYTES_WRITTEN)
      .getValue() != 0);

  if (mapOutputBytes >= 0) {
    assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_BYTES)
        .getValue() != 0);
  }
  if (mapOutputMaterializedBytes >= 0) {
    assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES)
        .getValue() != 0);
  }
}
Example #13
Source File: HadoopCmdOutput.java From Kylin with Apache License 2.0

public void updateJobCounter() {
  try {
    Counters counters = job.getCounters();
    if (counters == null) {
      String errorMsg = "no counters for job " + getMrJobId();
      log.warn(errorMsg);
      output.append(errorMsg);
      return;
    }
    this.output.append(counters.toString()).append("\n");
    log.debug(counters.toString());

    mapInputRecords = String.valueOf(
        counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue());
    hdfsBytesWritten = String.valueOf(counters.findCounter(
        "FileSystemCounters", "HDFS_BYTES_WRITTEN").getValue());
    hdfsBytesRead = String.valueOf(counters.findCounter(
        "FileSystemCounters", "HDFS_BYTES_READ").getValue());
  } catch (Exception e) {
    log.error(e.getLocalizedMessage(), e);
    output.append(e.getLocalizedMessage());
  }
}
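The string pair ("FileSystemCounters", "HDFS_BYTES_WRITTEN") above is the legacy group name. As Example #9 shows for the local "file" scheme, the same values can presumably also be read through the org.apache.hadoop.mapreduce.FileSystemCounter enum with a scheme-based lookup. A hedged sketch, assuming the same counters object as above (this variant is not in the Kylin source):

// Assumed equivalent of the legacy string lookups, using the
// FileSystemCounter enum with the "hdfs" scheme (cf. Example #9's "file"):
long hdfsWritten =
    counters.findCounter("hdfs", FileSystemCounter.BYTES_WRITTEN).getValue();
long hdfsRead =
    counters.findCounter("hdfs", FileSystemCounter.BYTES_READ).getValue();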
Example #14
Source File: MapTask.java From hadoop with Apache License 2.0

NewTrackingRecordReader(org.apache.hadoop.mapreduce.InputSplit split,
    org.apache.hadoop.mapreduce.InputFormat<K, V> inputFormat,
    TaskReporter reporter,
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
    throws InterruptedException, IOException {
  this.reporter = reporter;
  this.inputRecordCounter = reporter
      .getCounter(TaskCounter.MAP_INPUT_RECORDS);
  this.fileInputByteCounter = reporter
      .getCounter(FileInputFormatCounter.BYTES_READ);

  List<Statistics> matchedStats = null;
  if (split instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
    matchedStats = getFsStatistics(
        ((org.apache.hadoop.mapreduce.lib.input.FileSplit) split)
            .getPath(), taskContext.getConfiguration());
  }
  fsStats = matchedStats;

  long bytesInPrev = getInputBytes(fsStats);
  this.real = inputFormat.createRecordReader(split, taskContext);
  long bytesInCurr = getInputBytes(fsStats);
  fileInputByteCounter.increment(bytesInCurr - bytesInPrev);
}
Example #15
Source File: MapTask.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
public void init(MapOutputCollector.Context context)
    throws IOException, ClassNotFoundException {
  this.reporter = context.getReporter();
  JobConf job = context.getJobConf();
  String finalName = getOutputName(getPartition());
  FileSystem fs = FileSystem.get(job);

  OutputFormat<K, V> outputFormat = job.getOutputFormat();
  mapOutputRecordCounter =
      reporter.getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
  fileOutputByteCounter = reporter
      .getCounter(FileOutputFormatCounter.BYTES_WRITTEN);

  List<Statistics> matchedStats = null;
  if (outputFormat instanceof FileOutputFormat) {
    matchedStats = getFsStatistics(FileOutputFormat.getOutputPath(job), job);
  }
  fsStats = matchedStats;

  long bytesOutPrev = getOutputBytes(fsStats);
  out = job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
  long bytesOutCurr = getOutputBytes(fsStats);
  fileOutputByteCounter.increment(bytesOutCurr - bytesOutPrev);
}
Example #16
Source File: ReduceTask.java From hadoop with Apache License 2.0

public SkippingReduceValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
    TaskUmbilicalProtocol umbilical) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.umbilical = umbilical;
  this.skipGroupCounter =
      reporter.getCounter(TaskCounter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter =
      reporter.getCounter(TaskCounter.REDUCE_SKIPPED_RECORDS);
  this.toWriteSkipRecs = toWriteSkipRecs() &&
      SkipBadRecords.getSkipOutputPath(conf) != null;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;
  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Example #17
Source File: TestCounters.java From hadoop with Apache License 2.0

@Test
public void testCounters() throws IOException {
  Enum[] keysWithResource = {TaskCounter.MAP_INPUT_RECORDS,
      TaskCounter.MAP_OUTPUT_BYTES};
  Enum[] keysWithoutResource = {myCounters.TEST1, myCounters.TEST2};
  String[] groups = {"group1", "group2", "group{}()[]"};
  String[] counters = {"counter1", "counter2", "counter{}()[]"};

  try {
    // I. Check enum counters that have a resource bundle
    testCounter(getEnumCounters(keysWithResource));
    // II. Check enum counters that don't have a resource bundle
    testCounter(getEnumCounters(keysWithoutResource));
    // III. Check string counters
    testCounter(getEnumCounters(groups, counters));
  } catch (ParseException pe) {
    throw new IOException(pe);
  }
}
Example #18
Source File: TestCounters.java From big-c with Apache License 2.0

@Test
public void testCounters() throws IOException {
  Enum[] keysWithResource = {TaskCounter.MAP_INPUT_RECORDS,
      TaskCounter.MAP_OUTPUT_BYTES};
  Enum[] keysWithoutResource = {myCounters.TEST1, myCounters.TEST2};
  String[] groups = {"group1", "group2", "group{}()[]"};
  String[] counters = {"counter1", "counter2", "counter{}()[]"};

  try {
    // I. Check enum counters that have a resource bundle
    testCounter(getEnumCounters(keysWithResource));
    // II. Check enum counters that don't have a resource bundle
    testCounter(getEnumCounters(keysWithoutResource));
    // III. Check string counters
    testCounter(getEnumCounters(groups, counters));
  } catch (ParseException pe) {
    throw new IOException(pe);
  }
}
Example #19
Source File: TestCounters.java From hadoop with Apache License 2.0

@SuppressWarnings("deprecation")
private void checkLegacyNames(Counters counters) {
  assertEquals("New name", 1, counters.findCounter(
      TaskCounter.class.getName(), "MAP_INPUT_RECORDS").getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "org.apache.hadoop.mapred.Task$Counter",
      "MAP_INPUT_RECORDS").getValue());
  assertEquals("Legacy enum", 1,
      counters.findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue());

  assertEquals("New name", 1, counters.findCounter(
      JobCounter.class.getName(), "DATA_LOCAL_MAPS").getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "org.apache.hadoop.mapred.JobInProgress$Counter",
      "DATA_LOCAL_MAPS").getValue());
  assertEquals("Legacy enum", 1,
      counters.findCounter(JobInProgress.Counter.DATA_LOCAL_MAPS).getValue());

  assertEquals("New name", 1, counters.findCounter(
      FileSystemCounter.class.getName(), "FILE_BYTES_READ").getValue());
  assertEquals("New name and method", 1, counters.findCounter("file",
      FileSystemCounter.BYTES_READ).getValue());
  assertEquals("Legacy name", 1, counters.findCounter(
      "FileSystemCounters", "FILE_BYTES_READ").getValue());
}
Example #20
Source File: ReduceTask.java From big-c with Apache License 2.0

public SkippingReduceValuesIterator(RawKeyValueIterator in,
    RawComparator<KEY> comparator, Class<KEY> keyClass,
    Class<VALUE> valClass, Configuration conf, TaskReporter reporter,
    TaskUmbilicalProtocol umbilical) throws IOException {
  super(in, comparator, keyClass, valClass, conf, reporter);
  this.umbilical = umbilical;
  this.skipGroupCounter =
      reporter.getCounter(TaskCounter.REDUCE_SKIPPED_GROUPS);
  this.skipRecCounter =
      reporter.getCounter(TaskCounter.REDUCE_SKIPPED_RECORDS);
  this.toWriteSkipRecs = toWriteSkipRecs() &&
      SkipBadRecords.getSkipOutputPath(conf) != null;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.reporter = reporter;
  skipIt = getSkipRanges().skipRangeIterator();
  mayBeSkip();
}
Example #21
Source File: MapTask.java From big-c with Apache License 2.0

@SuppressWarnings("unchecked")
public void init(MapOutputCollector.Context context)
    throws IOException, ClassNotFoundException {
  this.reporter = context.getReporter();
  JobConf job = context.getJobConf();
  String finalName = getOutputName(getPartition());
  FileSystem fs = FileSystem.get(job);

  OutputFormat<K, V> outputFormat = job.getOutputFormat();
  mapOutputRecordCounter =
      reporter.getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
  fileOutputByteCounter = reporter
      .getCounter(FileOutputFormatCounter.BYTES_WRITTEN);

  List<Statistics> matchedStats = null;
  if (outputFormat instanceof FileOutputFormat) {
    matchedStats = getFsStatistics(FileOutputFormat.getOutputPath(job), job);
  }
  fsStats = matchedStats;

  long bytesOutPrev = getOutputBytes(fsStats);
  out = job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
  long bytesOutCurr = getOutputBytes(fsStats);
  fileOutputByteCounter.increment(bytesOutCurr - bytesOutPrev);
}
Example #22
Source File: MapTask.java From big-c with Apache License 2.0

@SuppressWarnings("unchecked")
NewDirectOutputCollector(MRJobConfig jobContext, JobConf job,
    TaskUmbilicalProtocol umbilical, TaskReporter reporter)
    throws IOException, ClassNotFoundException, InterruptedException {
  this.reporter = reporter;
  mapOutputRecordCounter = reporter
      .getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
  fileOutputByteCounter = reporter
      .getCounter(FileOutputFormatCounter.BYTES_WRITTEN);

  List<Statistics> matchedStats = null;
  if (outputFormat instanceof
      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) {
    matchedStats = getFsStatistics(
        org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
            .getOutputPath(taskContext), taskContext.getConfiguration());
  }
  fsStats = matchedStats;

  long bytesOutPrev = getOutputBytes(fsStats);
  out = outputFormat.getRecordWriter(taskContext);
  long bytesOutCurr = getOutputBytes(fsStats);
  fileOutputByteCounter.increment(bytesOutCurr - bytesOutPrev);
}
Example #23
Source File: MapTask.java From big-c with Apache License 2.0

NewTrackingRecordReader(org.apache.hadoop.mapreduce.InputSplit split,
    org.apache.hadoop.mapreduce.InputFormat<K, V> inputFormat,
    TaskReporter reporter,
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
    throws InterruptedException, IOException {
  this.reporter = reporter;
  this.inputRecordCounter = reporter
      .getCounter(TaskCounter.MAP_INPUT_RECORDS);
  this.fileInputByteCounter = reporter
      .getCounter(FileInputFormatCounter.BYTES_READ);

  List<Statistics> matchedStats = null;
  if (split instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
    matchedStats = getFsStatistics(
        ((org.apache.hadoop.mapreduce.lib.input.FileSplit) split)
            .getPath(), taskContext.getConfiguration());
  }
  fsStats = matchedStats;

  long bytesInPrev = getInputBytes(fsStats);
  this.real = inputFormat.createRecordReader(split, taskContext);
  long bytesInCurr = getInputBytes(fsStats);
  fileInputByteCounter.increment(bytesInCurr - bytesInPrev);
}
Example #24
Source File: TestReduceFetch.java From big-c with Apache License 2.0

/**
 * Verify that all segments are read from disk
 * @throws Exception might be thrown
 */
public void testReduceFromDisk() throws Exception {
  final int MAP_TASKS = 8;
  JobConf job = mrCluster.createJobConf();
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "0.0");
  job.setNumMapTasks(MAP_TASKS);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.05");
  job.setInt(JobContext.IO_SORT_FACTOR, 2);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 4);
  Counters c = runJob(job);
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  assertTrue("Expected all records spilled during reduce (" + spill + ")",
      spill >= 2 * out); // all records spill at map, reduce
  assertTrue("Expected intermediate merges (" + spill + ")",
      spill >= 2 * out + (out / MAP_TASKS)); // some records hit twice
}
Example #25
Source File: TestJobCounters.java From big-c with Apache License 2.0

private void validateFileCounters(Counters counter, long fileBytesRead,
    long fileBytesWritten, long mapOutputBytes,
    long mapOutputMaterializedBytes) {
  assertTrue(counter.findCounter(FileInputFormatCounter.BYTES_READ)
      .getValue() != 0);
  assertEquals(fileBytesRead,
      counter.findCounter(FileInputFormatCounter.BYTES_READ).getValue());
  assertTrue(counter.findCounter(FileOutputFormatCounter.BYTES_WRITTEN)
      .getValue() != 0);

  if (mapOutputBytes >= 0) {
    assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_BYTES)
        .getValue() != 0);
  }
  if (mapOutputMaterializedBytes >= 0) {
    assertTrue(counter.findCounter(TaskCounter.MAP_OUTPUT_MATERIALIZED_BYTES)
        .getValue() != 0);
  }
}
Example #26
Source File: MapTask.java From big-c with Apache License 2.0

TrackedRecordReader(TaskReporter reporter, JobConf job) throws IOException {
  inputRecordCounter = reporter.getCounter(TaskCounter.MAP_INPUT_RECORDS);
  fileInputByteCounter =
      reporter.getCounter(FileInputFormatCounter.BYTES_READ);
  this.reporter = reporter;

  List<Statistics> matchedStats = null;
  if (this.reporter.getInputSplit() instanceof FileSplit) {
    matchedStats = getFsStatistics(((FileSplit) this.reporter
        .getInputSplit()).getPath(), job);
  }
  fsStats = matchedStats;

  bytesInPrev = getInputBytes(fsStats);
  rawIn = job.getInputFormat().getRecordReader(reporter.getInputSplit(),
      job, reporter);
  bytesInCurr = getInputBytes(fsStats);
  fileInputByteCounter.increment(bytesInCurr - bytesInPrev);
}
Example #27
Source File: TestJobCounters.java From big-c with Apache License 2.0

@SuppressWarnings("deprecation")
private long getTaskCounterUsage(JobClient client, JobID id, int numReports,
    int taskId, TaskType type) throws Exception {
  TaskReport[] reports = null;
  if (TaskType.MAP.equals(type)) {
    reports = client.getMapTaskReports(id);
  } else if (TaskType.REDUCE.equals(type)) {
    reports = client.getReduceTaskReports(id);
  }

  assertNotNull("No reports found for task type '" + type.name()
      + "' in job " + id, reports);
  // make sure that the total number of reports match the expected
  assertEquals("Mismatch in task id", numReports, reports.length);

  Counters counters = reports[taskId].getCounters();
  return counters.getCounter(TaskCounter.COMMITTED_HEAP_BYTES);
}
Example #28
Source File: TestReduceFetchFromPartialMem.java From big-c with Apache License 2.0

/** Verify that at least one segment does not hit disk */
public void testReduceFromPartialMem() throws Exception {
  final int MAP_TASKS = 7;
  JobConf job = mrCluster.createJobConf();
  job.setNumMapTasks(MAP_TASKS);
  job.setInt(JobContext.REDUCE_MERGE_INMEM_THRESHOLD, 0);
  job.set(JobContext.REDUCE_INPUT_BUFFER_PERCENT, "1.0");
  job.setInt(JobContext.SHUFFLE_PARALLEL_COPIES, 1);
  job.setInt(JobContext.IO_SORT_MB, 10);
  job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m");
  job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20);
  job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.14");
  job.set(JobContext.SHUFFLE_MERGE_PERCENT, "1.0");
  Counters c = runJob(job);
  final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter();
  final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();
  assertTrue("Expected some records not spilled during reduce (" + spill + ")",
      spill < 2 * out); // spilled map records, some records at the reduce
}
Example #29
Source File: MapTask.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
NewDirectOutputCollector(MRJobConfig jobContext, JobConf job,
    TaskUmbilicalProtocol umbilical, TaskReporter reporter)
    throws IOException, ClassNotFoundException, InterruptedException {
  this.reporter = reporter;
  mapOutputRecordCounter = reporter
      .getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
  fileOutputByteCounter = reporter
      .getCounter(FileOutputFormatCounter.BYTES_WRITTEN);

  List<Statistics> matchedStats = null;
  if (outputFormat instanceof
      org.apache.hadoop.mapreduce.lib.output.FileOutputFormat) {
    matchedStats = getFsStatistics(
        org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
            .getOutputPath(taskContext), taskContext.getConfiguration());
  }
  fsStats = matchedStats;

  long bytesOutPrev = getOutputBytes(fsStats);
  out = outputFormat.getRecordWriter(taskContext);
  long bytesOutCurr = getOutputBytes(fsStats);
  fileOutputByteCounter.increment(bytesOutCurr - bytesOutPrev);
}
Example #30
Source File: Task.java From big-c with Apache License 2.0

/**
 * Update resource information counters
 */
void updateResourceCounters() {
  // Update generic resource counters
  updateHeapUsageCounter();

  // Updating resources specified in ResourceCalculatorProcessTree
  if (pTree == null) {
    return;
  }
  pTree.updateProcessTree();
  long cpuTime = pTree.getCumulativeCpuTime();
  long pMem = pTree.getRssMemorySize();
  long vMem = pTree.getVirtualMemorySize();
  // Remove the CPU time consumed previously by JVM reuse
  if (cpuTime != ResourceCalculatorProcessTree.UNAVAILABLE &&
      initCpuCumulativeTime != ResourceCalculatorProcessTree.UNAVAILABLE) {
    cpuTime -= initCpuCumulativeTime;
  }
  if (cpuTime != ResourceCalculatorProcessTree.UNAVAILABLE) {
    counters.findCounter(TaskCounter.CPU_MILLISECONDS).setValue(cpuTime);
  }
  if (pMem != ResourceCalculatorProcessTree.UNAVAILABLE) {
    counters.findCounter(TaskCounter.PHYSICAL_MEMORY_BYTES).setValue(pMem);
  }
  if (vMem != ResourceCalculatorProcessTree.UNAVAILABLE) {
    counters.findCounter(TaskCounter.VIRTUAL_MEMORY_BYTES).setValue(vMem);
  }
}