Java Code Examples for org.apache.hadoop.mapred.JobConf#setLong()
The following examples show how to use org.apache.hadoop.mapred.JobConf#setLong().
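As a quick orientation before the project examples: setLong() stores a long value under a string key in the job configuration, and the matching getLong() reads it back, falling back to a supplied default when the key is absent. A minimal sketch of that round trip, assuming only the stock JobConf API (the key name my.app.max.records is invented for illustration):

import org.apache.hadoop.mapred.JobConf;

public class SetLongDemo {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // store a long-valued property under a string key
    conf.setLong("my.app.max.records", 1000000L);
    // read it back; the second argument is the default returned
    // when the key is absent
    long maxRecords = conf.getLong("my.app.max.records", 0L);
    System.out.println("max records = " + maxRecords); // prints 1000000
  }
}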
Example 1
Source File: DFSGeneralTest.java From RDFS with Apache License 2.0
private void updateJobConf(JobConf conf, Path inputPath, Path outputPath) {
  // set specific job config
  conf.setLong(NUMBER_OF_MAPS_KEY, nmaps);
  conf.setLong(NUMBER_OF_THREADS_KEY, nthreads);
  conf.setInt(BUFFER_SIZE_KEY, buffersize);
  conf.setLong(WRITER_DATARATE_KEY, datarate);
  // effectively disable the task timeout for this long-running write test
  conf.setLong("mapred.task.timeout", Long.MAX_VALUE);
  conf.set(OUTPUT_DIR_KEY, output);

  // set the output and input for the map reduce
  FileInputFormat.setInputPaths(conf, inputPath);
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setNumReduceTasks(1);
  conf.setSpeculativeExecution(false);
}
Example 2
Source File: TeraValidate.java From RDFS with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 3
Source File: TeraValidate.java From hadoop-gpu with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 4
Source File: TeraValidate.java From hadoop-book with Apache License 2.0
public int run(String[] args) throws Exception {
  JobConf job = (JobConf) getConf();
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraValidate");
  job.setJarByClass(TeraValidate.class);
  job.setMapperClass(ValidateMapper.class);
  job.setReducerClass(ValidateReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  // force a single split
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  job.setInputFormat(TeraInputFormat.class);
  JobClient.runJob(job);
  return 0;
}
Example 5
Source File: MapProcessor.java From tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    // Despite its name, JobContext.MAP_INPUT_PATH resolves to
    // "mapreduce.map.input.length", so this records the split's length.
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
Example 6
Source File: TestGroupedSplits.java From incubator-tez with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test(timeout=10000)
public void testGroupedSplitSize() throws IOException {
  JobConf job = new JobConf(defaultConf);
  InputFormat mockWrappedFormat = mock(InputFormat.class);
  TezGroupedSplitsInputFormat<LongWritable, Text> format =
      new TezGroupedSplitsInputFormat<LongWritable, Text>();
  format.setConf(job);
  format.setInputFormat(mockWrappedFormat);

  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MAX_SIZE, 500*1000*1000L);
  job.setLong(TezConfiguration.TEZ_AM_GROUPING_SPLIT_MIN_SIZE, 50*1000*1000L);
  InputSplit mockSplit1 = mock(InputSplit.class);
  when(mockSplit1.getLength()).thenReturn(10*1000*1000L);
  when(mockSplit1.getLocations()).thenReturn(null);
  int numSplits = 100;
  InputSplit[] mockSplits = new InputSplit[numSplits];
  for (int i=0; i<numSplits; i++) {
    mockSplits[i] = mockSplit1;
  }
  when(mockWrappedFormat.getSplits((JobConf)anyObject(), anyInt()))
      .thenReturn(mockSplits);

  // desired splits not set. We end up choosing min/max split size based on
  // total data and num original splits. In this case, min size will be hit
  InputSplit[] splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);

  // split too big. override with max
  format.setDesiredNumberOfSplits(1);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(4, splits.length);

  // splits too small. override with min
  format.setDesiredNumberOfSplits(1000);
  splits = format.getSplits(job, 0);
  Assert.assertEquals(25, splits.length);
}
Example 7
Source File: MapProcessor.java From incubator-tez with Apache License 2.0
/**
 * Update the job with details about the file split
 * @param job the job configuration to update
 * @param inputSplit the file split
 */
private void updateJobWithSplit(final JobConf job, InputSplit inputSplit) {
  if (inputSplit instanceof FileSplit) {
    FileSplit fileSplit = (FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapred split: " + inputSplit);
}
Example 8
Source File: TestDeprecatedKeys.java From tez with Apache License 2.0
@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
  JobConf jobConf = new JobConf();

  jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
  jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
  jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
  jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
  jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
  jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
  jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);
  jobConf.setBoolean(MRJobConfig.MAPREDUCE_JOB_USER_CLASSPATH_FIRST, false);

  MRHelpers.translateMRConfToTez(jobConf);

  assertEquals(0.4f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f), 0.01f);
  assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
  assertEquals(2000, jobConf.getInt(
      TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
  assertEquals(0.55f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0), 0.01f);
  assertEquals(0.60f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0), 0.01f);
  assertEquals(0.22f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
  assertEquals(true, jobConf.getBoolean(
      TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
  assertEquals(0.33f, jobConf.getFloat(
      TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
  assertEquals(false, jobConf.getBoolean(TezConfiguration.TEZ_USER_CLASSPATH_FIRST, true));
}
Example 9
Source File: MapProcessor.java From incubator-tez with Apache License 2.0
private void updateJobWithSplit(
    final JobConf job, org.apache.hadoop.mapreduce.InputSplit inputSplit) {
  if (inputSplit instanceof org.apache.hadoop.mapreduce.lib.input.FileSplit) {
    org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
        (org.apache.hadoop.mapreduce.lib.input.FileSplit) inputSplit;
    job.set(JobContext.MAP_INPUT_FILE, fileSplit.getPath().toString());
    job.setLong(JobContext.MAP_INPUT_START, fileSplit.getStart());
    job.setLong(JobContext.MAP_INPUT_PATH, fileSplit.getLength());
  }
  LOG.info("Processing mapreduce split: " + inputSplit);
}
Example 10
Source File: RandomWriter.java From hadoop-gpu with Apache License 2.0
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  Path outDir = new Path(args[0]);
  JobConf job = new JobConf(getConf());

  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);

  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);

  job.setInputFormat(RandomInputFormat.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputFormat(SequenceFileOutputFormat.class);

  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = job.getInt("test.randomwriter.maps_per_host", 10);
  long numBytesToWritePerMap = job.getLong("test.randomwrite.bytes_per_map",
                                           1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have test.randomwrite.bytes_per_map set to 0");
    return -2;
  }
  long totalBytesToWrite = job.getLong("test.randomwrite.total_bytes",
      numMapsPerHost*numBytesToWritePerMap*cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    job.setLong("test.randomwrite.bytes_per_map", totalBytesToWrite);
  }

  job.setNumMapTasks(numMaps);
  System.out.println("Running " + numMaps + " maps.");

  // reducer NONE
  job.setNumReduceTasks(0);

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  JobClient.runJob(job);
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
                     (endTime.getTime() - startTime.getTime()) / 1000 +
                     " seconds.");

  return 0;
}
Example 11
Source File: CrawlDbReader.java From nutch-htmlunit with Apache License 2.0
public void processTopNJob(String crawlDb, long topN, float min, String output,
    Configuration config) throws IOException {

  if (LOG.isInfoEnabled()) {
    LOG.info("CrawlDb topN: starting (topN=" + topN + ", min=" + min + ")");
    LOG.info("CrawlDb db: " + crawlDb);
  }

  Path outFolder = new Path(output);
  Path tempDir = new Path(config.get("mapred.temp.dir", ".") +
      "/readdb-topN-temp-" +
      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));

  JobConf job = new NutchJob(config);
  job.setJobName("topN prepare " + crawlDb);
  FileInputFormat.addInputPath(job, new Path(crawlDb, CrawlDb.CURRENT_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(CrawlDbTopNMapper.class);
  job.setReducerClass(IdentityReducer.class);

  FileOutputFormat.setOutputPath(job, tempDir);
  job.setOutputFormat(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(FloatWritable.class);
  job.setOutputValueClass(Text.class);

  // XXX hmmm, no setFloat() in the API ... :(
  job.setLong("db.reader.topn.min", Math.round(1000000.0 * min));
  JobClient.runJob(job);

  if (LOG.isInfoEnabled()) {
    LOG.info("CrawlDb topN: collecting topN scores.");
  }
  job = new NutchJob(config);
  job.setJobName("topN collect " + crawlDb);
  job.setLong("db.reader.topn", topN);

  FileInputFormat.addInputPath(job, tempDir);
  job.setInputFormat(SequenceFileInputFormat.class);
  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(CrawlDbTopNReducer.class);

  FileOutputFormat.setOutputPath(job, outFolder);
  job.setOutputFormat(TextOutputFormat.class);
  job.setOutputKeyClass(FloatWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1); // create a single file.

  JobClient.runJob(job);
  FileSystem fs = FileSystem.get(config);
  fs.delete(tempDir, true);
  if (LOG.isInfoEnabled()) {
    LOG.info("CrawlDb topN: done");
  }
}
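The "no setFloat()" comment marks a workaround: the float threshold min is scaled by 1,000,000, rounded, and stored as a long via setLong(). Whatever consumes the key has to undo the scaling; a hedged sketch of that read side (the method and variable names here are assumptions for illustration, not Nutch's actual code):

// Hypothetical read side of the scaled-float workaround above.
static float readMinThreshold(JobConf job) {
  long scaledMin = job.getLong("db.reader.topn.min", 0);
  return scaledMin / 1000000.0f; // undoes Math.round(1000000.0 * min)
}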
Example 12
Source File: CompositeInputFormat.java From RDFS with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
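Setting "mapred.min.split.size" to Long.MAX_VALUE, also seen in Examples 2-4 and repeated in Examples 17, 18, and 20, is a common idiom for suppressing splitting: the old-API FileInputFormat computes each split size as max(minSize, min(goalSize, blockSize)), so a huge minimum forces one split per file. A minimal sketch of the same idiom in a job driver, assuming only the stock JobConf API:

import org.apache.hadoop.mapred.JobConf;

public class WholeFileSplits {
  // Force whole-file splits: with minSize = Long.MAX_VALUE,
  // max(minSize, min(goalSize, blockSize)) is always Long.MAX_VALUE,
  // so no file is ever divided.
  static void forceWholeFileSplits(JobConf job) {
    job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  }
}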
Example 13
Source File: TeraGen.java From RDFS with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
Example 14
Source File: TeraGen.java From hadoop-book with Apache License 2.0
static void setNumberOfRows(JobConf job, long numRows) {
  job.setLong("terasort.num-rows", numRows);
}
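A value written this way is read back with the matching getLong(String, long). A sketch of the companion getter for this key (the 0 default is an assumption for illustration, not necessarily TeraGen's exact code):

// Hypothetical read side for "terasort.num-rows"; the 0 default is assumed.
static long getNumberOfRows(JobConf job) {
  return job.getLong("terasort.num-rows", 0);
}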
Example 15
Source File: DataJoinJob.java From RDFS with Apache License 2.0
public static JobConf createDataJoinJob(String args[]) throws IOException {
  String inputDir = args[0];
  String outputDir = args[1];
  Class inputFormat = SequenceFileInputFormat.class;
  if (args[2].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileInputFormat: " + args[2]);
  } else {
    System.out.println("Using TextInputFormat: " + args[2]);
    inputFormat = TextInputFormat.class;
  }
  int numOfReducers = Integer.parseInt(args[3]);
  Class mapper = getClassByName(args[4]);
  Class reducer = getClassByName(args[5]);
  Class mapoutputValueClass = getClassByName(args[6]);
  Class outputFormat = TextOutputFormat.class;
  Class outputValueClass = Text.class;
  if (args[7].compareToIgnoreCase("text") != 0) {
    System.out.println("Using SequenceFileOutputFormat: " + args[7]);
    outputFormat = SequenceFileOutputFormat.class;
    outputValueClass = getClassByName(args[7]);
  } else {
    System.out.println("Using TextOutputFormat: " + args[7]);
  }
  long maxNumOfValuesPerGroup = 100;
  String jobName = "";
  if (args.length > 8) {
    maxNumOfValuesPerGroup = Long.parseLong(args[8]);
  }
  if (args.length > 9) {
    jobName = args[9];
  }

  Configuration defaults = new Configuration();
  JobConf job = new JobConf(defaults, DataJoinJob.class);
  job.setJobName("DataJoinJob: " + jobName);

  FileSystem fs = FileSystem.get(defaults);
  fs.delete(new Path(outputDir));
  FileInputFormat.setInputPaths(job, inputDir);

  job.setInputFormat(inputFormat);
  job.setMapperClass(mapper);
  FileOutputFormat.setOutputPath(job, new Path(outputDir));
  job.setOutputFormat(outputFormat);
  SequenceFileOutputFormat.setOutputCompressionType(job,
      SequenceFile.CompressionType.BLOCK);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(mapoutputValueClass);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(outputValueClass);
  job.setReducerClass(reducer);

  job.setNumMapTasks(1);
  job.setNumReduceTasks(numOfReducers);
  job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
  return job;
}
Example 16
Source File: TestMergeManager.java From big-c with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();
  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();
  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
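Taking the configured percentages at face value, the constants line up with the assertions: the single-shuffle limit is 0.8 x 10,000 = 8,000 bytes, so each 7,950-byte output can be reserved in memory; two outstanding reservations (15,900 bytes) already exceed the 10,000-byte total set via setLong(), so the third reserve() returns null and must wait; and once both outputs commit, 15,900 bytes is past the merge threshold of 0.9 x 10,000 = 9,000 bytes, which starts the in-memory merge.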
Example 17
Source File: CompositeInputFormat.java From big-c with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
Example 18
Source File: CompositeInputFormat.java From hadoop-gpu with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}
Example 19
Source File: TestMergeManager.java From hadoop with Apache License 2.0
@Test(timeout=10000)
public void testMemoryMerge() throws Exception {
  final int TOTAL_MEM_BYTES = 10000;
  final int OUTPUT_SIZE = 7950;
  JobConf conf = new JobConf();
  conf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 1.0f);
  conf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, TOTAL_MEM_BYTES);
  conf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.8f);
  conf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.9f);
  TestExceptionReporter reporter = new TestExceptionReporter();
  CyclicBarrier mergeStart = new CyclicBarrier(2);
  CyclicBarrier mergeComplete = new CyclicBarrier(2);
  StubbedMergeManager mgr = new StubbedMergeManager(conf, reporter,
      mergeStart, mergeComplete);

  // reserve enough map output to cause a merge when it is committed
  MapOutput<Text, Text> out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  MapOutput<Text, Text> out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  InMemoryMapOutput<Text, Text> mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be a WAIT
  MapOutput<Text, Text> out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // trigger the first merge and wait for merge thread to start merging
  // and free enough output to reserve more
  mout1.commit();
  mout2.commit();
  mergeStart.await();
  Assert.assertEquals(1, mgr.getNumMerges());

  // reserve enough map output to cause another merge when committed
  out1 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out1 instanceof InMemoryMapOutput));
  mout1 = (InMemoryMapOutput<Text, Text>)out1;
  fillOutput(mout1);
  out2 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertTrue("Should be a memory merge",
                    (out2 instanceof InMemoryMapOutput));
  mout2 = (InMemoryMapOutput<Text, Text>)out2;
  fillOutput(mout2);

  // next reservation should be null
  out3 = mgr.reserve(null, OUTPUT_SIZE, 0);
  Assert.assertEquals("Should be told to wait", null, out3);

  // commit output *before* merge thread completes
  mout1.commit();
  mout2.commit();

  // allow the first merge to complete
  mergeComplete.await();

  // start the second merge and verify
  mergeStart.await();
  Assert.assertEquals(2, mgr.getNumMerges());

  // trigger the end of the second merge
  mergeComplete.await();
  Assert.assertEquals(2, mgr.getNumMerges());
  Assert.assertEquals("exception reporter invoked",
      0, reporter.getNumExceptions());
}
Example 20
Source File: CompositeInputFormat.java From hadoop with Apache License 2.0
/**
 * Build a CompositeInputSplit from the child InputFormats by assigning the
 * ith split from each child to the ith composite split.
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setFormat(job);
  job.setLong("mapred.min.split.size", Long.MAX_VALUE);
  return root.getSplits(job, numSplits);
}