Java Code Examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks()
The following examples show how to use org.apache.hadoop.mapreduce.Job#setNumReduceTasks().
Each snippet is drawn from an open-source project; the source file and license are noted above it.
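Before the project examples, here is a minimal, self-contained sketch of the two patterns that recur throughout them: setNumReduceTasks(0) for a map-only job (no shuffle or reduce phase; mapper output is written straight to the output directory), and a positive count to fix the number of reduce tasks, which also fixes the number of output partitions. The class name ReduceCountDemo and the use of the identity Mapper are illustrative assumptions, not code from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver class, for illustration only.
public class ReduceCountDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "reduce-count-demo");
    job.setJarByClass(ReduceCountDemo.class);
    job.setMapperClass(Mapper.class); // identity mapper

    // Map-only job: with zero reduce tasks, the shuffle/sort phase is
    // skipped and each mapper's output is written directly to HDFS.
    job.setNumReduceTasks(0);

    // To run a reduce phase instead, set a reducer and a task count;
    // the count determines how many output partitions the job produces:
    // job.setReducerClass(Reducer.class); // identity reducer
    // job.setNumReduceTasks(4);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

A count of 1 (as in Examples 1 and 20 below) funnels all map output through a single reducer, the usual way to produce one globally aggregated output file at the cost of parallelism.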
Example 1
Source File: TeraChecksum.java (from big-c, Apache License 2.0)
public int run(String[] args) throws Exception {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSum");
  job.setJarByClass(TeraChecksum.class);
  job.setMapperClass(ChecksumMapper.class);
  job.setReducerClass(ChecksumReducer.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Unsigned16.class);
  // force a single reducer
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TeraInputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
Example 2
Source File: IntegrationTestLoadAndVerify.java (from hbase, Apache License 2.0)
protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
Example 3
Source File: TestMiniCoronaFederatedJT.java (from RDFS, Apache License 2.0)
public void testOneRemoteJT() throws Exception {
  LOG.info("Starting testOneRemoteJT");
  String[] racks = "/rack-1".split(",");
  String[] trackers = "tracker-1".split(",");
  corona = new MiniCoronaCluster.Builder().numTaskTrackers(1).racks(racks)
      .hosts(trackers).build();
  Configuration conf = corona.createJobConf();
  conf.set("mapred.job.tracker", "corona");
  conf.set("mapred.job.tracker.class", CoronaJobTracker.class.getName());
  String locationsCsv = "tracker-1";
  conf.set("test.locations", locationsCsv);
  conf.setBoolean("mapred.coronajobtracker.forceremote", true);
  Job job = new Job(conf);
  job.setMapperClass(TstJob.TestMapper.class);
  job.setInputFormatClass(TstJob.TestInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);
  job.getConfiguration().set("io.sort.record.pct", "0.50");
  job.getConfiguration().set("io.sort.mb", "25");
  boolean success = job.waitForCompletion(true);
  assertTrue("Job did not succeed", success);
}
Example 4
Source File: TestBAMOutputFormat.java (from Hadoop-BAM, MIT License)
private Path doMapReduce(final String inputFile) throws Exception {
  final FileSystem fileSystem = FileSystem.get(conf);
  final Path inputPath = new Path(inputFile);
  final Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);
  final Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  conf.set(BAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile);
  job.setInputFormatClass(BAMInputFormat.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(SAMRecordWritable.class);
  job.setOutputFormatClass(BAMTestNoHeaderOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);
  final boolean success = job.waitForCompletion(true);
  assertTrue(success);
  return outputPath;
}
Example 5
Source File: IntegrationTestLoadAndVerify.java (from hbase, Apache License 2.0)
protected void doVerify(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + tableDescriptor.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(
      tableDescriptor.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example 6
Source File: TestTableInputFormat.java (from hbase, Apache License 2.0)
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
Example 7
Source File: CassandraScanJobIT.java (from titan1withtp3.1, Apache License 2.0)
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
  Job job = Job.getInstance(c);
  job.setJarByClass(HadoopScanMapper.class);
  job.setJobName("testPartitionedVertexScan");
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(CassandraInputFormat.class);
  return job;
}
Example 8
Source File: LeftJoin.java (from BigData-In-Practice, Apache License 2.0)
@Override
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
  conf = optionparser.getConfiguration();

  Job job = Job.getInstance(conf, "leftjoin");
  job.setJarByClass(LeftJoin.class);
  FileInputFormat.addInputPaths(job, conf.get("input_dir"));
  Path out = new Path(conf.get("output_dir"));
  FileOutputFormat.setOutputPath(job, out);
  job.setNumReduceTasks(conf.getInt("reduce_num", 1));

  job.setMapperClass(LeftJoinMapper.class);
  job.setReducerClass(LeftJoinReduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  // Set on the job's own configuration: Job.getInstance() copies conf,
  // so setting the separator on conf after job creation has no effect.
  job.getConfiguration().set("mapred.textoutputformat.separator", ",");
  return (job.waitForCompletion(true) ? 0 : 1);
}
Example 9
Source File: MetricsDailySummaryReducer.java (from datawave, Apache License 2.0)
public static void configureJob(Job job, int numDays, String instance, String zookeepers,
    String userName, String password, String outputTable) throws AccumuloSecurityException {
  // Cap the number of reducers at 100, just in case we have a large day range
  // (shouldn't really happen though)
  job.setNumReduceTasks(Math.min(numDays, 100));
  job.setReducerClass(MetricsDailySummaryReducer.class);
  job.setOutputFormatClass(AccumuloOutputFormat.class);
  AccumuloOutputFormat.setZooKeeperInstance(job,
      ClientConfiguration.loadDefault().withInstance(instance).withZkHosts(zookeepers));
  AccumuloOutputFormat.setConnectorInfo(job, userName, new PasswordToken(password));
  AccumuloOutputFormat.setCreateTables(job, true);
  AccumuloOutputFormat.setDefaultTableName(job, outputTable);
}
Example 10
Source File: TestCRAMInputFormat.java (from Hadoop-BAM, MIT License)
@Test
public void testMapReduceJob() throws Exception {
  Configuration conf = new Configuration();
  conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference);

  FileSystem fileSystem = FileSystem.get(conf);
  Path inputPath = new Path(input);
  Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);

  Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(CRAMInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(SAMRecordWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);

  boolean success = job.waitForCompletion(true);
  assertTrue(success);

  List<String> samStrings = new ArrayList<String>();
  SamReader samReader = SamReaderFactory.makeDefault()
      .referenceSequence(new File(URI.create(reference))).open(new File(input));
  for (SAMRecord r : samReader) {
    samStrings.add(r.getSAMString().trim());
  }

  File outputFile = new File(new File(outputPath.toUri()), "part-m-00000");
  BufferedReader br = new BufferedReader(new FileReader(outputFile));
  String line;
  int index = 0;
  while ((line = br.readLine()) != null) {
    String value = line.substring(line.indexOf("\t") + 1); // ignore key
    assertEquals(samStrings.get(index++), value);
  }
  br.close();
}
Example 11
Source File: DistBlockIntegrityMonitor.java (from RDFS, Apache License 2.0)
/**
 * Creates and submits a job, updates file index and job index.
 */
private void startJob(String jobName, Set<String> lostFiles, Priority priority, long detectTime)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path inDir = new Path(JOB_NAME_PREFIX + "/in/" + jobName);
  Path outDir = new Path(JOB_NAME_PREFIX + "/out/" + jobName);
  List<String> filesInJob = createInputFile(jobName, inDir, lostFiles);
  if (filesInJob.isEmpty()) return;

  Configuration jobConf = new Configuration(getConf());
  RaidUtils.parseAndSetOptions(jobConf, priority.configOption);
  Job job = new Job(jobConf, jobName);
  job.getConfiguration().set(CORRUPT_FILE_DETECT_TIME, Long.toString(detectTime));
  configureJob(job, this.RECONSTRUCTOR_CLASS);
  job.setJarByClass(getClass());
  job.setMapperClass(ReconstructionMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(ReconstructionInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  ReconstructionInputFormat.setInputPaths(job, inDir);
  SequenceFileOutputFormat.setOutputPath(job, outDir);

  submitJob(job, filesInJob, priority);
  List<LostFileInfo> fileInfos = updateFileIndex(jobName, filesInJob, priority);
  // The implementation of submitJob() need not update jobIndex.
  // So check if the job exists in jobIndex before updating jobInfos.
  if (jobIndex.containsKey(job)) {
    jobIndex.put(job, fileInfos);
  }
  numJobsRunning++;
}
Example 12
Source File: FinalJoinJob.java (from hiped2, Apache License 2.0)
public static void runJob(Configuration conf, Path userLogsPath, Path usersPath,
    Path outputPath) throws Exception {
  FileSystem fs = usersPath.getFileSystem(conf);
  FileStatus usersStatus = fs.getFileStatus(usersPath);
  if (usersStatus.isDir()) {
    for (FileStatus f : fs.listStatus(usersPath)) {
      if (f.getPath().getName().startsWith("part")) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(usersPath.toUri(), conf);
  }

  Job job = new Job(conf);
  job.setJarByClass(FinalJoinJob.class);
  job.setMapperClass(GenericReplicatedJoin.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(KeyValueTextInputFormat.class);

  outputPath.getFileSystem(conf).delete(outputPath, true);

  FileInputFormat.setInputPaths(job, userLogsPath);
  FileOutputFormat.setOutputPath(job, outputPath);

  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }
}
Example 13
Source File: TestVCFRoundTrip.java (from Hadoop-BAM, MIT License)
private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception {
  final FileSystem fileSystem = FileSystem.get(conf);
  final Path outputPath = fileSystem.makeQualified(new Path("target/out"));
  fileSystem.delete(outputPath, true);
  final Job job = Job.getInstance(conf);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(VCFInputFormat.class);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(VariantContextWritable.class);
  job.setOutputFormatClass(writeHeader
      ? VCFTestWithHeaderOutputFormat.class
      : VCFTestNoHeaderOutputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(VariantContextWritable.class);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputPath);
  if (codecClass != null) {
    FileOutputFormat.setOutputCompressorClass(job, codecClass);
  }
  final boolean success = job.waitForCompletion(true);
  assertTrue(success);
  return outputPath;
}
Example 14
Source File: TableToFile.java (from accumulo-examples, Apache License 2.0)
public static void main(String[] args) throws Exception {
  Opts opts = new Opts();
  opts.parseArgs(TableToFile.class.getName(), args);

  List<IteratorSetting.Column> columnsToFetch = new ArrayList<>();
  for (String col : opts.columns.split(",")) {
    int idx = col.indexOf(":");
    String cf = idx < 0 ? col : col.substring(0, idx);
    String cq = idx < 0 ? null : col.substring(idx + 1);
    if (!cf.isEmpty())
      columnsToFetch.add(new IteratorSetting.Column(cf, cq));
  }

  Job job = Job.getInstance(opts.getHadoopConfig());
  job.setJobName(TableToFile.class.getSimpleName() + "_" + System.currentTimeMillis());
  job.setJarByClass(TableToFile.class);
  job.setInputFormatClass(AccumuloInputFormat.class);
  InputFormatBuilder.InputFormatOptions<Job> inputOpts = AccumuloInputFormat.configure()
      .clientProperties(opts.getClientProperties()).table(opts.tableName);
  if (!columnsToFetch.isEmpty()) {
    inputOpts.fetchColumns(columnsToFetch);
  }
  inputOpts.store(job);
  job.setMapperClass(TTFMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(opts.output));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 15
Source File: IntegrationTestBigLinkedList.java (from hbase, Apache License 2.0)
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());

  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

  job.setMapperClass(Mapper.class); // identity mapper

  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
Example 16
Source File: Hdfs2Tg.java (from ecosys, Apache License 2.0)
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = Job.getInstance(conf, "HDFS to TG");
  job.setJarByClass(Hdfs2Tg.class);
  job.setMapperClass(LineMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 17
Source File: TestMiniMRLocalFS.java (from RDFS, Apache License 2.0)
private void runSecondarySort(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  FileSystem localFs = FileSystem.getLocal(conf);
  localFs.delete(new Path(TEST_ROOT_DIR + "/in"), true);
  localFs.delete(new Path(TEST_ROOT_DIR + "/out"), true);
  TestMapReduceLocal.writeFile("in/part1",
      "-1 -4\n-3 23\n5 10\n-1 -2\n-1 300\n-1 10\n4 1\n" +
      "4 2\n4 10\n4 -1\n4 -10\n10 20\n10 30\n10 25\n");
  Job job = new Job(conf, "word count");
  job.setJarByClass(WordCount.class);
  job.setNumReduceTasks(2);
  job.setMapperClass(SecondarySort.MapClass.class);
  job.setReducerClass(SecondarySort.Reduce.class);
  // group and partition by the first int in the pair
  job.setPartitionerClass(FirstPartitioner.class);
  job.setGroupingComparatorClass(FirstGroupingComparator.class);
  // the map output is IntPair, IntWritable
  job.setMapOutputKeyClass(IntPair.class);
  job.setMapOutputValueClass(IntWritable.class);
  // the reduce output is Text, IntWritable
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(TEST_ROOT_DIR + "/out"));
  assertTrue(job.waitForCompletion(true));
  String out = TestMapReduceLocal.readFile("out/part-r-00000");
  assertEquals("------------------------------------------------\n" +
               "4\t-10\n4\t-1\n4\t1\n4\t2\n4\t10\n" +
               "------------------------------------------------\n" +
               "10\t20\n10\t25\n10\t30\n", out);
  out = TestMapReduceLocal.readFile("out/part-r-00001");
  assertEquals("------------------------------------------------\n" +
               "-3\t23\n" +
               "------------------------------------------------\n" +
               "-1\t-4\n-1\t-2\n-1\t10\n-1\t300\n" +
               "------------------------------------------------\n" +
               "5\t10\n", out);
}
Example 18
Source File: JsonDataValidationExecutor.java (from jumbune, GNU Lesser General Public License v3.0)
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  StringBuilder sb = new StringBuilder();
  for (int j = 2; j < otherArgs.length; j++) {
    sb.append(otherArgs[j]);
  }

  LOGGER.debug("Arguments[ " + otherArgs.length + "]" + "and values respectively ["
      + otherArgs[0] + "], " + otherArgs[1] + ", [" + otherArgs[2] + "]" + ", ["
      + otherArgs[3] + "]," + otherArgs[4]);

  String inputpath = otherArgs[0];
  String outputpath = "/tmp/jumbune/dvjsonreport" + new Date().getTime();
  String json = otherArgs[1];
  String nullCondition = otherArgs[2];
  String regex = otherArgs[3];
  String dvDir = otherArgs[4];

  if (regex.isEmpty()) {
    conf.set(JsonDataVaildationConstants.REGEX_ARGUMENT, "");
  } else {
    conf.set(JsonDataVaildationConstants.REGEX_ARGUMENT, regex);
  }

  if (nullCondition.isEmpty()) {
    conf.set(JsonDataVaildationConstants.NULL_ARGUMENT, "");
  } else {
    conf.set(JsonDataVaildationConstants.NULL_ARGUMENT, nullCondition);
  }

  conf.set(JsonDataVaildationConstants.SLAVE_DIR, dvDir);
  conf.set(JsonDataVaildationConstants.JSON_ARGUMENT, json);
  FileSystem fs = FileSystem.get(conf);

  @SuppressWarnings("deprecation")
  Job job = new Job(conf, "JSONDataValidation");
  job.setJarByClass(JsonDataValidationExecutor.class);
  job.setInputFormatClass(JsonFileInputFormat.class);
  job.setMapperClass(JsonDataValidationMapper.class);
  job.setPartitionerClass(JsonDataValidationPartitioner.class);
  job.setReducerClass(JsonDataValidationReducer.class);
  job.setNumReduceTasks(5);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(FileKeyViolationBean.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(TotalReducerViolationBean.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);

  Path[] inputPaths = FileUtil.getAllJsonNestedFilePath(job, inputpath);
  FileInputFormat.setInputPaths(job, inputPaths);
  FileOutputFormat.setOutputPath(job, new Path(outputpath));

  if (fs.exists(new Path(outputpath))) {
    fs.delete(new Path(outputpath), true);
  }

  job.waitForCompletion(true);

  Map<String, JsonViolationReport> jsonMap = readDataFromHdfs(conf, outputpath);
  final Gson gson = new Gson();
  final String jsonReport = gson.toJson(jsonMap);
  LOGGER.info("Completed DataValidation");
  LOGGER.info(JsonDataVaildationConstants.JSON_DV_REPORT + jsonReport);
}
Example 19
Source File: ComputeResponseTool.java (from incubator-retired-pirk, Apache License 2.0)
private boolean multiplyColumns(Path outPathInit, Path outPathColumnMult)
    throws IOException, ClassNotFoundException, InterruptedException {
  boolean success;

  Job columnMultJob = Job.getInstance(conf, "pir_columnMult");
  columnMultJob.setSpeculativeExecution(false);

  String columnMultJobName = "pir_columnMult";

  // Set the same job configs as for the first iteration
  columnMultJob.getConfiguration().set("mapreduce.map.memory.mb",
      SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.memory.mb",
      SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
  columnMultJob.getConfiguration().set("mapreduce.map.java.opts",
      SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
  columnMultJob.getConfiguration().set("mapreduce.reduce.java.opts",
      SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));
  columnMultJob.getConfiguration().set("mapreduce.map.speculative", "false");
  columnMultJob.getConfiguration().set("mapreduce.reduce.speculative", "false");
  columnMultJob.getConfiguration().set("pirMR.queryInputDir",
      SystemConfiguration.getProperty("pir.queryInput"));

  columnMultJob.setJobName(columnMultJobName);
  columnMultJob.setJarByClass(ColumnMultMapper.class);
  columnMultJob.setNumReduceTasks(numReduceTasks);

  // Set the Mapper, InputFormat, and input path
  columnMultJob.setMapperClass(ColumnMultMapper.class);
  columnMultJob.setInputFormatClass(TextInputFormat.class);

  FileStatus[] status = fs.listStatus(outPathInit);
  for (FileStatus fstat : status) {
    if (fstat.getPath().getName().startsWith(FileConst.PIR)) {
      logger.info("fstat.getPath() = " + fstat.getPath().toString());
      FileInputFormat.addInputPath(columnMultJob, fstat.getPath());
    }
  }
  columnMultJob.setMapOutputKeyClass(LongWritable.class);
  columnMultJob.setMapOutputValueClass(Text.class);

  // Set the reducer and output options
  columnMultJob.setReducerClass(ColumnMultReducer.class);
  columnMultJob.setOutputKeyClass(LongWritable.class);
  columnMultJob.setOutputValueClass(Text.class);
  columnMultJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

  // Delete the output file, if it exists
  if (fs.exists(outPathColumnMult)) {
    fs.delete(outPathColumnMult, true);
  }
  FileOutputFormat.setOutputPath(columnMultJob, outPathColumnMult);

  MultipleOutputs.addNamedOutput(columnMultJob, FileConst.PIR_COLS,
      TextOutputFormat.class, LongWritable.class, Text.class);

  // Submit job, wait for completion
  success = columnMultJob.waitForCompletion(true);

  return success;
}
Example 20
Source File: TopBusyAirport.java (from gemfirexd-oss, Apache License 2.0)
public int run(String[] args) throws Exception {
  GfxdDataSerializable.initTypes();

  Configuration conf = getConf();
  Path outputPath = new Path(args[0]);
  Path intermediateOutputPath = new Path(args[0] + "_int");
  String hdfsHomeDir = args[1];
  String tableName = args[2];

  outputPath.getFileSystem(conf).delete(outputPath, true);
  intermediateOutputPath.getFileSystem(conf).delete(intermediateOutputPath, true);

  conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
  conf.set(RowInputFormat.INPUT_TABLE, tableName);
  conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

  Job job = Job.getInstance(conf, "Busy Airport Count");

  job.setInputFormatClass(RowInputFormat.class);

  // configure mapper and reducer
  job.setMapperClass(SampleMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);

  // Only have one reduce task so that all of the results from mapping are
  // processed in one place.
  job.setNumReduceTasks(1);

  // configure output
  TextOutputFormat.setOutputPath(job, intermediateOutputPath);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);

  int rc = job.waitForCompletion(true) ? 0 : 1;
  if (rc == 0) {
    Job topJob = Job.getInstance(getConf(), "Top Busy Airport");

    // We want the task to run on a single VM
    topJob.setNumReduceTasks(1);

    // Set the inputs
    topJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(topJob, intermediateOutputPath);

    // Set the mapper and reducer
    topJob.setMapperClass(TopBusyAirportMapper.class);
    topJob.setReducerClass(TopBusyAirportReducer.class);

    // Set the outputs
    TextOutputFormat.setOutputPath(topJob, outputPath);
    topJob.setOutputFormatClass(TextOutputFormat.class);
    topJob.setOutputKeyClass(Text.class);
    topJob.setOutputValueClass(IntWritable.class);
    topJob.setMapOutputKeyClass(Text.class);
    topJob.setMapOutputValueClass(StringIntPair.class);

    rc = topJob.waitForCompletion(true) ? 0 : 1;
  }
  return rc;
}