Java Code Examples for org.apache.hadoop.mapreduce.Job#setOutputKeyClass()
The following examples show how to use org.apache.hadoop.mapreduce.Job#setOutputKeyClass().
Each example comes from an open-source project; the source file and license are noted above the code.
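Before the project examples, here is a minimal, self-contained driver sketch showing where setOutputKeyClass() fits in a typical job setup. All class names and paths in it are illustrative placeholders rather than code from the projects below. Note that when setMapOutputKeyClass()/setMapOutputValueClass() are not called, the types passed to setOutputKeyClass()/setOutputValueClass() also describe the map output.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalOutputKeyClassExample {

    // Tokenizes each line and emits (word, 1).
    public static class TokenizeMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setOutputKeyClass example");
        job.setJarByClass(MinimalOutputKeyClassExample.class);
        job.setMapperClass(TokenizeMapper.class);
        job.setReducerClass(SumReducer.class);
        // The classes given here describe the reducer's output; because no separate
        // map output classes are set, they are also used as the map output types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}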
Example 1
Source File: HdfsUtil.java From ES-Fastloader with Apache License 2.0 | 6 votes |
public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());

    job.setJarByClass(FastIndex.class);
    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());

    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));

    return job;
}
Example 2
Source File: WordCount.java From wifi with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.out.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 3
Source File: AbstractMRNewApiSearchTest.java From elasticsearch-hadoop with Apache License 2.0 | 6 votes |
private Configuration createConf() throws IOException {
    Configuration conf = HdpBootstrap.hadoopConfig();
    HadoopCfgUtils.setGenericOptions(conf);
    Job job = new Job(conf);
    job.setInputFormatClass(EsInputFormat.class);
    job.setOutputFormatClass(PrintStreamOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    boolean type = random.nextBoolean();
    Class<?> mapType = (type ? MapWritable.class : LinkedMapWritable.class);
    job.setOutputValueClass(mapType);

    conf.set(ConfigurationOptions.ES_QUERY, query);
    conf.set(ConfigurationOptions.ES_READ_METADATA, String.valueOf(readMetadata));
    conf.set(ConfigurationOptions.ES_OUTPUT_JSON, String.valueOf(readAsJson));

    new QueryTestParams(tempFolder).provisionQueries(conf);
    job.setNumReduceTasks(0);
    //PrintStreamOutputFormat.stream(conf, Stream.OUT);

    Configuration cfg = job.getConfiguration();
    HdpBootstrap.addProperties(cfg, TestSettings.TESTING_PROPS, false);
    return cfg;
}
Example 4
Source File: WordStandardDeviation.java From hadoop with Apache License 2.0 | 6 votes |
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);

    boolean result = job.waitForCompletion(true);

    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);

    return (result ? 0 : 1);
}
Example 5
Source File: LinkCountHDFS.java From marklogic-contentpump with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 6
Source File: Main.java From hiped2 with Apache License 2.0 | 6 votes |
public static void runSortJob(Configuration conf, Path input, Path outputPath) throws Exception {
    Job job = new Job(conf);
    job.setJarByClass(Main.class);

    job.setMapperClass(SortMapReduce.Map.class);
    job.setReducerClass(SortMapReduce.Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapOutputKeyClass(Person.class);
    job.setMapOutputValueClass(Person.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(PersonNamePartitioner.class);
    job.setSortComparatorClass(PersonComparator.class);
    job.setGroupingComparatorClass(PersonNameComparator.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
}
Example 7
Source File: DistributedGrep.java From hadoop-map-reduce-patterns with Apache License 2.0 | 6 votes |
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: DistributedGrep <regex> <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }

    Job job = new Job(conf, "Distributed Grep");
    job.setJarByClass(DistributedGrep.class);

    job.setMapperClass(GrepMapper.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.getConfiguration().set(REGEX_KEY, otherArgs[0]);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}
Example 8
Source File: find_nth_driver.java From MLHadoop with Apache License 2.0 | 6 votes |
@SuppressWarnings("deprecation") public static String runSafely (String[] args, long n) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf= new Configuration(); FileSystem hdfs=FileSystem.get(conf); // Deleting previous stored nth row hdfs.delete(new Path(args[1])); conf.setLong("n", n); Job job = new Job(conf); job.setJarByClass(find_nth_driver.class); job.setJobName("Finds the nth row of the HDFS file"); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setMapperClass(find_nth_mapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(LongAndTextWritable.class); job.waitForCompletion(true); return readNthRow(args[1], conf); }
Example 9
Source File: BusyLegs.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
public int run(String[] args) throws Exception {
    GfxdDataSerializable.initTypes();

    Configuration conf = getConf();

    Path outputPath = new Path(args[0]);
    String hdfsHomeDir = args[1];
    String tableName = args[2];

    outputPath.getFileSystem(conf).delete(outputPath, true);

    conf.set(RowInputFormat.HOME_DIR, hdfsHomeDir);
    conf.set(RowInputFormat.INPUT_TABLE, tableName);
    conf.setBoolean(RowInputFormat.CHECKPOINT_MODE, false);

    Job job = Job.getInstance(conf, "Busy Leg Count");

    job.setInputFormatClass(RowInputFormat.class);

    // configure mapper and reducer
    job.setMapperClass(SampleMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    // configure output
    TextOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    return job.waitForCompletion(true) ? 0 : 1;
}
Example 10
Source File: SortMapReduce.java From hiped2 with Apache License 2.0 | 5 votes |
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    Job job = new Job(conf);
    job.setJarByClass(SortMapReduce.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapOutputKeyClass(Person.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(PersonNamePartitioner.class);
    job.setSortComparatorClass(PersonComparator.class);
    job.setGroupingComparatorClass(PersonNameComparator.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
Example 11
Source File: ActiveUserRunner.java From BigDataArchitect with Apache License 2.0 | 5 votes |
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Parse the input arguments
    this.processArgs(conf, args);

    // Create the job
    Job job = Job.getInstance(conf, "active_user");

    // Set job-related configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class,
            TimeOutputValue.class, job, false);
    // 2. Run on a cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    //         StatsUserDimension.class, TimeOutputValue.class, job);

    // Set reducer parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);

    // Set output parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);

    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime
                + "; end: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}
Example 12
Source File: Task.java From WIFIProbe with Apache License 2.0 | 5 votes |
private boolean analyze(final String inputFilePath,
                        final String outputFilePath,
                        final Long startTime) throws Exception {
    Configuration conf = new Configuration();
    conf.setLong(Holistic.START_TIME, startTime);
    conf.setLong(Holistic.EXECUTE_TIME, executeHourTime);

    Job jobAnalyze = Job.getInstance(conf, "analyze");
    jobAnalyze.setJarByClass(Holistic.class);

    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.NEW_OLD_CUSTOMER,
            TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.CUSTOMER_FLOW_KEY,
            TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.CYCLE,
            TextOutputFormat.class, KeyWrapper.class, Text.class);
    MultipleOutputs.addNamedOutput(jobAnalyze, MapKeyConfig.IN_STORE_HOUR,
            TextOutputFormat.class, KeyWrapper.class, Text.class);

    jobAnalyze.setMapperClass(AnalysisMapper.class);
    jobAnalyze.setReducerClass(AnalysisReducer.class);
    jobAnalyze.setCombinerClass(AnalysisCombiner.class);

    jobAnalyze.setOutputKeyClass(LongWritable.class);
    jobAnalyze.setOutputValueClass(Text.class);

    jobAnalyze.setMapOutputKeyClass(KeyWrapper.class);
    jobAnalyze.setMapOutputValueClass(ValueWrapper.class);

    FileInputFormat.addInputPath(jobAnalyze, new Path(inputFilePath));
    FileOutputFormat.setOutputPath(jobAnalyze, new Path(outputFilePath));

    return jobAnalyze.waitForCompletion(true);
}
Example 13
Source File: FlowCount.java From MapReduce-Demo with MIT License | 4 votes |
public static void main(String[] args) throws Exception {
    // 1. HDFS configuration
    String namenode_ip = "192.168.17.10";
    String hdfs = "hdfs://" + namenode_ip + ":9000";
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfs);
    conf.set("mapreduce.app-submission.cross-platform", "true");

    // 2. MapReduce job configuration
    String jobName = "FlowCount";                    // job name
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(FlowCount.class);              // driver class used at runtime
    job.setJar("export\\FlowCount.jar");             // local jar file
    job.setMapperClass(FlowCountMapper.class);       // Mapper class
    job.setMapOutputKeyClass(Text.class);            // Mapper output key type
    job.setMapOutputValueClass(IntWritable.class);   // Mapper output value type
    job.setReducerClass(FlowCountReducer.class);     // Reducer class
    job.setOutputKeyClass(Text.class);               // Reducer output key type
    job.setOutputValueClass(IntWritable.class);      // Reducer output value type

    // 3. Job input and output paths
    String dataDir = "/expr/weblog/data";            // input data directory
    String outputDir = "/expr/weblog/output1";       // output directory
    Path inPath = new Path(hdfs + dataDir);
    Path outPath = new Path(hdfs + outputDir);
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }

    // 4. Run the job
    System.out.println("Job: " + jobName + " is running...");
    if (job.waitForCompletion(true)) {
        System.out.println("success!");
        System.exit(0);
    } else {
        System.out.println("failed!");
        System.exit(1);
    }
}
Example 14
Source File: DirectBigQueryWordCount.java From hadoop-connectors with Apache License 2.0 | 4 votes |
public static void main(String[] args)
        throws IOException, InterruptedException, ClassNotFoundException {

    // GenericOptionsParser is a utility to parse command line arguments generic to the Hadoop
    // framework. This example won't cover the specifics, but will recognize several standard
    // command line arguments, enabling applications to easily specify a namenode, a
    // ResourceManager, additional configuration resources etc.
    GenericOptionsParser parser = new GenericOptionsParser(args);
    args = parser.getRemainingArgs();

    // Make sure we have the right parameters.
    if (args.length != 3) {
        System.out.println(
                "Usage: hadoop jar bigquery_wordcount.jar [ProjectId] [QualifiedInputTableId] "
                        + "[GcsOutputPath]\n"
                        + "    ProjectId - Project under which to issue the BigQuery operations. Also "
                        + "serves as the default project for table IDs which don't explicitly specify a "
                        + "project for the table.\n"
                        + "    QualifiedInputTableId - Input table ID of the form "
                        + "(Optional ProjectId):[DatasetId].[TableId]\n"
                        + "    OutputPath - The output path to write data, e.g. "
                        + "gs://bucket/dir/");
        System.exit(1);
    }

    // Get the individual parameters from the command line.
    String projectId = args[0];
    String inputQualifiedTableId = args[1];
    String outputPath = args[2];

    // Create the job and get its configuration.
    Job job = new Job(parser.getConfiguration(), "wordcount");
    Configuration conf = job.getConfiguration();

    // Set the job-level projectId.
    conf.set(PROJECT_ID.getKey(), projectId);

    // Configure input and output.
    BigQueryConfiguration.configureBigQueryInput(conf, inputQualifiedTableId);

    // Set column and predicate filters
    conf.set(SELECTED_FIELDS.getKey(), "word,word_count");
    conf.set(SQL_FILTER.getKey(), "word >= 'A' AND word <= 'zzz'");
    conf.set(MRJobConfig.NUM_MAPS, "999");

    // This helps Hadoop identify the Jar which contains the mapper and reducer by specifying a
    // class in that Jar. This is required if the jar is being passed on the command line to Hadoop.
    job.setJarByClass(DirectBigQueryWordCount.class);

    // Tell the job what the output will be.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(DirectBigQueryInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);
}
Example 15
Source File: Application.java From Redis-4.x-Cookbook with MIT License | 4 votes |
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length != 6) {
        System.err.println(
                "Usage: AddBalance <redis hosts> <hash prefix> <balance> <partition length> <begin> <end>");
        System.exit(1);
    }

    String host = args[0];
    String hashPrefix = args[1];
    String balance = args[2];
    String pLength = args[3];
    String begin = args[4];
    String end = args[5];

    Job job = Job.getInstance(conf, "Add Balance");
    job.setNumReduceTasks(0);
    job.setJarByClass(Application.class);

    job.setMapperClass(RedisOutputMapper.class);
    RedisOutputMapper.setBalance(job, balance);

    job.setInputFormatClass(RedisHashInputFormat.class);
    RedisHashInputFormat.setRedisHost(job, host);
    RedisHashInputFormat.setHashPrefix(job, hashPrefix);
    RedisHashInputFormat.setBegin(job, begin);
    RedisHashInputFormat.setEnd(job, end);
    RedisHashInputFormat.setPLength(job, pLength);

    job.setOutputFormatClass(RedisHashOutputFormat.class);
    RedisHashOutputFormat.setRedisHost(job, host);
    RedisHashOutputFormat.setPLength(job, pLength);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Wait for job completion
    return (job.waitForCompletion(true) ? 0 : 1);
}
Example 16
Source File: IPCount.java From MapReduce-Demo with MIT License | 4 votes |
public static void main(String[] args) throws Exception {
    // 1. HDFS configuration
    String namenode_ip = "192.168.17.10";
    String hdfs = "hdfs://" + namenode_ip + ":9000";
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfs);
    conf.set("mapreduce.app-submission.cross-platform", "true");

    // 2. MapReduce job configuration
    String jobName = "IPCount";                      // job name
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(IPCount.class);                // driver class used at runtime
    job.setJar("export\\IPCount.jar");               // local jar file
    job.setMapperClass(IPCountMapper.class);         // Mapper class
    job.setMapOutputKeyClass(DayAndIp.class);        // Mapper output key type
    job.setMapOutputValueClass(IntWritable.class);   // Mapper output value type
    job.setReducerClass(IPCountReducer.class);       // Reducer class
    job.setOutputKeyClass(DayAndIp.class);           // Reducer output key type
    job.setOutputValueClass(IntWritable.class);      // Reducer output value type

    // 3. Job input and output paths
    String dataDir = "/expr/weblog/data";            // input data directory
    String outputDir = "/expr/weblog/output4";       // output directory
    Path inPath = new Path(hdfs + dataDir);
    Path outPath = new Path(hdfs + outputDir);
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }

    // 4. Run the job
    System.out.println("Job: " + jobName + " is running...");
    if (job.waitForCompletion(true)) {
        System.out.println("success!");
        System.exit(0);
    } else {
        System.out.println("failed!");
        System.exit(1);
    }
}
Example 17
Source File: MedianStdDevJob.java From MapReduce-Demo with MIT License | 4 votes |
public static void main(String[] args) throws Exception {
    // 1. HDFS configuration
    String namenode_ip = "192.168.17.10";
    String hdfs = "hdfs://" + namenode_ip + ":9000";
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfs);
    conf.set("mapreduce.app-submission.cross-platform", "true");

    // 2. MapReduce job configuration
    String jobName = "MedianStdDevJob";                  // job name
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(MedianStdDevJob.class);            // driver class used at runtime
    job.setJar("export\\MedianStdDevJob.jar");           // local jar file
    job.setMapperClass(MedianStdDevMapper.class);        // Mapper class
    job.setMapOutputKeyClass(IntWritable.class);         // Mapper output key type
    job.setMapOutputValueClass(IntWritable.class);       // Mapper output value type
    job.setReducerClass(MedianStdDevReducer.class);      // Reducer class
    job.setOutputKeyClass(IntWritable.class);            // Reducer output key type
    job.setOutputValueClass(MedianStdDevTuple.class);    // Reducer output value type

    // 3. Job input and output paths
    String dataDir = "/expr/medianstddev/data";          // input data directory
    String outputDir = "/expr/medianstddev/output";      // output directory
    Path inPath = new Path(hdfs + dataDir);
    Path outPath = new Path(hdfs + outputDir);
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outPath);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }

    // 4. Run the job
    System.out.println("Job: " + jobName + " is running...");
    if (job.waitForCompletion(true)) {
        System.out.println("success!");
        System.exit(0);
    } else {
        System.out.println("failed!");
        System.exit(1);
    }
}
Example 18
Source File: ComputeResponseTool.java From incubator-retired-pirk with Apache License 2.0 | 4 votes |
private boolean computeFinalResponse(Path outPathFinal) throws ClassNotFoundException, IOException, InterruptedException {
    boolean success;

    Job finalResponseJob = Job.getInstance(conf, "pir_finalResponse");
    finalResponseJob.setSpeculativeExecution(false);
    String finalResponseJobName = "pir_finalResponse";

    // Set the same job configs as for the first iteration
    finalResponseJob.getConfiguration().set("mapreduce.map.memory.mb", SystemConfiguration.getProperty("mapreduce.map.memory.mb", "2000"));
    finalResponseJob.getConfiguration().set("mapreduce.reduce.memory.mb", SystemConfiguration.getProperty("mapreduce.reduce.memory.mb", "2000"));
    finalResponseJob.getConfiguration().set("mapreduce.map.java.opts", SystemConfiguration.getProperty("mapreduce.map.java.opts", "-Xmx1800m"));
    finalResponseJob.getConfiguration().set("mapreduce.reduce.java.opts", SystemConfiguration.getProperty("mapreduce.reduce.java.opts", "-Xmx1800m"));

    finalResponseJob.getConfiguration().set("pirMR.queryInputDir", SystemConfiguration.getProperty("pir.queryInput"));
    finalResponseJob.getConfiguration().set("pirMR.outputFile", outputFile);

    finalResponseJob.getConfiguration().set("mapreduce.map.speculative", "false");
    finalResponseJob.getConfiguration().set("mapreduce.reduce.speculative", "false");

    finalResponseJob.setJobName(finalResponseJobName);
    finalResponseJob.setJarByClass(ColumnMultMapper.class);
    finalResponseJob.setNumReduceTasks(1);

    // Set the Mapper, InputFormat, and input path
    finalResponseJob.setMapperClass(ColumnMultMapper.class);
    finalResponseJob.setInputFormatClass(TextInputFormat.class);

    FileStatus[] status = fs.listStatus(new Path(outputDirColumnMult));
    for (FileStatus fstat : status) {
        if (fstat.getPath().getName().startsWith(FileConst.PIR_COLS)) {
            logger.info("fstat.getPath() = " + fstat.getPath().toString());
            FileInputFormat.addInputPath(finalResponseJob, fstat.getPath());
        }
    }
    finalResponseJob.setMapOutputKeyClass(LongWritable.class);
    finalResponseJob.setMapOutputValueClass(Text.class);

    // Set the reducer and output options
    finalResponseJob.setReducerClass(FinalResponseReducer.class);
    finalResponseJob.setOutputKeyClass(LongWritable.class);
    finalResponseJob.setOutputValueClass(Text.class);
    finalResponseJob.getConfiguration().set("mapreduce.output.textoutputformat.separator", ",");

    // Delete the output file, if it exists
    if (fs.exists(outPathFinal)) {
        fs.delete(outPathFinal, true);
    }
    FileOutputFormat.setOutputPath(finalResponseJob, outPathFinal);

    MultipleOutputs.addNamedOutput(finalResponseJob, FileConst.PIR_FINAL, TextOutputFormat.class, LongWritable.class, Text.class);

    // Submit job, wait for completion
    success = finalResponseJob.waitForCompletion(true);

    return success;
}
Example 19
Source File: JobRun.java From MapReduce-Demo with MIT License | 4 votes |
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    String namenode_ip = "192.168.17.10";
    String hdfs = "hdfs://" + namenode_ip + ":9000";
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfs);
    conf.set("mapreduce.app-submission.cross-platform", "true");

    // Configuration for job1
    Job job1 = Job.getInstance(conf, "Decompose");
    job1.setJarByClass(JobRun.class);
    job1.setJar("export\\mutualFriend.jar");
    job1.setMapperClass(DecomposeFriendsMapper.class);
    job1.setReducerClass(DecomposeFriendsReducer.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);

    Path input = new Path(hdfs + "/workspace/mutualFriends/data");
    Path output1 = new Path(hdfs + "/workspace/mutualFriends/output_Dec");
    FileInputFormat.addInputPath(job1, input);
    FileOutputFormat.setOutputPath(job1, output1);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(output1)) {
        fs.delete(output1, true);
    }

    // Start job2 only if job1 succeeds, since job2 depends entirely on job1's output
    if (job1.waitForCompletion(true)) {
        // Configuration for job2
        Job job2 = Job.getInstance(conf, "Merge");
        job2.setJarByClass(JobRun.class);
        job2.setJar("export\\mutualFriend.jar");
        job2.setMapperClass(MergeFriendsMapper.class);
        job2.setReducerClass(MergeFriendsReducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        Path output2 = new Path(hdfs + "/workspace/mutualFriends/output_Meg");
        FileInputFormat.addInputPath(job2, output1);  // job2's input is job1's output
        FileOutputFormat.setOutputPath(job2, output2);
        if (fs.exists(output2)) {
            fs.delete(output2, true);
        }
        if (job2.waitForCompletion(true)) {
            System.out.println("succeeded");
        } else {
            System.out.println("failed");
        }
    }
}
Example 20
Source File: ChainReducer.java From hadoop with Apache License 2.0 | 3 votes |
/**
 * Adds a {@link Mapper} class to the chain reducer.
 *
 * <p>
 * The key and values are passed from one element of the chain to the next, by
 * value. For the added Mapper, the configuration given for it,
 * <code>mapperConf</code>, has precedence over the job's Configuration. This
 * precedence is in effect when the task is running.
 * </p>
 * <p>
 * IMPORTANT: There is no need to specify the output key/value classes for the
 * ChainMapper; this is done by the addMapper for the last mapper in the
 * chain.
 * </p>
 *
 * @param job
 *          The job.
 * @param klass
 *          the Mapper class to add.
 * @param inputKeyClass
 *          mapper input key class.
 * @param inputValueClass
 *          mapper input value class.
 * @param outputKeyClass
 *          mapper output key class.
 * @param outputValueClass
 *          mapper output value class.
 * @param mapperConf
 *          a configuration for the Mapper class. It is recommended to use a
 *          Configuration without default values using the
 *          <code>Configuration(boolean loadDefaults)</code> constructor with
 *          FALSE.
 */
public static void addMapper(Job job, Class<? extends Mapper> klass,
        Class<?> inputKeyClass, Class<?> inputValueClass,
        Class<?> outputKeyClass, Class<?> outputValueClass,
        Configuration mapperConf) throws IOException {
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Chain.addMapper(false, job, klass, inputKeyClass, inputValueClass,
            outputKeyClass, outputValueClass, mapperConf);
}
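For context on how addMapper() interacts with the job's output key class, the following is a hedged driver sketch; it chains Hadoop's identity Mapper and Reducer classes purely for illustration, and the ChainDriver class name is a placeholder rather than code from the project above. As the javadoc notes, the output key/value classes given to the last addMapper() call become the job's output classes, so the driver never calls setOutputKeyClass() directly.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.chain.ChainMapper;
import org.apache.hadoop.mapreduce.lib.chain.ChainReducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ChainDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "chain example");
        job.setJarByClass(ChainDriver.class);

        // Map phase: identity mapper, (LongWritable, Text) -> (LongWritable, Text).
        ChainMapper.addMapper(job, Mapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, new Configuration(false));

        // Reduce phase: identity reducer followed by a post-reduce identity mapper.
        // The last addMapper call also sets the job's output key/value classes,
        // so job.setOutputKeyClass()/setOutputValueClass() are not called here.
        ChainReducer.setReducer(job, Reducer.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, new Configuration(false));
        ChainReducer.addMapper(job, Mapper.class, LongWritable.class, Text.class,
                LongWritable.class, Text.class, new Configuration(false));

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}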