Java Code Examples for org.apache.hadoop.mapreduce.lib.output.MultipleOutputs#setCountersEnabled()
The following examples show how to use org.apache.hadoop.mapreduce.lib.output.MultipleOutputs#setCountersEnabled().
You can go to the original project or source file by following the links above each example.
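Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: register one or more named outputs on the driver side, then call setCountersEnabled(job, true) so the job keeps a record counter per named output. The class name and the "stats" output name below are illustrative, not taken from any of the projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MultipleOutputsCountersDemo {
    public static Job configure(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "multiple-outputs-counters-demo");
        // Register an extra named output ("stats" is an illustrative name).
        MultipleOutputs.addNamedOutput(job, "stats",
                TextOutputFormat.class, Text.class, LongWritable.class);
        // Keep a counter of how many records each named output receives.
        MultipleOutputs.setCountersEnabled(job, true);
        return job;
    }
}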
Example 1
Source File: AbstractReasoningTool.java from rya (Apache License 2.0)
/**
 * Set up a MapReduce job to output human-readable text.
 */
protected void configureTextOutput(String destination) {
    Path outPath;
    outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        TextOutputFormat.class, NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
Example 2
Source File: AbstractReasoningTool.java from rya (Apache License 2.0)
/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
Example 3
Source File: AbstractReasoningTool.java from rya (Apache License 2.0)
/**
 * Set up a MapReduce job to output newly derived triples.
 * @param intermediate True if this is intermediate data. Outputs
 *        to [base]-[iteration]-[temp].
 */
protected void configureDerivationOutput(boolean intermediate) {
    Path outPath;
    Configuration conf = job.getConfiguration();
    int iteration = MRReasoningUtils.getCurrentIteration(conf);
    if (intermediate) {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration + MRReasoningUtils.TEMP_SUFFIX);
    } else {
        outPath = MRReasoningUtils.getOutputPath(conf,
            MRReasoningUtils.OUTPUT_BASE + iteration);
    }
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.TERMINAL_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.SCHEMA_OUT,
        SequenceFileOutputFormat.class, Fact.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INCONSISTENT_OUT,
        SequenceFileOutputFormat.class, Derivation.class, NullWritable.class);
    MultipleOutputs.setCountersEnabled(job, true);
    // Set up an output for diagnostic info, if needed
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
}
Example 4
Source File: BinningTags.java from hadoop-map-reduce-patterns (Apache License 2.0)
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    String[] otherArgs = parser.getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: BinningTags <in> <out>");
        ToolRunner.printGenericCommandUsage(System.err);
        System.exit(2);
    }
    Job job = new Job(conf, "Binning Tags");
    job.setJarByClass(BinningTags.class);
    // Configure the MultipleOutputs by adding an output called "bins"
    // With the proper output format and mapper key/value pairs
    MultipleOutputs.addNamedOutput(job, "bins", TextOutputFormat.class,
        Text.class, NullWritable.class);
    // Enable the counters for the job
    // If there are a significant number of different named outputs, this
    // should be disabled
    MultipleOutputs.setCountersEnabled(job, true);
    // Map-only job
    job.setNumReduceTasks(0);
    job.setMapperClass(BinningMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
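The driver above only registers the "bins" named output; the BinningMapper that actually writes to it is not shown on this page. As a hedged sketch (not the project's real mapper, and with a placeholder binning rule), a map-only task typically obtains a MultipleOutputs instance in setup(), writes through it in map(), and closes it in cleanup() so the named-output files are flushed:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class BinningMapper extends Mapper<Object, Text, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    @Override
    protected void map(Object key, Text value, Context context)
            throws IOException, InterruptedException {
        // Placeholder binning rule: route each record to a sub-path of the
        // "bins" named output based on the record's content.
        String bin = value.toString().isEmpty() ? "empty" : "nonempty";
        mos.write("bins", value, NullWritable.get(), "bins/" + bin);
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}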
Example 5
Source File: DataSourceCompJobExecutor.java from jumbune (GNU Lesser General Public License v3.0)
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // LOGGER.debug("Data validation job received args length [ " +
    // otherArgs.length + "]");
    StringBuilder sb = new StringBuilder();
    for (int j = 0; j < otherArgs.length; j++) {
        sb.append(otherArgs[j]);
    }
    String validationInfoJson = sb.toString();
    Gson gson = new Gson();
    DataSourceCompValidationInfo validationInfo = gson.fromJson(validationInfoJson,
            DataSourceCompValidationInfo.class);
    DataSourceCompJobExecutor dscJobExecutor = new DataSourceCompJobExecutor();
    dscJobExecutor.removeSlash(validationInfo);
    dscJobExecutor.addTransformationNumber(validationInfo);
    DataSourceCompMapperInfo mapperInfo = dscJobExecutor.createMapperInfo(validationInfo);
    String outputPath = DataSourceCompConstants.OUTPUT_DIR_PATH + new Date().getTime();
    // String outputPath = "/destination";
    conf.set("validationInfoJson", gson.toJson(validationInfo));
    conf.set("mapperInfoJson", gson.toJson(mapperInfo));
    conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    FileSystem fileSystem = FileSystem.get(conf);
    List<Path> mapperFilesList = dscJobExecutor.getFiles(validationInfo.getSourcePath(), fileSystem);
    mapperFilesList.addAll(dscJobExecutor.getFiles(validationInfo.getDestinationPath(), fileSystem));
    Map<String, String> filesMap = dscJobExecutor.encodeFilesMap(mapperFilesList);
    Map<String, String> reverseFilesMap = dscJobExecutor.invertMap(filesMap);
    Path[] patharr = new Path[mapperFilesList.size()];
    for (int i = 0; i < mapperFilesList.size(); i++) {
        patharr[i] = mapperFilesList.get(i);
    }
    conf.set("filesMap", gson.toJson(filesMap));
    String recordSeparator = validationInfo.getRecordSeparator();
    if (recordSeparator == null || recordSeparator.trim().isEmpty()) {
        recordSeparator = "\n";
    }
    conf.set("textinputformat.record.delimiter", recordSeparator);
    Job job = Job.getInstance(conf, "jumbune_dsc_" + validationInfo.getJobName());
    job.setJarByClass(DataSourceCompJobExecutor.class);
    job.setMapperClass(org.jumbune.datavalidation.dsc.DataSourceCompMapper.class);
    job.setReducerClass(DataSourceCompReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DataSourceCompMapValueWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, patharr);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    MultipleOutputs.setCountersEnabled(job, true);
    job.waitForCompletion(true);
    String workerDirPath = validationInfo.getSlaveFileLoc();
    dscJobExecutor.copyResult(conf, outputPath, workerDirPath);
    dscJobExecutor.renameFiles(workerDirPath, reverseFilesMap);
    DataSourceCompReportBean reportBean = dscJobExecutor.calculateCounters(job, outputPath,
            reverseFilesMap, validationInfo.getValidationsList());
    LOGGER.info(DataValidationConstants.DV_REPORT + gson.toJson(reportBean));
}
Example 6
Source File: BasicJobChaining.java from hadoop-map-reduce-patterns (Apache License 2.0)
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: JobChainingDriver <posts> <users> <out>");
        System.exit(2);
    }
    Path postInput = new Path(otherArgs[0]);
    Path userInput = new Path(otherArgs[1]);
    Path outputDirIntermediate = new Path(otherArgs[2] + "_int");
    Path outputDir = new Path(otherArgs[2]);
    // Setup first job to counter user posts
    Job countingJob = new Job(conf, "JobChaining-Counting");
    countingJob.setJarByClass(BasicJobChaining.class);
    // Set our mapper and reducer, we can use the API's long sum reducer for
    // a combiner!
    countingJob.setMapperClass(UserIdCountMapper.class);
    countingJob.setCombinerClass(LongSumReducer.class);
    countingJob.setReducerClass(UserIdSumReducer.class);
    countingJob.setOutputKeyClass(Text.class);
    countingJob.setOutputValueClass(LongWritable.class);
    countingJob.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(countingJob, postInput);
    countingJob.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(countingJob, outputDirIntermediate);
    // Execute job and grab exit code
    int code = countingJob.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        // Calculate the average posts per user by getting counter values
        double numRecords = (double) countingJob.getCounters()
                .findCounter(AVERAGE_CALC_GROUP, UserIdCountMapper.RECORDS_COUNTER_NAME)
                .getValue();
        double numUsers = (double) countingJob.getCounters()
                .findCounter(AVERAGE_CALC_GROUP, UserIdSumReducer.USERS_COUNTER_NAME)
                .getValue();
        double averagePostsPerUser = numRecords / numUsers;
        // Setup binning job
        Job binningJob = new Job(new Configuration(), "JobChaining-Binning");
        binningJob.setJarByClass(BasicJobChaining.class);
        // Set mapper and the average posts per user
        binningJob.setMapperClass(UserIdBinningMapper.class);
        UserIdBinningMapper.setAveragePostsPerUser(binningJob, averagePostsPerUser);
        binningJob.setNumReduceTasks(0);
        binningJob.setInputFormatClass(TextInputFormat.class);
        TextInputFormat.addInputPath(binningJob, outputDirIntermediate);
        // Add two named outputs for below/above average
        MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_BELOW_NAME,
                TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(binningJob, MULTIPLE_OUTPUTS_ABOVE_NAME,
                TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.setCountersEnabled(binningJob, true);
        TextOutputFormat.setOutputPath(binningJob, outputDir);
        // Add the user files to the DistributedCache
        FileStatus[] userFiles = FileSystem.get(conf).listStatus(userInput);
        for (FileStatus status : userFiles) {
            DistributedCache.addCacheFile(status.getPath().toUri(),
                    binningJob.getConfiguration());
        }
        // Execute job and grab exit code
        code = binningJob.waitForCompletion(true) ? 0 : 1;
    }
    // Clean up the intermediate output
    FileSystem.get(conf).delete(outputDirIntermediate, true);
    System.exit(code);
}
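Because setCountersEnabled(binningJob, true) is called, the binning job tracks how many records were written to each named output. As a small follow-on sketch (not part of the original example), those counts could be read back after waitForCompletion; this assumes the counter group is named after the MultipleOutputs class, as the Hadoop javadoc describes.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

public class NamedOutputCounters {
    // Print one line per named-output counter (one counter per named output).
    public static void print(Job job) throws Exception {
        CounterGroup group = job.getCounters().getGroup(MultipleOutputs.class.getName());
        for (Counter counter : group) {
            System.out.println(counter.getDisplayName() + " = " + counter.getValue());
        }
    }
}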