org.apache.hadoop.mapreduce.lib.output.FileOutputFormat Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.
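Every example below follows the same basic contract: FileOutputFormat.setOutputPath(job, path) tells the job where to write its part files, and that directory must not already exist at submission time (FileOutputFormat's output-spec check rejects it otherwise, which is why several snippets delete the path first). As a minimal sketch of that shared skeleton (it uses Hadoop's identity Mapper and Reducer as stand-ins for real job classes; it is not taken from any project on this page):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal FileOutputFormat example");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(Mapper.class);    // identity mapper, stand-in for a real one
        job.setReducerClass(Reducer.class);  // identity reducer, stand-in for a real one
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // The output directory must not exist yet; submission fails if it does.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}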
Example #1
Source File: PageRankDriver.java From flink-perf with Apache License 2.0
public static void calculateNextRanks(Configuration conf, FileSystem fs, String inputPath, String outputPath) throws Exception {
    Path outFile = new Path(outputPath);
    if (fs.exists(outFile)) {
        fs.delete(outFile, true);
    }
    Job job = Job.getInstance(conf);
    job.setJarByClass(PageRankMapper.class);
    job.setMapperClass(PageRankMapper.class);
    job.setReducerClass(PageRankReducer.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Message.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Message.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outFile);
    job.waitForCompletion(true);
}
Example #2
Source File: BigQueryOutputConfiguration.java From hadoop-connectors with Apache License 2.0
/**
 * Gets a configured instance of the stored {@link FileOutputFormat} in the configuration.
 *
 * @param conf the configuration to reference the keys from.
 * @return a configured instance of the stored {@link FileOutputFormat} in the configuration.
 * @throws IOException if there's an issue getting an instance of a FileOutputFormat from the
 *     configuration.
 */
@SuppressWarnings("rawtypes")
public static FileOutputFormat getFileOutputFormat(Configuration conf) throws IOException {
    // Ensure the BigQuery output information is valid.
    getMandatoryConfig(conf, OUTPUT_FORMAT_CLASS);

    Class<?> confClass = OUTPUT_FORMAT_CLASS.get(conf, conf::getClass);

    // Fail if the default value was used, or the class isn't a FileOutputFormat.
    if (confClass == null) {
        throw new IOException(
            "Unable to resolve value for the configuration key '"
                + OUTPUT_FORMAT_CLASS.getKey() + "'.");
    } else if (!FileOutputFormat.class.isAssignableFrom(confClass)) {
        throw new IOException("The class " + confClass.getName() + " is not a FileOutputFormat.");
    }

    Class<? extends FileOutputFormat> fileOutputClass =
        confClass.asSubclass(FileOutputFormat.class);

    // Create a new instance and configure it if it's configurable.
    return ReflectionUtils.newInstance(fileOutputClass, conf);
}
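As a note on the pattern above: the same reflective round-trip can be reproduced with the stock Configuration API alone. The sketch below is illustrative only, not hadoop-connectors code; the key name "example.output.format.class" is invented, with TextOutputFormat as an arbitrary fallback.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ReflectionUtils;

public class OutputFormatLookup {
    @SuppressWarnings("rawtypes")
    public static FileOutputFormat resolve(Configuration conf) {
        // "example.output.format.class" is an invented key for illustration.
        Class<? extends FileOutputFormat> cls = conf.getClass(
            "example.output.format.class", TextOutputFormat.class, FileOutputFormat.class);
        // ReflectionUtils.newInstance also calls setConf() if the class is Configurable.
        return ReflectionUtils.newInstance(cls, conf);
    }
}

Setting the key with conf.setClass("example.output.format.class", SomeOtherFormat.class, FileOutputFormat.class) would then swap the output format at run time without recompiling.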
Example #3
Source File: CompactionJobConfigurator.java From incubator-gobblin with Apache License 2.0
/**
 * Refer to MRCompactorAvroKeyDedupJobRunner#configureInputAndOutputPaths(Job).
 * @return false if no valid input paths present for MR job to process, where a path is valid if it is
 * a directory containing one or more files.
 */
protected boolean configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
    boolean emptyDirectoryFlag = false;

    String mrOutputBase = this.state.getProp(MRCompactor.COMPACTION_JOB_DIR);
    CompactionPathParser parser = new CompactionPathParser(this.state);
    CompactionPathParser.CompactionParserResult rst = parser.parse(dataset);
    this.mrOutputPath = concatPaths(mrOutputBase, rst.getDatasetName(), rst.getDstSubDir(), rst.getTimeString());

    log.info("Cleaning temporary MR output directory: " + mrOutputPath);
    this.fs.delete(mrOutputPath, true);

    this.mapReduceInputPaths = getGranularInputPaths(dataset.datasetRoot());
    if (this.mapReduceInputPaths.isEmpty()) {
        this.mapReduceInputPaths.add(dataset.datasetRoot());
        emptyDirectoryFlag = true;
    }

    this.oldFiles = new HashSet<>();
    for (Path path : mapReduceInputPaths) {
        oldFiles.add(this.fs.makeQualified(path).toString());
        FileInputFormat.addInputPath(job, path);
    }

    FileOutputFormat.setOutputPath(job, mrOutputPath);
    return emptyDirectoryFlag;
}
Example #4
Source File: BigDiffHadoop.java From secure-data-service with Apache License 2.0
public void execute(String inputPath1, String inputPath2, String outputPath) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "bigdiff");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputPath1));
    FileInputFormat.addInputPath(job, new Path(inputPath2));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);
}
Example #5
Source File: MapperInputSplitInfo.java From bigdata-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: MapperInputSplitInfo <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, MapperInputSplitInfo.class.getSimpleName());
    job.setJarByClass(MapperInputSplitInfo.class);
    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #6
Source File: TestJoinDatamerge.java From big-c with Apache License 2.0
private static void checkOuterConsistency(Job job, Path[] src) throws IOException {
    Path outf = FileOutputFormat.getOutputPath(job);
    FileStatus[] outlist = cluster.getFileSystem().listStatus(outf,
        new Utils.OutputFileUtils.OutputFilesFilter());
    assertEquals("number of part files is more than 1. It is" + outlist.length,
        1, outlist.length);
    assertTrue("output file with zero length" + outlist[0].getLen(),
        0 < outlist[0].getLen());
    SequenceFile.Reader r = new SequenceFile.Reader(cluster.getFileSystem(),
        outlist[0].getPath(), job.getConfiguration());
    IntWritable k = new IntWritable();
    IntWritable v = new IntWritable();
    while (r.next(k, v)) {
        assertEquals("counts does not match", v.get(),
            countProduct(k, src, job.getConfiguration()));
    }
    r.close();
}
Example #7
Source File: ParquetAvroExample.java From parquet-flinktacular with Apache License 2.0
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Set up the Hadoop Input Format
    Job job = Job.getInstance();

    // Set up Hadoop Output Format
    HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(job, true);

    // Output & Execute
    data.output(hadoopOutputFormat);
}
Example #8
Source File: JobExecutor.java From Cubert with Apache License 2.0
protected void setOutput() throws IOException {
    JsonNode output = get(root, "output");
    JsonNode params = output.get("params");
    if (params == null)
        params = mapper.createObjectNode();

    Path outputPath = new Path(getText(output, "path"));
    FileOutputFormat.setOutputPath(job, outputPath);

    if (params.has("overwrite") && Boolean.parseBoolean(getText(params, "overwrite"))) {
        fs.delete(outputPath, true);
    }

    BlockSchema schema = new BlockSchema(output.get("schema"));
    Storage storage = StorageFactory.get(getText(output, "type"));
    storage.prepareOutput(job, conf, params, schema, outputPath);
}
Example #9
Source File: WordCount.java From wifi with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // System.out.println(otherArgs);
    if (otherArgs.length != 2) {
        System.out.println("Usage:wordcount <in> <out>");
        System.exit(2);
    }
    // if (args.length != 2) {
    //     System.out.println("param error!");
    //     System.exit(-1);
    // }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #10
Source File: GridmixJob.java From RDFS with Apache License 2.0
public Job call() throws IOException, InterruptedException, ClassNotFoundException {
    job.setMapperClass(GridmixMapper.class);
    job.setReducerClass(GridmixReducer.class);
    job.setNumReduceTasks(jobdesc.getNumberReduces());
    job.setMapOutputKeyClass(GridmixKey.class);
    job.setMapOutputValueClass(GridmixRecord.class);
    job.setSortComparatorClass(GridmixKey.Comparator.class);
    job.setGroupingComparatorClass(SpecGroupingComparator.class);
    job.setInputFormatClass(GridmixInputFormat.class);
    job.setOutputFormatClass(RawBytesOutputFormat.class);
    job.setPartitionerClass(DraftPartitioner.class);
    job.setJarByClass(GridmixJob.class);
    job.getConfiguration().setInt("gridmix.job.seq", seq);
    job.getConfiguration().set(ORIGNAME,
        null == jobdesc.getJobID() ? "<unknown>" : jobdesc.getJobID().toString());
    job.getConfiguration().setBoolean("mapred.used.genericoptionsparser", true);
    FileInputFormat.addInputPath(job, new Path("ignored"));
    FileOutputFormat.setOutputPath(job, outdir);
    job.submit();
    return job;
}
Example #11
Source File: WordCount.java From bigdata-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #12
Source File: MatMulDriver.java From MLHadoop with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // A is an m-by-n matrix; B is an n-by-p matrix.
    conf.set("m", args[0]);
    conf.set("n", args[1]);
    conf.set("p", args[2]);
    Job job = new Job(conf, "Matrix_Multiplication");
    job.setJarByClass(MatMulDriver.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(MatMulMap.class);
    // Don't use combiner if there is no scope of combining the output. Otherwise the job will get stuck.
    // job.setCombinerClass(MatMulModGenReduce.class);
    job.setReducerClass(MatMulReduce.class);
    // args[3] is the input path.
    FileInputFormat.addInputPath(job, new Path(args[3]));
    // args[4] is the output path.
    FileOutputFormat.setOutputPath(job, new Path(args[4]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #13
Source File: Main.java From hiped2 with Apache License 2.0
public static void runSortJob(Configuration conf, Path input, Path outputPath) throws Exception {
    Job job = new Job(conf);
    job.setJarByClass(Main.class);

    job.setMapperClass(SortMapReduce.Map.class);
    job.setReducerClass(SortMapReduce.Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapOutputKeyClass(Person.class);
    job.setMapOutputValueClass(Person.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(PersonNamePartitioner.class);
    job.setSortComparatorClass(PersonComparator.class);
    job.setGroupingComparatorClass(PersonNameComparator.class);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    job.waitForCompletion(true);
}
Example #14
Source File: NBCDriver.java From MLHadoop with Apache License 2.0
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    // The test input for which you want to find the activity that the Person should be doing
    conf.set("test_input", args[0]);
    Job job = new Job(conf);
    job.setJarByClass(NBCDriver.class);
    job.setJobName("Naive_Bayes_calssifier using Hadoop");
    FileInputFormat.setInputPaths(job, new Path(args[1]));
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    job.setMapperClass(NBCMap.class);
    job.setReducerClass(NBCReduce.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);
}
Example #15
Source File: XflowDstIPCount.java From bigdata-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: xflowdstipcount <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance();
    job.setJobName("xflow dstip count");
    job.setJarByClass(XflowDstIPCount.class);
    job.setMapperClass(ParesDstIPMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #16
Source File: Step6.java From MapReduce-Demo with MIT License
public static boolean run(Configuration config, Map<String, String> paths) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "step6";
    Job job = Job.getInstance(config, jobName);
    job.setJarByClass(Step6.class);
    job.setJar("export\\ItemCF.jar");
    job.setMapperClass(Step6_Mapper.class);
    job.setReducerClass(Step6_Reducer.class);
    job.setMapOutputKeyClass(PairWritable.class);
    job.setMapOutputValueClass(Text.class);
    // job.setSortComparatorClass(ScoreSort.class);  // custom sort
    job.setGroupingComparatorClass(UserGroup.class);  // custom grouping

    Path inPath = new Path(paths.get("Step6Input"));
    Path outpath = new Path(paths.get("Step6Output"));
    FileInputFormat.addInputPath(job, inPath);
    FileOutputFormat.setOutputPath(job, outpath);
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(outpath)) {
        fs.delete(outpath, true);
    }
    return job.waitForCompletion(true);
}
Example #17
Source File: KMeansDriver.java From flink-perf with Apache License 2.0
public static void initializeCenters(Configuration conf, FileSystem fs, String pointsPath, String seqFilePath) throws Exception {
    Path points = new Path(pointsPath);
    Path seqFile = new Path(seqFilePath);
    if (fs.exists(seqFile)) {
        fs.delete(seqFile, true);
    }
    Job job = Job.getInstance(conf);
    job.setMapperClass(CenterInitializer.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Centroid.class);
    job.setMapOutputValueClass(Point.class);
    job.setOutputKeyClass(Centroid.class);
    job.setOutputValueClass(Point.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(pointsPath));
    FileOutputFormat.setOutputPath(job, seqFile);
    job.waitForCompletion(true);
}
Example #18
Source File: ElemValueCooccurrencesTest.java From marklogic-contentpump with Apache License 2.0
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("Usage: ElemValueCooccurrencesTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElemValueCooccurrencesTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElemCooccurrencesMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS,
        ElemValueCooccurrencesFunction.class, ElemValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example #19
Source File: IntegrationTestLoadAndVerify.java From hbase with Apache License 2.0
protected Job doLoad(Configuration conf, TableDescriptor tableDescriptor) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, tableDescriptor.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + tableDescriptor.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
Example #20
Source File: TestCRAMOutputFormat.java From Hadoop-BAM with MIT License
private Path doMapReduce(final String inputFile) throws Exception {
    final FileSystem fileSystem = FileSystem.get(conf);
    final Path inputPath = new Path(inputFile);
    final Path outputPath = fileSystem.makeQualified(new Path("target/out"));
    fileSystem.delete(outputPath, true);

    final Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(CRAMInputFormat.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(SAMRecordWritable.class);

    conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile);
    job.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(SAMRecordWritable.class);

    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputPath);

    final boolean success = job.waitForCompletion(true);
    assertTrue(success);

    return outputPath;
}
Example #21
Source File: HashTable.java From hbase with Apache License 2.0
public Job createSubmittableJob(String[] args) throws IOException {
    Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
    generatePartitions(partitionsPath);

    Job job = Job.getInstance(getConf(),
        getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
    Configuration jobConf = job.getConfiguration();
    jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
    jobConf.setBoolean(IGNORE_TIMESTAMPS, tableHash.ignoreTimestamps);
    job.setJarByClass(HashTable.class);
    TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
        HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    // use a TotalOrderPartitioner and reducers to group region output into hash files
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
    job.setReducerClass(Reducer.class); // identity reducer
    job.setNumReduceTasks(tableHash.numHashFiles);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

    return job;
}
Example #22
Source File: MapReduceTestUtil.java From hadoop with Apache License 2.0
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");

    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}
Example #23
Source File: ConfigurationHelper.java From dkpro-c4corpus with Apache License 2.0
/**
 * Job configurator
 *
 * @param job job instance
 * @param jarByClass class of the jar
 * @param mapperClass mapper
 * @param reducerClass reducer
 * @param commaSeparatedInputFiles input paths
 * @param outputPath output
 * @throws IOException I/O exception
 */
public static void configureJob(Job job, Class<?> jarByClass,
        Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
        String commaSeparatedInputFiles, String outputPath)
        throws IOException {
    job.setJarByClass(jarByClass);
    job.setJobName(jarByClass.getName());

    // mapper
    job.setMapperClass(mapperClass);

    // reducer
    job.setReducerClass(reducerClass);

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    // prevent producing empty files
    LazyOutputFormat.setOutputFormatClass(job, WARCOutputFormat.class);

    // intermediate data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // output data
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}
Example #24
Source File: AvroHdfsFileSink.java From components with Apache License 2.0
@Override
protected void configure(Job job, KV<AvroKey<IndexedRecord>, NullWritable> sample) {
    super.configure(job, sample);
    AvroKey<IndexedRecord> k = sample.getKey();
    AvroJob.setOutputKeySchema(job, k.datum().getSchema());
    FileOutputFormat.setCompressOutput(job, true);
    job.getConfiguration().set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
}
Example #25
Source File: MapReduceRunner.java From halvade with GNU General Public License v3.0
protected int runCombineJob(String halvadeOutDir, String mergeOutDir, boolean featureCount)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Configuration combineConf = getConf();
    if (!halvadeOpts.out.endsWith("/"))
        halvadeOpts.out += "/";
    HalvadeConf.setInputDir(combineConf, halvadeOutDir);
    HalvadeConf.setOutDir(combineConf, mergeOutDir);
    FileSystem outFs = FileSystem.get(new URI(mergeOutDir), combineConf);
    if (outFs.exists(new Path(mergeOutDir))) {
        Logger.INFO("The output directory \'" + mergeOutDir + "\' already exists.");
        Logger.INFO("ERROR: Please remove this directory before trying again.");
        System.exit(-2);
    }
    HalvadeConf.setReportAllVariant(combineConf, halvadeOpts.reportAll);
    HalvadeResourceManager.setJobResources(halvadeOpts, combineConf, HalvadeResourceManager.COMBINE, false, halvadeOpts.useBamInput);
    // halvadeOpts.splitChromosomes(combineConf, 0);
    Job combineJob = Job.getInstance(combineConf, "HalvadeCombineVCF");
    combineJob.setJarByClass(VCFCombineMapper.class);

    addInputFiles(halvadeOutDir, combineConf, combineJob, featureCount ? ".count" : ".vcf");
    FileOutputFormat.setOutputPath(combineJob, new Path(mergeOutDir));

    combineJob.setMapperClass(featureCount ? HTSeqCombineMapper.class : VCFCombineMapper.class);
    combineJob.setMapOutputKeyClass(featureCount ? Text.class : LongWritable.class);
    combineJob.setMapOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);
    combineJob.setInputFormatClass(featureCount ? TextInputFormat.class : VCFInputFormat.class);
    combineJob.setNumReduceTasks(1);
    combineJob.setReducerClass(featureCount
        ? be.ugent.intec.halvade.hadoop.mapreduce.HTSeqCombineReducer.class
        : be.ugent.intec.halvade.hadoop.mapreduce.VCFCombineReducer.class);
    combineJob.setOutputKeyClass(Text.class);
    combineJob.setOutputValueClass(featureCount ? LongWritable.class : VariantContextWritable.class);

    return runTimedJob(combineJob, (featureCount ? "featureCounts" : "VCF") + " Combine Job");
}
Example #26
Source File: TeraStreamValidate.java From pravega-samples with Apache License 2.0
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        usage();
        return 2;
    }
    LOG.info("starting");

    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    getConf().setStrings(INPUT_URI_STRING, args[2]);
    getConf().setStrings(INPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(INPUT_STREAM_NAME, args[4]);
    getConf().setStrings(INPUT_DESERIALIZER, TextSerializer.class.getName());
    getConf().setInt(MRJobConfig.NUM_MAPS, 1);

    Job job = Job.getInstance(getConf());
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraStreamValidate");
    job.setJarByClass(TeraStreamValidate.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(TeraSortMapper.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(PravegaInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}
Example #27
Source File: TestValueIterReset.java From big-c with Apache License 2.0
public void testValueIterReset() {
    try {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "TestValueIterReset");
        job.setJarByClass(TestValueIterReset.class);
        job.setMapperClass(TestMapper.class);
        job.setReducerClass(TestReducer.class);
        job.setNumReduceTasks(NUM_TESTS);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        job.getConfiguration().setInt(MRJobConfig.REDUCE_MARKRESET_BUFFER_SIZE, 128);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
        Path output = new Path(TEST_ROOT_DIR + "/out");
        localFs.delete(output, true);
        FileOutputFormat.setOutputPath(job, output);
        createInput();
        assertTrue(job.waitForCompletion(true));
        validateOutput();
    } catch (Exception e) {
        e.printStackTrace();
        assertTrue(false);
    }
}
Example #28
Source File: Phase2ExactMatchDeDuplication.java From dkpro-c4corpus with Apache License 2.0
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf()); // set from the command line

    job.setJarByClass(Phase2ExactMatchDeDuplication.class);
    job.setJobName(Phase2ExactMatchDeDuplication.class.getName());

    // mapper
    job.setMapperClass(ExactMatchDetectionMapper.class);

    // we will compress the mapper's output (use fast Snappy compressor)
    job.getConfiguration().setBoolean(Job.MAP_OUTPUT_COMPRESS, true);
    job.getConfiguration()
        .setClass(Job.MAP_OUTPUT_COMPRESS_CODEC, SnappyCodec.class, CompressionCodec.class);

    // reducer
    job.setReducerClass(UniqueWarcWriterReducer.class);
    // no combiner, as the output classes in mapper and reducer are different!

    // input-output is warc
    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(WARCOutputFormat.class);

    // mapper output data
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(WARCWritable.class);

    // set output compression to GZip
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
Example #29
Source File: IAndKMatrixMultiplicationStep2.java From RecommendationEngine with MIT License
public static void run() throws IOException, ClassNotFoundException, InterruptedException {
    String inputPath = ItemBasedCFDriver.path.get("step8InputPath");
    String outputPath = ItemBasedCFDriver.path.get("step8OutputPath");

    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ":");
    conf.set("n", String.valueOf(ItemBasedCFDriver.N));
    conf.set("m", String.valueOf(ItemBasedCFDriver.M));

    Job job = Job.getInstance(conf);
    HDFS hdfs = new HDFS(conf);
    hdfs.rmr(outputPath);

    job.setMapperClass(MultiplicationMapper.class);
    job.setReducerClass(MultiplicationReducer.class);
    job.setJarByClass(IAndKMatrixMultiplicationStep2.class);
    job.setNumReduceTasks(ItemBasedCFDriver.ReducerNumber);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);
}
Example #30
Source File: SamplerJob.java From hiped2 with Apache License 2.0
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();

    if (result != 0) {
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    Job job = new Job(conf);
    job.setJarByClass(SamplerJob.class);

    ReservoirSamplerInputFormat.setInputFormat(job, TextInputFormat.class);
    ReservoirSamplerInputFormat.setNumSamples(job, 10);
    ReservoirSamplerInputFormat.setMaxRecordsToRead(job, 10000);
    ReservoirSamplerInputFormat.setUseSamplesNumberPerInputSplit(job, true);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    outputPath.getFileSystem(conf).delete(outputPath, true);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}