org.apache.hadoop.mapreduce.lib.output.TextOutputFormat Java Exaples

Source File: P1Q3.java From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0

6 votes

public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1Q3");
	job.setJarByClass(P1Q3.class);

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);

	job.setOutputKeyClass(IntWritable.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Q3Map.class);
	//job.setCombinerClass(P1Q3Reduce.class);
	job.setReducerClass(P1Q3Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	job.waitForCompletion(true);
}

Source File: TopKDataGen.java From sequenceiq-samples with Apache License 2.0

6 votes

private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
        throws IOException {

    long largeOutSizePerTask = outSize / numTasks;

    DAG dag = DAG.create("TopK DataGen");

    Vertex genDataVertex = Vertex.create("datagen", ProcessorDescriptor.create(
                    GenDataProcessor.class.getName()).setUserPayload(
                    UserPayload.create(ByteBuffer.wrap(GenDataProcessor.createConfiguration(largeOutSizePerTask, extraColumns)))),
            numTasks);
    genDataVertex.addDataSink(OUTPUT,
            MROutput.createConfigBuilder(new Configuration(tezConf),
                    TextOutputFormat.class, outPath.toUri().toString()).build());
    dag.addVertex(genDataVertex);

    return dag;
}

Source File: HadoopWordCount2.java From ignite with Apache License 2.0

6 votes

/**
 * Sets task classes with related info if needed into configuration object.
 *
 * @param job Configuration to change.
 * @param setMapper Option to set mapper and input format classes.
 * @param setCombiner Option to set combiner class.
 * @param setReducer Option to set reducer and output format classes.
 */
public static void setTasksClasses(Job job, boolean setMapper, boolean setCombiner, boolean setReducer,
        boolean outputCompression) {
    if (setMapper) {
        job.setMapperClass(HadoopWordCount2Mapper.class);
        job.setInputFormatClass(TextInputFormat.class);
    }

    if (setCombiner)
        job.setCombinerClass(HadoopWordCount2Combiner.class);

    if (setReducer) {
        job.setReducerClass(HadoopWordCount2Reducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
    }

    if (outputCompression) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

        SequenceFileOutputFormat.setCompressOutput(job, true);

        job.getConfiguration().set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
    }
}

Source File: LeftJoin.java From BigData-In-Practice with Apache License 2.0

6 votes

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    GenericOptionsParser optionparser = new GenericOptionsParser(conf, args);
    conf = optionparser.getConfiguration();

    Job job = Job.getInstance(conf, "leftjoin");
    job.setJarByClass(LeftJoin.class);
    FileInputFormat.addInputPaths(job, conf.get("input_dir"));
    Path out = new Path(conf.get("output_dir"));
    FileOutputFormat.setOutputPath(job, out);
    job.setNumReduceTasks(conf.getInt("reduce_num", 1));

    job.setMapperClass(LeftJoinMapper.class);
    job.setReducerClass(LeftJoinReduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    conf.set("mapred.textoutputformat.separator", ",");

    return (job.waitForCompletion(true) ? 0 : 1);
}

Source File: BigDiffHadoop.java From secure-data-service with Apache License 2.0

6 votes

public void execute(String inputPath1, String inputPath2, String outputPath) throws Exception {
    Configuration conf = new Configuration();

    Job job = new Job(conf, "bigdiff");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, new Path(inputPath1));
    FileInputFormat.addInputPath(job, new Path(inputPath2));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);
}

Source File: CellCounter.java From hbase with Apache License 2.0

6 votes

/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
    throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2]: ":";
  conf.set("ReportSeparator", reportSeparatorString);
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan,
      CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}

Source File: KMeansDriver.java From flink-perf with Apache License 2.0

6 votes

public static void convertCentersSequenceFileToText (Configuration conf, FileSystem fs, String seqFilePath, String outputPath) throws Exception {

		Path seqFile = new Path (seqFilePath);
		Path output = new Path (outputPath);
		if (fs.exists(output)) {
			fs.delete(output, true);
		}
		Job job = Job.getInstance(conf);
		job.setMapperClass(CenterSequenceToTextConverter.class);
		job.setReducerClass(Reducer.class);
		job.setNumReduceTasks(0);
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(Text.class);
		job.setOutputFormatClass(TextOutputFormat.class);
		job.setInputFormatClass(SequenceFileInputFormat.class);
		FileInputFormat.addInputPath(job, seqFile);
		FileOutputFormat.setOutputPath(job, output);
		job.waitForCompletion(true);
	}

Source File: ValueCooccurrencesTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: ValueCooccurrencesTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ValueCooccurrencesTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ValueCooccurrencesMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ValueCooccurrencesFunction.class, ValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: P1Q1.java From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0

6 votes

public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1Q1");
	job.setJarByClass(P1Q1.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Q1Map.class);
	job.setCombinerClass(P1Q1Reduce.class);
	job.setReducerClass(P1Q1Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	job.waitForCompletion(true);
}

Source File: ElemValueCooccurrencesTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("Usage: ElemValueCooccurrencesTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElemValueCooccurrencesTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElemCooccurrencesMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ElemValueCooccurrencesFunction.class, ElemValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: WordsTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("Usage: WordsTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(WordsTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(WordsMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        Words.class, Words.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: ValueMatchTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: ValueMatchTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ValueMatchTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ValueMatchMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ValueMatchFunction.class, ValueMatch.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: P1.java From IntroToHadoopAndMR__Udacity_Course with Apache License 2.0

6 votes

public final static void main(final String[] args) throws Exception {
	final Configuration conf = new Configuration();

	final Job job = new Job(conf, "P1");
	job.setJarByClass(P1.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setMapperClass(P1Map.class);
	job.setCombinerClass(P1Reduce.class);
	job.setReducerClass(P1Reduce.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	job.waitForCompletion(true);
}

Source File: ElementValueMatchTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 1) {
        System.err.println("Usage: ElementValueMatchTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElementValueMatchTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElementValueMatchMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ElementValueMatchFunction.class, ElementValueMatch.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: ElementValuesTest.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: ElementValuesTest configFile outputDir");
        System.exit(2);
    }

    Job job = Job.getInstance(conf);
    job.setJarByClass(ElementValuesTest.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(ElementValueMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, 
            Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, 
        ElementValuesFunction.class, ElementValues.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: LinkCountHDFS.java From marklogic-contentpump with Apache License 2.0

6 votes

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    
    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

Source File: FactDistinctColumnsReducerTest.java From kylin with Apache License 2.0

6 votes

private void testNormalDim() throws IOException {
    setConfigurations();
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_COLUMN, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, NullWritable.class, Text.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_DICT, reduceDriver.getConfiguration(),
            SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
    setMultipleOutputs(BatchConstants.CFG_OUTPUT_PARTITION, reduceDriver.getConfiguration(), TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    int nDimReducers = cubeDesc.getRowkey().getRowKeyColumns().length;
    setContextTaskId(nDimReducers - 1);

    ByteBuffer tmpBuf = ByteBuffer.allocate(4096);
    String val = "100";
    tmpBuf.put(Bytes.toBytes(val));
    Text outputKey1 = new Text();
    outputKey1.set(tmpBuf.array(), 0, tmpBuf.position());
    SelfDefineSortableKey key1 = new SelfDefineSortableKey();
    key1.init(outputKey1, (byte) 0);

    reduceDriver.setInput(key1, ImmutableList.of(new Text()));
    List<Pair<NullWritable, Text>> result = reduceDriver.run();
    assertEquals(0, result.size());
}

Source File: JMatrixMultiplicationStep3.java From RecommendationEngine with MIT License

5 votes

public static void run() throws IOException, ClassNotFoundException,
		InterruptedException {
	String inputPath = ItemBasedCFDriver.path.get("step9InputPath");
	String outputPath = ItemBasedCFDriver.path.get("step9OutputPath");

	Configuration conf = new Configuration();
	conf.set("mapred.textoutputformat.separator", ",");

	Job job = Job.getInstance(conf);

	HDFS hdfs = new HDFS(conf);
	hdfs.rmr(outputPath);

	job.setMapperClass(Step3_Mapper.class);
	job.setReducerClass(Step3_Reducer.class);
	job.setCombinerClass(Step3_Reducer.class);
	job.setJarByClass(JMatrixMultiplicationStep3.class);
	job.setNumReduceTasks(ItemBasedCFDriver.ReducerNumber);

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(DoubleWritable.class);
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);

	job.setInputFormatClass(TextInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);

	FileInputFormat.setInputPaths(job, new Path(inputPath));
	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	job.waitForCompletion(true);
}

Source File: Phase3Step2DistinctDataJob.java From dkpro-c4corpus with Apache License 2.0

5 votes

@Override
public int run(String[] args)
        throws Exception
{

    Job job = Job.getInstance(getConf());
    job.setJarByClass(Phase3Step2DistinctDataJob.class);
    job.setJobName(Phase3Step2DistinctDataJob.class.getName());

    //mapper
    job.setMapperClass(RemoveRedundantDataMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    //reducer
    job.setReducerClass(RemoveRedundantDataReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    //paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    job.setInputFormatClass(TextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    //i/o paths
    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

Source File: ContentTypeAndSizeDistribution.java From dkpro-c4corpus with Apache License 2.0

5 votes

@Override public int run(String[] args)
        throws Exception
{
    Job job = Job.getInstance(getConf());

    job.setJarByClass(ContentTypeAndSizeDistribution.class);

    job.setJobName(ContentTypeAndSizeDistribution.class.getName());

    // mapper
    job.setMapperClass(ContentAndSizeMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // reducer
    //        job.setReducerClass(DistributionReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = args[0];
    String outputPath = args[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

Source File: BlockMatrixMultiplicationStep1.java From RecommendationEngine with MIT License

5 votes

public static void run() throws IOException, ClassNotFoundException,
        InterruptedException {
    String inputPath1 = ItemBasedCFDriver.path.get("step7InputPath1");
    String inputPath2 = ItemBasedCFDriver.path.get("step7InputPath2");
    String outputPath = ItemBasedCFDriver.path.get("step7OutputPath");

    Configuration conf = new Configuration();
    conf.set("mapred.textoutputformat.separator", ",");

    Job job = Job.getInstance(conf);

    HDFS
            hdfs = new HDFS(conf);
    hdfs.rmr(outputPath);

    job.setMapperClass(BlockMatrixStep1_Mapper.class);

    job.setJarByClass(BlockMatrixMultiplicationStep1.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath1), new Path(
            inputPath2));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.waitForCompletion(true);

}

Source File: TextToSentencesSplitter.java From dkpro-c4corpus with Apache License 2.0

5 votes

@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf);
    job.setJarByClass(TextToSentencesSplitter.class);

    job.setJobName(TextToSentencesSplitter.class.getName());

    // mapper
    job.setMapperClass(TextToSentencesSplitter.MapperClass.class);
    job.setInputFormatClass(WARCInputFormat.class);

    // reducer
    job.setReducerClass(ReducerClass.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

Source File: CommunityCompression.java From distributed-graph-analytics with Apache License 2.0

5 votes

public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

Source File: GrepDriver.java From flink-perf with Apache License 2.0

5 votes

public static void main(String [] args) throws Exception {

        String in = args[0];
        String out = args[1];
        System.err.println("Using input=" + in);
        System.err.println("Using output=" + out);

        String patterns[] = new String[args.length - 2];
        System.arraycopy(args, 2, patterns, 0, args.length - 2);
        System.err.println("Using patterns: " + Arrays.toString(patterns));

        for (int i = 0; i < patterns.length; i++) {
            String pattern = patterns[i];
            Configuration conf = new Configuration();
            conf.set("pattern", pattern);
            Job job = Job.getInstance(conf, "Grep for " + pattern);
            job.setMapperClass(Grep.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);
            job.setNumReduceTasks(0);
            job.setJarByClass(Grep.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1] + "_" + pattern));

            if (!job.waitForCompletion(true)) {
                throw new RuntimeException("Grep job " + i + " failed");
            }
        }
    }

Source File: PerformanceEvaluation.java From hbase with Apache License 2.0

5 votes

/**
 * Run a mapreduce job.  Run as many maps as asked-for clients.
 * Before we start up the job, write out an input file with instruction
 * per client regards which row they are to start on.
 * @param cmd Command to run.
 */
private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);

  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}

Source File: JsonStorage.java From spork with Apache License 2.0

5 votes

@Override
public OutputFormat getOutputFormat() throws IOException {
    // We will use TextOutputFormat, the default Hadoop output format for
    // text.  The key is unused and the value will be a
    // Text (a string writable type) that we store our JSON data in.
    return new TextOutputFormat<LongWritable, Text>();
}

Source File: AbstractReasoningTool.java From rya with Apache License 2.0

5 votes

/**
 * Set up the MapReduce job to output a schema (TBox).
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
    MultipleOutputs.addNamedOutput(job, "schemaobj",
        SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
        TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}

Source File: JobContextImpl.java From big-c with Apache License 2.0

5 votes

/**
 * Get the {@link OutputFormat} class for the job.
 * 
 * @return the {@link OutputFormat} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends OutputFormat<?,?>> getOutputFormatClass() 
   throws ClassNotFoundException {
  return (Class<? extends OutputFormat<?,?>>) 
    conf.getClass(OUTPUT_FORMAT_CLASS_ATTR, TextOutputFormat.class);
}

Source File: SimpleTextSearch.java From dkpro-c4corpus with Apache License 2.0

5 votes

@Override
public int run(String[] args)
        throws Exception
{
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance();
    job.setJarByClass(SimpleTextSearch.class);

    job.setJobName(SimpleTextSearch.class.getName());

    // mapper
    job.setMapperClass(TextSearchMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // combiner + reducer
    job.setCombinerClass(TextLongCountingReducer.class);
    job.setReducerClass(TextLongCountingReducer.class);

    job.setInputFormatClass(WARCInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // paths
    String commaSeparatedInputFiles = otherArgs[0];
    String outputPath = otherArgs[1];

    // regex with a phrase to be searched for
    String regex = otherArgs[2];
    job.getConfiguration().set(MAPREDUCE_MAP_REGEX, regex);

    FileInputFormat.addInputPaths(job, commaSeparatedInputFiles);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

Source File: TestRecovery.java From big-c with Apache License 2.0

5 votes

private void writeOutput(TaskAttempt attempt, Configuration conf)
  throws Exception {
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, 
      TypeConverter.fromYarn(attempt.getID()));
  
  TextOutputFormat<?, ?> theOutputFormat = new TextOutputFormat();
  RecordWriter theRecordWriter = theOutputFormat
      .getRecordWriter(tContext);
  
  NullWritable nullWritable = NullWritable.get();
  try {
    theRecordWriter.write(key1, val1);
    theRecordWriter.write(null, nullWritable);
    theRecordWriter.write(null, val1);
    theRecordWriter.write(nullWritable, val2);
    theRecordWriter.write(key2, nullWritable);
    theRecordWriter.write(key1, null);
    theRecordWriter.write(null, null);
    theRecordWriter.write(key2, val2);
  } finally {
    theRecordWriter.close(tContext);
  }
  
  OutputFormat outputFormat = ReflectionUtils.newInstance(
      tContext.getOutputFormatClass(), conf);
  OutputCommitter committer = outputFormat.getOutputCommitter(tContext);
  committer.commitTask(tContext);
}

org.apache.hadoop.mapreduce.lib.output.TextOutputFormat Java Examples