org.apache.hadoop.mapreduce.Reducer Java Examples
The following examples show how to use
org.apache.hadoop.mapreduce.Reducer.
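Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the typical pattern: subclass Reducer with the four key/value types, override reduce(), and write the aggregated result through the Context.

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/** Sums the integer values seen for each key, as in a word-count job. */
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

  private final IntWritable result = new IntWritable();

  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable value : values) {
      sum += value.get();
    }
    result.set(sum);
    context.write(key, result); // one (key, sum) pair per distinct key
  }
}

A reducer like this is registered on a job with job.setReducerClass(IntSumReducer.class); because summation is associative and commutative, the same class can typically also serve as a combiner via job.setCombinerClass.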
Example #1
Source File: FlowPartitionReducer.java From xxhadoop with Apache License 2.0 | 6 votes |
@Override
protected void reduce(Text key, Iterable<FlowBean> values,
    Reducer<Text, FlowBean, Text, FlowBean>.Context context)
    throws IOException, InterruptedException {
  //super.reduce(arg0, arg1, arg2);
  long upFlow = 0;
  long downFlow = 0;
  //long flowSum = 0;
  for (FlowBean flowBean : values) {
    upFlow += flowBean.getUpFlow();
    downFlow += flowBean.getDownFlow();
    //flowSum += flowBean.getSumFlow();
  }
  result.setPhoneNum(key.toString());
  result.setUpFlow(upFlow);
  result.setDownFlow(downFlow);
  //result.setSumFlow(flowSum);
  result.setSumFlow(upFlow + downFlow);
  context.write(key, result);
}
Example #2
Source File: CountPlan.java From rya with Apache License 2.0 | 6 votes |
@Override
public void reduce(final IntermediateProspect prospect, final Iterable<LongWritable> counts,
    final Date timestamp, final Reducer.Context context)
    throws IOException, InterruptedException {
  long sum = 0;
  for (final LongWritable count : counts) {
    sum += count.get();
  }

  final String indexType = prospect.getTripleValueType().getIndexType();

  // not sure if this is the best idea..
  if ((sum >= 0) || indexType.equals(TripleValueType.PREDICATE.getIndexType())) {
    final Mutation m = new Mutation(indexType + DELIM + prospect.getData() + DELIM
        + ProspectorUtils.getReverseIndexDateTime(timestamp));

    final String dataType = prospect.getDataType();
    final ColumnVisibility visibility = new ColumnVisibility(prospect.getVisibility());
    final Value sumValue = new Value(("" + sum).getBytes(StandardCharsets.UTF_8));
    m.put(COUNT, prospect.getDataType(), visibility, timestamp.getTime(), sumValue);

    context.write(null, m);
  }
}
Example #3
Source File: MapReduceTestUtil.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {

  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Kill-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(KillMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
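Note that the stock Reducer class passed to setReducerClass here acts as an identity reducer, forwarding each key/value pair unchanged; and because setNumReduceTasks(0) makes the job map-only, the reduce phase is skipped entirely in this example.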
Example #4
Source File: StoreCopyReducer.java From geowave with Apache License 2.0 | 6 votes |
@Override
protected void reduceNativeValues(
    final GeoWaveInputKey key,
    final Iterable<Object> values,
    final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, Object>.Context context)
    throws IOException, InterruptedException {
  final Iterator<Object> objects = values.iterator();
  while (objects.hasNext()) {
    final AdapterToIndexMapping mapping =
        store.getIndicesForAdapter(key.getInternalAdapterId());
    context.write(
        new GeoWaveOutputKey<>(
            internalAdapterStore.getTypeName(mapping.getAdapterId()),
            mapping.getIndexNames()),
        objects.next());
  }
}
Example #5
Source File: Chain.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Add reducer that reads from context and writes to a queue
 */
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> outputQueue) throws IOException,
    InterruptedException {
  Class<?> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass,
      outputQueue, rConf);
  Reducer.Context reducerContext = createReduceContext(rw,
      (ReduceContext) inputContext, rConf);
  ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
  threads.add(runner);
}
Example #6
Source File: MapReduceUtil.java From kylin with Apache License 2.0 | 6 votes |
private static int getReduceTaskNum(double totalSizeInM, KylinConfig kylinConfig) {
  double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
  double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();

  // number of reduce tasks
  int numReduceTasks = (int) Math.round(totalSizeInM / perReduceInputMB * reduceCountRatio);

  // at least 1 reducer by default
  numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
  // no more than 500 reducer by default
  numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

  logger.info("Having total map input MB " + Math.round(totalSizeInM));
  logger.info("Having per reduce MB " + perReduceInputMB);
  logger.info("Setting " + Reducer.Context.NUM_REDUCES + "=" + numReduceTasks);
  return numReduceTasks;
}
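To illustrate the formula with hypothetical numbers: 10,000 MB of total map input, 500 MB per reducer, and a count ratio of 1.0 give round(10000 / 500 * 1.0) = 20 reduce tasks, which is then clamped to the configured minimum (at least 1 by default) and maximum (500 by default).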
Example #7
Source File: DataValidationReducer.java From jumbune with GNU Lesser General Public License v3.0 | 6 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) protected void setup(Reducer.Context context) throws IOException, InterruptedException { super.setup(context); maxViolationsInReport = context.getConfiguration().getInt(DataValidationConstants.DV_NUM_REPORT_VIOLATION, 1000); String dir = context.getConfiguration().get(SLAVE_FILE_LOC); dirPath = JobUtil.getAndReplaceHolders(dir); fileHandlerMap = new DVLRUCache(DataValidationConstants.TEN); offsetLinesMap = new TreeMap<>(); ViolationPersistenceBean bean = new ViolationPersistenceBean(); bean.setLineNum(Integer.MAX_VALUE); nullMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); dataTypeMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); regexMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); numFieldsMap = new MultiValueTreeMap<String,ViolationPersistenceBean>(maxViolationsInReport); fileNames = new HashSet<String>(); }
Example #8
Source File: Chain.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Add reducer that reads from context and writes to a queue
 */
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext,
    ChainBlockingQueue<KeyValuePair<?, ?>> outputQueue) throws IOException,
    InterruptedException {
  Class<?> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS, Object.class);
  Class<?> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS,
      Object.class);
  RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass,
      outputQueue, rConf);
  Reducer.Context reducerContext = createReduceContext(rw,
      (ReduceContext) inputContext, rConf);
  ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
  threads.add(runner);
}
Example #9
Source File: MapReduceUtil.java From kylin-on-parquet-v2 with Apache License 2.0 | 6 votes |
private static int getReduceTaskNum(double totalSizeInM, KylinConfig kylinConfig) {
  double perReduceInputMB = kylinConfig.getDefaultHadoopJobReducerInputMB();
  double reduceCountRatio = kylinConfig.getDefaultHadoopJobReducerCountRatio();

  // number of reduce tasks
  int numReduceTasks = (int) Math.round(totalSizeInM / perReduceInputMB * reduceCountRatio);

  // at least 1 reducer by default
  numReduceTasks = Math.max(kylinConfig.getHadoopJobMinReducerNumber(), numReduceTasks);
  // no more than 500 reducer by default
  numReduceTasks = Math.min(kylinConfig.getHadoopJobMaxReducerNumber(), numReduceTasks);

  logger.info("Having total map input MB " + Math.round(totalSizeInM));
  logger.info("Having per reduce MB " + perReduceInputMB);
  logger.info("Setting " + Reducer.Context.NUM_REDUCES + "=" + numReduceTasks);
  return numReduceTasks;
}
Example #10
Source File: MapReduceTestUtil.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir,
    Path... indirs) throws Exception {

  FileSystem fs = outdir.getFileSystem(conf);
  if (fs.exists(outdir)) {
    fs.delete(outdir, true);
  }
  conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
  Job theJob = Job.getInstance(conf);
  theJob.setJobName("Fail-Job");

  FileInputFormat.setInputPaths(theJob, indirs);
  theJob.setMapperClass(FailMapper.class);
  theJob.setReducerClass(Reducer.class);
  theJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(theJob, outdir);
  theJob.setOutputKeyClass(Text.class);
  theJob.setOutputValueClass(Text.class);
  return theJob;
}
Example #11
Source File: KMeansDriver.java From flink-perf with Apache License 2.0 | 6 votes |
public static void convertCentersSequenceFileToText(Configuration conf, FileSystem fs,
    String seqFilePath, String outputPath) throws Exception {
  Path seqFile = new Path(seqFilePath);
  Path output = new Path(outputPath);
  if (fs.exists(output)) {
    fs.delete(output, true);
  }
  Job job = Job.getInstance(conf);
  job.setMapperClass(CenterSequenceToTextConverter.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(0);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  FileInputFormat.addInputPath(job, seqFile);
  FileOutputFormat.setOutputPath(job, output);
  job.waitForCompletion(true);
}
Example #12
Source File: UpdateCentroidCostMapReduce.java From geowave with Apache License 2.0 | 6 votes |
@Override
public void reduce(
    final GroupIDText key,
    final Iterable<CountofDoubleWritable> values,
    final Reducer<GroupIDText, CountofDoubleWritable, GroupIDText, CountofDoubleWritable>.Context context)
    throws IOException, InterruptedException {
  double expectation = 0;
  double ptCount = 0;
  for (final CountofDoubleWritable value : values) {
    expectation += value.getValue();
    ptCount += value.getCount();
  }
  outputValue.set(expectation, ptCount);
  context.write(key, outputValue);
}
Example #13
Source File: FlowSumReducer.java From xxhadoop with Apache License 2.0 | 6 votes |
@Override
protected void reduce(Text key, Iterable<FlowBean> values,
    Reducer<Text, FlowBean, Text, FlowBean>.Context context)
    throws IOException, InterruptedException {
  //super.reduce(arg0, arg1, arg2);
  long upFlow = 0;
  long downFlow = 0;
  //long flowSum = 0;
  for (FlowBean flowBean : values) {
    upFlow += flowBean.getUpFlow();
    downFlow += flowBean.getDownFlow();
    //flowSum += flowBean.getSumFlow();
  }
  result.setPhoneNum(key.toString());
  result.setUpFlow(upFlow);
  result.setDownFlow(downFlow);
  //result.setSumFlow(flowSum);
  result.setSumFlow(upFlow + downFlow);
  context.write(key, result);
}
Example #14
Source File: Chain.java From hadoop with Apache License 2.0 | 5 votes |
ReduceRunner(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context,
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer,
    RecordWriter<KEYOUT, VALUEOUT> rw) throws IOException,
    InterruptedException {
  this.reducer = reducer;
  this.chainContext = context;
  this.rw = rw;
}
Example #15
Source File: JobContextImpl.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get the {@link Reducer} class for the job.
 *
 * @return the {@link Reducer} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Reducer<?,?,?,?>> getReducerClass()
    throws ClassNotFoundException {
  return (Class<? extends Reducer<?,?,?,?>>)
      conf.getClass(REDUCE_CLASS_ATTR, Reducer.class);
}
Example #16
Source File: JobContextImpl.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Get the combiner class for the job.
 *
 * @return the combiner class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Reducer<?,?,?,?>> getCombinerClass()
    throws ClassNotFoundException {
  return (Class<? extends Reducer<?,?,?,?>>)
      conf.getClass(COMBINE_CLASS_ATTR, null);
}
Example #17
Source File: TestLineRecordReaderJobs.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
Example #18
Source File: GeoWaveWritableOutputReducer.java From geowave with Apache License 2.0 | 5 votes |
protected void reduceWritableValues(
    final KEYIN key,
    final Iterable<VALUEIN> values,
    final Reducer<KEYIN, VALUEIN, GeoWaveInputKey, ObjectWritable>.Context context)
    throws IOException, InterruptedException {
  reduceNativeValues(key, values, new NativeReduceContext(context, serializationTool));
}
Example #19
Source File: DataProfNoCriteriaReducer.java From jumbune with GNU Lesser General Public License v3.0 | 5 votes |
@Override
protected void setup(Reducer<Text, IntWritable, Text, IntWritable>.Context context)
    throws IOException, InterruptedException {
  sorted = new DataProfNoCritBean[1000];
  DataProfNoCritBean dataProfNoCritBean = new DataProfNoCritBean();
  dataProfNoCritBean.setKey(INITIAL_KEY);
  dataProfNoCritBean.setValue(Integer.MIN_VALUE);
  sorted[0] = dataProfNoCritBean;
}
Example #20
Source File: GeoWaveWritableOutputReducer.java From geowave with Apache License 2.0 | 5 votes |
@Override
protected void reduce(
    final KEYIN key,
    final Iterable<VALUEIN> values,
    final Reducer<KEYIN, VALUEIN, GeoWaveInputKey, ObjectWritable>.Context context)
    throws IOException, InterruptedException {
  reduceWritableValues(key, values, context);
}
Example #21
Source File: Upgrade322Tool.java From rya with Apache License 2.0 | 5 votes |
@Override
public int run(String[] strings) throws Exception {
  conf.set(MRUtils.JOB_NAME_PROP, "Upgrade to Rya 3.2.2");
  //faster
  init();

  Job job = new Job(conf);
  job.setJarByClass(Upgrade322Tool.class);
  setupAccumuloInput(job);

  AccumuloInputFormat.setInputTableName(job,
      MRUtils.getTablePrefix(conf) + TBL_OSP_SUFFIX);

  //we do not need to change any row that is a string, custom, or iri type
  IteratorSetting regex = new IteratorSetting(30, "regex", RegExFilter.class);
  RegExFilter.setRegexs(regex,
      "\\w*" + TYPE_DELIM + "[\u0003|\u0008|\u0002]", null, null, null, false);
  RegExFilter.setNegate(regex, true);

  // set input output of the particular job
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);

  setupAccumuloOutput(job, MRUtils.getTablePrefix(conf) + TBL_SPO_SUFFIX);

  // set mapper and reducer classes
  job.setMapperClass(Upgrade322Mapper.class);
  job.setReducerClass(Reducer.class);

  // Submit the job
  return job.waitForCompletion(true) ? 0 : 1;
}
Example #22
Source File: Chain.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Create a reduce context that is based on ChainMapContext and the given
 * record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
        RecordWriter<KEYOUT, VALUEOUT> rw,
        ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
        Configuration conf) {
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reduceContext =
      new ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rw, conf);
  Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(reduceContext);
  return reducerContext;
}
Example #23
Source File: TestLineRecordReaderJobs.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
Example #24
Source File: Chain.java From big-c with Apache License 2.0 | 5 votes |
ReduceRunner(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context,
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer,
    RecordWriter<KEYOUT, VALUEOUT> rw) throws IOException,
    InterruptedException {
  this.reducer = reducer;
  this.chainContext = context;
  this.rw = rw;
}
Example #25
Source File: Chain.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Create a reduce context that is based on ChainMapContext and the given
 * record writer
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
        RecordWriter<KEYOUT, VALUEOUT> rw,
        ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
        Configuration conf) {
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reduceContext =
      new ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rw, conf);
  Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(reduceContext);
  return reducerContext;
}
Example #26
Source File: PutSortReducer.java From hbase with Apache License 2.0 | 5 votes |
@Override
protected void setup(
    Reducer<ImmutableBytesWritable, Put, ImmutableBytesWritable, KeyValue>.Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  this.kvCreator = new CellCreator(conf);
}
Example #27
Source File: GATKTools.java From halvade with GNU General Public License v3.0 | 5 votes |
public void setContext(Reducer.Context context) {
  this.context = context;
  // mem = context.getConfiguration().get("mapreduce.reduce.java.opts");
  mem = "-Xmx"
      + (int) (0.8 * Integer.parseInt(context.getConfiguration().get("mapreduce.reduce.memory.mb")))
      + "m";
  String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "java", "");
  if (customArgs != null)
    java.add(customArgs);
}
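For example, with a hypothetical setting of mapreduce.reduce.memory.mb=4096, the heap flag built above becomes -Xmx3276m, i.e. 80% of the reducer container memory, truncated to a whole number of megabytes.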
Example #28
Source File: InputToOutputKeyReducer.java From geowave with Apache License 2.0 | 5 votes |
@Override
protected void reduceNativeValues(
    final GeoWaveInputKey key,
    final Iterable<Object> values,
    final Reducer<GeoWaveInputKey, ObjectWritable, GeoWaveOutputKey, Object>.Context context)
    throws IOException, InterruptedException {
  outputKey.setTypeName(internalAdapterStore.getTypeName(key.getInternalAdapterId()));
  for (final Object value : values) {
    context.write(outputKey, value);
  }
}
Example #29
Source File: TransformBaseRunner.java From BigDataPlatform with GNU General Public License v3.0 | 5 votes |
public void setupRunner(String jobName, Class<?> runnerClass,
    Class<? extends TableMapper<?, ?>> mapperClass,
    Class<? extends Reducer<?, ?, ?, ?>> reducerClass,
    Class<? extends WritableComparable<?>> outputKeyClass,
    Class<? extends Writable> outputValueClass,
    Class<? extends OutputFormat<?, ?>> outputFormatClass) {
  this.setupRunner(jobName, runnerClass, mapperClass, reducerClass,
      outputKeyClass, outputValueClass,
      outputKeyClass, outputValueClass,
      outputFormatClass);
}
Example #30
Source File: BulkIngestInputGenerationIT.java From geowave with Apache License 2.0 | 5 votes |
@Override
public int run(final String[] args) throws Exception {

  final Configuration conf = getConf();
  conf.set("fs.defaultFS", "file:///");

  final Job job = Job.getInstance(conf, JOB_NAME);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, new Path(TEST_DATA_LOCATION));
  FileOutputFormat.setOutputPath(job, cleanPathForReuse(conf, OUTPUT_PATH));

  job.setMapperClass(SimpleFeatureToAccumuloKeyValueMapper.class);
  job.setReducerClass(Reducer.class); // (Identity Reducer)

  job.setInputFormatClass(GeonamesDataFileInputFormat.class);
  job.setOutputFormatClass(AccumuloFileOutputFormat.class);

  job.setMapOutputKeyClass(Key.class);
  job.setMapOutputValueClass(Value.class);
  job.setOutputKeyClass(Key.class);
  job.setOutputValueClass(Value.class);

  job.setNumReduceTasks(1);
  job.setSpeculativeExecution(false);

  final boolean result = job.waitForCompletion(true);

  mapInputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_INPUT_RECORDS).getValue();
  mapOutputRecords =
      job.getCounters().findCounter(TASK_COUNTER_GROUP_NAME, MAP_OUTPUT_RECORDS).getValue();

  return result ? 0 : 1;
}