org.apache.hadoop.mapred.OutputCollector Java Examples
The following examples show how to use org.apache.hadoop.mapred.OutputCollector, drawn from open-source projects. The source file, project, and license are noted above each example.
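Before diving into the project code, here is a minimal, self-contained sketch of the pattern most of the examples below follow: a Mapper from the old org.apache.hadoop.mapred API emitting key/value pairs through OutputCollector.collect(). The class name TokenMapper and the fields word and ONE are illustrative, not taken from any of the projects referenced below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Illustrative mapper: emits a <token, 1> pair for every whitespace-separated
// token in the input line. OutputCollector.collect(K, V) is the only method
// the old mapred API exposes to map() and reduce() for emitting output pairs.
public class TokenMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, IntWritable> {

  private final Text word = new Text();
  private static final IntWritable ONE = new IntWritable(1);

  @Override
  public void map(LongWritable key, Text value,
                  OutputCollector<Text, IntWritable> output,
                  Reporter reporter) throws IOException {
    StringTokenizer itr = new StringTokenizer(value.toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      output.collect(word, ONE); // each pair is copied as it is collected
    }
  }
}

collect() may be called any number of times per input record, including zero, and the framework copies each pair when it is collected, so reusing mutable Writable instances such as word between calls is the standard idiom; the WordCount examples below do exactly that.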
Example #1
Source File: TestDFSIO.java From hadoop with Apache License 2.0

@Override // IOMapperBase
void collectStats(OutputCollector<Text, Text> output,
                  String name,
                  long execTime,
                  Long objSize) throws IOException {
  long totalSize = objSize.longValue();
  float ioRateMbSec = (float)totalSize * 1000 / (execTime * MEGA);
  LOG.info("Number of bytes processed = " + totalSize);
  LOG.info("Exec time = " + execTime);
  LOG.info("IO rate = " + ioRateMbSec);

  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "tasks"),
      new Text(String.valueOf(1)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "size"),
      new Text(String.valueOf(totalSize)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_LONG + "time"),
      new Text(String.valueOf(execTime)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "rate"),
      new Text(String.valueOf(ioRateMbSec * 1000)));
  output.collect(new Text(AccumulatingReducer.VALUE_TYPE_FLOAT + "sqrate"),
      new Text(String.valueOf(ioRateMbSec * ioRateMbSec * 1000)));
}
Example #2
Source File: DataJoinReducerBase.java From hadoop with Apache License 2.0

// Groups the values for this key by source tag, joins across the groups,
// then closes the per-tag iterators.
public void reduce(Object key, Iterator values,
                   OutputCollector output, Reporter reporter) throws IOException {
  if (this.reporter == null) {
    this.reporter = reporter;
  }
  SortedMap<Object, ResetableIterator> groups = regroup(key, values, reporter);
  Object[] tags = groups.keySet().toArray();
  ResetableIterator[] groupValues = new ResetableIterator[tags.length];
  for (int i = 0; i < tags.length; i++) {
    groupValues[i] = groups.get(tags[i]);
  }
  joinAndCollect(tags, groupValues, key, output, reporter);
  addLongValue("groupCount", 1);
  for (int i = 0; i < tags.length; i++) {
    groupValues[i].close();
  }
}
Example #3
Source File: PipesReducer.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
private void startApplication(OutputCollector<K3, V3> output, Reporter reporter)
    throws IOException {
  if (application == null) {
    try {
      LOG.info("starting application");
      application = new Application<K2, V2, K3, V3>(
          job, null, output, reporter,
          (Class<? extends K3>) job.getOutputKeyClass(),
          (Class<? extends V3>) job.getOutputValueClass());
      downlink = application.getDownlink();
    } catch (InterruptedException ie) {
      throw new RuntimeException("interrupted", ie);
    }
    int reduce = 0;
    downlink.runReduce(reduce, Submitter.getIsJavaRecordWriter(job));
  }
}
Example #4
Source File: ValueAggregatorCombiner.java From hadoop with Apache License 2.0

/** Combines values for a given key.
 * @param key the key is expected to be a Text object, whose prefix indicates
 * the type of aggregation to aggregate the values.
 * @param values the values to combine
 * @param output to collect combined values
 */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  String keyStr = key.toString();
  int pos = keyStr.indexOf(ValueAggregatorDescriptor.TYPE_SEPARATOR);
  String type = keyStr.substring(0, pos);
  ValueAggregator aggregator = ValueAggregatorBaseDescriptor
    .generateValueAggregator(type);
  while (values.hasNext()) {
    aggregator.addNextValue(values.next());
  }
  Iterator outputs = aggregator.getCombinerOutput().iterator();
  while (outputs.hasNext()) {
    Object v = outputs.next();
    if (v instanceof Text) {
      output.collect(key, (Text)v);
    } else {
      output.collect(key, new Text(v.toString()));
    }
  }
}
Example #5
Source File: HadoopArchives.java From hadoop with Apache License 2.0

public void reduce(IntWritable key, Iterator<Text> values,
                   OutputCollector<Text, Text> out,
                   Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos
                          + " " + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example #6
Source File: PipesReducer.java From hadoop with Apache License 2.0

/**
 * Process all of the keys and values. Start up the application if we haven't
 * started it yet.
 */
public void reduce(K2 key, Iterator<V2> values,
                   OutputCollector<K3, V3> output,
                   Reporter reporter) throws IOException {
  isOk = false;
  startApplication(output, reporter);
  downlink.reduceKey(key);
  while (values.hasNext()) {
    downlink.reduceValue(values.next());
  }
  if (skipping) {
    // flush the streams on every record input if running in skip mode
    // so that we don't buffer other records surrounding a bad record.
    downlink.flush();
  }
  isOk = true;
}
Example #7
Source File: TestDatamerge.java From hadoop with Apache License 2.0

public void map(IntWritable key, IntWritable val,
                OutputCollector<IntWritable, IntWritable> out,
                Reporter reporter) throws IOException {
  int k = key.get();
  final int vali = val.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  if (0 == k % (srcs * srcs)) {
    assertTrue(kvstr, vali == k * 10 / srcs + srcs - 1);
  } else {
    final int i = k % srcs;
    assertTrue(kvstr, srcs * (vali - i) == 10 * (k - i));
  }
  out.collect(key, one);
}
Example #8
Source File: ExternalMapperReducer.java From hadoop with Apache License 2.0

public void map(WritableComparable key, Writable value,
                OutputCollector<ExternalWritable, IntWritable> output,
                Reporter reporter) throws IOException {
  if (value instanceof Text) {
    Text text = (Text)value;
    ExternalWritable ext = new ExternalWritable(text.toString());
    output.collect(ext, new IntWritable(1));
  }
}
Example #9
Source File: JobControlTestUtils.java From hadoop with Apache License 2.0

/** Discards the input keys: every value is re-emitted under a single empty key. */
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  Text dumbKey = new Text("");
  while (values.hasNext()) {
    Text data = values.next();
    output.collect(dumbKey, data);
  }
}
Example #10
Source File: OutputHandler.java From hadoop with Apache License 2.0

/**
 * Create a handler that will handle any records output from the application.
 * @param collector the "real" collector that takes the output
 * @param reporter the reporter for reporting progress
 */
public OutputHandler(OutputCollector<K, V> collector, Reporter reporter,
                     RecordReader<FloatWritable, NullWritable> recordReader,
                     String expectedDigest) {
  this.reporter = reporter;
  this.collector = collector;
  this.recordReader = recordReader;
  this.expectedDigest = expectedDigest;
}
Example #11
Source File: ChainMapper.java From hadoop with Apache License 2.0

/**
 * Chains the <code>map(...)</code> methods of the Mappers in the chain.
 */
@SuppressWarnings({"unchecked"})
public void map(Object key, Object value, OutputCollector output,
                Reporter reporter) throws IOException {
  Mapper mapper = chain.getFirstMap();
  if (mapper != null) {
    mapper.map(key, value, chain.getMapperCollector(0, output, reporter),
               reporter);
  }
}
Example #12
Source File: FieldSelectionMapReduce.java From hadoop with Apache License 2.0

/**
 * The map function. Extracts the configured output key and value fields
 * from the input key/value pair and emits them.
 */
public void map(K key, V val,
                OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
  FieldSelectionHelper helper = new FieldSelectionHelper(
    FieldSelectionHelper.emptyText, FieldSelectionHelper.emptyText);
  helper.extractOutputKeyValue(key.toString(), val.toString(),
    fieldSeparator, mapOutputKeyFieldList, mapOutputValueFieldList,
    allMapValueFieldsFrom, ignoreInputKey, true);
  output.collect(helper.getKey(), helper.getValue());
}
Example #13
Source File: FieldSelectionMapReduce.java From hadoop with Apache License 2.0

public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  String keyStr = key.toString() + this.fieldSeparator;
  while (values.hasNext()) {
    FieldSelectionHelper helper = new FieldSelectionHelper();
    helper.extractOutputKeyValue(keyStr, values.next().toString(),
      fieldSeparator, reduceOutputKeyFieldList, reduceOutputValueFieldList,
      allReduceValueFieldsFrom, false, false);
    output.collect(helper.getKey(), helper.getValue());
  }
}
Example #14
Source File: ValueAggregatorMapper.java From hadoop with Apache License 2.0

/**
 * the map function. It iterates through the value aggregator descriptor
 * list to generate aggregation id/value pairs and emit them.
 */
public void map(K1 key, V1 value,
                OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
  Iterator iter = this.aggregatorDescriptorList.iterator();
  while (iter.hasNext()) {
    ValueAggregatorDescriptor ad = (ValueAggregatorDescriptor) iter.next();
    Iterator<Entry<Text, Text>> ens =
      ad.generateKeyValPairs(key, value).iterator();
    while (ens.hasNext()) {
      Entry<Text, Text> en = ens.next();
      output.collect(en.getKey(), en.getValue());
    }
  }
}
Example #15
Source File: PipeMapRed.java From hadoop with Apache License 2.0

void startOutputThreads(OutputCollector output, Reporter reporter)
    throws IOException {
  inWriter_ = createInputWriter();
  outReader_ = createOutputReader();
  outThread_ = new MROutputThread(outReader_, output, reporter);
  outThread_.start();
  errThread_ = new MRErrorThread();
  errThread_.setReporter(reporter);
  errThread_.start();
}
Example #16
Source File: DelegatingMapper.java From hadoop with Apache License 2.0

@SuppressWarnings("unchecked")
public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector,
                Reporter reporter) throws IOException {
  if (mapper == null) {
    // Find the Mapper from the TaggedInputSplit.
    TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit();
    mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit
       .getMapperClass(), conf);
  }
  mapper.map(key, value, outputCollector, reporter);
}
Example #17
Source File: LoadGeneratorMR.java From hadoop with Apache License 2.0

@Override
public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output,
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  if (key.equals(OPEN_EXECTIME)) {
    executionTime[OPEN] = sum;
  } else if (key.equals(NUMOPS_OPEN)) {
    numOfOps[OPEN] = sum;
  } else if (key.equals(LIST_EXECTIME)) {
    executionTime[LIST] = sum;
  } else if (key.equals(NUMOPS_LIST)) {
    numOfOps[LIST] = sum;
  } else if (key.equals(DELETE_EXECTIME)) {
    executionTime[DELETE] = sum;
  } else if (key.equals(NUMOPS_DELETE)) {
    numOfOps[DELETE] = sum;
  } else if (key.equals(CREATE_EXECTIME)) {
    executionTime[CREATE] = sum;
  } else if (key.equals(NUMOPS_CREATE)) {
    numOfOps[CREATE] = sum;
  } else if (key.equals(WRITE_CLOSE_EXECTIME)) {
    System.out.println(WRITE_CLOSE_EXECTIME + " = " + sum);
    executionTime[WRITE_CLOSE] = sum;
  } else if (key.equals(NUMOPS_WRITE_CLOSE)) {
    numOfOps[WRITE_CLOSE] = sum;
  } else if (key.equals(TOTALOPS)) {
    totalOps = sum;
  } else if (key.equals(ELAPSED_TIME)) {
    totalTime = sum;
  }
  result.set(sum);
  output.collect(key, result);
  // System.out.println("Key = " + key + " Sum is =" + sum);
  // printResults(System.out);
}
Example #18
Source File: DataJoinReducerBase.java From hadoop with Apache License 2.0

/**
 * The subclass can overwrite this method to perform additional filtering
 * and/or other processing logic before a value is collected.
 *
 * @param key
 * @param aRecord
 * @param output
 * @param reporter
 * @throws IOException
 */
protected void collect(Object key, TaggedMapOutput aRecord,
                       OutputCollector output, Reporter reporter)
    throws IOException {
  this.collected += 1;
  addLongValue("collectedCount", 1);
  if (aRecord != null) {
    output.collect(key, aRecord.getData());
    reporter.setStatus("key: " + key.toString() + " collected: " + collected);
    addLongValue("actuallyCollectedCount", 1);
  }
}
Example #19
Source File: TestDatamerge.java From hadoop with Apache License 2.0

public void map(IntWritable key, TupleWritable val,
                OutputCollector<IntWritable, IntWritable> out,
                Reporter reporter) throws IOException {
  int k = key.get();
  final String kvstr = "Unexpected tuple: " + stringify(key, val);
  assertTrue(kvstr, 0 == k % (srcs * srcs));
  for (int i = 0; i < val.size(); ++i) {
    final int vali = ((IntWritable)val.get(i)).get();
    assertTrue(kvstr, (vali - i) * srcs == 10 * k);
  }
  out.collect(key, one);
}
Example #20
Source File: TestDatamerge.java From hadoop with Apache License 2.0

public void reduce(IntWritable key, Iterator<IntWritable> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  int seen = 0;
  while (values.hasNext()) {
    seen += values.next().get();
  }
  assertTrue("Bad count for " + key.get(), verify(key.get(), seen));
}
Example #21
Source File: HadoopArchives.java From hadoop with Apache License 2.0

public void map(LongWritable key, HarEntry value,
                OutputCollector<IntWritable, Text> out,
                Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) {
    towrite = encodeName(relPath.toString())
              + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child: value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    // reading directories is also progress
    reporter.progress();
  } else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() +
        " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString()) + " file " + partname + " "
              + startPos + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example #22
Source File: PipeMapRed.java From hadoop with Apache License 2.0

// Daemon thread that forwards records read back from the external streaming
// process to the real OutputCollector.
MROutputThread(OutputReader outReader, OutputCollector outCollector,
               Reporter reporter) {
  setDaemon(true);
  this.outReader = outReader;
  this.outCollector = outCollector;
  this.reporter = reporter;
}
Example #23
Source File: WordCount.java From hadoop with Apache License 2.0

public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
Example #24
Source File: MRCaching.java From hadoop with Apache License 2.0

public void reduce(Text key, Iterator<IntWritable> values,
                   OutputCollector<Text, IntWritable> output,
                   Reporter reporter) throws IOException {
  int sum = 0;
  while (values.hasNext()) {
    sum += values.next().get();
  }
  output.collect(key, new IntWritable(sum));
}
Example #25
Source File: MRCaching.java From hadoop with Apache License 2.0

public void map(LongWritable key, Text value,
                OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
  String line = value.toString();
  StringTokenizer itr = new StringTokenizer(line);
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, one);
  }
}
Example #26
Source File: HadoopMapredCompatWordCount.java From Flink-CEPplus with Apache License 2.0

@Override
public void map(LongWritable k, Text v,
                OutputCollector<Text, LongWritable> out,
                Reporter rep) throws IOException {
  // normalize and split the line
  String line = v.toString();
  String[] tokens = line.toLowerCase().split("\\W+");

  // emit the pairs
  for (String token : tokens) {
    if (token.length() > 0) {
      out.collect(new Text(token), new LongWritable(1L));
    }
  }
}
Example #27
Source File: IdentityReducer.java From hadoop with Apache License 2.0

/** Writes all keys and values directly to output. */
public void reduce(K key, Iterator<V> values,
                   OutputCollector<K, V> output,
                   Reporter reporter) throws IOException {
  while (values.hasNext()) {
    output.collect(key, values.next());
  }
}
Example #28
Source File: SliveReducer.java From hadoop with Apache License 2.0

@Override // Reducer
public void reduce(Text key, Iterator<Text> values,
                   OutputCollector<Text, Text> output,
                   Reporter reporter) throws IOException {
  OperationOutput collector = null;
  int reduceAm = 0;
  int errorAm = 0;
  logAndSetStatus(reporter, "Iterating over reduction values for key " + key);
  while (values.hasNext()) {
    Text value = values.next();
    try {
      OperationOutput val = new OperationOutput(key, value);
      if (collector == null) {
        collector = val;
      } else {
        collector = OperationOutput.merge(collector, val);
      }
      LOG.info("Combined " + val + " into/with " + collector);
      ++reduceAm;
    } catch (Exception e) {
      ++errorAm;
      logAndSetStatus(reporter, "Error iterating over reduction input "
          + value + " due to : " + StringUtils.stringifyException(e));
      if (getConfig().shouldExitOnFirstError()) {
        break;
      }
    }
  }
  logAndSetStatus(reporter, "Reduced " + reduceAm + " values with " + errorAm
      + " errors");
  if (collector != null) {
    logAndSetStatus(reporter, "Writing output " + collector.getKey() + " : "
        + collector.getOutputValue());
    output.collect(collector.getKey(), collector.getOutputValue());
  }
}
Example #29
Source File: TokenCountMapper.java From hadoop with Apache License 2.0

public void map(K key, Text value,
                OutputCollector<Text, LongWritable> output,
                Reporter reporter) throws IOException {
  // get input text
  String text = value.toString();       // value is line of text

  // tokenize the value
  StringTokenizer st = new StringTokenizer(text);
  while (st.hasMoreTokens()) {
    // output <token,1> pairs
    output.collect(new Text(st.nextToken()), new LongWritable(1));
  }
}
Example #30
Source File: FailMapper.java From hadoop with Apache License 2.0

public void map(WritableComparable key, Writable value,
                OutputCollector<WritableComparable, Writable> out,
                Reporter reporter) throws IOException {
  // NOTE- the next line is required for the TestDebugScript test to succeed
  System.err.println("failing map");
  throw new RuntimeException("failing map");
}