Java Code Examples for org.apache.hadoop.mapred.Reporter#getInputSplit()
The following examples show how to use org.apache.hadoop.mapred.Reporter#getInputSplit(). This method returns the InputSplit that the current map task is processing; it is only meaningful inside a mapper, and callers typically cast the result to a concrete split type such as FileSplit to recover the path of the file being read.
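Before the project examples, here is a minimal sketch of the most common pattern: casting the returned split to FileSplit so each output record can be tagged with the name of the file it came from. The class name FileNameTaggingMapper is illustrative only and does not come from any of the projects below; the cast assumes a plain FileInputFormat-based job, where the split is always a FileSplit.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Minimal sketch: emit (file name, line) pairs using the split of the current task.
public class FileNameTaggingMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, Text> {

  public void map(LongWritable key, Text value,
      OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
    // getInputSplit() returns the split this map task is reading; for plain
    // FileInputFormat jobs it is a FileSplit, so the cast is safe here.
    FileSplit split = (FileSplit) reporter.getInputSplit();
    String fileName = split.getPath().getName();
    output.collect(new Text(fileName), value);
  }
}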
Example 1
Source File: DelegatingMapper.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked") public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector, Reporter reporter) throws IOException { if (mapper == null) { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), conf); } mapper.map(key, value, outputCollector, reporter); }
Example 2
Source File: DelegatingMapper.java From big-c with Apache License 2.0
@SuppressWarnings("unchecked") public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector, Reporter reporter) throws IOException { if (mapper == null) { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), conf); } mapper.map(key, value, outputCollector, reporter); }
Example 3
Source File: RegexIngestMapper.java From hadoop-solr with Apache License 2.0
@Override
public LWDocument[] toDocuments(Writable key, Writable value, Reporter reporter,
    Configuration conf) throws IOException {
  if (key != null && value != null) {
    LWDocument doc = createDocument(
        key.toString() + "-" + System.currentTimeMillis(), null);
    Matcher matcher = regex.matcher(value.toString());
    if (matcher != null) {
      if (match) {
        if (matcher.matches()) {
          processMatch(doc, matcher);
        }
      } else {
        while (matcher.find()) {
          processMatch(doc, matcher);
          reporter.progress(); // do we really even need this?
        }
      }
    }
    // Adding the file path where this record was taken
    FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
    String originalLogFilePath = fileSplit.getPath().toUri().getPath();
    doc.addField(FIELD_PATH, originalLogFilePath);
    String docId = originalLogFilePath + "-" + doc.getId();
    doc.setId(docId);
    return new LWDocument[] {doc};
  }
  return null;
}
Example 4
Source File: LineIndexer.java From attic-apex-malhar with Apache License 2.0
public void map(LongWritable key, Text val,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
  String fileName = fileSplit.getPath().getName();
  location.set(fileName);

  String line = val.toString();
  StringTokenizer itr = new StringTokenizer(line.toLowerCase());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, location);
  }
}
Example 5
Source File: DelegatingMapper.java From RDFS with Apache License 2.0
@SuppressWarnings("unchecked") public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector, Reporter reporter) throws IOException { if (mapper == null) { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), conf); } mapper.map(key, value, outputCollector, reporter); }
Example 6
Source File: InvertedIndex.java From hadoop-book with Apache License 2.0
public void map(LongWritable key, Text val,
    OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
  FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
  String fileName = fileSplit.getPath().getName();
  location.set(fileName);

  String line = val.toString();
  StringTokenizer itr = new StringTokenizer(line.toLowerCase());
  while (itr.hasMoreTokens()) {
    word.set(itr.nextToken());
    output.collect(word, location);
  }
}
Example 7
Source File: DelegatingMapper.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") public void map(K1 key, V1 value, OutputCollector<K2, V2> outputCollector, Reporter reporter) throws IOException { if (mapper == null) { // Find the Mapper from the TaggedInputSplit. TaggedInputSplit inputSplit = (TaggedInputSplit) reporter.getInputSplit(); mapper = (Mapper<K1, V1, K2, V2>) ReflectionUtils.newInstance(inputSplit .getMapperClass(), conf); } mapper.map(key, value, outputCollector, reporter); }
Example 8
Source File: GrokIngestMapper.java From hadoop-solr with Apache License 2.0
@Override
protected LWDocument[] toDocuments(LongWritable key, Text value, Reporter reporter,
    Configuration conf) throws IOException {
  Map<String, Object> params = new HashMap<String, Object>();
  params.put(LOG_RUBY_PARAM, value.toString());
  params.put(FILTERS_ARRAY_RUBY_PARAM, filters);
  List<String> toRemoveList = new ArrayList<String>();
  toRemoveList.add(LOG_RUBY_PARAM);
  toRemoveList.add(FILTERS_ARRAY_RUBY_PARAM);
  Object response = executeScript(MATCHER_RUBY_CLASS, params, toRemoveList);
  try {
    RubyHash hash = (RubyHash) response;
    if (response != null) {
      Set<String> keys = hash.keySet();
      LWDocument document = createDocument();
      for (String currentKey : keys) {
        document.addField(currentKey, hash.get(currentKey));
      }
      // Adding the file where this log was taken
      FileSplit fileSplit = (FileSplit) reporter.getInputSplit();
      String originalLogFilePath = fileSplit.getPath().toUri().getPath();
      document.addField(PATH_FIELD_NAME, originalLogFilePath);
      // Adding offset value
      document.addField(BYTE_OFFSET_FIELD_NAME, key.toString());
      // Set ID
      document.setId(originalLogFilePath + "-" + key.toString() + "-"
          + System.currentTimeMillis());
      return new LWDocument[] {document};
    } else {
      return null;
    }
  } catch (Exception e) {
    log.error("Error: " + e.getMessage());
    throw new RuntimeException("Error executing ruby script");
  }
}
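A closing caveat that applies to every example above: the cast applied to reporter.getInputSplit() is unchecked. As the DelegatingMapper examples illustrate, jobs wired through MultipleInputs hand mappers a TaggedInputSplit rather than a FileSplit, so a blind (FileSplit) cast would throw ClassCastException there, and getInputSplit() itself throws UnsupportedOperationException when called outside a mapper. The helper below is a defensive sketch, not taken from any project above; the class and method names are hypothetical.

import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

public final class SplitPaths {
  private SplitPaths() {}

  // Returns the input file path when the current split is a FileSplit,
  // or null for other split types (e.g. a TaggedInputSplit from MultipleInputs).
  public static String splitPathOrNull(Reporter reporter) {
    InputSplit split = reporter.getInputSplit();
    if (split instanceof FileSplit) {
      return ((FileSplit) split).getPath().toUri().getPath();
    }
    return null;
  }
}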