Java Code Examples for org.apache.hadoop.mapred.Reporter#progress()
The following examples show how to use org.apache.hadoop.mapred.Reporter#progress().
The source file, originating project, and license are noted above each example.
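Reporter#progress() tells the MapReduce framework that the current task is still alive and making progress, which keeps long-running record processing from being killed once the configured task timeout elapses without any output, status, or counter updates. Before the project examples below, here is a minimal, hypothetical sketch of a mapper in the old org.apache.hadoop.mapred API that calls progress() periodically during slow per-record work; the class name, the processChunk helper, and the chunk count are illustrative and not taken from any of the listed projects.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

// Hypothetical mapper: processes each record in chunks and pings the
// framework after every chunk so the task is not marked as failed for
// inactivity while no key/value pairs are being emitted.
public class SlowRecordMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, Text, LongWritable> {

  @Override
  public void map(LongWritable key, Text value,
      OutputCollector<Text, LongWritable> output, Reporter reporter)
      throws IOException {
    // Simulated expensive per-record work split into chunks.
    for (int chunk = 0; chunk < 100; chunk++) {
      processChunk(value, chunk); // placeholder for real work
      // Report liveness without updating any counters or status text.
      reporter.progress();
    }
    output.collect(value, key);
  }

  private void processChunk(Text value, int chunk) {
    // placeholder: real implementations would do I/O or CPU-heavy work here
  }
}

The real-world examples that follow use the same call inside record readers, reducers, copy loops, and shuffle implementations.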
Example 1
Source File: CqlInputFormat.java From stratio-cassandra with Apache License 2.0
public RecordReader<Long, Row> getRecordReader(InputSplit split, JobConf jobConf, final Reporter reporter)
    throws IOException {
  TaskAttemptContext tac = new TaskAttemptContext(jobConf, TaskAttemptID.forName(jobConf.get(MAPRED_TASK_ID))) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  CqlRecordReader recordReader = new CqlRecordReader();
  recordReader.initialize((org.apache.hadoop.mapreduce.InputSplit) split, tac);
  return recordReader;
}
Example 2
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes());
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes());
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 3
Source File: HadoopArchives.java From hadoop with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 4
Source File: HiveDynamoDBInputFormat.java From emr-dynamodb-connector with Apache License 2.0
@Override
public RecordReader<Text, DynamoDBItemWritable> getRecordReader(InputSplit split, JobConf conf,
    Reporter reporter) throws IOException {
  reporter.progress();

  Map<String, String> columnMapping =
      HiveDynamoDBUtil.fromJsonString(conf.get(DynamoDBConstants.DYNAMODB_COLUMN_MAPPING));
  Map<String, String> hiveTypeMapping = HiveDynamoDBUtil.extractHiveTypeMapping(conf);
  DynamoDBQueryFilter queryFilter = getQueryFilter(conf, columnMapping, hiveTypeMapping);
  DynamoDBSplit bbSplit = (DynamoDBSplit) split;
  bbSplit.setDynamoDBFilterPushdown(queryFilter);

  Collection<String> attributes = (columnMapping == null ? null : columnMapping.values());
  DynamoDBRecordReaderContext context =
      buildHiveDynamoDBRecordReaderContext(bbSplit, conf, reporter, attributes);
  return new DefaultDynamoDBRecordReader(context);
}
Example 5
Source File: HadoopArchives.java From big-c with Apache License 2.0
public void reduce(IntWritable key, Iterator<Text> values,
    OutputCollector<Text, Text> out, Reporter reporter) throws IOException {
  keyVal = key.get();
  while (values.hasNext()) {
    Text value = values.next();
    String towrite = value.toString() + "\n";
    indexStream.write(towrite.getBytes(Charsets.UTF_8));
    written++;
    if (written > numIndexes - 1) {
      // every 1000 indexes we report status
      reporter.setStatus("Creating index for archives");
      reporter.progress();
      endIndex = keyVal;
      String masterWrite = startIndex + " " + endIndex + " " + startPos + " "
          + indexStream.getPos() + " \n";
      outStream.write(masterWrite.getBytes(Charsets.UTF_8));
      startPos = indexStream.getPos();
      startIndex = endIndex;
      written = 0;
    }
  }
}
Example 6
Source File: HadoopArchives.java From big-c with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = encodeProperties(srcStatus);
  if (value.isDir()) {
    towrite = encodeName(relPath.toString()) + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : value.children) {
      sbuff.append(encodeName(child) + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = encodeName(relPath.toString()) + " file " + partname + " " + startPos
        + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 7
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void map(LongWritable key, HarEntry value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  Path relPath = new Path(value.path);
  int hash = HarFileSystem.getHarHash(relPath);
  String towrite = null;
  Path srcPath = realPath(relPath, rootPath);
  long startPos = partStream.getPos();
  FileSystem srcFs = srcPath.getFileSystem(conf);
  FileStatus srcStatus = srcFs.getFileStatus(srcPath);
  String propStr = URLEncoder.encode(
      srcStatus.getModificationTime() + " "
          + srcStatus.getAccessTime() + " "
          + srcStatus.getPermission().toShort() + " "
          + URLEncoder.encode(srcStatus.getOwner(), "UTF-8") + " "
          + URLEncoder.encode(srcStatus.getGroup(), "UTF-8"),
      "UTF-8");
  if (value.isDir()) {
    towrite = URLEncoder.encode(relPath.toString(), "UTF-8") + " dir " + propStr + " 0 0 ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : value.children) {
      sbuff.append(URLEncoder.encode(child, "UTF-8") + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = URLEncoder.encode(relPath.toString(), "UTF-8") + " file " + partname + " "
        + startPos + " " + srcStatus.getLen() + " " + propStr + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 8
Source File: HadoopArchives.java From RDFS with Apache License 2.0
public void copyData(Path input, FSDataInputStream fsin, FSDataOutputStream fout,
    Reporter reporter) throws IOException {
  try {
    for (int cbread = 0; (cbread = fsin.read(buffer)) >= 0;) {
      fout.write(buffer, 0, cbread);
      reporter.progress();
    }
  } finally {
    fsin.close();
  }
}
Example 9
Source File: UDFWithOptions.java From incubator-hivemall with Apache License 2.0
protected static void reportProgress(@Nonnull Reporter reporter) {
  if (reporter != null) {
    synchronized (reporter) {
      reporter.progress();
    }
  }
}
Example 10
Source File: UDTFWithOptions.java From incubator-hivemall with Apache License 2.0
protected static void reportProgress(@Nullable Reporter reporter) {
  if (reporter != null) {
    synchronized (reporter) {
      reporter.progress();
    }
  }
}
Example 11
Source File: DynamoDBFibonacciRetryer.java From emr-dynamodb-connector with Apache License 2.0
private void incrementRetryCounter(Reporter reporter, PrintCounter retryCounter) {
  if (reporter != null) {
    if (retryCounter != null) {
      reporter.incrCounter(retryCounter.getGroup(), retryCounter.getName(), 1);
    } else {
      reporter.progress();
    }
  }
}
Example 12
Source File: HadoopArchives.java From hadoop-gpu with Apache License 2.0
public void map(LongWritable key, Text value,
    OutputCollector<IntWritable, Text> out, Reporter reporter) throws IOException {
  String line = value.toString();
  MapStat mstat = new MapStat(line);
  Path srcPath = new Path(mstat.pathname);
  String towrite = null;
  Path relPath = makeRelative(srcPath);
  int hash = HarFileSystem.getHarHash(relPath);
  long startPos = partStream.getPos();
  if (mstat.isDir) {
    towrite = relPath.toString() + " " + "dir none " + 0 + " " + 0 + " ";
    StringBuffer sbuff = new StringBuffer();
    sbuff.append(towrite);
    for (String child : mstat.children) {
      sbuff.append(child + " ");
    }
    towrite = sbuff.toString();
    //reading directories is also progress
    reporter.progress();
  } else {
    FileSystem srcFs = srcPath.getFileSystem(conf);
    FileStatus srcStatus = srcFs.getFileStatus(srcPath);
    FSDataInputStream input = srcFs.open(srcStatus.getPath());
    reporter.setStatus("Copying file " + srcStatus.getPath() + " to archive.");
    copyData(srcStatus.getPath(), input, partStream, reporter);
    towrite = relPath.toString() + " file " + partname + " " + startPos + " "
        + srcStatus.getLen() + " ";
  }
  out.collect(new IntWritable(hash), new Text(towrite));
}
Example 13
Source File: HadoopArchives.java From hadoop-gpu with Apache License 2.0
public void copyData(Path input, FSDataInputStream fsin, FSDataOutputStream fout,
    Reporter reporter) throws IOException {
  try {
    for (int cbread = 0; (cbread = fsin.read(buffer)) >= 0;) {
      fout.write(buffer, 0, cbread);
      reporter.progress();
    }
  } finally {
    fsin.close();
  }
}
Example 14
Source File: OnDiskMapOutput.java From hadoop with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  input = new IFileInputStream(input, compressedLength, conf);
  // Copy data to local-disk
  long bytesLeft = compressedLength;
  try {
    final int BYTES_TO_READ = 64 * 1024;
    byte[] buf = new byte[BYTES_TO_READ];
    while (bytesLeft > 0) {
      int n = ((IFileInputStream) input).readWithChecksum(buf, 0,
          (int) Math.min(bytesLeft, BYTES_TO_READ));
      if (n < 0) {
        throw new IOException("read past end of stream reading " + getMapId());
      }
      disk.write(buf, 0, n);
      bytesLeft -= n;
      metrics.inputBytes(n);
      reporter.progress();
    }

    LOG.info("Read " + (compressedLength - bytesLeft)
        + " bytes from map-output for " + getMapId());

    disk.close();
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input, disk);

    // Re-throw
    throw ioe;
  }

  // Sanity check
  if (bytesLeft != 0) {
    throw new IOException("Incomplete map output received for " + getMapId()
        + " from " + host.getHostName() + " (" + bytesLeft
        + " bytes missing of " + compressedLength + ")");
  }
  this.compressedSize = compressedLength;
}
Example 15
Source File: Loops.java From nutch-htmlunit with Apache License 2.0
/**
 * Performs a single loop pass looking for loop cycles within routes. If
 * this is not the last loop cycle then the url will be mapped for further
 * passes.
 */
public void reduce(Text key, Iterator<ObjectWritable> values,
    OutputCollector<Text, Route> output, Reporter reporter) throws IOException {

  List<Route> routeList = new ArrayList<Route>();
  Set<String> outlinkUrls = new LinkedHashSet<String>();
  int numValues = 0;

  // aggregate all routes and outlinks for a given url
  while (values.hasNext()) {
    ObjectWritable next = values.next();
    Object value = next.get();
    if (value instanceof Route) {
      routeList.add(WritableUtils.clone((Route) value, conf));
    } else if (value instanceof Text) {
      String outlinkUrl = ((Text) value).toString();
      if (!outlinkUrls.contains(outlinkUrl)) {
        outlinkUrls.add(outlinkUrl);
      }
    }

    // specify progress, could be a lot of routes
    numValues++;
    if (numValues % 100 == 0) {
      reporter.progress();
    }
  }

  // loop through the route list
  Iterator<Route> routeIt = routeList.listIterator();
  while (routeIt.hasNext()) {

    // removing the route for space concerns, could be a lot of routes
    // if the route is already found, meaning it is a loop, just collect it
    // urls with no outlinks that are not found will fall off
    Route route = routeIt.next();
    routeIt.remove();
    if (route.isFound()) {
      output.collect(key, route);
    } else {
      // if the route start url is found, set route to found and collect
      String lookingFor = route.getLookingFor();
      if (outlinkUrls.contains(lookingFor)) {
        route.setFound(true);
        output.collect(key, route);
      } else if (!last) {
        // setup for next pass through the loop
        for (String outlink : outlinkUrls) {
          output.collect(new Text(outlink), route);
        }
      }
    }
  }
}
Example 16
Source File: TestShufflePlugin.java From big-c with Apache License 2.0
@Test
/**
 * A testing method verifying availability and accessibility of API that is needed
 * for sub-classes of ShuffleConsumerPlugin
 */
public void testConsumerApi() {
  JobConf jobConf = new JobConf();
  ShuffleConsumerPlugin<K, V> shuffleConsumerPlugin = new TestShuffleConsumerPlugin<K, V>();

  //mock creation
  ReduceTask mockReduceTask = mock(ReduceTask.class);
  TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
  Reporter mockReporter = mock(Reporter.class);
  FileSystem mockFileSystem = mock(FileSystem.class);
  Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = jobConf.getCombinerClass();
  @SuppressWarnings("unchecked")  // needed for mock with generic
  CombineOutputCollector<K, V> mockCombineOutputCollector =
      (CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
  org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
      mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
  LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
  CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
  Counter mockCounter = mock(Counter.class);
  TaskStatus mockTaskStatus = mock(TaskStatus.class);
  Progress mockProgress = mock(Progress.class);
  MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
  Task mockTask = mock(Task.class);
  try {
    String[] dirs = jobConf.getLocalDirs();
    // verify that these APIs are available through super class handler
    ShuffleConsumerPlugin.Context<K, V> context =
        new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, jobConf, mockFileSystem,
            mockUmbilical, mockLocalDirAllocator, mockReporter, mockCompressionCodec,
            combinerClass, mockCombineOutputCollector, mockCounter, mockCounter,
            mockCounter, mockCounter, mockCounter, mockCounter, mockTaskStatus,
            mockProgress, mockProgress, mockTask, mockMapOutputFile, null);
    shuffleConsumerPlugin.init(context);
    shuffleConsumerPlugin.run();
    shuffleConsumerPlugin.close();
  } catch (Exception e) {
    assertTrue("Threw exception:" + e, false);
  }

  // verify that these APIs are available for 3rd party plugins
  mockReduceTask.getTaskID();
  mockReduceTask.getJobID();
  mockReduceTask.getNumMaps();
  mockReduceTask.getPartition();
  mockReporter.progress();
}
Example 17
Source File: OnDiskMapOutput.java From big-c with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  input = new IFileInputStream(input, compressedLength, conf);
  // Copy data to local-disk
  long bytesLeft = compressedLength;
  try {
    final int BYTES_TO_READ = 64 * 1024;
    byte[] buf = new byte[BYTES_TO_READ];
    while (bytesLeft > 0) {
      int n = ((IFileInputStream) input).readWithChecksum(buf, 0,
          (int) Math.min(bytesLeft, BYTES_TO_READ));
      if (n < 0) {
        throw new IOException("read past end of stream reading " + getMapId());
      }
      disk.write(buf, 0, n);
      bytesLeft -= n;
      metrics.inputBytes(n);
      reporter.progress();
    }

    LOG.info("Read " + (compressedLength - bytesLeft)
        + " bytes from map-output for " + getMapId());

    disk.close();
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input, disk);

    // Re-throw
    throw ioe;
  }

  // Sanity check
  if (bytesLeft != 0) {
    throw new IOException("Incomplete map output received for " + getMapId()
        + " from " + host.getHostName() + " (" + bytesLeft
        + " bytes missing of " + compressedLength + ")");
  }
  this.compressedSize = compressedLength;
}
Example 18
Source File: HiveCassandraStandardColumnInputFormat.java From Hive-Cassandra with Apache License 2.0
@Override
public RecordReader<BytesWritable, MapWritable> getRecordReader(InputSplit split,
    JobConf jobConf, final Reporter reporter) throws IOException {
  HiveCassandraStandardSplit cassandraSplit = (HiveCassandraStandardSplit) split;

  List<String> columns = AbstractColumnSerDe.parseColumnMapping(cassandraSplit.getColumnMapping());
  isTransposed = AbstractColumnSerDe.isTransposed(columns);

  List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);

  if (columns.size() < readColIDs.size()) {
    throw new IOException("Cannot read more columns than the given table contains.");
  }

  org.apache.cassandra.hadoop.ColumnFamilySplit cfSplit = cassandraSplit.getSplit();
  Job job = new Job(jobConf);

  TaskAttemptContext tac = new TaskAttemptContext(job.getConfiguration(), new TaskAttemptID()) {
    @Override
    public void progress() {
      reporter.progress();
    }
  };

  SlicePredicate predicate = new SlicePredicate();

  if (isTransposed || readColIDs.size() == columns.size() || readColIDs.size() == 0) {
    SliceRange range = new SliceRange();
    AbstractType comparator = BytesType.instance;
    String comparatorType = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_COMPARATOR);
    if (comparatorType != null && !comparatorType.equals("")) {
      try {
        comparator = TypeParser.parse(comparatorType);
      } catch (Exception ex) {
        throw new IOException("Comparator class not found.");
      }
    }

    String sliceStart = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_START);
    String sliceEnd = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_FINISH);
    String reversed = jobConf.get(AbstractColumnSerDe.CASSANDRA_SLICE_PREDICATE_RANGE_REVERSED);

    range.setStart(comparator.fromString(sliceStart == null ? "" : sliceStart));
    range.setFinish(comparator.fromString(sliceEnd == null ? "" : sliceEnd));
    range.setReversed(reversed == null ? false : reversed.equals("true"));
    range.setCount(cassandraSplit.getSlicePredicateSize());
    predicate.setSlice_range(range);
  } else {
    int iKey = columns.indexOf(AbstractColumnSerDe.CASSANDRA_KEY_COLUMN);
    predicate.setColumn_names(getColumnNames(iKey, columns, readColIDs));
  }

  try {
    ConfigHelper.setInputColumnFamily(tac.getConfiguration(),
        cassandraSplit.getKeyspace(), cassandraSplit.getColumnFamily());

    ConfigHelper.setInputSlicePredicate(tac.getConfiguration(), predicate);
    ConfigHelper.setRangeBatchSize(tac.getConfiguration(), cassandraSplit.getRangeBatchSize());
    ConfigHelper.setInputRpcPort(tac.getConfiguration(), cassandraSplit.getPort() + "");
    ConfigHelper.setInputInitialAddress(tac.getConfiguration(), cassandraSplit.getHost());
    ConfigHelper.setInputPartitioner(tac.getConfiguration(), cassandraSplit.getPartitioner());
    // Set Split Size
    ConfigHelper.setInputSplitSize(tac.getConfiguration(), cassandraSplit.getSplitSize());

    CassandraHiveRecordReader rr = null;

    if (isTransposed && tac.getConfiguration().getBoolean(
        AbstractColumnSerDe.CASSANDRA_ENABLE_WIDEROW_ITERATOR, true)) {
      rr = new CassandraHiveRecordReader(new ColumnFamilyWideRowRecordReader(), isTransposed);
    } else {
      rr = new CassandraHiveRecordReader(new ColumnFamilyRecordReader(), isTransposed);
    }
    rr.initialize(cfSplit, tac);

    return rr;
  } catch (Exception ie) {
    throw new IOException(ie);
  }
}
Example 19
Source File: ArcSegmentCreator.java From anthelion with Apache License 2.0
/**
 * <p>Runs the Map job to translate an arc record into output for Nutch
 * segments.</p>
 *
 * @param key The arc record header.
 * @param bytes The arc record raw content bytes.
 * @param output The output collector.
 * @param reporter The progress reporter.
 */
public void map(Text key, BytesWritable bytes,
    OutputCollector<Text, NutchWritable> output, Reporter reporter) throws IOException {

  String[] headers = key.toString().split("\\s+");
  String urlStr = headers[0];
  String version = headers[2];
  String contentType = headers[3];

  // arcs start with a file description. for now we ignore this as it is not
  // a content record
  if (urlStr.startsWith("filedesc://")) {
    LOG.info("Ignoring file header: " + urlStr);
    return;
  }
  LOG.info("Processing: " + urlStr);

  // get the raw bytes from the arc file, create a new crawldatum
  Text url = new Text();
  CrawlDatum datum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, interval, 1.0f);
  String segmentName = getConf().get(Nutch.SEGMENT_NAME_KEY);

  // normalize and filter the urls
  try {
    urlStr = normalizers.normalize(urlStr, URLNormalizers.SCOPE_FETCHER);
    urlStr = urlFilters.filter(urlStr); // filter the url
  } catch (Exception e) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Skipping " + url + ":" + e);
    }
    urlStr = null;
  }

  // if still a good url then process
  if (urlStr != null) {
    url.set(urlStr);
    try {
      // set the protocol status to success and the crawl status to success
      // create the content from the normalized url and the raw bytes from
      // the arc file, TODO: currently this doesn't handle text of errors
      // pages (i.e. 404, etc.). We assume we won't get those.
      ProtocolStatus status = ProtocolStatus.STATUS_SUCCESS;
      Content content = new Content(urlStr, urlStr, bytes.getBytes(), contentType,
          new Metadata(), getConf());

      // set the url version into the metadata
      content.getMetadata().set(URL_VERSION, version);
      ParseStatus pstatus = null;
      pstatus = output(output, segmentName, url, datum, content, status,
          CrawlDatum.STATUS_FETCH_SUCCESS);
      reporter.progress();
    } catch (Throwable t) { // unexpected exception
      logError(url, t);
      output(output, segmentName, url, datum, null, null, CrawlDatum.STATUS_FETCH_RETRY);
    }
  }
}
Example 20
Source File: InMemoryMapOutput.java From hadoop with Apache License 2.0
@Override
public void shuffle(MapHost host, InputStream input,
    long compressedLength, long decompressedLength,
    ShuffleClientMetrics metrics, Reporter reporter) throws IOException {
  IFileInputStream checksumIn = new IFileInputStream(input, compressedLength, conf);

  input = checksumIn;

  // Are map-outputs compressed?
  if (codec != null) {
    decompressor.reset();
    input = codec.createInputStream(input, decompressor);
  }

  try {
    IOUtils.readFully(input, memory, 0, memory.length);
    metrics.inputBytes(memory.length);
    reporter.progress();
    LOG.info("Read " + memory.length + " bytes from map-output for " + getMapId());

    /**
     * We've gotten the amount of data we were expecting. Verify the
     * decompressor has nothing more to offer. This action also forces the
     * decompressor to read any trailing bytes that weren't critical
     * for decompression, which is necessary to keep the stream
     * in sync.
     */
    if (input.read() >= 0) {
      throw new IOException("Unexpected extra bytes from input stream for " + getMapId());
    }
  } catch (IOException ioe) {
    // Close the streams
    IOUtils.cleanup(LOG, input);

    // Re-throw
    throw ioe;
  } finally {
    CodecPool.returnDecompressor(decompressor);
  }
}