org.apache.hadoop.mapreduce.TaskAttemptContext#setStatus

Source File: PigAvroInputFormat.java From Cubert with Apache License 2.0

5 votes

/**
 * Builds the Avro record reader for one input split.
 * The reader is configured with the input schema that was
 * passed in to the constructor; the split's string form is
 * published as the task status first.
 */
@Override
public RecordReader<NullWritable, Writable> createRecordReader(
        InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final String splitDescription = split.toString();
    context.setStatus(splitDescription);

    final FileSplit fileSplit = (FileSplit) split;
    return new PigAvroRecordReader(
            context,
            fileSplit,
            readerSchema,
            ignoreBadFiles,
            schemaToMergedSchemaMap,
            useMultipleSchemas);
}

Source File: SolrRecordWriter.java From hbase-indexer with Apache License 2.0

5 votes

/**
 * Queues any documents still held in {@code batch}, closes the batch writer
 * and shuts the heart beater down.
 *
 * @param context the task attempt context; only used for progress and status
 *                reporting in this method, both of which are skipped when it
 *                is {@code null}
 * @throws IOException if flushing or closing fails; any non-IO exception
 *                     raised while closing is wrapped in an IOException
 * @throws InterruptedException declared by the overridden method
 */
@Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    if (context != null) {
      heartBeater.setProgress(context);
    }
    try {
      heartBeater.needHeartBeat();
      if (batch.size() > 0) {
        batchWriter.queueBatch(batch);
        numDocsWritten += batch.size();
        batch.clear();
      }
      LOG.info("docsWritten: {}", numDocsWritten);
      batchWriter.close(context);
//      if (outputZipFile) {
//        context.setStatus("Writing Zip");
//        packZipFile(); // Written to the perm location
//      } else {
//        context.setStatus("Copying Index");
//        fs.completeLocalOutput(perm, temp); // copy to dfs
//      }
    } catch (Exception e) {
      // Callers only expect IOException: pass it through, wrap everything else.
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      throw new IOException(e);
    } finally {
      // Always stop the heart beat, even when the flush or close failed.
      heartBeater.cancelHeartBeat();
      heartBeater.close();
//      File tempFile = new File(temp.toString());
//      if (tempFile.exists()) {
//        FileUtils.forceDelete(new File(temp.toString()));
//      }
    }

    // Same null guard as for setProgress above; previously this line
    // dereferenced the context unconditionally.
    if (context != null) {
      context.setStatus("Done");
    }
  }

Source File: PigAvroInputFormat.java From spork with Apache License 2.0

5 votes

/**
 * Creates the Avro record reader that will read the given split.
 * The input schema handed to the constructor is forwarded to the
 * reader, and the split's string form becomes the task status.
 */
@Override
public RecordReader<NullWritable, Writable> createRecordReader(
        InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final String statusText = split.toString();
    context.setStatus(statusText);

    final FileSplit avroSplit = (FileSplit) split;
    return new PigAvroRecordReader(
            context, avroSplit, readerSchema,
            ignoreBadFiles, schemaToMergedSchemaMap, useMultipleSchemas);
}

Source File: AvroStorage.java From spork with Apache License 2.0

5 votes

/**
 * Returns an anonymous {@code PigFileInputFormat} whose record readers are
 * chosen from the input Avro schema.
 *
 * @see org.apache.pig.LoadFunc#getInputFormat()
 */
@Override
public InputFormat<NullWritable, GenericData.Record> getInputFormat()
    throws IOException {

  return new org.apache.pig.backend.hadoop.executionengine.mapReduceLayer
      .PigFileInputFormat<NullWritable, GenericData.Record>() {

    /**
     * Picks an array reader for ARRAY schemas and a record reader for
     * everything else, then reports the split as the task status.
     */
    @Override
    public RecordReader<NullWritable, GenericData.Record> createRecordReader(
        final InputSplit is, final TaskAttemptContext tc)
        throws IOException, InterruptedException {
      final Schema inputSchema = getInputAvroSchema();

      final RecordReader<NullWritable, GenericData.Record> reader;
      if (inputSchema.getType() != Type.ARRAY) {
        reader = new AvroRecordReader(inputSchema);
      } else {
        reader = new AvroArrayReader(inputSchema);
      }

      // The reader is initialized against the split and then closed again
      // before it is handed back.
      // NOTE(review): presumably the caller initializes the returned reader
      // itself - confirm before changing this sequence.
      try {
        reader.initialize(is, tc);
      } finally {
        reader.close();
      }

      final String splitDescription = is.toString();
      tc.setStatus(splitDescription);
      return reader;
    }
  };

}

Source File: SolrRecordWriter.java From examples with Apache License 2.0

5 votes

/**
 * Flushes the documents remaining in {@code batch} to the batch writer,
 * closes the batch writer and stops the heart beater.
 *
 * @param context the task attempt context; progress and status reporting in
 *                this method are skipped when it is {@code null}
 * @throws IOException if flushing or closing fails; non-IO exceptions raised
 *                     while closing are wrapped in an IOException
 * @throws InterruptedException declared by the overridden method
 */
@Override
  public void close(TaskAttemptContext context) throws IOException, InterruptedException {
    if (context != null) {
      heartBeater.setProgress(context);
    }
    try {
      heartBeater.needHeartBeat();
      if (batch.size() > 0) {
        batchWriter.queueBatch(batch);
        numDocsWritten += batch.size();
        batch.clear();
      }
      LOG.info("docsWritten: {}", numDocsWritten);
      batchWriter.close(context);
//      if (outputZipFile) {
//        context.setStatus("Writing Zip");
//        packZipFile(); // Written to the perm location
//      } else {
//        context.setStatus("Copying Index");
//        fs.completeLocalOutput(perm, temp); // copy to dfs
//      }
    } catch (Exception e) {
      // Rethrow IOExceptions untouched; wrap any other failure.
      if (e instanceof IOException) {
        throw (IOException) e;
      }
      throw new IOException(e);
    } finally {
      // Runs on success and failure alike so the heart beat never outlives close().
      heartBeater.cancelHeartBeat();
      heartBeater.close();
//      File tempFile = new File(temp.toString());
//      if (tempFile.exists()) {
//        FileUtils.forceDelete(new File(temp.toString()));
//      }
    }

    // The context was treated as optional at the top of this method, so the
    // final status update must not dereference it blindly either.
    if (context != null) {
      context.setStatus("Done");
    }
  }

Source File: BatchWriter.java From examples with Apache License 2.0

5 votes

/**
 * Waits for outstanding batches, then commits, optimizes, commits again and
 * shuts Solr down, updating the task status before each phase.
 *
 * @param context task attempt context used for status updates, configuration
 *                lookup and the merge-time counter
 * @throws InterruptedException if interrupted while waiting for the batch pool
 * @throws SolrServerException declared for the Solr commit/optimize calls
 * @throws IOException declared for the Solr commit/optimize calls
 */
public synchronized void close(TaskAttemptContext context)
    throws InterruptedException, SolrServerException, IOException {

  if (batchPool != null) {
    context.setStatus("Waiting for batches to complete");
    batchPool.shutdown();

    // Poll in 5 second steps so progress is logged while batches drain.
    while (!batchPool.isTerminated()) {
      LOG.info(String.format(Locale.ENGLISH, 
          "Waiting for %d items and %d threads to finish executing", batchPool
              .getQueue().size(), batchPool.getActiveCount()));
      batchPool.awaitTermination(5, TimeUnit.SECONDS);
    }
  }
  context.setStatus("Committing Solr Phase 1");
  solr.commit(true, false);
  context.setStatus("Optimizing Solr");
  int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
  LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
  long start = System.nanoTime();
  solr.optimize(true, false, maxSegments);
  // Measure once so the counter and the log line report the same interval.
  long elapsedNanos = System.nanoTime() - start;
  context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(elapsedNanos);
  // The previous divisor was (10^9); '^' is XOR in Java, so that was 3, not 1e9.
  float secs = elapsedNanos / (float) TimeUnit.SECONDS.toNanos(1);
  LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
  context.setStatus("Committing Solr Phase 2");
  solr.commit(true, false);
  context.setStatus("Shutting down Solr");
  solr.shutdown();
}

Source File: QseqInputFormat.java From Hadoop-BAM with MIT License

5 votes

/**
 * Publishes the split's string form as the task status and returns a
 * {@code QseqRecordReader} built from the job configuration and the split.
 */
public RecordReader<Text, SequencedFragment> createRecordReader(
		InputSplit genericSplit,
		TaskAttemptContext context) throws IOException, InterruptedException
{
	final String splitDescription = genericSplit.toString();
	context.setStatus(splitDescription);

	// cast as per example in TextInputFormat
	return new QseqRecordReader(
			context.getConfiguration(),
			(FileSplit) genericSplit);
}

Source File: FastaInputFormat.java From Hadoop-BAM with MIT License

5 votes

/**
 * Sets the task status to the split's string form and returns a
 * {@code FastaRecordReader} for that split.
 */
public RecordReader<Text, ReferenceFragment> createRecordReader(
		InputSplit genericSplit,
		TaskAttemptContext context) throws IOException, InterruptedException
{
	final String statusText = genericSplit.toString();
	context.setStatus(statusText);

	// cast as per example in TextInputFormat
	return new FastaRecordReader(
			context.getConfiguration(),
			(FileSplit) genericSplit);
}

Source File: FastqInputFormat.java From Hadoop-BAM with MIT License

5 votes

/**
 * Reports the split as the current task status, then creates the
 * {@code FastqRecordReader} that reads it.
 */
public RecordReader<Text, SequencedFragment> createRecordReader(
		InputSplit genericSplit,
		TaskAttemptContext context) throws IOException, InterruptedException
{
	final String currentSplit = genericSplit.toString();
	context.setStatus(currentSplit);

	// cast as per example in TextInputFormat
	return new FastqRecordReader(
			context.getConfiguration(),
			(FileSplit) genericSplit);
}

Source File: AvroInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0

5 votes

/**
 * Records the split's string form as the task status and returns a new,
 * not yet initialized {@code AvroRecordReader}.
 */
@Override
public RecordReader<AvroWrapper<T>, NullWritable> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final String splitDescription = split.toString();
  context.setStatus(splitDescription);

  final AvroRecordReader<T> reader = new AvroRecordReader<T>();
  return reader;
}

Source File: MultiLineInputFormat.java From dkpro-c4corpus with Apache License 2.0

5 votes

/**
 * Sets the task status to the split's string form and returns a fresh
 * {@code MultiLineRecordReader}.
 */
@Override
public RecordReader<LongWritable, Text> createRecordReader(
        InputSplit genericSplit, TaskAttemptContext context)
{
    final String splitDescription = genericSplit.toString();
    context.setStatus(splitDescription);

    final MultiLineRecordReader reader = new MultiLineRecordReader();
    return reader;
}

Source File: SequenceFileAsTextInputFormat.java From big-c with Apache License 2.0

4 votes

/**
 * Reports the split as the task status and returns a new
 * {@code SequenceFileAsTextRecordReader}.
 */
public RecordReader<Text, Text> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException {
  final String splitDescription = split.toString();
  context.setStatus(splitDescription);

  final SequenceFileAsTextRecordReader reader = new SequenceFileAsTextRecordReader();
  return reader;
}

Source File: SequenceFileInputFilter.java From big-c with Apache License 2.0

4 votes

/**
 * Creates the filtering record reader for the given split and publishes
 * the split's string form as the task status.
 *
 * @param split file split
 * @param context the task-attempt context
 * @return a new {@code FilterRecordReader} built from the context's configuration
 */
public RecordReader<K, V> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException {
  final String splitDescription = split.toString();
  context.setStatus(splitDescription);

  final FilterRecordReader<K, V> reader =
      new FilterRecordReader<K, V>(context.getConfiguration());
  return reader;
}

Source File: KeyValueTextInputFormat.java From big-c with Apache License 2.0

4 votes

/**
 * Sets the task status to the split's string form and returns a
 * {@code KeyValueLineRecordReader} built from the context's configuration.
 */
public RecordReader<Text, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context)
    throws IOException {
  final String splitDescription = genericSplit.toString();
  context.setStatus(splitDescription);

  final KeyValueLineRecordReader reader =
      new KeyValueLineRecordReader(context.getConfiguration());
  return reader;
}

Source File: TreeMergeOutputFormat.java From examples with Apache License 2.0

4 votes

/**
 * Merges the shard indexes listed in {@code shards} into a single index under
 * {@code workDir}, optionally force-merges it down to the configured number of
 * segments, and closes the index writer.
 *
 * <p>All elapsed times are derived from {@link System#nanoTime()}.
 *
 * @param context task attempt context used for configuration, counters and status
 * @throws IOException if writing the shard number file or any index operation fails
 */
@Override
    public void close(TaskAttemptContext context) throws IOException {
      // Local conversion factors; the original divisor (10^9) was an XOR and evaluated to 3.
      final float nanosPerSecond = 1.0e9f;
      final long nanosPerMilli = 1000000L;

      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        // Previously the counter subtracted a nanoTime() start from currentTimeMillis(),
        // mixing two unrelated clocks. Use one nanoTime-based interval for counter and log.
        // NOTE(review): counter unit assumed to be milliseconds (the original read
        // currentTimeMillis) - confirm against SolrCounters.
        long elapsedNanos = System.nanoTime() - start;
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
        }
        float secs = elapsedNanos / nanosPerSecond;
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        // Integer.MAX_VALUE is the "no limit" default, in which case no forced merge runs.
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        elapsedNanos = System.nanoTime() - start;
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
        }
        secs = elapsedNanos / nanosPerSecond;
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / nanosPerSecond;
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        // Stop the heart beat whether or not the merge succeeded.
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }

Source File: NLineInputFormat.java From big-c with Apache License 2.0

4 votes

/**
 * Publishes the split's string form as the task status and returns a new
 * {@code LineRecordReader}.
 */
public RecordReader<LongWritable, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context)
    throws IOException {
  final String splitDescription = genericSplit.toString();
  context.setStatus(splitDescription);

  final LineRecordReader reader = new LineRecordReader();
  return reader;
}

Source File: SequenceFileAsTextInputFormat.java From hadoop with Apache License 2.0

4 votes

/**
 * Sets the task status to the split's string form and hands back a new
 * {@code SequenceFileAsTextRecordReader}.
 */
public RecordReader<Text, Text> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException {
  final String statusText = split.toString();
  context.setStatus(statusText);

  final SequenceFileAsTextRecordReader textReader = new SequenceFileAsTextRecordReader();
  return textReader;
}

Source File: SequenceFileInputFilter.java From hadoop with Apache License 2.0

4 votes

/**
 * Reports the split as the task status and creates the filtering record
 * reader for it.
 *
 * @param split file split
 * @param context the task-attempt context
 * @return a new {@code FilterRecordReader} constructed with the context's configuration
 */
public RecordReader<K, V> createRecordReader(
    InputSplit split, TaskAttemptContext context)
    throws IOException {
  final String statusText = split.toString();
  context.setStatus(statusText);

  final FilterRecordReader<K, V> filteringReader =
      new FilterRecordReader<K, V>(context.getConfiguration());
  return filteringReader;
}

Source File: KeyValueTextInputFormat.java From hadoop with Apache License 2.0

4 votes

/**
 * Reports the split as the task status and returns a
 * {@code KeyValueLineRecordReader} constructed with the context's configuration.
 */
public RecordReader<Text, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context)
    throws IOException {
  final String statusText = genericSplit.toString();
  context.setStatus(statusText);

  final KeyValueLineRecordReader keyValueReader =
      new KeyValueLineRecordReader(context.getConfiguration());
  return keyValueReader;
}

Source File: NLineInputFormat.java From hadoop with Apache License 2.0

4 votes

/**
 * Sets the task status to the split's string form and returns a new
 * {@code LineRecordReader}.
 */
public RecordReader<LongWritable, Text> createRecordReader(
    InputSplit genericSplit, TaskAttemptContext context)
    throws IOException {
  final String statusText = genericSplit.toString();
  context.setStatus(statusText);

  final LineRecordReader lineReader = new LineRecordReader();
  return lineReader;
}

Java Code Examples for org.apache.hadoop.mapreduce.TaskAttemptContext#setStatus()