org.apache.hadoop.util.IndexedSorter Java Examples
The following examples show how to use
org.apache.hadoop.util.IndexedSorter.
The original project and source file are noted above each example.
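IndexedSorter is the sort-algorithm abstraction used inside Hadoop's shuffle: an implementation such as QuickSort or HeapSort reorders any collection that implements IndexedSortable, which exposes only compare(i, j) and swap(i, j) over record indices. As a warm-up before the real examples, here is a minimal, hypothetical sketch; the IntArraySortable class and IndexedSorterDemo driver are written for illustration and are not part of Hadoop.

import org.apache.hadoop.util.IndexedSortable;
import org.apache.hadoop.util.IndexedSorter;
import org.apache.hadoop.util.QuickSort;

// Hypothetical IndexedSortable backed by a plain int array.
class IntArraySortable implements IndexedSortable {
  private final int[] data;

  IntArraySortable(int[] data) {
    this.data = data;
  }

  @Override
  public int compare(int i, int j) {
    // Compare the elements stored at positions i and j, not the indices.
    return Integer.compare(data[i], data[j]);
  }

  @Override
  public void swap(int i, int j) {
    int tmp = data[i];
    data[i] = data[j];
    data[j] = tmp;
  }
}

public class IndexedSorterDemo {
  public static void main(String[] args) {
    int[] values = {42, 7, 19, 3, 25};
    IndexedSorter sorter = new QuickSort(); // HeapSort is the other built-in choice
    // Sort the half-open index range [0, values.length).
    sorter.sort(new IntArraySortable(values), 0, values.length);
    System.out.println(java.util.Arrays.toString(values)); // [3, 7, 19, 25, 42]
  }
}

Because the sorter only ever asks "compare slot i with slot j" and "swap slot i with slot j", MapTask and Tez can sort serialized key/value metadata in place without deserializing records, which is the pattern every example below follows.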
Example #1
Source File: PipelinedSorter.java From incubator-tez with Apache License 2.0
public SpanIterator sort(IndexedSorter sorter, RawComparator comparator) {
  this.comparator = comparator;
  ki = new byte[keymax];
  kj = new byte[keymax];
  LOG.info("begin sorting Span" + index + " (" + length() + ")");
  if (length() > 1) {
    sorter.sort(this, 0, length(), nullProgressable);
  }
  LOG.info("done sorting Span" + index);
  return new SpanIterator(this);
}
Example #2
Source File: PipelinedSorter.java From tez with Apache License 2.0
public SpanIterator sort(IndexedSorter sorter) {
  long start = System.currentTimeMillis();
  if (length() > 1) {
    sorter.sort(this, 0, length(), progressable);
  }
  LOG.info(outputContext.getDestinationVertexName() + ": "
      + "done sorting span=" + index + ", length=" + length()
      + ", time=" + (System.currentTimeMillis() - start));
  return new SpanIterator((SortSpan) this);
}
Example #3
Source File: MapTask.java From RDFS with Apache License 2.0
@SuppressWarnings("unchecked") public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, TaskReporter reporter ) throws IOException, ClassNotFoundException { this.job = job; this.reporter = reporter; localFs = FileSystem.getLocal(job); partitions = job.getNumReduceTasks(); rfs = ((LocalFileSystem)localFs).getRaw(); indexCacheList = new ArrayList<SpillRecord>(); spillSortCounters = new MapSpillSortCounters(reporter); //sanity checks final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8); final float recper = job.getFloat("io.sort.record.percent",(float)0.05); boolean localMode = job.get("mapred.job.tracker", "local").equals("local"); int sortmb = job.getInt("io.sort.mb", 100); if (localMode) { sortmb = job.getInt("io.sort.mb.localmode", 100); } if (spillper > (float)1.0 || spillper < (float)0.0) { throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper); } if (recper > (float)1.0 || recper < (float)0.01) { throw new IOException("Invalid \"io.sort.record.percent\": " + recper); } if ((sortmb & 0x7FF) != sortmb) { throw new IOException("Invalid \"io.sort.mb\": " + sortmb); } sorter = ReflectionUtils.newInstance( job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job); LOG.info("io.sort.mb = " + sortmb); // buffers and accounting int maxMemUsage = sortmb << 20; int recordCapacity = (int)(maxMemUsage * recper); recordCapacity -= recordCapacity % RECSIZE; kvbuffer = new byte[maxMemUsage - recordCapacity]; bufvoid = kvbuffer.length; recordCapacity /= RECSIZE; kvoffsets = new int[recordCapacity]; kvindices = new int[recordCapacity * ACCTSIZE]; softBufferLimit = (int)(kvbuffer.length * spillper); softRecordLimit = (int)(kvoffsets.length * spillper); LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length); LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length); // k/v serialization comparator = job.getOutputKeyComparator(); keyClass = (Class<K>)job.getMapOutputKeyClass(); valClass = (Class<V>)job.getMapOutputValueClass(); serializationFactory = new SerializationFactory(job); keySerializer = serializationFactory.getSerializer(keyClass); keySerializer.open(bb); valSerializer = serializationFactory.getSerializer(valClass); valSerializer.open(bb); // counters mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES); mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS); Counters.Counter combineInputCounter = reporter.getCounter(COMBINE_INPUT_RECORDS); combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS); // compression if (job.getCompressMapOutput()) { Class<? extends CompressionCodec> codecClass = job.getMapOutputCompressorClass(DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); } // combiner combinerRunner = CombinerRunner.create(job, getTaskID(), combineInputCounter, reporter, null); if (combinerRunner != null) { combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter); } else { combineCollector = null; } minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3); spillThread.setDaemon(true); spillThread.setName("SpillThread"); spillLock.lock(); try { spillThread.start(); while (!spillThreadRunning) { spillDone.await(); } } catch (InterruptedException e) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } finally { spillLock.unlock(); } if (sortSpillException != null) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } }
Example #4
Source File: ExternalSorter.java From incubator-tez with Apache License 2.0
public ExternalSorter(TezOutputContext outputContext, Configuration conf,
    int numOutputs, long initialMemoryAvailable) throws IOException {
  this.outputContext = outputContext;
  this.conf = conf;
  this.partitions = numOutputs;
  rfs = ((LocalFileSystem) FileSystem.getLocal(this.conf)).getRaw();

  int assignedMb = (int) (initialMemoryAvailable >> 20);
  if (assignedMb <= 0) {
    if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB
      this.availableMemoryMb = 1;
      LOG.warn("initialAvailableMemory: " + initialMemoryAvailable
          + " is too low. Rounding to 1 MB");
    } else {
      throw new RuntimeException("InitialMemoryAssigned is <= 0: "
          + initialMemoryAvailable);
    }
  } else {
    this.availableMemoryMb = assignedMb;
  }

  // sorter
  sorter = ReflectionUtils.newInstance(this.conf.getClass(
      TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class,
      IndexedSorter.class), this.conf);

  comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf);

  // k/v serialization
  keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf);
  valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf);
  serializationFactory = new SerializationFactory(this.conf);
  keySerializer = serializationFactory.getSerializer(keyClass);
  valSerializer = serializationFactory.getSerializer(valClass);

  // counters
  mapOutputByteCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES);
  mapOutputRecordCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS);
  outputBytesWithOverheadCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
  fileOutputByteCounter =
      outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
  spilledRecordsCounter =
      outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS);
  additionalSpillBytesWritten =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
  additionalSpillBytesRead =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
  numAdditionalSpills =
      outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);

  // compression
  if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) {
    Class<? extends CompressionCodec> codecClass =
        ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class);
    codec = ReflectionUtils.newInstance(codecClass, this.conf);
  } else {
    codec = null;
  }

  this.ifileReadAhead = this.conf.getBoolean(
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD,
      TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT);
  if (this.ifileReadAhead) {
    this.ifileReadAheadLength = conf.getInt(
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES,
        TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT);
  } else {
    this.ifileReadAheadLength = 0;
  }
  this.ifileBufferSize = conf.getInt("io.file.buffer.size",
      TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT);

  // Task outputs
  mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext);

  LOG.info("Instantiating Partitioner: ["
      + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]");
  this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS,
      this.partitions);
  this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf);
  this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext);
}
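Tez mirrors the MapReduce pattern here: TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS plays the role of "map.sort.class", again defaulting to QuickSort. A minimal sketch, assuming a Configuration named conf that will be handed to the Tez runtime (the constant is the same one read by the conf.getClass(...) lookup in the constructor above):

// Minimal sketch; assumes a Configuration named conf. This mirrors the
// conf.getClass(...) lookup performed by the ExternalSorter constructor.
conf.setClass(TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS,
    org.apache.hadoop.util.QuickSort.class,
    org.apache.hadoop.util.IndexedSorter.class);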
Example #5
Source File: PipelinedSorter.java From incubator-tez with Apache License 2.0
public SortTask(SortSpan sortable, IndexedSorter sorter,
    RawComparator comparator) {
  this.sortable = sortable;
  this.sorter = sorter;
  this.comparator = comparator;
}
Example #6
Source File: PipelinedSorter.java From tez with Apache License 2.0
public SortTask(SortSpan sortable, IndexedSorter sorter) {
  this.sortable = sortable;
  this.sorter = sorter;
}
Example #7
Source File: MapTask.java From hadoop-gpu with Apache License 2.0
@SuppressWarnings("unchecked") public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, TaskReporter reporter ) throws IOException, ClassNotFoundException { this.job = job; this.reporter = reporter; localFs = FileSystem.getLocal(job); partitions = job.getNumReduceTasks(); rfs = ((LocalFileSystem)localFs).getRaw(); indexCacheList = new ArrayList<SpillRecord>(); //sanity checks final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8); final float recper = job.getFloat("io.sort.record.percent",(float)0.05); final int sortmb = job.getInt("io.sort.mb", 100); if (spillper > (float)1.0 || spillper < (float)0.0) { throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper); } if (recper > (float)1.0 || recper < (float)0.01) { throw new IOException("Invalid \"io.sort.record.percent\": " + recper); } if ((sortmb & 0x7FF) != sortmb) { throw new IOException("Invalid \"io.sort.mb\": " + sortmb); } sorter = ReflectionUtils.newInstance( job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job); LOG.info("io.sort.mb = " + sortmb); // buffers and accounting int maxMemUsage = sortmb << 20; int recordCapacity = (int)(maxMemUsage * recper); recordCapacity -= recordCapacity % RECSIZE; kvbuffer = new byte[maxMemUsage - recordCapacity]; bufvoid = kvbuffer.length; recordCapacity /= RECSIZE; kvoffsets = new int[recordCapacity]; kvindices = new int[recordCapacity * ACCTSIZE]; softBufferLimit = (int)(kvbuffer.length * spillper); softRecordLimit = (int)(kvoffsets.length * spillper); LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length); LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length); // k/v serialization comparator = job.getOutputKeyComparator(); keyClass = (Class<K>)job.getMapOutputKeyClass(); valClass = (Class<V>)job.getMapOutputValueClass(); serializationFactory = new SerializationFactory(job); keySerializer = serializationFactory.getSerializer(keyClass); keySerializer.open(bb); valSerializer = serializationFactory.getSerializer(valClass); valSerializer.open(bb); // counters mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES); mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS); Counters.Counter combineInputCounter = reporter.getCounter(COMBINE_INPUT_RECORDS); combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS); // compression if (job.getCompressMapOutput()) { Class<? extends CompressionCodec> codecClass = job.getMapOutputCompressorClass(DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); } // combiner combinerRunner = CombinerRunner.create(job, getTaskID(), combineInputCounter, reporter, null); if (combinerRunner != null) { combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter); } else { combineCollector = null; } minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3); spillThread.setDaemon(true); spillThread.setName("SpillThread"); spillLock.lock(); try { spillThread.start(); while (!spillThreadRunning) { spillDone.await(); } } catch (InterruptedException e) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } finally { spillLock.unlock(); } if (sortSpillException != null) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } }