org.apache.hadoop.util.QuickSort Java Examples
The following examples show how to use
org.apache.hadoop.util.QuickSort.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TeraInputFormat.java From big-c with Apache License 2.0 | 7 votes |
/**
 * Chooses the partition boundary keys from the sampled records.
 * The sample is sorted via {@link QuickSort} and then read at evenly
 * spaced positions; each chosen key is intended to be the first key of
 * its partition.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if more partitions are requested than
 *         there are sampled records
 */
Text[] createPartitions(int numPartitions) {
  final int sampleCount = records.size();
  System.out.println("Making " + numPartitions + " from " + sampleCount +
                     " sampled records");
  if (sampleCount < numPartitions) {
    final String detail = "Requested more partitions than input keys (" +
        numPartitions + " > " + sampleCount + ")";
    throw new IllegalArgumentException(detail);
  }
  // Order the sample before picking boundaries out of it.
  new QuickSort().sort(this, 0, records.size());
  final float stride = sampleCount / (float) numPartitions;
  final Text[] splitPoints = new Text[numPartitions - 1];
  for (int part = 1; part < numPartitions; part++) {
    // Boundary for partition `part` sits `part` strides into the sample.
    final int sampleIndex = Math.round(stride * part);
    splitPoints[part - 1] = records.get(sampleIndex);
  }
  return splitPoints;
}
Example #2
Source File: TeraInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Derives evenly spaced split keys from the sampled records.
 * The records are sorted first, then down-sampled so that every returned
 * key marks the start of one partition.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if numPartitions exceeds the number of
 *         sampled records
 */
Text[] createPartitions(int numPartitions) {
  final int available = records.size();
  System.out.println("Making " + numPartitions + " from " + available +
                     " sampled records");
  if (!(numPartitions <= available)) {
    throw new IllegalArgumentException(
        "Requested more partitions than input keys (" + numPartitions +
        " > " + available + ")");
  }
  final QuickSort sorter = new QuickSort();
  sorter.sort(this, 0, records.size());
  final float step = available / (float) numPartitions;
  final Text[] boundaries = new Text[numPartitions - 1];
  int next = 1;
  while (next < numPartitions) {
    boundaries[next - 1] = records.get(Math.round(step * next));
    next++;
  }
  return boundaries;
}
Example #3
Source File: QuickSorterTemplate.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Sorts this sorter's {@code totalCount} entries and copies the resulting
 * contents of {@code intVector} into a newly allocated selection vector.
 *
 * @param allocator       used to allocate the 4-bytes-per-entry backing buffer
 * @param targetBatchSize passed through to the {@link SelectionVector4} constructor
 * @return the selection vector holding the values of {@code intVector} after sorting
 */
@Override
public SelectionVector4 getFinalSort(BufferAllocator allocator, int targetBatchSize) {
  final Stopwatch timer = Stopwatch.createStarted();
  intVector.setValueCount(totalCount);

  final QuickSort sorter = new QuickSort();
  // The sort is skipped entirely when there are no entries.
  if (totalCount > 0) {
    sorter.sort(this, 0, totalCount);
  }

  final SelectionVector4 sortedSv4 =
      new SelectionVector4(allocator.buffer(totalCount * 4), totalCount, targetBatchSize);
  int row = 0;
  while (row < totalCount) {
    sortedSv4.set(row, intVector.get(row));
    row++;
  }

  final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to final sort {} records in {} batches",
      elapsedMicros, totalCount, hyperBatch.size());
  return sortedSv4;
}
Example #4
Source File: TeraInputFormat.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Computes the split points for the sampled keys.
 * The sample is sorted and then read at regular intervals; each selected
 * key is meant to be the first key of its partition. The step size is
 * printed to standard output.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if more partitions are requested than
 *         there are records
 */
Text[] createPartitions(int numPartitions) {
  final int total = records.size();
  System.out.println("Making " + numPartitions + " from " + total +
                     " records");
  if (total < numPartitions) {
    throw new IllegalArgumentException(
        "Requested more partitions than input keys (" + numPartitions +
        " > " + total + ")");
  }
  new QuickSort().sort(this, 0, records.size());
  final float interval = total / (float) numPartitions;
  System.out.println("Step size is " + interval);
  final Text[] cuts = new Text[numPartitions - 1];
  for (int slot = 0, part = 1; part < numPartitions; slot++, part++) {
    cuts[slot] = records.get(Math.round(interval * part));
  }
  return cuts;
}
Example #5
Source File: TeraInputFormat.java From hadoop-gpu with Apache License 2.0 | 6 votes |
/**
 * Selects partition start keys from the sample.
 * After sorting the records, one key is taken every "step" records
 * (step = record count / partition count, rounded per pick), giving
 * numPartitions - 1 boundaries. The step size is logged to stdout.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if numPartitions is larger than the
 *         number of records
 */
Text[] createPartitions(int numPartitions) {
  final int recordCount = records.size();
  System.out.println("Making " + numPartitions + " from " + recordCount +
                     " records");
  if (!(numPartitions <= recordCount)) {
    final String reason = "Requested more partitions than input keys (" +
        numPartitions + " > " + recordCount + ")";
    throw new IllegalArgumentException(reason);
  }
  final QuickSort quickSort = new QuickSort();
  quickSort.sort(this, 0, records.size());
  final float step = recordCount / (float) numPartitions;
  System.out.println("Step size is " + step);
  final Text[] picked = new Text[numPartitions - 1];
  int part = 1;
  while (part < numPartitions) {
    final int at = Math.round(step * part);
    picked[part - 1] = records.get(at);
    part++;
  }
  return picked;
}
Example #6
Source File: TeraInputFormat.java From hadoop-book with Apache License 2.0 | 6 votes |
/**
 * Builds the list of split keys for the requested number of partitions.
 * The sampled keys are sorted, then sampled again at a fixed step so the
 * partitions come out roughly even; every returned key should be the
 * first key of a partition.
 *
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split
 * points
 * @throws IllegalArgumentException if there are fewer records than
 * requested partitions
 */
Text[] createPartitions(int numPartitions) {
    final int keyCount = records.size();
    System.out.println("Making " + numPartitions + " from " + keyCount
            + " records");
    if (keyCount < numPartitions) {
        throw new IllegalArgumentException("Requested more partitions than input keys ("
                + numPartitions + " > " + keyCount + ")");
    }
    new QuickSort().sort(this, 0, records.size());
    final float gap = keyCount / (float) numPartitions;
    System.out.println("Step size is " + gap);
    final Text[] splits = new Text[numPartitions - 1];
    for (int part = 1; part < numPartitions; part++) {
        final int pick = Math.round(gap * part);
        splits[part - 1] = records.get(pick);
    }
    return splits;
}
Example #7
Source File: TeraInputFormat.java From incubator-tez with Apache License 2.0 | 6 votes |
/**
 * Turns the sampled keys into partition split points.
 * The sample is sorted and then thinned out at a constant step; each
 * surviving key is expected to be the first key of its partition.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if the sample has fewer records than
 *         the requested number of partitions
 */
Text[] createPartitions(int numPartitions) {
  final int sampled = records.size();
  System.out.println("Making " + numPartitions + " from " + sampled +
                     " sampled records");
  if (sampled < numPartitions) {
    throw new IllegalArgumentException(
        "Requested more partitions than input keys (" + numPartitions +
        " > " + sampled + ")");
  }
  final QuickSort sorter = new QuickSort();
  sorter.sort(this, 0, records.size());
  final float spacing = sampled / (float) numPartitions;
  final Text[] edges = new Text[numPartitions - 1];
  for (int slot = 0, part = 1; part < numPartitions; slot++, part++) {
    final int source = Math.round(spacing * part);
    edges[slot] = records.get(source);
  }
  return edges;
}
Example #8
Source File: SingleBatchSorterTemplate.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Quick-sorts this sorter over the index range {@code [0, vector2.getCount())}
 * and logs the elapsed time at debug level.
 *
 * @param vector2 selection vector whose count determines how many entries are sorted
 */
@Override
public void sort(SelectionVector2 vector2) {
  final QuickSort sorter = new QuickSort();
  final Stopwatch timer = Stopwatch.createStarted();
  // Skip the sort when the vector holds no entries.
  if (vector2.getCount() > 0) {
    sorter.sort(this, 0, vector2.getCount());
  }
  final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to sort {} records", elapsedMicros, vector2.getCount());
}
Example #9
Source File: TeraInputFormat.java From pravega-samples with Apache License 2.0 | 5 votes |
/**
 * Produces the split points that divide the sample into partitions.
 * The sampled keys are sorted and then read at equal steps; the keys
 * returned are intended to be the starting keys of partitions 1..n-1.
 * @param numPartitions the desired number of partitions
 * @return an array of size numPartitions - 1 that holds the split points
 * @throws IllegalArgumentException if numPartitions is greater than the
 *         number of sampled records
 */
Text[] createPartitions(int numPartitions) {
  final int sampleSize = records.size();
  System.out.println("Making " + numPartitions + " from " + sampleSize +
                     " sampled records");
  if (!(numPartitions <= sampleSize)) {
    final String problem = "Requested more partitions than input keys (" +
        numPartitions + " > " + sampleSize + ")";
    throw new IllegalArgumentException(problem);
  }
  new QuickSort().sort(this, 0, records.size());
  final float hop = sampleSize / (float) numPartitions;
  final Text[] starts = new Text[numPartitions - 1];
  int part = 1;
  while (part < numPartitions) {
    starts[part - 1] = records.get(Math.round(hop * part));
    part++;
  }
  return starts;
}
Example #10
Source File: SingleBatchSorterTemplate.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Runs a quick sort over the first {@code vector2.getCount()} entries of
 * this sorter, timing the operation and reporting it via debug logging.
 *
 * @param vector2 selection vector supplying the number of records to sort
 */
@Override
public void sort(SelectionVector2 vector2) {
  final QuickSort quickSort = new QuickSort();
  final Stopwatch stopwatch = Stopwatch.createStarted();
  final boolean hasRecords = vector2.getCount() > 0;
  if (hasRecords) {
    // Upper bound is exclusive: indices 0 .. count-1 are sorted.
    quickSort.sort(this, 0, vector2.getCount());
  }
  logger.debug(
      "Took {} us to sort {} records",
      stopwatch.elapsed(TimeUnit.MICROSECONDS),
      vector2.getCount());
}
Example #11
Source File: SortTest.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Sorts a fresh {@code ByteSortable} of {@code RECORD_COUNT} entries with
 * {@link QuickSort} and measures how long the sort call takes.
 *
 * @return elapsed time of the sort call in nanoseconds
 */
public long doSort() {
  final QuickSort sorter = new QuickSort();
  final ByteSortable data = new ByteSortable();
  // Only the sort call itself is timed; setup above is excluded.
  final long startedAt = System.nanoTime();
  sorter.sort(data, 0, RECORD_COUNT);
  final long finishedAt = System.nanoTime();
  return finishedAt - startedAt;
}
Example #12
Source File: SortTemplate.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Quick-sorts this sorter over {@code [0, vector4.getTotalCount())} and
 * logs the time taken at debug level.
 *
 * @param vector4   selection vector whose total count bounds the sort
 * @param container not referenced by this method body
 */
@Override
public void sort(SelectionVector4 vector4, VectorContainer container) {
  final Stopwatch timer = Stopwatch.createStarted();
  final QuickSort sorter = new QuickSort();
  sorter.sort(this, 0, vector4.getTotalCount());
  final long elapsedMicros = timer.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to sort {} records", elapsedMicros, vector4.getTotalCount());
}
Example #13
Source File: VectorizedSorter.java From tajo with Apache License 2.0 | 5 votes |
/**
 * Runs {@link QuickSort} over this sorter for indices
 * {@code [0, mappings.length)} and returns a view that yields
 * {@code tuples[mappings[i]]} for {@code i = 0 .. mappings.length - 1}.
 *
 * <p>Each call to {@code iterator()} on the returned object starts a new
 * pass from index 0. Element removal is not supported.
 *
 * @return an iterable over the tuples in the order given by {@code mappings}
 */
@Override
public Iterable<Tuple> sort() {
  new QuickSort().sort(this, 0, mappings.length);
  return new Iterable<Tuple>() {
    @Override
    public Iterator<Tuple> iterator() {
      return new Iterator<Tuple>() {
        // Position of the next mapping entry to hand out.
        int index;

        @Override
        public boolean hasNext() {
          return index < mappings.length;
        }

        @Override
        public Tuple next() {
          // Honour the Iterator contract: signal exhaustion explicitly
          // instead of failing with an ArrayIndexOutOfBoundsException.
          if (index >= mappings.length) {
            throw new java.util.NoSuchElementException();
          }
          return tuples[mappings[index++]];
        }

        @Override
        public void remove() {
          throw new TajoRuntimeException(new UnsupportedException());
        }
      };
    }
  };
}
Example #14
Source File: BasicReducePartition.java From RDFS with Apache License 2.0 | 5 votes |
protected void sortMemBlock(MemoryBlock memBlock) { if (memBlock.currentPtr <= 0) { return; } // quick sort the offsets OffsetSortable sortableObj = new OffsetSortable(memBlock, kvbuffer); QuickSort quickSort = new QuickSort(); quickSort.sort(sortableObj, 0, memBlock.currentPtr); }
Example #15
Source File: LookUpTable.java From Cubert with Apache License 2.0 | 4 votes |
/**
 * Builds the lookup structures in two timed phases:
 * <ol>
 *   <li>sorts {@code sortable} over {@code [0, offsetArr.length)} (skipped
 *       when there are fewer than two entries);</li>
 *   <li>walks the offsets in order, recording in {@code hashCodeArr} the
 *       index at which each run of a hash code starts, and OR-ing
 *       {@code SIGNBIT} into every offset that is the first entry or whose
 *       tuple does not compare equal (per {@code compareKeys}) to the
 *       previous tuple.</li>
 * </ol>
 * Timing for each phase is reported through {@code print.f}.
 *
 * @throws IOException declared by the signature; presumably raised by the
 *         tuple store or helpers (not visible in this block)
 */
private void buildTable() throws IOException {
    final QuickSort sorter = new QuickSort();
    long begin;
    long finish;

    /* Phase 1: sort the offsets array */
    begin = System.currentTimeMillis();
    if (offsetArr.length > 1) {
        sorter.sort(sortable, 0, offsetArr.length);
    }
    finish = System.currentTimeMillis();
    print.f("LookUpTable: Sorted %d entries in %d ms", offsetArr.length, (finish - begin));

    /* Phase 2: fill in the HashCode array */
    begin = System.currentTimeMillis();
    int lastHash = -1;
    Tuple previous = newTuple();
    Tuple current = newTuple();

    int idx = 0;
    while (idx < offsetArr.length) {
        current = store.getTuple(offsetArr[idx], current);
        final int hash = tupleHashCode(current);

        // A change of hash code marks the start of a new run.
        if (hash != lastHash) {
            hashCodeArr[hash] = idx;
            lastHash = hash;
        }

        // compareKeys is only consulted once a previous tuple exists.
        final boolean matchesPrevious = idx != 0 && compareKeys(previous, current);
        if (!matchesPrevious) {
            offsetArr[idx] = offsetArr[idx] | SIGNBIT;
        }

        /* Object reuse: exchange the two tuples rather than allocating */
        final Tuple spare = current;
        current = previous;
        previous = spare;

        idx++;
    }
    finish = System.currentTimeMillis();
    print.f("LookUpTable: Created HashCode Array for %d entries in %d ms",
            offsetArr.length,
            (finish - begin));
}
Example #16
Source File: MapTask.java From hadoop-gpu with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, TaskReporter reporter ) throws IOException, ClassNotFoundException { this.job = job; this.reporter = reporter; localFs = FileSystem.getLocal(job); partitions = job.getNumReduceTasks(); rfs = ((LocalFileSystem)localFs).getRaw(); indexCacheList = new ArrayList<SpillRecord>(); //sanity checks final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8); final float recper = job.getFloat("io.sort.record.percent",(float)0.05); final int sortmb = job.getInt("io.sort.mb", 100); if (spillper > (float)1.0 || spillper < (float)0.0) { throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper); } if (recper > (float)1.0 || recper < (float)0.01) { throw new IOException("Invalid \"io.sort.record.percent\": " + recper); } if ((sortmb & 0x7FF) != sortmb) { throw new IOException("Invalid \"io.sort.mb\": " + sortmb); } sorter = ReflectionUtils.newInstance( job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job); LOG.info("io.sort.mb = " + sortmb); // buffers and accounting int maxMemUsage = sortmb << 20; int recordCapacity = (int)(maxMemUsage * recper); recordCapacity -= recordCapacity % RECSIZE; kvbuffer = new byte[maxMemUsage - recordCapacity]; bufvoid = kvbuffer.length; recordCapacity /= RECSIZE; kvoffsets = new int[recordCapacity]; kvindices = new int[recordCapacity * ACCTSIZE]; softBufferLimit = (int)(kvbuffer.length * spillper); softRecordLimit = (int)(kvoffsets.length * spillper); LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length); LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length); // k/v serialization comparator = job.getOutputKeyComparator(); keyClass = (Class<K>)job.getMapOutputKeyClass(); valClass = (Class<V>)job.getMapOutputValueClass(); serializationFactory = new SerializationFactory(job); keySerializer = serializationFactory.getSerializer(keyClass); keySerializer.open(bb); valSerializer = 
serializationFactory.getSerializer(valClass); valSerializer.open(bb); // counters mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES); mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS); Counters.Counter combineInputCounter = reporter.getCounter(COMBINE_INPUT_RECORDS); combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS); // compression if (job.getCompressMapOutput()) { Class<? extends CompressionCodec> codecClass = job.getMapOutputCompressorClass(DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); } // combiner combinerRunner = CombinerRunner.create(job, getTaskID(), combineInputCounter, reporter, null); if (combinerRunner != null) { combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter); } else { combineCollector = null; } minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3); spillThread.setDaemon(true); spillThread.setName("SpillThread"); spillLock.lock(); try { spillThread.start(); while (!spillThreadRunning) { spillDone.await(); } } catch (InterruptedException e) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } finally { spillLock.unlock(); } if (sortSpillException != null) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } }
Example #17
Source File: ExternalSorter.java From incubator-tez with Apache License 2.0 | 4 votes |
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs, long initialMemoryAvailable) throws IOException { this.outputContext = outputContext; this.conf = conf; this.partitions = numOutputs; rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw(); int assignedMb = (int) (initialMemoryAvailable >> 20); if (assignedMb <= 0) { if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB this.availableMemoryMb = 1; LOG.warn("initialAvailableMemory: " + initialMemoryAvailable + " is too low. Rounding to 1 MB"); } else { throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable); } } else { this.availableMemoryMb = assignedMb; } // sorter sorter = ReflectionUtils.newInstance(this.conf.getClass( TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class, IndexedSorter.class), this.conf); comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf); // k/v serialization keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf); valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf); serializationFactory = new SerializationFactory(this.conf); keySerializer = serializationFactory.getSerializer(keyClass); valSerializer = serializationFactory.getSerializer(valClass); // counters mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES); mapOutputRecordCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS); outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD); fileOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL); spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS); additionalSpillBytesWritten = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN); additionalSpillBytesRead = 
outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ); numAdditionalSpills = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT); // compression if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, this.conf); } else { codec = null; } this.ifileReadAhead = this.conf.getBoolean( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); // Task outputs mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext); LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]"); this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions); this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf); this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext); }
Example #18
Source File: MapTask.java From RDFS with Apache License 2.0 | 4 votes |
@SuppressWarnings("unchecked") public MapOutputBuffer(TaskUmbilicalProtocol umbilical, JobConf job, TaskReporter reporter ) throws IOException, ClassNotFoundException { this.job = job; this.reporter = reporter; localFs = FileSystem.getLocal(job); partitions = job.getNumReduceTasks(); rfs = ((LocalFileSystem)localFs).getRaw(); indexCacheList = new ArrayList<SpillRecord>(); spillSortCounters = new MapSpillSortCounters(reporter); //sanity checks final float spillper = job.getFloat("io.sort.spill.percent",(float)0.8); final float recper = job.getFloat("io.sort.record.percent",(float)0.05); boolean localMode = job.get("mapred.job.tracker", "local").equals("local"); int sortmb = job.getInt("io.sort.mb", 100); if (localMode) { sortmb = job.getInt("io.sort.mb.localmode", 100); } if (spillper > (float)1.0 || spillper < (float)0.0) { throw new IOException("Invalid \"io.sort.spill.percent\": " + spillper); } if (recper > (float)1.0 || recper < (float)0.01) { throw new IOException("Invalid \"io.sort.record.percent\": " + recper); } if ((sortmb & 0x7FF) != sortmb) { throw new IOException("Invalid \"io.sort.mb\": " + sortmb); } sorter = ReflectionUtils.newInstance( job.getClass("map.sort.class", QuickSort.class, IndexedSorter.class), job); LOG.info("io.sort.mb = " + sortmb); // buffers and accounting int maxMemUsage = sortmb << 20; int recordCapacity = (int)(maxMemUsage * recper); recordCapacity -= recordCapacity % RECSIZE; kvbuffer = new byte[maxMemUsage - recordCapacity]; bufvoid = kvbuffer.length; recordCapacity /= RECSIZE; kvoffsets = new int[recordCapacity]; kvindices = new int[recordCapacity * ACCTSIZE]; softBufferLimit = (int)(kvbuffer.length * spillper); softRecordLimit = (int)(kvoffsets.length * spillper); LOG.info("data buffer = " + softBufferLimit + "/" + kvbuffer.length); LOG.info("record buffer = " + softRecordLimit + "/" + kvoffsets.length); // k/v serialization comparator = job.getOutputKeyComparator(); keyClass = (Class<K>)job.getMapOutputKeyClass(); valClass = 
(Class<V>)job.getMapOutputValueClass(); serializationFactory = new SerializationFactory(job); keySerializer = serializationFactory.getSerializer(keyClass); keySerializer.open(bb); valSerializer = serializationFactory.getSerializer(valClass); valSerializer.open(bb); // counters mapOutputByteCounter = reporter.getCounter(MAP_OUTPUT_BYTES); mapOutputRecordCounter = reporter.getCounter(MAP_OUTPUT_RECORDS); Counters.Counter combineInputCounter = reporter.getCounter(COMBINE_INPUT_RECORDS); combineOutputCounter = reporter.getCounter(COMBINE_OUTPUT_RECORDS); // compression if (job.getCompressMapOutput()) { Class<? extends CompressionCodec> codecClass = job.getMapOutputCompressorClass(DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, job); } // combiner combinerRunner = CombinerRunner.create(job, getTaskID(), combineInputCounter, reporter, null); if (combinerRunner != null) { combineCollector= new CombineOutputCollector<K,V>(combineOutputCounter); } else { combineCollector = null; } minSpillsForCombine = job.getInt("min.num.spills.for.combine", 3); spillThread.setDaemon(true); spillThread.setName("SpillThread"); spillLock.lock(); try { spillThread.start(); while (!spillThreadRunning) { spillDone.await(); } } catch (InterruptedException e) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } finally { spillLock.unlock(); } if (sortSpillException != null) { throw (IOException)new IOException("Spill thread failed to initialize" ).initCause(sortSpillException); } }
Example #19
Source File: SampleSortTemplate.java From Bats with Apache License 2.0 | 4 votes |
/**
 * Quick-sorts this sorter over the index range
 * {@code [0, vector2.getCount())}.
 *
 * @param vector2   selection vector whose count bounds the sort
 * @param container not referenced by this method body
 */
@Override
public void sort(SelectionVector2 vector2, VectorContainer container) {
  final QuickSort sorter = new QuickSort();
  final int upperBound = vector2.getCount();
  sorter.sort(this, 0, upperBound);
}