Java Code Examples for org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateOutputKeyComparator()
The following examples show how to use
org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateOutputKeyComparator().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MRCombiner.java From incubator-tez with Apache License 2.0 | 5 votes |
public MRCombiner(TezTaskContext taskContext) throws IOException { this.conf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); assert(taskContext instanceof TezInputContext || taskContext instanceof TezOutputContext); if (taskContext instanceof TezOutputContext) { this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf); this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf); this.reporter = new MRTaskReporter((TezOutputContext)taskContext); } else { this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf); this.reporter = new MRTaskReporter((TezInputContext)taskContext); } this.useNewApi = ConfigUtils.useNewApi(conf); combineInputKeyCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS); combineInputValueCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false); this.mrTaskAttemptID = new TaskAttemptID( new TaskID(String.valueOf(taskContext.getApplicationId() .getClusterTimestamp()), taskContext.getApplicationId().getId(), isMap ? TaskType.MAP : TaskType.REDUCE, taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber()); LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi); }
Example 2
Source File: PipelinedSorter.java From tez with Apache License 2.0 | 5 votes |
public SortSpan next() { ByteBuffer remaining = end(); if(remaining != null) { SortSpan newSpan = null; int items = length(); int perItem = kvbuffer.position()/items; if (reinit) { //next mem block //quite possible that the previous span had a length of 1. It is better to reinit here for new span. items = 1024*1024; perItem = 16; } final RawComparator newComparator = ConfigUtils.getIntermediateOutputKeyComparator(conf); if (this.comparator == newComparator) { LOG.warn("Same comparator used. comparator={}, newComparator={}," + " hashCode: comparator={}, newComparator={}", this.comparator, newComparator, System.identityHashCode(this.comparator), System.identityHashCode(newComparator)); } newSpan = new SortSpan(remaining, items, perItem, newComparator); newSpan.index = index+1; LOG.info(String.format(outputContext.getDestinationVertexName() + ": " + "New Span%d.length = %d, perItem = %d", newSpan.index, newSpan .length(), perItem) + ", counter:" + mapOutputRecordCounter.getValue()); return newSpan; } return null; }
Example 3
Source File: MRCombiner.java From tez with Apache License 2.0 | 5 votes |
public MRCombiner(TaskContext taskContext) throws IOException { final Configuration userConf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); useNewApi = ConfigUtils.useNewApi(userConf); if (useNewApi) { conf = new JobConf(userConf); } else { conf = userConf; } assert(taskContext instanceof InputContext || taskContext instanceof OutputContext); if (taskContext instanceof OutputContext) { this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf); this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf); this.reporter = new MRTaskReporter((OutputContext)taskContext); } else { this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf); this.reporter = new MRTaskReporter((InputContext)taskContext); } combineInputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS); combineOutputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false); this.mrTaskAttemptID = new TaskAttemptID( new TaskID(String.valueOf(taskContext.getApplicationId() .getClusterTimestamp()), taskContext.getApplicationId().getId(), isMap ? TaskType.MAP : TaskType.REDUCE, taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber()); LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi); }
Example 4
Source File: ExternalSorter.java From incubator-tez with Apache License 2.0 | 4 votes |
public ExternalSorter(TezOutputContext outputContext, Configuration conf, int numOutputs, long initialMemoryAvailable) throws IOException { this.outputContext = outputContext; this.conf = conf; this.partitions = numOutputs; rfs = ((LocalFileSystem)FileSystem.getLocal(this.conf)).getRaw(); int assignedMb = (int) (initialMemoryAvailable >> 20); if (assignedMb <= 0) { if (initialMemoryAvailable > 0) { // Rounded down to 0MB - may be > 0 && < 1MB this.availableMemoryMb = 1; LOG.warn("initialAvailableMemory: " + initialMemoryAvailable + " is too low. Rounding to 1 MB"); } else { throw new RuntimeException("InitialMemoryAssigned is <= 0: " + initialMemoryAvailable); } } else { this.availableMemoryMb = assignedMb; } // sorter sorter = ReflectionUtils.newInstance(this.conf.getClass( TezJobConfig.TEZ_RUNTIME_INTERNAL_SORTER_CLASS, QuickSort.class, IndexedSorter.class), this.conf); comparator = ConfigUtils.getIntermediateOutputKeyComparator(this.conf); // k/v serialization keyClass = ConfigUtils.getIntermediateOutputKeyClass(this.conf); valClass = ConfigUtils.getIntermediateOutputValueClass(this.conf); serializationFactory = new SerializationFactory(this.conf); keySerializer = serializationFactory.getSerializer(keyClass); valSerializer = serializationFactory.getSerializer(valClass); // counters mapOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES); mapOutputRecordCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_RECORDS); outputBytesWithOverheadCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD); fileOutputByteCounter = outputContext.getCounters().findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL); spilledRecordsCounter = outputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS); additionalSpillBytesWritten = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN); additionalSpillBytesRead = 
outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ); numAdditionalSpills = outputContext.getCounters().findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT); // compression if (ConfigUtils.shouldCompressIntermediateOutput(this.conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateOutputCompressorClass(this.conf, DefaultCodec.class); codec = ReflectionUtils.newInstance(codecClass, this.conf); } else { codec = null; } this.ifileReadAhead = this.conf.getBoolean( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); // Task outputs mapOutputFile = TezRuntimeUtils.instantiateTaskOutputManager(conf, outputContext); LOG.info("Instantiating Partitioner: [" + conf.get(TezJobConfig.TEZ_RUNTIME_PARTITIONER_CLASS) + "]"); this.conf.setInt(TezRuntimeFrameworkConfigs.TEZ_RUNTIME_NUM_EXPECTED_PARTITIONS, this.partitions); this.partitioner = TezRuntimeUtils.instantiatePartitioner(this.conf); this.combiner = TezRuntimeUtils.instantiateCombiner(this.conf, outputContext); }
Example 5
Source File: PipelinedSorter.java From tez with Apache License 2.0 | 4 votes |
public void sort() throws IOException { SortSpan newSpan = span.next(); if(newSpan == null) { //avoid sort/spill of empty span StopWatch stopWatch = new StopWatch(); stopWatch.start(); // sort in the same thread, do not wait for the thread pool merger.add(span.sort(sorter)); boolean ret = spill(true); stopWatch.stop(); if (LOG.isDebugEnabled()) { LOG.debug(outputContext.getDestinationVertexName() + ": Time taken for spill " + (stopWatch.now(TimeUnit.MILLISECONDS)) + " ms"); } if (pipelinedShuffle && ret) { sendPipelinedShuffleEvents(); } //safe to reset bufferIndex to 0; bufferIndex = 0; int items = 1024*1024; int perItem = 16; if(span.length() != 0) { items = span.length(); perItem = span.kvbuffer.limit()/items; items = (int) ((span.capacity)/(METASIZE+perItem)); if(items > 1024*1024) { // our goal is to have 1M splits and sort early items = 1024*1024; } } Preconditions.checkArgument(buffers.get(bufferIndex) != null, "block should not be empty"); //TODO: fix per item being passed. span = new SortSpan((ByteBuffer)buffers.get(bufferIndex).clear(), (1024*1024), perItem, ConfigUtils.getIntermediateOutputKeyComparator(this.conf)); } else { // queue up the sort SortTask task = new SortTask(span, sorter); LOG.debug("Submitting span={} for sort", span.toString()); Future<SpanIterator> future = sortmaster.submit(task); merger.add(future); span = newSpan; } valSerializer.open(span.out); keySerializer.open(span.out); }