Java Code Examples for org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputValueClass()
The following examples show how to use
org.apache.tez.runtime.library.common.ConfigUtils#getIntermediateInputValueClass().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ShuffledUnorderedKVReader.java From incubator-tez with Apache License 2.0 | 6 votes |
public ShuffledUnorderedKVReader(ShuffleManager shuffleManager, Configuration conf, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, int ifileBufferSize, TezCounter inputRecordCounter) throws IOException { this.shuffleManager = shuffleManager; this.codec = codec; this.ifileReadAhead = ifileReadAhead; this.ifileReadAheadLength = ifileReadAheadLength; this.ifileBufferSize = ifileBufferSize; this.inputRecordCounter = inputRecordCounter; this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.keyIn = new DataInputBuffer(); this.valIn = new DataInputBuffer(); SerializationFactory serializationFactory = new SerializationFactory(conf); this.keyDeserializer = serializationFactory.getDeserializer(keyClass); this.keyDeserializer.open(keyIn); this.valDeserializer = serializationFactory.getDeserializer(valClass); this.valDeserializer.open(valIn); }
Example 2
Source File: OrderedGroupedKVInput.java From tez with Apache License 2.0 | 6 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) protected synchronized void createValuesIterator() throws IOException { // Not used by ReduceProcessor RawComparator rawComparator = ConfigUtils.getIntermediateInputKeyComparator(conf); Class<?> keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); Class<?> valClass = ConfigUtils.getIntermediateInputValueClass(conf); LOG.info(getContext().getSourceVertexName() + ": " + "creating ValuesIterator with " + "comparator=" + rawComparator.getClass().getName() + ", keyClass=" + keyClass.getName() + ", valClass=" + valClass.getName()); vIter = new ValuesIterator(rawIter, rawComparator, keyClass, valClass, conf, inputKeyCounter, inputValueCounter); }
Example 3
Source File: UnorderedKVReader.java From tez with Apache License 2.0 | 6 votes |
public UnorderedKVReader(ShuffleManager shuffleManager, Configuration conf, CompressionCodec codec, boolean ifileReadAhead, int ifileReadAheadLength, int ifileBufferSize, TezCounter inputRecordCounter, InputContext context) throws IOException { this.shuffleManager = shuffleManager; this.context = context; this.codec = codec; this.ifileReadAhead = ifileReadAhead; this.ifileReadAheadLength = ifileReadAheadLength; this.ifileBufferSize = ifileBufferSize; this.inputRecordCounter = inputRecordCounter; this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.keyIn = new DataInputBuffer(); this.valIn = new DataInputBuffer(); SerializationFactory serializationFactory = new SerializationFactory(conf); this.keyDeserializer = serializationFactory.getDeserializer(keyClass); this.keyDeserializer.open(keyIn); this.valDeserializer = serializationFactory.getDeserializer(valClass); this.valDeserializer.open(valIn); }
Example 4
Source File: ShuffledMergedInput.java From incubator-tez with Apache License 2.0 | 5 votes |
@SuppressWarnings({ "rawtypes", "unchecked" }) protected synchronized void createValuesIterator() throws IOException { // Not used by ReduceProcessor vIter = new ValuesIterator(rawIter, (RawComparator) ConfigUtils.getIntermediateInputKeyComparator(conf), ConfigUtils.getIntermediateInputKeyClass(conf), ConfigUtils.getIntermediateInputValueClass(conf), conf, inputKeyCounter, inputValueCounter); }
Example 5
Source File: MRCombiner.java From incubator-tez with Apache License 2.0 | 5 votes |
public MRCombiner(TezTaskContext taskContext) throws IOException { this.conf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); assert(taskContext instanceof TezInputContext || taskContext instanceof TezOutputContext); if (taskContext instanceof TezOutputContext) { this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf); this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf); this.reporter = new MRTaskReporter((TezOutputContext)taskContext); } else { this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf); this.reporter = new MRTaskReporter((TezInputContext)taskContext); } this.useNewApi = ConfigUtils.useNewApi(conf); combineInputKeyCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS); combineInputValueCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false); this.mrTaskAttemptID = new TaskAttemptID( new TaskID(String.valueOf(taskContext.getApplicationId() .getClusterTimestamp()), taskContext.getApplicationId().getId(), isMap ? TaskType.MAP : TaskType.REDUCE, taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber()); LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi); }
Example 6
Source File: MRCombiner.java From tez with Apache License 2.0 | 5 votes |
public MRCombiner(TaskContext taskContext) throws IOException { final Configuration userConf = TezUtils.createConfFromUserPayload(taskContext.getUserPayload()); useNewApi = ConfigUtils.useNewApi(userConf); if (useNewApi) { conf = new JobConf(userConf); } else { conf = userConf; } assert(taskContext instanceof InputContext || taskContext instanceof OutputContext); if (taskContext instanceof OutputContext) { this.keyClass = ConfigUtils.getIntermediateOutputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateOutputValueClass(conf); this.comparator = ConfigUtils.getIntermediateOutputKeyComparator(conf); this.reporter = new MRTaskReporter((OutputContext)taskContext); } else { this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf); this.reporter = new MRTaskReporter((InputContext)taskContext); } combineInputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_INPUT_RECORDS); combineOutputRecordsCounter = taskContext.getCounters().findCounter(TaskCounter.COMBINE_OUTPUT_RECORDS); boolean isMap = conf.getBoolean(MRConfig.IS_MAP_PROCESSOR,false); this.mrTaskAttemptID = new TaskAttemptID( new TaskID(String.valueOf(taskContext.getApplicationId() .getClusterTimestamp()), taskContext.getApplicationId().getId(), isMap ? TaskType.MAP : TaskType.REDUCE, taskContext.getTaskIndex()), taskContext.getTaskAttemptNumber()); LOG.info("Using combineKeyClass: " + keyClass + ", combineValueClass: " + valClass + ", combineComparator: " +comparator + ", useNewApi: " + useNewApi); }
Example 7
Source File: LocalShuffle.java From incubator-tez with Apache License 2.0 | 4 votes |
public LocalShuffle(TezInputContext inputContext, Configuration conf, int numInputs) throws IOException { this.inputContext = inputContext; this.conf = conf; this.numInputs = numInputs; this.keyClass = ConfigUtils.getIntermediateInputKeyClass(conf); this.valClass = ConfigUtils.getIntermediateInputValueClass(conf); this.comparator = ConfigUtils.getIntermediateInputKeyComparator(conf); this.sortFactor = conf.getInt( TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR, TezJobConfig.TEZ_RUNTIME_IO_SORT_FACTOR_DEFAULT); this.rfs = FileSystem.getLocal(conf).getRaw(); this.spilledRecordsCounter = inputContext.getCounters().findCounter(TaskCounter.SPILLED_RECORDS); // compression if (ConfigUtils.isIntermediateInputCompressed(conf)) { Class<? extends CompressionCodec> codecClass = ConfigUtils.getIntermediateInputCompressorClass(conf, DefaultCodec.class); this.codec = ReflectionUtils.newInstance(codecClass, conf); } else { this.codec = null; } this.ifileReadAhead = conf.getBoolean( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_DEFAULT); if (this.ifileReadAhead) { this.ifileReadAheadLength = conf.getInt( TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES, TezJobConfig.TEZ_RUNTIME_IFILE_READAHEAD_BYTES_DEFAULT); } else { this.ifileReadAheadLength = 0; } this.ifileBufferSize = conf.getInt("io.file.buffer.size", TezJobConfig.TEZ_RUNTIME_IFILE_BUFFER_SIZE_DEFAULT); // Always local this.mapOutputFile = new TezLocalTaskOutputFiles(conf, inputContext.getUniqueIdentifier()); }
Example 8
Source File: MergeManager.java From incubator-tez with Apache License 2.0 | 4 votes |
@Override public void merge(List<MapOutput> inputs) throws IOException, InterruptedException { if (inputs == null || inputs.size() == 0) { return; } numMemToDiskMerges.increment(1); //name this output file same as the name of the first file that is //there in the current list of inmem files (this is guaranteed to //be absent on the disk currently. So we don't overwrite a prev. //created spill). Also we need to create the output file now since //it is not guaranteed that this file will be present after merge //is called (we delete empty files as soon as we see them //in the merge method) //figure out the mapId InputAttemptIdentifier srcTaskIdentifier = inputs.get(0).getAttemptIdentifier(); List<Segment> inMemorySegments = new ArrayList<Segment>(); long mergeOutputSize = createInMemorySegments(inputs, inMemorySegments,0); int noInMemorySegments = inMemorySegments.size(); // TODO Maybe track serialized vs deserialized bytes. // All disk writes done by this merge are overhead - due to the lac of // adequate memory to keep all segments in memory. Path outputPath = mapOutputFile.getInputFileForWrite( srcTaskIdentifier.getInputIdentifier().getInputIndex(), mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX); Writer writer = null; try { writer = new Writer(conf, rfs, outputPath, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), codec, null, null); TezRawKeyValueIterator rIter = null; LOG.info("Initiating in-memory merge with " + noInMemorySegments + " segments..."); // Nothing actually materialized to disk - controlled by setting sort-factor to #segments. 
rIter = TezMerger.merge(conf, rfs, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), inMemorySegments, inMemorySegments.size(), new Path(inputContext.getUniqueIdentifier()), (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), nullProgressable, spilledRecordsCounter, null, additionalBytesRead, null); // spilledRecordsCounter is tracking the number of keys that will be // read from each of the segments being merged - which is essentially // what will be written to disk. if (null == combiner) { TezMerger.writeFile(rIter, writer, nullProgressable, TezJobConfig.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); } else { // TODO Counters for Combine runCombineProcessor(rIter, writer); } writer.close(); additionalBytesWritten.increment(writer.getCompressedLength()); writer = null; LOG.info(inputContext.getUniqueIdentifier() + " Merge of the " + noInMemorySegments + " files in-memory complete." + " Local file is " + outputPath + " of size " + localFS.getFileStatus(outputPath).getLen()); } catch (IOException e) { //make sure that we delete the ondisk file that we created //earlier when we invoked cloneFileAttributes localFS.delete(outputPath, true); throw e; } finally { if (writer != null) { writer.close(); } } // Note the output of the merge closeOnDiskFile(outputPath); }
Example 9
Source File: MergeManager.java From incubator-tez with Apache License 2.0 | 4 votes |
@Override public void merge(List<Path> inputs) throws IOException { // sanity check if (inputs == null || inputs.isEmpty()) { LOG.info("No ondisk files to merge..."); return; } numDiskToDiskMerges.increment(1); long approxOutputSize = 0; int bytesPerSum = conf.getInt("io.bytes.per.checksum", 512); LOG.info("OnDiskMerger: We have " + inputs.size() + " map outputs on disk. Triggering merge..."); // 1. Prepare the list of files to be merged. for (Path file : inputs) { approxOutputSize += localFS.getFileStatus(file).getLen(); } // add the checksum length approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum); // 2. Start the on-disk merge process Path outputPath = localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(), approxOutputSize, conf).suffix(Constants.MERGED_OUTPUT_PREFIX); Writer writer = new Writer(conf, rfs, outputPath, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), codec, null, null); TezRawKeyValueIterator iter = null; Path tmpDir = new Path(inputContext.getUniqueIdentifier()); try { iter = TezMerger.merge(conf, rfs, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize, inputs.toArray(new Path[inputs.size()]), true, ioSortFactor, tmpDir, (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), nullProgressable, spilledRecordsCounter, null, mergedMapOutputsCounter, null); // TODO Maybe differentiate between data written because of Merges and // the finalMerge (i.e. 
final mem available may be different from // initial merge mem) TezMerger.writeFile(iter, writer, nullProgressable, TezJobConfig.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); writer.close(); additionalBytesWritten.increment(writer.getCompressedLength()); } catch (IOException e) { localFS.delete(outputPath, true); throw e; } closeOnDiskFile(outputPath); LOG.info(inputContext.getUniqueIdentifier() + " Finished merging " + inputs.size() + " map output files on disk of total-size " + approxOutputSize + "." + " Local output file is " + outputPath + " of size " + localFS.getFileStatus(outputPath).getLen()); }
Example 10
Source File: ReduceProcessor.java From incubator-tez with Apache License 2.0 | 4 votes |
@Override public void run(Map<String, LogicalInput> inputs, Map<String, LogicalOutput> outputs) throws Exception { LOG.info("Running reduce: " + processorContext.getUniqueIdentifier()); if (outputs.size() <= 0 || outputs.size() > 1) { throw new IOException("Invalid number of outputs" + ", outputCount=" + outputs.size()); } if (inputs.size() <= 0 || inputs.size() > 1) { throw new IOException("Invalid number of inputs" + ", inputCount=" + inputs.size()); } LogicalInput in = inputs.values().iterator().next(); in.start(); List<Input> pendingInputs = new LinkedList<Input>(); pendingInputs.add(in); processorContext.waitForAllInputsReady(pendingInputs); LOG.info("Input is ready for consumption. Starting Output"); LogicalOutput out = outputs.values().iterator().next(); out.start(); initTask(out); this.statusUpdate(); Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf); Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf); LOG.info("Using keyClass: " + keyClass); LOG.info("Using valueClass: " + valueClass); RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf); LOG.info("Using comparator: " + comparator); reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS); reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS); // Sanity check if (!(in instanceof ShuffledMergedInputLegacy)) { throw new IOException("Illegal input to reduce: " + in.getClass()); } ShuffledMergedInputLegacy shuffleInput = (ShuffledMergedInputLegacy)in; KeyValuesReader kvReader = shuffleInput.getReader(); KeyValueWriter kvWriter = null; if((out instanceof MROutputLegacy)) { kvWriter = ((MROutputLegacy) out).getWriter(); } else if ((out instanceof OnFileSortedOutput)) { kvWriter = ((OnFileSortedOutput) out).getWriter(); } else { throw new IOException("Illegal output to reduce: " + in.getClass()); } if (useNewApi) { try { runNewReducer( jobConf, mrReporter, shuffleInput, comparator, keyClass, 
valueClass, kvWriter); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } } else { runOldReducer( jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter); } done(); }
Example 11
Source File: MergeManager.java From tez with Apache License 2.0 | 4 votes |
@Override public void merge(List<MapOutput> inputs) throws IOException, InterruptedException { if (inputs == null || inputs.size() == 0) { return; } numMemToDiskMerges.increment(1); inputContext.notifyProgress(); //name this output file same as the name of the first file that is //there in the current list of inmem files (this is guaranteed to //be absent on the disk currently. So we don't overwrite a prev. //created spill). Also we need to create the output file now since //it is not guaranteed that this file will be present after merge //is called (we delete empty files as soon as we see them //in the merge method) //figure out the mapId srcTaskIdentifier = inputs.get(0).getAttemptIdentifier(); List<Segment> inMemorySegments = new ArrayList<Segment>(); long mergeOutputSize = createInMemorySegments(inputs, inMemorySegments,0); int noInMemorySegments = inMemorySegments.size(); // TODO Maybe track serialized vs deserialized bytes. // All disk writes done by this merge are overhead - due to the lack of // adequate memory to keep all segments in memory. outputPath = mapOutputFile.getInputFileForWrite( srcTaskIdentifier.getInputIdentifier(), srcTaskIdentifier.getSpillEventId(), mergeOutputSize).suffix(Constants.MERGED_OUTPUT_PREFIX); Writer writer = null; long outFileLen = 0; try { writer = new Writer(conf, rfs, outputPath, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), codec, null, null); TezRawKeyValueIterator rIter = null; LOG.info("Initiating in-memory merge with " + noInMemorySegments + " segments..."); tmpDir = new Path(inputContext.getUniqueIdentifier()); // Nothing actually materialized to disk - controlled by setting sort-factor to #segments. 
rIter = TezMerger.merge(conf, rfs, (Class)ConfigUtils.getIntermediateInputKeyClass(conf), (Class)ConfigUtils.getIntermediateInputValueClass(conf), inMemorySegments, inMemorySegments.size(), tmpDir, (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf), progressable, spilledRecordsCounter, null, additionalBytesRead, null); // spilledRecordsCounter is tracking the number of keys that will be // read from each of the segments being merged - which is essentially // what will be written to disk. if (null == combiner) { TezMerger.writeFile(rIter, writer, progressable, TezRuntimeConfiguration.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT); } else { // TODO Counters for Combine runCombineProcessor(rIter, writer); } writer.close(); additionalBytesWritten.increment(writer.getCompressedLength()); writer = null; outFileLen = localFS.getFileStatus(outputPath).getLen(); LOG.info(inputContext.getUniqueIdentifier() + " Merge of the " + noInMemorySegments + " files in-memory complete." + " Local file is " + outputPath + " of size " + outFileLen); } catch (IOException e) { //make sure that we delete the ondisk file that we created //earlier when we invoked cloneFileAttributes localFS.delete(outputPath, true); throw e; } finally { if (writer != null) { writer.close(); } } // Note the output of the merge closeOnDiskFile(new FileChunk(outputPath, 0, outFileLen)); }
Example 12
Source File: ReduceProcessor.java From tez with Apache License 2.0 | 4 votes |
@Override public void run(Map<String, LogicalInput> _inputs, Map<String, LogicalOutput> _outputs) throws Exception { this.inputs = _inputs; this.outputs = _outputs; progressHelper = new ProgressHelper(this.inputs, processorContext, this.getClass().getSimpleName()); LOG.info("Running reduce: " + processorContext.getUniqueIdentifier()); if (_outputs.size() <= 0 || _outputs.size() > 1) { throw new IOException("Invalid number of _outputs" + ", outputCount=" + _outputs.size()); } if (_inputs.size() <= 0 || _inputs.size() > 1) { throw new IOException("Invalid number of _inputs" + ", inputCount=" + _inputs.size()); } LogicalInput in = _inputs.values().iterator().next(); in.start(); List<Input> pendingInputs = new LinkedList<Input>(); pendingInputs.add(in); processorContext.waitForAllInputsReady(pendingInputs); LOG.info("Input is ready for consumption. Starting Output"); LogicalOutput out = _outputs.values().iterator().next(); out.start(); initTask(out); progressHelper.scheduleProgressTaskService(0, 100); this.statusUpdate(); Class keyClass = ConfigUtils.getIntermediateInputKeyClass(jobConf); Class valueClass = ConfigUtils.getIntermediateInputValueClass(jobConf); LOG.info("Using keyClass: " + keyClass); LOG.info("Using valueClass: " + valueClass); RawComparator comparator = ConfigUtils.getInputKeySecondaryGroupingComparator(jobConf); LOG.info("Using comparator: " + comparator); reduceInputKeyCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_GROUPS); reduceInputValueCounter = mrReporter.getCounter(TaskCounter.REDUCE_INPUT_RECORDS); // Sanity check if (!(in instanceof OrderedGroupedInputLegacy)) { throw new IOException("Illegal input to reduce: " + in.getClass()); } OrderedGroupedInputLegacy shuffleInput = (OrderedGroupedInputLegacy)in; KeyValuesReader kvReader = shuffleInput.getReader(); KeyValueWriter kvWriter = null; if((out instanceof MROutputLegacy)) { kvWriter = ((MROutputLegacy) out).getWriter(); } else if ((out instanceof OrderedPartitionedKVOutput)) { 
kvWriter = ((OrderedPartitionedKVOutput) out).getWriter(); } else { throw new IOException("Illegal output to reduce: " + in.getClass()); } if (useNewApi) { try { runNewReducer( jobConf, mrReporter, shuffleInput, comparator, keyClass, valueClass, kvWriter); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } } else { runOldReducer( jobConf, mrReporter, kvReader, comparator, keyClass, valueClass, kvWriter); } done(); }