Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#getPos()
The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#getPos().
The project and source file each example was taken from are noted above it.
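Before the project examples, here is a minimal sketch of the common pattern they all share: getPos() reports the current byte offset of the stream, so calling it before and after a write yields the start offset and length of each record. The class name and output path below are made up for illustration and are not taken from any of the projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class GetPosExample {

    // Writes two records and records the byte offset of each one, the same
    // pattern the examples below use to build index and trailer structures.
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);          // local FS unless fs.defaultFS points at HDFS
        Path path = new Path("/tmp/getpos-demo.bin");  // hypothetical output path

        try (FSDataOutputStream out = fs.create(path, true)) {
            long firstOffset = out.getPos();           // 0 for a freshly created file
            out.write("record-1".getBytes(StandardCharsets.UTF_8));

            long secondOffset = out.getPos();          // bytes written so far
            out.write("record-2".getBytes(StandardCharsets.UTF_8));

            long totalLength = out.getPos();
            System.out.printf("record 1 at %d, record 2 at %d, total %d bytes%n",
                firstOffset, secondOffset, totalLength);
        }
    }
}

The offsets captured this way can later be handed to an index structure (as the Tez and MapReduce spill code does) or written into a trailer (as the HBase WAL code does) so readers can seek directly to a record.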
Example 1
Source File: SquallArchiveWriter.java From imhotep with Apache License 2.0
private void internalAppendFile(FSDataOutputStream os, File file, List<String> parentDirectories,
                                SquallArchiveCompressor compressor, String archiveFilename) throws IOException {
    final String baseFilename = file.getName().replaceAll("\\s+", "_");
    final String filename = makeFilename(parentDirectories, baseFilename);
    final long size = file.length();
    final long timestamp = file.lastModified();
    final long startOffset = os.getPos();

    final InputStream is = new BufferedInputStream(new FileInputStream(file));
    final String checksum;
    try {
        final CompressionOutputStream cos = compressor.newOutputStream(os);
        final DigestOutputStream dos = new DigestOutputStream(cos, ArchiveUtils.getMD5Digest());
        ByteStreams.copy(is, dos);
        checksum = ArchiveUtils.toHex(dos.getMessageDigest().digest());
        cos.finish();
    } finally {
        is.close();
    }

    pendingMetadataWrites.add(new FileMetadata(filename, size, timestamp, checksum, startOffset,
            compressor, archiveFilename));
}
Example 2
Source File: ProcedureWALFormat.java From hbase with Apache License 2.0
public static long writeTrailer(FSDataOutputStream stream, ProcedureStoreTracker tracker)
    throws IOException {
  long offset = stream.getPos();

  // Write EOF Entry
  ProcedureWALEntry.newBuilder()
    .setType(ProcedureWALEntry.Type.PROCEDURE_WAL_EOF)
    .build().writeDelimitedTo(stream);

  // Write Tracker
  tracker.toProto().writeDelimitedTo(stream);

  stream.write(TRAILER_VERSION);
  StreamUtils.writeLong(stream, TRAILER_MAGIC);
  StreamUtils.writeLong(stream, offset);
  return stream.getPos() - offset;
}
Example 3
Source File: TestMergeManager.java From tez with Apache License 2.0
private SrcFileInfo createFile(Configuration conf, FileSystem fs, Path path, int numPartitions,
                               int numKeysPerPartition, int startKey) throws IOException {
  FSDataOutputStream outStream = fs.create(path);
  int currentKey = startKey;
  SrcFileInfo srcFileInfo = new SrcFileInfo();
  srcFileInfo.indexedRecords = new TezIndexRecord[numPartitions];
  srcFileInfo.path = path;
  for (int i = 0; i < numPartitions; i++) {
    long pos = outStream.getPos();
    IFile.Writer writer =
        new IFile.Writer(conf, outStream, IntWritable.class, IntWritable.class, null, null, null);
    for (int j = 0; j < numKeysPerPartition; j++) {
      writer.append(new IntWritable(currentKey), new IntWritable(currentKey));
      currentKey++;
    }
    writer.close();
    srcFileInfo.indexedRecords[i] =
        new TezIndexRecord(pos, writer.getRawLength(), writer.getCompressedLength());
  }
  outStream.close();
  return srcFileInfo;
}
Example 4
Source File: TestBlockUnderConstruction.java From big-c with Apache License 2.0
void writeFile(Path file, FSDataOutputStream stm, int size) throws IOException {
  long blocksBefore = stm.getPos() / BLOCK_SIZE;

  TestFileCreation.writeFile(stm, BLOCK_SIZE);
  // need to make sure the full block is completely flushed to the DataNodes
  // (see FSOutputSummer#flush)
  stm.flush();
  int blocksAfter = 0;
  // wait until the block is allocated by DataStreamer
  BlockLocation[] locatedBlocks;
  while(blocksAfter <= blocksBefore) {
    locatedBlocks = DFSClientAdapter.getDFSClient(hdfs).getBlockLocations(
        file.toString(), 0L, BLOCK_SIZE*NUM_BLOCKS);
    blocksAfter = locatedBlocks == null ? 0 : locatedBlocks.length;
  }
}
Example 5
Source File: DefaultPullHandler.java From DBus with Apache License 2.0
private boolean isCreateNewFile(FSDataOutputStream fsDataOutputStream, int len) {
    if (fsDataOutputStream == null) {
        return true;
    } else {
        try {
            return (fsDataOutputStream.getPos() + len) > hdfsFileMaxSize;
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
        }
        return true;
    }
}
Example 6
Source File: SpoolingRawBatchBuffer.java From Bats with Apache License 2.0
public void writeToStream(FSDataOutputStream stream) throws IOException {
  Stopwatch watch = Stopwatch.createStarted();
  available = false;
  check = ThreadLocalRandom.current().nextLong();
  start = stream.getPos();
  logger.debug("Writing check value {} at position {}", check, start);
  stream.writeLong(check);
  batch.getHeader().writeDelimitedTo(stream);
  ByteBuf buf = batch.getBody();
  if (buf != null) {
    bodyLength = buf.capacity();
  } else {
    bodyLength = 0;
  }
  if (bodyLength > 0) {
    buf.getBytes(0, stream, bodyLength);
  }
  stream.hsync();
  FileStatus status = fs.getFileStatus(path);
  long len = status.getLen();
  logger.debug("After spooling batch, stream at position {}. File length {}", stream.getPos(), len);
  batch.sendOk();
  latch.countDown();
  long t = watch.elapsed(TimeUnit.MICROSECONDS);
  logger.debug("Took {} us to spool {} to disk. Rate {} mb/s", t, bodyLength, bodyLength / t);
  if (buf != null) {
    buf.release();
  }
}
Example 7
Source File: JobSplitWriter.java From hadoop with Apache License 2.0
@SuppressWarnings("unchecked")
private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf,
    T[] array, FSDataOutputStream out)
    throws IOException, InterruptedException {

  SplitMetaInfo[] info = new SplitMetaInfo[array.length];
  if (array.length != 0) {
    SerializationFactory factory = new SerializationFactory(conf);
    int i = 0;
    int maxBlockLocations = conf.getInt(MRConfig.MAX_BLOCK_LOCATIONS_KEY,
        MRConfig.MAX_BLOCK_LOCATIONS_DEFAULT);
    long offset = out.getPos();
    for (T split : array) {
      long prevCount = out.getPos();
      Text.writeString(out, split.getClass().getName());
      Serializer<T> serializer =
          factory.getSerializer((Class<T>) split.getClass());
      serializer.open(out);
      serializer.serialize(split);
      long currCount = out.getPos();
      String[] locations = split.getLocations();
      if (locations.length > maxBlockLocations) {
        LOG.warn("Max block location exceeded for split: "
            + split + " splitsize: " + locations.length +
            " maxsize: " + maxBlockLocations);
        locations = Arrays.copyOf(locations, maxBlockLocations);
      }
      info[i++] = new JobSplit.SplitMetaInfo(
          locations, offset, split.getLength());
      offset += currCount - prevCount;
    }
  }
  return info;
}
Example 8
Source File: BCFile.java From big-c with Apache License 2.0
/**
 * Constructor
 *
 * @param fout
 *          FS output stream.
 * @param compressionName
 *          Name of the compression algorithm, which will be used for all
 *          data blocks.
 * @throws IOException
 * @see Compression#getSupportedAlgorithms
 */
public Writer(FSDataOutputStream fout, String compressionName,
    Configuration conf) throws IOException {
  if (fout.getPos() != 0) {
    throw new IOException("Output file not at zero offset.");
  }

  this.out = fout;
  this.conf = conf;
  dataIndex = new DataIndex(compressionName);
  metaIndex = new MetaIndex();
  fsOutputBuffer = new BytesWritable();
  Magic.write(fout);
}
Example 9
Source File: DTBCFile.java From attic-apex-malhar with Apache License 2.0
/**
 * Constructor
 *
 * @param fout
 *          FS output stream.
 * @param compressionName
 *          Name of the compression algorithm, which will be used for all
 *          data blocks.
 * @throws IOException
 * @see Compression#getSupportedAlgorithms
 */
public Writer(FSDataOutputStream fout, String compressionName,
    Configuration conf) throws IOException {
  if (fout.getPos() != 0) {
    throw new IOException("Output file not at zero offset.");
  }

  this.out = fout;
  this.conf = conf;
  dataIndex = new DataIndex(compressionName);
  metaIndex = new MetaIndex();
  fsOutputBuffer = new BytesWritable();
  Magic.write(fout);
}
Example 10
Source File: DefaultSorter.java From incubator-tez with Apache License 2.0
protected void spill(int mstart, int mend)
    throws IOException, InterruptedException {

  //approximate the length of the output file to be the length of the
  //buffer + header lengths for the partitions
  final long size = (bufend >= bufstart
      ? bufend - bufstart
      : (bufvoid - bufend) + bufstart) +
          partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename =
        mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    int spindex = mstart;
    final InMemValBytes value = createInMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        writer = new Writer(conf, out, keyClass, valClass, codec,
            spilledRecordsCounter, null);
        if (combiner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            TezRawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
            if (LOG.isDebugEnabled()) {
              LOG.debug("Running combine processor");
            }
            runCombineProcessor(kvIter, writer);
          }
        }

        // close the writer
        writer.close();
        if (numSpills > 0) {
          additionalSpillBytesWritten.increment(writer.getCompressedLength());
          numAdditionalSpills.increment(1);
          // Reset the value will be set during the final merge.
          outputBytesWithOverheadCounter.setValue(0);
        } else {
          // Set this up for the first write only. Subsequent ones will be handled in the final merge.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
        }

        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(
            segmentStart,
            writer.getRawLength(),
            writer.getCompressedLength());
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
Example 11
Source File: MapTask.java From RDFS with Apache License 2.0
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    long spillStartMilli = System.currentTimeMillis();
    ProcResourceValues spillStartProcVals = getCurrentProcResourceValues();
    long spillBytes = 0;

    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(),
        numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer<K,V>(job, out, keyClass, valClass, codec,
            spilledRecordsCounter);
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillBytes += writer.getCompressedLength();
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          getTaskID(), numSpills,
          partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }

    long spillEndMilli = System.currentTimeMillis();
    ProcResourceValues spillEndProcVals = getCurrentProcResourceValues();
    spillSortCounters.incCountersPerSpill(spillStartProcVals,
        spillEndProcVals, spillEndMilli - spillStartMilli, spillBytes);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
Example 12
Source File: PipelinedSorter.java From tez with Apache License 2.0
private void spillSingleRecord(final Object key, final Object value,
    int partition) throws IOException {
  final TezSpillRecord spillRec = new TezSpillRecord(partitions);
  // getSpillFileForWrite with size -1 as the serialized size of KV pair is still unknown
  final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, -1);
  Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(numSpills,
      partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
  spillFilePaths.put(numSpills, filename);
  FSDataOutputStream out = rfs.create(filename, true, 4096);
  ensureSpillFilePermissions(filename, rfs);

  try {
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString() +
        ", indexFilename=" + indexFilename);
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return;
      }
      Writer writer = null;
      try {
        long segmentStart = out.getPos();
        if (!sendEmptyPartitionDetails || (i == partition)) {
          writer = new Writer(conf, out, keyClass, valClass, codec,
              spilledRecordsCounter, null, false);
        }
        // we need not check for combiner since its a single record
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          mapOutputRecordCounter.increment(1);
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);

        // record offsets
        final TezIndexRecord rec = new TezIndexRecord(
            segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);
        writer = null;
      } finally {
        if (null != writer) {
          writer.close();
        }
      }
    }

    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf, localFs);
    //TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      //No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    if (pipelinedShuffle) {
      sendPipelinedShuffleEvents();
    }
  } finally {
    out.close();
  }
}
Example 13
Source File: DefaultSorter.java From incubator-tez with Apache License 2.0
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final Object key, final Object value,
    int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer(conf, out, keyClass, valClass, codec,
            spilledRecordsCounter, null);
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();
        if (numSpills > 0) {
          additionalSpillBytesWritten.increment(writer.getCompressedLength());
          numAdditionalSpills.increment(1);
          outputBytesWithOverheadCounter.setValue(0);
        } else {
          // Set this up for the first write only. Subsequent ones will be handled in the final merge.
          outputBytesWithOverheadCounter.increment(writer.getRawLength());
        }

        // record offsets
        TezIndexRecord rec = new TezIndexRecord(
            segmentStart,
            writer.getRawLength(),
            writer.getCompressedLength());
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, conf);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
Example 14
Source File: PipelinedSorter.java From tez with Apache License 2.0
public boolean spill(boolean ignoreEmptySpills) throws IOException {
  FSDataOutputStream out = null;
  try {
    try {
      boolean ret = merger.ready();
      // if merger returned false and ignore merge is true,
      // then return directly without spilling
      if (!ret && ignoreEmptySpills){
        return false;
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      LOG.info(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete");
      throw new IOInterruptedException(outputContext.getDestinationVertexName() + ": Interrupted while waiting for mergers to complete", e);
    }

    // create spill file
    final long size = capacity + (partitions * APPROX_HEADER_LENGTH);
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename =
        mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename, true, 4096);
    ensureSpillFilePermissions(filename, rfs);
    LOG.info(outputContext.getDestinationVertexName() + ": Spilling to " + filename.toString());
    for (int i = 0; i < partitions; ++i) {
      if (isThreadInterrupted()) {
        return false;
      }
      outputContext.notifyProgress();
      TezRawKeyValueIterator kvIter = merger.filter(i);
      //write merged output to disk
      long segmentStart = out.getPos();
      Writer writer = null;
      boolean hasNext = kvIter.hasNext();
      if (hasNext || !sendEmptyPartitionDetails) {
        writer = new Writer(conf, out, keyClass, valClass, codec,
            spilledRecordsCounter, null, merger.needsRLE());
      }
      if (combiner == null) {
        while (kvIter.next()) {
          writer.append(kvIter.getKey(), kvIter.getValue());
        }
      } else {
        if (hasNext) {
          runCombineProcessor(kvIter, writer);
        }
      }
      long rawLength = 0;
      long partLength = 0;
      //close
      if (writer != null) {
        writer.close();
        rawLength = writer.getRawLength();
        partLength = writer.getCompressedLength();
      }
      adjustSpillCounters(rawLength, partLength);
      // record offsets
      final TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
      spillRec.putIndex(rec, i);
      if (!isFinalMergeEnabled() && reportPartitionStats()) {
        partitionStats[i] += partLength;
      }
    }

    Path indexFilename =
        mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
            * MAP_OUTPUT_INDEX_RECORD_LENGTH);
    spillFileIndexPaths.put(numSpills, indexFilename);
    spillRec.writeToFile(indexFilename, conf, localFs);
    //TODO: honor cache limits
    indexCacheList.add(spillRec);
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      fileOutputByteCounter.increment(rfs.getFileStatus(filename).getLen());
      //No final merge. Set the number of files offered via shuffle-handler
      numShuffleChunks.setValue(numSpills);
    }
    return true;
  } finally {
    if (out != null) {
      out.close();
    }
  }
}
Example 15
Source File: DefaultSorter.java From tez with Apache License 2.0
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final Object key, final Object value,
    int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final TezSpillRecord spillRec = new TezSpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    spillFilePaths.put(numSpills, filename);
    out = rfs.create(filename);
    ensureSpillFilePermissions(filename, rfs);

    // we don't run the combiner for a single record
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        if (!sendEmptyPartitionDetails || (i == partition)) {
          writer = new Writer(conf, out, keyClass, valClass, codec,
              spilledRecordsCounter, null, false);
        }
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        long rawLength = 0;
        long partLength = 0;
        if (writer != null) {
          writer.close();
          rawLength = writer.getRawLength();
          partLength = writer.getCompressedLength();
        }
        adjustSpillCounters(rawLength, partLength);

        // record offsets
        TezIndexRecord rec = new TezIndexRecord(segmentStart, rawLength, partLength);
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillFileIndexPaths.put(numSpills, indexFilename);
      spillRec.writeToFile(indexFilename, conf, localFs);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
    if (!isFinalMergeEnabled()) {
      numShuffleChunks.setValue(numSpills);
    } else if (numSpills > 1) {
      //Increment only when there is atleast one previous spill
      numAdditionalSpills.increment(1);
    }
  } finally {
    if (out != null) out.close();
  }
}
Example 16
Source File: HFileBlockIndex.java From hbase with Apache License 2.0
/**
 * Writes the root level and intermediate levels of the block index into
 * the output stream, generating the tree from bottom up. Assumes that the
 * leaf level has been inline-written to the disk if there is enough data
 * for more than one leaf block. We iterate by breaking the current level
 * of the block index, starting with the index of all leaf-level blocks,
 * into chunks small enough to be written to disk, and generate its parent
 * level, until we end up with a level small enough to become the root
 * level.
 *
 * If the leaf level is not large enough, there is no inline block index
 * anymore, so we only write that level of block index to disk as the root
 * level.
 *
 * @param out FSDataOutputStream
 * @return position at which we entered the root-level index.
 * @throws IOException
 */
public long writeIndexBlocks(FSDataOutputStream out) throws IOException {
  if (curInlineChunk != null && curInlineChunk.getNumEntries() != 0) {
    throw new IOException("Trying to write a multi-level block index, " +
        "but are " + curInlineChunk.getNumEntries() + " entries in the " +
        "last inline chunk.");
  }

  // We need to get mid-key metadata before we create intermediate
  // indexes and overwrite the root chunk.
  byte[] midKeyMetadata = numLevels > 1 ? rootChunk.getMidKeyMetadata() : null;

  if (curInlineChunk != null) {
    while (rootChunk.getRootSize() > maxChunkSize
        // HBASE-16288: if firstKey is larger than maxChunkSize we will loop indefinitely
        && rootChunk.getNumEntries() > minIndexNumEntries
        // Sanity check. We will not hit this (minIndexNumEntries ^ 16) blocks can be addressed
        && numLevels < 16) {
      rootChunk = writeIntermediateLevel(out, rootChunk);
      numLevels += 1;
    }
  }

  // write the root level
  long rootLevelIndexPos = out.getPos();

  {
    DataOutput blockStream = blockWriter.startWriting(BlockType.ROOT_INDEX);
    rootChunk.writeRoot(blockStream);
    if (midKeyMetadata != null)
      blockStream.write(midKeyMetadata);
    blockWriter.writeHeaderAndData(out);
    if (cacheConf != null) {
      cacheConf.getBlockCache().ifPresent(cache -> {
        HFileBlock blockForCaching = blockWriter.getBlockForCaching(cacheConf);
        cache.cacheBlock(new BlockCacheKey(nameForCaching, rootLevelIndexPos, true,
            blockForCaching.getBlockType()), blockForCaching);
      });
    }
  }

  // Add root index block size
  totalBlockOnDiskSize += blockWriter.getOnDiskSizeWithoutHeader();
  totalBlockUncompressedSize += blockWriter.getUncompressedSizeWithoutHeader();

  if (LOG.isTraceEnabled()) {
    LOG.trace("Wrote a " + numLevels + "-level index with root level at pos "
        + rootLevelIndexPos + ", " + rootChunk.getNumEntries()
        + " root-level entries, " + totalNumEntries + " total entries, "
        + StringUtils.humanReadableInt(this.totalBlockOnDiskSize) + " on-disk size, "
        + StringUtils.humanReadableInt(totalBlockUncompressedSize) + " total uncompressed size.");
  }
  return rootLevelIndexPos;
}
Example 17
Source File: CryptoFSDataOutputStream.java From hadoop with Apache License 2.0
public CryptoFSDataOutputStream(FSDataOutputStream out, CryptoCodec codec,
    int bufferSize, byte[] key, byte[] iv) throws IOException {
  super(new CryptoOutputStream(out, codec, bufferSize, key, iv,
      out.getPos()), null, out.getPos());
  this.fsOut = out;
}
Example 18
Source File: MapTask.java From hadoop-gpu with Apache License 2.0
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(getTaskID(),
        numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        writer = new IFile.Writer<K,V>(job, out, keyClass, valClass, codec,
            spilledRecordsCounter);
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength();
        rec.partLength = writer.getCompressedLength();
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= INDEX_CACHE_MEMORY_LIMIT) {
      // create spill index file
      Path indexFilename = mapOutputFile.getSpillIndexFileForWrite(
          getTaskID(), numSpills,
          partitions * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
Example 19
Source File: MapTask.java From hadoop with Apache License 2.0
/**
 * Handles the degenerate case where serialization fails to fit in
 * the in-memory buffer, so we must spill the record from collect
 * directly to a spill file. Consider this "losing".
 */
private void spillSingleRecord(final K key, final V value,
                               int partition) throws IOException {
  long size = kvbuffer.length + partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    // we don't run the combiner for a single record
    IndexRecord rec = new IndexRecord();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        // Create a new codec, don't care!
        FSDataOutputStream partitionOut = CryptoUtils.wrapIfNecessary(job, out);
        writer = new IFile.Writer<K,V>(job, partitionOut, keyClass, valClass, codec,
            spilledRecordsCounter);
        if (i == partition) {
          final long recordStart = out.getPos();
          writer.append(key, value);
          // Note that our map byte count will not be accurate with
          // compression
          mapOutputByteCounter.increment(out.getPos() - recordStart);
        }
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } catch (IOException e) {
        if (null != writer) writer.close();
        throw e;
      }
    }
    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}
Example 20
Source File: MapTask.java From hadoop with Apache License 2.0
private void sortAndSpill() throws IOException, ClassNotFoundException,
                                   InterruptedException {
  //approximate the length of the output file to be the length of the
  //buffer + header lengths for the partitions
  final long size = distanceTo(bufstart, bufend, bufvoid) +
      partitions * APPROX_HEADER_LENGTH;
  FSDataOutputStream out = null;
  try {
    // create spill file
    final SpillRecord spillRec = new SpillRecord(partitions);
    final Path filename = mapOutputFile.getSpillFileForWrite(numSpills, size);
    out = rfs.create(filename);

    final int mstart = kvend / NMETA;
    final int mend = 1 + // kvend is a valid record
        (kvstart >= kvend
        ? kvstart
        : kvmeta.capacity() + kvstart) / NMETA;
    sorter.sort(MapOutputBuffer.this, mstart, mend, reporter);
    int spindex = mstart;
    final IndexRecord rec = new IndexRecord();
    final InMemValBytes value = new InMemValBytes();
    for (int i = 0; i < partitions; ++i) {
      IFile.Writer<K, V> writer = null;
      try {
        long segmentStart = out.getPos();
        FSDataOutputStream partitionOut = CryptoUtils.wrapIfNecessary(job, out);
        writer = new Writer<K, V>(job, partitionOut, keyClass, valClass, codec,
            spilledRecordsCounter);
        if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
            final int kvoff = offsetFor(spindex % maxRec);
            int keystart = kvmeta.get(kvoff + KEYSTART);
            int valstart = kvmeta.get(kvoff + VALSTART);
            key.reset(kvbuffer, keystart, valstart - keystart);
            getVBytesForOffset(kvoff, value);
            writer.append(key, value);
            ++spindex;
          }
        } else {
          int spstart = spindex;
          while (spindex < mend &&
              kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
            ++spindex;
          }
          // Note: we would like to avoid the combiner if we've fewer
          // than some threshold of records for a partition
          if (spstart != spindex) {
            combineCollector.setWriter(writer);
            RawKeyValueIterator kvIter =
                new MRResultIterator(spstart, spindex);
            combinerRunner.combine(kvIter, combineCollector);
          }
        }

        // close the writer
        writer.close();

        // record offsets
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        spillRec.putIndex(rec, i);

        writer = null;
      } finally {
        if (null != writer) writer.close();
      }
    }

    if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
      // create spill index file
      Path indexFilename =
          mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
              * MAP_OUTPUT_INDEX_RECORD_LENGTH);
      spillRec.writeToFile(indexFilename, job);
    } else {
      indexCacheList.add(spillRec);
      totalIndexCacheMemory +=
          spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
    }
    LOG.info("Finished spill " + numSpills);
    ++numSpills;
  } finally {
    if (out != null) out.close();
  }
}