Java Code Examples for org.apache.parquet.io.SeekableInputStream#readFully()
The following examples show how to use
org.apache.parquet.io.SeekableInputStream#readFully().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Reads a dictionary page from the stream without decompressing it.
 *
 * <p>Exactly {@code pageHeader.getCompressed_page_size()} bytes are read from
 * {@code fin} and handed to the returned page as-is.
 *
 * @param pageHeader header supplying the compressed/uncompressed sizes and the
 *     dictionary page header (value count and encoding)
 * @param fin stream to read the page bytes from, starting at its current position
 * @return a dictionary page backed by the bytes read from {@code fin}
 * @throws IOException if reading the page bytes from the stream fails
 */
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  // Pull the whole payload in one call; its size comes from the page header.
  final byte[] compressedBytes = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(compressedBytes);

  final DictionaryPageHeader dictionaryHeader = pageHeader.getDictionary_page_header();
  return new DictionaryPage(
      BytesInput.from(compressedBytes),
      pageHeader.getUncompressed_page_size(),
      dictionaryHeader.getNum_values(),
      converter.getEncoding(dictionaryHeader.getEncoding()));
}
Example 2
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 5 votes |
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException { long fileLen = file.getLength(); LOG.debug("File length {}", fileLen); int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC throw new RuntimeException(file.toString() + " is not a Parquet file (too small length: " + fileLen + ")"); } long footerLengthIndex = fileLen - FOOTER_LENGTH_SIZE - MAGIC.length; LOG.debug("reading footer index at {}", footerLengthIndex); f.seek(footerLengthIndex); int footerLength = readIntLittleEndian(f); byte[] magic = new byte[MAGIC.length]; f.readFully(magic); if (!Arrays.equals(MAGIC, magic)) { throw new RuntimeException(file.toString() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(magic)); } long footerIndex = footerLengthIndex - footerLength; LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex); if (footerIndex < MAGIC.length || footerIndex >= footerLengthIndex) { throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex); } f.seek(footerIndex); // Read all the footer bytes in one time to avoid multiple read operations, // since it can be pretty time consuming for a single read operation in HDFS. ByteBuffer footerBytesBuffer = ByteBuffer.allocate(footerLength); f.readFully(footerBytesBuffer); LOG.debug("Finished to read all footer bytes."); footerBytesBuffer.flip(); InputStream footerBytesStream = ByteBufferInputStream.wrap(footerBytesBuffer); return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter()); }
Example 3
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/** * @param f file to read the chunks from * @param builder used to build chunk list to read the pages for the different columns * @throws IOException if there is an error while reading from the stream */ public void readAll(SeekableInputStream f, ChunkListBuilder builder) throws IOException { List<Chunk> result = new ArrayList<Chunk>(chunks.size()); f.seek(offset); int fullAllocations = length / options.getMaxAllocationSize(); int lastAllocationSize = length % options.getMaxAllocationSize(); int numAllocations = fullAllocations + (lastAllocationSize > 0 ? 1 : 0); List<ByteBuffer> buffers = new ArrayList<>(numAllocations); for (int i = 0; i < fullAllocations; i += 1) { buffers.add(options.getAllocator().allocate(options.getMaxAllocationSize())); } if (lastAllocationSize > 0) { buffers.add(options.getAllocator().allocate(lastAllocationSize)); } for (ByteBuffer buffer : buffers) { f.readFully(buffer); buffer.flip(); } // report in a counter the data we just scanned BenchmarkCounter.incrementBytesRead(length); ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffers); for (int i = 0; i < chunks.size(); i++) { ChunkDescriptor descriptor = chunks.get(i); builder.add(descriptor, stream.sliceBuffers(descriptor.size), f); } }
Example 4
Source File: VectorAccessibleSerializable.java From dremio-oss with Apache License 2.0 | 4 votes |
/**
 * Reads bytes from a seekable stream into an Arrow buffer.
 *
 * @param input stream to read from, starting at its current position
 * @param outputBuffer buffer that receives the data; its writer index is set
 *     to {@code bytesToRead} after the read
 * @param bytesToRead size passed to {@code outputBuffer.nioBuffer(0, ...)} and
 *     the value the writer index is set to
 * @throws IOException if reading from the stream fails
 */
public static void readFromStream(SeekableInputStream input, final ArrowBuf outputBuffer, final int bytesToRead) throws IOException {
  // Read straight into the NIO buffer the Arrow buffer exposes for (0, bytesToRead).
  input.readFully(outputBuffer.nioBuffer(0, bytesToRead));
  // Record how far the Arrow buffer has been written.
  outputBuffer.writerIndex(bytesToRead);
}
Example 5
Source File: TestBulkInputStream.java From dremio-oss with Apache License 2.0 | 4 votes |
private void testSeekableStream(SeekableInputStream inputStream) throws IOException { int streamPos = 0; assertEquals(streamPos, inputStream.getPos()); // Read some bytes from the start final byte[] buf = new byte[1000]; inputStream.readFully(buf, 0, 88); compareData(buf, 0, streamPos, 88); streamPos += 88; assertEquals(streamPos, inputStream.getPos()); final byte[] shortBuf = new byte[17]; inputStream.readFully(shortBuf); compareData(shortBuf, 0, streamPos, 17); streamPos += 17; assertEquals(streamPos, inputStream.getPos()); // test ByteBuffer interfaces final ByteBuffer shortByteBuf = ByteBuffer.allocate(25); inputStream.read(shortByteBuf); compareData(shortByteBuf.array(), 0, streamPos, 25); streamPos += 25; assertEquals(streamPos, inputStream.getPos()); final ByteBuffer shortByteBuf2 = ByteBuffer.allocateDirect(71); inputStream.read(shortByteBuf2); final ByteBuf compareBuf = Unpooled.directBuffer(100); shortByteBuf2.flip(); compareBuf.writeBytes(shortByteBuf2); compareData(compareBuf, streamPos, 71); streamPos += 71; assertEquals(streamPos, inputStream.getPos()); final ByteBuffer shortByteBuf3 = ByteBuffer.allocate(66); inputStream.readFully(shortByteBuf3); compareData(shortByteBuf3.array(), 0, streamPos, 66); streamPos += 66; assertEquals(streamPos, inputStream.getPos()); // Test plain old read interface buf[0] = (byte) inputStream.read(); buf[1] = (byte) inputStream.read(); buf[2] = (byte) inputStream.read(); compareData(buf, 0, streamPos, 3); streamPos += 3; assertEquals(streamPos, inputStream.getPos()); // Skip some, then read streamPos += 50; // skip 50 bytes inputStream.seek(streamPos); inputStream.readFully(buf, 0, 37); compareData(buf, 0, streamPos, 37); streamPos += 37; assertEquals(streamPos, inputStream.getPos()); // skip to near the end, then read streamPos = TEST_DATA_SIZE - 100; inputStream.seek(streamPos); inputStream.readFully(buf, 0, 100); compareData(buf, 0, streamPos,100); streamPos += 100; assertEquals(streamPos, inputStream.getPos()); }