Java Code Examples for org.apache.parquet.format.Util#readPageHeader()
The following examples show how to use org.apache.parquet.format.Util#readPageHeader().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PredicateUtils.java From presto with Apache License 2.0 | 6 votes |
/**
 * Attempts to decode a dictionary page from the raw bytes of a column chunk.
 *
 * <p>The page header is parsed from the start of {@code data}. If that header is not of type
 * {@code DICTIONARY_PAGE}, or an {@link IOException} is raised while reading or decompressing,
 * an empty {@link Optional} is returned.
 *
 * @param data bytes beginning with a serialized page header, followed by the page payload
 * @param codecName codec handed to {@code decompress} for the page payload
 * @return the decoded dictionary page, or {@code Optional.empty()} as described above
 */
private static Optional<DictionaryPage> readDictionaryPage(byte[] data, CompressionCodecName codecName)
{
    try {
        ByteArrayInputStream headerStream = new ByteArrayInputStream(data);
        PageHeader header = Util.readPageHeader(headerStream);
        if (header.type != PageType.DICTIONARY_PAGE) {
            return Optional.empty();
        }

        // Whatever the header parser consumed is the offset at which the payload begins.
        int payloadOffset = data.length - headerStream.available();
        Slice compressedPayload = wrappedBuffer(data, payloadOffset, header.getCompressed_page_size());

        DictionaryPageHeader dictionaryHeader = header.getDictionary_page_header();
        ParquetEncoding encoding = getParquetEncoding(Encoding.valueOf(dictionaryHeader.getEncoding().name()));
        int dictionarySize = dictionaryHeader.getNum_values();

        return Optional.of(new DictionaryPage(
                decompress(codecName, compressedPayload, header.getUncompressed_page_size()),
                dictionarySize,
                encoding));
    }
    catch (IOException ignored) {
        // Best effort: an unreadable page is reported as "no dictionary page".
        return Optional.empty();
    }
}
Example 2
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 6 votes |
/** * Reads and decompresses a dictionary page for the given column chunk. * * Returns null if the given column chunk has no dictionary page. * * @param meta a column's ColumnChunkMetaData to read the dictionary from * @return an uncompressed DictionaryPage or null * @throws IOException if there is an error while reading the dictionary */ DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException { if (!meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY) && !meta.getEncodings().contains(Encoding.RLE_DICTIONARY)) { return null; } // TODO: this should use getDictionaryPageOffset() but it isn't reliable. if (f.getPos() != meta.getStartingPos()) { f.seek(meta.getStartingPos()); } PageHeader pageHeader = Util.readPageHeader(f); if (!pageHeader.isSetDictionary_page_header()) { return null; // TODO: should this complain? } DictionaryPage compressedPage = readCompressedDictionary(pageHeader, f); BytesInputDecompressor decompressor = options.getCodecFactory().getDecompressor(meta.getCodec()); return new DictionaryPage( decompressor.decompress(compressedPage.getBytes(), compressedPage.getUncompressedSize()), compressedPage.getDictionarySize(), compressedPage.getEncoding()); }
Example 3
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 6 votes |
protected PageHeader readPageHeader() throws IOException { PageHeader pageHeader; stream.mark(8192); // headers should not be larger than 8k try { pageHeader = Util.readPageHeader(stream); } catch (IOException e) { // this is to workaround a bug where the compressedLength // of the chunk is missing the size of the header of the dictionary // to allow reading older files (using dictionary) we need this. // usually 13 to 19 bytes are missing // if the last page is smaller than this, the page header itself is truncated in the buffer. stream.reset(); // resetting the buffer to the position before we got the error LOG.info("completing the column chunk to read the page header"); pageHeader = Util.readPageHeader(new SequenceInputStream(stream, f)); // trying again from the buffer + remainder of the stream. } return pageHeader; }
Example 4
Source File: PageReader.java From Bats with Apache License 2.0 | 5 votes |
/** * Get the page header and the pageData (uncompressed) for the next page */ protected void nextInternal() throws IOException{ Stopwatch timer = Stopwatch.createUnstarted(); // next, we need to decompress the bytes // TODO - figure out if we need multiple dictionary pages, I believe it may be limited to one // I think we are clobbering parts of the dictionary if there can be multiple pages of dictionary do { long start=dataReader.getPos(); timer.start(); pageHeader = Util.readPageHeader(dataReader); long timeToRead = timer.elapsed(TimeUnit.NANOSECONDS); long pageHeaderBytes=dataReader.getPos()-start; this.updateStats(pageHeader, "Page Header", start, timeToRead, pageHeaderBytes, pageHeaderBytes); logger.trace("ParquetTrace,{},{},{},{},{},{},{},{}","Page Header Read","", this.parentColumnReader.parentReader.hadoopPath, this.parentColumnReader.columnDescriptor.toString(), start, 0, 0, timeToRead); timer.reset(); if (pageHeader.getType() == PageType.DICTIONARY_PAGE) { readDictionaryPage(pageHeader, parentColumnReader); } } while (pageHeader.getType() == PageType.DICTIONARY_PAGE); int compressedSize = pageHeader.getCompressed_page_size(); int uncompressedSize = pageHeader.getUncompressed_page_size(); pageData = readPage(pageHeader, compressedSize, uncompressedSize); }
Example 5
Source File: ColumnChunkIncReadStore.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Returns the dictionary page, reading it on first use and reusing the stored page afterwards.
 *
 * <p>When the header read at the current position has no dictionary page header, the input is
 * seeked back to where it was and {@code null} is returned. Any exception raised while reading
 * is rethrown as a {@link RuntimeException} whose message carries the reader's state.
 *
 * @return the dictionary page, or {@code null} if the header read has no dictionary page header
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (dictionaryPage != null) {
    return dictionaryPage;
  }

  // Both are declared outside the try so the failure message can report them.
  PageHeader pageHeader = new PageHeader();
  long pos = 0;
  try {
    pos = in.getPos();
    pageHeader = Util.readPageHeader(in.asSeekableInputStream());
    if (pageHeader.getDictionary_page_header() == null) {
      // Not a dictionary page: undo the header read before reporting "none".
      in.seek(pos);
      return null;
    }
    dictionaryPage = readDictionaryPageHelper(pageHeader);
  } catch (Exception e) {
    String details = "Error reading dictionary page."
        + "\nFile path: " + path.toURI().getPath()
        + "\nRow count: " + rowCount
        + "\nColumn Chunk Metadata: " + metaData
        + "\nPage Header: " + pageHeader
        + "\nFile offset: " + fileOffset
        + "\nSize: " + size
        + "\nValue read so far: " + valueReadSoFar
        + "\nPosition: " + pos;
    throw new RuntimeException(details, e);
  }
  return dictionaryPage;
}
Example 6
Source File: ColumnDataReader.java From Bats with Apache License 2.0 | 4 votes |
/**
 * Reads a page header from {@code input} by delegating to {@code Util.readPageHeader}.
 *
 * @return the page header that was read
 * @throws IOException if the underlying read fails
 */
public PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(input);
  return header;
}
Example 7
Source File: ParquetColumnChunk.java From presto with Apache License 2.0 | 4 votes |
/**
 * Reads a page header from {@code input} by delegating to {@code Util.readPageHeader}.
 *
 * @return the page header that was read
 * @throws IOException if the underlying read fails
 */
protected PageHeader readPageHeader()
        throws IOException
{
    final PageHeader header = Util.readPageHeader(input);
    return header;
}
Example 8
Source File: ColumnDataReader.java From dremio-oss with Apache License 2.0 | 4 votes |
/**
 * Reads a page header from {@code input} by delegating to {@code Util.readPageHeader}.
 *
 * @return the page header that was read
 * @throws IOException if the underlying read fails
 */
public PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(input);
  return header;
}
Example 9
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads a page header from {@code stream} by delegating to {@code Util.readPageHeader}.
 *
 * @return the page header that was read
 * @throws IOException if the underlying read fails
 */
protected PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(stream);
  return header;
}
Example 10
Source File: CompressionConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads a page header from {@code f} by delegating to {@code Util.readPageHeader}.
 *
 * @return the page header that was read
 * @throws IOException if the underlying read fails
 */
public PageHeader readPageHeader() throws IOException {
  final PageHeader header = Util.readPageHeader(f);
  return header;
}