Java Code Examples for org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getTotalUncompressedSize()
The following examples show how to use
org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getTotalUncompressedSize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PageReader.java From Bats with Apache License 2.0 | 5 votes |
PageReader(org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData) throws ExecutionSetupException { this.parentColumnReader = parentStatus; allocatedDictionaryBuffers = new ArrayList<ByteBuf>(); codecFactory = parentColumnReader.parentReader.getCodecFactory(); this.stats = parentColumnReader.parentReader.parquetReaderStats; this.fileName = path.toString(); debugName = new StringBuilder() .append(this.parentColumnReader.parentReader.getFragmentContext().getFragIdString()) .append(":") .append(this.parentColumnReader.parentReader.getOperatorContext().getStats().getId() ) .append(this.parentColumnReader.columnChunkMetaData.toString() ) .toString(); try { inputStream = fs.open(path); BufferAllocator allocator = parentColumnReader.parentReader.getOperatorContext().getAllocator(); columnChunkMetaData.getTotalUncompressedSize(); useBufferedReader = parentColumnReader.parentReader.useBufferedReader; scanBufferSize = parentColumnReader.parentReader.bufferedReadSize; useFadvise = parentColumnReader.parentReader.useFadvise; enforceTotalSize = parentColumnReader.parentReader.enforceTotalSize; if (useBufferedReader) { this.dataReader = new BufferedDirectBufInputStream(inputStream, allocator, path.getName(), columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), scanBufferSize, enforceTotalSize, useFadvise); } else { this.dataReader = new DirectBufInputStream(inputStream, allocator, path.getName(), columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), enforceTotalSize, useFadvise); } } catch (IOException e) { throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: " + path.getName(), e); } }
Example 2
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Writes a one-line summary of a column chunk to {@code out}.
 *
 * <p>The line holds, in order: the optional dotted column path, type, codec,
 * dictionary page offset (DO), first data page offset (FPO), sizes as
 * total/uncompressed/ratio (SZ), value count (VC), the encodings when there are
 * any (ENC), and the statistics or {@code none} (ST).
 *
 * @param out  writer receiving the formatted line
 * @param meta column chunk metadata to describe
 * @param name whether to prefix the line with the column path
 */
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  final long dictionaryOffset = meta.getDictionaryPageOffset();
  final long firstDataOffset = meta.getFirstDataPageOffset();
  final long totalSize = meta.getTotalSize();
  final long uncompressedSize = meta.getTotalUncompressedSize();
  final long valueCount = meta.getValueCount();

  // Uncompressed-to-total size ratio, computed in floating point.
  final double sizeRatio = (double) uncompressedSize / totalSize;
  final String encodingList = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    out.format("%s: ", Joiner.on('.').skipNulls().join(meta.getPath()));
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", dictionaryOffset);
  out.format(" FPO:%d", firstDataOffset);
  out.format(" SZ:%d/%d/%.2f", totalSize, uncompressedSize, sizeRatio);
  out.format(" VC:%d", valueCount);

  if (!encodingList.isEmpty()) {
    out.format(" ENC:%s", encodingList);
  }

  Statistics<?> statistics = meta.getStatistics();
  if (statistics == null) {
    out.format(" ST:[none]");
  } else {
    out.format(" ST:[%s]", statistics);
  }
  out.println();
}
Example 3
Source File: MetadataUtils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Prints the details of one column chunk as a single formatted line.
 *
 * <p>Fields are emitted in a fixed order: column path (only when {@code name}
 * is set), type, codec, {@code DO} dictionary page offset, {@code FPO} first data
 * page offset, {@code SZ} total/uncompressed/ratio, {@code VC} value count,
 * {@code ENC} encodings (skipped when empty) and {@code ST} statistics.
 *
 * @param out  destination writer
 * @param meta metadata of the column chunk being printed
 * @param name {@code true} to print the dotted column path first
 */
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  // Read every numeric field up front, before anything is written.
  long dictPageOffset = meta.getDictionaryPageOffset();
  long dataPageOffset = meta.getFirstDataPageOffset();
  long compressedBytes = meta.getTotalSize();
  long rawBytes = meta.getTotalUncompressedSize();
  long values = meta.getValueCount();
  double rawToCompressed = rawBytes / (double) compressedBytes;

  String joinedEncodings = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    String dottedPath = Joiner.on('.').skipNulls().join(meta.getPath());
    out.format("%s: ", dottedPath);
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", dictPageOffset);
  out.format(" FPO:%d", dataPageOffset);
  out.format(" SZ:%d/%d/%.2f", compressedBytes, rawBytes, rawToCompressed);
  out.format(" VC:%d", values);

  boolean hasEncodings = !joinedEncodings.isEmpty();
  if (hasEncodings) {
    out.format(" ENC:%s", joinedEncodings);
  }

  Statistics<?> chunkStats = meta.getStatistics();
  if (chunkStats != null) {
    out.format(" ST:[%s]", chunkStats);
  } else {
    out.format(" ST:[none]");
  }

  out.println();
}
Example 4
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) { //rowGroup.total_byte_size = ; List<ColumnChunkMetaData> columns = block.getColumns(); List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>(); for (ColumnChunkMetaData columnMetaData : columns) { ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset columnChunk.file_path = block.getPath(); // they are in the same file for now columnChunk.meta_data = new ColumnMetaData( getType(columnMetaData.getType()), toFormatEncodings(columnMetaData.getEncodings()), Arrays.asList(columnMetaData.getPath().toArray()), toFormatCodec(columnMetaData.getCodec()), columnMetaData.getValueCount(), columnMetaData.getTotalUncompressedSize(), columnMetaData.getTotalSize(), columnMetaData.getFirstDataPageOffset()); if (columnMetaData.getEncodingStats() != null && columnMetaData.getEncodingStats().hasDictionaryPages()) { columnChunk.meta_data.setDictionary_page_offset(columnMetaData.getDictionaryPageOffset()); } columnChunk.meta_data.setBloom_filter_offset(columnMetaData.getBloomFilterOffset()); if (!columnMetaData.getStatistics().isEmpty()) { columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics(), this.statisticsTruncateLength)); } if (columnMetaData.getEncodingStats() != null) { columnChunk.meta_data.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats())); } // columnChunk.meta_data.index_page_offset = ; // columnChunk.meta_data.key_value_metadata = ; // nothing yet IndexReference columnIndexRef = columnMetaData.getColumnIndexReference(); if (columnIndexRef != null) { columnChunk.setColumn_index_offset(columnIndexRef.getOffset()); columnChunk.setColumn_index_length(columnIndexRef.getLength()); } IndexReference offsetIndexRef = columnMetaData.getOffsetIndexReference(); if (offsetIndexRef != null) { columnChunk.setOffset_index_offset(offsetIndexRef.getOffset()); 
columnChunk.setOffset_index_length(offsetIndexRef.getLength()); } parquetColumns.add(columnChunk); } RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount()); rowGroups.add(rowGroup); }