Java Code Examples for org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#setColumnIndexReference()
The following examples show how to use
org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#setColumnIndexReference().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetFileWriter.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Writes the column indexes of all column chunks to {@code out} and records, on each
 * chunk, where its index was written.
 *
 * <p>Blocks and their columns are visited in list order. {@code columnIndexes} is read
 * positionally: element {@code [b][c]} is the index for column {@code c} of block
 * {@code b}. A chunk whose index converts to {@code null} is skipped and gets no
 * reference set.
 *
 * @param columnIndexes per-block, per-column indexes, parallel to {@code blocks}
 * @param blocks        row-group metadata whose column chunks receive the references
 * @param out           stream the serialized indexes are written to
 * @throws IOException if querying the stream position or writing an index fails
 */
private static void serializeColumnIndexes(
    List<List<ColumnIndex>> columnIndexes,
    List<BlockMetaData> blocks,
    PositionOutputStream out) throws IOException {
  LOG.debug("{}: column indexes", out.getPos());
  final int blockCount = blocks.size();
  for (int b = 0; b < blockCount; b++) {
    List<ColumnChunkMetaData> chunks = blocks.get(b).getColumns();
    List<ColumnIndex> indexesOfBlock = columnIndexes.get(b);
    final int chunkCount = chunks.size();
    for (int c = 0; c < chunkCount; c++) {
      ColumnChunkMetaData chunk = chunks.get(c);
      org.apache.parquet.format.ColumnIndex serialized =
          ParquetMetadataConverter.toParquetColumnIndex(
              chunk.getPrimitiveType(), indexesOfBlock.get(c));
      if (serialized != null) {
        // Measure the written size as the difference in stream position around the write.
        long start = out.getPos();
        Util.writeColumnIndex(serialized, out);
        int length = (int) (out.getPos() - start);
        chunk.setColumnIndexReference(new IndexReference(start, length));
      }
    }
  }
}
Example 2
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException { MessageType messageType = fromParquetSchema(parquetMetadata.getSchema(), parquetMetadata.getColumn_orders()); List<BlockMetaData> blocks = new ArrayList<BlockMetaData>(); List<RowGroup> row_groups = parquetMetadata.getRow_groups(); if (row_groups != null) { for (RowGroup rowGroup : row_groups) { BlockMetaData blockMetaData = new BlockMetaData(); blockMetaData.setRowCount(rowGroup.getNum_rows()); blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size()); List<ColumnChunk> columns = rowGroup.getColumns(); String filePath = columns.get(0).getFile_path(); for (ColumnChunk columnChunk : columns) { if ((filePath == null && columnChunk.getFile_path() != null) || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) { throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now"); } ColumnMetaData metaData = columnChunk.meta_data; ColumnPath path = getPath(metaData); ColumnChunkMetaData column = ColumnChunkMetaData.get( path, messageType.getType(path.toArray()).asPrimitiveType(), fromFormatCodec(metaData.codec), convertEncodingStats(metaData.getEncoding_stats()), fromFormatEncodings(metaData.encodings), fromParquetStatistics( parquetMetadata.getCreated_by(), metaData.statistics, messageType.getType(path.toArray()).asPrimitiveType()), metaData.data_page_offset, metaData.dictionary_page_offset, metaData.num_values, metaData.total_compressed_size, metaData.total_uncompressed_size); column.setColumnIndexReference(toColumnIndexReference(columnChunk)); column.setOffsetIndexReference(toOffsetIndexReference(columnChunk)); column.setBloomFilterOffset(metaData.bloom_filter_offset); // TODO // index_page_offset // key_value_metadata blockMetaData.addColumn(column); } blockMetaData.setPath(filePath); blocks.add(blockMetaData); } } Map<String, String> keyValueMetaData = new HashMap<String, String>(); List<KeyValue> 
key_value_metadata = parquetMetadata.getKey_value_metadata(); if (key_value_metadata != null) { for (KeyValue keyValue : key_value_metadata) { keyValueMetaData.put(keyValue.key, keyValue.value); } } return new ParquetMetadata( new org.apache.parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, parquetMetadata.getCreated_by()), blocks); }