Java Code Examples for org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getEncodings()
The following examples show how to use org.apache.parquet.hadoop.metadata.ColumnChunkMetaData#getEncodings().
The original project and source file are noted above each example.
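Before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing how a ColumnChunkMetaData instance is typically obtained from a Parquet file footer before getEncodings() is called. The class name and file path are placeholders chosen for illustration.

import java.io.IOException;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class PrintColumnEncodings {
  public static void main(String[] args) throws IOException {
    // placeholder path; point this at any existing Parquet file
    Path path = new Path("/tmp/example.parquet");
    try (ParquetFileReader reader =
        ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
      ParquetMetadata footer = reader.getFooter();
      for (BlockMetaData block : footer.getBlocks()) {
        for (ColumnChunkMetaData column : block.getColumns()) {
          // getEncodings() reports every Encoding used by any page in this column chunk
          Set<Encoding> encodings = column.getEncodings();
          System.out.println(column.getPath().toDotString() + " -> " + encodings);
        }
      }
    }
  }
}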
Example 1
Source File: PredicateUtils.java From presto with Apache License 2.0
@VisibleForTesting
@SuppressWarnings("deprecation")
static boolean isOnlyDictionaryEncodingPages(ColumnChunkMetaData columnMetaData)
{
    // Files written with newer versions of Parquet libraries (e.g. parquet-mr 1.9.0) will have EncodingStats available
    // Otherwise, fallback to v1 logic
    EncodingStats stats = columnMetaData.getEncodingStats();
    if (stats != null) {
        return stats.hasDictionaryPages() && !stats.hasNonDictionaryEncodedPages();
    }

    Set<Encoding> encodings = columnMetaData.getEncodings();
    if (encodings.contains(PLAIN_DICTIONARY)) {
        // PLAIN_DICTIONARY was present, which means at least one page was
        // dictionary-encoded and 1.0 encodings are used
        // The only other allowed encodings are RLE and BIT_PACKED which are used for repetition or definition levels
        return Sets.difference(encodings, ImmutableSet.of(PLAIN_DICTIONARY, RLE, BIT_PACKED)).isEmpty();
    }

    return false;
}
Example 2
Source File: ParquetUtil.java From iceberg with Apache License 2.0
@SuppressWarnings("deprecation") public static boolean hasNonDictionaryPages(ColumnChunkMetaData meta) { EncodingStats stats = meta.getEncodingStats(); if (stats != null) { return stats.hasNonDictionaryEncodedPages(); } // without EncodingStats, fall back to testing the encoding list Set<Encoding> encodings = new HashSet<Encoding>(meta.getEncodings()); if (encodings.remove(Encoding.PLAIN_DICTIONARY)) { // if remove returned true, PLAIN_DICTIONARY was present, which means at // least one page was dictionary encoded and 1.0 encodings are used // RLE and BIT_PACKED are only used for repetition or definition levels encodings.remove(Encoding.RLE); encodings.remove(Encoding.BIT_PACKED); // when empty, no encodings other than dictionary or rep/def levels return !encodings.isEmpty(); } else { // if PLAIN_DICTIONARY wasn't present, then either the column is not // dictionary-encoded, or the 2.0 encoding, RLE_DICTIONARY, was used. // for 2.0, this cannot determine whether a page fell back without // page encoding stats return true; } }
Example 3
Source File: ParquetDictionaryRowGroupFilter.java From iceberg with Apache License 2.0
@SuppressWarnings("deprecation") private static boolean hasNonDictionaryPages(ColumnChunkMetaData meta) { EncodingStats stats = meta.getEncodingStats(); if (stats != null) { return stats.hasNonDictionaryEncodedPages(); } // without EncodingStats, fall back to testing the encoding list Set<Encoding> encodings = new HashSet<Encoding>(meta.getEncodings()); if (encodings.remove(Encoding.PLAIN_DICTIONARY)) { // if remove returned true, PLAIN_DICTIONARY was present, which means at // least one page was dictionary encoded and 1.0 encodings are used // RLE and BIT_PACKED are only used for repetition or definition levels encodings.remove(Encoding.RLE); encodings.remove(Encoding.BIT_PACKED); if (encodings.isEmpty()) { return false; // no encodings other than dictionary or rep/def levels } return true; } else { // if PLAIN_DICTIONARY wasn't present, then either the column is not // dictionary-encoded, or the 2.0 encoding, RLE_DICTIONARY, was used. // for 2.0, this cannot determine whether a page fell back without // page encoding stats return true; } }
Example 4
Source File: DictionaryPageReader.java From parquet-mr with Apache License 2.0
private boolean hasDictionaryPage(ColumnChunkMetaData column) {
  EncodingStats stats = column.getEncodingStats();
  if (stats != null) {
    // ensure there is a dictionary page and that it is used to encode data pages
    return stats.hasDictionaryPages() && stats.hasDictionaryEncodedPages();
  }

  Set<Encoding> encodings = column.getEncodings();
  return (encodings.contains(PLAIN_DICTIONARY) || encodings.contains(RLE_DICTIONARY));
}
Example 5
Source File: DictionaryFilter.java From parquet-mr with Apache License 2.0
@SuppressWarnings("deprecation") private static boolean hasNonDictionaryPages(ColumnChunkMetaData meta) { EncodingStats stats = meta.getEncodingStats(); if (stats != null) { return stats.hasNonDictionaryEncodedPages(); } // without EncodingStats, fall back to testing the encoding list Set<Encoding> encodings = new HashSet<Encoding>(meta.getEncodings()); if (encodings.remove(Encoding.PLAIN_DICTIONARY)) { // if remove returned true, PLAIN_DICTIONARY was present, which means at // least one page was dictionary encoded and 1.0 encodings are used // RLE and BIT_PACKED are only used for repetition or definition levels encodings.remove(Encoding.RLE); encodings.remove(Encoding.BIT_PACKED); if (encodings.isEmpty()) { return false; // no encodings other than dictionary or rep/def levels } return true; } else { // if PLAIN_DICTIONARY wasn't present, then either the column is not // dictionary-encoded, or the 2.0 encoding, RLE_DICTIONARY, was used. // for 2.0, this cannot determine whether a page fell back without // page encoding stats return true; } }
Example 6
Source File: ParquetMetadataCommand.java From parquet-mr with Apache License 2.0
private void printColumnChunk(Logger console, int width, ColumnChunkMetaData column, MessageType schema) {
  String[] path = column.getPath().toArray();
  PrimitiveType type = primitive(schema, path);
  Preconditions.checkNotNull(type);

  ColumnDescriptor desc = schema.getColumnDescription(path);
  long size = column.getTotalSize();
  long count = column.getValueCount();
  float perValue = ((float) size) / count;
  CompressionCodecName codec = column.getCodec();
  Set<Encoding> encodings = column.getEncodings();
  EncodingStats encodingStats = column.getEncodingStats();
  // prefer the per-page EncodingStats summary when the writer recorded it,
  // otherwise summarize the chunk-level encoding set
  String encodingSummary = encodingStats == null
      ? encodingsAsString(encodings, desc)
      : encodingStatsAsString(encodingStats);
  Statistics stats = column.getStatistics();
  String name = column.getPath().toDotString();
  PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName();

  // fixed-length binary columns also print the declared type length
  if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    console.info(String.format("%-" + width + "s FIXED[%d] %s %-7s %-9d %-8s %-7s %s",
        name, type.getTypeLength(), shortCodec(codec), encodingSummary, count,
        humanReadable(perValue),
        stats == null || !stats.isNumNullsSet() ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats)));
  } else {
    console.info(String.format("%-" + width + "s %-9s %s %-7s %-9d %-10s %-7s %s",
        name, typeName, shortCodec(codec), encodingSummary, count, humanReadable(perValue),
        stats == null || !stats.isNumNullsSet() ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats)));
  }
}