org.apache.parquet.format.Encoding Java Examples
The following examples show how to use org.apache.parquet.format.Encoding.
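org.apache.parquet.format.Encoding is the Thrift-generated enum that appears in Parquet file metadata (page headers and column chunk metadata); most of the examples below translate it to and from the parquet-column enum org.apache.parquet.column.Encoding, or test which encodings a column chunk uses. As a minimal orientation sketch (PLAIN, RLE, and PLAIN_DICTIONARY are standard constants of the enum; everything else relies only on java.lang.Enum and the JDK):

import java.util.List;
import org.apache.parquet.format.Encoding;

public class FormatEncodingBasics {
  public static void main(String[] args) {
    // Enumerate the constants the format enum declares (PLAIN, RLE, PLAIN_DICTIONARY, ...).
    for (Encoding encoding : Encoding.values()) {
      System.out.println(encoding.name());
    }

    // A membership check like this one drives the dictionary detection in Examples #1 and #7.
    List<Encoding> chunkEncodings = List.of(Encoding.PLAIN, Encoding.PLAIN_DICTIONARY);
    boolean usesDictionary = chunkEncodings.contains(Encoding.PLAIN_DICTIONARY);
    System.out.println("uses dictionary: " + usesDictionary);
  }
}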
Example #1
Source File: VarLengthValuesColumn.java From Bats with Apache License 2.0
VarLengthValuesColumn(ParquetRecordReader parentReader, ColumnDescriptor descriptor,
                      ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                      SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  variableWidthVector = (VariableWidthVector) valueVec;

  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
    // We didn't implement the fixed-length optimization when a Parquet dictionary is used, as there
    // are no data points about this use case. Bulk processing is also enabled by default, since early
    // data profiling (for detecting the best processing strategy to use) is disabled when the column
    // precision is already set.
    bulkReaderState.columnPrecInfo.columnPrecisionType = ColumnPrecisionType.DT_PRECISION_IS_VARIABLE;
    bulkReaderState.columnPrecInfo.bulkProcess = true;
  } else {
    usingDictionary = false;
  }
}
Example #2
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
Set<org.apache.parquet.column.Encoding> fromFormatEncodings(List<Encoding> encodings) {
  Set<org.apache.parquet.column.Encoding> converted = new HashSet<org.apache.parquet.column.Encoding>();

  for (Encoding encoding : encodings) {
    converted.add(getEncoding(encoding));
  }

  // make converted unmodifiable, drop reference to modifiable copy
  converted = Collections.unmodifiableSet(converted);

  // atomically update the cache
  Set<org.apache.parquet.column.Encoding> cached = cachedEncodingSets.putIfAbsent(converted, converted);

  if (cached == null) {
    // cached == null signifies that converted was *not* in the cache previously
    // so we can return converted instead of throwing it away, it has now been cached
    cached = converted;
  }

  return cached;
}
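The caching in Example #2 is an interning pattern: equal encoding sets are deduplicated so that the many column chunks of a large file can share a single immutable Set instance. A standalone sketch of the same idea follows; the class, field, and method names are illustrative and not part of the parquet-mr API, and String stands in for the encoding type to keep the sketch self-contained.

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

// Hypothetical interning cache mirroring the putIfAbsent pattern in Example #2.
final class EncodingSetInterner {
  private final ConcurrentMap<Set<String>, Set<String>> cachedSets = new ConcurrentHashMap<>();

  Set<String> intern(Set<String> encodings) {
    // Freeze a defensive copy before publishing it to the cache.
    Set<String> frozen = Collections.unmodifiableSet(new HashSet<>(encodings));
    // putIfAbsent is atomic: either our copy is now cached (returns null),
    // or an equal set was already cached and we reuse that instance instead.
    Set<String> cached = cachedSets.putIfAbsent(frozen, frozen);
    return cached == null ? frozen : cached;
  }
}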
Example #3
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
@Deprecated
public void writeDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  writePageHeader(newDataPageHeader(uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding), to);
}
Example #4
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public void writeDataPageV1Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    int crc,
    OutputStream to) throws IOException {
  writePageHeader(newDataPageHeader(uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding, crc), to);
}
Example #5
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
private PageHeader newDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    int crc) {
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  pageHeader.setCrc(crc);
  pageHeader.setData_page_header(new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding)));
  return pageHeader;
}
Example #6
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public void writeDictionaryPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding,
    int crc,
    OutputStream to) throws IOException {
  PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  pageHeader.setCrc(crc);
  pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
  writePageHeader(pageHeader, to);
}
Example #7
Source File: VarLengthColumn.java From Bats with Apache License 2.0
VarLengthColumn(ParquetRecordReader parentReader, ColumnDescriptor descriptor,
                ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);

  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
  } else {
    usingDictionary = false;
  }
}
Example #8
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public void writeDictionaryPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  PageHeader pageHeader = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  pageHeader.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
  writePageHeader(pageHeader, to);
}
Example #9
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
private PageHeader newDataPageV2Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    int nullCount,
    int rowCount,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength,
    int dlByteLength) {
  // TODO: pageHeader.crc = ...;
  DataPageHeaderV2 dataPageHeaderV2 = new DataPageHeaderV2(
      valueCount, nullCount, rowCount,
      getEncoding(dataEncoding),
      dlByteLength, rlByteLength);
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
  pageHeader.setData_page_header_v2(dataPageHeaderV2);
  return pageHeader;
}
Example #10
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public void writeDataPageV2Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    int nullCount,
    int rowCount,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength,
    int dlByteLength,
    OutputStream to) throws IOException {
  writePageHeader(
      newDataPageV2Header(
          uncompressedSize, compressedSize,
          valueCount, nullCount, rowCount,
          dataEncoding,
          rlByteLength, dlByteLength),
      to);
}
Example #11
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public void writeDataPageV1Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  writePageHeader(newDataPageHeader(uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding), to);
}
Example #12
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
@Deprecated
public void writeDataPageV2Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    int nullCount,
    int rowCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength,
    int dlByteLength,
    OutputStream to) throws IOException {
  writePageHeader(
      newDataPageV2Header(
          uncompressedSize, compressedSize,
          valueCount, nullCount, rowCount,
          dataEncoding,
          rlByteLength, dlByteLength),
      to);
}
Example #13
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
private PageHeader newDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding) {
  PageHeader pageHeader = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  pageHeader.setData_page_header(new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding)));
  return pageHeader;
}
Example #14
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
@Deprecated
public void writeDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  writePageHeader(
      newDataPageHeader(uncompressedSize, compressedSize, valueCount,
          rlEncoding, dlEncoding, valuesEncoding),
      to);
}
Example #15
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
private List<Encoding> toFormatEncodings(Set<org.apache.parquet.column.Encoding> encodings) {
  List<Encoding> converted = new ArrayList<Encoding>(encodings.size());
  for (org.apache.parquet.column.Encoding encoding : encodings) {
    converted.add(getEncoding(encoding));
  }
  return converted;
}
Example #16
Source File: MetadataReader.java From presto with Apache License 2.0
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings) {
  Set<org.apache.parquet.column.Encoding> columnEncodings = new HashSet<>();
  for (Encoding encoding : encodings) {
    columnEncodings.add(org.apache.parquet.column.Encoding.valueOf(encoding.name()));
  }
  return Collections.unmodifiableSet(columnEncodings);
}
Example #17
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public Encoding getEncoding(org.apache.parquet.column.Encoding encoding) {
  return Encoding.valueOf(encoding.name());
}
Example #18
Source File: ParquetMetadataConverter.java From parquet-mr with Apache License 2.0
public org.apache.parquet.column.Encoding getEncoding(Encoding encoding) {
  return org.apache.parquet.column.Encoding.valueOf(encoding.name());
}
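Taken together, Examples #17 and #18 convert between the format-level and column-level enums purely by constant name. A minimal sketch of how one might call them, assuming ParquetMetadataConverter's public no-argument constructor and the PLAIN constant, which exists in both enums:

import org.apache.parquet.format.Encoding;
import org.apache.parquet.format.converter.ParquetMetadataConverter;

public class EncodingConversionDemo {
  public static void main(String[] args) {
    ParquetMetadataConverter converter = new ParquetMetadataConverter();

    // column enum -> format enum (Example #17)
    Encoding formatEncoding = converter.getEncoding(org.apache.parquet.column.Encoding.PLAIN);

    // format enum -> column enum (Example #18)
    org.apache.parquet.column.Encoding columnEncoding = converter.getEncoding(formatEncoding);

    System.out.println(formatEncoding + " <-> " + columnEncoding);
  }
}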