org.apache.parquet.io.api.PrimitiveConverter Java Examples
The following examples show how to use
org.apache.parquet.io.api.PrimitiveConverter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ColumnReaderBase.java From parquet-mr with Apache License 2.0 | 6 votes |
/** * creates a reader for triplets * @param path the descriptor for the corresponding column * @param pageReader the underlying store to read from * @param converter a converter that materializes the values in this column in the current record * @param writerVersion writer version string from the Parquet file being read */ ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) { this.path = Objects.requireNonNull(path, "path cannot be null"); this.pageReader = Objects.requireNonNull(pageReader, "pageReader cannot be null"); this.converter = Objects.requireNonNull(converter, "converter cannot be null"); this.writerVersion = writerVersion; this.maxDefinitionLevel = path.getMaxDefinitionLevel(); DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); if (dictionaryPage != null) { try { this.dictionary = dictionaryPage.getEncoding().initDictionary(path, dictionaryPage); if (converter.hasDictionarySupport()) { converter.setDictionary(dictionary); } } catch (IOException e) { throw new ParquetDecodingException("could not decode the dictionary for " + path, e); } } else { this.dictionary = null; } this.totalValueCount = pageReader.getTotalValueCount(); if (totalValueCount <= 0) { throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0"); } }
Example #2
Source File: CheckParquet251Command.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that the null count recorded in a page's statistics matches the page contents.
 *
 * <p>Every value of the page is walked: values whose definition level reaches the column's
 * maximum are written to the validating converter, all others are counted as nulls.
 *
 * @param page the data page to validate
 * @param dict the dictionary page passed to the page reader together with {@code page}
 * @param desc descriptor of the column the page belongs to
 * @throws BadStatsException if the counted nulls differ from the statistics' null count
 */
private void validateStatsForPage(DataPage page, DictionaryPage dict, ColumnDescriptor desc) {
  SingletonPageReader reader = new SingletonPageReader(dict, page);
  PrimitiveConverter converter = getValidatingConverter(page, desc.getType());
  // Wildcard rather than the raw type: only type-agnostic accessors are used below, and this
  // avoids raw-type warnings.
  Statistics<?> stats = getStatisticsFromPageHeader(page);

  long numNulls = 0;
  ColumnReader column = COL_READER_CTOR.newInstance(desc, reader, converter, null);
  for (int i = 0; i < reader.getTotalValueCount(); i += 1) {
    if (column.getCurrentDefinitionLevel() >= desc.getMaxDefinitionLevel()) {
      column.writeCurrentValueToConverter();
    } else {
      numNulls += 1;
    }
    column.consume();
  }

  if (numNulls != stats.getNumNulls()) {
    throw new BadStatsException("Number of nulls doesn't match.");
  }

  console.debug(String.format(
      "Validated stats min=%s max=%s nulls=%d for page=%s col=%s",
      stats.minAsString(),
      stats.maxAsString(),
      stats.getNumNulls(),
      page,
      Arrays.toString(desc.getPath())));
}
Example #3
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a handler that wraps a primitive converter for one Thrift field.
 *
 * <p>Prepares the {@code readFieldBegin} protocol event, which reports the field's name,
 * Thrift type and field id.
 *
 * @param delegate the converter this handler wraps
 * @param field the Thrift field being handled
 * @param events the list of protocol events kept by this handler
 */
public PrimitiveFieldHandler(
    PrimitiveConverter delegate, final ThriftField field, List<TProtocol> events) {
  this.delegate = delegate;
  this.events = events;

  final byte thriftType;
  if (field.getType().getType() == ThriftTypeID.ENUM) {
    // enums are serialized as I32
    thriftType = ThriftTypeID.I32.getThriftType();
  } else {
    thriftType = field.getType().getType().getThriftType();
  }

  this.readFieldBegin = new ParquetProtocol("readFieldBegin()") {
    @Override
    public TField readFieldBegin() throws TException {
      return new TField(field.getName(), thriftType, field.getFieldId());
    }
  };
}
Example #4
Source File: TestStatistics.java From parquet-mr with Apache License 2.0 | 5 votes |
private void validateStatsForPage(DataPage page, DictionaryPage dict, ColumnDescriptor desc) { SingletonPageReader reader = new SingletonPageReader(dict, page); PrimitiveConverter converter = getValidatingConverter(page, desc.getType()); Statistics<?> stats = getStatisticsFromPageHeader(page); assertEquals("Statistics does not use the proper comparator", desc.getPrimitiveType().comparator().getClass(), stats.comparator().getClass()); if (stats.isEmpty()) { // stats are empty if num nulls = 0 and there are no non-null values // this happens if stats are not written (e.g., when stats are too big) return; } long numNulls = 0; ColumnReaderImpl column = new ColumnReaderImpl(desc, reader, converter, null); for (int i = 0; i < reader.getTotalValueCount(); i += 1) { if (column.getCurrentDefinitionLevel() >= desc.getMaxDefinitionLevel()) { column.writeCurrentValueToConverter(); } else { numNulls += 1; } column.consume(); } Assert.assertEquals(numNulls, stats.getNumNulls()); }
Example #5
Source File: FileEncodingsIT.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Pushes every value of a page through a converter built from the expected values.
 *
 * @param rowGroupID identifier of the row group, forwarded to the converter factory
 * @param pageID identifier of the page, forwarded to the converter factory
 * @param dictPage the dictionary page passed to the page reader together with {@code page}
 * @param page the data page whose values are read
 * @param columnDesc descriptor of the column the page belongs to
 * @param expectedValues the expected values, forwarded to the converter factory
 */
public static void validateValuesForPage(
    int rowGroupID,
    int pageID,
    DictionaryPage dictPage,
    DataPage page,
    ColumnDescriptor columnDesc,
    List<?> expectedValues) {
  TestStatistics.SingletonPageReader singlePageReader =
      new TestStatistics.SingletonPageReader(dictPage, page);
  PrimitiveConverter checkingConverter =
      getConverter(rowGroupID, pageID, columnDesc.getType(), expectedValues);
  ColumnReaderImpl columnReader =
      new ColumnReaderImpl(columnDesc, singlePageReader, checkingConverter, null);

  int valueIndex = 0;
  while (valueIndex < singlePageReader.getTotalValueCount()) {
    columnReader.writeCurrentValueToConverter();
    columnReader.consume();
    valueIndex += 1;
  }
}
Example #6
Source File: RecordReaderImplementation.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a state for one primitive column.
 *
 * <p>Besides storing the arguments, caches the column's definition and repetition levels,
 * its primitive type name, its field path and index path, and the last element of each path.
 *
 * @param id identifier of this state
 * @param primitiveColumnIO the column IO this state is derived from
 * @param column the reader for the column
 * @param nextLevel the next-level table stored for this state
 * @param groupConverterPath the group converters stored for this state
 * @param primitiveConverter the converter stored for this state
 */
private State(
    int id,
    PrimitiveColumnIO primitiveColumnIO,
    ColumnReader column,
    int[] nextLevel,
    GroupConverter[] groupConverterPath,
    PrimitiveConverter primitiveConverter) {
  // Arguments stored as supplied.
  this.id = id;
  this.primitiveColumnIO = primitiveColumnIO;
  this.column = column;
  this.nextLevel = nextLevel;
  this.groupConverterPath = groupConverterPath;
  this.primitiveConverter = primitiveConverter;

  // Values derived from the column IO.
  this.maxDefinitionLevel = primitiveColumnIO.getDefinitionLevel();
  this.maxRepetitionLevel = primitiveColumnIO.getRepetitionLevel();
  this.primitive = primitiveColumnIO.getType().asPrimitiveType().getPrimitiveTypeName();

  this.fieldPath = primitiveColumnIO.getFieldPath();
  this.primitiveField = this.fieldPath[this.fieldPath.length - 1];

  this.indexFieldPath = primitiveColumnIO.getIndexFieldPath();
  this.primitiveFieldIndex = this.indexFieldPath[this.indexFieldPath.length - 1];
}
Example #7
Source File: SynchronizingColumnReader.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates a column reader that additionally keeps an iterator of row indexes.
 *
 * @param path descriptor of the column being read
 * @param pageReader source of the column's pages
 * @param converter receives the values of this column
 * @param writerVersion writer version parsed from the Parquet file being read
 * @param rowIndexes the row indexes stored by this reader
 */
SynchronizingColumnReader(
    ColumnDescriptor path,
    PageReader pageReader,
    PrimitiveConverter converter,
    ParsedVersion writerVersion,
    PrimitiveIterator.OfLong rowIndexes) {
  super(path, pageReader, converter, writerVersion);
  // Both fields are assigned before the first consume() call.
  this.rowIndexes = rowIndexes;
  this.targetRow = Long.MIN_VALUE;
  consume();
}
Example #8
Source File: ColumnReadStoreImpl.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Resolves the primitive converter for a column by walking its path through the schema and
 * the record converter in parallel.
 *
 * @param path descriptor of the column whose converter is wanted
 * @return the converter found at the end of the column's path
 */
private PrimitiveConverter getPrimitiveConverter(ColumnDescriptor path) {
  Type type = schema;
  Converter converter = recordConverter;
  for (String name : path.getPath()) {
    final GroupType group = type.asGroupType();
    int index = group.getFieldIndex(name);
    type = group.getType(name);
    converter = converter.asGroupConverter().getConverter(index);
  }
  return converter.asPrimitiveConverter();
}
Example #9
Source File: ColumnReadStoreImpl.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Creates the reader for a column.
 *
 * <p>When the page read store provides row indexes a {@code SynchronizingColumnReader} is
 * returned, otherwise a plain {@code ColumnReaderImpl}.
 *
 * @param path descriptor of the column to read
 * @return a new reader for the column
 */
@Override
public ColumnReader getColumnReader(ColumnDescriptor path) {
  PrimitiveConverter converter = getPrimitiveConverter(path);
  PageReader pageReader = pageReadStore.getPageReader(path);
  Optional<PrimitiveIterator.OfLong> rowIndexes = pageReadStore.getRowIndexes();
  return rowIndexes
      .<ColumnReader>map(indexes ->
          new SynchronizingColumnReader(path, pageReader, converter, writerVersion, indexes))
      .orElseGet(() -> new ColumnReaderImpl(path, pageReader, converter, writerVersion));
}
Example #10
Source File: FilteringPrimitiveConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a converter that wraps a delegate together with a set of value inspectors.
 *
 * @param delegate the wrapped converter; must not be null
 * @param valueInspectors the inspectors stored by this converter; must not be null
 * @throws NullPointerException if either argument is null
 */
public FilteringPrimitiveConverter(PrimitiveConverter delegate, ValueInspector[] valueInspectors) {
  // Validate in declaration order so the first offending argument is the one reported.
  Objects.requireNonNull(delegate, "delegate cannot be null");
  Objects.requireNonNull(valueInspectors, "valueInspectors cannot be null");
  this.delegate = delegate;
  this.valueInspectors = valueInspectors;
}
Example #11
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader as a long and passes it to the converter.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader positioned on the value to transfer
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  long value = columnReader.getLong();
  primitiveConverter.addLong(value);
}
Example #12
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader as an int and passes it to the converter.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader positioned on the value to transfer
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  int value = columnReader.getInteger();
  primitiveConverter.addInt(value);
}
Example #13
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader as a boolean and passes it to the converter.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader positioned on the value to transfer
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  boolean value = columnReader.getBoolean();
  primitiveConverter.addBoolean(value);
}
Example #14
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader with {@code getBinary()} and passes it to
 * the converter with {@code addBinary(...)}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override public void addValueToPrimitiveConverter( PrimitiveConverter primitiveConverter, ColumnReader columnReader) { primitiveConverter.addBinary(columnReader.getBinary()); }
Example #15
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader as a float and passes it to the converter.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader positioned on the value to transfer
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  float value = columnReader.getFloat();
  primitiveConverter.addFloat(value);
}
Example #16
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader as a double and passes it to the converter.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader positioned on the value to transfer
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  double value = columnReader.getDouble();
  primitiveConverter.addDouble(value);
}
Example #17
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader with {@code getBinary()} and passes it to
 * the converter with {@code addBinary(...)}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override public void addValueToPrimitiveConverter( PrimitiveConverter primitiveConverter, ColumnReader columnReader) { primitiveConverter.addBinary(columnReader.getBinary()); }
Example #18
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the current value from the column reader with {@code getBinary()} and passes it to
 * the converter with {@code addBinary(...)}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override public void addValueToPrimitiveConverter( PrimitiveConverter primitiveConverter, ColumnReader columnReader) { primitiveConverter.addBinary(columnReader.getBinary()); }
Example #19
Source File: PrimitiveType.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Transfers a value from a column reader to a primitive converter.
 *
 * <p>Implementations choose which typed getter of the reader and which typed {@code add}
 * method of the converter to use.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
public abstract void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader);
Example #20
Source File: ColumnReadStoreImpl.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a {@code ColumnReaderImpl} for a column, using the converter resolved for its path.
 *
 * @param path descriptor of the column to read
 * @param pageReader source of the column's pages
 * @return a new reader for the column
 */
private ColumnReaderImpl newMemColumnReader(ColumnDescriptor path, PageReader pageReader) {
  return new ColumnReaderImpl(path, pageReader, getPrimitiveConverter(path), writerVersion);
}
Example #21
Source File: ThriftRecordConverter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Creates a counter around the given converter.
 *
 * @param delegate the converter stored as this counter's delegate; no null check is performed
 */
public PrimitiveCounter(PrimitiveConverter delegate) { this.delegate = delegate; }
Example #22
Source File: ColumnReaderImpl.java From parquet-mr with Apache License 2.0 | 2 votes |
/**
 * Creates a reader for triplets.
 *
 * <p>All arguments are forwarded to the superclass constructor, after which
 * {@code consume()} is called once.
 *
 * @param path
 *          the descriptor for the corresponding column
 * @param pageReader
 *          the underlying store to read from
 * @param converter
 *          a converter that materializes the values in this column in the current record
 * @param writerVersion
 *          writer version string from the Parquet file being read
 */
public ColumnReaderImpl(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) { super(path, pageReader, converter, writerVersion); consume(); }