Java Code Examples for org.apache.parquet.column.page.DataPage#getValueCount()

The following examples show how to use org.apache.parquet.column.page.DataPage#getValueCount(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestMemPageStore.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes four pages into an in-memory page store and reads them back,
 * printing each page until the number of values read reaches the total
 * value count reported by the reader.
 *
 * Each page is written with 209 as the second writePage argument
 * (presumably the per-page value count -- confirm against PageWriter).
 */
@Test
public void test() throws IOException {
  MemPageStore store = new MemPageStore(10);
  ColumnDescriptor column = new ColumnDescriptor(path , PrimitiveTypeName.INT64, 2, 2);
  LongStatistics statistics = new LongStatistics();
  PageWriter writer = store.getPageWriter(column);
  // Same four writes as before, in the same order; only the buffer size varies.
  for (int pageBytes : new int[] {735, 743, 743, 735}) {
    writer.writePage(BytesInput.from(new byte[pageBytes]), 209, statistics, BIT_PACKED, BIT_PACKED, PLAIN);
  }
  PageReader reader = store.getPageReader(column);
  long expectedValues = reader.getTotalValueCount();
  System.out.println(expectedValues);
  int valuesSeen = 0;
  // At least one page is always read before the running total is checked.
  while (true) {
    DataPage page = reader.readPage();
    valuesSeen += page.getValueCount();
    System.out.println(page);
    // TODO: assert
    if (valuesSeen >= expectedValues) {
      break;
    }
  }
}
 
Example 2
Source File: TestColumnReaderImpl.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code rows} binary values to a required column through
 * ColumnWriterV2, flushing a page after every 1000 values, then checks that
 * the pages' value and row counts both add up to {@code rows} and that a
 * ColumnReaderImpl hands every value to the converter at repetition and
 * definition level 0.
 */
@Test
public void test() throws Exception {
  MessageType messageType = MessageTypeParser.parseMessageType("message test { required binary foo; }");
  ColumnDescriptor column = messageType.getColumns().get(0);
  MemPageWriter memWriter = new MemPageWriter();
  ParquetProperties writerProps = ParquetProperties.builder()
      .withDictionaryPageSize(1024)
      .withWriterVersion(PARQUET_2_0)
      .withPageSize(2048)
      .build();
  ColumnWriterV2 writer = new ColumnWriterV2(column, memWriter, writerProps);
  for (int row = 0; row < rows; row++) {
    writer.write(Binary.fromString("bar" + row % 10), 0, 0);
    boolean pageBoundary = (row + 1) % 1000 == 0;
    if (pageBoundary) {
      writer.writePage();
    }
  }
  // Flush whatever is left after the last full 1000-value page.
  writer.writePage();
  writer.finalizeColumnChunk();

  List<DataPage> writtenPages = memWriter.getPages();
  int totalValues = 0;
  int totalRows = 0;
  for (DataPage page : writtenPages) {
    totalValues += page.getValueCount();
    DataPageV2 pageV2 = (DataPageV2) page;
    totalRows += pageV2.getRowCount();
  }
  assertEquals(rows, totalRows);
  assertEquals(rows, totalValues);

  MemPageReader memReader = new MemPageReader(rows, writtenPages.iterator(), memWriter.getDictionaryPage());
  ValidatingConverter validator = new ValidatingConverter();
  ColumnReader reader = new ColumnReaderImpl(column, memReader, validator, VersionParser.parse(Version.FULL_VERSION));
  for (int row = 0; row < rows; row++) {
    assertEquals(0, reader.getCurrentRepetitionLevel());
    assertEquals(0, reader.getCurrentDefinitionLevel());
    reader.writeCurrentValueToConverter();
    reader.consume();
  }
  assertEquals(rows, validator.count);
}
 
Example 3
Source File: TestColumnReaderImpl.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code rows} nulls to an optional binary column through
 * ColumnWriterV2, flushing a page after every 1000 entries, then checks that
 * the pages' value and row counts both add up to {@code rows} and that
 * consuming every entry through a ColumnReaderImpl leaves the converter's
 * count at 0.
 */
@Test
public void testOptional() throws Exception {
  MessageType messageType = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
  ColumnDescriptor column = messageType.getColumns().get(0);
  MemPageWriter memWriter = new MemPageWriter();
  ParquetProperties writerProps = ParquetProperties.builder()
      .withDictionaryPageSize(1024)
      .withWriterVersion(PARQUET_2_0)
      .withPageSize(2048)
      .build();
  ColumnWriterV2 writer = new ColumnWriterV2(column, memWriter, writerProps);
  for (int row = 0; row < rows; row++) {
    writer.writeNull(0, 0);
    boolean pageBoundary = (row + 1) % 1000 == 0;
    if (pageBoundary) {
      writer.writePage();
    }
  }
  // Flush whatever is left after the last full 1000-entry page.
  writer.writePage();
  writer.finalizeColumnChunk();

  List<DataPage> writtenPages = memWriter.getPages();
  int totalValues = 0;
  int totalRows = 0;
  for (DataPage page : writtenPages) {
    totalValues += page.getValueCount();
    DataPageV2 pageV2 = (DataPageV2) page;
    totalRows += pageV2.getRowCount();
  }
  assertEquals(rows, totalRows);
  assertEquals(rows, totalValues);

  MemPageReader memReader = new MemPageReader(rows, writtenPages.iterator(), memWriter.getDictionaryPage());
  ValidatingConverter validator = new ValidatingConverter();
  ColumnReader reader = new ColumnReaderImpl(column, memReader, validator, VersionParser.parse(Version.FULL_VERSION));
  for (int row = 0; row < rows; row++) {
    assertEquals(0, reader.getCurrentRepetitionLevel());
    assertEquals(0, reader.getCurrentDefinitionLevel());
    // No writeCurrentValueToConverter() here: entries are only consumed.
    reader.consume();
  }
  assertEquals(0, validator.count);
}
 
Example 4
Source File: ColumnChunkPageReadStore.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a page reader over the given compressed pages.
 *
 * The pages are copied into a new ArrayDeque, and the reader's value count is
 * set to the sum of getValueCount() over all supplied pages.
 *
 * @param decompressor stored as given
 * @param compressedPages pages to copy and to total the value count from
 * @param compressedDictionaryPage stored as given
 * @param offsetIndex stored as given
 * @param rowCount stored as given
 */
ColumnChunkPageReader(BytesInputDecompressor decompressor, List<DataPage> compressedPages,
    DictionaryPage compressedDictionaryPage, OffsetIndex offsetIndex, long rowCount) {
  this.decompressor = decompressor;
  this.compressedDictionaryPage = compressedDictionaryPage;
  this.offsetIndex = offsetIndex;
  this.rowCount = rowCount;
  this.compressedPages = new ArrayDeque<DataPage>(compressedPages);

  // Accumulate in a long: each page reports its own count, the chunk total is stored as long.
  long totalValues = 0;
  for (DataPage page : compressedPages) {
    totalValues += page.getValueCount();
  }
  this.valueCount = totalValues;
}
 
Example 5
Source File: FileEncodingsIT.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the pages of a group in list order. Each page is checked against
 * the consecutive slice of {@code expectedValues} whose length is that page's
 * value count, starting where the previous page's slice ended.
 *
 * @param rowGroupID passed through to the page validator
 * @param dictPage passed through to the page validator
 * @param pageGroup pages to validate, first to last
 * @param desc passed through to the page validator
 * @param expectedValues expected values for the whole group, in page order
 */
private static void validateFirstToLast(int rowGroupID, DictionaryPage dictPage, List<DataPage> pageGroup, ColumnDescriptor desc, List<?> expectedValues) {
  int pageID = 0;
  int sliceStart = 0;
  for (DataPage page : pageGroup) {
    int sliceEnd = sliceStart + page.getValueCount();
    List<?> expectedPageValues = expectedValues.subList(sliceStart, sliceEnd);
    PageValuesValidator.validateValuesForPage(rowGroupID, pageID, dictPage, page, desc, expectedPageValues);
    // The next page's slice begins where this one ended.
    sliceStart = sliceEnd;
    pageID++;
  }
}
 
Example 6
Source File: FileEncodingsIT.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the pages of a group in reverse list order. Starting from the end
 * of {@code expectedValues}, each page is checked against the slice of its own
 * value-count length that ends where the previously validated page's slice began.
 *
 * @param rowGroupID passed through to the page validator
 * @param dictPage passed through to the page validator
 * @param pageGroup pages to validate, last to first
 * @param desc passed through to the page validator
 * @param expectedValues expected values for the whole group, in page order
 */
private static void validateLastToFirst(int rowGroupID, DictionaryPage dictPage, List<DataPage> pageGroup, ColumnDescriptor desc, List<?> expectedValues) {
  int sliceEnd = expectedValues.size();
  int pageID = pageGroup.size() - 1;
  while (pageID >= 0) {
    DataPage page = pageGroup.get(pageID);
    int sliceStart = sliceEnd - page.getValueCount();
    List<?> expectedPageValues = expectedValues.subList(sliceStart, sliceEnd);
    PageValuesValidator.validateValuesForPage(rowGroupID, pageID, dictPage, page, desc, expectedPageValues);
    // The preceding page's slice ends where this one started.
    sliceEnd = sliceStart;
    pageID--;
  }
}