org.apache.parquet.bytes.ByteBufferInputStream Java Examples
The following examples show how to use
org.apache.parquet.bytes.ByteBufferInputStream.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ColumnReaderBase.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Sets this reader up to consume a V1 data page.
 *
 * <p>Repetition levels, definition levels and the data values are initialized,
 * in that order, from one shared stream over the page bytes.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if reading the page bytes fails with an {@link IOException}
 */
private void readPageV1(DataPageV1 page) {
  final ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  final ValuesReader definitionReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);
  final int valueCount = page.getValueCount();

  try {
    final BytesInput pageBytes = page.getBytes();
    LOG.debug("page size {} bytes and {} values", pageBytes.size(), valueCount);

    LOG.debug("reading repetition levels at 0");
    final ByteBufferInputStream stream = pageBytes.toInputStream();
    repetitionReader.initFromPage(valueCount, stream);

    LOG.debug("reading definition levels at {}", stream.position());
    definitionReader.initFromPage(valueCount, stream);

    LOG.debug("reading data at {}", stream.position());
    initDataReader(page.getValueEncoding(), stream, valueCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }

  newPageInitialized(page);
}
Example #2
Source File: RunLengthDecoder.java From flink with Apache License 2.0 | 6 votes |
/** * Init from input stream. */ void initFromStream(int valueCount, ByteBufferInputStream in) throws IOException { this.in = in; if (fixedWidth) { // initialize for repetition and definition levels if (readLength) { int length = readIntLittleEndian(); this.in = in.sliceStream(length); } } else { // initialize for values if (in.available() > 0) { initWidthAndPacker(in.read()); } } if (bitWidth == 0) { // 0 bit width, treat this as an RLE run of valueCount number of 0's. this.mode = MODE.RLE; this.currentCount = valueCount; this.currentValue = 0; } else { this.currentCount = 0; } }
Example #3
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Initializes a float reader with four values, skips three of them and
 * checks that the next value read is 2.25f.
 */
@Test
public void testSkip() throws Exception {
  byte[] byteData = new byte[16];
  java.util.Arrays.fill(byteData, (byte) 0xFF);
  // Only these four bytes differ from the 0xFF filler; presumably they are the
  // split bytes of the fourth value.
  byteData[3] = (byte) 0x00;
  byteData[7] = (byte) 0x00;
  byteData[11] = (byte) 0x10;
  byteData[15] = (byte) 0x40;

  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(byteData));
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(4, stream);
  reader.skip(3);

  assertEquals(2.25f, reader.readFloat(), 0.0f);
}
Example #4
Source File: AbstractColumnReader.java From flink with Apache License 2.0 | 6 votes |
private void readPageV1(DataPageV1 page) throws IOException { this.pageValueCount = page.getValueCount(); ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL); // Initialize the decoders. if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) { throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding()); } int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); this.runLenDecoder = new RunLengthDecoder(bitWidth); try { BytesInput bytes = page.getBytes(); ByteBufferInputStream in = bytes.toInputStream(); rlReader.initFromPage(pageValueCount, in); this.runLenDecoder.initFromStream(pageValueCount, in); prepareNewPage(page.getValueEncoding(), in); } catch (IOException e) { throw new IOException("could not read page " + page + " in col " + descriptor, e); } }
Example #5
Source File: PageIterator.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Initializes the level iterators and the data reader from a V1 data page.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if reading the page bytes fails with an {@link IOException}
 */
private void initFromPage(DataPageV1 page) {
  this.triplesCount = page.getValueCount();

  final ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(desc, REPETITION_LEVEL);
  final ValuesReader definitionReader = page.getDlEncoding().getValuesReader(desc, DEFINITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevels = new ValuesReaderIntIterator(definitionReader);

  try {
    final BytesInput pageBytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", pageBytes.size(), triplesCount);

    LOG.debug("reading repetition levels at 0");
    final ByteBufferInputStream stream = pageBytes.toInputStream();
    repetitionReader.initFromPage(triplesCount, stream);

    LOG.debug("reading definition levels at {}", stream.position());
    definitionReader.initFromPage(triplesCount, stream);

    LOG.debug("reading data at {}", stream.position());
    initDataReader(page.getValueEncoding(), stream, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
  }
}
Example #6
Source File: TestDeltaByteArray.java From parquet-mr with Apache License 2.0 | 6 votes |
@Test public void testLengths() throws IOException { DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator()); ValuesReader reader = new DeltaBinaryPackingValuesReader(); Utils.writeData(writer, values); ByteBufferInputStream data = writer.getBytes().toInputStream(); int[] bin = Utils.readInts(reader, data, values.length); // test prefix lengths Assert.assertEquals(0, bin[0]); Assert.assertEquals(7, bin[1]); Assert.assertEquals(7, bin[2]); reader = new DeltaBinaryPackingValuesReader(); bin = Utils.readInts(reader, data, values.length); // test suffix lengths Assert.assertEquals(10, bin[0]); Assert.assertEquals(0, bin[1]); Assert.assertEquals(7, bin[2]); }
Example #7
Source File: DeltaBinaryPackingValuesReader.java From parquet-mr with Apache License 2.0 | 6 votes |
/** * eagerly loads all the data into memory */ @Override public void initFromPage(int valueCount, ByteBufferInputStream stream) throws IOException { this.in = stream; long startPos = in.position(); this.config = DeltaBinaryPackingConfig.readConfig(in); this.totalValueCount = BytesUtils.readUnsignedVarInt(in); allocateValuesBuffer(); bitWidths = new int[config.miniBlockNumInABlock]; //read first value from header valuesBuffer[valuesBuffered++] = BytesUtils.readZigZagVarLong(in); while (valuesBuffered < totalValueCount) { //values Buffered could be more than totalValueCount, since we flush on a mini block basis loadNewBlockToBuffer(); } updateNextOffset((int) (in.position() - startPos)); }
Example #8
Source File: BasePageIterator.java From iceberg with Apache License 2.0 | 6 votes |
/**
 * Initializes repetition levels, definition levels and the data reader from a V1 data page.
 *
 * @param initPage the V1 data page to read
 * @throws ParquetDecodingException if reading the page bytes fails with an {@link IOException}
 */
protected void initFromPage(DataPageV1 initPage) {
  this.triplesCount = initPage.getValueCount();

  final ValuesReader repetitionReader =
      initPage.getRlEncoding().getValuesReader(desc, ValuesType.REPETITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(repetitionReader);

  try {
    final BytesInput pageBytes = initPage.getBytes();
    LOG.debug("page size {} bytes and {} records", pageBytes.size(), triplesCount);

    LOG.debug("reading repetition levels at 0");
    final ByteBufferInputStream stream = pageBytes.toInputStream();
    repetitionReader.initFromPage(triplesCount, stream);

    LOG.debug("reading definition levels at {}", stream.position());
    initDefinitionLevelsReader(initPage, desc, stream, triplesCount);

    LOG.debug("reading data at {}", stream.position());
    initDataReader(initPage.getValueEncoding(), stream, initPage.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + initPage + " in col " + desc, e);
  }
}
Example #9
Source File: DeltaBinaryPackingValuesWriterForIntegerTest.java From parquet-mr with Apache License 2.0 | 6 votes |
private void shouldReadAndWrite(int[] data, int length) throws IOException { writeData(data, length); reader = new DeltaBinaryPackingValuesReader(); byte[] page = writer.getBytes().toByteArray(); int miniBlockSize = blockSize / miniBlockNum; double miniBlockFlushed = Math.ceil(((double) length - 1) / miniBlockSize); double blockFlushed = Math.ceil(((double) length - 1) / blockSize); double estimatedSize = 4 * 5 //blockHeader + 4 * miniBlockFlushed * miniBlockSize //data(aligned to miniBlock) + blockFlushed * miniBlockNum //bitWidth of mini blocks + (5.0 * blockFlushed);//min delta for each block assertTrue(estimatedSize >= page.length); reader.initFromPage(100, ByteBufferInputStream.wrap(ByteBuffer.wrap(page))); for (int i = 0; i < length; i++) { assertEquals(data[i], reader.readInteger()); } }
Example #10
Source File: BitPackingPerfTest.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Decodes {@code bytes} into {@code result} ten times with the given reader,
 * prints the accumulated time and throughput, and verifies the decoded values.
 *
 * @param bytes encoded page bytes
 * @param result destination array; its length is the number of values read per round
 * @param r reader under test
 * @return total elapsed time over all rounds, in nanoseconds
 * @throws IOException if initializing the reader fails
 */
private static long readNTimes(byte[] bytes, int[] result, ValuesReader r) throws IOException {
  System.out.println();
  long t = 0;
  int N = 10;
  System.gc();
  System.out.print(" " + r.getClass().getSimpleName());
  System.out.print(" no gc <");
  for (int k = 0; k < N; k++) {
    long t2 = System.nanoTime();
    r.initFromPage(result.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes)));
    for (int i = 0; i < result.length; i++) {
      result[i] = r.readInteger();
    }
    long t3 = System.nanoTime();
    t += t3 - t2;
  }
  long micros = t / 1000;
  // Widen before multiplying so a very large result array cannot overflow int.
  long valuesRead = (long) N * result.length;
  // A run shorter than 1µs would otherwise divide by zero and abort the test.
  long valuesPerMicro = valuesRead / Math.max(1L, micros);
  System.out.println("> read in " + micros + "µs " + valuesPerMicro + " values per µs");
  verify(result);
  return t;
}
Example #11
Source File: BaseVectorizedParquetValuesReader.java From iceberg with Apache License 2.0 | 6 votes |
@Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { this.inputStream = in; if (fixedWidth) { // initialize for repetition and definition levels if (readLength) { int length = readIntLittleEndian(); this.inputStream = in.sliceStream(length); } } else { // initialize for values if (in.available() > 0) { init(in.read()); } } if (bitWidth == 0) { // 0 bit width, treat this as an RLE run of valueCount number of 0's. this.mode = Mode.RLE; this.currentCount = valueCount; this.currentValue = 0; } else { this.currentCount = 0; } }
Example #12
Source File: PrimitiveColumnReader.java From presto with Apache License 2.0 | 6 votes |
/**
 * Creates the values reader for the given encoding and initializes it from the page stream.
 *
 * @param dataEncoding encoding of the page values
 * @param valueCount number of values in the page
 * @param in stream positioned at the start of the values
 * @return the initialized values reader
 * @throws ParquetDecodingException if the encoding needs a dictionary and none is set,
 *     or if initializing the reader fails with an {@link IOException}
 */
private ValuesReader initDataReader(ParquetEncoding dataEncoding, int valueCount, ByteBufferInputStream in) {
  final ValuesReader dataReader;
  if (!dataEncoding.usesDictionary()) {
    dataReader = dataEncoding.getValuesReader(columnDescriptor, VALUES);
  } else {
    if (dictionary == null) {
      throw new ParquetDecodingException("Dictionary is missing for Page");
    }
    dataReader = dataEncoding.getDictionaryBasedValuesReader(columnDescriptor, VALUES, dictionary);
  }

  try {
    dataReader.initFromPage(valueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("Error reading parquet page in column " + columnDescriptor, e);
  }
  return dataReader;
}
Example #13
Source File: PrimitiveColumnReader.java From presto with Apache License 2.0 | 6 votes |
/**
 * Initializes the level readers from a V1 data page and returns the reader for its values.
 *
 * @param page the V1 data page to read
 * @return the initialized values reader for the page data
 * @throws ParquetDecodingException if reading the page fails with an {@link IOException}
 */
private ValuesReader readPageV1(DataPageV1 page) {
  final ValuesReader repetitionLevels =
      page.getRepetitionLevelEncoding().getValuesReader(columnDescriptor, REPETITION_LEVEL);
  final ValuesReader definitionLevels =
      page.getDefinitionLevelEncoding().getValuesReader(columnDescriptor, DEFINITION_LEVEL);
  repetitionReader = new LevelValuesReader(repetitionLevels);
  definitionReader = new LevelValuesReader(definitionLevels);

  try {
    // All three sections are read one after another from the same stream.
    final ByteBufferInputStream stream = toInputStream(page.getSlice());
    repetitionLevels.initFromPage(page.getValueCount(), stream);
    definitionLevels.initFromPage(page.getValueCount(), stream);
    return initDataReader(page.getValueEncoding(), page.getValueCount(), stream);
  } catch (IOException e) {
    throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, e);
  }
}
Example #14
Source File: TestDictionary.java From parquet-mr with Apache License 2.0 | 6 votes |
@Test public void testZeroValues() throws IOException { FallbackValuesWriter<PlainIntegerDictionaryValuesWriter, PlainValuesWriter> cw = newPlainIntegerDictionaryValuesWriter(100, 100); cw.writeInteger(34); cw.writeInteger(34); getBytesAndCheckEncoding(cw, PLAIN_DICTIONARY); DictionaryValuesReader reader = initDicReader(cw, INT32); // pretend there are 100 nulls. what matters is offset = bytes.length. ByteBuffer bytes = ByteBuffer.wrap(new byte[] {0x00, 0x01, 0x02, 0x03}); // data doesn't matter ByteBufferInputStream stream = ByteBufferInputStream.wrap(bytes); stream.skipFully(stream.available()); reader.initFromPage(100, stream); // Testing the deprecated behavior of using byte arrays directly reader = initDicReader(cw, INT32); int offset = bytes.remaining(); reader.initFromPage(100, bytes, offset); }
Example #15
Source File: DeltaBinaryPackingValuesWriterForLongTest.java From parquet-mr with Apache License 2.0 | 6 votes |
private void shouldReadAndWrite(long[] data, int length) throws IOException { writeData(data, length); reader = new DeltaBinaryPackingValuesReader(); byte[] page = writer.getBytes().toByteArray(); int miniBlockSize = blockSize / miniBlockNum; double miniBlockFlushed = Math.ceil(((double) length - 1) / miniBlockSize); double blockFlushed = Math.ceil(((double) length - 1) / blockSize); double estimatedSize = 3 * 5 + 1 * 10 //blockHeader, 3 * int + 1 * long + 8 * miniBlockFlushed * miniBlockSize //data(aligned to miniBlock) + blockFlushed * miniBlockNum //bitWidth of mini blocks + (10.0 * blockFlushed);//min delta for each block assertTrue(estimatedSize >= page.length); reader.initFromPage(100, ByteBufferInputStream.wrap(ByteBuffer.wrap(page))); for (int i = 0; i < length; i++) { assertEquals(data[i], reader.readLong()); } }
Example #16
Source File: BenchmarkDeltaByteArray.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Benchmarks writing {@code sortedVals} with a {@link PlainValuesWriter} and
 * reading them back, then prints the stream position reached.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithPlainValuesWriter() throws IOException {
  final int slabSize = 64 * 1024;
  PlainValuesWriter writer = new PlainValuesWriter(slabSize, slabSize, new DirectByteBufferAllocator());
  BinaryPlainValuesReader reader = new BinaryPlainValuesReader();

  Utils.writeData(writer, sortedVals);
  ByteBufferInputStream data = writer.getBytes().toInputStream();
  // NOTE(review): reads values.length entries although sortedVals was written;
  // presumably both arrays have the same length - confirm.
  Utils.readData(reader, data, values.length);

  System.out.println("size " + data.position());
}
Example #17
Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0 | 5 votes |
private static final ParquetMetadata readFooter(InputFile file, ParquetReadOptions options, SeekableInputStream f, ParquetMetadataConverter converter) throws IOException { long fileLen = file.getLength(); LOG.debug("File length {}", fileLen); int FOOTER_LENGTH_SIZE = 4; if (fileLen < MAGIC.length + FOOTER_LENGTH_SIZE + MAGIC.length) { // MAGIC + data + footer + footerIndex + MAGIC throw new RuntimeException(file.toString() + " is not a Parquet file (too small length: " + fileLen + ")"); } long footerLengthIndex = fileLen - FOOTER_LENGTH_SIZE - MAGIC.length; LOG.debug("reading footer index at {}", footerLengthIndex); f.seek(footerLengthIndex); int footerLength = readIntLittleEndian(f); byte[] magic = new byte[MAGIC.length]; f.readFully(magic); if (!Arrays.equals(MAGIC, magic)) { throw new RuntimeException(file.toString() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(MAGIC) + " but found " + Arrays.toString(magic)); } long footerIndex = footerLengthIndex - footerLength; LOG.debug("read footer length: {}, footer index: {}", footerLength, footerIndex); if (footerIndex < MAGIC.length || footerIndex >= footerLengthIndex) { throw new RuntimeException("corrupted file: the footer index is not within the file: " + footerIndex); } f.seek(footerIndex); // Read all the footer bytes in one time to avoid multiple read operations, // since it can be pretty time consuming for a single read operation in HDFS. ByteBuffer footerBytesBuffer = ByteBuffer.allocate(footerLength); f.readFully(footerBytesBuffer); LOG.debug("Finished to read all footer bytes."); footerBytesBuffer.flip(); InputStream footerBytesStream = ByteBufferInputStream.wrap(footerBytesBuffer); return converter.readParquetMetadata(footerBytesStream, options.getMetadataFilter()); }
Example #18
Source File: TestBitPackingColumn.java From parquet-mr with Apache License 2.0 | 5 votes |
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException { for (PACKING_TYPE type : PACKING_TYPE.values()) { LOG.debug("{}", type); final int bound = (int)Math.pow(2, bitLength) - 1; ValuesWriter w = type.getWriter(bound); for (int i : vals) { w.writeInteger(i); } byte[] bytes = w.getBytes().toByteArray(); LOG.debug("vals ("+bitLength+"): " + TestBitPacking.toString(vals)); LOG.debug("bytes: {}", TestBitPacking.toString(bytes)); assertEquals(type.toString(), expected, TestBitPacking.toString(bytes)); ValuesReader r = type.getReader(bound); r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes))); int[] result = new int[vals.length]; for (int i = 0; i < result.length; i++) { result[i] = r.readInteger(); } LOG.debug("result: {}", TestBitPacking.toString(result)); assertArrayEquals(type + " result: " + TestBitPacking.toString(result), vals, result); // Test skipping r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes))); for (int i = 0; i < vals.length; i += 2) { assertEquals(vals[i], r.readInteger()); r.skip(); } // Test n-skipping r.initFromPage(vals.length, ByteBufferInputStream.wrap(ByteBuffer.wrap(bytes))); int skipCount; for (int i = 0; i < vals.length; i += skipCount + 1) { skipCount = (vals.length - i) / 2; assertEquals(vals[i], r.readInteger()); r.skip(skipCount); } } }
Example #19
Source File: TestCorruptDeltaByteArrays.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testReassemblyWithoutCorruption() throws Exception { DeltaByteArrayWriter writer = getDeltaByteArrayWriter(); for (int i = 0; i < 10; i += 1) { writer.writeBytes(Binary.fromString(str(i))); } ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer(); writer.reset(); // sets previous to new byte[0] for (int i = 10; i < 20; i += 1) { writer.writeBytes(Binary.fromString(str(i))); } ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer(); DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader(); firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes)); for (int i = 0; i < 10; i += 1) { assertEquals(firstPageReader.readBytes().toStringUsingUTF8(), str(i)); } DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader(); secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes)); secondPageReader.setPreviousReader(firstPageReader); for (int i = 10; i < 20; i += 1) { assertEquals(secondPageReader.readBytes().toStringUsingUTF8(), str(i)); } }
Example #20
Source File: Utils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Initializes {@code reader} from {@code stream} and reads {@code length} integers.
 *
 * @param reader reader to initialize and read from
 * @param stream stream holding the encoded values
 * @param length number of integers to read
 * @return the integers read, in order
 * @throws IOException if initializing the reader fails
 */
public static int[] readInts(ValuesReader reader, ByteBufferInputStream stream, int length) throws IOException {
  final int[] decoded = new int[length];
  reader.initFromPage(length, stream);

  int index = 0;
  while (index < length) {
    decoded[index] = reader.readInteger();
    index++;
  }
  return decoded;
}
Example #21
Source File: Utils.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Initializes {@code reader} from {@code stream} and reads {@code length} binary values.
 *
 * @param reader reader to initialize and read from
 * @param stream stream holding the encoded values
 * @param length number of values to read
 * @return the values read, in order
 * @throws IOException if initializing the reader fails
 */
public static Binary[] readData(ValuesReader reader, ByteBufferInputStream stream, int length) throws IOException {
  final Binary[] decoded = new Binary[length];
  reader.initFromPage(length, stream);

  int index = 0;
  while (index < length) {
    decoded[index] = reader.readBytes();
    index++;
  }
  return decoded;
}
Example #22
Source File: TestCorruptDeltaByteArrays.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testOldReassemblyWithoutCorruption() throws Exception { DeltaByteArrayWriter writer = getDeltaByteArrayWriter(); for (int i = 0; i < 10; i += 1) { writer.writeBytes(Binary.fromString(str(i))); } ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer(); writer.reset(); // sets previous to new byte[0] for (int i = 10; i < 20; i += 1) { writer.writeBytes(Binary.fromString(str(i))); } ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer(); DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader(); firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes)); for (int i = 0; i < 10; i += 1) { assertEquals(firstPageReader.readBytes().toStringUsingUTF8(), str(i)); } DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader(); secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes)); for (int i = 10; i < 20; i += 1) { assertEquals(secondPageReader.readBytes().toStringUsingUTF8(), str(i)); } }
Example #23
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Decodes {@code input} with a double reader and checks every value against {@code values}.
 *
 * @param input encoded bytes
 * @param values expected decoded values, in order
 * @throws IOException if initializing the reader fails
 */
private void testReader(byte[] input, double[] values) throws IOException {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(input));
  ByteStreamSplitValuesReaderForDouble reader = new ByteStreamSplitValuesReaderForDouble();
  reader.initFromPage(values.length, stream);

  for (int index = 0; index < values.length; index++) {
    double actual = reader.readDouble();
    assertEquals(values[index], actual, 0.0);
  }
}
Example #24
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that skipping a negative number of values raises a {@link ParquetDecodingException}.
 */
@Test
public void testSkipUnderflow() throws Exception {
  ByteBuffer buffer = ByteBuffer.wrap(new byte[128]);
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffer);
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(32, stream);

  try {
    reader.skip(-1);
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // Expected: the exception is the outcome under test.
  }
}
Example #25
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Checks that skipping 33 values on a reader initialized with 32 values raises
 * a {@link ParquetDecodingException}.
 */
@Test
public void testSkipOverflow() throws Exception {
  ByteBuffer buffer = ByteBuffer.wrap(new byte[128]);
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(buffer);
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(32, stream);

  try {
    reader.skip(33);
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // Expected: the exception is the outcome under test.
  }
}
Example #26
Source File: TestDictionary.java From parquet-mr with Apache License 2.0 | 5 votes |
@Test public void testSkipInBinaryDictionary() throws Exception { ValuesWriter cw = newPlainBinaryDictionaryValuesWriter(1000, 10000); writeRepeated(100, cw, "a"); writeDistinct(100, cw, "b"); assertEquals(PLAIN_DICTIONARY, cw.getEncoding()); // Test skip and skip-n with dictionary encoding ByteBufferInputStream stream = cw.getBytes().toInputStream(); DictionaryValuesReader cr = initDicReader(cw, BINARY); cr.initFromPage(200, stream); for (int i = 0; i < 100; i += 2) { assertEquals(Binary.fromString("a" + i % 10), cr.readBytes()); cr.skip(); } int skipCount; for (int i = 0; i < 100; i += skipCount + 1) { skipCount = (100 - i) / 2; assertEquals(Binary.fromString("b" + i), cr.readBytes()); cr.skip(skipCount); } // Ensure fallback writeDistinct(1000, cw, "c"); assertEquals(PLAIN, cw.getEncoding()); // Test skip and skip-n with plain encoding (after fallback) ValuesReader plainReader = new BinaryPlainValuesReader(); plainReader.initFromPage(1200, cw.getBytes().toInputStream()); plainReader.skip(200); for (int i = 0; i < 100; i += 2) { assertEquals("c" + i, plainReader.readBytes().toStringUsingUTF8()); plainReader.skip(); } for (int i = 100; i < 1000; i += skipCount + 1) { skipCount = (1000 - i) / 2; assertEquals(Binary.fromString("c" + i), plainReader.readBytes()); plainReader.skip(skipCount); } }
Example #27
Source File: BenchmarkDeltaLengthByteArray.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Benchmarks writing {@code values} with a {@link PlainValuesWriter} and
 * reading them back, then prints the stream position reached.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
  final int slabSize = 64 * 1024;
  PlainValuesWriter writer = new PlainValuesWriter(slabSize, slabSize, new DirectByteBufferAllocator());
  BinaryPlainValuesReader reader = new BinaryPlainValuesReader();

  Utils.writeData(writer, values);
  ByteBufferInputStream data = writer.getBytes().toInputStream();
  Utils.readData(reader, data, values.length);

  System.out.println("size " + data.position());
}
Example #28
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Reads the single float in the page (2.25f) and checks that a second read
 * raises a {@link ParquetDecodingException}.
 */
@Test
public void testExtraReads() throws Exception {
  byte[] byteData = {(byte) 0x00, (byte) 0x00, (byte) 0x10, (byte) 0x40};
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(byteData));
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(1, stream);

  float onlyValue = reader.readFloat();
  assertEquals(2.25f, onlyValue, 0.0f);

  try {
    reader.readFloat();
    Assert.fail("Expected an exception.");
  } catch (ParquetDecodingException expected) {
    // Expected: the exception is the outcome under test.
  }
}
Example #29
Source File: ByteStreamSplitValuesReaderTest.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Decodes {@code input} with a float reader and checks every value against {@code values}.
 *
 * @param input encoded bytes
 * @param values expected decoded values, in order
 * @throws IOException if initializing the reader fails
 */
private void testReader(byte[] input, float[] values) throws IOException {
  ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(input));
  ByteStreamSplitValuesReaderForFloat reader = new ByteStreamSplitValuesReaderForFloat();
  reader.initFromPage(values.length, stream);

  for (int index = 0; index < values.length; index++) {
    float actual = reader.readFloat();
    assertEquals(values[index], actual, 0.0f);
  }
}
Example #30
Source File: BenchmarkDeltaLengthByteArray.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Benchmarks writing {@code values} with a {@link DeltaLengthByteArrayValuesWriter}
 * and reading them back, then prints the stream position reached.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  final int slabSize = 64 * 1024;
  DeltaLengthByteArrayValuesWriter writer =
      new DeltaLengthByteArrayValuesWriter(slabSize, slabSize, new DirectByteBufferAllocator());
  DeltaLengthByteArrayValuesReader reader = new DeltaLengthByteArrayValuesReader();

  Utils.writeData(writer, values);
  ByteBufferInputStream data = writer.getBytes().toInputStream();
  Utils.readData(reader, data, values.length);

  System.out.println("size " + data.position());
}