Java Code Examples for org.apache.parquet.bytes.BytesInput#from()

The following examples show how to use org.apache.parquet.bytes.BytesInput#from(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: RunLengthBitPackingHybridEncoder.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Flushes whatever is still pending and returns the encoded output.
 *
 * <p>The precondition check rejects a second call unless {@code reset()} has been
 * invoked in between.
 *
 * @return a {@link BytesInput} built from {@code baos}
 * @throws IOException declared by this method; presumably propagated from the
 *     run-writing helpers
 */
public BytesInput toBytes() throws IOException {
  Preconditions.checkArgument(!toBytesCalled,
      "You cannot call toBytes() more than once without calling reset()");

  if (repeatCount >= 8) {
    // enough repeats are queued up: flush them as an rle-run
    writeRleRun();
  } else {
    if (numBufferedValues > 0) {
      // zero the unused slots up to index 7 before writing the buffered values
      int slot = numBufferedValues;
      while (slot < 8) {
        bufferedValues[slot] = 0;
        slot++;
      }
      writeOrAppendBitPackedRun();
    }
    // both remaining cases finish by ending the previous bit-packed run
    endPreviousBitPackedRun();
  }

  toBytesCalled = true;
  return BytesInput.from(baos);
}

Example 2

Source File: CodecFactory.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Compresses {@code bytes} with the configured codec.
 *
 * <p>When {@code codec} is {@code null} the input is returned as-is. Otherwise the
 * shared {@code compressedOutBuffer} is reset, filled with the compressed data and
 * wrapped in the returned {@link BytesInput}.
 *
 * @param bytes the data to compress
 * @return the compressed data, or {@code bytes} itself when no codec is configured
 * @throws IOException if writing to or finishing the compression stream fails
 */
@Override
public BytesInput compress(BytesInput bytes) throws IOException {
  if (codec == null) {
    return bytes;
  }
  compressedOutBuffer.reset();
  if (compressor != null) {
    // null compressor for non-native gzip
    compressor.reset();
  }
  // try-with-resources guarantees the stream is closed even when writing or
  // finishing throws; previously close() was skipped on that path
  try (CompressionOutputStream cos = codec.createOutputStream(compressedOutBuffer, compressor)) {
    bytes.writeAllTo(cos);
    cos.finish();
  }
  return BytesInput.from(compressedOutBuffer);
}

Example 3

Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Reads a dictionary page from {@code fin} without decompressing it.
 *
 * <p>Exactly {@code getCompressed_page_size()} bytes are read from the stream and
 * wrapped as the page payload; the uncompressed size, value count and encoding are
 * taken from the headers.
 *
 * @param pageHeader header describing the page sizes and the dictionary header
 * @param fin stream to read the page bytes from
 * @return the dictionary page wrapping the bytes read from {@code fin}
 * @throws IOException if the page bytes cannot be read fully
 */
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  DictionaryPageHeader header = pageHeader.getDictionary_page_header();
  int uncompressedSize = pageHeader.getUncompressed_page_size();

  // pull the whole page payload off the stream in one go
  byte[] payload = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(payload);

  return new DictionaryPage(
      BytesInput.from(payload),
      uncompressedSize,
      header.getNum_values(),
      converter.getEncoding(header.getEncoding()));
}

Example 4

Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0

6 votes

/**
 * Returns {@code size} bytes of the chunk, topping up from {@code f} when the
 * chunk's own stream holds fewer bytes than requested.
 *
 * @param size the number of bytes requested
 * @return the requested bytes
 * @throws IOException if reading from either source fails
 */
public BytesInput readAsBytesInput(int size) throws IOException {
  int available = stream.available();
  if (size <= available) {
    // common case: the chunk already holds everything that was asked for
    return super.readAsBytesInput(size);
  }

  // this is to workaround a bug where the compressedLength
  // of the chunk is missing the size of the header of the dictionary
  // to allow reading older files (using dictionary) we need this.
  // usually 13 to 19 bytes are missing
  int missingBytes = size - available;
  LOG.info("completed the column chunk with {} bytes", missingBytes);

  // everything the chunk still has, followed by the bytes fetched separately
  List<ByteBuffer> buffers = new ArrayList<>(stream.sliceBuffers(available));

  ByteBuffer tail = ByteBuffer.allocate(missingBytes);
  // NOTE(review): if readFully advances the buffer position, tail has nothing
  // remaining when it is added below - confirm whether a flip() is needed
  f.readFully(tail);
  buffers.add(tail);

  return BytesInput.from(buffers);
}

Example 5

Source File: ColumnChunkIncReadStore.java From dremio-oss with Apache License 2.0

5 votes

/**
 * Builds a {@link DictionaryPage} from the uncompressed content of a page.
 *
 * @param pageHeader header supplying the uncompressed size, value count and encoding
 * @return the dictionary page backed by the first {@code uncompressed_page_size}
 *     bytes of the buffer returned by {@code uncompressPage}
 * @throws IOException declared by this method; presumably propagated from
 *     {@code uncompressPage}
 */
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  ByteBuffer uncompressed = uncompressPage(pageHeader, false);
  BytesInput dictBytes = BytesInput.from(uncompressed, 0, pageHeader.uncompressed_page_size);
  return new DictionaryPage(
      dictBytes,
      pageHeader.getDictionary_page_header().getNum_values(),
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding)
  );
}

Example 6

Source File: FileEncodingsIT.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Creates a new {@link DictionaryPage} whose content is a byte array taken from
 * {@code dict}.
 *
 * @param dict the page to copy; may be {@code null}
 * @return the copy, or {@code null} when {@code dict} is {@code null}
 * @throws ParquetDecodingException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict == null) {
    return null;
  }
  try {
    // materialize the page content as a byte array
    byte[] content = dict.getBytes().toByteArray();
    return new DictionaryPage(
        BytesInput.from(content), dict.getDictionarySize(), dict.getEncoding());
  } catch (IOException readFailure) {
    throw new ParquetDecodingException("Cannot read dictionary", readFailure);
  }
}

Example 7

Source File: TestZstandardCodec.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Wraps {@code bytes} in a decompressing stream from {@code codec} and builds a
 * {@link BytesInput} of {@code uncompressedSize} bytes from it.
 *
 * @param codec codec used to create the decompressing stream
 * @param bytes the compressed input
 * @param uncompressedSize the expected size of the decompressed data
 * @return the {@link BytesInput} created from the decompressing stream
 * @throws IOException if creating, reading or closing the stream fails
 */
private BytesInput decompress(ZstandardCodec codec, BytesInput bytes, int uncompressedSize) throws IOException {
  // try-with-resources closes the stream on every path, including when
  // BytesInput.from throws; the value is still computed before the close
  // NOTE(review): as before, the stream is closed before the result is returned -
  // confirm BytesInput.from does not defer reading from it
  try (InputStream is = codec.createInputStream(bytes.toInputStream(), null)) {
    return BytesInput.from(is, uncompressedSize);
  }
}

Example 8

Source File: TestZstandardCodec.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Compresses {@code bytes} with {@code codec} into an in-memory buffer.
 *
 * @param codec codec used to create the compressing stream
 * @param bytes the data to compress
 * @return a {@link BytesInput} wrapping the buffer the compressed data was written to
 * @throws IOException if writing to or closing the compression stream fails
 */
private BytesInput compress(ZstandardCodec codec, BytesInput bytes) throws IOException {
  // sized from the uncompressed input, as an initial capacity only
  ByteArrayOutputStream compressedOutBuffer = new ByteArrayOutputStream((int)bytes.size());
  // try-with-resources closes the stream even when writeAllTo throws
  try (CompressionOutputStream cos = codec.createOutputStream(compressedOutBuffer, null)) {
    bytes.writeAllTo(cos);
  }
  return BytesInput.from(compressedOutBuffer);
}

Example 9

Source File: CodecFactory.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Decompresses {@code bytes} with the configured codec.
 *
 * @param bytes the compressed input
 * @param uncompressedSize the expected size of the decompressed data
 * @return a {@link BytesInput} created from the decompressing stream, or
 *     {@code bytes} itself when {@code codec} is {@code null}
 * @throws IOException if the decompressing stream cannot be created
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  if (codec == null) {
    // nothing to undo: hand the input straight back
    return bytes;
  }
  if (decompressor != null) {
    decompressor.reset();
  }
  InputStream decompressing = codec.createInputStream(bytes.toInputStream(), decompressor);
  return BytesInput.from(decompressing, uncompressedSize);
}

Example 10

Source File: CompressionConverter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Reads {@code length} bytes from {@code reader}.
 *
 * <p>The shared {@code pageBuffer} is used as the destination unless
 * {@code length} exceeds {@code pageBufferSize}, in which case a new array is
 * allocated. NOTE(review): when the shared buffer is used, the result may alias
 * it - confirm callers consume the result before the next read.
 *
 * @param length the number of bytes to read
 * @param reader the reader to pull the bytes from
 * @return a {@link BytesInput} over the first {@code length} bytes of the destination
 * @throws IOException if the read fails
 */
public BytesInput readBlock(int length, TransParquetFileReader reader) throws IOException {
  byte[] target = (length > pageBufferSize) ? new byte[length] : pageBuffer;
  reader.blockRead(target, 0, length);
  return BytesInput.from(target, 0, length);
}

Example 11

Source File: DirectCodecFactory.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Decompresses {@code bytes} into a newly allocated array of
 * {@code uncompressedSize} bytes.
 *
 * @param bytes the compressed input
 * @param uncompressedSize the size of the output array to allocate and fill
 * @return a {@link BytesInput} wrapping the output array
 * @throws IOException if reading the input or decompressing fails
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  decompressor.reset();

  // feed the complete compressed payload to the decompressor
  byte[] compressed = bytes.toByteArray();
  decompressor.setInput(compressed, 0, compressed.length);

  // NOTE(review): the value returned by decompress(...) is not checked - confirm
  // a single call always fills the whole array
  byte[] uncompressed = new byte[uncompressedSize];
  decompressor.decompress(uncompressed, 0, uncompressedSize);
  return BytesInput.from(uncompressed);
}

Example 12

Source File: FixedLenByteArrayPlainValuesWriter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Flushes {@code out} and returns the content of {@code arrayOut}.
 *
 * @return a {@link BytesInput} built from {@code arrayOut}
 * @throws ParquetEncodingException if flushing fails
 */
@Override
public BytesInput getBytes() {
  try {
    // flush first so that arrayOut is up to date before it is wrapped
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  LOG.debug("writing a buffer of size {}", arrayOut.size());

  return BytesInput.from(arrayOut);
}

Example 13

Source File: PlainValuesWriter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Flushes {@code out} and returns the content of {@code arrayOut}.
 *
 * @return a {@link BytesInput} built from {@code arrayOut}
 * @throws ParquetEncodingException if flushing fails
 */
@Override
public BytesInput getBytes() {
  try {
    // flush first so that arrayOut is up to date before it is wrapped
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("writing a buffer of size {}", arrayOut.size());
  }

  return BytesInput.from(arrayOut);
}

Example 14

Source File: BitPackingValuesWriter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * {@inheritDoc}
 *
 * <p>Finishes the bit-packing writer, then returns the content of {@code out}.
 *
 * @throws ParquetEncodingException if finishing the writer fails
 * @see org.apache.parquet.column.values.ValuesWriter#getBytes()
 */
@Override
public BytesInput getBytes() {
  try {
    // finish the writer before out is wrapped
    bitPackingWriter.finish();
    BytesInput packed = BytesInput.from(out);
    return packed;
  } catch (IOException finishFailure) {
    throw new ParquetEncodingException(finishFailure);
  }
}

Example 15

Source File: ByteStreamSplitValuesWriter.java From parquet-mr with Apache License 2.0

5 votes

/**
 * Returns the content of all byte streams concatenated in stream order.
 *
 * @return the concatenation of one {@link BytesInput} per entry of {@code byteStreams}
 */
@Override
public BytesInput getBytes() {
  BytesInput[] perStream = new BytesInput[this.numStreams];
  // wrap each stream individually; concat stitches them together afterwards
  for (int stream = 0; stream < perStream.length; stream++) {
    perStream[stream] = BytesInput.from(this.byteStreams[stream]);
  }
  return BytesInput.concat(perStream);
}

Example 16

Source File: DictionaryPageReader.java From parquet-mr with Apache License 2.0

4 votes

/**
 * Creates a new {@link DictionaryPage} whose content is a byte array taken from
 * {@code dict}.
 *
 * @param dict the page to copy
 * @return a page with the same dictionary size and encoding, backed by the byte array
 * @throws IOException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict)
    throws IOException {
  byte[] content = dict.getBytes().toByteArray();
  return new DictionaryPage(
      BytesInput.from(content), dict.getDictionarySize(), dict.getEncoding());
}

Example 17

Source File: CompressionConverter.java From parquet-mr with Apache License 2.0

4 votes

/**
 * Reads {@code length} bytes from {@code reader} into a newly allocated array.
 *
 * @param length the number of bytes to read
 * @param reader the reader to pull the bytes from
 * @return a {@link BytesInput} over the new array
 * @throws IOException if the read fails
 */
public BytesInput readBlockAllocate(int length, TransParquetFileReader reader) throws IOException {
  // always a fresh array, so the result never shares storage with an earlier call
  final byte[] fresh = new byte[length];
  reader.blockRead(fresh, 0, length);
  return BytesInput.from(fresh, 0, length);
}

Example 18

Source File: PageReader.java From dremio-oss with Apache License 2.0

4 votes

/**
 * Exposes a region of {@code buf} as a {@link BytesInput}.
 *
 * @param buf the buffer to read from
 * @param offset the start of the region within {@code buf}
 * @param length the size of the region
 * @return a {@link BytesInput} over the NIO buffer obtained for the region
 * @throws IOException declared by this method
 */
public static BytesInput asBytesInput(ArrowBuf buf, int offset, int length) throws IOException {
  // the NIO buffer already covers only the region, hence the 0 start below
  java.nio.ByteBuffer region = buf.nioBuffer(offset, length);
  return BytesInput.from(region, 0, length);
}

Example 19

Source File: PageReader.java From Bats with Apache License 2.0

4 votes

/**
 * Exposes a region of {@code buf} as a {@link BytesInput}.
 *
 * @param buf the buffer to read from
 * @param offset the start of the region within {@code buf}
 * @param length the size of the region
 * @return a {@link BytesInput} over the NIO buffer obtained for the region
 * @throws IOException declared by this method
 */
public static BytesInput asBytesInput(DrillBuf buf, int offset, int length) throws IOException {
  java.nio.ByteBuffer region = buf.nioBuffer(offset, length);
  return BytesInput.from(region);
}

Example 20

Source File: ParquetFileReader.java From parquet-mr with Apache License 2.0

2 votes

/**
 * Slices the next {@code size} bytes off the stream and wraps them.
 *
 * @param size the size of the page
 * @return the page
 * @throws IOException if there is an error while reading from the file stream
 */
public BytesInput readAsBytesInput(int size) throws IOException {
  List<ByteBuffer> slices = stream.sliceBuffers(size);
  return BytesInput.from(slices);
}