org.apache.parquet.hadoop.CodecFactory.BytesCompressor Java Examples
The following examples show how to use
org.apache.parquet.hadoop.CodecFactory.BytesCompressor.
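Before the project-specific examples, here is a minimal, self-contained sketch of how a BytesCompressor is typically obtained from a CodecFactory and used to compress a BytesInput. The codec choice, the 1 MiB page size hint, and the class name BytesCompressorSketch are illustrative assumptions, not code taken from any of the projects below. Note that recent parquet-mr releases deprecate BytesCompressor in favor of CompressionCodecFactory.BytesInputCompressor, which is why Example #1 below adapts between the two types.

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.hadoop.CodecFactory;
import org.apache.parquet.hadoop.CodecFactory.BytesCompressor;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class BytesCompressorSketch {
  public static void main(String[] args) throws Exception {
    // Page size hint for the factory; 1 MiB is an arbitrary illustrative value.
    CodecFactory codecFactory = new CodecFactory(new Configuration(), 1024 * 1024);
    BytesCompressor compressor = codecFactory.getCompressor(CompressionCodecName.SNAPPY);
    try {
      BytesInput uncompressed = BytesInput.from("hello parquet".getBytes("UTF-8"));
      BytesInput compressed = compressor.compress(uncompressed);
      System.out.println("compressed size: " + compressed.size()
          + " bytes using " + compressor.getCodecName());
    } finally {
      // Compressors hold codec resources and should be released when done.
      compressor.release();
      codecFactory.release();
    }
  }
}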
Example #1
Source File: ParquetRecordWriter.java From dremio-oss with Apache License 2.0
@SuppressWarnings("deprecation")
private static BytesCompressor toDeprecatedBytesCompressor(final BytesInputCompressor compressor) {
  return new BytesCompressor() {
    @Override
    public BytesInput compress(BytesInput bytes) throws IOException {
      return compressor.compress(bytes);
    }

    @Override
    public CompressionCodecName getCodecName() {
      return compressor.getCodecName();
    }

    @Override
    public void release() {
      compressor.release();
    }
  };
}
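This adapter bridges two generations of the compressor API: newer parquet-mr code hands out CompressionCodecFactory.BytesInputCompressor instances, while this writer path still expects the deprecated BytesCompressor. A hypothetical call site, assuming it lives in the same class as toDeprecatedBytesCompressor and that GZIP and a 64 KiB page size are acceptable choices (neither is taken from the Dremio source), might look like:

private static BytesCompressor newLegacyCompressor(Configuration conf) {
  // CodecFactory implements the newer CompressionCodecFactory interface, so the
  // compressor it returns can be adapted back to the deprecated BytesCompressor type.
  CompressionCodecFactory factory = new CodecFactory(conf, 64 * 1024);
  BytesInputCompressor inputCompressor = factory.getCompressor(CompressionCodecName.GZIP);
  return toDeprecatedBytesCompressor(inputCompressor);
}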
Example #2
Source File: InternalParquetRecordWriter.java From parquet-mr with Apache License 2.0
/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = Objects.requireNonNull(writeSupport, "writeSupport cannot be null");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
Example #3
Source File: ParquetRecordWriter.java From parquet-mr with Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 * @param memoryManager memory manager for the write
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize,
    int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    MemoryManager memoryManager) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema, extraMetaData,
      blockSize, compressor, validating, props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
  this.codecFactory = null;
}
Example #4
Source File: ParquetColumnChunkPageWriteStore.java From Bats with Apache License 2.0
public ParquetColumnChunkPageWriteStore(BytesCompressor compressor,
                                        MessageType schema,
                                        int initialSlabSize,
                                        int maxCapacityHint,
                                        ByteBufferAllocator allocator) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, initialSlabSize, maxCapacityHint, allocator));
  }
}
Example #5
Source File: ParquetColumnChunkPageWriteStore.java From Bats with Apache License 2.0
private ColumnChunkPageWriter(ColumnDescriptor path,
                              BytesCompressor compressor,
                              int initialSlabSize,
                              int maxCapacityHint,
                              ByteBufferAllocator allocator) {
  this.path = path;
  this.compressor = compressor;
  this.buf = new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
  this.totalStatistics = Statistics.createStats(this.path.getPrimitiveType());
}
Example #6
Source File: ColumnChunkPageWriteStoreExposer.java From dremio-oss with Apache License 2.0
public static ColumnChunkPageWriteStore newColumnChunkPageWriteStore(
    BytesCompressor compressor,
    MessageType schema,
    ParquetProperties parquetProperties) {
  return new ColumnChunkPageWriteStore(compressor, schema, parquetProperties);
}
Example #7
Source File: ParquetRecordWriter.java From parquet-mr with Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize,
    int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema, extraMetaData,
      blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
Example #8
Source File: ColumnChunkPageWriteStore.java From parquet-mr with Apache License 2.0
private ColumnChunkPageWriter(ColumnDescriptor path,
                              BytesCompressor compressor,
                              ByteBufferAllocator allocator,
                              int columnIndexTruncateLength,
                              boolean pageWriteChecksumEnabled) {
  this.path = path;
  this.compressor = compressor;
  this.allocator = allocator;
  this.buf = new ConcatenatingByteArrayCollector();
  this.columnIndexBuilder = ColumnIndexBuilder.getBuilder(path.getPrimitiveType(), columnIndexTruncateLength);
  this.offsetIndexBuilder = OffsetIndexBuilder.getBuilder();
  this.pageWriteChecksumEnabled = pageWriteChecksumEnabled;
  this.crc = pageWriteChecksumEnabled ? new CRC32() : null;
}
Example #9
Source File: ColumnChunkPageWriteStore.java From parquet-mr with Apache License 2.0
public ColumnChunkPageWriteStore(BytesCompressor compressor,
                                 MessageType schema,
                                 ByteBufferAllocator allocator,
                                 int columnIndexTruncateLength,
                                 boolean pageWriteChecksumEnabled) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, allocator, columnIndexTruncateLength,
        pageWriteChecksumEnabled));
  }
}
Example #10
Source File: ColumnChunkPageWriteStore.java From parquet-mr with Apache License 2.0
public ColumnChunkPageWriteStore(BytesCompressor compressor,
                                 MessageType schema,
                                 ByteBufferAllocator allocator,
                                 int columnIndexTruncateLength) {
  this(compressor, schema, allocator, columnIndexTruncateLength,
      ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
}