org.apache.parquet.hadoop.api.WriteSupport Java Examples
The following examples show how to use
org.apache.parquet.hadoop.api.WriteSupport.
Each example notes the project it comes from, its source file, and its license.
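Before the examples, it helps to see the shape of the API itself: a WriteSupport<T> subclass declares a schema in init, receives a RecordConsumer in prepareForWrite, and translates each record into consumer events in write. The following is a minimal sketch, not taken from any of the projects below; the schema, the User class, and all field names are hypothetical.

import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Hypothetical record type used only for this sketch.
class User {
  String name;
  int id;
}

public class UserWriteSupport extends WriteSupport<User> {
  private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(
      "message user { required binary name (UTF8); required int32 id; }");

  private RecordConsumer recordConsumer;

  @Override
  public WriteContext init(Configuration configuration) {
    // Declare the file schema and any extra key/value footer metadata.
    return new WriteContext(SCHEMA, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(User record) {
    // Emit one message per record: each field is bracketed by start/end calls.
    recordConsumer.startMessage();
    recordConsumer.startField("name", 0);
    recordConsumer.addBinary(Binary.fromString(record.name));
    recordConsumer.endField("name", 0);
    recordConsumer.startField("id", 1);
    recordConsumer.addInteger(record.id);
    recordConsumer.endField("id", 1);
    recordConsumer.endMessage();
  }
}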
Example #1
Source File: InternalParquetRecordWriter.java (from parquet-mr, Apache License 2.0)

/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = Objects.requireNonNull(writeSupport, "writeSupport cannot be null");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
Example #2
Source File: ParquetRecordWriter.java (from parquet-mr, Apache License 2.0)

/**
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 * @param memoryManager memory manager for the write
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize,
    int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    MemoryManager memoryManager) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
  this.codecFactory = null;
}
Example #3
Source File: ParquetRecordWriter.java (from parquet-mr, Apache License 2.0)

/**
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param codec the compression codec used to compress the pages
 * @param validating if schema validation should be turned on
 * @param props parquet encoding properties
 */
ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize,
    CompressionCodecName codec,
    boolean validating,
    ParquetProperties props,
    MemoryManager memoryManager,
    Configuration conf) {
  this.codecFactory = new CodecFactory(conf, props.getPageSizeThreshold());
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, codecFactory.getCompressor(codec), validating, props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
}
Example #4
Source File: AvroParquetWriterBuilder190Int96.java (from datacollector, Apache License 2.0)

protected WriteSupport<T> getWriteSupport(Configuration conf) {
  AvroLogicalTypeSupport avroLogicalTypeSupport = AvroLogicalTypeSupport.getAvroLogicalTypeSupport();
  if (avroLogicalTypeSupport.isLogicalTypeSupported()) {
    LOG.debug("Returning write support with converter = AvroSchemaConverter190Int96Avro18");
    return new AvroWriteSupportInt96Avro18<>(
        (new AvroSchemaConverter190Int96Avro18(conf)).convert(this.schema),
        this.schema,
        this.model,
        this.timeZoneId);
  } else {
    LOG.debug("Returning write support with converter = AvroSchemaConverter190Int96Avro17");
    return new AvroWriteSupportInt96Avro17<>(
        (new AvroSchemaConverter190Int96Avro17(conf)).convert(this.schema),
        this.schema,
        this.model,
        this.timeZoneId);
  }
}
Example #5
Source File: ParquetWriter.java (from parquet-mr, Apache License 2.0)

/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param mode file creation mode
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(HadoopOutputFile.fromPath(file, conf),
      mode, writeSupport, compressionCodecName, blockSize, validating, conf,
      MAX_PADDING_SIZE_DEFAULT,
      ParquetProperties.builder()
          .withPageSize(pageSize)
          .withDictionaryPageSize(dictionaryPageSize)
          .withDictionaryEncoding(enableDictionary)
          .withWriterVersion(writerVersion)
          .build());
}
Example #6
Source File: ParquetWriter.java (from parquet-mr, Apache License 2.0)

/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(file, ParquetFileWriter.Mode.CREATE, writeSupport, compressionCodecName,
      blockSize, pageSize, dictionaryPageSize, enableDictionary, validating,
      writerVersion, conf);
}
Example #7
Source File: GroupWriteSupport.java (from parquet-mr, Apache License 2.0)

@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
  // if present, prefer the schema passed to the constructor
  if (schema == null) {
    schema = getSchema(configuration);
  }
  return new WriteContext(schema, this.extraMetaData);
}
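GroupWriteSupport is usually wired up through the Hadoop configuration, which is what the getSchema(configuration) call above reads. A short usage sketch, assuming a placeholder schema and output path (ExampleParquetWriter is the example writer that ships with parquet-mr):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupWriteSupportUsage {
  public static void main(String[] args) throws Exception {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required binary name (UTF8); required int32 id; }");
    Configuration conf = new Configuration();
    // Registers the schema under the key that GroupWriteSupport.getSchema() reads,
    // so the init method above can find it.
    GroupWriteSupport.setSchema(schema, conf);

    try (ParquetWriter<Group> writer = ExampleParquetWriter
        .builder(new Path("/tmp/example.parquet"))
        .withConf(conf)
        .build()) {
      Group group = new SimpleGroupFactory(schema).newGroup()
          .append("name", "alice")
          .append("id", 1);
      writer.write(group);
    }
  }
}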
Example #8
Source File: ParquetRecordWriter.java (from parquet-mr, Apache License 2.0)

/**
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize,
    int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
Example #9
Source File: ParquetOutputFormat.java (from parquet-mr, Apache License 2.0)

public static Class<?> getWriteSupportClass(Configuration configuration) {
  final String className = configuration.get(WRITE_SUPPORT_CLASS);
  if (className == null) {
    return null;
  }
  final Class<?> writeSupportClass =
      ConfigurationUtil.getClassFromConfig(configuration, WRITE_SUPPORT_CLASS, WriteSupport.class);
  return writeSupportClass;
}
Example #10
Source File: ParquetOutputFormat.java (from parquet-mr, Apache License 2.0)

/**
 * @param configuration to find the configuration for the write support class
 * @return the configured write support
 */
@SuppressWarnings("unchecked")
public WriteSupport<T> getWriteSupport(Configuration configuration) {
  if (writeSupport != null) return writeSupport;
  Class<?> writeSupportClass = getWriteSupportClass(configuration);
  try {
    return (WriteSupport<T>) Objects
        .requireNonNull(writeSupportClass, "writeSupportClass cannot be null")
        .newInstance();
  } catch (InstantiationException | IllegalAccessException e) {
    throw new BadConfigurationException("could not instantiate write support class: " + writeSupportClass, e);
  }
}
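The writeSupport field checked above is populated when the output format is constructed directly; otherwise the class is resolved from the parquet.write.support.class property. A brief sketch of registering it on a MapReduce job; UserWriteSupport is the hypothetical implementation sketched in the introduction:

import java.io.IOException;

import org.apache.hadoop.mapreduce.Job;
import org.apache.parquet.hadoop.ParquetOutputFormat;

public class JobSetup {
  public static Job configure() throws IOException {
    Job job = Job.getInstance();
    job.setOutputFormatClass(ParquetOutputFormat.class);
    // Stores the class name under parquet.write.support.class, which
    // getWriteSupportClass/getWriteSupport above resolve and instantiate.
    ParquetOutputFormat.setWriteSupportClass(job, UserWriteSupport.class);
    return job;
  }
}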
Example #11
Source File: HoodieAvroWriteSupport.java (from hudi, Apache License 2.0)

@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
  HashMap<String, String> extraMetaData = new HashMap<>();
  if (bloomFilter != null) {
    extraMetaData.put(HOODIE_AVRO_BLOOM_FILTER_METADATA_KEY, bloomFilter.serializeToString());
    if (minRecordKey != null && maxRecordKey != null) {
      extraMetaData.put(HOODIE_MIN_RECORD_KEY_FOOTER, minRecordKey);
      extraMetaData.put(HOODIE_MAX_RECORD_KEY_FOOTER, maxRecordKey);
    }
    if (bloomFilter.getBloomFilterTypeCode().name().contains(HoodieDynamicBoundedBloomFilter.TYPE_CODE_PREFIX)) {
      extraMetaData.put(HOODIE_BLOOM_FILTER_TYPE_CODE, bloomFilter.getBloomFilterTypeCode().name());
    }
  }
  return new WriteSupport.FinalizedWriteContext(extraMetaData);
}
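Everything placed into FinalizedWriteContext ends up in the footer's key/value metadata. As a sanity check, such footer entries can be read back with ParquetFileReader; the path below is a placeholder:

import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

public class FooterMetadataReader {
  public static void main(String[] args) throws Exception {
    Path path = new Path("/tmp/example.parquet");
    try (ParquetFileReader reader = ParquetFileReader.open(
        HadoopInputFile.fromPath(path, new Configuration()))) {
      // The key/value map holds entries written via FinalizedWriteContext,
      // e.g. the bloom filter and record key bounds above.
      Map<String, String> keyValueMetaData =
          reader.getFooter().getFileMetaData().getKeyValueMetaData();
      keyValueMetaData.forEach((k, v) -> System.out.println(k + " = " + v));
    }
  }
}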
Example #12
Source File: ParquetWriter.java (from parquet-mr, Apache License 2.0)

/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @throws IOException if there is an error while writing
 * @deprecated will be removed in 2.0.0
 */
@Deprecated
public ParquetWriter(
    Path file,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating) throws IOException {
  this(file, writeSupport, compressionCodecName, blockSize, pageSize,
      dictionaryPageSize, enableDictionary, validating, DEFAULT_WRITER_VERSION);
}
Example #13
Source File: Parquet.java (from iceberg, Apache License 2.0)

@Override
protected WriteSupport<T> getWriteSupport(Configuration configuration) {
  for (Map.Entry<String, String> entry : config.entrySet()) {
    configuration.set(entry.getKey(), entry.getValue());
  }
  return new ParquetWriteSupport<>(type, keyValueMetadata, writeSupport);
}
Example #14
Source File: ParquetWriter.java (from parquet-mr, Apache License 2.0)

@Deprecated
public ParquetWriter(Path file, Configuration conf, WriteSupport<T> writeSupport) throws IOException {
  this(file, writeSupport, DEFAULT_COMPRESSION_CODEC_NAME, DEFAULT_BLOCK_SIZE,
      DEFAULT_PAGE_SIZE, DEFAULT_PAGE_SIZE, DEFAULT_IS_DICTIONARY_ENABLED,
      DEFAULT_IS_VALIDATING_ENABLED, DEFAULT_WRITER_VERSION, conf);
}
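All of these ParquetWriter constructors are deprecated; current callers typically go through a ParquetWriter.Builder subclass, which supplies the WriteSupport internally. A hedged sketch using the Avro builder, with a placeholder schema and path:

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class BuilderUsage {
  public static void main(String[] args) throws Exception {
    Schema schema = SchemaBuilder.record("user").fields()
        .requiredString("name")
        .requiredInt("id")
        .endRecord();

    // The builder creates an AvroWriteSupport behind the scenes,
    // so no WriteSupport is passed explicitly.
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
        .<GenericRecord>builder(new Path("/tmp/users.parquet"))
        .withSchema(schema)
        .withCompressionCodec(CompressionCodecName.SNAPPY)
        .build()) {
      GenericRecord user = new GenericData.Record(schema);
      user.put("name", "alice");
      user.put("id", 1);
      writer.write(user);
    }
  }
}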
Example #15
Source File: Parquet.java (from iceberg, Apache License 2.0)

@SuppressWarnings("unchecked")
private <T> WriteSupport<T> getWriteSupport(MessageType type) {
  if (writeSupport != null) {
    return (WriteSupport<T>) writeSupport;
  } else {
    return new AvroWriteSupport<>(
        type,
        ParquetAvro.parquetAvroSchema(AvroSchemaUtil.convert(schema, name)),
        ParquetAvro.DEFAULT_MODEL);
  }
}
Example #16
Source File: ParquetWriter.java (from parquet-mr, Apache License 2.0)

ParquetWriter(
    OutputFile file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int rowGroupSize,
    boolean validating,
    Configuration conf,
    int maxPaddingSize,
    ParquetProperties encodingProps) throws IOException {

  WriteSupport.WriteContext writeContext = writeSupport.init(conf);
  MessageType schema = writeContext.getSchema();

  ParquetFileWriter fileWriter = new ParquetFileWriter(
      file, schema, mode, rowGroupSize, maxPaddingSize,
      encodingProps.getColumnIndexTruncateLength(),
      encodingProps.getStatisticsTruncateLength(),
      encodingProps.getPageWriteChecksumEnabled());
  fileWriter.start();

  this.codecFactory = new CodecFactory(conf, encodingProps.getPageSizeThreshold());
  CodecFactory.BytesCompressor compressor = codecFactory.getCompressor(compressionCodecName);
  this.writer = new InternalParquetRecordWriter<T>(
      fileWriter,
      writeSupport,
      schema,
      writeContext.getExtraMetaData(),
      rowGroupSize,
      compressor,
      validating,
      encodingProps);
}
Example #17
Source File: ParquetOutputFormat.java (from parquet-mr, Apache License 2.0)

public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec, Mode mode)
    throws IOException, InterruptedException {
  final WriteSupport<T> writeSupport = getWriteSupport(conf);

  ParquetProperties.Builder propsBuilder = ParquetProperties.builder()
      .withPageSize(getPageSize(conf))
      .withDictionaryPageSize(getDictionaryPageSize(conf))
      .withDictionaryEncoding(getEnableDictionary(conf))
      .withWriterVersion(getWriterVersion(conf))
      .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
      .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
      .withMaxRowCountForPageSizeCheck(getMaxRowCountForPageSizeCheck(conf))
      .withColumnIndexTruncateLength(getColumnIndexTruncateLength(conf))
      .withStatisticsTruncateLength(getStatisticsTruncateLength(conf))
      .withMaxBloomFilterBytes(getBloomFilterMaxBytes(conf))
      .withBloomFilterEnabled(getBloomFilterEnabled(conf))
      .withPageRowCountLimit(getPageRowCountLimit(conf))
      .withPageWriteChecksumEnabled(getPageWriteChecksumEnabled(conf));
  new ColumnConfigParser()
      .withColumnConfig(ENABLE_DICTIONARY, key -> conf.getBoolean(key, false),
          propsBuilder::withDictionaryEncoding)
      .withColumnConfig(BLOOM_FILTER_ENABLED, key -> conf.getBoolean(key, false),
          propsBuilder::withBloomFilterEnabled)
      .withColumnConfig(BLOOM_FILTER_EXPECTED_NDV, key -> conf.getLong(key, -1L),
          propsBuilder::withBloomFilterNDV)
      .parseConfig(conf);
  ParquetProperties props = propsBuilder.build();

  long blockSize = getLongBlockSize(conf);
  int maxPaddingSize = getMaxPaddingSize(conf);
  boolean validating = getValidation(conf);

  if (LOG.isInfoEnabled()) {
    LOG.info("Parquet block size to {}", blockSize);
    LOG.info("Validation is {}", (validating ? "on" : "off"));
    LOG.info("Maximum row group padding size is {} bytes", maxPaddingSize);
    LOG.info("Parquet properties are:\n{}", props);
  }

  WriteContext init = writeSupport.init(conf);
  ParquetFileWriter w = new ParquetFileWriter(HadoopOutputFile.fromPath(file, conf),
      init.getSchema(), mode, blockSize, maxPaddingSize, props.getColumnIndexTruncateLength(),
      props.getStatisticsTruncateLength(), props.getPageWriteChecksumEnabled());
  w.start();

  float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO,
      MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
  long minAllocation = conf.getLong(ParquetOutputFormat.MIN_MEMORY_ALLOCATION,
      MemoryManager.DEFAULT_MIN_MEMORY_ALLOCATION);
  synchronized (ParquetOutputFormat.class) {
    if (memoryManager == null) {
      memoryManager = new MemoryManager(maxLoad, minAllocation);
    }
  }
  if (memoryManager.getMemoryPoolRatio() != maxLoad) {
    LOG.warn("The configuration " + MEMORY_POOL_RATIO + " has been set. It should not "
        + "be reset by the new value: " + maxLoad);
  }

  return new ParquetRecordWriter<T>(
      w, writeSupport, init.getSchema(), init.getExtraMetaData(), blockSize,
      codec, validating, props, memoryManager, conf);
}
Example #18
Source File: AvroParquetWriter.java (from parquet-mr, Apache License 2.0)

private static <T> WriteSupport<T> writeSupport(Schema avroSchema, GenericData model) {
  return new AvroWriteSupport<T>(
      new AvroSchemaConverter().convert(avroSchema), avroSchema, model);
}
Example #19
Source File: AvroParquetWriter.java (from parquet-mr, Apache License 2.0)

private static <T> WriteSupport<T> writeSupport(Configuration conf, Schema avroSchema, GenericData model) {
  return new AvroWriteSupport<T>(
      new AvroSchemaConverter(conf).convert(avroSchema), avroSchema, model);
}
Example #20
Source File: ExtraMetadataWriteSupport.java (from garmadon, Apache License 2.0)

public ExtraMetadataWriteSupport(WriteSupport<T> delegate) {
  super(delegate);
}
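The super(delegate) call suggests this wrapper builds on a delegating base class such as parquet-mr's DelegatingWriteSupport. The sketch below shows one plausible use of that pattern, injecting an extra footer entry in finalizeWrite; the class name, key, and value are invented for illustration and are not garmadon's actual behavior:

import java.util.HashMap;
import java.util.Map;

import org.apache.parquet.hadoop.api.DelegatingWriteSupport;
import org.apache.parquet.hadoop.api.WriteSupport;

public class StampedWriteSupport<T> extends DelegatingWriteSupport<T> {

  public StampedWriteSupport(WriteSupport<T> delegate) {
    super(delegate);
  }

  @Override
  public WriteSupport.FinalizedWriteContext finalizeWrite() {
    // Merge the delegate's footer metadata with one extra entry.
    Map<String, String> merged =
        new HashMap<>(super.finalizeWrite().getExtraMetaData());
    merged.put("writer.stamp", "hypothetical-value");
    return new WriteSupport.FinalizedWriteContext(merged);
  }
}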
Example #21
Source File: AvroParquetWriter.java (from parquet-mr, Apache License 2.0)

@Override
protected WriteSupport<T> getWriteSupport(Configuration conf) {
  return AvroParquetWriter.writeSupport(conf, schema, model);
}
Example #22
Source File: ParquetWriterFactory.java (from osm-parquetizer, Apache License 2.0)

@Override
protected WriteSupport<Relation> getWriteSupport(Configuration conf) {
  return new RelationWriteSupport(excludeMetadata);
}
Example #23
Source File: ParquetWriteSupport.java (from iceberg, Apache License 2.0)

ParquetWriteSupport(MessageType type, Map<String, String> keyValueMetadata, WriteSupport<T> writeSupport) {
  this.type = type;
  this.keyValueMetadata = keyValueMetadata;
  this.wrapped = writeSupport;
}
Example #24
Source File: Parquet.java (from iceberg, Apache License 2.0)

public WriteBuilder writeSupport(WriteSupport<?> newWriteSupport) {
  this.writeSupport = newWriteSupport;
  return this;
}
Example #25
Source File: Parquet.java (from iceberg, Apache License 2.0)

public ParquetWriteBuilder<T> setWriteSupport(WriteSupport<T> writeSupport) {
  this.writeSupport = writeSupport;
  return self();
}
Example #26
Source File: ParquetWriterFactory.java (from osm-parquetizer, Apache License 2.0)

@Override
protected WriteSupport<Way> getWriteSupport(Configuration conf) {
  return new WayWriteSupport(excludeMetadata);
}