org.apache.flink.api.common.serialization.BulkWriter Java Examples
The following examples show how to use
org.apache.flink.api.common.serialization.BulkWriter.
Each example notes the project it was taken from (for instance the Apache Flink repository or Flink-CEPplus), the source file, and the license under which the code is published.
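Before the examples, a minimal sketch of what implementing the interface involves may help. BulkWriter exposes addElement, flush, and finish, and its nested Factory creates one writer per part-file stream; finish must flush remaining data but must not close the stream, since the sink owns the stream's lifecycle. The LineBulkWriter class below is hypothetical and written only for illustration (it is not part of Flink); it simply writes each element's toString() as a UTF-8 line.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.flink.api.common.serialization.BulkWriter;
import org.apache.flink.core.fs.FSDataOutputStream;

// Hypothetical example class: writes one text line per element.
public class LineBulkWriter<T> implements BulkWriter<T> {

    private final FSDataOutputStream stream;

    public LineBulkWriter(FSDataOutputStream stream) {
        this.stream = stream;
    }

    @Override
    public void addElement(T element) throws IOException {
        // Encode the element and append a newline.
        stream.write(element.toString().getBytes(StandardCharsets.UTF_8));
        stream.write('\n');
    }

    @Override
    public void flush() throws IOException {
        stream.flush();
    }

    @Override
    public void finish() throws IOException {
        // Flush remaining data but do NOT close the stream;
        // the sink manages the stream's lifecycle.
        flush();
    }

    /** Serializable factory that can be handed to StreamingFileSink.forBulkFormat(...). */
    public static class Factory<T> implements BulkWriter.Factory<T> {

        private static final long serialVersionUID = 1L;

        @Override
        public BulkWriter<T> create(FSDataOutputStream out) throws IOException {
            return new LineBulkWriter<>(out);
        }
    }
}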
Example #1
Source File: TestUtils.java (from flink, Apache License 2.0)

static <ID> OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithCustomizedBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, ID> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, ID> bucketFactory,
        final OutputFileConfig outputFileConfig) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forBulkFormat(new Path(outDir.toURI()), writer)
            .withNewBucketAssigner(bucketer)
            .withRollingPolicy(build()) // statically imported; presumably OnCheckpointRollingPolicy.build()
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .withOutputFileConfig(outputFileConfig)
            .build();

    return new OneInputStreamOperatorTestHarness<>(
            new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #2
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory,
        final String partFilePrefix,
        final String partFileSuffix) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forBulkFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .withPartFilePrefix(partFilePrefix)
            .withPartFileSuffix(partFileSuffix)
            .build();

    return new OneInputStreamOperatorTestHarness<>(
            new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #3
Source File: TestUtils.java (from flink, Apache License 2.0)

static <ID> OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithCustomizedBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, ID> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, ID> bucketFactory) throws Exception {

    return createTestSinkWithCustomizedBulkEncoder(
            outDir,
            totalParallelism,
            taskIdx,
            bucketCheckInterval,
            bucketer,
            writer,
            bucketFactory,
            OutputFileConfig.builder().build());
}
Example #4
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
    BulkWriter<RowData> writer = factory.create(out);
    // Wrap the delegate writer so that only the projected (non-partition) columns are written.
    return new BulkWriter<RowData>() {

        @Override
        public void addElement(RowData element) throws IOException {
            writer.addElement(computer.projectColumnsToWrite(element));
        }

        @Override
        public void flush() throws IOException {
            writer.flush();
        }

        @Override
        public void finish() throws IOException {
            writer.finish();
        }
    };
}
Example #5
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    return createTestSinkWithBulkEncoder(
            outDir,
            totalParallelism,
            taskIdx,
            bucketCheckInterval,
            bucketer,
            writer,
            bucketFactory,
            PartFileConfig.DEFAULT_PART_PREFIX,
            PartFileConfig.DEFAULT_PART_SUFFIX);
}
Example #6
Source File: TestUtils.java (from Flink-CEPplus, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forBulkFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withBucketCheckInterval(bucketCheckInterval)
            .withBucketFactory(bucketFactory)
            .build();

    return new OneInputStreamOperatorTestHarness<>(
            new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #7
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory,
        final OutputFileConfig outputFileConfig) throws Exception {

    StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
            .forBulkFormat(new Path(outDir.toURI()), writer)
            .withBucketAssigner(bucketer)
            .withBucketCheckInterval(bucketCheckInterval)
            .withRollingPolicy(build()) // statically imported; presumably OnCheckpointRollingPolicy.build()
            .withBucketFactory(bucketFactory)
            .withOutputFileConfig(outputFileConfig)
            .build();

    return new OneInputStreamOperatorTestHarness<>(
            new StreamSink<>(sink), MAX_PARALLELISM, totalParallelism, taskIdx);
}
Example #8
Source File: TestUtils.java (from flink, Apache License 2.0)

static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
        final File outDir,
        final int totalParallelism,
        final int taskIdx,
        final long bucketCheckInterval,
        final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
        final BulkWriter.Factory<Tuple2<String, Integer>> writer,
        final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

    return createTestSinkWithBulkEncoder(
            outDir,
            totalParallelism,
            taskIdx,
            bucketCheckInterval,
            bucketer,
            writer,
            bucketFactory,
            OutputFileConfig.builder().build());
}
Example #9
Source File: HiveTableSink.java (from flink, Apache License 2.0)

private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns, StorageDescriptor sd) {
    String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase();
    int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
    String[] formatNames = new String[formatFieldCount];
    LogicalType[] formatTypes = new LogicalType[formatFieldCount];
    for (int i = 0; i < formatFieldCount; i++) {
        formatNames[i] = tableSchema.getFieldName(i).get();
        formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
    }
    RowType formatType = RowType.of(formatTypes, formatNames);
    Configuration formatConf = new Configuration(jobConf);
    sd.getSerdeInfo().getParameters().forEach(formatConf::set);
    // Pick the bulk format based on the table's serde library: Parquet, ORC, or none.
    if (serLib.contains("parquet")) {
        return Optional.of(ParquetRowDataBuilder.createWriterFactory(
                formatType, formatConf, hiveVersion.startsWith("3.")));
    } else if (serLib.contains("orc")) {
        TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
        return Optional.of(hiveShim.createOrcBulkWriterFactory(
                formatConf, typeDescription.toString(), formatTypes));
    } else {
        return Optional.empty();
    }
}
Example #10
Source File: AvroFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
    BulkWriter<GenericRecord> writer = factory.create(out);
    AvroRowDataSerializationSchema.SerializationRuntimeConverter converter =
            AvroRowDataSerializationSchema.createRowConverter(rowType);
    Schema schema = AvroSchemaConverter.convertToSchema(rowType);
    // Adapt the Avro GenericRecord writer to a RowData writer by converting each row on the fly.
    return new BulkWriter<RowData>() {

        @Override
        public void addElement(RowData element) throws IOException {
            GenericRecord record = (GenericRecord) converter.convert(schema, element);
            writer.addElement(record);
        }

        @Override
        public void flush() throws IOException {
            writer.flush();
        }

        @Override
        public void finish() throws IOException {
            writer.finish();
        }
    };
}
Example #11
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

private BulkFormatBuilder(
        Path basePath,
        BulkWriter.Factory<IN> writerFactory,
        BucketAssigner<IN, BucketID> assigner,
        long bucketCheckInterval,
        BucketFactory<IN, BucketID> bucketFactory,
        String partFilePrefix,
        String partFileSuffix) {
    this.basePath = Preconditions.checkNotNull(basePath);
    this.writerFactory = writerFactory;
    this.bucketAssigner = Preconditions.checkNotNull(assigner);
    this.bucketCheckInterval = bucketCheckInterval;
    this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
    this.partFilePrefix = Preconditions.checkNotNull(partFilePrefix);
    this.partFileSuffix = Preconditions.checkNotNull(partFileSuffix);
}
Example #12
Source File: StreamingFileSink.java (from flink, Apache License 2.0)

protected BulkFormatBuilder(
        Path basePath,
        BulkWriter.Factory<IN> writerFactory,
        BucketAssigner<IN, BucketID> assigner,
        CheckpointRollingPolicy<IN, BucketID> policy,
        long bucketCheckInterval,
        BucketFactory<IN, BucketID> bucketFactory,
        OutputFileConfig outputFileConfig) {
    this.basePath = Preconditions.checkNotNull(basePath);
    this.writerFactory = writerFactory;
    this.bucketAssigner = Preconditions.checkNotNull(assigner);
    this.rollingPolicy = Preconditions.checkNotNull(policy);
    this.bucketCheckInterval = bucketCheckInterval;
    this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
    this.outputFileConfig = Preconditions.checkNotNull(outputFileConfig);
}
Example #13
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

private static OutputFormat<RowData> createBulkWriterOutputFormat(
        BulkWriter.Factory<RowData> factory,
        Path path) {
    return new OutputFormat<RowData>() {

        private static final long serialVersionUID = 1L;

        private transient BulkWriter<RowData> writer;

        @Override
        public void configure(Configuration parameters) {
        }

        @Override
        public void open(int taskNumber, int numTasks) throws IOException {
            this.writer = factory.create(path.getFileSystem()
                    .create(path, FileSystem.WriteMode.OVERWRITE));
        }

        @Override
        public void writeRecord(RowData record) throws IOException {
            writer.addElement(record);
        }

        @Override
        public void close() throws IOException {
            // Flush buffered data and finalize the writer before the stream is closed.
            writer.flush();
            writer.finish();
        }
    };
}
Example #14
Source File: CompressWriterFactory.java (from flink, Apache License 2.0)

@Override
public BulkWriter<IN> create(FSDataOutputStream out) throws IOException {
    // Fall back to an uncompressed writer when no Hadoop codec is configured.
    if (hadoopCodecName == null || hadoopCodecName.trim().isEmpty()) {
        return new NoCompressionBulkWriter<>(out, extractor);
    }

    initializeCompressionCodec();

    return new HadoopCompressionBulkWriter<>(hadoopCodec.createOutputStream(out), extractor);
}
Example #15
Source File: HiveShimV200.java (from flink, Apache License 2.0)

@Override
public BulkWriter.Factory<RowData> createOrcBulkWriterFactory(
        Configuration conf, String schema, LogicalType[] fieldTypes) {
    return new OrcBulkWriterFactory<>(
            new RowDataVectorizer(schema, fieldTypes), new Properties(), conf);
}
Example #16
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

@SuppressWarnings("unchecked")
private OutputFormatFactory<RowData> createOutputFormatFactory() {
    Object writer = createWriter();
    return writer instanceof Encoder ?
            path -> createEncoderOutputFormat((Encoder<RowData>) writer, path) :
            path -> createBulkWriterOutputFormat((BulkWriter.Factory<RowData>) writer, path);
}
Example #17
Source File: FileSystemTableSink.java (from flink, Apache License 2.0)

private Object createWriter() {
    FileSystemFormatFactory formatFactory = createFormatFactory(properties);
    Configuration conf = new Configuration();
    properties.forEach(conf::setString);

    FileSystemFormatFactory.WriterContext context = new FileSystemFormatFactory.WriterContext() {

        @Override
        public TableSchema getSchema() {
            return schema;
        }

        @Override
        public ReadableConfig getFormatOptions() {
            return new DelegatingConfiguration(conf, formatFactory.factoryIdentifier() + ".");
        }

        @Override
        public List<String> getPartitionKeys() {
            return partitionKeys;
        }
    };

    // Prefer a row-wise Encoder if the format provides one, otherwise fall back to a BulkWriter factory.
    Optional<Encoder<RowData>> encoder = formatFactory.createEncoder(context);
    Optional<BulkWriter.Factory<RowData>> bulk = formatFactory.createBulkWriterFactory(context);

    if (encoder.isPresent()) {
        return encoder.get();
    } else if (bulk.isPresent()) {
        return bulk.get();
    } else {
        throw new TableException(
                formatFactory + " format should implement at least one Encoder or BulkWriter");
    }
}
Example #18
Source File: BulkPartWriter.java (from flink, Apache License 2.0)

@Override
public PartFileWriter<IN, BucketID> openNew(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final Path path,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(path);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
Example #19
Source File: OrcFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
    LogicalType[] orcTypes = Arrays.stream(context.getFormatFieldTypes())
            .map(DataType::getLogicalType)
            .toArray(LogicalType[]::new);

    TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(
            RowType.of(orcTypes, context.getFormatFieldNames()));

    OrcBulkWriterFactory<RowData> factory = new OrcBulkWriterFactory<>(
            new RowDataVectorizer(typeDescription.toString(), orcTypes),
            getOrcProperties(context.getFormatOptions()),
            new Configuration());

    return Optional.of(factory);
}
Example #20
Source File: OrcBulkWriterFactory.java (from flink, Apache License 2.0)

@Override
public BulkWriter<T> create(FSDataOutputStream out) throws IOException {
    OrcFile.WriterOptions opts = getWriterOptions();
    opts.physicalWriter(new PhysicalWriterImpl(out, opts));
    // The path argument is only a placeholder: all bytes go through the PhysicalWriter wrapping the Flink stream.
    return new OrcBulkWriter<>(vectorizer, new WriterImpl(null, FIXED_PATH, opts));
}
Example #21
Source File: TestCsvFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
    if (!useBulkWriter(context)) {
        return Optional.empty();
    }

    DataType[] types = context.getFormatFieldTypes();
    return Optional.of(out -> new CsvBulkWriter(types, out));
}
Example #22
Source File: OrcNoHiveBulkWriterFactory.java (from flink, Apache License 2.0)

@Override
public BulkWriter<RowData> create(FSDataOutputStream out) throws IOException {
    OrcFile.WriterOptions opts = OrcFile.writerOptions(new Properties(), conf);
    TypeDescription description = TypeDescription.fromString(schema);
    opts.setSchema(description);
    opts.physicalWriter(new PhysicalWriterImpl(out, opts));
    WriterImpl writer = new WriterImpl(null, new Path("."), opts);

    VectorizedRowBatch rowBatch = description.createRowBatch();
    return new BulkWriter<RowData>() {

        @Override
        public void addElement(RowData row) throws IOException {
            // Copy the row's fields into the current column-vector batch.
            int rowId = rowBatch.size++;
            for (int i = 0; i < row.getArity(); ++i) {
                setColumn(rowId, rowBatch.cols[i], fieldTypes[i], row, i);
            }
            // Hand a full batch to the ORC writer and start a fresh one.
            if (rowBatch.size == rowBatch.getMaxSize()) {
                writer.addRowBatch(rowBatch);
                rowBatch.reset();
            }
        }

        @Override
        public void flush() throws IOException {
            if (rowBatch.size != 0) {
                writer.addRowBatch(rowBatch);
                rowBatch.reset();
            }
        }

        @Override
        public void finish() throws IOException {
            flush();
            writer.close();
        }
    };
}
Example #23
Source File: BulkBucketWriter.java (from flink, Apache License 2.0)

@Override
public InProgressFileWriter<IN, BucketID> resumeFrom(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final RecoverableWriter.ResumeRecoverable resumable,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(resumable);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
Example #24
Source File: BulkBucketWriter.java (from flink, Apache License 2.0)

@Override
public InProgressFileWriter<IN, BucketID> openNew(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final Path path,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(path);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
Example #25
Source File: ParquetFileSystemFormatFactory.java (from flink, Apache License 2.0)

@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
    return Optional.of(ParquetRowDataBuilder.createWriterFactory(
            RowType.of(
                    Arrays.stream(context.getFormatFieldTypes())
                            .map(DataType::getLogicalType)
                            .toArray(LogicalType[]::new),
                    context.getFormatFieldNames()),
            getParquetConfiguration(context.getFormatOptions()),
            context.getFormatOptions().get(UTC_TIMEZONE)));
}
Example #26
Source File: BulkPartWriter.java (from flink, Apache License 2.0)

@Override
public PartFileWriter<IN, BucketID> resumeFrom(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final RecoverableWriter.ResumeRecoverable resumable,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(resumable);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
Example #27
Source File: BulkPartWriter.java (from flink, Apache License 2.0)

private BulkPartWriter(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream currentPartStream,
        final BulkWriter<IN> writer,
        final long creationTime) {
    super(bucketId, currentPartStream, creationTime);
    this.writer = Preconditions.checkNotNull(writer);
}
Example #28
Source File: BulkPartWriter.java (from flink, Apache License 2.0)

BulkPartWriter(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream currentPartStream,
        final BulkWriter<IN> writer,
        final long creationTime) {
    super(bucketId, currentPartStream, creationTime);
    this.writer = Preconditions.checkNotNull(writer);
}
Example #29
Source File: BulkPartWriter.java (from Flink-CEPplus, Apache License 2.0)

@Override
public PartFileWriter<IN, BucketID> openNew(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final Path path,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(path);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
Example #30
Source File: BulkPartWriter.java (from Flink-CEPplus, Apache License 2.0)

@Override
public PartFileWriter<IN, BucketID> resumeFrom(
        final BucketID bucketId,
        final RecoverableFsDataOutputStream stream,
        final RecoverableWriter.ResumeRecoverable resumable,
        final long creationTime) throws IOException {

    Preconditions.checkNotNull(stream);
    Preconditions.checkNotNull(resumable);

    final BulkWriter<IN> writer = writerFactory.create(stream);
    return new BulkPartWriter<>(bucketId, stream, writer, creationTime);
}
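Putting the examples in context, the sketch below shows how a BulkWriter.Factory is typically wired into a StreamingFileSink via forBulkFormat, mirroring the builder calls in the TestUtils examples above. It is a hedged sketch rather than code from any of the projects: it reuses the hypothetical LineBulkWriter.Factory from the introduction, and the output path, checkpoint interval, and sample records are arbitrary placeholders.

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.sink.filesystem.StreamingFileSink;
import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;

public class BulkSinkJobSketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Bulk formats roll part files on checkpoint, so checkpointing must be enabled.
        env.enableCheckpointing(10_000L); // placeholder interval

        DataStream<Tuple2<String, Integer>> records = env.fromElements(
                Tuple2.of("a", 1), Tuple2.of("b", 2)); // placeholder input

        StreamingFileSink<Tuple2<String, Integer>> sink = StreamingFileSink
                // LineBulkWriter.Factory is the hypothetical factory sketched in the introduction.
                .forBulkFormat(new Path("/tmp/bulk-output"), new LineBulkWriter.Factory<Tuple2<String, Integer>>())
                .withRollingPolicy(OnCheckpointRollingPolicy.build())
                .build();

        records.addSink(sink);
        env.execute("bulk-writer-sketch");
    }
}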