org.apache.flink.api.common.serialization.BulkWriter#Factory

Source File: TestUtils.java From Flink-CEPplus with Apache License 2.0

6 votes

/**
 * Builds a test harness around a bulk-format {@link StreamingFileSink}.
 *
 * <p>The sink is rooted at the URI of {@code outDir}, configured with the supplied bucket
 * assigner, bucket check interval and bucket factory, and then wrapped in a {@link StreamSink}.
 *
 * @param outDir directory whose URI becomes the base path of the sink
 * @param totalParallelism parallelism value handed to the harness
 * @param taskIdx subtask index handed to the harness
 * @param bucketCheckInterval value forwarded to {@code withBucketCheckInterval}
 * @param bucketer bucket assigner installed on the sink builder
 * @param writer bulk writer factory the sink builder is created with
 * @param bucketFactory bucket factory installed on the sink builder
 * @return a harness wrapping the configured sink
 * @throws Exception declared by the signature; the visible body does not show which call raises it
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
		final File outDir,
		final int totalParallelism,
		final int taskIdx,
		final long bucketCheckInterval,
		final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
		final BulkWriter.Factory<Tuple2<String, Integer>> writer,
		final BucketFactory<Tuple2<String, Integer>, String> bucketFactory) throws Exception {

	final Path basePath = new Path(outDir.toURI());

	final StreamingFileSink<Tuple2<String, Integer>> fileSink = StreamingFileSink
			.forBulkFormat(basePath, writer)
			.withBucketAssigner(bucketer)
			.withBucketCheckInterval(bucketCheckInterval)
			.withBucketFactory(bucketFactory)
			.build();

	final StreamSink<Tuple2<String, Integer>> sinkOperator = new StreamSink<>(fileSink);
	return new OneInputStreamOperatorTestHarness<>(
			sinkOperator,
			MAX_PARALLELISM,
			totalParallelism,
			taskIdx);
}

Source File: TestUtils.java From flink with Apache License 2.0

6 votes

/**
 * Builds a test harness around a bulk-format {@link StreamingFileSink} with a custom
 * output file configuration.
 *
 * <p>The builder calls are applied in this order: bucket assigner, bucket check interval,
 * rolling policy, bucket factory, output file config.
 *
 * @param outDir directory whose URI becomes the base path of the sink
 * @param totalParallelism parallelism value handed to the harness
 * @param taskIdx subtask index handed to the harness
 * @param bucketCheckInterval value forwarded to {@code withBucketCheckInterval}
 * @param bucketer bucket assigner installed on the sink builder
 * @param writer bulk writer factory the sink builder is created with
 * @param bucketFactory bucket factory installed on the sink builder
 * @param outputFileConfig output file configuration installed on the sink builder
 * @return a harness wrapping the configured sink
 * @throws Exception declared by the signature; the visible body does not show which call raises it
 */
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithBulkEncoder(
		final File outDir,
		final int totalParallelism,
		final int taskIdx,
		final long bucketCheckInterval,
		final BucketAssigner<Tuple2<String, Integer>, String> bucketer,
		final BulkWriter.Factory<Tuple2<String, Integer>> writer,
		final BucketFactory<Tuple2<String, Integer>, String> bucketFactory,
		final OutputFileConfig outputFileConfig) throws Exception {

	final Path basePath = new Path(outDir.toURI());

	// NOTE(review): build() is an unqualified call, presumably a statically imported
	// rolling-policy factory method; confirm against the file's imports.
	final StreamingFileSink<Tuple2<String, Integer>> fileSink = StreamingFileSink
		.forBulkFormat(basePath, writer)
		.withBucketAssigner(bucketer)
		.withBucketCheckInterval(bucketCheckInterval)
		.withRollingPolicy(build())
		.withBucketFactory(bucketFactory)
		.withOutputFileConfig(outputFileConfig)
		.build();

	final StreamSink<Tuple2<String, Integer>> sinkOperator = new StreamSink<>(fileSink);
	return new OneInputStreamOperatorTestHarness<>(
		sinkOperator,
		MAX_PARALLELISM,
		totalParallelism,
		taskIdx);
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

6 votes

/**
 * Creates a bulk-format builder from fully specified settings.
 *
 * <p>Every reference argument is null-checked here, so a null is reported at construction
 * time instead of wherever the corresponding field is first dereferenced.
 *
 * @param basePath base path stored on the builder; must not be null
 * @param writerFactory bulk writer factory stored on the builder; must not be null
 * @param assigner bucket assigner stored on the builder; must not be null
 * @param bucketCheckInterval bucket check interval, stored as given without validation
 * @param bucketFactory bucket factory stored on the builder; must not be null
 * @param partFilePrefix part file prefix stored on the builder; must not be null
 * @param partFileSuffix part file suffix stored on the builder; must not be null
 * @throws NullPointerException if any reference argument is null
 */
private BulkFormatBuilder(
		Path basePath,
		BulkWriter.Factory<IN> writerFactory,
		BucketAssigner<IN, BucketID> assigner,
		long bucketCheckInterval,
		BucketFactory<IN, BucketID> bucketFactory,
		String partFilePrefix,
		String partFileSuffix) {
	this.basePath = Preconditions.checkNotNull(basePath);
	// Checked like every other reference argument so a missing factory fails fast.
	this.writerFactory = Preconditions.checkNotNull(writerFactory);
	this.bucketAssigner = Preconditions.checkNotNull(assigner);
	this.bucketCheckInterval = bucketCheckInterval;
	this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
	this.partFilePrefix = Preconditions.checkNotNull(partFilePrefix);
	this.partFileSuffix = Preconditions.checkNotNull(partFileSuffix);
}

Source File: TestUtils.java From flink with Apache License 2.0

6 votes

/**
 * Builds a test harness around a bulk-format {@link StreamingFileSink} whose bucket id type
 * is chosen by the caller.
 *
 * <p>The builder calls are applied in this order: new bucket assigner, rolling policy,
 * bucket check interval, bucket factory, output file config.
 *
 * @param outDir directory whose URI becomes the base path of the sink
 * @param totalParallelism parallelism value handed to the harness
 * @param taskIdx subtask index handed to the harness
 * @param bucketCheckInterval value forwarded to {@code withBucketCheckInterval}
 * @param bucketer bucket assigner installed via {@code withNewBucketAssigner}
 * @param writer bulk writer factory the sink builder is created with
 * @param bucketFactory bucket factory installed on the sink builder
 * @param outputFileConfig output file configuration installed on the sink builder
 * @param <ID> bucket id type shared by the assigner and the bucket factory
 * @return a harness wrapping the configured sink
 * @throws Exception declared by the signature; the visible body does not show which call raises it
 */
static <ID> OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithCustomizedBulkEncoder(
		final File outDir,
		final int totalParallelism,
		final int taskIdx,
		final long bucketCheckInterval,
		final BucketAssigner<Tuple2<String, Integer>, ID> bucketer,
		final BulkWriter.Factory<Tuple2<String, Integer>> writer,
		final BucketFactory<Tuple2<String, Integer>, ID> bucketFactory,
		final OutputFileConfig outputFileConfig) throws Exception {

	final Path basePath = new Path(outDir.toURI());

	// NOTE(review): build() is an unqualified call, presumably a statically imported
	// rolling-policy factory method; confirm against the file's imports.
	final StreamingFileSink<Tuple2<String, Integer>> fileSink = StreamingFileSink
			.forBulkFormat(basePath, writer)
			.withNewBucketAssigner(bucketer)
			.withRollingPolicy(build())
			.withBucketCheckInterval(bucketCheckInterval)
			.withBucketFactory(bucketFactory)
			.withOutputFileConfig(outputFileConfig)
			.build();

	final StreamSink<Tuple2<String, Integer>> sinkOperator = new StreamSink<>(fileSink);
	return new OneInputStreamOperatorTestHarness<>(
			sinkOperator,
			MAX_PARALLELISM,
			totalParallelism,
			taskIdx);
}

Source File: HiveTableSink.java From flink with Apache License 2.0

6 votes

/**
 * Picks a bulk writer factory that matches the serialization library of a storage descriptor.
 *
 * <p>The format row type is built from the first
 * {@code tableSchema.getFieldCount() - partitionColumns.length} fields of the table schema.
 * NOTE(review): this presumes partition columns are the trailing fields of the schema; confirm.
 *
 * <p>The serialization library name is lower-cased and matched by substring: a name containing
 * {@code "parquet"} yields a Parquet factory, one containing {@code "orc"} yields an ORC
 * factory obtained from the Hive shim, and anything else yields an empty result.
 *
 * @param partitionColumns partition column names; only the array length is used here
 * @param sd storage descriptor supplying the serialization library name and serde parameters
 * @return a Parquet or ORC writer factory, or {@link Optional#empty()} for any other library
 */
private Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(String[] partitionColumns,
		StorageDescriptor sd) {
	// Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
	String serLib = sd.getSerdeInfo().getSerializationLib().toLowerCase(java.util.Locale.ROOT);
	int formatFieldCount = tableSchema.getFieldCount() - partitionColumns.length;
	String[] formatNames = new String[formatFieldCount];
	LogicalType[] formatTypes = new LogicalType[formatFieldCount];
	for (int i = 0; i < formatFieldCount; i++) {
		formatNames[i] = tableSchema.getFieldName(i).get();
		formatTypes[i] = tableSchema.getFieldDataType(i).get().getLogicalType();
	}
	RowType formatType = RowType.of(formatTypes, formatNames);
	// Start from a copy of the job configuration and overlay the serde parameters on it.
	Configuration formatConf = new Configuration(jobConf);
	sd.getSerdeInfo().getParameters().forEach(formatConf::set);
	if (serLib.contains("parquet")) {
		return Optional.of(ParquetRowDataBuilder.createWriterFactory(
				formatType, formatConf, hiveVersion.startsWith("3.")));
	} else if (serLib.contains("orc")) {
		TypeDescription typeDescription = OrcSplitReaderUtil.logicalTypeToOrcType(formatType);
		return Optional.of(hiveShim.createOrcBulkWriterFactory(
				formatConf, typeDescription.toString(), formatTypes));
	} else {
		return Optional.empty();
	}
}

Source File: TestUtils.java From flink with Apache License 2.0

6 votes

/**
 * Convenience overload that uses an {@link OutputFileConfig} produced by
 * {@code OutputFileConfig.builder().build()} and forwards everything else unchanged to the
 * overload that accepts an explicit output file configuration.
 *
 * @param outDir forwarded as-is
 * @param totalParallelism forwarded as-is
 * @param taskIdx forwarded as-is
 * @param bucketCheckInterval forwarded as-is
 * @param bucketer forwarded as-is
 * @param writer forwarded as-is
 * @param bucketFactory forwarded as-is
 * @param <ID> bucket id type shared by the assigner and the bucket factory
 * @return whatever the delegated overload returns
 * @throws Exception propagated from the delegated overload
 */
static <ID> OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createTestSinkWithCustomizedBulkEncoder(
		final File outDir,
		final int totalParallelism,
		final int taskIdx,
		final long bucketCheckInterval,
		final BucketAssigner<Tuple2<String, Integer>, ID> bucketer,
		final BulkWriter.Factory<Tuple2<String, Integer>> writer,
		final BucketFactory<Tuple2<String, Integer>, ID> bucketFactory) throws Exception {

	final OutputFileConfig defaultOutputFileConfig = OutputFileConfig.builder().build();

	return createTestSinkWithCustomizedBulkEncoder(
			outDir, totalParallelism, taskIdx, bucketCheckInterval,
			bucketer, writer, bucketFactory,
			defaultOutputFileConfig);
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

6 votes

/**
 * Creates a bulk-format builder from fully specified settings.
 *
 * <p>Every reference argument is null-checked here, so a null is reported at construction
 * time instead of wherever the corresponding field is first dereferenced.
 *
 * @param basePath base path stored on the builder; must not be null
 * @param writerFactory bulk writer factory stored on the builder; must not be null
 * @param assigner bucket assigner stored on the builder; must not be null
 * @param policy rolling policy stored on the builder; must not be null
 * @param bucketCheckInterval bucket check interval, stored as given without validation
 * @param bucketFactory bucket factory stored on the builder; must not be null
 * @param outputFileConfig output file configuration stored on the builder; must not be null
 * @throws NullPointerException if any reference argument is null
 */
protected BulkFormatBuilder(
		Path basePath,
		BulkWriter.Factory<IN> writerFactory,
		BucketAssigner<IN, BucketID> assigner,
		CheckpointRollingPolicy<IN, BucketID> policy,
		long bucketCheckInterval,
		BucketFactory<IN, BucketID> bucketFactory,
		OutputFileConfig outputFileConfig) {
	this.basePath = Preconditions.checkNotNull(basePath);
	// Checked like every other reference argument so a missing factory fails fast.
	this.writerFactory = Preconditions.checkNotNull(writerFactory);
	this.bucketAssigner = Preconditions.checkNotNull(assigner);
	this.rollingPolicy = Preconditions.checkNotNull(policy);
	this.bucketCheckInterval = bucketCheckInterval;
	this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
	this.outputFileConfig = Preconditions.checkNotNull(outputFileConfig);
}

Source File: TestCsvFileSystemFormatFactory.java From flink with Apache License 2.0

5 votes

/**
 * Supplies a CSV bulk writer factory when {@code useBulkWriter(context)} says so.
 *
 * @param context writer context providing the format field types
 * @return a factory creating a {@code CsvBulkWriter} per output stream, or
 *     {@link Optional#empty()} when the bulk writer is not to be used
 */
@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
	if (useBulkWriter(context)) {
		// Resolved once here; the lambda captures the array instead of the whole context.
		final DataType[] fieldTypes = context.getFormatFieldTypes();
		return Optional.of(stream -> new CsvBulkWriter(fieldTypes, stream));
	}

	return Optional.empty();
}

Source File: StreamingFileSink.java From Flink-CEPplus with Apache License 2.0

5 votes

/**
 * Creates a bulk-format builder from fully specified settings.
 *
 * <p>Every reference argument is null-checked here, so a null is reported at construction
 * time instead of wherever the corresponding field is first dereferenced.
 *
 * @param basePath base path stored on the builder; must not be null
 * @param writerFactory bulk writer factory stored on the builder; must not be null
 * @param assigner bucket assigner stored on the builder; must not be null
 * @param bucketCheckInterval bucket check interval, stored as given without validation
 * @param bucketFactory bucket factory stored on the builder; must not be null
 * @throws NullPointerException if any reference argument is null
 */
private BulkFormatBuilder(
		Path basePath,
		BulkWriter.Factory<IN> writerFactory,
		BucketAssigner<IN, BucketID> assigner,
		long bucketCheckInterval,
		BucketFactory<IN, BucketID> bucketFactory) {
	this.basePath = Preconditions.checkNotNull(basePath);
	// Checked like every other reference argument so a missing factory fails fast.
	this.writerFactory = Preconditions.checkNotNull(writerFactory);
	this.bucketAssigner = Preconditions.checkNotNull(assigner);
	this.bucketCheckInterval = bucketCheckInterval;
	this.bucketFactory = Preconditions.checkNotNull(bucketFactory);
}

Source File: ParquetFileSystemFormatFactory.java From flink with Apache License 2.0

5 votes

/**
 * Creates a Parquet bulk writer factory for the format fields of the given context.
 *
 * <p>The row type is assembled from the logical types of the context's format field types
 * together with the format field names. The Parquet configuration and the
 * {@code UTC_TIMEZONE} option are both read from the context's format options.
 *
 * @param context writer context supplying field types, field names and format options
 * @return an {@link Optional} that always contains the created factory
 */
@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
	final LogicalType[] fieldTypes = Arrays.stream(context.getFormatFieldTypes())
			.map(DataType::getLogicalType)
			.toArray(LogicalType[]::new);
	final RowType rowType = RowType.of(fieldTypes, context.getFormatFieldNames());

	return Optional.of(
			ParquetRowDataBuilder.createWriterFactory(
					rowType,
					getParquetConfiguration(context.getFormatOptions()),
					context.getFormatOptions().get(UTC_TIMEZONE)));
}

Source File: FileSystemTableSink.java From flink with Apache License 2.0

5 votes

/**
 * Creates the writer object for this sink from the configured format factory.
 *
 * <p>A writer context exposing the schema, the format options and the partition keys is
 * handed to the format factory. Both the encoder and the bulk writer factory are requested
 * before either result is inspected; the encoder wins when both are present.
 *
 * @return the encoder if the format provides one, otherwise the bulk writer factory
 * @throws TableException if the format provides neither an encoder nor a bulk writer factory
 */
private Object createWriter() {
	final FileSystemFormatFactory formatFactory = createFormatFactory(properties);

	// Copy all sink properties into a configuration that the context can delegate to.
	final Configuration options = new Configuration();
	properties.forEach(options::setString);

	final FileSystemFormatFactory.WriterContext writerContext = new FileSystemFormatFactory.WriterContext() {

		@Override
		public TableSchema getSchema() {
			return schema;
		}

		@Override
		public ReadableConfig getFormatOptions() {
			// Format options are looked up under the "<factoryIdentifier>." prefix.
			final String prefix = formatFactory.factoryIdentifier() + ".";
			return new DelegatingConfiguration(options, prefix);
		}

		@Override
		public List<String> getPartitionKeys() {
			return partitionKeys;
		}
	};

	final Optional<Encoder<RowData>> rowEncoder = formatFactory.createEncoder(writerContext);
	final Optional<BulkWriter.Factory<RowData>> bulkFactory = formatFactory.createBulkWriterFactory(writerContext);

	if (rowEncoder.isPresent()) {
		return rowEncoder.get();
	}
	if (bulkFactory.isPresent()) {
		return bulkFactory.get();
	}
	throw new TableException(
			formatFactory + " format should implement at least one Encoder or BulkWriter");
}

Source File: StreamingFileSink.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Creates a bulk-format builder with a default bucket check interval and a
 * {@code DefaultBucketFactoryImpl}, delegating to the five-argument constructor.
 *
 * @param basePath forwarded to the delegated constructor
 * @param writerFactory forwarded to the delegated constructor
 * @param assigner forwarded to the delegated constructor
 */
BulkFormatBuilder(Path basePath, BulkWriter.Factory<IN> writerFactory, BucketAssigner<IN, BucketID> assigner) {
	// NOTE(review): 60L * 1000L is presumably one minute expressed in milliseconds; confirm the unit.
	this(basePath, writerFactory, assigner, 60L * 1000L, new DefaultBucketFactoryImpl<>());
}

Source File: AvroFileSystemFormatFactory.java From flink with Apache License 2.0

4 votes

/**
 * Creates an Avro bulk writer factory for the given writer context.
 *
 * <p>The factory is built from the context's format row type and the value of the
 * {@code AVRO_OUTPUT_CODEC} format option.
 *
 * @param context writer context supplying the row type and format options
 * @return an {@link Optional} that always contains the created factory
 */
@Override
public Optional<BulkWriter.Factory<RowData>> createBulkWriterFactory(WriterContext context) {
	final RowDataAvroWriterFactory avroFactory = new RowDataAvroWriterFactory(
			context.getFormatRowType(),
			context.getFormatOptions().get(AVRO_OUTPUT_CODEC));

	return Optional.of(avroFactory);
}

Source File: HiveShimV100.java From flink with Apache License 2.0

4 votes

/**
 * Creates the ORC bulk writer factory for this shim.
 *
 * <p>This implementation returns an {@code OrcNoHiveBulkWriterFactory} constructed from the
 * three arguments unchanged.
 *
 * @param conf configuration passed to the factory
 * @param schema schema string passed to the factory
 * @param fieldTypes logical field types passed to the factory
 * @return a new {@code OrcNoHiveBulkWriterFactory}
 */
@Override
public BulkWriter.Factory<RowData> createOrcBulkWriterFactory(
		Configuration conf, String schema, LogicalType[] fieldTypes) {
	return new OrcNoHiveBulkWriterFactory(conf, schema, fieldTypes);
}

Source File: BulkBucketWriter.java From flink with Apache License 2.0

4 votes

/**
 * Creates a bucket writer that keeps the given bulk writer factory.
 *
 * @param recoverableWriter recoverable writer handed to the superclass constructor
 * @param writerFactory bulk writer factory stored on this instance (not null-checked here)
 * @throws IOException declared by the signature; the visible body does not show which call raises it
 */
BulkBucketWriter(final RecoverableWriter recoverableWriter, BulkWriter.Factory<IN> writerFactory) throws IOException {
	super(recoverableWriter);
	this.writerFactory = writerFactory;
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

4 votes

/**
 * Creates a bulk-format builder with default settings for everything but the base path,
 * writer factory and bucket assigner.
 *
 * <p>Delegates to the full constructor with {@code OnCheckpointRollingPolicy.build()},
 * {@code DEFAULT_BUCKET_CHECK_INTERVAL}, a new {@code DefaultBucketFactoryImpl} and
 * {@code OutputFileConfig.builder().build()}.
 *
 * @param basePath forwarded to the delegated constructor
 * @param writerFactory forwarded to the delegated constructor
 * @param assigner forwarded to the delegated constructor
 */
protected BulkFormatBuilder(Path basePath, BulkWriter.Factory<IN> writerFactory, BucketAssigner<IN, BucketID> assigner) {
	this(basePath, writerFactory, assigner, OnCheckpointRollingPolicy.build(), DEFAULT_BUCKET_CHECK_INTERVAL,
		new DefaultBucketFactoryImpl<>(), OutputFileConfig.builder().build());
}

Source File: BulkPartWriter.java From Flink-CEPplus with Apache License 2.0

4 votes

/**
 * Creates a factory that keeps the given bulk writer factory.
 *
 * @param writerFactory bulk writer factory stored on this instance (not null-checked here)
 */
Factory(BulkWriter.Factory<IN> writerFactory) {
	this.writerFactory = writerFactory;
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

4 votes

/**
 * Creates a default bulk-format builder whose bucket id type is {@code String}.
 *
 * <p>All three arguments are passed straight to the superclass constructor.
 *
 * @param basePath forwarded to the superclass constructor
 * @param writerFactory forwarded to the superclass constructor
 * @param assigner forwarded to the superclass constructor
 */
private DefaultBulkFormatBuilder(Path basePath, BulkWriter.Factory<IN> writerFactory, BucketAssigner<IN, String> assigner) {
	super(basePath, writerFactory, assigner);
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

2 votes

/**
 * Creates the builder for a {@link StreamingFileSink} with bulk-encoding format.
 *
 * <p>The returned builder starts out with a {@link DateTimeBucketAssigner} as its bucket assigner.
 *
 * @param basePath the base path where all the buckets are going to be created as sub-directories.
 * @param writerFactory the {@link BulkWriter.Factory} to be used when writing elements in the buckets.
 * @param <IN> the type of incoming elements
 * @return The builder where the remaining configuration parameters for the sink can be configured.
 * In order to instantiate the sink, call {@code build()} on the builder after specifying the desired parameters.
 */
public static <IN> StreamingFileSink.DefaultBulkFormatBuilder<IN> forBulkFormat(
		final Path basePath, final BulkWriter.Factory<IN> writerFactory) {
	return new StreamingFileSink.DefaultBulkFormatBuilder<>(basePath, writerFactory, new DateTimeBucketAssigner<>());
}

Source File: StreamingFileSink.java From flink with Apache License 2.0

2 votes

/**
 * Creates the builder for a {@link StreamingFileSink} with bulk-encoding format.
 *
 * <p>The returned builder starts out with a {@link DateTimeBucketAssigner} as its bucket assigner.
 *
 * @param basePath the base path where all the buckets are going to be created as sub-directories.
 * @param writerFactory the {@link BulkWriter.Factory} to be used when writing elements in the buckets.
 * @param <IN> the type of incoming elements
 * @return The builder where the remaining configuration parameters for the sink can be configured.
 * In order to instantiate the sink, call {@code build()} on the builder after specifying the desired parameters.
 */
public static <IN> StreamingFileSink.BulkFormatBuilder<IN, String> forBulkFormat(
		final Path basePath, final BulkWriter.Factory<IN> writerFactory) {
	return new StreamingFileSink.BulkFormatBuilder<>(basePath, writerFactory, new DateTimeBucketAssigner<>());
}

Java Code Examples for org.apache.flink.api.common.serialization.BulkWriter#Factory