org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: KafkaExample.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
Example #2
Source File: KafkaExample.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
Example #3
Source File: KafkaExample.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
Example #4
Source File: KafkaSinkProvider.java From stateful-functions with Apache License 2.0 | 6 votes |
/**
 * Creates the Kafka sink for the given egress spec.
 *
 * <p>The producer configuration starts from the properties carried by the spec, then the
 * bootstrap servers are set from the spec's Kafka address. When the resolved semantic is
 * {@code EXACTLY_ONCE}, the transaction timeout from the spec is added as well.
 *
 * @param egressSpec the egress spec; converted to a {@code KafkaEgressSpec} via {@code asSpec}
 * @param <T> type of the records written by the sink
 * @return a {@code FlinkKafkaProducer} configured from the spec
 */
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
    KafkaEgressSpec<T> spec = asSpec(egressSpec);

    Properties properties = new Properties();
    properties.putAll(spec.properties());
    properties.put("bootstrap.servers", spec.kafkaAddress());

    Semantic producerSemantic = semanticFromSpec(spec);
    if (producerSemantic == Semantic.EXACTLY_ONCE) {
        // Store the timeout as a String. java.util.Properties is a String-to-String map by
        // contract: a boxed Long inserted with put() is invisible to getProperty() and
        // stringPropertyNames(), so any reader using those accessors would miss the setting.
        properties.setProperty(
                "transaction.timeout.ms",
                String.valueOf(spec.transactionTimeoutDuration().toMillis()));
    }

    return new FlinkKafkaProducer<>(
            randomKafkaTopic(),
            serializerFromSpec(spec),
            properties,
            producerSemantic,
            spec.kafkaProducerPoolSize());
}
Example #5
Source File: AvroDataGeneratorJob.java From flink-tutorials with Apache License 2.0 | 6 votes |
/**
 * Runs the data generator job: generated {@code Message}s are written to Kafka through a
 * schema built by {@code ClouderaRegistryKafkaSerializationSchema} and also printed.
 *
 * @param args command-line arguments, parsed with {@code Utils.parseArgs}
 * @throws Exception propagated from argument parsing, property loading or job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = Utils.parseArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // The schema is built for the configured topic and keys each record by its id.
    final KafkaSerializationSchema<Message> registrySchema =
            ClouderaRegistryKafkaSerializationSchema.<Message>builder(params.getRequired(K_KAFKA_TOPIC))
                    .setConfig(Utils.readSchemaRegistryProperties(params))
                    .setKey(Message::getId)
                    .build();

    // NOTE(review): "default" is passed as the producer's topic argument; presumably the
    // schema above decides the actual destination topic — confirm.
    final FlinkKafkaProducer<Message> producer =
            new FlinkKafkaProducer<>(
                    "default",
                    registrySchema,
                    Utils.readKafkaProperties(params),
                    FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

    final DataStream<Message> generated =
            env.addSource(new DataGeneratorSource()).name("Data Generator Source");

    generated.addSink(producer).name("Kafka Sink").uid("Kafka Sink");
    generated.print();

    env.execute("Data Generator Job");
}
Example #6
Source File: KafkaSinkProvider.java From flink-statefun with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code FlinkKafkaProducer} sink from the given egress spec.
 *
 * <p>The spec's own properties are copied first; the bootstrap servers and, for the
 * {@code EXACTLY_ONCE} semantic only, the transaction timeout are then set as string
 * properties.
 *
 * @param egressSpec the egress spec; converted to a {@code KafkaEgressSpec} via {@code asSpec}
 * @param <T> type of the records written by the sink
 * @return the configured Kafka sink
 */
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
    final KafkaEgressSpec<T> kafkaSpec = asSpec(egressSpec);

    final Properties producerConfig = new Properties();
    producerConfig.putAll(kafkaSpec.properties());
    producerConfig.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, kafkaSpec.kafkaAddress());

    final Semantic semantic = semanticFromSpec(kafkaSpec);
    if (semantic == Semantic.EXACTLY_ONCE) {
        // Kept as a String so that the value is a regular string property.
        final long timeoutMillis = kafkaSpec.transactionTimeoutDuration().toMillis();
        producerConfig.setProperty(
                ProducerConfig.TRANSACTION_TIMEOUT_CONFIG, Long.toString(timeoutMillis));
    }

    return new FlinkKafkaProducer<>(
            randomKafkaTopic(),
            serializerFromSpec(kafkaSpec),
            producerConfig,
            semantic,
            kafkaSpec.kafkaProducerPoolSize());
}
Example #7
Source File: ClickEventCount.java From flink with Apache License 2.0 | 5 votes |
/**
 * Runs the click event count job: click events are read from Kafka, keyed by page, aggregated
 * per time window of {@code WINDOW_SIZE}, and the resulting statistics are written to Kafka.
 *
 * <p>Recognized arguments (with defaults): {@code input-topic} ("input"),
 * {@code output-topic} ("output") and {@code bootstrap.servers} ("localhost:9092").
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    configureEnvironment(params, env);

    final String sourceTopic = params.get("input-topic", "input");
    final String sinkTopic = params.get("output-topic", "output");
    final String bootstrapServers = params.get("bootstrap.servers", "localhost:9092");

    // One Properties instance is shared by the consumer and the producer.
    final Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    // Event time is taken from the click's own timestamp, with a bound of 200 ms.
    final BoundedOutOfOrdernessTimestampExtractor<ClickEvent> timestampExtractor =
            new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
                @Override
                public long extractTimestamp(final ClickEvent element) {
                    return element.getTimestamp().getTime();
                }
            };

    env.addSource(new FlinkKafkaConsumer<>(sourceTopic, new ClickEventDeserializationSchema(), kafkaProps))
            .name("ClickEvent Source")
            .assignTimestampsAndWatermarks(timestampExtractor)
            .keyBy(ClickEvent::getPage)
            .timeWindow(WINDOW_SIZE)
            .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
            .name("ClickEvent Counter")
            .addSink(
                    new FlinkKafkaProducer<>(
                            sinkTopic,
                            new ClickEventStatisticsSerializationSchema(sinkTopic),
                            kafkaProps,
                            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
            .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example #8
Source File: KafkaItemTransactionJob.java From flink-tutorials with Apache License 2.0 | 5 votes |
public void writeQueryOutput(ParameterTool params, DataStream<QueryResult> queryResultStream) { // Query output is written back to kafka in a tab delimited format for readability FlinkKafkaProducer<QueryResult> queryOutputSink = new FlinkKafkaProducer<>( params.getRequired(QUERY_OUTPUT_TOPIC_KEY), new QueryResultSchema(), Utils.readKafkaProperties(params, false), Optional.of(new HashingKafkaPartitioner<>())); queryResultStream .addSink(queryOutputSink) .name("Kafka Query Result Sink") .uid("Kafka Query Result Sink"); }
Example #9
Source File: KafkaDataGeneratorJob.java From flink-tutorials with Apache License 2.0 | 5 votes |
/**
 * Runs the Kafka data generator job.
 *
 * <p>Generated item transactions are always written to the transaction input topic. When the
 * {@code GENERATE_QUERIES} flag is set (default {@code false}), generated queries are written
 * to the query input topic as well.
 *
 * @param args exactly one element: the path to the properties file
 * @throws RuntimeException if {@code args} does not contain exactly one element
 * @throws Exception propagated from reading the properties file or executing the job
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Path to the properties file is expected as the only argument.");
    }

    final ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Item transactions: generator source -> keyed by itemId -> Kafka.
    final DataStream<ItemTransaction> transactions =
            env.addSource(new ItemTransactionGeneratorSource(params))
                    .name("Item Transaction Generator");

    final FlinkKafkaProducer<ItemTransaction> transactionSink =
            new FlinkKafkaProducer<>(
                    params.getRequired(KafkaItemTransactionJob.TRANSACTION_INPUT_TOPIC_KEY),
                    new TransactionSchema(),
                    Utils.readKafkaProperties(params, false),
                    Optional.empty());

    transactions.keyBy("itemId").addSink(transactionSink).name("Transaction Kafka Sink");

    // Queries are optional and follow the same source -> keyBy -> Kafka shape.
    final boolean generateQueries = params.getBoolean(GENERATE_QUERIES, false);
    if (generateQueries) {
        final DataStream<Query> generatedQueries =
                env.addSource(new QueryGeneratorSource(params)).name("Query Generator");

        final FlinkKafkaProducer<Query> queryKafkaSink =
                new FlinkKafkaProducer<>(
                        params.getRequired(KafkaItemTransactionJob.QUERY_INPUT_TOPIC_KEY),
                        new QuerySchema(),
                        Utils.readKafkaProperties(params, false),
                        Optional.empty());

        generatedQueries.keyBy("itemId").addSink(queryKafkaSink).name("Query Kafka Sink");
    }

    env.execute("Kafka Data generator");
}
Example #10
Source File: GenericKafkaSinkProviderTest.java From flink-statefun with Apache License 2.0 | 5 votes |
/**
 * Checks that {@code GenericKafkaSinkProvider} turns the egress definition loaded from
 * {@code generic-kafka-egress.yaml} into a {@code FlinkKafkaProducer}.
 */
@Test
public void exampleUsage() {
    final ClassLoader resourceLoader = getClass().getClassLoader();
    final JsonNode egressDefinition =
            loadAsJsonFromClassResource(resourceLoader, "generic-kafka-egress.yaml");

    final EgressIdentifier<Any> egressId = new EgressIdentifier<>("foo", "bar", Any.class);
    final JsonEgressSpec<?> spec =
            new JsonEgressSpec<>(
                    KafkaEgressTypes.GENERIC_KAFKA_EGRESS_TYPE, egressId, egressDefinition);

    final SinkFunction<?> sink = new GenericKafkaSinkProvider().forSpec(spec);

    assertThat(sink, instanceOf(FlinkKafkaProducer.class));
}
Example #11
Source File: KafkaStrSink.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); //并行度为1 env.setParallelism(1); Properties properties = new Properties(); properties.setProperty("bootstrap.servers", "192.168.50.43:9092"); String topic = "test006"; FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(topic, new ProducerStringSerializationSchema(topic), properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE); //创建一个List,里面有两个Tuple2元素 List<String> list = new ArrayList<>(); list.add("aaa"); list.add("bbb"); list.add("ccc"); list.add("ddd"); list.add("eee"); list.add("fff"); list.add("aaa"); //统计每个单词的数量 env.fromCollection(list) .addSink(producer) .setParallelism(4); env.execute("sink demo : kafka str"); }
Example #12
Source File: KafkaObjSink.java From blog_demos with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); //并行度为1 env.setParallelism(1); Properties properties = new Properties(); //kafka的broker地址 properties.setProperty("bootstrap.servers", "192.168.50.43:9092"); String topic = "test006"; FlinkKafkaProducer<Tuple2<String, Integer>> producer = new FlinkKafkaProducer<>(topic, new ObjSerializationSchema(topic), properties, FlinkKafkaProducer.Semantic.EXACTLY_ONCE); //创建一个List,里面有两个Tuple2元素 List<Tuple2<String, Integer>> list = new ArrayList<>(); list.add(new Tuple2("aaa", 1)); list.add(new Tuple2("bbb", 1)); list.add(new Tuple2("ccc", 1)); list.add(new Tuple2("ddd", 1)); list.add(new Tuple2("eee", 1)); list.add(new Tuple2("fff", 1)); list.add(new Tuple2("aaa", 1)); //统计每个单词的数量 env.fromCollection(list) .keyBy(0) .sum(1) .addSink(producer) .setParallelism(4); env.execute("sink demo : kafka obj"); }
Example #13
Source File: KafkaDynamicSink.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates the {@code FlinkKafkaProducer} used as the sink function.
 *
 * @param topic Kafka topic passed to the producer
 * @param properties Kafka properties passed to the producer
 * @param serializationSchema schema used to serialize {@code RowData} records
 * @param partitioner optional partitioner passed to the producer
 * @return a new producer built from the four arguments
 */
@Override
protected SinkFunction<RowData> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<RowData> serializationSchema,
        Optional<FlinkKafkaPartitioner<RowData>> partitioner) {
    final FlinkKafkaProducer<RowData> producer =
            new FlinkKafkaProducer<>(topic, serializationSchema, properties, partitioner);
    return producer;
}
Example #14
Source File: KafkaEventsGeneratorJob.java From flink with Apache License 2.0 | 4 votes |
/**
 * Generates events with an {@code EventsGeneratorSource} and writes them to Kafka.
 *
 * <p>Recognized arguments (with defaults): {@code error-rate} (0.0), {@code sleep} (1),
 * {@code kafka-topic} (no default) and {@code brokers} ("localhost:9092").
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final double errorRate = params.getDouble("error-rate", 0.0);
    final int sleep = params.getInt("sleep", 1);
    final String kafkaTopic = params.get("kafka-topic");
    final String brokers = params.get("brokers", "localhost:9092");

    System.out.printf(
            "Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n",
            errorRate,
            sleep);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final EventsGeneratorSource generator = new EventsGeneratorSource(errorRate, sleep);
    env.addSource(generator)
            .addSink(new FlinkKafkaProducer<>(brokers, kafkaTopic, new EventDeSerializer()));

    // Trigger program execution.
    env.execute("State machine example Kafka events generator job");
}
Example #15
Source File: KafkaDynamicTableFactoryTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Returns the producer class this test expects, which is {@code FlinkKafkaProducer}.
 *
 * @return {@code FlinkKafkaProducer.class}
 */
@Override
protected Class<?> getExpectedProducerClass() {
    final Class<?> expectedProducer = FlinkKafkaProducer.class;
    return expectedProducer;
}
Example #16
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Runs the base preparation of {@code KafkaProducerTestBase} and then switches the Kafka test
 * environment to the {@code EXACTLY_ONCE} producer semantic.
 *
 * @throws Exception propagated from {@code KafkaProducerTestBase.prepare()}
 */
@BeforeClass
public static void prepare() throws Exception {
    KafkaProducerTestBase.prepare();

    final KafkaTestEnvironmentImpl testEnvironment = (KafkaTestEnvironmentImpl) kafkaServer;
    testEnvironment.setProducerSemantic(FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
}
Example #17
Source File: KafkaSinkBuilder.java From Alink with Apache License 2.0 | 4 votes |
/**
 * Builds the Kafka sink from this builder's topic, serialization schema and properties.
 *
 * @return a new {@code FlinkKafkaProducer} for {@code Row} records
 */
@Override
public RichSinkFunction<Row> build() {
    final SerializationSchema<Row> rowSchema = getSerializationSchema();
    final FlinkKafkaProducer<Row> producer =
            new FlinkKafkaProducer<Row>(topic, rowSchema, properties);
    return producer;
}
Example #18
Source File: KafkaEventsGeneratorJob.java From flink with Apache License 2.0 | 4 votes |
/**
 * Entry point of the Kafka events generator job: events produced by an
 * {@code EventsGeneratorSource} are sent to Kafka through a {@code FlinkKafkaProducer}.
 *
 * <p>Arguments read from the command line: {@code error-rate} (default 0.0), {@code sleep}
 * (default 1), {@code kafka-topic} (no default) and {@code brokers}
 * (default "localhost:9092").
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception propagated from job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool cli = ParameterTool.fromArgs(args);

    final double failureRate = cli.getDouble("error-rate", 0.0);
    final int delay = cli.getInt("sleep", 1);
    final String topic = cli.get("kafka-topic");
    final String bootstrapServers = cli.get("brokers", "localhost:9092");

    System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", failureRate, delay);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.addSource(new EventsGeneratorSource(failureRate, delay))
            .addSink(new FlinkKafkaProducer<>(bootstrapServers, topic, new EventDeSerializer()));

    // Trigger program execution.
    env.execute("State machine example Kafka events generator job");
}
Example #19
Source File: ClickEventCount.java From flink-playgrounds with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); configureEnvironment(params, env); boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION); String inputTopic = params.get("input-topic", "input"); String outputTopic = params.get("output-topic", "output"); String brokers = params.get("bootstrap.servers", "localhost:9092"); Properties kafkaProps = new Properties(); kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers); kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count"); DataStream<ClickEvent> clicks = env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps)) .name("ClickEvent Source") .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) { @Override public long extractTimestamp(final ClickEvent element) { return element.getTimestamp().getTime(); } }); if (inflictBackpressure) { // Force a network shuffle so that the backpressure will affect the buffer pools clicks = clicks .keyBy(ClickEvent::getPage) .map(new BackpressureMap()) .name("Backpressure"); } DataStream<ClickEventStatistics> statistics = clicks .keyBy(ClickEvent::getPage) .timeWindow(WINDOW_SIZE) .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector()) .name("ClickEvent Counter"); statistics .addSink(new FlinkKafkaProducer<>( outputTopic, new ClickEventStatisticsSerializationSchema(outputTopic), kafkaProps, FlinkKafkaProducer.Semantic.AT_LEAST_ONCE)) .name("ClickEventStatistics Sink"); env.execute("Click Event Count"); }
Example #20
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0 | 3 votes |
/** * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}. * * <p>This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in Kafka. * {@link FlinkKafkaShuffleProducer} uses the same key group assignment function * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes. * Hence, each producer task can potentially write to each Kafka partition based on the key. * Here, the number of partitions equals to the key group size. * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts each watermark * to all of the Kafka partitions to make sure watermark information is propagated properly. * * <p>Attention: make sure kafkaProperties include * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly. * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer. * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions. * They are not necessarily the same and allowed to be set independently. 
* * @see FlinkKafkaShuffle#persistentKeyBy * @see FlinkKafkaShuffle#readKeyBy * * @param dataStream Data stream to be shuffled * @param topic Kafka topic written to * @param kafkaProperties Kafka properties for Kafka Producer * @param keySelector Key selector to retrieve key from `dataStream' * @param <T> Type of the input data stream * @param <K> Type of key */ public static <T, K> void writeKeyBy( DataStream<T> dataStream, String topic, Properties kafkaProperties, KeySelector<T, K> keySelector) { StreamExecutionEnvironment env = dataStream.getExecutionEnvironment(); TypeSerializer<T> typeSerializer = dataStream.getType().createSerializer(env.getConfig()); // write data to Kafka FlinkKafkaShuffleProducer<T, K> kafkaProducer = new FlinkKafkaShuffleProducer<>( topic, typeSerializer, kafkaProperties, env.clean(keySelector), FlinkKafkaProducer.Semantic.EXACTLY_ONCE, FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE); // make sure the sink parallelism is set to producerParallelism Preconditions.checkArgument( kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null, "Missing producer parallelism for Kafka Shuffle"); int producerParallelism = PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE); addKafkaShuffle(dataStream, kafkaProducer, producerParallelism); }
Example #21
Source File: RandomKafkaDataGeneratorJob.java From flink-tutorials with Apache License 2.0 | 3 votes |
/**
 * Runs the data generator job: strings from a {@code UUIDGeneratorSource} are written to the
 * Kafka topic configured under {@code K_KAFKA_TOPIC} and also printed.
 *
 * @param args command-line arguments, parsed with {@code Utils.parseArgs}
 * @throws Exception propagated from argument parsing, property loading or job execution
 */
public static void main(String[] args) throws Exception {
    final ParameterTool params = Utils.parseArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final String topic = params.getRequired(K_KAFKA_TOPIC);
    final FlinkKafkaProducer<String> producer =
            new FlinkKafkaProducer<>(topic, new SimpleStringSchema(), Utils.readKafkaProperties(params));

    final DataStream<String> generated =
            env.addSource(new UUIDGeneratorSource()).name("Data Generator Source");

    generated.addSink(producer).name("Kafka Sink").uid("Kafka Sink");
    generated.print();

    env.execute("Data Generator Job");
}