org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer Java Examples
The following examples show how to use org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
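Across the examples the recurring pattern is the same: build a Properties object with at least bootstrap.servers, pick a serialization schema for the record type, and attach a FlinkKafkaProducer to a DataStream with addSink. The following minimal, self-contained sketch shows that pattern; the broker address and topic name are placeholders, not values taken from any example below.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;

public class MinimalKafkaSink {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Producer configuration: only the broker list is strictly required here.
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker

        // Writes each String element to the given topic with the connector's default semantic.
        FlinkKafkaProducer<String> sink = new FlinkKafkaProducer<>(
            "example-topic",              // placeholder topic
            new SimpleStringSchema(),     // element -> byte[] value
            props);

        env.fromElements("a", "b", "c")
            .addSink(sink)
            .name("Kafka Sink");

        env.execute("Minimal FlinkKafkaProducer example");
    }
}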
Example #1
Source File: KafkaExample.java From Flink-CEPplus with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer<>(
            parameterTool.getRequired("output-topic"),
            new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
            parameterTool.getProperties(),
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

    env.execute("Modern Kafka Example");
}
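With Semantic.EXACTLY_ONCE, as used here, the producer writes inside Kafka transactions that are committed when a checkpoint completes. A detail worth noting (also visible in Examples #4 and #6 below) is that the Kafka transaction timeout usually has to be raised above the broker default so that it covers the longest expected gap between checkpoints plus recovery time. A hedged sketch of the extra configuration follows; the one-hour value is purely illustrative, not a recommendation from this example.

Properties props = parameterTool.getProperties();
// The transaction must survive the longest possible gap between checkpoints;
// the broker-side transaction.max.timeout.ms must be at least this large as well.
props.setProperty("transaction.timeout.ms", String.valueOf(60 * 60 * 1000)); // 1 hour, illustrative

FlinkKafkaProducer<KafkaEvent> producer = new FlinkKafkaProducer<>(
    parameterTool.getRequired("output-topic"),
    new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
    props,
    FlinkKafkaProducer.Semantic.EXACTLY_ONCE);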
Example #2
Source File: KafkaExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer<>(
            parameterTool.getRequired("output-topic"),
            new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
            parameterTool.getProperties(),
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

    env.execute("Modern Kafka Example");
}
Example #3
Source File: KafkaExample.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer<>(
            parameterTool.getRequired("output-topic"),
            new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()),
            parameterTool.getProperties(),
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE));

    env.execute("Modern Kafka Example");
}
Example #4
Source File: KafkaSinkProvider.java From stateful-functions with Apache License 2.0
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
    KafkaEgressSpec<T> spec = asSpec(egressSpec);

    Properties properties = new Properties();
    properties.putAll(spec.properties());
    properties.put("bootstrap.servers", spec.kafkaAddress());

    Semantic producerSemantic = semanticFromSpec(spec);
    if (producerSemantic == Semantic.EXACTLY_ONCE) {
        properties.put("transaction.timeout.ms", spec.transactionTimeoutDuration().toMillis());
    }

    return new FlinkKafkaProducer<>(
        randomKafkaTopic(),
        serializerFromSpec(spec),
        properties,
        producerSemantic,
        spec.kafkaProducerPoolSize());
}
Example #5
Source File: AvroDataGeneratorJob.java From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    KafkaSerializationSchema<Message> schema = ClouderaRegistryKafkaSerializationSchema
        .<Message>builder(params.getRequired(K_KAFKA_TOPIC))
        .setConfig(Utils.readSchemaRegistryProperties(params))
        .setKey(Message::getId)
        .build();

    FlinkKafkaProducer<Message> kafkaSink = new FlinkKafkaProducer<>(
        "default",
        schema,
        Utils.readKafkaProperties(params),
        FlinkKafkaProducer.Semantic.AT_LEAST_ONCE);

    DataStream<Message> input = env.addSource(new DataGeneratorSource()).name("Data Generator Source");

    input.addSink(kafkaSink)
        .name("Kafka Sink")
        .uid("Kafka Sink");
    input.print();

    env.execute("Data Generator Job");
}
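The ClouderaRegistryKafkaSerializationSchema builder above yields a KafkaSerializationSchema backed by a schema registry. Without a registry, a hand-written KafkaSerializationSchema for the same Message type might look roughly like the sketch below; the getPayload() accessor and the naive JSON rendering are assumptions made for illustration only.

import java.nio.charset.StandardCharsets;

import org.apache.flink.streaming.connectors.kafka.KafkaSerializationSchema;
import org.apache.kafka.clients.producer.ProducerRecord;

public class MessageJsonSerializationSchema implements KafkaSerializationSchema<Message> {

    private final String topic;

    public MessageJsonSerializationSchema(String topic) {
        this.topic = topic;
    }

    @Override
    public ProducerRecord<byte[], byte[]> serialize(Message element, Long timestamp) {
        // Key by the message id so records with the same id land in the same partition.
        byte[] key = element.getId().getBytes(StandardCharsets.UTF_8);
        // Naive JSON rendering; a real job would use Jackson or Avro instead.
        String json = String.format("{\"id\":\"%s\",\"payload\":\"%s\"}",
            element.getId(), element.getPayload()); // getPayload() is hypothetical
        return new ProducerRecord<>(topic, key, json.getBytes(StandardCharsets.UTF_8));
    }
}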
Example #6
Source File: KafkaSinkProvider.java From flink-statefun with Apache License 2.0
@Override
public <T> SinkFunction<T> forSpec(EgressSpec<T> egressSpec) {
    KafkaEgressSpec<T> spec = asSpec(egressSpec);

    Properties properties = new Properties();
    properties.putAll(spec.properties());
    properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, spec.kafkaAddress());

    Semantic producerSemantic = semanticFromSpec(spec);
    if (producerSemantic == Semantic.EXACTLY_ONCE) {
        properties.setProperty(
            ProducerConfig.TRANSACTION_TIMEOUT_CONFIG,
            String.valueOf(spec.transactionTimeoutDuration().toMillis()));
    }

    return new FlinkKafkaProducer<>(
        randomKafkaTopic(),
        serializerFromSpec(spec),
        properties,
        producerSemantic,
        spec.kafkaProducerPoolSize());
}
Example #7
Source File: ClickEventCount.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    configureEnvironment(params, env);

    String inputTopic = params.get("input-topic", "input");
    String outputTopic = params.get("output-topic", "output");
    String brokers = params.get("bootstrap.servers", "localhost:9092");
    Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
        .name("ClickEvent Source")
        .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
            @Override
            public long extractTimestamp(final ClickEvent element) {
                return element.getTimestamp().getTime();
            }
        })
        .keyBy(ClickEvent::getPage)
        .timeWindow(WINDOW_SIZE)
        .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
        .name("ClickEvent Counter")
        .addSink(new FlinkKafkaProducer<>(
            outputTopic,
            new ClickEventStatisticsSerializationSchema(outputTopic),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
        .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example #8
Source File: KafkaItemTransactionJob.java From flink-tutorials with Apache License 2.0
public void writeQueryOutput(ParameterTool params, DataStream<QueryResult> queryResultStream) {
    // Query output is written back to Kafka in a tab-delimited format for readability
    FlinkKafkaProducer<QueryResult> queryOutputSink = new FlinkKafkaProducer<>(
        params.getRequired(QUERY_OUTPUT_TOPIC_KEY),
        new QueryResultSchema(),
        Utils.readKafkaProperties(params, false),
        Optional.of(new HashingKafkaPartitioner<>()));

    queryResultStream
        .addSink(queryOutputSink)
        .name("Kafka Query Result Sink")
        .uid("Kafka Query Result Sink");
}
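HashingKafkaPartitioner is a class from this tutorial project and is not shown on this page. As a rough idea of what such a custom partitioner looks like, a FlinkKafkaPartitioner that spreads records by the hash of their serialized key could be sketched as below; this is purely illustrative and not the tutorial's actual implementation. It would be passed to the producer as Optional.of(new KeyHashPartitioner<>()), exactly as in the example above.

import java.util.Arrays;

import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner;

public class KeyHashPartitioner<T> extends FlinkKafkaPartitioner<T> {

    @Override
    public int partition(T record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
        // Records without a key all go to the first available partition.
        if (key == null) {
            return partitions[0];
        }
        // Stable mapping from the serialized key to one of the available partitions.
        int hash = Arrays.hashCode(key) & Integer.MAX_VALUE;
        return partitions[hash % partitions.length];
    }
}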
Example #9
Source File: KafkaDataGeneratorJob.java From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new RuntimeException("Path to the properties file is expected as the only argument.");
    }
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<ItemTransaction> generatedInput =
        env.addSource(new ItemTransactionGeneratorSource(params))
            .name("Item Transaction Generator");

    FlinkKafkaProducer<ItemTransaction> kafkaSink = new FlinkKafkaProducer<>(
        params.getRequired(KafkaItemTransactionJob.TRANSACTION_INPUT_TOPIC_KEY),
        new TransactionSchema(),
        Utils.readKafkaProperties(params, false),
        Optional.empty());

    generatedInput.keyBy("itemId").addSink(kafkaSink).name("Transaction Kafka Sink");

    if (params.getBoolean(GENERATE_QUERIES, false)) {
        DataStream<Query> queries = env.addSource(new QueryGeneratorSource(params))
            .name("Query Generator");

        FlinkKafkaProducer<Query> querySink = new FlinkKafkaProducer<>(
            params.getRequired(KafkaItemTransactionJob.QUERY_INPUT_TOPIC_KEY),
            new QuerySchema(),
            Utils.readKafkaProperties(params, false),
            Optional.empty());

        queries.keyBy("itemId").addSink(querySink).name("Query Kafka Sink");
    }

    env.execute("Kafka Data generator");
}
Example #10
Source File: GenericKafkaSinkProviderTest.java From flink-statefun with Apache License 2.0
@Test
public void exampleUsage() {
    JsonNode egressDefinition =
        loadAsJsonFromClassResource(getClass().getClassLoader(), "generic-kafka-egress.yaml");
    JsonEgressSpec<?> spec =
        new JsonEgressSpec<>(
            KafkaEgressTypes.GENERIC_KAFKA_EGRESS_TYPE,
            new EgressIdentifier<>("foo", "bar", Any.class),
            egressDefinition);

    GenericKafkaSinkProvider provider = new GenericKafkaSinkProvider();
    SinkFunction<?> sink = provider.forSpec(spec);

    assertThat(sink, instanceOf(FlinkKafkaProducer.class));
}
Example #11
Source File: KafkaStrSink.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the overall parallelism to 1
    env.setParallelism(1);

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");

    String topic = "test006";
    FlinkKafkaProducer<String> producer = new FlinkKafkaProducer<>(topic,
            new ProducerStringSerializationSchema(topic),
            properties,
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE);

    // build a list of strings to send to Kafka
    List<String> list = new ArrayList<>();
    list.add("aaa");
    list.add("bbb");
    list.add("ccc");
    list.add("ddd");
    list.add("eee");
    list.add("fff");
    list.add("aaa");

    // write the collection to Kafka with a sink parallelism of 4
    env.fromCollection(list)
        .addSink(producer)
        .setParallelism(4);

    env.execute("sink demo : kafka str");
}
Example #12
Source File: KafkaObjSink.java From blog_demos with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set the overall parallelism to 1
    env.setParallelism(1);

    Properties properties = new Properties();
    // Kafka broker address
    properties.setProperty("bootstrap.servers", "192.168.50.43:9092");

    String topic = "test006";
    FlinkKafkaProducer<Tuple2<String, Integer>> producer = new FlinkKafkaProducer<>(topic,
            new ObjSerializationSchema(topic),
            properties,
            FlinkKafkaProducer.Semantic.EXACTLY_ONCE);

    // build a list of Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 1));
    list.add(new Tuple2<>("ccc", 1));
    list.add(new Tuple2<>("ddd", 1));
    list.add(new Tuple2<>("eee", 1));
    list.add(new Tuple2<>("fff", 1));
    list.add(new Tuple2<>("aaa", 1));

    // count the occurrences of each word, then write the results to Kafka
    env.fromCollection(list)
        .keyBy(0)
        .sum(1)
        .addSink(producer)
        .setParallelism(4);

    env.execute("sink demo : kafka obj");
}
Example #13
Source File: KafkaDynamicSink.java From flink with Apache License 2.0
@Override
protected SinkFunction<RowData> createKafkaProducer(
        String topic,
        Properties properties,
        SerializationSchema<RowData> serializationSchema,
        Optional<FlinkKafkaPartitioner<RowData>> partitioner) {
    return new FlinkKafkaProducer<>(
        topic,
        serializationSchema,
        properties,
        partitioner);
}
Example #14
Source File: KafkaEventsGeneratorJob.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    double errorRate = params.getDouble("error-rate", 0.0);
    int sleep = params.getInt("sleep", 1);

    String kafkaTopic = params.get("kafka-topic");
    String brokers = params.get("brokers", "localhost:9092");

    System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env
        .addSource(new EventsGeneratorSource(errorRate, sleep))
        .addSink(new FlinkKafkaProducer<>(brokers, kafkaTopic, new EventDeSerializer()));

    // trigger program execution
    env.execute("State machine example Kafka events generator job");
}
Example #15
Source File: KafkaDynamicTableFactoryTest.java From flink with Apache License 2.0
@Override
protected Class<?> getExpectedProducerClass() {
    return FlinkKafkaProducer.class;
}
Example #16
Source File: KafkaShuffleTestBase.java From flink with Apache License 2.0
@BeforeClass
public static void prepare() throws Exception {
    KafkaProducerTestBase.prepare();
    ((KafkaTestEnvironmentImpl) kafkaServer).setProducerSemantic(FlinkKafkaProducer.Semantic.EXACTLY_ONCE);
}
Example #17
Source File: KafkaSinkBuilder.java From Alink with Apache License 2.0
@Override
public RichSinkFunction<Row> build() {
    SerializationSchema<Row> serializationSchema = getSerializationSchema();
    return new FlinkKafkaProducer<Row>(topic, serializationSchema, properties);
}
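This builder uses the three-argument constructor (topic, SerializationSchema, Properties). In the connector versions these examples appear to target, that constructor defaults to a FlinkFixedPartitioner, which pins each sink subtask to a single Kafka partition. A variation that instead lets Kafka's own partitioner spread records is sketched below; this is an illustrative alternative, not Alink's actual code.

@Override
public RichSinkFunction<Row> build() {
    SerializationSchema<Row> serializationSchema = getSerializationSchema();
    // Optional.empty() leaves partitioning to the Kafka producer itself
    // instead of pinning each sink subtask to one partition.
    return new FlinkKafkaProducer<Row>(
        topic,
        serializationSchema,
        properties,
        Optional.<FlinkKafkaPartitioner<Row>>empty());
}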
Example #18
Source File: KafkaEventsGeneratorJob.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    double errorRate = params.getDouble("error-rate", 0.0);
    int sleep = params.getInt("sleep", 1);

    String kafkaTopic = params.get("kafka-topic");
    String brokers = params.get("brokers", "localhost:9092");

    System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env
        .addSource(new EventsGeneratorSource(errorRate, sleep))
        .addSink(new FlinkKafkaProducer<>(brokers, kafkaTopic, new EventDeSerializer()));

    // trigger program execution
    env.execute("State machine example Kafka events generator job");
}
Example #19
Source File: ClickEventCount.java From flink-playgrounds with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    configureEnvironment(params, env);

    boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION);

    String inputTopic = params.get("input-topic", "input");
    String outputTopic = params.get("output-topic", "output");
    String brokers = params.get("bootstrap.servers", "localhost:9092");
    Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    DataStream<ClickEvent> clicks =
        env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
            .name("ClickEvent Source")
            .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
                @Override
                public long extractTimestamp(final ClickEvent element) {
                    return element.getTimestamp().getTime();
                }
            });

    if (inflictBackpressure) {
        // Force a network shuffle so that the backpressure will affect the buffer pools
        clicks = clicks
            .keyBy(ClickEvent::getPage)
            .map(new BackpressureMap())
            .name("Backpressure");
    }

    DataStream<ClickEventStatistics> statistics = clicks
        .keyBy(ClickEvent::getPage)
        .timeWindow(WINDOW_SIZE)
        .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
        .name("ClickEvent Counter");

    statistics
        .addSink(new FlinkKafkaProducer<>(
            outputTopic,
            new ClickEventStatisticsSerializationSchema(outputTopic),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
        .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example #20
Source File: FlinkKafkaShuffle.java From flink with Apache License 2.0
/**
 * The write side of {@link FlinkKafkaShuffle#persistentKeyBy}.
 *
 * <p>This function contains a {@link FlinkKafkaShuffleProducer} to shuffle and persist data in Kafka.
 * {@link FlinkKafkaShuffleProducer} uses the same key group assignment function
 * {@link KeyGroupRangeAssignment#assignKeyToParallelOperator} to decide which partition a key goes.
 * Hence, each producer task can potentially write to each Kafka partition based on the key.
 * Here, the number of partitions equals to the key group size.
 * In the case of using {@link TimeCharacteristic#EventTime}, each producer task broadcasts each watermark
 * to all of the Kafka partitions to make sure watermark information is propagated properly.
 *
 * <p>Attention: make sure kafkaProperties include
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} and {@link FlinkKafkaShuffle#PARTITION_NUMBER} explicitly.
 * {@link FlinkKafkaShuffle#PRODUCER_PARALLELISM} is the parallelism of the producer.
 * {@link FlinkKafkaShuffle#PARTITION_NUMBER} is the number of partitions.
 * They are not necessarily the same and allowed to be set independently.
 *
 * @see FlinkKafkaShuffle#persistentKeyBy
 * @see FlinkKafkaShuffle#readKeyBy
 *
 * @param dataStream      Data stream to be shuffled
 * @param topic           Kafka topic written to
 * @param kafkaProperties Kafka properties for Kafka Producer
 * @param keySelector     Key selector to retrieve key from `dataStream'
 * @param <T>             Type of the input data stream
 * @param <K>             Type of key
 */
public static <T, K> void writeKeyBy(
        DataStream<T> dataStream,
        String topic,
        Properties kafkaProperties,
        KeySelector<T, K> keySelector) {

    StreamExecutionEnvironment env = dataStream.getExecutionEnvironment();
    TypeSerializer<T> typeSerializer = dataStream.getType().createSerializer(env.getConfig());

    // write data to Kafka
    FlinkKafkaShuffleProducer<T, K> kafkaProducer = new FlinkKafkaShuffleProducer<>(
        topic,
        typeSerializer,
        kafkaProperties,
        env.clean(keySelector),
        FlinkKafkaProducer.Semantic.EXACTLY_ONCE,
        FlinkKafkaProducer.DEFAULT_KAFKA_PRODUCERS_POOL_SIZE);

    // make sure the sink parallelism is set to producerParallelism
    Preconditions.checkArgument(
        kafkaProperties.getProperty(PRODUCER_PARALLELISM) != null,
        "Missing producer parallelism for Kafka Shuffle");
    int producerParallelism = PropertiesUtil.getInt(kafkaProperties, PRODUCER_PARALLELISM, Integer.MIN_VALUE);

    addKafkaShuffle(dataStream, kafkaProducer, producerParallelism);
}
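A hedged usage sketch of the write side described in the Javadoc above: the properties must carry both shuffle settings explicitly before writeKeyBy is called. The topic name, parallelism, partition count, and key selector are placeholders, and it is assumed here that the PRODUCER_PARALLELISM and PARTITION_NUMBER constants are accessible from the calling code; otherwise the higher-level FlinkKafkaShuffle#persistentKeyBy entry point, which sets them on the caller's behalf, is the intended API.

Properties kafkaProperties = new Properties();
kafkaProperties.setProperty("bootstrap.servers", "localhost:9092"); // placeholder broker
// Both settings are mandatory for the Kafka shuffle and may differ from each other.
kafkaProperties.setProperty(FlinkKafkaShuffle.PRODUCER_PARALLELISM, "4"); // placeholder value
kafkaProperties.setProperty(FlinkKafkaShuffle.PARTITION_NUMBER, "8");     // placeholder value

FlinkKafkaShuffle.writeKeyBy(
    dataStream,                  // the DataStream<T> to shuffle (placeholder)
    "shuffle-topic",             // Kafka topic backing the shuffle (placeholder)
    kafkaProperties,
    record -> record.getKey());  // key selector (placeholder)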
Example #21
Source File: RandomKafkaDataGeneratorJob.java From flink-tutorials with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = Utils.parseArgs(args);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    FlinkKafkaProducer<String> kafkaSink = new FlinkKafkaProducer<String>(
        params.getRequired(K_KAFKA_TOPIC),
        new SimpleStringSchema(),
        Utils.readKafkaProperties(params));

    DataStream<String> input = env.addSource(new UUIDGeneratorSource()).name("Data Generator Source");

    input.addSink(kafkaSink).name("Kafka Sink").uid("Kafka Sink");
    input.print();

    env.execute("Data Generator Job");
}