org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09 Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09.
Each example notes the open-source project and source file it was taken from.
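Before the individual examples, here is a minimal sketch of the usage pattern they all share: build a Properties object with the Kafka bootstrap servers and a consumer group id, construct the consumer with a topic name and a DeserializationSchema, and add it as a source to the execution environment. The topic name, group id, and broker address below are placeholders, not values from any of the listed projects.

public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  // placeholder connection settings
  Properties props = new Properties();
  props.setProperty("bootstrap.servers", "localhost:9092"); // Kafka broker(s)
  props.setProperty("group.id", "my-consumer-group");       // consumer group id

  // "my-topic" is a placeholder; SimpleStringSchema turns each record value into a String
  DataStream<String> lines = env.addSource(
      new FlinkKafkaConsumer09<>("my-topic", new SimpleStringSchema(), props));

  lines.print();
  env.execute("FlinkKafkaConsumer09 minimal example");
}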
Example #1
Source File: ReadFromKafka.java from kafka-flink-101 (Apache License 2.0)
public static void main(String[] args) throws Exception {
  // create execution environment
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");
  properties.setProperty("group.id", "flink_consumer");

  DataStream<String> stream = env
      .addSource(new FlinkKafkaConsumer09<>("flink-demo", new SimpleStringSchema(), properties));

  stream.map(new MapFunction<String, String>() {
    private static final long serialVersionUID = -6867736771747690202L;

    @Override
    public String map(String value) throws Exception {
      return "Stream Value: " + value;
    }
  }).print();

  env.execute();
}
Example #2
Source File: KafkaSource09.java from sylph (Apache License 2.0)
@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
    KafkaDeserializationSchema<Row> deserializationSchema, Properties properties) {
  // the kafka08 and kafka09 connectors need the ZooKeeper address configured
  properties.put("zookeeper.connect", config.getZookeeper());
  // "enable.auto.commit" -> true
  // "auto.commit.interval.ms" -> 90000
  return new FlinkKafkaConsumer09<>(topicSets, deserializationSchema, properties);
}
Example #3
Source File: KafkaApp.java from Mastering-Apache-Flink (MIT License)
public static void main(String[] args) throws Exception {
  StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");
  properties.setProperty("group.id", "test");

  DataStream<TemperatureEvent> inputEventStream = env.addSource(
      new FlinkKafkaConsumer09<TemperatureEvent>("test", new EventDeserializationSchema(), properties));

  Pattern<TemperatureEvent, ?> warningPattern = Pattern.<TemperatureEvent> begin("first")
      .subtype(TemperatureEvent.class).where(new FilterFunction<TemperatureEvent>() {
        private static final long serialVersionUID = 1L;

        public boolean filter(TemperatureEvent value) {
          return value.getTemperature() >= 26.0;
        }
      }).within(Time.seconds(10));

  DataStream<Alert> patternStream = CEP.pattern(inputEventStream, warningPattern)
      .select(new PatternSelectFunction<TemperatureEvent, Alert>() {
        private static final long serialVersionUID = 1L;

        public Alert select(Map<String, TemperatureEvent> event) throws Exception {
          return new Alert("Temperature Rise Detected:" + event.get("first").getTemperature()
              + " on machine name:" + event.get("first").getMachineName());
        }
      });

  patternStream.print();
  env.execute("CEP on Temperature Sensor");
}
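The EventDeserializationSchema used above is defined elsewhere in the project and is not shown in this snippet. A minimal sketch of what such a schema could look like follows; the wire format (a UTF-8 "machineName,temperature" CSV string) and the TemperatureEvent constructor are assumptions for illustration, not the project's actual implementation.

// Hypothetical sketch of a DeserializationSchema producing TemperatureEvent records.
// Assumes each Kafka record value is a UTF-8 string of the form "machineName,temperature".
public class EventDeserializationSchema implements DeserializationSchema<TemperatureEvent> {

  @Override
  public TemperatureEvent deserialize(byte[] message) throws IOException {
    String[] parts = new String(message, StandardCharsets.UTF_8).split(",");
    return new TemperatureEvent(parts[0], Double.parseDouble(parts[1]));
  }

  @Override
  public boolean isEndOfStream(TemperatureEvent nextElement) {
    return false; // the stream is unbounded
  }

  @Override
  public TypeInformation<TemperatureEvent> getProducedType() {
    return TypeExtractor.getForClass(TemperatureEvent.class);
  }
}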
Example #4
Source File: StreamingJob.java from Mastering-Apache-Flink (MIT License)
public static void main(String[] args) throws Exception {
  // set up the streaming execution environment
  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
  // env.enableCheckpointing(5000);
  env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

  Properties properties = new Properties();
  properties.setProperty("bootstrap.servers", "localhost:9092");
  properties.setProperty("zookeeper.connect", "localhost:2181");
  properties.setProperty("group.id", "test");

  FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(),
      properties);
  myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

  DataStream<Tuple2<String, Double>> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0)
      .timeWindow(Time.seconds(300))
      .apply(new WindowFunction<Tuple2<String, Double>, Tuple2<String, Double>, Tuple, TimeWindow>() {
        @Override
        public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Double>> input,
            Collector<Tuple2<String, Double>> out) throws Exception {
          double sum = 0L;
          int count = 0;
          for (Tuple2<String, Double> record : input) {
            sum += record.f1;
            count++;
          }
          Tuple2<String, Double> result = input.iterator().next();
          result.f1 = (sum / count);
          out.collect(result);
        }
      });

  keyedStream.print();

  // execute program
  env.execute("Flink Streaming Java API Skeleton");
}
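The CustomWatermarkEmitter assigned to the consumer above is defined elsewhere in the project. A minimal sketch of one possible implementation is shown below; it assumes a punctuated assigner over String records whose first comma-separated field is the event timestamp in epoch milliseconds, which is an illustrative assumption rather than the project's exact code.

// Hypothetical sketch of CustomWatermarkEmitter: a punctuated watermark assigner that
// assumes each record starts with its epoch-millisecond timestamp, e.g. "1480000000000,22.5".
public class CustomWatermarkEmitter implements AssignerWithPunctuatedWatermarks<String> {

  private static final long serialVersionUID = 1L;

  @Override
  public long extractTimestamp(String element, long previousElementTimestamp) {
    return Long.parseLong(element.split(",")[0]);
  }

  @Override
  public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
    // emit a watermark for every record
    return new Watermark(extractedTimestamp);
  }
}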
Example #5
Source File: StreamingETL.java from flink-streaming-etl (Apache License 2.0)
public static void main(String[] args) throws Exception {
  // parse arguments
  ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

  // create streaming environment
  final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

  // enable event time processing
  env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

  // enable fault-tolerance
  env.enableCheckpointing(1000);

  // enable restarts
  env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

  env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

  // run each operator separately
  env.disableOperatorChaining();

  // get data from Kafka
  Properties kParams = params.getProperties();
  kParams.setProperty("group.id", UUID.randomUUID().toString());
  DataStream<ObjectNode> inputStream = env.addSource(
      new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
      .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
        @Override
        public long extractTimestamp(ObjectNode jsonNodes) {
          return jsonNodes.get("timestamp_ms").asLong();
        }
      }).name("Timestamp extractor");

  // filter out records without lang field
  DataStream<ObjectNode> tweetsWithLang = inputStream
      .filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang"))
      .name("Filter records without 'lang' field");

  // select only lang = "en" tweets
  DataStream<ObjectNode> englishTweets = tweetsWithLang
      .filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en"))
      .name("Select 'lang'=en tweets");

  // write to file system
  RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
  rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // do a bucket for each minute
  englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

  // build aggregates (count per language) using window (10 seconds tumbling):
  DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang
      .keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
      .timeWindow(Time.seconds(10))
      .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter())
      .name("Count per Language (10 seconds tumbling)");

  // write window aggregate to ElasticSearch
  List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
  ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
  languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

  // word-count on the tweet stream
  DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
      // get text from tweets
      .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
      // split text into (word, 1) tuples
      .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
        @Override
        public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
          String[] splits = s.split(" ");
          for (String sp : splits) {
            collector.collect(new Tuple2<>(sp, 1L));
          }
        }
      }).name("Tokenize words")
      // group by word
      .keyBy(0)
      // build 1 min windows, compute every 10 seconds --> count word frequency
      .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow())
      .name("Count word frequency (1 min, 10 sec sliding window)")
      // build top n every 10 seconds
      .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

  // write top Ns to Kafka topic
  topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties()))
      .name("Write topN to Kafka");

  env.execute("Streaming ETL");
}
Example #6
Source File: Kafka09AvroTableSource.java from df_data_service (Apache License 2.0)
@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(String topic, Properties properties,
    DeserializationSchema<Row> deserializationSchema) {
  return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}
Example #7
Source File: UnitTestSuiteFlink.java from df_data_service (Apache License 2.0)
public static void testFlinkSQL() {
  LOG.info("Only Unit Testing Function is enabled");
  String resultFile = "/home/vagrant/test.txt";

  try {
    String jarPath = DFInitService.class.getProtectionDomain().getCodeSource().getLocation().getPath();
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createRemoteEnvironment("localhost", 6123, jarPath)
        .setParallelism(1);
    String kafkaTopic = "finance";
    String kafkaTopic_stage = "df_trans_stage_finance";
    String kafkaTopic_out = "df_trans_out_finance";

    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("group.id", "consumer3");

    // Internally convert the JSON string to JSON - Begin
    DataStream<String> stream = env
        .addSource(new FlinkKafkaConsumer09<>(kafkaTopic, new SimpleStringSchema(), properties));

    stream.map(new MapFunction<String, String>() {
      @Override
      public String map(String jsonString) throws Exception {
        return jsonString.replaceAll("\\\\", "").replace("\"{", "{").replace("}\"", "}");
      }
    }).addSink(new FlinkKafkaProducer09<String>("localhost:9092", kafkaTopic_stage, new SimpleStringSchema()));
    // Internally convert the JSON string to JSON - End

    String[] fieldNames = new String[] {"name"};
    Class<?>[] fieldTypes = new Class<?>[] {String.class};

    Kafka09AvroTableSource kafkaTableSource = new Kafka09AvroTableSource(
        kafkaTopic_stage,
        properties,
        fieldNames,
        fieldTypes);

    //kafkaTableSource.setFailOnMissingField(true);
    tableEnv.registerTableSource("Orders", kafkaTableSource);

    //Table result = tableEnv.sql("SELECT STREAM name FROM Orders");
    Table result = tableEnv.sql("SELECT name FROM Orders");

    Files.deleteIfExists(Paths.get(resultFile));

    // create a TableSink
    TableSink sink = new CsvTableSink(resultFile, "|");
    // write the result Table to the TableSink
    result.writeToSink(sink);

    env.execute("FlinkConsumer");
  } catch (Exception e) {
    e.printStackTrace();
  }
}