org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor.
Each example notes its source project, file, and license.
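All of the examples follow the same pattern: subclass BoundedOutOfOrdernessTimestampExtractor, pass the maximum expected out-of-orderness to the super constructor, and implement extractTimestamp() to return the event time in epoch milliseconds. Here is a minimal sketch of that pattern; the MyEvent class and its getEventTimeMillis() accessor are placeholders invented for illustration, not part of any project below.

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

// Placeholder event type, used only for this sketch.
class MyEvent {
    private final long eventTimeMillis;

    MyEvent(long eventTimeMillis) {
        this.eventTimeMillis = eventTimeMillis;
    }

    long getEventTimeMillis() {
        return eventTimeMillis;
    }
}

// Emits watermarks that trail the highest timestamp seen so far by 5 seconds.
class MyEventTimestampExtractor extends BoundedOutOfOrdernessTimestampExtractor<MyEvent> {

    MyEventTimestampExtractor() {
        super(Time.seconds(5)); // maximum expected out-of-orderness
    }

    @Override
    public long extractTimestamp(MyEvent element) {
        return element.getEventTimeMillis(); // must be epoch milliseconds
    }
}

An instance of such a subclass is then handed to DataStream#assignTimestampsAndWatermarks, as the examples below do.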
Example #1
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java From flink with Apache License 2.0
private void runValidTests(BoundedOutOfOrdernessTimestampExtractor<Long> extractor) {
    assertEquals(new Watermark(Long.MIN_VALUE), extractor.getCurrentWatermark());

    assertEquals(13L, extractor.extractTimestamp(13L, 0L));
    assertEquals(13L, extractor.extractTimestamp(13L, 0L));
    assertEquals(14L, extractor.extractTimestamp(14L, 0L));
    assertEquals(20L, extractor.extractTimestamp(20L, 0L));

    assertEquals(new Watermark(10L), extractor.getCurrentWatermark());

    assertEquals(20L, extractor.extractTimestamp(20L, 0L));
    assertEquals(20L, extractor.extractTimestamp(20L, 0L));
    assertEquals(500L, extractor.extractTimestamp(500L, 0L));

    assertEquals(new Watermark(490L), extractor.getCurrentWatermark());

    assertEquals(Long.MAX_VALUE - 1, extractor.extractTimestamp(Long.MAX_VALUE - 1, 0L));
    assertEquals(new Watermark(Long.MAX_VALUE - 11), extractor.getCurrentWatermark());
}
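In this test the extractor is a LongExtractor constructed with a 10 ms out-of-orderness bound (see the testInitializationAndRuntime example below), so each watermark trails the highest timestamp seen so far by exactly 10: after 20 the watermark is 20 - 10 = 10, after 500 it is 500 - 10 = 490, and after Long.MAX_VALUE - 1 it is Long.MAX_VALUE - 11.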
Example #2
Source File: WordCountIntegrationTest.java From tutorials with MIT License
@Test
public void givenStreamOfEvents_whenProcessEvents_thenShouldApplyWindowingOnTransformation() throws Exception {
    // given
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    SingleOutputStreamOperator<Tuple2<Integer, Long>> windowed = env
        .fromElements(
            new Tuple2<>(16, ZonedDateTime.now().plusMinutes(25).toInstant().getEpochSecond()),
            new Tuple2<>(15, ZonedDateTime.now().plusMinutes(2).toInstant().getEpochSecond()))
        .assignTimestampsAndWatermarks(
            new BoundedOutOfOrdernessTimestampExtractor<Tuple2<Integer, Long>>(Time.seconds(20)) {
                @Override
                public long extractTimestamp(Tuple2<Integer, Long> element) {
                    // the field holds epoch seconds; Flink expects milliseconds
                    return element.f1 * 1000;
                }
            });

    SingleOutputStreamOperator<Tuple2<Integer, Long>> reduced = windowed
        .windowAll(TumblingEventTimeWindows.of(Time.seconds(5)))
        .maxBy(0, true);

    reduced.print();

    // when
    env.execute();
}
Example #3
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java From flink with Apache License 2.0
@Test
public void testInitialFinalAndWatermarkUnderflow() {
    BoundedOutOfOrdernessTimestampExtractor<Long> extractor = new LongExtractor(Time.milliseconds(10L));
    assertEquals(Long.MIN_VALUE, extractor.getCurrentWatermark().getTimestamp());

    extractor.extractTimestamp(Long.MIN_VALUE, -1L);

    // The following two lines check for underflow. The extractor has a max
    // out-of-orderness of 10 ms. We insert an element with a timestamp of
    // Long.MIN_VALUE + 2, which becomes the max timestamp; computing the next
    // watermark as Long.MIN_VALUE + 2 - 10 would underflow, so the extractor
    // must keep the watermark at Long.MIN_VALUE.
    extractor.extractTimestamp(Long.MIN_VALUE + 2, -1);
    assertEquals(Long.MIN_VALUE, extractor.getCurrentWatermark().getTimestamp());

    extractor.extractTimestamp(Long.MAX_VALUE, -1L);
    assertEquals(Long.MAX_VALUE - 10, extractor.getCurrentWatermark().getTimestamp());
}
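The guard works because the extractor never computes a watermark below Long.MIN_VALUE: in the Flink versions these tests target, the internal maximum timestamp starts at Long.MIN_VALUE + maxOutOfOrderness, and the emitted watermark only ever moves forward, so currentMaxTimestamp - maxOutOfOrderness cannot underflow.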
Example #4
Source File: KafkaItemTransactionJob.java From flink-tutorials with Apache License 2.0
public DataStream<ItemTransaction> readTransactionStream(ParameterTool params, StreamExecutionEnvironment env) {
    // We read the ItemTransaction objects directly using the schema
    FlinkKafkaConsumer<ItemTransaction> transactionSource = new FlinkKafkaConsumer<>(
        params.getRequired(TRANSACTION_INPUT_TOPIC_KEY),
        new TransactionSchema(),
        Utils.readKafkaProperties(params, true));

    transactionSource.setCommitOffsetsOnCheckpoints(true);
    transactionSource.setStartFromEarliest();

    // In case event time processing is enabled we assign trailing watermarks for each partition
    transactionSource.assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor<ItemTransaction>(Time.minutes(1)) {
            @Override
            public long extractTimestamp(ItemTransaction transaction) {
                return transaction.ts;
            }
        });

    return env.addSource(transactionSource)
        .name("Kafka Transaction Source")
        .uid("Kafka Transaction Source");
}
Example #5
Source File: BoundedAssigner.java From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // use event time as the stream time characteristic
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    List<Tuple2<String, Long>> collectionInput = new ArrayList<>();
    Tuple2<String, Long> a = new Tuple2<>("first event", 1L);
    Tuple2<String, Long> b = new Tuple2<>("second event", 2L);
    collectionInput.add(a);
    collectionInput.add(b);

    // assign timestamps and watermarks; 10 s is the maximum allowed out-of-orderness
    DataStream<Tuple2<String, Long>> text = env.fromCollection(collectionInput);
    // reassign: assignTimestampsAndWatermarks returns a new stream
    text = text.assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Tuple2<String, Long> element) {
                return element.f1;
            }
        });

    env.execute();
}
Example #6
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0
static BoundedOutOfOrdernessTimestampExtractor<Event> createTimestampExtractor(ParameterTool pt) {
    return new BoundedOutOfOrdernessTimestampExtractor<Event>(
        Time.milliseconds(
            pt.getLong(
                SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.key(),
                SEQUENCE_GENERATOR_SRC_EVENT_TIME_MAX_OUT_OF_ORDERNESS.defaultValue()))) {

        private static final long serialVersionUID = -3154419724891779938L;

        @Override
        public long extractTimestamp(Event element) {
            return element.getEventTime();
        }
    };
}
Example #7
Source File: BoundedOutOfOrdernessTimestampExtractorTest.java From flink with Apache License 2.0
@Test
public void testInitializationAndRuntime() {
    Time maxAllowedLateness = Time.milliseconds(10L);
    BoundedOutOfOrdernessTimestampExtractor<Long> extractor = new LongExtractor(maxAllowedLateness);

    assertEquals(maxAllowedLateness.toMilliseconds(), extractor.getMaxOutOfOrdernessInMillis());

    runValidTests(extractor);
}
Example #8
Source File: Main2.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // set parallelism to 1
    env.setParallelism(1);
    // env.setParallelism(4);

    SingleOutputStreamOperator<Word> data = env.socketTextStream("localhost", 9001)
        .map(new MapFunction<String, Word>() {
            @Override
            public Word map(String value) throws Exception {
                String[] split = value.split(",");
                return new Word(split[0], Integer.valueOf(split[1]), Long.valueOf(split[2]));
            }
        });

    // BoundedOutOfOrdernessTimestampExtractor; reassign so the timestamps
    // and watermarks are actually used downstream
    data = data.assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor<Word>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(Word element) {
                return element.getTimestamp();
            }
        });

    data.print();

    env.execute("watermark demo");
}
Example #9
Source File: ClickEventCount.java From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    configureEnvironment(params, env);

    String inputTopic = params.get("input-topic", "input");
    String outputTopic = params.get("output-topic", "output");
    String brokers = params.get("bootstrap.servers", "localhost:9092");

    Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    env.addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
        .name("ClickEvent Source")
        .assignTimestampsAndWatermarks(
            new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
                @Override
                public long extractTimestamp(final ClickEvent element) {
                    return element.getTimestamp().getTime();
                }
            })
        .keyBy(ClickEvent::getPage)
        .timeWindow(WINDOW_SIZE)
        .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
        .name("ClickEvent Counter")
        .addSink(new FlinkKafkaProducer<>(
            outputTopic,
            new ClickEventStatisticsSerializationSchema(outputTopic),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
        .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example #10
Source File: ClickEventCount.java From flink-playgrounds with Apache License 2.0
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    configureEnvironment(params, env);

    boolean inflictBackpressure = params.has(BACKPRESSURE_OPTION);

    String inputTopic = params.get("input-topic", "input");
    String outputTopic = params.get("output-topic", "output");
    String brokers = params.get("bootstrap.servers", "localhost:9092");

    Properties kafkaProps = new Properties();
    kafkaProps.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
    kafkaProps.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "click-event-count");

    DataStream<ClickEvent> clicks = env
        .addSource(new FlinkKafkaConsumer<>(inputTopic, new ClickEventDeserializationSchema(), kafkaProps))
        .name("ClickEvent Source")
        .assignTimestampsAndWatermarks(
            new BoundedOutOfOrdernessTimestampExtractor<ClickEvent>(Time.of(200, TimeUnit.MILLISECONDS)) {
                @Override
                public long extractTimestamp(final ClickEvent element) {
                    return element.getTimestamp().getTime();
                }
            });

    if (inflictBackpressure) {
        // Force a network shuffle so that the backpressure will affect the buffer pools
        clicks = clicks
            .keyBy(ClickEvent::getPage)
            .map(new BackpressureMap())
            .name("Backpressure");
    }

    DataStream<ClickEventStatistics> statistics = clicks
        .keyBy(ClickEvent::getPage)
        .timeWindow(WINDOW_SIZE)
        .aggregate(new CountingAggregator(), new ClickEventStatisticsCollector())
        .name("ClickEvent Counter");

    statistics
        .addSink(new FlinkKafkaProducer<>(
            outputTopic,
            new ClickEventStatisticsSerializationSchema(outputTopic),
            kafkaProps,
            FlinkKafkaProducer.Semantic.AT_LEAST_ONCE))
        .name("ClickEventStatistics Sink");

    env.execute("Click Event Count");
}
Example #11
Source File: TurbineHeatProcessor.java From pravega-samples with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);

    PravegaConfig pravegaConfig = PravegaConfig
        .fromParams(params)
        .withDefaultScope("examples");

    // ensure that the scope and stream exist
    Stream stream = Utils.createStream(
        pravegaConfig,
        params.get("input", "turbineHeatTest"),
        StreamConfiguration.builder().scalingPolicy(ScalingPolicy.fixed(1)).build());

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // required on a multi-core machine, where idle sources would otherwise keep
    // the watermark from advancing and prevent windows from triggering
    env.setParallelism(1);

    // 1. read and decode the sensor events from a Pravega stream
    FlinkPravegaReader<String> source = FlinkPravegaReader.<String>builder()
        .withPravegaConfig(pravegaConfig)
        .forStream(stream)
        .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class))
        .build();
    DataStream<SensorEvent> events = env.addSource(source, "input").map(new SensorMapper()).name("events");

    // 2. extract timestamp information to support 'event-time' processing
    SingleOutputStreamOperator<SensorEvent> timestamped = events.assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor<SensorEvent>(Time.seconds(10)) {
            @Override
            public long extractTimestamp(SensorEvent element) {
                return element.getTimestamp();
            }
        });

    // 3. summarize the temperature data for each sensor
    SingleOutputStreamOperator<SensorAggregate> summaries = timestamped
        .keyBy("sensorId")
        .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(8)))
        .fold(null, new SensorAggregator()).name("summaries");

    // 4. save to HDFS and print to stdout. Refer to the TaskManager's 'Stdout' view in the Flink UI.
    summaries.print().name("stdout");
    if (params.has("output")) {
        summaries.writeAsCsv(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);
    }

    env.execute("TurbineHeatProcessor_" + stream);
}
Example #12
Source File: StreamingETL.java From flink-streaming-etl with Apache License 2.0
public static void main(String[] args) throws Exception {
    // parse arguments
    ParameterTool params = ParameterTool.fromPropertiesFile(args[0]);

    // create streaming environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // enable event time processing
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // enable fault-tolerance
    env.enableCheckpointing(1000);

    // enable restarts
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(50, 500L));

    env.setStateBackend(new FsStateBackend("file:///home/robert/flink-workdir/flink-streaming-etl/state-backend"));

    // run each operator separately
    env.disableOperatorChaining();

    // get data from Kafka
    Properties kParams = params.getProperties();
    kParams.setProperty("group.id", UUID.randomUUID().toString());
    DataStream<ObjectNode> inputStream = env
        .addSource(new FlinkKafkaConsumer09<>(params.getRequired("topic"), new JSONDeserializationSchema(), kParams)).name("Kafka 0.9 Source")
        .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<ObjectNode>(Time.minutes(1L)) {
            @Override
            public long extractTimestamp(ObjectNode jsonNodes) {
                return jsonNodes.get("timestamp_ms").asLong();
            }
        }).name("Timestamp extractor");

    // filter out records without lang field
    DataStream<ObjectNode> tweetsWithLang = inputStream
        .filter(jsonNode -> jsonNode.has("user") && jsonNode.get("user").has("lang"))
        .name("Filter records without 'lang' field");

    // select only lang = "en" tweets
    DataStream<ObjectNode> englishTweets = tweetsWithLang
        .filter(jsonNode -> jsonNode.get("user").get("lang").asText().equals("en"))
        .name("Select 'lang'=en tweets");

    // write to file system
    RollingSink<ObjectNode> rollingSink = new RollingSink<>(params.get("sinkPath", "/home/robert/flink-workdir/flink-streaming-etl/rolling-sink"));
    rollingSink.setBucketer(new DateTimeBucketer("yyyy-MM-dd-HH-mm")); // one bucket per minute
    englishTweets.addSink(rollingSink).name("Rolling FileSystem Sink");

    // build aggregates (count per language) using a 10-second tumbling window
    DataStream<Tuple3<Long, String, Long>> languageCounts = tweetsWithLang
        .keyBy(jsonNode -> jsonNode.get("user").get("lang").asText())
        .timeWindow(Time.seconds(10))
        .apply(new Tuple3<>(0L, "", 0L), new JsonFoldCounter(), new CountEmitter())
        .name("Count per Language (10 seconds tumbling)");

    // write window aggregate to ElasticSearch
    List<InetSocketAddress> transportNodes = ImmutableList.of(new InetSocketAddress(InetAddress.getByName("localhost"), 9300));
    ElasticsearchSink<Tuple3<Long, String, Long>> elasticsearchSink = new ElasticsearchSink<>(params.toMap(), transportNodes, new ESRequest());
    languageCounts.addSink(elasticsearchSink).name("ElasticSearch2 Sink");

    // word-count on the tweet stream
    DataStream<Tuple2<Date, List<Tuple2<String, Long>>>> topWordCount = tweetsWithLang
        // get text from tweets
        .map(tweet -> tweet.get("text").asText()).name("Get text from Tweets")
        // split text into (word, 1) tuples
        .flatMap(new FlatMapFunction<String, Tuple2<String, Long>>() {
            @Override
            public void flatMap(String s, Collector<Tuple2<String, Long>> collector) throws Exception {
                String[] splits = s.split(" ");
                for (String sp : splits) {
                    collector.collect(new Tuple2<>(sp, 1L));
                }
            }
        }).name("Tokenize words")
        // group by word
        .keyBy(0)
        // build 1 min windows, compute every 10 seconds --> count word frequency
        .timeWindow(Time.minutes(1L), Time.seconds(10L)).apply(new WordCountingWindow()).name("Count word frequency (1 min, 10 sec sliding window)")
        // build top n every 10 seconds
        .timeWindowAll(Time.seconds(10L)).apply(new TopNWords(10)).name("TopN Window (10s)");

    // write top Ns to Kafka topic
    topWordCount.addSink(new FlinkKafkaProducer09<>(params.getRequired("wc-topic"), new ListSerSchema(), params.getProperties())).name("Write topN to Kafka");

    env.execute("Streaming ETL");
}