Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#assignTimestampsAndWatermarks()
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream#assignTimestampsAndWatermarks().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DataStreamTest.java From flink with Apache License 2.0 | 7 votes |
/**
 * Ensure that WatermarkStrategy is easy to use in the API, without superfluous generics.
 *
 * <p>The test has no assertions; it only exercises the two call shapes below.
 */
@Test
public void testErgonomicWatermarkStrategy() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> input = env.fromElements("bonjour");

    // a single factory call is passed here without any explicit type argument
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .forBoundedOutOfOrderness(Duration.ofMillis(10)));

    // as soon as you have a chain of methods the first call needs to specify the generic type
    input.assignTimestampsAndWatermarks(
            WatermarkStrategy
                    .<String>forBoundedOutOfOrderness(Duration.ofMillis(10))
                    .withTimestampAssigner((event, timestamp) -> 42L));
}
Example 2
Source File: AscendingAssigner.java From flink-simple-tutorial with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 指定系统时间概念为 event time env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); List<Tuple2<String, Long>> collectionInput = new ArrayList<>(); Tuple2<String, Long> a = new Tuple2<>("first event", 1L); Tuple2<String, Long> b = new Tuple2<>("second event", 2L); collectionInput.add(a); collectionInput.add(b); // 使用 Ascending 分配 时间信息和 watermark DataStream<Tuple2<String, Long>> text = env.fromCollection(collectionInput); text.assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Long>>() { @Override public long extractAscendingTimestamp(Tuple2<String, Long> element) { return element.f1; } }); env.execute(); }
Example 3
Source File: BoundedAssigner.java From flink-simple-tutorial with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // 指定系统时间概念为 event time env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); List<Tuple2<String, Long>> collectionInput = new ArrayList<>(); Tuple2<String, Long> a = new Tuple2<>("first event", 1L); Tuple2<String, Long> b = new Tuple2<>("second event", 2L); collectionInput.add(a); collectionInput.add(b); // 使用 Ascending 分配 时间信息和 watermark 设定10s 代表最长的时延 DataStream<Tuple2<String, Long>> text = env.fromCollection(collectionInput); text.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple2<String, Long>>(Time.seconds(10)) { @Override public long extractTimestamp(Tuple2<String, Long> element) { return element.f1; } }); env.execute(); }
Example 4
Source File: SiddhiCEPITCase.java From flink-siddhi with Apache License 2.0 | 6 votes |
/**
 * Feeds a POJO event source through a pass-through Siddhi query, writes the resulting
 * {@code Event} stream to a temporary text file and asserts that the file contains 5 lines.
 *
 * @throws Exception if the job or the file handling fails
 */
@Test
public void testUnboundedPojoStreamAndReturnPojo() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // assignTimestampsAndWatermarks returns a new stream and leaves the receiver untouched,
    // so the result must be kept; otherwise the query below reads the un-timestamped source.
    DataStream<Event> input = env
        .addSource(new RandomEventSource(5))
        .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Event>() {
            @Override
            public long extractAscendingTimestamp(Event element) {
                return element.getTimestamp();
            }
        });

    DataStream<Event> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into outputStream")
        .returns("outputStream", Event.class);

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
Example 5
Source File: SiddhiCEPITCase.java From bahir-flink with Apache License 2.0 | 6 votes |
/**
 * Feeds a POJO event source through a pass-through Siddhi query, writes the resulting
 * {@code Event} stream to a temporary text file and asserts that the file contains 5 lines.
 *
 * @throws Exception if the job or the file handling fails
 */
@Test
public void testUnboundedPojoStreamAndReturnPojo() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // assignTimestampsAndWatermarks returns a new stream and leaves the receiver untouched,
    // so the result must be kept; otherwise the query below reads the un-timestamped source.
    DataStream<Event> input = env
        .addSource(new RandomEventSource(5))
        .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Event>() {
            @Override
            public long extractAscendingTimestamp(Event element) {
                return element.getTimestamp();
            }
        });

    DataStream<Event> output = SiddhiCEP
        .define("inputStream", input, "id", "name", "price", "timestamp")
        .cql("from inputStream select timestamp, id, name, price insert into outputStream")
        .returns("outputStream", Event.class);

    String resultPath = tempFolder.newFile().toURI().toString();
    output.writeAsText(resultPath, FileSystem.WriteMode.OVERWRITE);
    env.execute();
    assertEquals(5, getLineCount(resultPath));
}
Example 6
Source File: TimestampITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * This tests whether timestamps are properly extracted in the timestamp * extractor and whether watermarks are also correctly forwarded from this with the auto watermark * interval. */ @Test public void testTimestampExtractorWithAutoInterval() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.getConfig().setAutoWatermarkInterval(10); env.setParallelism(1); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() { @Override public void run(SourceContext<Integer> ctx) throws Exception { int index = 1; while (index <= numElements) { ctx.collect(index); latch.await(); index++; } } @Override public void cancel() {} }); DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks( new AscendingTimestampExtractor<Integer>() { @Override public long extractAscendingTimestamp(Integer element) { return element; } }); extractOp .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)) .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator()); // verify that extractor picks up source parallelism Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism()); env.execute(); // verify that we get NUM_ELEMENTS watermarks for (int j = 0; j < numElements; j++) { if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) { long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp(); Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]); } } // the input is finite, so it should have a MAX Watermark assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1)); }
Example 7
Source File: TimestampITCase.java From flink with Apache License 2.0 | 4 votes |
/** * This tests whether timestamps are properly extracted in the timestamp * extractor and whether watermarks are also correctly forwarded from this with the auto watermark * interval. */ @Test public void testTimestampExtractorWithAutoInterval() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.getConfig().setAutoWatermarkInterval(10); env.setParallelism(1); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() { @Override public void run(SourceContext<Integer> ctx) throws Exception { int index = 1; while (index <= numElements) { ctx.collect(index); latch.await(); index++; } } @Override public void cancel() {} }); DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks( new AscendingTimestampExtractor<Integer>() { @Override public long extractAscendingTimestamp(Integer element) { return element; } }); extractOp .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)) .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator()); // verify that extractor picks up source parallelism Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism()); env.execute(); // verify that we get NUM_ELEMENTS watermarks for (int j = 0; j < numElements; j++) { if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) { long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp(); Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]); } } // the input is finite, so it should have a MAX Watermark assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1)); }
Example 8
Source File: TurbineHeatProcessor.java From pravega-samples with Apache License 2.0 | 4 votes |
/**
 * Entry point of the turbine heat job: reads sensor events from a Pravega stream, assigns
 * event-time timestamps, aggregates per sensor in tumbling event-time windows, prints the
 * summaries and, when the {@code output} parameter is present, also writes them as CSV.
 *
 * @param args command-line arguments, parsed with {@code ParameterTool.fromArgs}
 * @throws Exception if stream creation or job execution fails
 */
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    PravegaConfig pravegaConfig = PravegaConfig
            .fromParams(params)
            .withDefaultScope("examples");

    // make sure the scope and the stream exist before reading from them
    String streamName = params.get("input", "turbineHeatTest");
    StreamConfiguration streamConfig =
            StreamConfiguration.builder().scalingPolicy(ScalingPolicy.fixed(1)).build();
    Stream stream = Utils.createStream(pravegaConfig, streamName, streamConfig);

    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    // required since on a multi core CPU machine, the watermark is not advancing due to
    // idle sources and causing window not to trigger
    env.setParallelism(1);

    // 1. read and decode the sensor events from a Pravega stream
    FlinkPravegaReader<String> source = FlinkPravegaReader.<String>builder()
            .withPravegaConfig(pravegaConfig)
            .forStream(stream)
            .withDeserializationSchema(PravegaSerialization.deserializationFor(String.class))
            .build();
    DataStream<SensorEvent> events = env
            .addSource(source, "input")
            .map(new SensorMapper())
            .name("events");

    // 2. extract timestamp information to support 'event-time' processing
    BoundedOutOfOrdernessTimestampExtractor<SensorEvent> timestampExtractor =
            new BoundedOutOfOrdernessTimestampExtractor<SensorEvent>(Time.seconds(10)) {
                @Override
                public long extractTimestamp(SensorEvent event) {
                    return event.getTimestamp();
                }
            };
    SingleOutputStreamOperator<SensorEvent> timestamped =
            events.assignTimestampsAndWatermarks(timestampExtractor);

    // 3. summarize the temperature data for each sensor
    SingleOutputStreamOperator<SensorAggregate> summaries = timestamped
            .keyBy("sensorId")
            .window(TumblingEventTimeWindows.of(Time.days(1), Time.hours(8)))
            .fold(null, new SensorAggregator())
            .name("summaries");

    // 4. save to HDFS and print to stdout. Refer to the TaskManager's 'Stdout' view in the Flink UI.
    summaries.print().name("stdout");
    if (params.has("output")) {
        String outputPath = params.getRequired("output");
        summaries.writeAsCsv(outputPath, FileSystem.WriteMode.OVERWRITE);
    }

    env.execute("TurbineHeatProcessor_" + stream);
}
Example 9
Source File: TimestampITCase.java From flink with Apache License 2.0 | 4 votes |
/** * This tests whether timestamps are properly extracted in the timestamp * extractor and whether watermarks are also correctly forwarded from this with the auto watermark * interval. */ @Test public void testTimestampExtractorWithAutoInterval() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.getConfig().setAutoWatermarkInterval(10); env.setParallelism(1); DataStream<Integer> source1 = env.addSource(new SourceFunction<Integer>() { @Override public void run(SourceContext<Integer> ctx) throws Exception { int index = 1; while (index <= numElements) { ctx.collect(index); latch.await(); index++; } } @Override public void cancel() {} }); DataStream<Integer> extractOp = source1.assignTimestampsAndWatermarks( new AscendingTimestampExtractor<Integer>() { @Override public long extractAscendingTimestamp(Integer element) { return element; } }); extractOp .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(true)) .transform("Timestamp Check", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator()); // verify that extractor picks up source parallelism Assert.assertEquals(extractOp.getTransformation().getParallelism(), source1.getTransformation().getParallelism()); env.execute(); // verify that we get NUM_ELEMENTS watermarks for (int j = 0; j < numElements; j++) { if (!CustomOperator.finalWatermarks[0].get(j).equals(new Watermark(j))) { long wm = CustomOperator.finalWatermarks[0].get(j).getTimestamp(); Assert.fail("Wrong watermark. Expected: " + j + " Found: " + wm + " All: " + CustomOperator.finalWatermarks[0]); } } // the input is finite, so it should have a MAX Watermark assertEquals(Watermark.MAX_WATERMARK, CustomOperator.finalWatermarks[0].get(CustomOperator.finalWatermarks[0].size() - 1)); }
Example 10
Source File: BenchmarkJob.java From scotty-window-processor with Apache License 2.0 | 2 votes |
/**
 * Builds the benchmark pipeline on the given environment and runs it immediately.
 *
 * <p>The pipeline reads from a load generator source, attaches a throughput logger as a
 * side branch, assigns timestamps and watermarks, keys by field 0, applies the Scotty window
 * operator with all given windows and sends the results to a sink that discards them.
 * Any exception thrown by {@code env.execute()} is caught and its stack trace printed.
 *
 * @param assigner   the windows to register on the window operator
 * @param env        the execution environment to configure and execute
 * @param runtime    passed through to the load generator source
 * @param throughput passed through to the load generator source and the throughput logger
 * @param gaps       passed through to the load generator source
 */
public BenchmarkJob(List<Window> assigner, StreamExecutionEnvironment env, final long runtime, final int throughput, final List<Tuple2<Long, Long>> gaps) {
    // global job parameters are built from an empty map
    Map<String, String> emptyConfig = new HashMap<>();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromMap(emptyConfig));

    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);
    env.setMaxParallelism(1);

    KeyedScottyWindowOperator<Tuple, Tuple4<String, Integer, Long, Long>, Tuple4<String, Integer, Long, Long>> windowOperator =
            new KeyedScottyWindowOperator<>(new SumAggregation());
    for (Window window : assigner) {
        windowOperator.addWindow(window);
    }

    DataStream<Tuple4<String, Integer, Long, Long>> messageStream = env
            .addSource(new de.tub.dima.scotty.flinkBenchmark.LoadGeneratorSource(runtime, throughput, gaps));

    // side branch: its output is not consumed any further
    messageStream.flatMap(new de.tub.dima.scotty.flinkBenchmark.ThroughputLogger<>(200, throughput));

    messageStream
            .assignTimestampsAndWatermarks(new TimestampsAndWatermarks())
            .keyBy(0)
            .process(windowOperator)
            .addSink(new SinkFunction() {
                @Override
                public void invoke(final Object value) throws Exception {
                    // results are intentionally dropped
                }
            });

    try {
        env.execute();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 11
Source File: SimpleEdgeStream.java From gelly-streaming with Apache License 2.0 | 2 votes |
/**
 * Creates a graph from an edge stream operating in event time, with timestamps taken from
 * {@code timeExtractor}.
 *
 * <p>The time characteristic of {@code context} is set to event time.
 *
 * @param edges a DataStream of edges.
 * @param timeExtractor the timestamp extractor.
 * @param context the execution environment.
 * @see org.apache.flink.streaming.api.TimeCharacteristic
 */
public SimpleEdgeStream(DataStream<Edge<K, EV>> edges, AscendingTimestampExtractor<Edge<K,EV>> timeExtractor, StreamExecutionEnvironment context) {
    context.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    this.context = context;
    // keep the stream returned by the assigner, not the raw input stream
    this.edges = edges.assignTimestampsAndWatermarks(timeExtractor);
}