Java Code Examples for org.apache.flink.streaming.api.datastream.DataStream#print()
The following examples show how to use
org.apache.flink.streaming.api.datastream.DataStream#print().
You can go to the original project or source file by following the links above each example.
Example 1
Source File: KinesisConsumerMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty("aws.region", parameterTool.getRequired("aws.region"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.accesskeyid", parameterTool.getRequired("aws.accesskey"));
    kinesisConsumerConfig.setProperty("aws.credentials.provider.basic.secretkey", parameterTool.getRequired("aws.secretkey"));

    DataStream<String> kinesis = env.addSource(new FlinkKinesisConsumer<>(
            "zhisheng",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    env.execute();
}
Example 2
Source File: ConsumeFromKinesis.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    ParameterTool pt = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.setParallelism(1);

    Properties kinesisConsumerConfig = new Properties();
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_REGION, pt.getRequired("region"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_ACCESS_KEY_ID, pt.getRequired("accesskey"));
    kinesisConsumerConfig.setProperty(ConsumerConfigConstants.AWS_SECRET_ACCESS_KEY, pt.getRequired("secretkey"));

    DataStream<String> kinesis = see.addSource(new FlinkKinesisConsumer<>(
            "flink-test",
            new SimpleStringSchema(),
            kinesisConsumerConfig));

    kinesis.print();

    see.execute();
}
Example 3
Source File: StreamWordCountExample.java From toolbox with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<String, Integer>> dataStream = env
            .fromElements("Who's there?", "I think I hear them. Stand, ho! Who's there?")
            //.socketTextStream("localhost", 9999)
            .flatMap(new Splitter())
            .keyBy(0)
            .sum(1);

    dataStream.print();

    env.execute();
    //env.execute("Socket Stream WordCount");
}
Example 4
Source File: ExactTriangleCount.java From gelly-streaming with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    if (!parseParameters(args)) {
        return;
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    SimpleEdgeStream<Integer, NullValue> edges = getGraphStream(env);

    DataStream<Tuple2<Integer, Integer>> result = edges.buildNeighborhood(false)
            .map(new ProjectCanonicalEdges())
            .keyBy(0, 1).flatMap(new IntersectNeighborhoods())
            .keyBy(0).flatMap(new SumAndEmitCounters());

    if (resultPath != null) {
        result.writeAsText(resultPath);
    } else {
        result.print();
    }

    env.execute("Exact Triangle Count");
}
Example 5
Source File: RollingSum.java From examples-java with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // set up the streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple3<Integer, Integer, Integer>> inputStream = env.fromElements(
            Tuple3.of(1, 2, 2), Tuple3.of(2, 3, 1), Tuple3.of(2, 2, 4), Tuple3.of(1, 5, 3));

    DataStream<Tuple3<Integer, Integer, Integer>> resultStream = inputStream
            .keyBy(0) // key on first field of tuples
            .sum(1);  // sum the second field of the tuple

    resultStream.print();

    // execute the application
    env.execute();
}
Example 6
Source File: WindowWordCount.java From flinkDemo with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Tuple2<String, Integer>> dataStream = env
            //.addSource(new RandomEventSource(5).closeDelay(1500));
            .socketTextStream("localhost", 8080)
            .flatMap(new Splitter())
            .keyBy(0)
            .timeWindow(Time.seconds(5))
            .sum(1);

    dataStream.print();

    env.execute();
}
Example 7
Source File: MatrixVectorMul.java From flink with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    System.out.println("Usage: MatrixVectorMul [--output <path>] [--dimension <dimension> --data-size <data_size>] [--resource-name <resource_name>]");

    // Set up the execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // Make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    final int dimension = params.getInt("dimension", DEFAULT_DIM);
    final int dataSize = params.getInt("data-size", DEFAULT_DATA_SIZE);
    final String resourceName = params.get("resource-name", DEFAULT_RESOURCE_NAME);

    DataStream<List<Float>> result = env.addSource(new RandomVectorSource(dimension, dataSize))
            .map(new Multiplier(dimension, resourceName));

    // Emit result
    if (params.has("output")) {
        result.addSink(StreamingFileSink.forRowFormat(
                new Path(params.get("output")),
                new SimpleStringEncoder<List<Float>>()).build());
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        result.print();
    }

    // Execute program
    env.execute("Matrix-Vector Multiplication");
}
Example 8
Source File: App.java From Mastering-Apache-Flink with MIT License | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<TemperatureEvent> inputEventStream = env.fromElements(
            new TemperatureEvent("xyz", 22.0), new TemperatureEvent("xyz", 20.1),
            new TemperatureEvent("xyz", 21.1), new TemperatureEvent("xyz", 22.2),
            new TemperatureEvent("xyz", 29.1), new TemperatureEvent("xyz", 22.3),
            new TemperatureEvent("xyz", 22.1), new TemperatureEvent("xyz", 22.4),
            new TemperatureEvent("xyz", 22.7), new TemperatureEvent("xyz", 27.0));

    Pattern<TemperatureEvent, ?> warningPattern = Pattern.<TemperatureEvent>begin("first")
            .subtype(TemperatureEvent.class)
            .where(new FilterFunction<TemperatureEvent>() {
                private static final long serialVersionUID = 1L;

                public boolean filter(TemperatureEvent value) {
                    return value.getTemperature() >= 26.0;
                }
            })
            .within(Time.seconds(10));

    DataStream<Alert> patternStream = CEP.pattern(inputEventStream, warningPattern)
            .select(new PatternSelectFunction<TemperatureEvent, Alert>() {
                private static final long serialVersionUID = 1L;

                public Alert select(Map<String, TemperatureEvent> event) throws Exception {
                    return new Alert("Temperature Rise Detected");
                }
            });

    patternStream.print();

    env.execute("CEP on Temperature Sensor");
}
Example 9
Source File: WriteIntoKafka.java From flinkDemo with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    // create execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Map<String, String> properties = new HashMap<>();
    properties.put("bootstrap.servers", "192.168.10.63:6667,192.168.10.64:6667,192.168.10.65:6667");
    properties.put("group.id", "t10");
    properties.put("enable.auto.commit", "false");
    properties.put("auto.commit.interval.ms", "1000");
    properties.put("auto.offset.reset", "earliest");
    properties.put("session.timeout.ms", "30000");
    properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
    properties.put("topic", "kks-topic-FFT");
    //KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(properties);

    // parse user parameters
    //ParameterTool parameterTool = ParameterTool.fromArgs(args);
    ParameterTool parameterTool = ParameterTool.fromMap(properties);

    // add a simple source which is writing some strings
    DataStream<String> messageStream = env.addSource(new SimpleStringGenerator());

    // write stream to Kafka
    messageStream.addSink(new FlinkKafkaProducer010<>(
            parameterTool.getRequired("bootstrap.servers"),
            parameterTool.getRequired("topic"),
            new SimpleStringSchema()));

    messageStream.rebalance().map(new MapFunction<String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public String map(String value) throws Exception {
            return value;
        }
    });

    messageStream.print();

    env.execute();
}
Example 10
Source File: SideOutput.java From blog_demos with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // set parallelism to 1
    env.setParallelism(1);

    // define the OutputTag
    final OutputTag<String> outputTag = new OutputTag<String>("side-output"){};

    // create a List with three Tuple2 elements
    List<Tuple2<String, Integer>> list = new ArrayList<>();
    list.add(new Tuple2<>("aaa", 1));
    list.add(new Tuple2<>("bbb", 2));
    list.add(new Tuple2<>("ccc", 3));

    // create a DataStream from the List
    DataStream<Tuple2<String, Integer>> fromCollectionDataStream = env.fromCollection(list);

    // all elements enter mainDataStream; elements whose f1 field is odd also go to the side output
    SingleOutputStreamOperator<String> mainDataStream = fromCollectionDataStream
            .process(new ProcessFunction<Tuple2<String, Integer>, String>() {
                @Override
                public void processElement(Tuple2<String, Integer> value, Context ctx, Collector<String> out) throws Exception {
                    // forward to the next operator of the main stream
                    out.collect("main, name : " + value.f0 + ", value : " + value.f1);

                    // elements whose f1 field is odd go to the side output
                    if (1 == value.f1 % 2) {
                        ctx.output(outputTag, "side, name : " + value.f0 + ", value : " + value.f1);
                    }
                }
            });

    // disable chaining so the original DAG is visible in the web UI
    mainDataStream.disableChaining();

    // obtain the side-output stream
    DataStream<String> sideDataStream = mainDataStream.getSideOutput(outputTag);

    mainDataStream.print();
    sideDataStream.print();

    env.execute("processfunction demo : sideoutput");
}
Example 11
Source File: DataStreamTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);

    DataStream<Long> broadcast = src.broadcast();
    DataStreamSink<Long> broadcastSink = broadcast.print();
    StreamPartitioner<?> broadcastPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    broadcastSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

    DataStream<Long> shuffle = src.shuffle();
    DataStreamSink<Long> shuffleSink = shuffle.print();
    StreamPartitioner<?> shufflePartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    shuffleSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(shufflePartitioner instanceof ShufflePartitioner);

    DataStream<Long> forward = src.forward();
    DataStreamSink<Long> forwardSink = forward.print();
    StreamPartitioner<?> forwardPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    forwardSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(forwardPartitioner instanceof ForwardPartitioner);

    DataStream<Long> rebalance = src.rebalance();
    DataStreamSink<Long> rebalanceSink = rebalance.print();
    StreamPartitioner<?> rebalancePartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

    DataStream<Long> global = src.global();
    DataStreamSink<Long> globalSink = global.print();
    StreamPartitioner<?> globalPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    globalSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
Example 12
Source File: StreamingJob.java From Mastering-Apache-Flink with MIT License | 4 votes |
public static void main(String[] args) throws Exception {
    // set up the streaming execution environment
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // env.enableCheckpointing(5000);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    Properties properties = new Properties();
    properties.setProperty("bootstrap.servers", "localhost:9092");
    properties.setProperty("zookeeper.connect", "localhost:2181");
    properties.setProperty("group.id", "test");

    FlinkKafkaConsumer09<String> myConsumer = new FlinkKafkaConsumer09<>("temp", new SimpleStringSchema(), properties);
    myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

    DataStream<Tuple2<String, Double>> keyedStream = env.addSource(myConsumer).flatMap(new Splitter()).keyBy(0)
            .timeWindow(Time.seconds(300))
            .apply(new WindowFunction<Tuple2<String, Double>, Tuple2<String, Double>, Tuple, TimeWindow>() {
                @Override
                public void apply(Tuple key, TimeWindow window, Iterable<Tuple2<String, Double>> input,
                        Collector<Tuple2<String, Double>> out) throws Exception {
                    double sum = 0L;
                    int count = 0;
                    for (Tuple2<String, Double> record : input) {
                        sum += record.f1;
                        count++;
                    }
                    Tuple2<String, Double> result = input.iterator().next();
                    result.f1 = (sum / count);
                    out.collect(result);
                }
            });

    keyedStream.print();

    // execute program
    env.execute("Flink Streaming Java API Skeleton");
}
Example 13
Source File: SessionWindow.java From flink-simple-tutorial with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // build the input data; the goal is to realize a 3s session gap
    final List<Tuple3<String, Long, Integer>> input = new ArrayList<>();
    input.add(new Tuple3<>("a", 1L, 1));
    input.add(new Tuple3<>("b", 1L, 1));
    input.add(new Tuple3<>("b", 3L, 1));
    input.add(new Tuple3<>("b", 5L, 1));
    input.add(new Tuple3<>("c", 6L, 1));
    // the occurrences of 'a' and 'c' below come more than 3s after their previous
    // ones, so each should start a new window
    input.add(new Tuple3<>("a", 10L, 1));
    input.add(new Tuple3<>("c", 11L, 1));

    DataStream<Tuple3<String, Long, Integer>> source = env
            .addSource(new SourceFunction<Tuple3<String, Long, Integer>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void run(SourceContext<Tuple3<String, Long, Integer>> ctx) throws Exception {
                    for (Tuple3<String, Long, Integer> value : input) {
                        ctx.collectWithTimestamp(value, value.f1);
                        ctx.emitWatermark(new Watermark(value.f1 - 1));
                    }
                    ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
                }

                @Override
                public void cancel() {
                }
            });

    // create session windows with a 3s gap
    DataStream<Tuple3<String, Long, Integer>> aggregated = source
            .keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.seconds(3L)))
            .sum(2);

    System.out.println("Printing result to stdout. Use --output to specify output path.");
    aggregated.print();

    env.execute();
}
Example 14
Source File: DataStreamTest.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);

    DataStream<Long> broadcast = src.broadcast();
    DataStreamSink<Long> broadcastSink = broadcast.print();
    StreamPartitioner<?> broadcastPartitioner =
            getStreamGraph(env).getStreamEdges(src.getId(),
                    broadcastSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

    DataStream<Long> shuffle = src.shuffle();
    DataStreamSink<Long> shuffleSink = shuffle.print();
    StreamPartitioner<?> shufflePartitioner =
            getStreamGraph(env).getStreamEdges(src.getId(),
                    shuffleSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(shufflePartitioner instanceof ShufflePartitioner);

    DataStream<Long> forward = src.forward();
    DataStreamSink<Long> forwardSink = forward.print();
    StreamPartitioner<?> forwardPartitioner =
            getStreamGraph(env).getStreamEdges(src.getId(),
                    forwardSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(forwardPartitioner instanceof ForwardPartitioner);

    DataStream<Long> rebalance = src.rebalance();
    DataStreamSink<Long> rebalanceSink = rebalance.print();
    StreamPartitioner<?> rebalancePartitioner =
            getStreamGraph(env).getStreamEdges(src.getId(),
                    rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

    DataStream<Long> global = src.global();
    DataStreamSink<Long> globalSink = global.print();
    StreamPartitioner<?> globalPartitioner =
            getStreamGraph(env).getStreamEdges(src.getId(),
                    globalSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
Example 15
Source File: DataStreamPojoITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Test composite & nested key on the Data POJO.
 */
@Test
public void testNestedKeyOnNestedPojo() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableObjectReuse();
    see.setParallelism(4);

    DataStream<Data> dataStream = see.fromCollection(elements);
    DataStream<Data> summedStream = dataStream
            .keyBy("aaa", "stats.count")
            .sum("sum")
            .keyBy("aaa", "stats.count")
            .flatMap(new FlatMapFunction<Data, Data>() {
                private static final long serialVersionUID = -3678267280397950258L;
                Data[] first = new Data[3];

                @Override
                public void flatMap(Data value, Collector<Data> out) throws Exception {
                    if (value.stats.count != 123) {
                        throw new RuntimeException("Wrong value for value.stats.count");
                    }
                    if (first[value.aaa] == null) {
                        first[value.aaa] = value;
                        if (value.sum != 1) {
                            throw new RuntimeException("Expected the sum to be one");
                        }
                    } else {
                        if (value.sum != 2) {
                            throw new RuntimeException("Expected the sum to be two");
                        }
                        if (first[value.aaa].aaa != value.aaa) {
                            throw new RuntimeException("aaa key wrong");
                        }
                        if (first[value.aaa].abc != value.abc) {
                            throw new RuntimeException("abc key wrong");
                        }
                        if (first[value.aaa].wxyz != value.wxyz) {
                            throw new RuntimeException("wxyz key wrong");
                        }
                    }
                }
            });

    summedStream.print();

    see.execute();
}
Example 16
Source File: DataStreamTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testChannelSelectors() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Long> src = env.generateSequence(0, 0);

    DataStream<Long> broadcast = src.broadcast();
    DataStreamSink<Long> broadcastSink = broadcast.print();
    StreamPartitioner<?> broadcastPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    broadcastSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(broadcastPartitioner instanceof BroadcastPartitioner);

    DataStream<Long> shuffle = src.shuffle();
    DataStreamSink<Long> shuffleSink = shuffle.print();
    StreamPartitioner<?> shufflePartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    shuffleSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(shufflePartitioner instanceof ShufflePartitioner);

    DataStream<Long> forward = src.forward();
    DataStreamSink<Long> forwardSink = forward.print();
    StreamPartitioner<?> forwardPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    forwardSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(forwardPartitioner instanceof ForwardPartitioner);

    DataStream<Long> rebalance = src.rebalance();
    DataStreamSink<Long> rebalanceSink = rebalance.print();
    StreamPartitioner<?> rebalancePartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    rebalanceSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(rebalancePartitioner instanceof RebalancePartitioner);

    DataStream<Long> global = src.global();
    DataStreamSink<Long> globalSink = global.print();
    StreamPartitioner<?> globalPartitioner =
            env.getStreamGraph().getStreamEdges(src.getId(),
                    globalSink.getTransformation().getId()).get(0).getPartitioner();
    assertTrue(globalPartitioner instanceof GlobalPartitioner);
}
Example 17
Source File: SessionWindowing.java From flink with Apache License 2.0 | 4 votes |
@SuppressWarnings("serial")
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(params);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    final boolean fileOutput = params.has("output");

    final List<Tuple3<String, Long, Integer>> input = new ArrayList<>();
    input.add(new Tuple3<>("a", 1L, 1));
    input.add(new Tuple3<>("b", 1L, 1));
    input.add(new Tuple3<>("b", 3L, 1));
    input.add(new Tuple3<>("b", 5L, 1));
    input.add(new Tuple3<>("c", 6L, 1));
    // We expect to detect the session "a" earlier than this point (the old
    // functionality can only detect here when the next starts)
    input.add(new Tuple3<>("a", 10L, 1));
    // We expect to detect session "b" and "c" at this point as well
    input.add(new Tuple3<>("c", 11L, 1));

    DataStream<Tuple3<String, Long, Integer>> source = env
            .addSource(new SourceFunction<Tuple3<String, Long, Integer>>() {
                private static final long serialVersionUID = 1L;

                @Override
                public void run(SourceContext<Tuple3<String, Long, Integer>> ctx) throws Exception {
                    for (Tuple3<String, Long, Integer> value : input) {
                        ctx.collectWithTimestamp(value, value.f1);
                        ctx.emitWatermark(new Watermark(value.f1 - 1));
                    }
                    ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
                }

                @Override
                public void cancel() {
                }
            });

    // We create sessions for each id with max timeout of 3 time units
    DataStream<Tuple3<String, Long, Integer>> aggregated = source
            .keyBy(0)
            .window(EventTimeSessionWindows.withGap(Time.milliseconds(3L)))
            .sum(2);

    if (fileOutput) {
        aggregated.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        aggregated.print();
    }

    env.execute();
}
Example 18
Source File: DataStreamPojoITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Test composite key on the Data POJO (with nested fields).
 */
@Test
public void testCompositeKeyOnNestedPojo() throws Exception {
    StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
    see.getConfig().disableObjectReuse();
    see.setParallelism(3);

    DataStream<Data> dataStream = see.fromCollection(elements);
    DataStream<Data> summedStream = dataStream
            .keyBy("aaa", "abc", "wxyz")
            .sum("sum")
            .keyBy("aaa", "abc", "wxyz")
            .flatMap(new FlatMapFunction<Data, Data>() {
                private static final long serialVersionUID = 788865239171396315L;
                Data[] first = new Data[3];

                @Override
                public void flatMap(Data value, Collector<Data> out) throws Exception {
                    if (first[value.aaa] == null) {
                        first[value.aaa] = value;
                        if (value.sum != 1) {
                            throw new RuntimeException("Expected the sum to be one");
                        }
                    } else {
                        if (value.sum != 2) {
                            throw new RuntimeException("Expected the sum to be two");
                        }
                        if (first[value.aaa].aaa != value.aaa) {
                            throw new RuntimeException("aaa key wrong");
                        }
                        if (first[value.aaa].abc != value.abc) {
                            throw new RuntimeException("abc key wrong");
                        }
                        if (first[value.aaa].wxyz != value.wxyz) {
                            throw new RuntimeException("wxyz key wrong");
                        }
                    }
                }
            });

    summedStream.print();

    see.execute();
}
Example 19
Source File: KafkaConsumerTestBase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Test that ensures the KafkaConsumer fails properly if the topic doesn't exist
 * and a wrong broker was specified.
 *
 * @throws Exception
 */
public void runFailOnNoBrokerTest() throws Exception {
    try {
        Properties properties = new Properties();

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        // use wrong ports for the consumers
        properties.setProperty("bootstrap.servers", "localhost:80");
        properties.setProperty("zookeeper.connect", "localhost:80");
        properties.setProperty("group.id", "test");
        properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast
        properties.setProperty("socket.timeout.ms", "3000");
        properties.setProperty("session.timeout.ms", "2000");
        properties.setProperty("fetch.max.wait.ms", "2000");
        properties.setProperty("heartbeat.interval.ms", "1000");
        properties.putAll(secureProps);

        FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
        DataStream<String> stream = see.addSource(source);
        stream.print();

        see.execute("No broker test");
    } catch (JobExecutionException jee) {
        if (kafkaServer.getVersion().equals("0.9") ||
                kafkaServer.getVersion().equals("0.10") ||
                kafkaServer.getVersion().equals("0.11") ||
                kafkaServer.getVersion().equals("2.0")) {
            assertTrue(jee.getCause() instanceof TimeoutException);

            TimeoutException te = (TimeoutException) jee.getCause();
            assertEquals("Timeout expired while fetching topic metadata", te.getMessage());
        } else {
            assertTrue(jee.getCause() instanceof RuntimeException);

            RuntimeException re = (RuntimeException) jee.getCause();
            assertTrue(re.getMessage().contains("Unable to retrieve any partitions"));
        }
    }
}