org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011 Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011.
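Every example below follows the same basic pattern: build a java.util.Properties object with the Kafka connection settings, construct a FlinkKafkaConsumer011 from a topic (or a topic list or pattern), a DeserializationSchema, and those properties, then register it with env.addSource(...). As a quick orientation, here is a minimal, self-contained sketch of that pattern; the broker address, group id, and topic name are placeholders rather than values taken from any example below.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

public class MinimalKafka011Source {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
        props.put("group.id", "example-group");           // placeholder consumer group

        // Topic name, deserialization schema, and consumer properties.
        FlinkKafkaConsumer011<String> consumer =
                new FlinkKafkaConsumer011<>("example-topic", new SimpleStringSchema(), props);

        DataStream<String> stream = env.addSource(consumer);
        stream.print();

        env.execute("Minimal FlinkKafkaConsumer011 example");
    }
}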
Example #1
Source File: Kafka011Example.java From Flink-CEPplus with Apache License 2.0 | 7 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer011<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #2
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<MetricEvent> metricData = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, MetricEvent.class));
    metricData.print();

    CheckPointUtil.setCheckpointConfig(env, parameterTool)
            .execute("zhisheng --- checkpoint config example");
}
Example #3
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("zookeeper.connect", "localhost:2181");
    props.put("group.id", "metric-group");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");   // key deserialization
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // value deserialization
    props.put("auto.offset.reset", "latest");

    DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
            "metric",                 // Kafka topic
            new SimpleStringSchema(), // String deserialization schema
            props)).setParallelism(1);

    dataStreamSource.print(); // print the data read from Kafka to the console

    env.execute("Flink add data source");
}
Example #4
Source File: Kafka011Example.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer011<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #5
Source File: KafkaDeserializationSchemaTest.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
            new KafkaMetricSchema(true),
            props);

    env.addSource(kafkaConsumer)
            .flatMap(new FlatMapFunction<ObjectNode, MetricEvent>() {
                @Override
                public void flatMap(ObjectNode jsonNodes, Collector<MetricEvent> collector) throws Exception {
                    try {
                        // System.out.println(jsonNodes);
                        MetricEvent metricEvent = GsonUtil.fromJson(jsonNodes.get("value").asText(), MetricEvent.class);
                        collector.collect(metricEvent);
                    } catch (Exception e) {
                        log.error("jsonNodes = {} convert to MetricEvent has an error", jsonNodes, e);
                    }
                }
            })
            .print();

    env.execute();
}
Example #6
Source File: FlinkKafkaConsumerTest1.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // Kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"), parameterTool.get("logs.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new MetricSchema(), props);

    // Kafka topic Pattern
    //FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), props);

    // consumer.setStartFromLatest();
    // consumer.setStartFromEarliest();

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #7
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #8
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(parameterTool.getInt(STREAM_PARALLELISM, 1))
            .map(string -> GsonUtil.fromJson(string, Student.class)).setParallelism(4); // parse the JSON string into a Student object

    // timeWindowAll can only run with parallelism 1
    student.timeWindowAll(Time.minutes(1)).apply(new AllWindowFunction<Student, List<Student>, TimeWindow>() {
        @Override
        public void apply(TimeWindow window, Iterable<Student> values, Collector<List<Student>> out) throws Exception {
            ArrayList<Student> students = Lists.newArrayList(values);
            if (students.size() > 0) {
                log.info("Number of student records collected within 1 minute: " + students.size());
                out.collect(students);
            }
        }
    }).addSink(new SinkToMySQL()).setParallelism(parameterTool.getInt(STREAM_SINK_PARALLELISM, 1));

    env.execute("flink learning connectors mysql");
}
Example #9
Source File: FlinkKafkaSchemaTest1.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // Kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics,
            new KafkaDeserializationSchemaWrapper<>(new MetricSchema()),
            props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #10
Source File: KafkaSourceMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);
    DataStream<String> dataStream = blinkStreamEnv.addSource(
            new FlinkKafkaConsumer011<>(parameterTool.get("kafka.topic"), new SimpleStringSchema(), properties));

    Table table = blinkStreamTableEnv.fromDataStream(dataStream, "word");
    blinkStreamTableEnv.registerTable("kafkaDataStream", table);

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(
            new String[]{"_count", "word"},
            new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS _count,word FROM kafkaDataStream GROUP BY word");
    wordCount.insertInto("sinkTable");

    blinkStreamTableEnv.execute("Blink Kafka Table Source");
}
Example #11
Source File: FlinkDataPipeline.java From tutorials with MIT License | 6 votes |
public static void capitalize() throws Exception {
    String inputTopic = "flink_input";
    String outputTopic = "flink_output";
    String consumerGroup = "baeldung";
    String address = "localhost:9092";

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    FlinkKafkaConsumer011<String> flinkKafkaConsumer = createStringConsumerForTopic(inputTopic, address, consumerGroup);
    flinkKafkaConsumer.setStartFromEarliest();

    DataStream<String> stringInputStream = environment.addSource(flinkKafkaConsumer);

    FlinkKafkaProducer011<String> flinkKafkaProducer = createStringProducer(outputTopic, address);

    stringInputStream
            .map(new WordsCapitalizer())
            .addSink(flinkKafkaProducer);

    environment.execute();
}
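The createStringConsumerForTopic and createStringProducer helpers are defined elsewhere in that tutorial project and are not reproduced in this snippet. A plausible sketch of the consumer helper, assuming it does nothing more than wire the broker address and consumer group into the consumer properties:

// Sketch only; the actual helper in the tutorial project may differ.
public static FlinkKafkaConsumer011<String> createStringConsumerForTopic(
        String topic, String kafkaAddress, String kafkaGroup) {
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", kafkaAddress);
    props.setProperty("group.id", kafkaGroup);
    // Deserialize each record value as a plain String.
    return new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), props);
}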
Example #12
Source File: KafkaConfigUtil.java From flink-learning with Apache License 2.0 | 5 votes |
/**
 * @param env   the streaming execution environment
 * @param topic the Kafka topic to subscribe to
 * @param time  the timestamp to start consuming from
 * @return the Kafka source stream
 * @throws IllegalAccessException
 */
public static DataStreamSource<MetricEvent> buildSource(StreamExecutionEnvironment env, String topic, Long time) throws IllegalAccessException {
    ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters();
    Properties props = buildKafkaProps(parameterTool);
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            topic,
            new MetricSchema(),
            props);

    // reset the offsets to the given point in time
    if (time != 0L) {
        Map<KafkaTopicPartition, Long> partitionOffset = buildOffsetByTime(props, parameterTool, time);
        consumer.setStartFromSpecificOffsets(partitionOffset);
    }
    return env.addSource(consumer);
}
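The buildOffsetByTime helper used above is defined elsewhere in that project and is not shown here; in practice, offsets for a given timestamp are typically resolved with the Kafka consumer's offsetsForTimes API. Purely for illustration, the map passed to setStartFromSpecificOffsets can also be built by hand; a minimal sketch with made-up topic, partition, and offset values:

// Sketch only: the topic name, partition numbers, and offsets below are made up.
// KafkaTopicPartition lives in org.apache.flink.streaming.connectors.kafka.internals.
static void startFromHandPickedOffsets(FlinkKafkaConsumer011<?> consumer) {
    Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    specificOffsets.put(new KafkaTopicPartition("metric", 0), 23L); // partition 0 -> start at offset 23
    specificOffsets.put(new KafkaTopicPartition("metric", 1), 31L); // partition 1 -> start at offset 31
    specificOffsets.put(new KafkaTopicPartition("metric", 2), 43L); // partition 2 -> start at offset 43
    // Partitions not present in the map fall back to the default group-offsets behaviour.
    consumer.setStartFromSpecificOffsets(specificOffsets);
}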
Example #13
Source File: JSONKeyValueDeserializationSchemaTest.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
            new JSONKeyValueDeserializationSchema(true), // the flag controls whether metadata fields are included
            props);

    env.addSource(kafkaConsumer)
            .print(); // the record payload is in the "value" field, the corresponding metadata in the "metadata" field

    env.execute();
}
Example #14
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props))
            .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize the JSON
            .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                @Override
                public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                    // collect the product id and price fields
                    out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                }
            });
    // product.print();

    // single Redis instance; the Redis host is usually read from a configuration file
    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
    product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper()));

    // Redis cluster
    /* FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
            .setNodes(new HashSet<InetSocketAddress>(
                    Arrays.asList(new InetSocketAddress("redis1", 6379)))).build(); */

    // Redis Sentinels
    /* FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
            .setMasterName("master")
            .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
            .setPassword("")
            .setDatabase(1).build(); */

    env.execute("flink redis connector");
}
Example #15
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
Example #16
Source File: AsyncIOAlert.java From flink-learning with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            parameterTool.get("metrics.topic"),
            new MetricSchema(),
            properties);

    SingleOutputStreamOperator<MetricEvent> machineData = env.addSource(consumer)
            .assignTimestampsAndWatermarks(new MetricWatermark());

    AsyncDataStream.unorderedWait(machineData, new AlertRuleAsyncIOFunction(), 10000, TimeUnit.MICROSECONDS, 100)
            .map(metricEvent -> {
                List<String> ma = (List<String>) metricEvent.getFields().get("xx");

                AlertEvent alertEvent = new AlertEvent();
                alertEvent.setType(metricEvent.getName());
                alertEvent.setTrigerTime(metricEvent.getTimestamp());
                alertEvent.setMetricEvent(metricEvent);

                if (metricEvent.getTags().get("recover") != null && Boolean.valueOf(metricEvent.getTags().get("recover"))) {
                    alertEvent.setRecover(true);
                    alertEvent.setRecoverTime(metricEvent.getTimestamp());
                } else {
                    alertEvent.setRecover(false);
                }
                return alertEvent;
            })
            .print();

    env.execute("Async IO get MySQL data");
}
Example #17
Source File: FlinkKafkaConsumerTest2.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    env.setParallelism(1);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>("user_behavior_sink", new SimpleStringSchema(), props);
    env.addSource(consumer).print();

    env.execute("flink kafka connector test");
}
Example #18
Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    env.setParallelism(6);

    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
    rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
    rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class)) // deserialize the JSON
            // hash the record's primary key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log -> Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

    env.execute("TuningKeyedStateDeduplication");
}
Example #19
Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(6);

    // use RocksDBStateBackend as the state backend and enable incremental checkpoints
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
            "hdfs:///flink/checkpoints", true);
    rocksDBStateBackend.setNumberOfTransferingThreads(3);
    // optimized for spinning disk plus memory; an SSD for RocksDB is strongly recommended
    rocksDBStateBackend.setPredefinedOptions(
            PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    // checkpoint interval of 10 minutes
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    // checkpoint configuration
    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // Kafka consumer configuration
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class)) // deserialize the JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

    env.execute("KeyedStateDeduplication");
}
Example #20
Source File: UnionListStateExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // checkpoint every 15 seconds
    env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
    env.setParallelism(3);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    // EXACTLY_ONCE checkpoint semantics
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

    FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
            UnionListStateUtil.topic,  // Kafka topic
            new SimpleStringSchema(),  // String deserialization schema
            props);

    env.addSource(kafkaConsumer011)
            .uid(UnionListStateUtil.topic)
            .addSink(new MySink())
            .uid("MySink")
            .name("MySink");

    env.execute("Flink unionListState");
}
Example #21
Source File: Kafka011SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
    FlinkKafkaConsumer011<Row> consumer;
    if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
        Pattern pattern = Pattern.compile(topicPattern);
        consumer = new FlinkKafkaConsumer011<Row>(pattern, new MessageDeserialization(), properties);
    } else {
        consumer = new FlinkKafkaConsumer011<Row>(topic, new MessageDeserialization(), properties);
    }
    switch (super.startupMode) {
        case LATEST: {
            consumer.setStartFromLatest();
            break;
        }
        case EARLIEST: {
            consumer.setStartFromEarliest();
            break;
        }
        case GROUP_OFFSETS: {
            consumer.setStartFromGroupOffsets();
            break;
        }
        case TIMESTAMP: {
            consumer.setStartFromTimestamp(startTimeMs);
            break;
        }
        default: {
            throw new IllegalArgumentException("invalid startupMode.");
        }
    }
    return consumer;
}
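The start-position setters used in the switch above are ordinary methods on the consumer, so the same choices can be made without a builder. A short sketch, with placeholder connection settings and a placeholder timestamp:

Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
props.put("group.id", "example-group");           // placeholder consumer group

FlinkKafkaConsumer011<String> consumer =
        new FlinkKafkaConsumer011<>("example-topic", new SimpleStringSchema(), props);

// Pick exactly one start position; the last setter called before the job starts wins.
consumer.setStartFromGroupOffsets();                // default: resume from committed group offsets
// consumer.setStartFromEarliest();                 // ignore committed offsets, read from the beginning
// consumer.setStartFromLatest();                   // ignore committed offsets, read only new records
// consumer.setStartFromTimestamp(1546300800000L);  // placeholder epoch-millisecond timestamp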