org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011 Java Examples
The following examples show how to use
org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011.
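Every example below follows the same basic pattern: build a java.util.Properties object with the Kafka connection settings, construct a FlinkKafkaConsumer011 from a topic (or a topic list or pattern), a DeserializationSchema, and those properties, then register it with env.addSource(...). As a quick orientation, here is a minimal, self-contained sketch of that pattern; the broker address, group id, and topic name are placeholders rather than values taken from any example below.

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;

public class MinimalKafka011Source {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
        props.put("group.id", "example-group");           // placeholder consumer group

        // Topic name, deserialization schema, and consumer properties.
        FlinkKafkaConsumer011<String> consumer =
                new FlinkKafkaConsumer011<>("example-topic", new SimpleStringSchema(), props);

        DataStream<String> stream = env.addSource(consumer);
        stream.print();

        env.execute("Minimal FlinkKafkaConsumer011 example");
    }
}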
Example #1
Source File: Kafka011Example.java From Flink-CEPplus with Apache License 2.0 | 7 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer011<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #2
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<MetricEvent> metricData = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC),
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, MetricEvent.class));
    metricData.print();

    CheckPointUtil.setCheckpointConfig(env, parameterTool)
            .execute("zhisheng --- checkpoint config example");
}
Example #3
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    Properties props = new Properties();
    props.put("bootstrap.servers", "localhost:9092");
    props.put("zookeeper.connect", "localhost:2181");
    props.put("group.id", "metric-group");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");   // key deserialization
    props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer"); // value deserialization
    props.put("auto.offset.reset", "latest");

    DataStreamSource<String> dataStreamSource = env.addSource(new FlinkKafkaConsumer011<>(
            "metric",                 // Kafka topic
            new SimpleStringSchema(), // String deserialization schema
            props)).setParallelism(1);

    dataStreamSource.print(); // print the data read from Kafka to the console

    env.execute("Flink add data source");
}
Example #4
Source File: Kafka011Example.java From flink with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // parse input arguments
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool);

    DataStream<KafkaEvent> input = env
        .addSource(
            new FlinkKafkaConsumer011<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());

    input.addSink(
        new FlinkKafkaProducer011<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    env.execute("Kafka 0.11 Example");
}
Example #5
Source File: KafkaDeserializationSchemaTest.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
            new KafkaMetricSchema(true),
            props);

    env.addSource(kafkaConsumer)
            .flatMap(new FlatMapFunction<ObjectNode, MetricEvent>() {
                @Override
                public void flatMap(ObjectNode jsonNodes, Collector<MetricEvent> collector) throws Exception {
                    try {
                        // System.out.println(jsonNodes);
                        MetricEvent metricEvent = GsonUtil.fromJson(jsonNodes.get("value").asText(), MetricEvent.class);
                        collector.collect(metricEvent);
                    } catch (Exception e) {
                        log.error("jsonNodes = {} convert to MetricEvent has an error", jsonNodes, e);
                    }
                }
            })
            .print();

    env.execute();
}
Example #6
Source File: FlinkKafkaConsumerTest1.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // Kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"), parameterTool.get("logs.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics, new MetricSchema(), props);

    // Kafka topic Pattern
    //FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(java.util.regex.Pattern.compile("test-topic-[0-9]"), new MetricSchema(), props);

    // consumer.setStartFromLatest();
    // consumer.setStartFromEarliest();

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #7
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    DataStreamSource<String> data = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props));

    data.map(new MapFunction<String, Object>() {
        @Override
        public Object map(String string) throws Exception {
            writeEventToHbase(string, parameterTool);
            return string;
        }
    }).print();

    env.execute("flink learning connectors hbase");
}
Example #8
Source File: Main.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(parameterTool.getInt(STREAM_PARALLELISM, 1))
            .map(string -> GsonUtil.fromJson(string, Student.class)).setParallelism(4); // parse the JSON string into a Student object

    // timeWindowAll can only run with parallelism 1
    student.timeWindowAll(Time.minutes(1)).apply(new AllWindowFunction<Student, List<Student>, TimeWindow>() {
        @Override
        public void apply(TimeWindow window, Iterable<Student> values, Collector<List<Student>> out) throws Exception {
            ArrayList<Student> students = Lists.newArrayList(values);
            if (students.size() > 0) {
                log.info("Number of student records collected within 1 minute: " + students.size());
                out.collect(students);
            }
        }
    }).addSink(new SinkToMySQL()).setParallelism(parameterTool.getInt(STREAM_SINK_PARALLELISM, 1));

    env.execute("flink learning connectors mysql");
}
Example #9
Source File: FlinkKafkaSchemaTest1.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    // Kafka topic list
    List<String> topics = Arrays.asList(parameterTool.get("metrics.topic"));
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(topics,
            new KafkaDeserializationSchemaWrapper<>(new MetricSchema()),
            props);

    DataStreamSource<MetricEvent> data = env.addSource(consumer);
    data.print();

    env.execute("flink kafka connector test");
}
Example #10
Source File: KafkaSourceMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment blinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    blinkStreamEnv.setParallelism(1);
    EnvironmentSettings blinkStreamSettings = EnvironmentSettings.newInstance()
            .useBlinkPlanner()
            .inStreamingMode()
            .build();
    StreamTableEnvironment blinkStreamTableEnv = StreamTableEnvironment.create(blinkStreamEnv, blinkStreamSettings);

    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);
    DataStream<String> dataStream = blinkStreamEnv.addSource(
            new FlinkKafkaConsumer011<>(parameterTool.get("kafka.topic"), new SimpleStringSchema(), properties));

    Table table = blinkStreamTableEnv.fromDataStream(dataStream, "word");
    blinkStreamTableEnv.registerTable("kafkaDataStream", table);

    RetractStreamTableSink<Row> retractStreamTableSink = new MyRetractStreamTableSink(
            new String[]{"_count", "word"},
            new DataType[]{DataTypes.BIGINT(), DataTypes.STRING()});
    blinkStreamTableEnv.registerTableSink("sinkTable", retractStreamTableSink);

    Table wordCount = blinkStreamTableEnv.sqlQuery("SELECT count(word) AS _count,word FROM kafkaDataStream GROUP BY word");
    wordCount.insertInto("sinkTable");

    blinkStreamTableEnv.execute("Blink Kafka Table Source");
}
Example #11
Source File: FlinkDataPipeline.java From tutorials with MIT License | 6 votes |
public static void capitalize() throws Exception {
    String inputTopic = "flink_input";
    String outputTopic = "flink_output";
    String consumerGroup = "baeldung";
    String address = "localhost:9092";

    StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

    FlinkKafkaConsumer011<String> flinkKafkaConsumer = createStringConsumerForTopic(inputTopic, address, consumerGroup);
    flinkKafkaConsumer.setStartFromEarliest();

    DataStream<String> stringInputStream = environment.addSource(flinkKafkaConsumer);

    FlinkKafkaProducer011<String> flinkKafkaProducer = createStringProducer(outputTopic, address);

    stringInputStream
            .map(new WordsCapitalizer())
            .addSink(flinkKafkaProducer);

    environment.execute();
}
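The createStringConsumerForTopic and createStringProducer helpers are defined elsewhere in that tutorial project and are not reproduced in this snippet. A plausible sketch of the consumer helper, assuming it does nothing more than wire the broker address and consumer group into the consumer properties:

// Sketch only; the actual helper in the tutorial project may differ.
public static FlinkKafkaConsumer011<String> createStringConsumerForTopic(
        String topic, String kafkaAddress, String kafkaGroup) {
    Properties props = new Properties();
    props.setProperty("bootstrap.servers", kafkaAddress);
    props.setProperty("group.id", kafkaGroup);
    // Deserialize each record value as a plain String.
    return new FlinkKafkaConsumer011<>(topic, new SimpleStringSchema(), props);
}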
Example #12
Source File: KafkaConfigUtil.java From flink-learning with Apache License 2.0 | 5 votes |
/**
 * @param env   the streaming execution environment
 * @param topic the Kafka topic to subscribe to
 * @param time  the timestamp to start consuming from
 * @return the Kafka source stream
 * @throws IllegalAccessException
 */
public static DataStreamSource<MetricEvent> buildSource(StreamExecutionEnvironment env, String topic, Long time) throws IllegalAccessException {
    ParameterTool parameterTool = (ParameterTool) env.getConfig().getGlobalJobParameters();
    Properties props = buildKafkaProps(parameterTool);
    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            topic,
            new MetricSchema(),
            props);

    // reset the offsets to the given point in time
    if (time != 0L) {
        Map<KafkaTopicPartition, Long> partitionOffset = buildOffsetByTime(props, parameterTool, time);
        consumer.setStartFromSpecificOffsets(partitionOffset);
    }
    return env.addSource(consumer);
}
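The buildOffsetByTime helper used above is defined elsewhere in that project and is not shown here; in practice, offsets for a given timestamp are typically resolved with the Kafka consumer's offsetsForTimes API. Purely for illustration, the map passed to setStartFromSpecificOffsets can also be built by hand; a minimal sketch with made-up topic, partition, and offset values:

// Sketch only: the topic name, partition numbers, and offsets below are made up.
// KafkaTopicPartition lives in org.apache.flink.streaming.connectors.kafka.internals.
static void startFromHandPickedOffsets(FlinkKafkaConsumer011<?> consumer) {
    Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
    specificOffsets.put(new KafkaTopicPartition("metric", 0), 23L); // partition 0 -> start at offset 23
    specificOffsets.put(new KafkaTopicPartition("metric", 1), 31L); // partition 1 -> start at offset 31
    specificOffsets.put(new KafkaTopicPartition("metric", 2), 43L); // partition 2 -> start at offset 43
    // Partitions not present in the map fall back to the default group-offsets behaviour.
    consumer.setStartFromSpecificOffsets(specificOffsets);
}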
Example #13
Source File: JSONKeyValueDeserializationSchemaTest.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<ObjectNode> kafkaConsumer = new FlinkKafkaConsumer011<>("zhisheng",
            new JSONKeyValueDeserializationSchema(true), // the flag controls whether metadata fields are included
            props);

    env.addSource(kafkaConsumer)
            .print(); // the record payload is in the "value" field, the corresponding metadata in the "metadata" field

    env.execute();
}
Example #14
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Tuple2<String, String>> product = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props))
            .map(string -> GsonUtil.fromJson(string, ProductEvent.class)) // deserialize the JSON
            .flatMap(new FlatMapFunction<ProductEvent, Tuple2<String, String>>() {
                @Override
                public void flatMap(ProductEvent value, Collector<Tuple2<String, String>> out) throws Exception {
                    // collect the product id and price fields
                    out.collect(new Tuple2<>(value.getId().toString(), value.getPrice().toString()));
                }
            });
    // product.print();

    // single Redis instance; the Redis host is usually read from a configuration file
    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig.Builder().setHost(parameterTool.get("redis.host")).build();
    product.addSink(new RedisSink<Tuple2<String, String>>(conf, new RedisSinkMapper()));

    // Redis cluster
    /* FlinkJedisClusterConfig clusterConfig = new FlinkJedisClusterConfig.Builder()
            .setNodes(new HashSet<InetSocketAddress>(
                    Arrays.asList(new InetSocketAddress("redis1", 6379)))).build(); */

    // Redis Sentinels
    /* FlinkJedisSentinelConfig sentinelConfig = new FlinkJedisSentinelConfig.Builder()
            .setMasterName("master")
            .setSentinels(new HashSet<>(Arrays.asList("sentinel1", "sentinel2")))
            .setPassword("")
            .setDatabase(1).build(); */

    env.execute("flink redis connector");
}
Example #15
Source File: Main.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    ParameterTool parameterTool = ExecutionEnvUtil.PARAMETER_TOOL;
    Properties props = KafkaConfigUtil.buildKafkaProps(parameterTool);

    SingleOutputStreamOperator<Student> student = env.addSource(new FlinkKafkaConsumer011<>(
            parameterTool.get(METRICS_TOPIC), // this Kafka topic must match the topic configured in the utility class above
            new SimpleStringSchema(),
            props)).setParallelism(1)
            .map(string -> GsonUtil.fromJson(string, Student.class)); // the blog post uses fastjson; here Gson parses the string into a Student object

    student.addSink(new SinkToMySQL()); // sink the data to MySQL

    env.execute("Flink data sink");
}
Example #16
Source File: AsyncIOAlert.java From flink-learning with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked")
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    Properties properties = KafkaConfigUtil.buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<MetricEvent> consumer = new FlinkKafkaConsumer011<>(
            parameterTool.get("metrics.topic"),
            new MetricSchema(),
            properties);

    SingleOutputStreamOperator<MetricEvent> machineData = env.addSource(consumer)
            .assignTimestampsAndWatermarks(new MetricWatermark());

    AsyncDataStream.unorderedWait(machineData, new AlertRuleAsyncIOFunction(), 10000, TimeUnit.MICROSECONDS, 100)
            .map(metricEvent -> {
                List<String> ma = (List<String>) metricEvent.getFields().get("xx");

                AlertEvent alertEvent = new AlertEvent();
                alertEvent.setType(metricEvent.getName());
                alertEvent.setTrigerTime(metricEvent.getTimestamp());
                alertEvent.setMetricEvent(metricEvent);

                if (metricEvent.getTags().get("recover") != null && Boolean.valueOf(metricEvent.getTags().get("recover"))) {
                    alertEvent.setRecover(true);
                    alertEvent.setRecoverTime(metricEvent.getTimestamp());
                } else {
                    alertEvent.setRecover(false);
                }
                return alertEvent;
            })
            .print();

    env.execute("Async IO get MySQL data");
}
Example #17
Source File: FlinkKafkaConsumerTest2.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ExecutionEnvUtil.createParameterTool(args);
    StreamExecutionEnvironment env = ExecutionEnvUtil.prepare(parameterTool);
    env.setParallelism(1);
    Properties props = buildKafkaProps(parameterTool);

    FlinkKafkaConsumer011<String> consumer = new FlinkKafkaConsumer011<>("user_behavior_sink", new SimpleStringSchema(), props);
    env.addSource(consumer).print();

    env.execute("flink kafka connector test");
}
Example #18
Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    env.setParallelism(6);

    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
    rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
    rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class)) // deserialize the JSON
            // hash the record's primary key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log -> Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

    env.execute("TuningKeyedStateDeduplication");
}
Example #19
Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(6);

    // use RocksDBStateBackend as the state backend and enable incremental checkpoints
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
            "hdfs:///flink/checkpoints", true);
    rocksDBStateBackend.setNumberOfTransferingThreads(3);
    // optimized for spinning disk plus memory; an SSD for RocksDB is strongly recommended
    rocksDBStateBackend.setPredefinedOptions(
            PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    // checkpoint interval of 10 minutes
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    // checkpoint configuration
    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // Kafka consumer configuration
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class)) // deserialize the JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

    env.execute("KeyedStateDeduplication");
}
Example #20
Source File: UnionListStateExample.java From flink-learning with Apache License 2.0 | 5 votes |
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    // checkpoint every 15 seconds
    env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
    env.setParallelism(3);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    // EXACTLY_ONCE checkpoint semantics
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

    FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
            UnionListStateUtil.topic,  // Kafka topic
            new SimpleStringSchema(),  // String deserialization schema
            props);

    env.addSource(kafkaConsumer011)
            .uid(UnionListStateUtil.topic)
            .addSink(new MySink())
            .uid("MySink")
            .name("MySink");

    env.execute("Flink unionListState");
}
Example #21
Source File: Kafka011SourceBuilder.java From Alink with Apache License 2.0 | 5 votes |
@Override
public RichParallelSourceFunction<Row> build() {
    FlinkKafkaConsumer011<Row> consumer;
    if (!StringUtils.isNullOrWhitespaceOnly(topicPattern)) {
        Pattern pattern = Pattern.compile(topicPattern);
        consumer = new FlinkKafkaConsumer011<Row>(pattern, new MessageDeserialization(), properties);
    } else {
        consumer = new FlinkKafkaConsumer011<Row>(topic, new MessageDeserialization(), properties);
    }
    switch (super.startupMode) {
        case LATEST: {
            consumer.setStartFromLatest();
            break;
        }
        case EARLIEST: {
            consumer.setStartFromEarliest();
            break;
        }
        case GROUP_OFFSETS: {
            consumer.setStartFromGroupOffsets();
            break;
        }
        case TIMESTAMP: {
            consumer.setStartFromTimestamp(startTimeMs);
            break;
        }
        default: {
            throw new IllegalArgumentException("invalid startupMode.");
        }
    }
    return consumer;
}
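The start-position setters used in the switch above are ordinary methods on the consumer, so the same choices can be made without a builder. A short sketch, with placeholder connection settings and a placeholder timestamp:

Properties props = new Properties();
props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
props.put("group.id", "example-group");           // placeholder consumer group

FlinkKafkaConsumer011<String> consumer =
        new FlinkKafkaConsumer011<>("example-topic", new SimpleStringSchema(), props);

// Pick exactly one start position; the last setter called before the job starts wins.
consumer.setStartFromGroupOffsets();                // default: resume from committed group offsets
// consumer.setStartFromEarliest();                 // ignore committed offsets, read from the beginning
// consumer.setStartFromLatest();                   // ignore committed offsets, read only new records
// consumer.setStartFromTimestamp(1546300800000L);  // placeholder epoch-millisecond timestamp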