org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase Java Exaples

Source File: KafkaDynamicTableFactoryTestBase.java From flink with Apache License 2.0

6 votes

@Test
public void testTableSourceCommitOnCheckpointsDisabled() {
	//Construct table source using options and table source factory
	ObjectIdentifier objectIdentifier = ObjectIdentifier.of(
		"default",
		"default",
		"scanTable");
	Map<String, String> tableOptions = getFullSourceOptions();
	tableOptions.remove("properties.group.id");
	CatalogTable catalogTable = createKafkaSourceCatalogTable(tableOptions);
	final DynamicTableSource tableSource = FactoryUtil.createTableSource(null,
		objectIdentifier,
		catalogTable,
		new Configuration(),
		Thread.currentThread().getContextClassLoader());

	// Test commitOnCheckpoints flag should be false when do not set consumer group.
	assertThat(tableSource, instanceOf(KafkaDynamicSourceBase.class));
	ScanTableSource.ScanRuntimeProvider providerWithoutGroupId = ((KafkaDynamicSourceBase) tableSource)
		.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
	assertThat(providerWithoutGroupId, instanceOf(SourceFunctionProvider.class));
	final SourceFunctionProvider functionProviderWithoutGroupId = (SourceFunctionProvider) providerWithoutGroupId;
	final SourceFunction<RowData> function = functionProviderWithoutGroupId.createSourceFunction();
	assertFalse(((FlinkKafkaConsumerBase) function).getEnableCommitOnCheckpoints());
}

Source File: KafkaBaseSource.java From sylph with Apache License 2.0

5 votes

/**
 * 初始化(driver阶段执行)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode();

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}

Source File: KafkaDynamicSourceBase.java From flink with Apache License 2.0

5 votes

/**
 * Returns a version-specific Kafka consumer with the start position configured.
 *
 * @param topic                 Kafka topic to consume.
 * @param properties            Properties for the Kafka consumer.
 * @param deserializationSchema Deserialization schema to use for Kafka records.
 * @return The version-specific Kafka consumer
 */
protected FlinkKafkaConsumerBase<RowData> getKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	FlinkKafkaConsumerBase<RowData> kafkaConsumer =
			createKafkaConsumer(topic, properties, deserializationSchema);
	switch (startupMode) {
		case EARLIEST:
			kafkaConsumer.setStartFromEarliest();
			break;
		case LATEST:
			kafkaConsumer.setStartFromLatest();
			break;
		case GROUP_OFFSETS:
			kafkaConsumer.setStartFromGroupOffsets();
			break;
		case SPECIFIC_OFFSETS:
			kafkaConsumer.setStartFromSpecificOffsets(specificStartupOffsets);
			break;
		case TIMESTAMP:
			kafkaConsumer.setStartFromTimestamp(startupTimestampMillis);
			break;
		}
	kafkaConsumer.setCommitOffsetsOnCheckpoints(properties.getProperty("group.id") != null);
	return kafkaConsumer;
}

Source File: KafkaDynamicSourceBase.java From flink with Apache License 2.0

5 votes

@Override
public ScanRuntimeProvider getScanRuntimeProvider(ScanContext runtimeProviderContext) {
	DeserializationSchema<RowData> deserializationSchema =
			this.decodingFormat.createRuntimeDecoder(runtimeProviderContext, this.outputDataType);
	// Version-specific Kafka consumer
	FlinkKafkaConsumerBase<RowData> kafkaConsumer =
			getKafkaConsumer(topic, properties, deserializationSchema);
	return SourceFunctionProvider.of(kafkaConsumer, false);
}

Source File: Kafka010DynamicSource.java From flink with Apache License 2.0

5 votes

@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer010<>(topic, deserializationSchema, properties);
}

Source File: Kafka011DynamicSource.java From flink with Apache License 2.0

5 votes

@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer011<>(topic, deserializationSchema, properties);
}

Source File: KafkaDynamicSource.java From flink with Apache License 2.0

5 votes

@Override
protected FlinkKafkaConsumerBase<RowData> createKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<RowData> deserializationSchema) {
	return new FlinkKafkaConsumer<>(topic, deserializationSchema, properties);
}

Source File: KafkaTableSource.java From df_data_service with Apache License 2.0

5 votes

@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer = getKafkaConsumer(topic, properties, deserializationSchema);
    DataStream<Row> kafkaSource = env.addSource(kafkaConsumer);
    return kafkaSource;
}

Source File: KafkaBaseSource.java From sylph with Apache License 2.0

5 votes

/**
 * 初始化(driver阶段执行)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //latest   earliest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}

Source File: KafkaSource09.java From sylph with Apache License 2.0

5 votes

@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    //kafka08 kafka09 需要设置 zk
    properties.put("zookeeper.connect", config.getZookeeper());
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer09<>(topicSets, deserializationSchema, properties);
}

Source File: KafkaSource.java From sylph with Apache License 2.0

5 votes

@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    //"enable.auto.commit"-> true
    //"auto.commit.interval.ms" -> 90000
    return new FlinkKafkaConsumer010<>(
            topicSets,
            deserializationSchema,
            properties);
}

Source File: KafkaBaseSource.java From sylph with Apache License 2.0

5 votes

/**
 * 初始化(driver阶段执行)
 **/
public DataStream<Row> createSource(StreamExecutionEnvironment execEnv, KafkaSourceConfig config, SourceContext context)
{
    requireNonNull(execEnv, "execEnv is null");
    requireNonNull(config, "config is null");
    String topics = config.getTopics();
    String groupId = config.getGroupid();
    String offsetMode = config.getOffsetMode(); //latest earliest

    Properties properties = new Properties();
    for (Map.Entry<String, Object> entry : config.getOtherConfig().entrySet()) {
        if (entry.getValue() != null) {
            properties.setProperty(entry.getKey(), entry.getValue().toString());
        }
    }

    properties.put("bootstrap.servers", config.getBrokers());  //需要把集群的host 配置到程序所在机器
    //"enable.auto.commit" -> (false: java.lang.Boolean), //不自动提交偏移量
    //      "session.timeout.ms" -> "30000", //session默认是30秒 超过5秒不提交offect就会报错
    //      "heartbeat.interval.ms" -> "5000", //10秒提交一次 心跳周期
    properties.put("group.id", groupId); //注意不同的流 group.id必须要不同 否则会出现offect commit提交失败的错误
    properties.put("auto.offset.reset", offsetMode); //largest   smallest

    KafkaDeserializationSchema<Row> deserializationSchema = "json".equals(config.getValueType()) ?
            new JsonDeserializationSchema(context.getSchema()) : new RowDeserializer();

    List<String> topicSets = Arrays.asList(topics.split(","));
    //org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
    FlinkKafkaConsumerBase<Row> base = getKafkaConsumerBase(topicSets, deserializationSchema, properties);
    return execEnv.addSource(base);
}

Source File: HyperLogLogUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: HyperLogLogUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            // 这里将日志的主键 id 通过 murmur3_128 hash 后，将生成 long 类型数据当做 key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }

Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // 使用 RocksDBStateBackend 做为状态后端，并开启增量 Checkpoint
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // 设置为机械硬盘+内存模式，强烈建议为 RocksDB 配备 SSD
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // Checkpoint 间隔为 10 分钟
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // 配置 Checkpoint
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka Consumer 配置
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }

Source File: KafkaSource08.java From sylph with Apache License 2.0

5 votes

@Override
public FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets, KafkaDeserializationSchema<Row> deserializationSchema, Properties properties)
{
    //kafka08 kafka09 需要设置 zk
    properties.put("zookeeper.connect", config.getZookeeper());
    //"auto.commit.enable"-> true
    //"auto.commit.interval.ms" -> 90000
    FlinkKafkaConsumer08<Row> kafkaConsumer08 = new FlinkKafkaConsumer08<>(topicSets, deserializationSchema, properties);
    //kafkaConsumer08.setCommitOffsetsOnCheckpoints(true);
    return kafkaConsumer08;
}

Source File: MapStateUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy("date","pageId") // 按照 日期和页面 进行 keyBy
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // 存储当前 key 对应的 userId 集合
                private MapState<String,Boolean> userIdState;
                // 存储当前 key 对应的 UV 值
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // 初始化 uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // userIdState 中不包含当前访问的 userId，说明该用户今天还未访问过该页面
                    // 则将该 userId put 到 userIdState 中，并把 UV 值 +1
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // 从状态中恢复 userIdState
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // 从状态中恢复 uvState
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            // 这里将日志的主键 id 通过 murmur3_128 hash 后，将生成 long 类型数据当做 key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }

Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // 使用 RocksDBStateBackend 做为状态后端，并开启增量 Checkpoint
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // 设置为机械硬盘+内存模式，强烈建议为 RocksDB 配备 SSD
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // Checkpoint 间隔为 10 分钟
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // 配置 Checkpoint
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka Consumer 配置
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }

Source File: KafkaBaseSource.java From sylph with Apache License 2.0

4 votes

public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);

Source File: Kafka010AvroTableSource.java From df_data_service with Apache License 2.0

4 votes

@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(String topic, Properties properties, DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer010<>(topic, deserializationSchema, properties);
}

Source File: Kafka09AvroTableSource.java From df_data_service with Apache License 2.0

4 votes

@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(String topic, Properties properties, DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}

Source File: PvStatLocalKeyByExactlyOnce.java From flink-learning with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 1 分钟一次 Checkpoint
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // Checkpoint 语义 EXACTLY ONCE
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PvStatExactlyOnceKafkaUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumerBase<String> appKafkaConsumer = new FlinkKafkaConsumer011<>(
                // kafka topic， String 序列化
                PvStatExactlyOnceKafkaUtil.topic, new SimpleStringSchema(), props).setStartFromLatest();


        env.addSource(appKafkaConsumer)
                .flatMap(new LocalKeyByFlatMap(10))
                // 按照 appId 进行 keyBy
                .keyBy((KeySelector<Tuple2<String, Long>, String>) appIdPv -> appIdPv.f0)
                .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
                    private ValueState<Long> pvState;
                    private long pv = 0;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        // 初始化状态
                        pvState = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("pvStat",
                                        TypeInformation.of(new TypeHint<Long>() {
                                        })));
                    }

                    @Override
                    public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                        // 从状态中获取该 app 的pv值，加上新收到的 pv 值以后后，update 到状态中
                        if (null == pvState.value()) {
                            log.info("{} is new, PV is {}", tuple2.f0, tuple2.f1);
                            pv = tuple2.f1;
                        } else {
                            pv = pvState.value();
                            pv += tuple2.f1;
                            log.info("{} is old, PV is {}", tuple2.f0, pv);
                        }
                        pvState.update(pv);
                        tuple2.setField(pv, 1);
                        return tuple2;
                    }
                })
                .print();

        env.execute("Flink pv stat LocalKeyBy");
    }

Source File: PvStatLocalKeyByExactlyOnce.java From flink-learning with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 1 分钟一次 Checkpoint
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // Checkpoint 语义 EXACTLY ONCE
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PvStatExactlyOnceKafkaUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumerBase<String> appKafkaConsumer = new FlinkKafkaConsumer011<>(
                // kafka topic， String 序列化
                PvStatExactlyOnceKafkaUtil.topic, new SimpleStringSchema(), props).setStartFromLatest();


        env.addSource(appKafkaConsumer)
                .flatMap(new LocalKeyByFlatMap(10))
                // 按照 appId 进行 keyBy
                .keyBy((KeySelector<Tuple2<String, Long>, String>) appIdPv -> appIdPv.f0)
                .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
                    private ValueState<Long> pvState;
                    private long pv = 0;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        // 初始化状态
                        pvState = getRuntimeContext().getState(
                                new ValueStateDescriptor<>("pvStat",
                                        TypeInformation.of(new TypeHint<Long>() {
                                        })));
                    }

                    @Override
                    public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                        // 从状态中获取该 app 的pv值，加上新收到的 pv 值以后后，update 到状态中
                        if (null == pvState.value()) {
                            log.info("{} is new, PV is {}", tuple2.f0, tuple2.f1);
                            pv = tuple2.f1;
                        } else {
                            pv = pvState.value();
                            pv += tuple2.f1;
                            log.info("{} is old, PV is {}", tuple2.f0, pv);
                        }
                        pvState.update(pv);
                        tuple2.setField(pv, 1);
                        return tuple2;
                    }
                })
                .print();

        env.execute("Flink pv stat LocalKeyBy");
    }

Source File: KafkaBaseSource.java From sylph with Apache License 2.0

4 votes

public abstract FlinkKafkaConsumerBase<Row> getKafkaConsumerBase(List<String> topicSets,
KafkaDeserializationSchema<Row> deserializationSchema, Properties properties);

Source File: MapStateUvExample.java From flink-learning with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy("date","pageId") // 按照 日期和页面 进行 keyBy
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // 存储当前 key 对应的 userId 集合
                private MapState<String,Boolean> userIdState;
                // 存储当前 key 对应的 UV 值
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // 初始化 uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // userIdState 中不包含当前访问的 userId，说明该用户今天还未访问过该页面
                    // 则将该 userId put 到 userIdState 中，并把 UV 值 +1
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // 从状态中恢复 userIdState
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // 从状态中恢复 uvState
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: KafkaDynamicTableFactoryTestBase.java From flink with Apache License 2.0

4 votes

@Test
@SuppressWarnings("unchecked")
public void testTableSource() {
	// prepare parameters for Kafka table source
	final DataType producedDataType = SOURCE_SCHEMA.toPhysicalRowDataType();

	final Map<KafkaTopicPartition, Long> specificOffsets = new HashMap<>();
	specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_0), OFFSET_0);
	specificOffsets.put(new KafkaTopicPartition(TOPIC, PARTITION_1), OFFSET_1);

	DecodingFormat<DeserializationSchema<RowData>> decodingFormat =
			new TestFormatFactory.DecodingFormatMock(",", true);

	// Construct table source using options and table source factory
	ObjectIdentifier objectIdentifier = ObjectIdentifier.of(
			"default",
			"default",
			"scanTable");
	CatalogTable catalogTable = createKafkaSourceCatalogTable();
	final DynamicTableSource actualSource = FactoryUtil.createTableSource(null,
			objectIdentifier,
			catalogTable,
			new Configuration(),
			Thread.currentThread().getContextClassLoader());

	// Test scan source equals
	final KafkaDynamicSourceBase expectedKafkaSource = getExpectedScanSource(
			producedDataType,
			TOPIC,
			KAFKA_PROPERTIES,
			decodingFormat,
			StartupMode.SPECIFIC_OFFSETS,
			specificOffsets,
			0);
	final KafkaDynamicSourceBase actualKafkaSource = (KafkaDynamicSourceBase) actualSource;
	assertEquals(actualKafkaSource, expectedKafkaSource);

	// Test Kafka consumer
	ScanTableSource.ScanRuntimeProvider provider =
			actualKafkaSource.getScanRuntimeProvider(ScanRuntimeProviderContext.INSTANCE);
	assertThat(provider, instanceOf(SourceFunctionProvider.class));
	final SourceFunctionProvider sourceFunctionProvider = (SourceFunctionProvider) provider;
	final SourceFunction<RowData> sourceFunction = sourceFunctionProvider.createSourceFunction();
	assertThat(sourceFunction, instanceOf(getExpectedConsumerClass()));
	//  Test commitOnCheckpoints flag should be true when set consumer group
	assertTrue(((FlinkKafkaConsumerBase) sourceFunction).getEnableCommitOnCheckpoints());
}

org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumerBase Java Examples