org.apache.flink.streaming.api.environment.CheckpointConfig Java Exaples

Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0

6 votes

@Test
public void testSetCheckpointConfig() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();

	// use deprecated API to set not fail on checkpoint errors
	checkpointConfig.setFailOnCheckpointingErrors(false);
	Assert.assertFalse(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER, checkpointConfig.getTolerableCheckpointFailureNumber());

	// use new API to set tolerable declined checkpoint number
	checkpointConfig.setTolerableCheckpointFailureNumber(5);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());

	// after we configure the tolerable declined checkpoint number, deprecated API would not take effect
	checkpointConfig.setFailOnCheckpointingErrors(true);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());
}

Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0

6 votes

@Test
public void testSetCheckpointConfig() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();

	// use deprecated API to set not fail on checkpoint errors
	checkpointConfig.setFailOnCheckpointingErrors(false);
	Assert.assertFalse(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER, checkpointConfig.getTolerableCheckpointFailureNumber());

	// use new API to set tolerable declined checkpoint number
	checkpointConfig.setTolerableCheckpointFailureNumber(5);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());

	// after we configure the tolerable declined checkpoint number, deprecated API would not take effect
	checkpointConfig.setFailOnCheckpointingErrors(true);
	Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());
}

Source File: UnionListStateExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 1 分钟一次CheckPoint
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // CheckPoint 语义 EXACTLY ONCE
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // kafka topic， String 序列化
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }

Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // 使用 RocksDBStateBackend 做为状态后端，并开启增量 Checkpoint
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // 设置为机械硬盘+内存模式，强烈建议为 RocksDB 配备 SSD
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // Checkpoint 间隔为 10 分钟
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // 配置 Checkpoint
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka Consumer 配置
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }

Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            // 这里将日志的主键 id 通过 murmur3_128 hash 后，将生成 long 类型数据当做 key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }

Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: MapStateUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy("date","pageId") // 按照 日期和页面 进行 keyBy
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // 存储当前 key 对应的 userId 集合
                private MapState<String,Boolean> userIdState;
                // 存储当前 key 对应的 UV 值
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // 初始化 uvState
                    if(null == uvState.value()){
                        uvState.update(0L);
                    }
                    // userIdState 中不包含当前访问的 userId，说明该用户今天还未访问过该页面
                    // 则将该 userId put 到 userIdState 中，并把 UV 值 +1
                    if(!userIdState.contains(userVisitWebEvent.getUserId())){
                        userIdState.put(userVisitWebEvent.getUserId(),null);
                        uvState.update(uvState.value() + 1);
                    }
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + "   :::   " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // 从状态中恢复 userIdState
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // 从状态中恢复 uvState
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: HyperLogLogUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: RegionFailoverITCase.java From flink with Apache License 2.0

5 votes

private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();

		// Use DataStreamUtils#reinterpretAsKeyed to avoid merge regions and this stream graph would exist num of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph totally disconnected with the above one.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}

Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0

5 votes

private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
	long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
	CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
		? CheckpointingMode.EXACTLY_ONCE
		: CheckpointingMode.AT_LEAST_ONCE;

	env.enableCheckpointing(checkpointInterval, checkpointingMode);

	boolean enableExternalizedCheckpoints = pt.getBoolean(
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

	if (enableExternalizedCheckpoints) {
		String cleanupModeConfig = pt.get(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

		CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
		switch (cleanupModeConfig) {
			case "retain":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
				break;
			case "delete":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
				break;
			default:
				throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
		}
		env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

		final int tolerableDeclinedCheckpointNumber = pt.getInt(
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
		env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
	}
}

Source File: StreamGraph.java From flink with Apache License 2.0

5 votes

public StreamGraph(ExecutionConfig executionConfig, CheckpointConfig checkpointConfig, SavepointRestoreSettings savepointRestoreSettings) {
	this.executionConfig = checkNotNull(executionConfig);
	this.checkpointConfig = checkNotNull(checkpointConfig);
	this.savepointRestoreSettings = checkNotNull(savepointRestoreSettings);

	// create an empty new stream graph.
	clear();
}

Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0

5 votes

@Test
public void testCheckpointConfigDefault() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();
	Assert.assertTrue(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(0, checkpointConfig.getTolerableCheckpointFailureNumber());
}

Source File: StreamGraph.java From flink with Apache License 2.0

5 votes

public StreamGraph(ExecutionConfig executionConfig, CheckpointConfig checkpointConfig) {
	this.executionConfig = checkNotNull(executionConfig);
	this.checkpointConfig = checkNotNull(checkpointConfig);

	// create an empty new stream graph.
	clear();
}

Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0

5 votes

private static void setupCheckpointing(final StreamExecutionEnvironment env, final ParameterTool pt) {
	String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
	long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
	CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
		? CheckpointingMode.EXACTLY_ONCE
		: CheckpointingMode.AT_LEAST_ONCE;

	env.enableCheckpointing(checkpointInterval, checkpointingMode);

	boolean enableExternalizedCheckpoints = pt.getBoolean(
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
		ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

	if (enableExternalizedCheckpoints) {
		String cleanupModeConfig = pt.get(
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
			ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

		CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
		switch (cleanupModeConfig) {
			case "retain":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
				break;
			case "delete":
				cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
				break;
			default:
				throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
		}
		env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);

		final int tolerableDeclinedCheckpointNumber = pt.getInt(
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.key(),
			ENVIRONMENT_TOLERABLE_DECLINED_CHECKPOINT_NUMBER.defaultValue());
		env.getCheckpointConfig().setTolerableCheckpointFailureNumber(tolerableDeclinedCheckpointNumber);
	}
}

Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0

5 votes

@Test
public void testCheckpointConfigDefault() {
	StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
	CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();
	Assert.assertTrue(checkpointConfig.isFailOnCheckpointingErrors());
	Assert.assertEquals(0, checkpointConfig.getTolerableCheckpointFailureNumber());
}

Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

@Test
public void generatorForwardsSavepointRestoreSettings() {
	StreamGraphGenerator streamGraphGenerator =
			new StreamGraphGenerator(Collections.emptyList(),
			new ExecutionConfig(),
			new CheckpointConfig());

	streamGraphGenerator.setSavepointRestoreSettings(SavepointRestoreSettings.forPath("hello"));

	StreamGraph streamGraph = streamGraphGenerator.generate();
	assertThat(streamGraph.getSavepointRestoreSettings().getRestorePath(), is("hello"));
}

Source File: HyperLogLogUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0

5 votes

@Test
public void generatorForwardsSavepointRestoreSettings() {
	StreamGraph streamGraph = new StreamGraph(
			new ExecutionConfig(),
			new CheckpointConfig(),
			SavepointRestoreSettings.forPath("hello"));

	JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

	SavepointRestoreSettings savepointRestoreSettings = jobGraph.getSavepointRestoreSettings();
	assertThat(savepointRestoreSettings.getRestorePath(), is("hello"));
}

Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
        env.setParallelism(2);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                UvExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
                .Builder().setHost("192.168.30.244").build();

        env.addSource(kafkaConsumer)
                .map(string -> {
                    // 反序列化 JSON
                    UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                            string, UserVisitWebEvent.class);
                    // 生成 Redis key，格式为 日期_pageId，如: 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
                })
                .returns(new TypeHint<Tuple2<String, String>>(){})
                .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

        env.execute("Redis Set UV Stat");
    }

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

5 votes

private CheckpointingMode getCheckpointingMode(CheckpointConfig checkpointConfig) {
	CheckpointingMode checkpointingMode = checkpointConfig.getCheckpointingMode();

	checkArgument(checkpointingMode == CheckpointingMode.EXACTLY_ONCE ||
		checkpointingMode == CheckpointingMode.AT_LEAST_ONCE, "Unexpected checkpointing mode.");

	if (checkpointConfig.isCheckpointingEnabled()) {
		return checkpointingMode;
	} else {
		// the "at-least-once" input handler is slightly cheaper (in the absence of checkpoints),
		// so we use that one if checkpointing is not enabled
		return CheckpointingMode.AT_LEAST_ONCE;
	}
}

Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        env.setParallelism(6);

        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
        rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
        rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromLatest();

        env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // 反序列化 JSON
            // 这里将日志的主键 id 通过 murmur3_128 hash 后，将生成 long 类型数据当做 key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

        env.execute("TuningKeyedStateDeduplication");
    }

Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception{

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(6);

        // 使用 RocksDBStateBackend 做为状态后端，并开启增量 Checkpoint
        RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
                "hdfs:///flink/checkpoints", true);
        rocksDBStateBackend.setNumberOfTransferingThreads(3);
        // 设置为机械硬盘+内存模式，强烈建议为 RocksDB 配备 SSD
        rocksDBStateBackend.setPredefinedOptions(
                PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
        rocksDBStateBackend.enableTtlCompactionFilter();
        env.setStateBackend(rocksDBStateBackend);

        // Checkpoint 间隔为 10 分钟
        env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
        // 配置 Checkpoint
        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
        checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
        checkpointConf.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        // Kafka Consumer 配置
        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
        FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
                DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
                .setStartFromGroupOffsets();

        env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // 反序列化 JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

        env.execute("KeyedStateDeduplication");
    }

Source File: UnionListStateExample.java From flink-learning with Apache License 2.0

5 votes

public static void main(String[] args) throws Exception {

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // 1 分钟一次CheckPoint
        env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
        env.setParallelism(3);

        CheckpointConfig checkpointConf = env.getCheckpointConfig();
        // CheckPoint 语义 EXACTLY ONCE
        checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        Properties props = new Properties();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

        FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
                // kafka topic， String 序列化
                UnionListStateUtil.topic, new SimpleStringSchema(), props);

        env.addSource(kafkaConsumer011)
                .uid(UnionListStateUtil.topic)
                .addSink(new MySink())
                .uid("MySink")
                .name("MySink");

        env.execute("Flink unionListState");
    }

Source File: RegionFailoverITCase.java From flink with Apache License 2.0

5 votes

private JobGraph createJobGraph() {

		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
		env.setParallelism(NUM_OF_REGIONS);
		env.setMaxParallelism(MAX_PARALLELISM);
		env.enableCheckpointing(200, CheckpointingMode.EXACTLY_ONCE);
		env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		env.disableOperatorChaining();
		env.getConfig().disableSysoutLogging();

		// Use DataStreamUtils#reinterpretAsKeyed to avoid merge regions and this stream graph would exist num of 'NUM_OF_REGIONS' individual regions.
		DataStreamUtils.reinterpretAsKeyedStream(
			env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS))
				.name(MULTI_REGION_SOURCE_NAME)
				.setParallelism(NUM_OF_REGIONS),
			(KeySelector<Tuple2<Integer, Integer>, Integer>) value -> value.f0,
			TypeInformation.of(Integer.class))
			.map(new FailingMapperFunction(NUM_OF_RESTARTS))
			.setParallelism(NUM_OF_REGIONS)
			.addSink(new ValidatingSink())
			.setParallelism(NUM_OF_REGIONS);

		// another stream graph totally disconnected with the above one.
		env.addSource(new StringGeneratingSourceFunction(NUM_ELEMENTS, NUM_ELEMENTS / NUM_OF_RESTARTS)).
			name(SINGLE_REGION_SOURCE_NAME).setParallelism(1)
			.map((MapFunction<Tuple2<Integer, Integer>, Object>) value -> value).setParallelism(1);

		return env.getStreamGraph().getJobGraph();
	}

Source File: StreamingJobGraphGenerator.java From flink with Apache License 2.0

5 votes

@SuppressWarnings("deprecation")
private void preValidate() {
	CheckpointConfig checkpointConfig = streamGraph.getCheckpointConfig();

	if (checkpointConfig.isCheckpointingEnabled()) {
		// temporarily forbid checkpointing for iterative jobs
		if (streamGraph.isIterative() && !checkpointConfig.isForceCheckpointing()) {
			throw new UnsupportedOperationException(
				"Checkpointing is currently not supported by default for iterative jobs, as we cannot guarantee exactly once semantics. "
					+ "State checkpoints happen normally, but records in-transit during the snapshot will be lost upon failure. "
					+ "\nThe user can force enable state checkpoints with the reduced guarantees by calling: env.enableCheckpointing(interval,true)");
		}

		ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
		for (StreamNode node : streamGraph.getStreamNodes()) {
			StreamOperatorFactory operatorFactory = node.getOperatorFactory();
			if (operatorFactory != null) {
				Class<?> operatorClass = operatorFactory.getStreamOperatorClass(classLoader);
				if (InputSelectable.class.isAssignableFrom(operatorClass)) {

					throw new UnsupportedOperationException(
						"Checkpointing is currently not supported for operators that implement InputSelectable:"
							+ operatorClass.getName());
				}
			}
		}
	}

	if (checkpointConfig.isUnalignedCheckpointsEnabled() && getCheckpointingMode(checkpointConfig) != CheckpointingMode.EXACTLY_ONCE) {
		LOG.warn("Unaligned checkpoints can only be used with checkpointing mode EXACTLY_ONCE");
		checkpointConfig.enableUnalignedCheckpoints(false);
	}
}

Source File: StickyAllocationAndLocalRecoveryTestJob.java From flink with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

		final ParameterTool pt = ParameterTool.fromArgs(args);

		final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		env.setParallelism(pt.getInt("parallelism", 1));
		env.setMaxParallelism(pt.getInt("maxParallelism", pt.getInt("parallelism", 1)));
		env.enableCheckpointing(pt.getInt("checkpointInterval", 1000));
		env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, pt.getInt("restartDelay", 0)));
		if (pt.getBoolean("externalizedCheckpoints", false)) {
			env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
		}

		String stateBackend = pt.get("stateBackend", "file");
		String checkpointDir = pt.getRequired("checkpointDir");

		boolean killJvmOnFail = pt.getBoolean("killJvmOnFail", false);

		if ("file".equals(stateBackend)) {
			boolean asyncCheckpoints = pt.getBoolean("asyncCheckpoints", true);
			env.setStateBackend(new FsStateBackend(checkpointDir, asyncCheckpoints));
		} else if ("rocks".equals(stateBackend)) {
			boolean incrementalCheckpoints = pt.getBoolean("incrementalCheckpoints", false);
			env.setStateBackend(new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
		} else {
			throw new IllegalArgumentException("Unknown backend: " + stateBackend);
		}

		// make parameters available in the web interface
		env.getConfig().setGlobalJobParameters(pt);

		// delay to throttle down the production of the source
		long delay = pt.getLong("delay", 0L);

		// the maximum number of attempts, before the job finishes with success
		int maxAttempts = pt.getInt("maxAttempts", 3);

		// size of one artificial value
		int valueSize = pt.getInt("valueSize", 10);

		env.addSource(new RandomLongSource(maxAttempts, delay))
			.keyBy((KeySelector<Long, Long>) aLong -> aLong)
			.flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail))
			.addSink(new PrintSinkFunction<>());

		env.execute("Sticky Allocation And Local Recovery Test");
	}

Source File: DummyStreamExecutionEnvironment.java From flink with Apache License 2.0

4 votes

@Override
public CheckpointConfig getCheckpointConfig() {
	return realExecEnv.getCheckpointConfig();
}

Source File: DummyStreamExecutionEnvironment.java From flink with Apache License 2.0

4 votes

@Override
public CheckpointConfig getCheckpointConfig() {
	return realExecEnv.getCheckpointConfig();
}

Source File: StreamGraph.java From flink with Apache License 2.0

4 votes

public CheckpointConfig getCheckpointConfig() {
	return checkpointConfig;
}

Source File: RidesAndFaresSolution.java From flink-training-exercises with Apache License 2.0

4 votes

public static void main(String[] args) throws Exception {

		ParameterTool params = ParameterTool.fromArgs(args);
		final String ridesFile = params.get("rides", pathToRideData);
		final String faresFile = params.get("fares", pathToFareData);

		final int delay = 60;					// at most 60 seconds of delay
		final int servingSpeedFactor = 1800; 	// 30 minutes worth of events are served every second

		// Set up streaming execution environment, including Web UI and REST endpoint.
		// Checkpointing isn't needed for the RidesAndFares exercise; this setup is for
		// using the State Processor API.

		Configuration conf = new Configuration();
		conf.setString("state.backend", "filesystem");
		conf.setString("state.savepoints.dir", "file:///tmp/savepoints");
		conf.setString("state.checkpoints.dir", "file:///tmp/checkpoints");
		StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
		env.setParallelism(ExerciseBase.parallelism);

		env.enableCheckpointing(10000L);
		CheckpointConfig config = env.getCheckpointConfig();
		config.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

		DataStream<TaxiRide> rides = env
				.addSource(rideSourceOrTest(new TaxiRideSource(ridesFile, delay, servingSpeedFactor)))
				.filter((TaxiRide ride) -> ride.isStart)
				.keyBy(ride -> ride.rideId);

		DataStream<TaxiFare> fares = env
				.addSource(fareSourceOrTest(new TaxiFareSource(faresFile, delay, servingSpeedFactor)))
				.keyBy(fare -> fare.rideId);

		// Set a UID on the stateful flatmap operator so we can read its state using the State Processor API.
		DataStream<Tuple2<TaxiRide, TaxiFare>> enrichedRides = rides
				.connect(fares)
				.flatMap(new EnrichmentFunction())
				.uid("enrichment");

		printOrTest(enrichedRides);

		env.execute("Join Rides with Fares (java RichCoFlatMap)");
	}

org.apache.flink.streaming.api.environment.CheckpointConfig Java Examples