Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getCheckpointConfig()
The following examples show how to use
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#getCheckpointConfig().
The examples are drawn from open source projects; the original project and source file are linked above each example.
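Before the examples, here is a minimal sketch of the pattern they all share: getCheckpointConfig() returns the environment's mutable CheckpointConfig, which is typically tuned right after checkpointing is enabled. The class name and the interval, pause, and timeout values below are illustrative assumptions, not defaults from any of the projects.

import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class CheckpointConfigUsageSketch {
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // enable checkpointing first; the 60 s interval is an arbitrary example value
        env.enableCheckpointing(60_000L);
        // getCheckpointConfig() hands back the environment's live CheckpointConfig;
        // mutating it configures the job's checkpointing behavior
        CheckpointConfig config = env.getCheckpointConfig();
        config.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        config.setMinPauseBetweenCheckpoints(30_000L);   // example value
        config.setCheckpointTimeout(120_000L);           // example value
        config.enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
    }
}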
Example 1
Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0
@Test
public void testSetCheckpointConfig() {
    StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
    CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();

    // use the deprecated API to disable failing on checkpointing errors
    checkpointConfig.setFailOnCheckpointingErrors(false);
    Assert.assertFalse(checkpointConfig.isFailOnCheckpointingErrors());
    Assert.assertEquals(CheckpointFailureManager.UNLIMITED_TOLERABLE_FAILURE_NUMBER, checkpointConfig.getTolerableCheckpointFailureNumber());

    // use the new API to set the tolerable declined checkpoint number
    checkpointConfig.setTolerableCheckpointFailureNumber(5);
    Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());

    // once the tolerable checkpoint failure number has been configured, the deprecated API no longer takes effect
    checkpointConfig.setFailOnCheckpointingErrors(true);
    Assert.assertEquals(5, checkpointConfig.getTolerableCheckpointFailureNumber());
}
Example 2
Source File: StreamGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Tests an iteration job with slot sharing disabled, checking the slot sharing group and co-location group.
 */
@Test
public void testIterationWithSlotSharingDisabled() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    StreamGraph streamGraph = generator.generate();

    for (Tuple2<StreamNode, StreamNode> iterationPair : streamGraph.getIterationSourceSinkPairs()) {
        assertNotNull(iterationPair.f0.getCoLocationGroup());
        assertEquals(iterationPair.f0.getCoLocationGroup(), iterationPair.f1.getCoLocationGroup());

        assertNotNull(iterationPair.f0.getSlotSharingGroup());
        assertEquals(iterationPair.f0.getSlotSharingGroup(), iterationPair.f1.getSlotSharingGroup());
    }
}
Example 3
Source File: UnionListStateExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // checkpoint every 15 seconds
    env.enableCheckpointing(TimeUnit.SECONDS.toMillis(15));
    env.setParallelism(3);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    // EXACTLY_ONCE checkpointing semantics
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UnionListStateUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

    FlinkKafkaConsumer011<String> kafkaConsumer011 = new FlinkKafkaConsumer011<>(
            // Kafka topic, String deserialization
            UnionListStateUtil.topic,
            new SimpleStringSchema(),
            props);

    env.addSource(kafkaConsumer011)
            .uid(UnionListStateUtil.topic)
            .addSink(new MySink())
            .uid("MySink")
            .name("MySink");

    env.execute("Flink unionListState");
}
Example 4
Source File: TuningKeyedStateDeduplication.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    env.setParallelism(6);

    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend("hdfs:///flink/checkpoints", enableIncrementalCheckpointing);
    rocksDBStateBackend.setNumberOfTransferingThreads(numberOfTransferingThreads);
    rocksDBStateBackend.setPredefinedOptions(PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            // hash the log's primary key id with murmur3_128 and use the resulting long as the key
            .keyBy((KeySelector<UserVisitWebEvent, Long>) log ->
                    Hashing.murmur3_128(5).hashUnencodedChars(log.getId()).asLong())
            .addSink(new KeyedStateDeduplication.KeyedStateSink());

    env.execute("TuningKeyedStateDeduplication");
}
Example 5
Source File: RedisSetUvExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            UvExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
            .Builder().setHost("192.168.30.244").build();

    env.addSource(kafkaConsumer)
            .map(string -> {
                // deserialize JSON
                UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                        string, UserVisitWebEvent.class);
                // build the Redis key in the format date_pageId, e.g. 20191026_0
                String redisKey = userVisitWebEvent.getDate() + "_"
                        + userVisitWebEvent.getPageId();
                return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
            })
            .returns(new TypeHint<Tuple2<String, String>>(){})
            .addSink(new RedisSink<>(conf, new RedisSaddSinkMapper()));

    env.execute("Redis Set UV Stat");
}
Example 6
Source File: MapStateUvExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            UvExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
            .Builder().setHost("192.168.30.244").build();

    env.addSource(kafkaConsumer)
            .map(string -> GsonUtil.fromJson(string, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy("date", "pageId")  // key by date and page
            .map(new RichMapFunction<UserVisitWebEvent, Tuple2<String, Long>>() {
                // the set of userIds seen for the current key
                private MapState<String, Boolean> userIdState;
                // the UV value for the current key
                private ValueState<Long> uvState;

                @Override
                public Tuple2<String, Long> map(UserVisitWebEvent userVisitWebEvent) throws Exception {
                    // initialize uvState
                    if (null == uvState.value()) {
                        uvState.update(0L);
                    }
                    // if userIdState does not contain the current userId, this user has not yet
                    // visited the page today: put the userId into userIdState and increment the UV value
                    if (!userIdState.contains(userVisitWebEvent.getUserId())) {
                        userIdState.put(userVisitWebEvent.getUserId(), null);
                        uvState.update(uvState.value() + 1);
                    }
                    // build the Redis key in the format date_pageId, e.g. 20191026_0
                    String redisKey = userVisitWebEvent.getDate() + "_"
                            + userVisitWebEvent.getPageId();
                    System.out.println(redisKey + " ::: " + uvState.value());
                    return Tuple2.of(redisKey, uvState.value());
                }

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // restore userIdState from state
                    userIdState = getRuntimeContext().getMapState(
                            new MapStateDescriptor<>("userIdState",
                                    TypeInformation.of(new TypeHint<String>() {}),
                                    TypeInformation.of(new TypeHint<Boolean>() {})));
                    // restore uvState from state
                    uvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("uvState",
                                    TypeInformation.of(new TypeHint<Long>() {})));
                }
            })
            .addSink(new RedisSink<>(conf, new RedisSetSinkMapper()));

    env.execute("Redis Set UV Stat");
}
Example 7
Source File: CheckpointExceptionHandlerConfigurationTest.java From flink with Apache License 2.0
@Test
public void testCheckpointConfigDefault() {
    StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment();
    CheckpointConfig checkpointConfig = streamExecutionEnvironment.getCheckpointConfig();
    Assert.assertTrue(checkpointConfig.isFailOnCheckpointingErrors());
    Assert.assertEquals(0, checkpointConfig.getTolerableCheckpointFailureNumber());
}
Example 8
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Tests that disabled checkpointing sets the checkpointing interval to Long.MAX_VALUE.
 */
@Test
public void testDisabledCheckpointing() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamGraph streamGraph = new StreamGraph(env.getConfig(), env.getCheckpointConfig());
    assertFalse("Checkpointing enabled", streamGraph.getCheckpointConfig().isCheckpointingEnabled());

    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph);

    JobCheckpointingSettings snapshottingSettings = jobGraph.getCheckpointingSettings();
    assertEquals(Long.MAX_VALUE, snapshottingSettings.getCheckpointCoordinatorConfiguration().getCheckpointInterval());
}
Example 9
Source File: KeyedStateDeduplication.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(6);

    // use RocksDBStateBackend as the state backend and enable incremental checkpoints
    RocksDBStateBackend rocksDBStateBackend = new RocksDBStateBackend(
            "hdfs:///flink/checkpoints", true);
    rocksDBStateBackend.setNumberOfTransferingThreads(3);
    // tuned for spinning disk plus memory; an SSD is strongly recommended for RocksDB
    rocksDBStateBackend.setPredefinedOptions(
            PredefinedOptions.SPINNING_DISK_OPTIMIZED_HIGH_MEM);
    rocksDBStateBackend.enableTtlCompactionFilter();
    env.setStateBackend(rocksDBStateBackend);

    // checkpoint every 10 minutes
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(10));
    // configure checkpointing
    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.setMinPauseBetweenCheckpoints(TimeUnit.MINUTES.toMillis(8));
    checkpointConf.setCheckpointTimeout(TimeUnit.MINUTES.toMillis(20));
    checkpointConf.enableExternalizedCheckpoints(
            CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // Kafka consumer configuration
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, DeduplicationExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "keyed-state-deduplication");
    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            DeduplicationExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromGroupOffsets();

    env.addSource(kafkaConsumer)
            .map(log -> GsonUtil.fromJson(log, UserVisitWebEvent.class))  // deserialize JSON
            .keyBy((KeySelector<UserVisitWebEvent, String>) UserVisitWebEvent::getId)
            .addSink(new KeyedStateSink());

    env.execute("KeyedStateDeduplication");
}
Example 10
Source File: HyperLogLogUvExample.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, UvExampleUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-uv-stat");

    FlinkKafkaConsumerBase<String> kafkaConsumer = new FlinkKafkaConsumer011<>(
            UvExampleUtil.topic, new SimpleStringSchema(), props)
            .setStartFromLatest();

    FlinkJedisPoolConfig conf = new FlinkJedisPoolConfig
            .Builder().setHost("192.168.30.244").build();

    env.addSource(kafkaConsumer)
            .map(string -> {
                // deserialize JSON
                UserVisitWebEvent userVisitWebEvent = GsonUtil.fromJson(
                        string, UserVisitWebEvent.class);
                // build the Redis key in the format date_pageId, e.g. 20191026_0
                String redisKey = userVisitWebEvent.getDate() + "_"
                        + userVisitWebEvent.getPageId();
                return Tuple2.of(redisKey, userVisitWebEvent.getUserId());
            })
            .returns(new TypeHint<Tuple2<String, String>>(){})
            .addSink(new RedisSink<>(conf, new RedisPfaddSinkMapper()));

    env.execute("Redis Set UV Stat");
}
Example 11
Source File: StreamingJobGraphGeneratorTest.java From flink with Apache License 2.0
/**
 * Tests that slot sharing can be disabled for iterations.
 */
@Test
public void testDisableSlotSharingForIteration() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Integer> source = env.fromElements(1, 2, 3).name("source");
    IterativeStream<Integer> iteration = source.iterate(3000);
    iteration.name("iteration").setParallelism(2);
    DataStream<Integer> map = iteration.map(x -> x + 1).name("map").setParallelism(2);
    DataStream<Integer> filter = map.filter((x) -> false).name("filter").setParallelism(2);
    iteration.closeWith(filter).print();

    List<Transformation<?>> transformations = new ArrayList<>();
    transformations.add(source.getTransformation());
    transformations.add(iteration.getTransformation());
    transformations.add(map.getTransformation());
    transformations.add(filter.getTransformation());

    // when slot sharing is disabled, all job vertices except the iteration vertices
    // have no slot sharing group; iteration vertices are assigned one automatically
    StreamGraphGenerator generator = new StreamGraphGenerator(transformations, env.getConfig(), env.getCheckpointConfig());
    generator.setSlotSharingEnabled(false);
    JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(generator.generate());

    SlotSharingGroup iterationSourceSlotSharingGroup = null;
    SlotSharingGroup iterationSinkSlotSharingGroup = null;

    CoLocationGroup iterationSourceCoLocationGroup = null;
    CoLocationGroup iterationSinkCoLocationGroup = null;

    for (JobVertex jobVertex : jobGraph.getVertices()) {
        if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SOURCE_NAME_PREFIX)) {
            iterationSourceSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSourceCoLocationGroup = jobVertex.getCoLocationGroup();
        } else if (jobVertex.getName().startsWith(StreamGraph.ITERATION_SINK_NAME_PREFIX)) {
            iterationSinkSlotSharingGroup = jobVertex.getSlotSharingGroup();
            iterationSinkCoLocationGroup = jobVertex.getCoLocationGroup();
        } else {
            assertNull(jobVertex.getSlotSharingGroup());
        }
    }

    assertNotNull(iterationSourceSlotSharingGroup);
    assertNotNull(iterationSinkSlotSharingGroup);
    assertEquals(iterationSourceSlotSharingGroup, iterationSinkSlotSharingGroup);

    assertNotNull(iterationSourceCoLocationGroup);
    assertNotNull(iterationSinkCoLocationGroup);
    assertEquals(iterationSourceCoLocationGroup, iterationSinkCoLocationGroup);
}
Example 12
Source File: RidesAndFaresSolution.java From flink-training-exercises with Apache License 2.0
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    final String ridesFile = params.get("rides", pathToRideData);
    final String faresFile = params.get("fares", pathToFareData);

    final int delay = 60;                 // at most 60 seconds of delay
    final int servingSpeedFactor = 1800;  // 30 minutes worth of events are served every second

    // Set up the streaming execution environment, including the Web UI and REST endpoint.
    // Checkpointing isn't needed for the RidesAndFares exercise; this setup is for
    // using the State Processor API.
    Configuration conf = new Configuration();
    conf.setString("state.backend", "filesystem");
    conf.setString("state.savepoints.dir", "file:///tmp/savepoints");
    conf.setString("state.checkpoints.dir", "file:///tmp/checkpoints");
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(conf);
    env.setParallelism(ExerciseBase.parallelism);

    env.enableCheckpointing(10000L);
    CheckpointConfig config = env.getCheckpointConfig();
    config.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    DataStream<TaxiRide> rides = env
            .addSource(rideSourceOrTest(new TaxiRideSource(ridesFile, delay, servingSpeedFactor)))
            .filter((TaxiRide ride) -> ride.isStart)
            .keyBy(ride -> ride.rideId);

    DataStream<TaxiFare> fares = env
            .addSource(fareSourceOrTest(new TaxiFareSource(faresFile, delay, servingSpeedFactor)))
            .keyBy(fare -> fare.rideId);

    // Set a UID on the stateful flatmap operator so we can read its state using the State Processor API.
    DataStream<Tuple2<TaxiRide, TaxiFare>> enrichedRides = rides
            .connect(fares)
            .flatMap(new EnrichmentFunction())
            .uid("enrichment");

    printOrTest(enrichedRides);

    env.execute("Join Rides with Fares (java RichCoFlatMap)");
}
Example 13
Source File: AbstractFlinkClient.java From alchemy with Apache License 2.0
private void setBaseInfo(StreamExecutionEnvironment execEnv, SqlSubmitFlinkRequest request) {
    execEnv.setParallelism(request.getParallelism());
    if (request.getMaxParallelism() != null) {
        execEnv.setMaxParallelism(request.getMaxParallelism());
    }
    if (org.apache.commons.lang3.StringUtils.isNotEmpty(request.getTimeCharacteristic())) {
        execEnv.setStreamTimeCharacteristic(TimeCharacteristic.valueOf(request.getTimeCharacteristic()));
    } else {
        execEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    }
    if (request.getBufferTimeout() != null) {
        execEnv.setBufferTimeout(request.getBufferTimeout());
    }
    if (org.apache.commons.lang3.StringUtils.isNotEmpty(request.getRestartStrategies())) {
        String strategies = request.getRestartStrategies();
        com.dfire.platform.alchemy.common.RestartStrategies restartStrategies
                = com.dfire.platform.alchemy.common.RestartStrategies.valueOf(strategies.toUpperCase());
        Map<String, Object> restartParams = request.getRestartParams();
        switch (restartStrategies) {
            case NO:
                execEnv.setRestartStrategy(RestartStrategies.noRestart());
                break;
            case FIXED:
                int restartAttempts = restartParams == null ? Constants.RESTART_ATTEMPTS
                        : Integer.parseInt(restartParams.get(CONFIG_KEY_RESTART_ATTEMPTS).toString());
                long delayBetweenAttempts = restartParams == null ? Constants.DELAY_BETWEEN_ATTEMPTS
                        : Long.parseLong(restartParams.get(CONFIG_KEY_DELAY_BETWEEN_ATTEMPTS).toString());
                execEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(restartAttempts, delayBetweenAttempts));
                break;
            case FAILURE:
                int failureRate = restartParams == null ? Constants.FAILURE_RATE
                        : Integer.parseInt(restartParams.get(CONFIG_KEY_FAILURE_RATE).toString());
                long failureInterval = restartParams == null ? Constants.FAILURE_INTERVAL
                        : Long.parseLong(restartParams.get(CONFIG_KEY_FAILURE_INTERVAL).toString());
                long delayInterval = restartParams == null ? Constants.DELAY_INTERVAL
                        : Long.parseLong(restartParams.get(CONFIG_KEY_DELAY_INTERVAL).toString());
                execEnv.setRestartStrategy(RestartStrategies.failureRateRestart(failureRate,
                        Time.of(failureInterval, TimeUnit.MILLISECONDS),
                        Time.of(delayInterval, TimeUnit.MILLISECONDS)));
                break;
            case FALLBACK:
                execEnv.setRestartStrategy(RestartStrategies.fallBackRestart());
                break;
            default:
        }
    }
    if (request.getCheckpointCfg() != null) {
        CheckpointConfig checkpointConfig = execEnv.getCheckpointConfig();
        BeanUtils.copyProperties(request.getCheckpointCfg(), checkpointConfig);
    }
}
Example 14
Source File: PvStatLocalKeyByExactlyOnce.java From flink-learning with Apache License 2.0
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // checkpoint once per minute
    env.enableCheckpointing(TimeUnit.MINUTES.toMillis(1));
    env.setParallelism(2);

    CheckpointConfig checkpointConf = env.getCheckpointConfig();
    // EXACTLY_ONCE checkpointing semantics
    checkpointConf.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
    checkpointConf.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, PvStatExactlyOnceKafkaUtil.broker_list);
    props.put(ConsumerConfig.GROUP_ID_CONFIG, "app-pv-stat");

    FlinkKafkaConsumerBase<String> appKafkaConsumer = new FlinkKafkaConsumer011<>(
            // Kafka topic, String deserialization
            PvStatExactlyOnceKafkaUtil.topic,
            new SimpleStringSchema(),
            props).setStartFromLatest();

    env.addSource(appKafkaConsumer)
            .flatMap(new LocalKeyByFlatMap(10))
            // key by appId
            .keyBy((KeySelector<Tuple2<String, Long>, String>) appIdPv -> appIdPv.f0)
            .map(new RichMapFunction<Tuple2<String, Long>, Tuple2<String, Long>>() {
                private ValueState<Long> pvState;
                private long pv = 0;

                @Override
                public void open(Configuration parameters) throws Exception {
                    super.open(parameters);
                    // initialize the state
                    pvState = getRuntimeContext().getState(
                            new ValueStateDescriptor<>("pvStat",
                                    TypeInformation.of(new TypeHint<Long>() {
                                    })));
                }

                @Override
                public Tuple2<String, Long> map(Tuple2<String, Long> tuple2) throws Exception {
                    // read the app's PV value from state, add the newly received PV value, and update the state
                    if (null == pvState.value()) {
                        log.info("{} is new, PV is {}", tuple2.f0, tuple2.f1);
                        pv = tuple2.f1;
                    } else {
                        pv = pvState.value();
                        pv += tuple2.f1;
                        log.info("{} is old, PV is {}", tuple2.f0, pv);
                    }
                    pvState.update(pv);
                    tuple2.setField(pv, 1);
                    return tuple2;
                }
            })
            .print();

    env.execute("Flink pv stat LocalKeyBy");
}