Java Code Examples for org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setRestartStrategy()
The following examples show how to use
org.apache.flink.streaming.api.environment.StreamExecutionEnvironment#setRestartStrategy().
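Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the restart-strategy configurations most commonly passed to setRestartStrategy(). The class name and the concrete attempt/delay values are illustrative assumptions only.

import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.util.concurrent.TimeUnit;

public class RestartStrategySketch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Retry a failed job up to 3 times, waiting 10 seconds between attempts.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10, TimeUnit.SECONDS)));

        // Alternative: tolerate at most 3 failures within a 5-minute interval, restarting 10 seconds apart.
        // env.setRestartStrategy(RestartStrategies.failureRateRestart(
        //         3, Time.of(5, TimeUnit.MINUTES), Time.of(10, TimeUnit.SECONDS)));

        // Alternative: never restart, so the first failure terminates the job.
        // env.setRestartStrategy(RestartStrategies.noRestart());

        // A real job would define sources and sinks here and then call env.execute().
    }
}

Programmatic configuration like this overrides whatever default restart strategy is set in flink-conf.yaml, which is why the test jobs below set it explicitly to get deterministic recovery behavior.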
Example 1
Source File: CassandraTupleWriteAheadSinkExample.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
    env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

    CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
            .setQuery("INSERT INTO zhisheng.values (id, counter) values (?, ?);")
            .enableWriteAheadLog()
            .setClusterBuilder(new ClusterBuilder() {
                private static final long serialVersionUID = 2793938419775311824L;

                @Override
                public Cluster buildCluster(Cluster.Builder builder) {
                    return builder.addContactPoint("127.0.0.1").build();
                }
            })
            .build();

    sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

    env.execute();
}
Example 2
Source File: FixedDelayRestartStrategyMain.java From flink-learning with Apache License 2.0 | 6 votes |
public static void main(String[] args) throws Exception {
    // create the stream execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setGlobalJobParameters(ParameterTool.fromArgs(args));

    // restart every 5 s; after three attempts, if the job still is not up, stop
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 5000));

    env.addSource(new SourceFunction<Long>() {
        @Override
        public void run(SourceContext<Long> sourceContext) throws Exception {
            while (true) {
                // emitting null and unboxing it in the map below causes a NullPointerException,
                // which deliberately fails the job and exercises the restart strategy
                sourceContext.collect(null);
            }
        }

        @Override
        public void cancel() {
        }
    })
            .map((MapFunction<Long, Long>) aLong -> aLong / 1)
            .print();

    env.execute("zhisheng fixedDelay Restart Strategy example");
}
Example 3
Source File: DataStreamAllroundTestJobFactory.java From flink with Apache License 2.0 | 6 votes |
private static void setupRestartStrategy(final StreamExecutionEnvironment env, final ParameterTool pt) {
    String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
    if (restartStrategyConfig != null) {
        RestartStrategies.RestartStrategyConfiguration restartStrategy;
        switch (restartStrategyConfig) {
            case "fixed_delay":
                restartStrategy = RestartStrategies.fixedDelayRestart(
                    pt.getInt(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
                    pt.getLong(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
                break;
            case "no_restart":
                restartStrategy = RestartStrategies.noRestart();
                break;
            default:
                throw new IllegalArgumentException("Unknown restart strategy: " + restartStrategyConfig);
        }
        env.setRestartStrategy(restartStrategy);
    }
}
Example 4
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test that ensures the KafkaConsumer is properly failing if the topic doesn't exist
 * and a wrong broker was specified.
 *
 * @throws Exception
 */
public void runFailOnNoBrokerTest() throws Exception {
    try {
        Properties properties = new Properties();

        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();
        see.getConfig().disableSysoutLogging();
        see.setRestartStrategy(RestartStrategies.noRestart());
        see.setParallelism(1);

        // use wrong ports for the consumers
        properties.setProperty("bootstrap.servers", "localhost:80");
        properties.setProperty("group.id", "test");
        properties.setProperty("request.timeout.ms", "3000"); // let the test fail fast
        properties.setProperty("socket.timeout.ms", "3000");
        properties.setProperty("session.timeout.ms", "2000");
        properties.setProperty("fetch.max.wait.ms", "2000");
        properties.setProperty("heartbeat.interval.ms", "1000");
        properties.putAll(secureProps);

        FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer("doesntexist", new SimpleStringSchema(), properties);
        DataStream<String> stream = see.addSource(source);
        stream.print();
        see.execute("No broker test");
    } catch (JobExecutionException jee) {
        if (kafkaServer.getVersion().equals("0.9") ||
            kafkaServer.getVersion().equals("0.10") ||
            kafkaServer.getVersion().equals("0.11") ||
            kafkaServer.getVersion().equals("2.0")) {
            final Optional<TimeoutException> optionalTimeoutException = ExceptionUtils.findThrowable(jee, TimeoutException.class);
            assertTrue(optionalTimeoutException.isPresent());

            final TimeoutException timeoutException = optionalTimeoutException.get();
            assertEquals("Timeout expired while fetching topic metadata", timeoutException.getMessage());
        } else {
            final Optional<Throwable> optionalThrowable = ExceptionUtils.findThrowableWithMessage(jee, "Unable to retrieve any partitions");
            assertTrue(optionalThrowable.isPresent());
            assertTrue(optionalThrowable.get() instanceof RuntimeException);
        }
    }
}
Example 5
Source File: AbstractQueryableStateTestBase.java From flink with Apache License 2.0 | 5 votes |
/**
 * Tests simple value state queryable state instance. Each source emits
 * (subtaskIndex, 0)..(subtaskIndex, numElements) tuples, which are then
 * queried. The test succeeds after each subtask index is queried with
 * value numElements (the latest element updated the state).
 */
@Test
public void testValueState() throws Exception {
    final Deadline deadline = Deadline.now().plus(TEST_TIMEOUT);
    final long numElements = 1024L;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStateBackend(stateBackend);
    env.setParallelism(maxParallelism);
    // Very important, because cluster is shared between tests and we
    // don't explicitly check that all slots are available before
    // submitting.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 1000L));

    DataStream<Tuple2<Integer, Long>> source = env.addSource(new TestAscendingValueSource(numElements));

    // Value state
    ValueStateDescriptor<Tuple2<Integer, Long>> valueState = new ValueStateDescriptor<>("any", source.getType());

    source.keyBy(new KeySelector<Tuple2<Integer, Long>, Integer>() {
        private static final long serialVersionUID = 7662520075515707428L;

        @Override
        public Integer getKey(Tuple2<Integer, Long> value) {
            return value.f0;
        }
    }).asQueryableState("hakuna", valueState);

    try (AutoCancellableJob autoCancellableJob = new AutoCancellableJob(deadline, clusterClient, env)) {
        final JobID jobId = autoCancellableJob.getJobId();
        final JobGraph jobGraph = autoCancellableJob.getJobGraph();

        clusterClient.setDetached(true);
        clusterClient.submitJob(jobGraph, AbstractQueryableStateTestBase.class.getClassLoader());

        executeValueQuery(deadline, client, jobId, "hakuna", valueState, numElements);
    }
}
Example 6
Source File: KeyedStateCheckpointingITCase.java From flink with Apache License 2.0 | 5 votes |
protected void testProgramWithBackend(AbstractStateBackend stateBackend) throws Exception {
    assertEquals("Broken test setup", 0, (NUM_STRINGS / 2) % NUM_KEYS);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(PARALLELISM);
    env.enableCheckpointing(500);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L));
    env.setStateBackend(stateBackend);

    // compute when (randomly) the failure should happen
    final int failurePosMin = (int) (0.6 * NUM_STRINGS / PARALLELISM);
    final int failurePosMax = (int) (0.8 * NUM_STRINGS / PARALLELISM);
    final int failurePos = (new Random().nextInt(failurePosMax - failurePosMin) + failurePosMin);

    final DataStream<Integer> stream1 = env.addSource(
        new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

    final DataStream<Integer> stream2 = env.addSource(
        new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4));

    stream1.union(stream2)
        .keyBy(new IdentityKeySelector<Integer>())
        .map(new OnceFailingPartitionedSum(failurePos))
        .keyBy(0)
        .addSink(new CounterSink());

    env.execute();

    // verify that we counted exactly right
    assertEquals(NUM_KEYS, CounterSink.ALL_COUNTS.size());
    assertEquals(NUM_KEYS, OnceFailingPartitionedSum.ALL_SUMS.size());

    for (Entry<Integer, Long> sum : OnceFailingPartitionedSum.ALL_SUMS.entrySet()) {
        assertEquals((long) sum.getKey() * NUM_STRINGS / NUM_KEYS, sum.getValue().longValue());
    }

    for (long count : CounterSink.ALL_COUNTS.values()) {
        assertEquals(NUM_STRINGS / NUM_KEYS, count);
    }
}
Example 7
Source File: FlinkPulsarITest.java From pulsar-flink with Apache License 2.0 | 5 votes |
@Test
public void testOne2OneExactlyOnce() throws Exception {
    String topic = newTopic();
    int parallelism = 5;
    int numElementsPerPartition = 1000;
    int totalElements = parallelism * numElementsPerPartition;
    int failAfterElements = numElementsPerPartition / 3;

    List<String> allTopicNames = new ArrayList<>();
    for (int i = 0; i < parallelism; i++) {
        allTopicNames.add(topic + "-partition-" + i);
    }

    createTopic(topic, parallelism, adminUrl);

    generateRandomizedIntegerSequence(
        StreamExecutionEnvironment.getExecutionEnvironment(), topic, parallelism, numElementsPerPartition, true);

    // run the topology that fails and recovers
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();

    Properties sourceProps = sourceProperties();
    sourceProps.setProperty(TOPIC_MULTI_OPTION_KEY, StringUtils.join(allTopicNames, ','));

    env.addSource(new FlinkPulsarRowSource(serviceUrl, adminUrl, sourceProps).setStartFromEarliest())
        .map(new PartitionValidationMapper(parallelism, 1))
        .map(new FailingIdentityMapper<Row>(failAfterElements))
        .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    TestUtils.tryExecute(env, "one to one exactly once test");
}
Example 8
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
    final String topic = "manyToOneTopic";
    final int numPartitions = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = numPartitions * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;

    final int parallelism = 8;

    createTestTopic(topic, numPartitions, 1);

    DataGenerators.generateRandomizedIntegerSequence(
        StreamExecutionEnvironment.getExecutionEnvironment(),
        kafkaServer,
        topic,
        numPartitions,
        numElementsPerPartition,
        true);

    // run the topology that fails and recovers

    DeserializationSchema<Integer> schema =
        new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    // set the number of restarts to one. The failing mapper will fail once, then it's only success exceptions.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();
    env.setBufferTimeout(0);

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env
        .addSource(kafkaSource)
        .map(new PartitionValidatingMapper(numPartitions, 1))
        .map(new FailingIdentityMapper<Integer>(failAfterElements))
        .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "multi-source-one-partitions exactly once test");

    deleteTestTopic(topic);
}
Example 9
Source File: WindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testTumblingProcessingTimeWindow() {
    final int numElements = 3000;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(timeCharacteristic);
        env.getConfig().setAutoWatermarkInterval(10);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();

        SinkValidatorUpdaterAndChecker updaterAndChecker = new SinkValidatorUpdaterAndChecker(numElements, 1);

        env
            .addSource(new FailingSource(new Generator(), numElements, timeCharacteristic))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(100, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple2<Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple2<Long, IntType>> out) {

                    // validate that the function has been opened properly
                    assertTrue(open);

                    for (Tuple2<Long, IntType> value : values) {
                        assertEquals(value.f0.intValue(), value.f1.value);
                        out.collect(new Tuple2<>(value.f0, new IntType(1)));
                    }
                }
            })
            .addSink(new ValidatingSink<>(updaterAndChecker, updaterAndChecker, timeCharacteristic))
            .setParallelism(1);

        tryExecute(env, "Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 10
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests the proper consumption when having more Flink sources than Kafka partitions, which means
 * that some Flink sources will read no partitions.
 */
public void runMultipleSourcesOnePartitionExactlyOnceTest() throws Exception {
    final String topic = "manyToOneTopic";
    final int numPartitions = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = numPartitions * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;

    final int parallelism = 8;

    createTestTopic(topic, numPartitions, 1);

    DataGenerators.generateRandomizedIntegerSequence(
        StreamExecutionEnvironment.getExecutionEnvironment(),
        kafkaServer,
        topic,
        numPartitions,
        numElementsPerPartition,
        true);

    // run the topology that fails and recovers

    DeserializationSchema<Integer> schema =
        new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    // set the number of restarts to one. The failing mapper will fail once, then it's only success exceptions.
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.setBufferTimeout(0);

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);

    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env
        .addSource(kafkaSource)
        .map(new PartitionValidatingMapper(numPartitions, 1))
        .map(new FailingIdentityMapper<Integer>(failAfterElements))
        .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "multi-source-one-partitions exactly once test");

    deleteTestTopic(topic);
}
Example 11
Source File: EventTimeWindowCheckpointingITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testPreAggregatedSlidingTimeWindow() {
    final int numElementsPerKey = numElementsPerKey();
    final int windowSize = windowSize();
    final int windowSlide = windowSlide();
    final int numKeys = numKeys();

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.getConfig().setUseSnapshotCompression(true);

        env
            .addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
            .reduce(
                new ReduceFunction<Tuple2<Long, IntType>>() {

                    @Override
                    public Tuple2<Long, IntType> reduce(
                            Tuple2<Long, IntType> a,
                            Tuple2<Long, IntType> b) {

                        // validate that the function has been opened properly
                        return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
                    }
                },
                new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                    private boolean open = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                        open = true;
                    }

                    @Override
                    public void apply(
                            Tuple tuple,
                            TimeWindow window,
                            Iterable<Tuple2<Long, IntType>> input,
                            Collector<Tuple4<Long, Long, Long, IntType>> out) {

                        // validate that the function has been opened properly
                        assertTrue(open);

                        for (Tuple2<Long, IntType> in: input) {
                            out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                        }
                    }
                })
            .addSink(new ValidatingSink<>(
                new SinkValidatorUpdateFun(numElementsPerKey),
                new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 12
Source File: StreamSQLTestProgram.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(
        3,
        Time.of(10, TimeUnit.SECONDS)
    ));
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv);

    tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0));
    tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5));

    int overWindowSizeSeconds = 1;
    int tumbleWindowSizeSeconds = 10;

    String overQuery = String.format(
        "SELECT " +
        " key, " +
        " rowtime, " +
        " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " +
        "FROM table1",
        overWindowSizeSeconds);

    String tumbleQuery = String.format(
        "SELECT " +
        " key, " +
        " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " +
        " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " +
        " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " +
        "FROM (%s) " +
        "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " +
        "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)",
        tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds);

    String joinQuery = String.format(
        "SELECT " +
        " t1.key, " +
        " t2.rowtime AS rowtime, " +
        " t2.correct," +
        " t2.wStart " +
        "FROM table2 t1, (%s) t2 " +
        "WHERE " +
        " t1.key = t2.key AND " +
        " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND",
        tumbleQuery, tumbleWindowSizeSeconds);

    String finalAgg = String.format(
        "SELECT " +
        " SUM(correct) AS correct, " +
        " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " +
        "FROM (%s) " +
        "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)",
        joinQuery);

    // get Table for SQL query
    Table result = tEnv.sqlQuery(finalAgg);
    // convert Table into append-only DataStream
    DataStream<Row> resultStream =
        tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP));

    final StreamingFileSink<Row> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.toString());
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    resultStream
        // inject a KillMapper that forwards all records but terminates the first execution attempt
        .map(new KillMapper()).setParallelism(1)
        // add sink function
        .addSink(sink).setParallelism(1);

    sEnv.execute();
}
Example 13
Source File: DataStreamAllroundTestJobFactory.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) throws Exception {

    // set checkpointing semantics
    String semantics = pt.get(TEST_SEMANTICS.key(), TEST_SEMANTICS.defaultValue());
    long checkpointInterval = pt.getLong(ENVIRONMENT_CHECKPOINT_INTERVAL.key(), ENVIRONMENT_CHECKPOINT_INTERVAL.defaultValue());
    CheckpointingMode checkpointingMode = semantics.equalsIgnoreCase("exactly-once")
        ? CheckpointingMode.EXACTLY_ONCE
        : CheckpointingMode.AT_LEAST_ONCE;

    env.enableCheckpointing(checkpointInterval, checkpointingMode);

    // use event time
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // parallelism
    env.setParallelism(pt.getInt(ENVIRONMENT_PARALLELISM.key(), ENVIRONMENT_PARALLELISM.defaultValue()));
    env.setMaxParallelism(pt.getInt(ENVIRONMENT_MAX_PARALLELISM.key(), ENVIRONMENT_MAX_PARALLELISM.defaultValue()));

    // restart strategy
    String restartStrategyConfig = pt.get(ENVIRONMENT_RESTART_STRATEGY.key());
    if (restartStrategyConfig != null) {
        RestartStrategies.RestartStrategyConfiguration restartStrategy;
        switch (restartStrategyConfig) {
            case "fixed_delay":
                restartStrategy = RestartStrategies.fixedDelayRestart(
                    pt.getInt(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_ATTEMPTS.defaultValue()),
                    pt.getLong(
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.key(),
                        ENVIRONMENT_RESTART_STRATEGY_FIXED_DELAY.defaultValue()));
                break;
            case "no_restart":
                restartStrategy = RestartStrategies.noRestart();
                break;
            default:
                throw new IllegalArgumentException("Unknown restart strategy: " + restartStrategyConfig);
        }
        env.setRestartStrategy(restartStrategy);
    }

    // state backend
    final String stateBackend = pt.get(
        STATE_BACKEND.key(),
        STATE_BACKEND.defaultValue());

    final String checkpointDir = pt.getRequired(STATE_BACKEND_CHECKPOINT_DIR.key());

    if ("file".equalsIgnoreCase(stateBackend)) {
        boolean asyncCheckpoints = pt.getBoolean(
            STATE_BACKEND_FILE_ASYNC.key(),
            STATE_BACKEND_FILE_ASYNC.defaultValue());

        env.setStateBackend((StateBackend) new FsStateBackend(checkpointDir, asyncCheckpoints));
    } else if ("rocks".equalsIgnoreCase(stateBackend)) {
        boolean incrementalCheckpoints = pt.getBoolean(
            STATE_BACKEND_ROCKS_INCREMENTAL.key(),
            STATE_BACKEND_ROCKS_INCREMENTAL.defaultValue());

        env.setStateBackend((StateBackend) new RocksDBStateBackend(checkpointDir, incrementalCheckpoints));
    } else {
        throw new IllegalArgumentException("Unknown backend requested: " + stateBackend);
    }

    boolean enableExternalizedCheckpoints = pt.getBoolean(
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.key(),
        ENVIRONMENT_EXTERNALIZE_CHECKPOINT.defaultValue());

    if (enableExternalizedCheckpoints) {
        String cleanupModeConfig = pt.get(
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.key(),
            ENVIRONMENT_EXTERNALIZE_CHECKPOINT_CLEANUP.defaultValue());

        CheckpointConfig.ExternalizedCheckpointCleanup cleanupMode;
        switch (cleanupModeConfig) {
            case "retain":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION;
                break;
            case "delete":
                cleanupMode = CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION;
                break;
            default:
                throw new IllegalArgumentException("Unknown clean up mode for externalized checkpoints: " + cleanupModeConfig);
        }
        env.getCheckpointConfig().enableExternalizedCheckpoints(cleanupMode);
    }

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(pt);
}
Example 14
Source File: KafkaShortRetentionTestBase.java From flink with Apache License 2.0 | 4 votes |
public void runAutoOffsetResetTest() throws Exception {
    final String topic = "auto-offset-reset-test";

    final int parallelism = 1;
    final int elementsPerPartition = 50000;

    Properties tprops = new Properties();
    tprops.setProperty("retention.ms", "250");

    kafkaServer.createTestTopic(topic, parallelism, 1, tprops);

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.noRestart()); // fail immediately

    // ----------- add producer dataflow ----------

    DataStream<String> stream = env.addSource(new RichParallelSourceFunction<String>() {

        private boolean running = true;

        @Override
        public void run(SourceContext<String> ctx) throws InterruptedException {
            int cnt = getRuntimeContext().getIndexOfThisSubtask() * elementsPerPartition;
            int limit = cnt + elementsPerPartition;

            while (running && !stopProducer && cnt < limit) {
                ctx.collect("element-" + cnt);
                cnt++;
                Thread.sleep(10);
            }
            LOG.info("Stopping producer");
        }

        @Override
        public void cancel() {
            running = false;
        }
    });
    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    kafkaServer.produceIntoKafka(stream, topic, new SimpleStringSchema(), props, null);

    // ----------- add consumer dataflow ----------

    NonContinousOffsetsDeserializationSchema deserSchema = new NonContinousOffsetsDeserializationSchema();
    FlinkKafkaConsumerBase<String> source = kafkaServer.getConsumer(topic, deserSchema, props);

    DataStreamSource<String> consuming = env.addSource(source);
    consuming.addSink(new DiscardingSink<String>());

    tryExecute(env, "run auto offset reset test");

    kafkaServer.deleteTestTopic(topic);
}
Example 15
Source File: EventTimeWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
public void doTestTumblingTimeWindowWithKVState(int maxParallelism) {
    final int numElementsPerKey = numElementsPerKey();
    final int windowSize = windowSize();
    final int numKeys = numKeys();

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setMaxParallelism(maxParallelism);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.setStateBackend(this.stateBackend);
        env.getConfig().setUseSnapshotCompression(true);

        env
            .addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowSize, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                private ValueState<Integer> count;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                    count = getRuntimeContext().getState(
                        new ValueStateDescriptor<>("count", Integer.class, 0));
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple4<Long, Long, Long, IntType>> out) throws Exception {

                    // the window count state starts with the key, so that we get
                    // different count results for each key
                    if (count.value() == 0) {
                        count.update(tuple.<Long>getField(0).intValue());
                    }

                    // validate that the function has been opened properly
                    assertTrue(open);

                    count.update(count.value() + 1);
                    out.collect(new Tuple4<>(tuple.<Long>getField(0), window.getStart(), window.getEnd(), new IntType(count.value())));
                }
            })
            .addSink(new ValidatingSink<>(
                new CountingSinkValidatorUpdateFun(),
                new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize))).setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 16
Source File: KafkaProducerTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * This test sets KafkaProducer so that it will automatically flush the data
 * and fails the broker to check whether flushed records since last checkpoint were not duplicated.
 */
protected void testExactlyOnce(boolean regularSink, int sinksCount) throws Exception {
    final String topic = (regularSink ? "exactlyOnceTopicRegularSink" : "exactlyTopicCustomOperator") + sinksCount;
    final int partition = 0;
    final int numElements = 1000;
    final int failAfterElements = 333;

    for (int i = 0; i < sinksCount; i++) {
        createTestTopic(topic + i, 1, 1);
    }

    TypeInformationSerializationSchema<Integer> schema = new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());
    KeyedSerializationSchema<Integer> keyedSerializationSchema = new KeyedSerializationSchemaWrapper<>(schema);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(1);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
    env.getConfig().disableSysoutLogging();

    Properties properties = new Properties();
    properties.putAll(standardProps);
    properties.putAll(secureProps);

    // process exactly failAfterElements number of elements and then shutdown Kafka broker and fail application
    List<Integer> expectedElements = getIntegersSequence(numElements);

    DataStream<Integer> inputStream = env
        .addSource(new IntegerSource(numElements))
        .map(new FailingIdentityMapper<Integer>(failAfterElements));

    for (int i = 0; i < sinksCount; i++) {
        FlinkKafkaPartitioner<Integer> partitioner = new FlinkKafkaPartitioner<Integer>() {
            @Override
            public int partition(Integer record, byte[] key, byte[] value, String targetTopic, int[] partitions) {
                return partition;
            }
        };

        if (regularSink) {
            StreamSink<Integer> kafkaSink = kafkaServer.getProducerSink(topic + i, keyedSerializationSchema, properties, partitioner);
            inputStream.addSink(kafkaSink.getUserFunction());
        } else {
            kafkaServer.produceIntoKafka(inputStream, topic + i, keyedSerializationSchema, properties, partitioner);
        }
    }

    FailingIdentityMapper.failedBefore = false;
    TestUtils.tryExecute(env, "Exactly once test");

    for (int i = 0; i < sinksCount; i++) {
        // assert that before failure we successfully snapshot/flushed all expected elements
        assertExactlyOnceForTopic(
            properties,
            topic + i,
            partition,
            expectedElements,
            KAFKA_READ_TIMEOUT);
        deleteTestTopic(topic + i);
    }
}
Example 17
Source File: KafkaConsumerTestBase.java From flink with Apache License 2.0 | 4 votes |
/**
 * Tests the proper consumption when having fewer Flink sources than Kafka partitions, so
 * one Flink source will read multiple Kafka partitions.
 */
public void runOneSourceMultiplePartitionsExactlyOnceTest() throws Exception {
    final String topic = "oneToManyTopic";
    final int numPartitions = 5;
    final int numElementsPerPartition = 1000;
    final int totalElements = numPartitions * numElementsPerPartition;
    final int failAfterElements = numElementsPerPartition / 3;

    final int parallelism = 2;

    createTestTopic(topic, numPartitions, 1);

    DataGenerators.generateRandomizedIntegerSequence(
        StreamExecutionEnvironment.getExecutionEnvironment(),
        kafkaServer,
        topic,
        numPartitions,
        numElementsPerPartition,
        true);

    // run the topology that fails and recovers

    DeserializationSchema<Integer> schema =
        new TypeInformationSerializationSchema<>(BasicTypeInfo.INT_TYPE_INFO, new ExecutionConfig());

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

    Properties props = new Properties();
    props.putAll(standardProps);
    props.putAll(secureProps);
    FlinkKafkaConsumerBase<Integer> kafkaSource = kafkaServer.getConsumer(topic, schema, props);

    env
        .addSource(kafkaSource)
        .map(new PartitionValidatingMapper(numPartitions, 3))
        .map(new FailingIdentityMapper<Integer>(failAfterElements))
        .addSink(new ValidatingExactlyOnceSink(totalElements)).setParallelism(1);

    FailingIdentityMapper.failedBefore = false;
    tryExecute(env, "One-source-multi-partitions exactly once test");

    deleteTestTopic(topic);
}
Example 18
Source File: StatefulJobSavepointMigrationITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testSavepoint() throws Exception {
    final int parallelism = 4;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setRestartStrategy(RestartStrategies.noRestart());
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    switch (testStateBackend) {
        case StateBackendLoader.ROCKSDB_STATE_BACKEND_NAME:
            env.setStateBackend(new RocksDBStateBackend(new MemoryStateBackend()));
            break;
        case StateBackendLoader.MEMORY_STATE_BACKEND_NAME:
            env.setStateBackend(new MemoryStateBackend());
            break;
        default:
            throw new UnsupportedOperationException();
    }

    env.enableCheckpointing(500);
    env.setParallelism(parallelism);
    env.setMaxParallelism(parallelism);

    SourceFunction<Tuple2<Long, Long>> nonParallelSource;
    SourceFunction<Tuple2<Long, Long>> parallelSource;
    RichFlatMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>> flatMap;
    OneInputStreamOperator<Tuple2<Long, Long>, Tuple2<Long, Long>> timelyOperator;

    if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
        nonParallelSource = new MigrationTestUtils.CheckpointingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        parallelSource = new MigrationTestUtils.CheckpointingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        flatMap = new CheckpointingKeyedStateFlatMap();
        timelyOperator = new CheckpointingTimelyStatefulOperator();
    } else if (executionMode == ExecutionMode.VERIFY_SAVEPOINT) {
        nonParallelSource = new MigrationTestUtils.CheckingNonParallelSourceWithListState(NUM_SOURCE_ELEMENTS);
        parallelSource = new MigrationTestUtils.CheckingParallelSourceWithUnionListState(NUM_SOURCE_ELEMENTS);
        flatMap = new CheckingKeyedStateFlatMap();
        timelyOperator = new CheckingTimelyStatefulOperator();
    } else {
        throw new IllegalStateException("Unknown ExecutionMode " + executionMode);
    }

    env
        .addSource(nonParallelSource).uid("CheckpointingSource1")
        .keyBy(0)
        .flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap1")
        .keyBy(0)
        .transform(
            "timely_stateful_operator",
            new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
            timelyOperator).uid("CheckpointingTimelyStatefulOperator1")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

    env
        .addSource(parallelSource).uid("CheckpointingSource2")
        .keyBy(0)
        .flatMap(flatMap).startNewChain().uid("CheckpointingKeyedStateFlatMap2")
        .keyBy(0)
        .transform(
            "timely_stateful_operator",
            new TypeHint<Tuple2<Long, Long>>() {}.getTypeInfo(),
            timelyOperator).uid("CheckpointingTimelyStatefulOperator2")
        .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());

    if (executionMode == ExecutionMode.PERFORM_SAVEPOINT) {
        executeAndSavepoint(
            env,
            "src/test/resources/" + getSavepointPath(testMigrateVersion, testStateBackend),
            new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
    } else {
        restoreAndExecute(
            env,
            getResourceFilename(getSavepointPath(testMigrateVersion, testStateBackend)),
            new Tuple2<>(MigrationTestUtils.CheckingNonParallelSourceWithListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, 1),
            new Tuple2<>(MigrationTestUtils.CheckingParallelSourceWithUnionListState.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, parallelism),
            new Tuple2<>(CheckingKeyedStateFlatMap.SUCCESSFUL_RESTORE_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
            new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESS_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
            new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_EVENT_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
            new Tuple2<>(CheckingTimelyStatefulOperator.SUCCESSFUL_PROCESSING_TIME_CHECK_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2),
            new Tuple2<>(MigrationTestUtils.AccumulatorCountingSink.NUM_ELEMENTS_ACCUMULATOR, NUM_SOURCE_ELEMENTS * 2));
    }
}
Example 19
Source File: EventTimeAllWindowCheckpointingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test
public void testPreAggregatedTumblingTimeWindow() {
    final int numElementsPerKey = 3000;
    final int windowSize = 100;
    final int numKeys = 1;

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));

        env
            .addSource(new FailingSource(new EventTimeWindowCheckpointingITCase.KeyedEventTimeGenerator(numKeys, windowSize), numElementsPerKey))
            .rebalance()
            .timeWindowAll(Time.of(windowSize, MILLISECONDS))
            .reduce(
                new ReduceFunction<Tuple2<Long, IntType>>() {

                    @Override
                    public Tuple2<Long, IntType> reduce(
                            Tuple2<Long, IntType> a,
                            Tuple2<Long, IntType> b) {
                        return new Tuple2<>(a.f0, new IntType(a.f1.value + b.f1.value));
                    }
                },
                new RichAllWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, TimeWindow>() {

                    private boolean open = false;

                    @Override
                    public void open(Configuration parameters) {
                        assertEquals(1, getRuntimeContext().getNumberOfParallelSubtasks());
                        open = true;
                    }

                    @Override
                    public void apply(
                            TimeWindow window,
                            Iterable<Tuple2<Long, IntType>> input,
                            Collector<Tuple4<Long, Long, Long, IntType>> out) {

                        // validate that the function has been opened properly
                        assertTrue(open);

                        for (Tuple2<Long, IntType> in: input) {
                            out.collect(new Tuple4<>(in.f0, window.getStart(), window.getEnd(), in.f1));
                        }
                    }
                })
            .addSink(new ValidatingSink<>(
                new EventTimeWindowCheckpointingITCase.SinkValidatorUpdateFun(numElementsPerKey),
                new EventTimeWindowCheckpointingITCase.SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSize)))
            .setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Example 20
Source File: EventTimeWindowCheckpointingITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test
public void testSlidingTimeWindow() {
    final int numElementsPerKey = numElementsPerKey();
    final int windowSize = windowSize();
    final int windowSlide = windowSlide();
    final int numKeys = numKeys();

    try {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setMaxParallelism(2 * PARALLELISM);
        env.setParallelism(PARALLELISM);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.enableCheckpointing(100);
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
        env.getConfig().disableSysoutLogging();
        env.setStateBackend(this.stateBackend);
        env.getConfig().setUseSnapshotCompression(true);

        env
            .addSource(new FailingSource(new KeyedEventTimeGenerator(numKeys, windowSlide), numElementsPerKey))
            .rebalance()
            .keyBy(0)
            .timeWindow(Time.of(windowSize, MILLISECONDS), Time.of(windowSlide, MILLISECONDS))
            .apply(new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

                private boolean open = false;

                @Override
                public void open(Configuration parameters) {
                    assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
                    open = true;
                }

                @Override
                public void apply(
                        Tuple tuple,
                        TimeWindow window,
                        Iterable<Tuple2<Long, IntType>> values,
                        Collector<Tuple4<Long, Long, Long, IntType>> out) {

                    // validate that the function has been opened properly
                    assertTrue(open);

                    int sum = 0;
                    long key = -1;

                    for (Tuple2<Long, IntType> value : values) {
                        sum += value.f1.value;
                        key = value.f0;
                    }

                    final Tuple4<Long, Long, Long, IntType> output =
                        new Tuple4<>(key, window.getStart(), window.getEnd(), new IntType(sum));
                    out.collect(output);
                }
            })
            .addSink(new ValidatingSink<>(
                new SinkValidatorUpdateFun(numElementsPerKey),
                new SinkValidatorCheckFun(numKeys, numElementsPerKey, windowSlide))).setParallelism(1);

        env.execute("Tumbling Window Test");
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}