org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy.
You can vote up the examples you like or vote down the ones you don't,
and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example #1
Source File: HadoopPathBasedBulkFormatBuilder.java From flink with Apache License 2.0 | 6 votes |
/**
 * Convenience constructor that fills in the remaining arguments of the full constructor.
 *
 * <p>Delegates with a new {@code DefaultHadoopFileCommitterFactory}, the policy returned by
 * {@code OnCheckpointRollingPolicy.build()}, a new {@code DefaultBucketFactoryImpl} and the
 * result of {@code OutputFileConfig.builder().build()}.
 *
 * @param basePath Hadoop path handed to the full constructor as the base path
 * @param writerFactory factory handed to the full constructor for creating bulk writers
 * @param configuration configuration handed to the full constructor
 * @param assigner bucket assigner handed to the full constructor
 */
public HadoopPathBasedBulkFormatBuilder(
        org.apache.hadoop.fs.Path basePath,
        HadoopPathBasedBulkWriter.Factory<IN> writerFactory,
        Configuration configuration,
        BucketAssigner<IN, BucketID> assigner) {
    this(
            basePath,
            writerFactory,
            new DefaultHadoopFileCommitterFactory(),
            configuration,
            assigner,
            OnCheckpointRollingPolicy.build(),
            new DefaultBucketFactoryImpl<>(),
            OutputFileConfig.builder().build());
}
Example #2
Source File: StreamingFileSink.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Creates the {@code Buckets} instance for one subtask.
 *
 * <p>The part writer factory wraps this builder's {@code writerFactory}, and the rolling policy
 * is always the one returned by {@code OnCheckpointRollingPolicy.build()}.
 *
 * @param subtaskIndex index passed through to the {@code Buckets} constructor
 * @return a new {@code Buckets} built from this builder's base path, assigner and bucket factory
 * @throws IOException declared by the signature; presumably raised while constructing
 *     {@code Buckets} (not verifiable from this snippet)
 */
@Override
Buckets<IN, BucketID> createBuckets(int subtaskIndex) throws IOException {
    return new Buckets<>(
            basePath,
            bucketAssigner,
            bucketFactory,
            new BulkPartWriter.Factory<>(writerFactory),
            OnCheckpointRollingPolicy.build(),
            subtaskIndex);
}
Example #3
Source File: RollingPolicyTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testRollOnCheckpointPolicy() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final MethodCallCountingPolicyWrapper<String, String> rollingPolicy = new MethodCallCountingPolicyWrapper<>(OnCheckpointRollingPolicy.build()); final Buckets<String, String> buckets = createBuckets(path, rollingPolicy); rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L); buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(1L, 1L, 2L, 0L, 0L, 0L); // ... create a new in-progress file (before we had closed the last one so it was null)... buckets.onElement("test1", new TestUtils.MockSinkContext(5L, 1L, 5L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(2L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(2L, 2L, 2L, 0L, 0L, 0L); buckets.close(); }
Example #4
Source File: StreamingFileSink.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates the {@code Buckets} instance for one subtask, including the part file naming config.
 *
 * <p>The part writer factory wraps this builder's {@code writerFactory}, the rolling policy is
 * always the one returned by {@code OnCheckpointRollingPolicy.build()}, and a new
 * {@code PartFileConfig} is built from {@code partFilePrefix} and {@code partFileSuffix}.
 *
 * @param subtaskIndex index passed through to the {@code Buckets} constructor
 * @return a new {@code Buckets} built from this builder's base path, assigner and bucket factory
 * @throws IOException declared by the signature; presumably raised while constructing
 *     {@code Buckets} (not verifiable from this snippet)
 */
@Override
Buckets<IN, BucketID> createBuckets(int subtaskIndex) throws IOException {
    return new Buckets<>(
            basePath,
            bucketAssigner,
            bucketFactory,
            new BulkPartWriter.Factory<>(writerFactory),
            OnCheckpointRollingPolicy.build(),
            subtaskIndex,
            new PartFileConfig(partFilePrefix, partFileSuffix));
}
Example #5
Source File: RollingPolicyTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testRollOnCheckpointPolicy() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final MethodCallCountingPolicyWrapper<String, String> rollingPolicy = new MethodCallCountingPolicyWrapper<>(OnCheckpointRollingPolicy.build()); final Buckets<String, String> buckets = createBuckets(path, rollingPolicy); rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L); buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(1L, 1L, 2L, 0L, 0L, 0L); // ... create a new in-progress file (before we had closed the last one so it was null)... buckets.onElement("test1", new TestUtils.MockSinkContext(5L, 1L, 5L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(2L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(2L, 2L, 2L, 0L, 0L, 0L); buckets.close(); }
Example #6
Source File: RollingPolicyTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testRollOnCheckpointPolicy() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final MethodCallCountingPolicyWrapper<String, String> rollingPolicy = new MethodCallCountingPolicyWrapper<>(OnCheckpointRollingPolicy.build()); final Buckets<String, String> buckets = createBuckets(path, rollingPolicy); rollingPolicy.verifyCallCounters(0L, 0L, 0L, 0L, 0L, 0L); buckets.onElement("test1", new TestUtils.MockSinkContext(1L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(2L, 1L, 2L)); buckets.onElement("test1", new TestUtils.MockSinkContext(3L, 1L, 3L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(1L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(1L, 1L, 2L, 0L, 0L, 0L); // ... create a new in-progress file (before we had closed the last one so it was null)... buckets.onElement("test1", new TestUtils.MockSinkContext(5L, 1L, 5L)); // ... we have a checkpoint so we roll ... buckets.snapshotState(2L, new TestUtils.MockListState<>(), new TestUtils.MockListState<>()); rollingPolicy.verifyCallCounters(2L, 2L, 2L, 0L, 0L, 0L); buckets.close(); }
Example #7
Source File: StreamSQLTestProgram.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(); sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart( 3, Time.of(10, TimeUnit.SECONDS) )); sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); sEnv.enableCheckpointing(4000); sEnv.getConfig().setAutoWatermarkInterval(1000); StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv); tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0)); tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); int overWindowSizeSeconds = 1; int tumbleWindowSizeSeconds = 10; String overQuery = String.format( "SELECT " + " key, " + " rowtime, " + " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " + "FROM table1", overWindowSizeSeconds); String tumbleQuery = String.format( "SELECT " + " key, " + " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " + " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " + " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " + "FROM (%s) " + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)", tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds); String joinQuery = String.format( "SELECT " + " t1.key, " + " t2.rowtime AS rowtime, " + " t2.correct," + " t2.wStart " + "FROM table2 t1, (%s) t2 " + "WHERE " + " t1.key = t2.key AND " + " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND", tumbleQuery, tumbleWindowSizeSeconds); String finalAgg = String.format( "SELECT " + " SUM(correct) AS correct, " + " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " + "FROM (%s) " + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)", joinQuery); // 
get Table for SQL query Table result = tEnv.sqlQuery(finalAgg); // convert Table into append-only DataStream DataStream<Row> resultStream = tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP)); final StreamingFileSink<Row> sink = StreamingFileSink .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> { PrintStream out = new PrintStream(stream); out.println(element.toString()); }) .withBucketAssigner(new KeyBucketAssigner()) .withRollingPolicy(OnCheckpointRollingPolicy.build()) .build(); resultStream // inject a KillMapper that forwards all records but terminates the first execution attempt .map(new KillMapper()).setParallelism(1) // add sink function .addSink(sink).setParallelism(1); sEnv.execute(); }
Example #8
Source File: StreamSQLTestProgram.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String planner = params.get("planner", "old"); final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance(); builder.inStreamingMode(); if (planner.equals("old")) { builder.useOldPlanner(); } else if (planner.equals("blink")) { builder.useBlinkPlanner(); } final EnvironmentSettings settings = builder.build(); final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(); sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart( 3, Time.of(10, TimeUnit.SECONDS) )); sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); sEnv.enableCheckpointing(4000); sEnv.getConfig().setAutoWatermarkInterval(1000); final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings); tEnv.registerTableSource("table1", new GeneratorTableSource(10, 100, 60, 0)); tEnv.registerTableSource("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); int overWindowSizeSeconds = 1; int tumbleWindowSizeSeconds = 10; String overQuery = String.format( "SELECT " + " key, " + " rowtime, " + " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " + "FROM table1", overWindowSizeSeconds); String tumbleQuery = String.format( "SELECT " + " key, " + " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " + " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " + " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " + "FROM (%s) " + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)", tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds); String joinQuery = String.format( "SELECT " + " t1.key, " + " t2.rowtime AS rowtime, " + " t2.correct," + " t2.wStart " + "FROM table2 t1, (%s) t2 " + "WHERE " + " 
t1.key = t2.key AND " + " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND", tumbleQuery, tumbleWindowSizeSeconds); String finalAgg = String.format( "SELECT " + " SUM(correct) AS correct, " + " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " + "FROM (%s) " + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)", joinQuery); // get Table for SQL query Table result = tEnv.sqlQuery(finalAgg); // convert Table into append-only DataStream DataStream<Row> resultStream = tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP)); final StreamingFileSink<Row> sink = StreamingFileSink .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> { PrintStream out = new PrintStream(stream); out.println(element.toString()); }) .withBucketAssigner(new KeyBucketAssigner()) .withRollingPolicy(OnCheckpointRollingPolicy.build()) .build(); resultStream // inject a KillMapper that forwards all records but terminates the first execution attempt .map(new KillMapper()).setParallelism(1) // add sink function .addSink(sink).setParallelism(1); sEnv.execute(); }
Example #9
Source File: StreamSQLTestProgram.java From flink with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); String planner = params.get("planner", "blink"); final EnvironmentSettings.Builder builder = EnvironmentSettings.newInstance(); builder.inStreamingMode(); if (planner.equals("old")) { builder.useOldPlanner(); } else if (planner.equals("blink")) { builder.useBlinkPlanner(); } final EnvironmentSettings settings = builder.build(); final StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(); sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart( 3, Time.of(10, TimeUnit.SECONDS) )); sEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); sEnv.enableCheckpointing(4000); sEnv.getConfig().setAutoWatermarkInterval(1000); final StreamTableEnvironment tEnv = StreamTableEnvironment.create(sEnv, settings); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table1", new GeneratorTableSource(10, 100, 60, 0)); ((TableEnvironmentInternal) tEnv).registerTableSourceInternal("table2", new GeneratorTableSource(5, 0.2f, 60, 5)); int overWindowSizeSeconds = 1; int tumbleWindowSizeSeconds = 10; String overQuery = String.format( "SELECT " + " key, " + " rowtime, " + " COUNT(*) OVER (PARTITION BY key ORDER BY rowtime RANGE BETWEEN INTERVAL '%d' SECOND PRECEDING AND CURRENT ROW) AS cnt " + "FROM table1", overWindowSizeSeconds); String tumbleQuery = String.format( "SELECT " + " key, " + " CASE SUM(cnt) / COUNT(*) WHEN 101 THEN 1 ELSE 99 END AS correct, " + " TUMBLE_START(rowtime, INTERVAL '%d' SECOND) AS wStart, " + " TUMBLE_ROWTIME(rowtime, INTERVAL '%d' SECOND) AS rowtime " + "FROM (%s) " + "WHERE rowtime > TIMESTAMP '1970-01-01 00:00:01' " + "GROUP BY key, TUMBLE(rowtime, INTERVAL '%d' SECOND)", tumbleWindowSizeSeconds, tumbleWindowSizeSeconds, overQuery, tumbleWindowSizeSeconds); String joinQuery = String.format( "SELECT " + " t1.key, " + " t2.rowtime AS rowtime, " + " 
t2.correct," + " t2.wStart " + "FROM table2 t1, (%s) t2 " + "WHERE " + " t1.key = t2.key AND " + " t1.rowtime BETWEEN t2.rowtime AND t2.rowtime + INTERVAL '%d' SECOND", tumbleQuery, tumbleWindowSizeSeconds); String finalAgg = String.format( "SELECT " + " SUM(correct) AS correct, " + " TUMBLE_START(rowtime, INTERVAL '20' SECOND) AS rowtime " + "FROM (%s) " + "GROUP BY TUMBLE(rowtime, INTERVAL '20' SECOND)", joinQuery); // get Table for SQL query Table result = tEnv.sqlQuery(finalAgg); // convert Table into append-only DataStream DataStream<Row> resultStream = tEnv.toAppendStream(result, Types.ROW(Types.INT, Types.SQL_TIMESTAMP)); final StreamingFileSink<Row> sink = StreamingFileSink .forRowFormat(new Path(outputPath), (Encoder<Row>) (element, stream) -> { PrintStream out = new PrintStream(stream); out.println(element.toString()); }) .withBucketAssigner(new KeyBucketAssigner()) .withRollingPolicy(OnCheckpointRollingPolicy.build()) .build(); resultStream // inject a KillMapper that forwards all records but terminates the first execution attempt .map(new KillMapper()).setParallelism(1) // add sink function .addSink(sink).setParallelism(1); sEnv.execute(); }
Example #10
Source File: StreamingFileSink.java From flink with Apache License 2.0 | 4 votes |
/**
 * Convenience constructor that fills in the remaining arguments of the full constructor.
 *
 * <p>Delegates with the policy returned by {@code OnCheckpointRollingPolicy.build()},
 * {@code DEFAULT_BUCKET_CHECK_INTERVAL}, a new {@code DefaultBucketFactoryImpl} and the result
 * of {@code OutputFileConfig.builder().build()}.
 *
 * @param basePath base path handed to the full constructor
 * @param writerFactory bulk writer factory handed to the full constructor
 * @param assigner bucket assigner handed to the full constructor
 */
protected BulkFormatBuilder(
        Path basePath,
        BulkWriter.Factory<IN> writerFactory,
        BucketAssigner<IN, BucketID> assigner) {
    this(
            basePath,
            writerFactory,
            assigner,
            OnCheckpointRollingPolicy.build(),
            DEFAULT_BUCKET_CHECK_INTERVAL,
            new DefaultBucketFactoryImpl<>(),
            OutputFileConfig.builder().build());
}
Example #11
Source File: BucketsTest.java From Flink-CEPplus with Apache License 2.0 | 3 votes |
@Test public void testSnapshotAndRestore() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final RollingPolicy<String, String> onCheckpointRollingPolicy = OnCheckpointRollingPolicy.build(); final Buckets<String, String> buckets = createBuckets(path, onCheckpointRollingPolicy, 0); final ListState<byte[]> bucketStateContainer = new MockListState<>(); final ListState<Long> partCounterContainer = new MockListState<>(); buckets.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(0L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); buckets.onElement("test2", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(1L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); assertThat(buckets.getActiveBuckets().get("test2"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test2")); Buckets<String, String> restoredBuckets = restoreBuckets(path, onCheckpointRollingPolicy, 0, bucketStateContainer, partCounterContainer); final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets(); // because we commit pending files for previous checkpoints upon recovery Assert.assertTrue(activeBuckets.isEmpty()); }
Example #12
Source File: BucketsTest.java From flink with Apache License 2.0 | 3 votes |
@Test public void testSnapshotAndRestore() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final RollingPolicy<String, String> onCheckpointRollingPolicy = OnCheckpointRollingPolicy.build(); final Buckets<String, String> buckets = createBuckets(path, onCheckpointRollingPolicy, 0); final ListState<byte[]> bucketStateContainer = new MockListState<>(); final ListState<Long> partCounterContainer = new MockListState<>(); buckets.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(0L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); buckets.onElement("test2", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(1L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); assertThat(buckets.getActiveBuckets().get("test2"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test2")); Buckets<String, String> restoredBuckets = restoreBuckets(path, onCheckpointRollingPolicy, 0, bucketStateContainer, partCounterContainer); final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets(); // because we commit pending files for previous checkpoints upon recovery Assert.assertTrue(activeBuckets.isEmpty()); }
Example #13
Source File: BucketsTest.java From flink with Apache License 2.0 | 3 votes |
@Test public void testSnapshotAndRestore() throws Exception { final File outDir = TEMP_FOLDER.newFolder(); final Path path = new Path(outDir.toURI()); final RollingPolicy<String, String> onCheckpointRollingPolicy = OnCheckpointRollingPolicy.build(); final Buckets<String, String> buckets = createBuckets(path, onCheckpointRollingPolicy, 0); final ListState<byte[]> bucketStateContainer = new MockListState<>(); final ListState<Long> partCounterContainer = new MockListState<>(); buckets.onElement("test1", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(0L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); buckets.onElement("test2", new TestUtils.MockSinkContext(null, 1L, 2L)); buckets.snapshotState(1L, bucketStateContainer, partCounterContainer); assertThat(buckets.getActiveBuckets().get("test1"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test1")); assertThat(buckets.getActiveBuckets().get("test2"), hasSinglePartFileToBeCommittedOnCheckpointAck(path, "test2")); Buckets<String, String> restoredBuckets = restoreBuckets(path, onCheckpointRollingPolicy, 0, bucketStateContainer, partCounterContainer); final Map<String, Bucket<String, String>> activeBuckets = restoredBuckets.getActiveBuckets(); // because we commit pending files for previous checkpoints upon recovery Assert.assertTrue(activeBuckets.isEmpty()); }