org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.source.ContinuousFileReaderOperator.
You can go to the original project or source file by following the links above each example.
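In application code you rarely instantiate ContinuousFileReaderOperator yourself; as Example #1 below shows, StreamExecutionEnvironment builds it internally when you create a file source. For orientation, here is a minimal sketch that exercises the same machinery through the public readFile method of the Flink 1.x DataStream API; the input path and monitoring interval are illustrative assumptions, not values taken from the examples.

import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;

public class ContinuousFileReadSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// "/tmp/input" is an illustrative path, not taken from the examples below.
		String inputPath = "/tmp/input";
		TextInputFormat format = new TextInputFormat(new Path(inputPath));

		// PROCESS_CONTINUOUSLY re-scans the path (here every 100 ms); internally this
		// chains a non-parallel ContinuousFileMonitoringFunction source to a parallel
		// ContinuousFileReaderOperator, the same wiring shown in Example #1.
		DataStream<String> lines = env.readFile(
			format, inputPath, FileProcessingMode.PROCESS_CONTINUOUSLY, 100L);

		lines.print();
		env.execute("Continuous file reading sketch");
	}
}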
Example #1
Source File: StreamExecutionEnvironment.java From Flink-CEPplus with Apache License 2.0
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName,
		FileProcessingMode monitoringMode,
		long interval) {

	Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
	Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
	Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
	Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

	Preconditions.checkArgument(monitoringMode.equals(FileProcessingMode.PROCESS_ONCE) ||
			interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL,
		"The path monitoring interval cannot be less than " +
			ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

	ContinuousFileMonitoringFunction<OUT> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);

	ContinuousFileReaderOperator<OUT> reader =
		new ContinuousFileReaderOperator<>(inputFormat);

	SingleOutputStreamOperator<OUT> source = addSource(monitoringFunction, sourceName)
			.transform("Split Reader: " + sourceName, typeInfo, reader);

	return new DataStreamSource<>(source);
}
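Note the division of labor this helper establishes: the ContinuousFileMonitoringFunction is added as the actual source and only discovers and assigns splits, while the ContinuousFileReaderOperator is attached with transform() and does the reading, so it can run at a higher parallelism than the monitor. Example #5 below asserts exactly this: the monitor runs at parallelism 1, the readers at the job parallelism.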
Example #2
Source File: ContinuousFileProcessingRescalingTest.java From Flink-CEPplus with Apache License 2.0
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
		BlockingFileInputFormat format, int noOfTasks, int taskIdx) throws Exception {
	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	reader.setOutputType(TypeExtractor.getInputFormatTypes(format), new ExecutionConfig());

	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> testHarness =
		new OneInputStreamOperatorTestHarness<>(reader, maxParallelism, noOfTasks, taskIdx);
	testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
	return testHarness;
}
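A hedged sketch of how such a harness is typically driven, based on the calls that appear in the other examples on this page; the format variable, the split values, and the task counts are illustrative assumptions:

// Assumes `format` is the test's BlockingFileInputFormat and that this code runs
// inside the test class; the split's timestamp, index, and byte range are illustrative.
OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
	getTestHarness(format, 2, 0); // two parallel subtasks, this harness is subtask 0
harness.open();

// hand a split to the reader, as the monitoring function would at runtime
harness.processElement(new StreamRecord<>(
	new TimestampedFileInputSplit(0, 0, new Path("test/file0"), 0, 100, null)));

// snapshot the reader's pending-split state under the checkpoint lock; a rescaling
// test would use such snapshots to initialize harnesses with a different noOfTasks
final OperatorSubtaskState snapshot;
synchronized (harness.getCheckpointLock()) {
	snapshot = harness.snapshot(0L, 0L);
}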
Example #3
Source File: StreamExecutionEnvironment.java From flink with Apache License 2.0
(Code identical to Example #1 above.)
Example #4
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0
(Code identical to Example #2 above.)
Example #5
Source File: ContinuousFileProcessingITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testProgram() throws Exception {

	/*
	 * This test checks the interplay between the monitor and the reader
	 * and also the failExternally() functionality. To test the latter we
	 * set the parallelism to 1 so that we have the chaining between the sink,
	 * which throws the SuccessException to signal the end of the test, and the
	 * reader.
	 */

	TextInputFormat format = new TextInputFormat(new Path(hdfsURI));
	format.setFilePath(hdfsURI);
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	// create the stream execution environment with a parallelism > 1 to test
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(PARALLELISM);

	ContinuousFileMonitoringFunction<String> monitoringFunction =
		new ContinuousFileMonitoringFunction<>(format,
			FileProcessingMode.PROCESS_CONTINUOUSLY,
			env.getParallelism(), INTERVAL);

	// the monitor always has DOP 1
	DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction);
	Assert.assertEquals(1, splits.getParallelism());

	ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format);
	TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format);

	// the readers can be multiple
	DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader);
	Assert.assertEquals(PARALLELISM, content.getParallelism());

	// finally for the sink we set the parallelism to 1 so that we can verify the output
	TestingSinkFunction sink = new TestingSinkFunction();
	content.addSink(sink).setParallelism(1);

	Thread job = new Thread() {
		@Override
		public void run() {
			try {
				env.execute("ContinuousFileProcessingITCase Job.");
			} catch (Exception e) {
				Throwable th = e;
				for (int depth = 0; depth < 20; depth++) {
					if (th instanceof SuccessException) {
						return;
					} else if (th.getCause() != null) {
						th = th.getCause();
					} else {
						break;
					}
				}
				e.printStackTrace();
				Assert.fail(e.getMessage());
			}
		}
	};
	job.start();

	// The modification time of the last created file.
	long lastCreatedModTime = Long.MIN_VALUE;

	// create the files to be read
	for (int i = 0; i < NO_OF_FILES; i++) {
		Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
		long modTime;
		do {
			// give it some time so that the files have
			// different modification timestamps.
			Thread.sleep(50);

			tmpFile = fillWithData(hdfsURI, "file", i, "This is test line.");

			modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime();
			if (modTime <= lastCreatedModTime) {
				// delete the last created file to recreate it with a different timestamp
				hdfs.delete(tmpFile.f0, false);
			}
		} while (modTime <= lastCreatedModTime);
		lastCreatedModTime = modTime;

		// put the contents in the expected results list before the reader picks them up,
		// to guarantee that they are in before the reader finishes (avoid race conditions)
		expectedContents.put(i, tmpFile.f1);

		org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i);
		hdfs.rename(tmpFile.f0, file);
		Assert.assertTrue(hdfs.exists(file));
	}

	// wait for the job to finish.
	job.join();
}
Example #6
Source File: ContinuousFileProcessingMigrationTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Manually run this to write binary snapshot data. Remove @Ignore to run.
 */
@Ignore
@Test
public void writeReaderSnapshot() throws Exception {

	File testFolder = tempFolder.newFolder();

	TimestampedFileInputSplit split1 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
	TimestampedFileInputSplit split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
	TimestampedFileInputSplit split3 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
	TimestampedFileInputSplit split4 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	// this always blocks to ensure that the reader doesn't do any actual processing,
	// so that we keep the state for the four splits
	final OneShotLatch blockingLatch = new OneShotLatch();
	BlockingFileInputFormat format =
		new BlockingFileInputFormat(blockingLatch, new Path(testFolder.getAbsolutePath()));

	TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);
	ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
	initReader.setOutputType(typeInfo, new ExecutionConfig());
	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> testHarness =
		new OneInputStreamOperatorTestHarness<>(initReader);
	testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
	testHarness.open();

	// create some state in the reader
	testHarness.processElement(new StreamRecord<>(split1));
	testHarness.processElement(new StreamRecord<>(split2));
	testHarness.processElement(new StreamRecord<>(split3));
	testHarness.processElement(new StreamRecord<>(split4));

	// take a snapshot of the operator's state. This will be used
	// to initialize another reader and compare the results of the
	// two operators.
	final OperatorSubtaskState snapshot;
	synchronized (testHarness.getCheckpointLock()) {
		snapshot = testHarness.snapshot(0L, 0L);
	}

	OperatorSnapshotUtil.writeStateHandle(
		snapshot,
		"src/test/resources/reader-migration-test-flink" + flinkGenerateSavepointVersion + "-snapshot");
}
Example #7
Source File: ContinuousFileProcessingMigrationTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReaderRestore() throws Exception {
	File testFolder = tempFolder.newFolder();

	final OneShotLatch latch = new OneShotLatch();

	BlockingFileInputFormat format =
		new BlockingFileInputFormat(latch, new Path(testFolder.getAbsolutePath()));
	TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);

	ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
	initReader.setOutputType(typeInfo, new ExecutionConfig());

	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> testHarness =
		new OneInputStreamOperatorTestHarness<>(initReader);
	testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);

	testHarness.setup();
	testHarness.initializeState(
		OperatorSnapshotUtil.getResourceFilename(
			"reader-migration-test-flink" + testMigrateVersion + "-snapshot"));
	testHarness.open();

	latch.trigger();

	// ... and wait for the operators to close gracefully
	synchronized (testHarness.getCheckpointLock()) {
		testHarness.close();
	}

	TimestampedFileInputSplit split1 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
	TimestampedFileInputSplit split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
	TimestampedFileInputSplit split3 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
	TimestampedFileInputSplit split4 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	// check that the restored reader emitted all four splits
	Assert.assertTrue(testHarness.getOutput().contains(new StreamRecord<>(split1)));
	Assert.assertTrue(testHarness.getOutput().contains(new StreamRecord<>(split2)));
	Assert.assertTrue(testHarness.getOutput().contains(new StreamRecord<>(split3)));
	Assert.assertTrue(testHarness.getOutput().contains(new StreamRecord<>(split4)));
}
Example #8
Source File: ContinuousFileProcessingTest.java From Flink-CEPplus with Apache License 2.0
@Test
public void testReaderSnapshotRestore() throws Exception {
	String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

	TimestampedFileInputSplit split1 =
		new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null);
	TimestampedFileInputSplit split2 =
		new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 101, 200, null);
	TimestampedFileInputSplit split3 =
		new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null);
	TimestampedFileInputSplit split4 =
		new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null);

	final OneShotLatch latch = new OneShotLatch();

	BlockingFileInputFormat format = new BlockingFileInputFormat(latch, new Path(testBasePath));
	TypeInformation<FileInputSplit> typeInfo = TypeExtractor.getInputFormatTypes(format);

	ContinuousFileReaderOperator<FileInputSplit> initReader = new ContinuousFileReaderOperator<>(format);
	initReader.setOutputType(typeInfo, new ExecutionConfig());

	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> initTestInstance =
		new OneInputStreamOperatorTestHarness<>(initReader);
	initTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);
	initTestInstance.open();

	// create some state in the reader
	initTestInstance.processElement(new StreamRecord<>(split1));
	initTestInstance.processElement(new StreamRecord<>(split2));
	initTestInstance.processElement(new StreamRecord<>(split3));
	initTestInstance.processElement(new StreamRecord<>(split4));

	// take a snapshot of the operator's state. This will be used
	// to initialize another reader and compare the results of the
	// two operators.
	final OperatorSubtaskState snapshot;
	synchronized (initTestInstance.getCheckpointLock()) {
		snapshot = initTestInstance.snapshot(0L, 0L);
	}

	ContinuousFileReaderOperator<FileInputSplit> restoredReader = new ContinuousFileReaderOperator<>(
		new BlockingFileInputFormat(latch, new Path(testBasePath)));
	restoredReader.setOutputType(typeInfo, new ExecutionConfig());

	OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, FileInputSplit> restoredTestInstance =
		new OneInputStreamOperatorTestHarness<>(restoredReader);
	restoredTestInstance.setTimeCharacteristic(TimeCharacteristic.EventTime);

	restoredTestInstance.initializeState(snapshot);
	restoredTestInstance.open();

	// now let computation start
	latch.trigger();

	// ... and wait for the operators to close gracefully
	synchronized (initTestInstance.getCheckpointLock()) {
		initTestInstance.close();
	}
	synchronized (restoredTestInstance.getCheckpointLock()) {
		restoredTestInstance.close();
	}

	FileInputSplit fsSplit1 = createSplitFromTimestampedSplit(split1);
	FileInputSplit fsSplit2 = createSplitFromTimestampedSplit(split2);
	FileInputSplit fsSplit3 = createSplitFromTimestampedSplit(split3);
	FileInputSplit fsSplit4 = createSplitFromTimestampedSplit(split4);

	// verify that the original reader emitted all four splits and that the
	// restored reader produced exactly the same output
	Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit1)));
	Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit2)));
	Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit3)));
	Assert.assertTrue(initTestInstance.getOutput().contains(new StreamRecord<>(fsSplit4)));

	Assert.assertArrayEquals(
		initTestInstance.getOutput().toArray(),
		restoredTestInstance.getOutput().toArray());
}
Example #9
Source File: ContinuousFileProcessingITCase.java From flink with Apache License 2.0
(Code identical to Example #5 above.)
Example #10
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0
(Code identical to Example #6 above.)
Example #11
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0
(Code identical to Example #7 above.)
Example #12
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0
(Code identical to Example #8 above.)