org.apache.flink.api.java.io.TextInputFormat Java Examples
The following examples show how to use
org.apache.flink.api.java.io.TextInputFormat.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #2
Source File: ContinuousFileProcessingTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #3
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #4
Source File: GlobExample.java From flink-examples with MIT License | 6 votes |
/**
 * Writes three sample files under {@code /tmp/test}, reads files through a
 * {@link TextInputFormat} configured with a {@link GlobFilePathFilter} (include-all pattern,
 * with the {@code file.bin} pattern excluded), and writes the resulting data set out as text.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if writing the sample files or executing the job fails
 */
public static void main(String... args) throws Exception {
    // Create the sample inputs, one per extension.
    writeToFile(new File("/tmp/test/file.txt"), "txt");
    writeToFile(new File("/tmp/test/file.csv"), "csv");
    writeToFile(new File("/tmp/test/file.bin"), "bin");

    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    final TextInputFormat format = new TextInputFormat(new Path("/tmp/test"));

    final GlobFilePathFilter binExcludingFilter =
        new GlobFilePathFilter(Collections.singletonList("**"), Arrays.asList("**/file.bin"));
    // Prints the filter class's declared fields to stdout.
    System.out.println(Arrays.toString(GlobFilePathFilter.class.getDeclaredFields()));
    format.setFilesFilter(binExcludingFilter);

    final DataSet<String> lines = env.readFile(format, "/tmp");
    // NOTE(review): the output path is "/temp/out" while the inputs live under "/tmp" — confirm this is intended.
    lines.writeAsText("/temp/out");

    env.execute("GlobFilePathFilter-Test");
}
Example #5
Source File: ContinuousFileProcessingCheckpointITCase.java From flink with Apache License 2.0 | 6 votes |
@Override public void testProgram(StreamExecutionEnvironment env) { env.enableCheckpointing(10); // create and start the file creating thread. fc = new FileCreator(); fc.start(); // create the monitoring source along with the necessary readers. TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); DataStream<String> inputStream = env.readFile(format, localFsURI, FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL); TestingSinkFunction sink = new TestingSinkFunction(); inputStream.flatMap(new FlatMapFunction<String, String>() { @Override public void flatMap(String value, Collector<String> out) throws Exception { out.collect(value); } }).addSink(sink).setParallelism(1); }
Example #6
Source File: FlinkBatchTransformTranslators.java From flink-dataflow with Apache License 2.0 | 6 votes |
@Override public void translateNode(TextIO.Read.Bound<String> transform, FlinkBatchTranslationContext context) { String path = transform.getFilepattern(); String name = transform.getName(); TextIO.CompressionType compressionType = transform.getCompressionType(); boolean needsValidation = transform.needsValidation(); // TODO: Implement these. We need Flink support for this. LOG.warn("Translation of TextIO.CompressionType not yet supported. Is: {}.", compressionType); LOG.warn("Translation of TextIO.Read.needsValidation not yet supported. Is: {}.", needsValidation); PValue output = context.getOutput(transform); TypeInformation<String> typeInformation = context.getTypeInfo(output); DataSource<String> source = new DataSource<>(context.getExecutionEnvironment(), new TextInputFormat(new Path(path)), typeInformation, name); context.setOutputDataSet(output, source); }
Example #7
Source File: ContinuousFileProcessingCheckpointITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Override public void testProgram(StreamExecutionEnvironment env) { // set the restart strategy. env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0)); env.enableCheckpointing(10); // create and start the file creating thread. fc = new FileCreator(); fc.start(); // create the monitoring source along with the necessary readers. TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); DataStream<String> inputStream = env.readFile(format, localFsURI, FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL); TestingSinkFunction sink = new TestingSinkFunction(); inputStream.flatMap(new FlatMapFunction<String, String>() { @Override public void flatMap(String value, Collector<String> out) throws Exception { out.collect(value); } }).addSink(sink).setParallelism(1); }
Example #8
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testSortingOnModTime() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final long[] modTimes = new long[NO_OF_FILES]; final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); Thread.sleep(400); filesCreated[i] = file.f0; modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime(); } TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // this is just to verify that all splits have been forwarded later. FileInputSplit[] splits = format.createInputSplits(1); ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE); ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes); monitoringFunction.open(new Configuration()); monitoringFunction.run(context); Assert.assertEquals(splits.length, context.getCounter()); // delete the created files. for (int i = 0; i < NO_OF_FILES; i++) { hdfs.delete(filesCreated[i], false); } }
Example #9
Source File: ContinuousFileProcessingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testSortingOnModTime() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final long[] modTimes = new long[NO_OF_FILES]; final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); Thread.sleep(400); filesCreated[i] = file.f0; modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime(); } TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // this is just to verify that all splits have been forwarded later. FileInputSplit[] splits = format.createInputSplits(1); ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE); ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes); monitoringFunction.open(new Configuration()); monitoringFunction.run(context); Assert.assertEquals(splits.length, context.getCounter()); // delete the created files. for (int i = 0; i < NO_OF_FILES; i++) { hdfs.delete(filesCreated[i], false); } }
Example #10
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testSortingOnModTime() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final long[] modTimes = new long[NO_OF_FILES]; final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); Thread.sleep(400); filesCreated[i] = file.f0; modTimes[i] = hdfs.getFileStatus(file.f0).getModificationTime(); } TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // this is just to verify that all splits have been forwarded later. FileInputSplit[] splits = format.createInputSplits(1); ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE); ModTimeVerifyingSourceContext context = new ModTimeVerifyingSourceContext(modTimes); monitoringFunction.open(new Configuration()); monitoringFunction.run(context); Assert.assertEquals(splits.length, context.getCounter()); // delete the created files. for (int i = 0; i < NO_OF_FILES; i++) { hdfs.delete(filesCreated[i], false); } }
Example #11
Source File: ContinuousFileProcessingCheckpointITCase.java From flink with Apache License 2.0 | 5 votes |
@Override public void testProgram(StreamExecutionEnvironment env) { // set the restart strategy. env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0)); env.enableCheckpointing(10); // create and start the file creating thread. fc = new FileCreator(); fc.start(); // create the monitoring source along with the necessary readers. TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); DataStream<String> inputStream = env.readFile(format, localFsURI, FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL); TestingSinkFunction sink = new TestingSinkFunction(); inputStream.flatMap(new FlatMapFunction<String, String>() { @Override public void flatMap(String value, Collector<String> out) throws Exception { out.collect(value); } }).addSink(sink).setParallelism(1); }
Example #12
Source File: SimpleFlinkStreamingCounterDataSource.java From jMetalSP with MIT License | 5 votes |
@Override public void run() { JMetalLogger.logger.info("Run Fink method in the streaming data source invoked") ; JMetalLogger.logger.info("Directory: " + directoryName) ; // environment.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(1,0)); //environment.enableCheckpointing(10); Path filePath = new Path(directoryName); TextInputFormat inputFormat = new TextInputFormat(filePath); inputFormat.setFilesFilter(FilePathFilter.createDefaultFilter()); DataStreamSource<String> data =environment.readFile(inputFormat,directoryName, FileProcessingMode.PROCESS_CONTINUOUSLY,time); try { Iterator<String> it=DataStreamUtils.collect(data); while (it.hasNext()){ Integer number = Integer.parseInt(it.next()); observable.setChanged(); observable.notifyObservers(new ObservedValue<Integer>(number)); } } catch (Exception e){ e.printStackTrace(); } }
Example #13
Source File: ContinuousFileProcessingITCase.java From flink with Apache License 2.0 | 4 votes |
/**
 * End-to-end test of the continuous file monitor together with its readers.
 *
 * <p>Builds a {@code ContinuousFileMonitoringFunction} in {@code PROCESS_CONTINUOUSLY} mode over
 * {@code hdfsURI}, asserts that the split source has parallelism 1 and the reader operator has
 * parallelism {@code PARALLELISM}, and attaches a {@code TestingSinkFunction} with parallelism 1.
 * The job is executed on a separate thread; a {@code SuccessException} found in the failure
 * cause chain completes {@code jobFuture} normally, any other exception completes it
 * exceptionally.
 *
 * <p>While the job runs, {@code NO_OF_FILES} files are created. A file whose modification time
 * is not strictly greater than the previous one is deleted and recreated. The expected content
 * is registered before the temporary file is renamed to its final {@code file<i>} name. The
 * test finally blocks on {@code jobFuture.get()}.
 *
 * <p>NOTE(review): this snippet is collapsed onto two very long lines, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testProgram() throws Exception { /* * This test checks the interplay between the monitor and the reader * and also the failExternally() functionality. To test the latter we * set the parallelism to 1 so that we have the chaining between the sink, * which throws the SuccessException to signal the end of the test, and the * reader. * */ TextInputFormat format = new TextInputFormat(new Path(hdfsURI)); format.setFilePath(hdfsURI); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // create the stream execution environment with a parallelism > 1 to test final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(PARALLELISM); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, env.getParallelism(), INTERVAL); // the monitor has always DOP 1 DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction); Assert.assertEquals(1, splits.getParallelism()); TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format); // the readers can be multiple DataStream<String> content = splits.transform("FileSplitReader", typeInfo, new ContinuousFileReaderOperatorFactory<>(format)); Assert.assertEquals(PARALLELISM, content.getParallelism()); // finally for the sink we set the parallelism to 1 so that we can verify the output TestingSinkFunction sink = new TestingSinkFunction(); content.addSink(sink).setParallelism(1); CompletableFuture<Void> jobFuture = new CompletableFuture<>(); new Thread(() -> { try { env.execute("ContinuousFileProcessingITCase Job."); jobFuture.complete(null); } catch (Exception e) { if (ExceptionUtils.findThrowable(e, SuccessException.class).isPresent()) { jobFuture.complete(null); } else { jobFuture.completeExceptionally(e); } } }).start(); // The modification time of the last created file. 
long lastCreatedModTime = Long.MIN_VALUE; // create the files to be read for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> tmpFile; long modTime; do { // give it some time so that the files have // different modification timestamps. Thread.sleep(50); tmpFile = fillWithData(hdfsURI, "file", i, "This is test line."); modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime(); if (modTime <= lastCreatedModTime) { // delete the last created file to recreate it with a different timestamp hdfs.delete(tmpFile.f0, false); } } while (modTime <= lastCreatedModTime); lastCreatedModTime = modTime; // put the contents in the expected results list before the reader picks them // this is to guarantee that they are in before the reader finishes (avoid race conditions) expectedContents.put(i, tmpFile.f1); org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i); hdfs.rename(tmpFile.f0, file); Assert.assertTrue(hdfs.exists(file)); } jobFuture.get(); }
Example #14
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that the monitoring function in {@code PROCESS_CONTINUOUSLY} mode also picks up
 * files created after it has started.
 *
 * <p>A bootstrap file is created first and the monitoring function is opened and run on a
 * separate thread with a {@code FileVerifyingSourceContext}. Once the latch has been triggered,
 * {@code NO_OF_FILES} further files are created and their names added to the expected
 * (sorted) set. After the monitoring thread has exited, the names seen by the context must
 * equal the expected names, and all created files are deleted.
 *
 * <p>NOTE(review): this snippet is collapsed onto one very long line, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testProcessContinuously() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final OneShotLatch latch = new OneShotLatch(); // create a single file in the directory Tuple2<org.apache.hadoop.fs.Path, String> bootstrap = createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line."); Assert.assertTrue(hdfs.exists(bootstrap.f0)); final Set<String> filesToBeRead = new TreeSet<>(); filesToBeRead.add(bootstrap.f0.getName()); TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY); final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction, 1, totalNoOfFilesToBeRead); final Thread t = new Thread() { @Override public void run() { try { monitoringFunction.open(new Configuration()); monitoringFunction.run(context); } catch (Exception e) { Assert.fail(e.getMessage()); } } }; t.start(); if (!latch.isTriggered()) { latch.await(); } // create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); filesCreated[i] = file.f0; filesToBeRead.add(file.f0.getName()); } // wait until the monitoring thread exits t.join(); Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray()); // finally delete the files created for the test. hdfs.delete(bootstrap.f0, false); for (org.apache.hadoop.fs.Path path: filesCreated) { hdfs.delete(path, false); } }
Example #15
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} over a generic data source that reads the path {@code "/"}
 * with a {@link TextInputFormat} and produces strings.
 *
 * @return the newly created source node
 */
private static DataSourceNode getSourceNode() {
    final TextInputFormat rootFormat = new TextInputFormat(new Path("/"));
    final OperatorInformation<String> stringOutput =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);

    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(rootFormat, stringOutput);
    return new DataSourceNode(source);
}
Example #16
Source File: ChannelTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} over a generic data source named {@code "source"} that reads
 * the path {@code "/ignored"} with a {@link TextInputFormat} and produces strings.
 *
 * @return the newly created source node
 */
private static final DataSourceNode getSourceNode() {
    final TextInputFormat ignoredPathFormat = new TextInputFormat(new Path("/ignored"));
    final OperatorInformation<String> stringOutput =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);

    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(ignoredPathFormat, stringOutput, "source");
    return new DataSourceNode(source);
}
Example #17
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that the monitoring function in {@code PROCESS_ONCE} mode reports only the file
 * that existed when it ran.
 *
 * <p>A bootstrap file is created and is the only name put into the expected set. The
 * monitoring function is opened and run on a separate thread with a
 * {@code FileVerifyingSourceContext}; after {@code run} returns, the context is closed, which
 * (per the inline comment) triggers the latch. Once the latch has been triggered,
 * {@code NO_OF_FILES} further files are created but not added to the expected set. After the
 * thread has exited, the names seen by the context must equal the expected names, and all
 * created files are deleted.
 *
 * <p>NOTE(review): this snippet is collapsed onto two very long lines, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testProcessOnce() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final OneShotLatch latch = new OneShotLatch(); // create a single file in the directory Tuple2<org.apache.hadoop.fs.Path, String> bootstrap = createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line."); Assert.assertTrue(hdfs.exists(bootstrap.f0)); // the source is supposed to read only this file. final Set<String> filesToBeRead = new TreeSet<>(); filesToBeRead.add(bootstrap.f0.getName()); TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE); final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction); final Thread t = new Thread() { @Override public void run() { try { monitoringFunction.open(new Configuration()); monitoringFunction.run(context); // we would never arrive here if we were in // PROCESS_CONTINUOUSLY mode. // this will trigger the latch context.close(); } catch (Exception e) { Assert.fail(e.getMessage()); } } }; t.start(); if (!latch.isTriggered()) { latch.await(); } // create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); filesCreated[i] = ignoredFile.f0; } // wait until the monitoring thread exits t.join(); Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray()); // finally delete the files created for the test. 
hdfs.delete(bootstrap.f0, false); for (org.apache.hadoop.fs.Path path: filesCreated) { hdfs.delete(path, false); } }
Example #18
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Manually run this to write binary snapshot data. Remove {@code @Ignore} to run.
 *
 * <p>Creates one file in a fresh temporary folder and remembers its last-modified time, then
 * runs a {@code ContinuousFileMonitoringFunction} ({@code PROCESS_CONTINUOUSLY} mode) inside a
 * test harness on a separate thread. As soon as the first split has been collected, a snapshot
 * is taken while holding the harness's checkpoint lock and written to
 * {@code src/test/resources/monitoring-function-migration-test-<fileModTime>-flink<version>-snapshot}.
 * Afterwards the function is cancelled, the runner thread is joined and the harness is closed.
 *
 * <p>NOTE(review): a throwable caught in the runner thread is stored in {@code error[0]} but is
 * not asserted on in this method.
 *
 * <p>NOTE(review): this snippet is collapsed onto one very long line, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Ignore @Test public void writeMonitoringSourceSnapshot() throws Exception { File testFolder = tempFolder.newFolder(); long fileModTime = Long.MIN_VALUE; for (int i = 0; i < 1; i++) { Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line."); fileModTime = file.f0.lastModified(); } TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath())); final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL); StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction); final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0); testHarness.open(); final Throwable[] error = new Throwable[1]; final OneShotLatch latch = new OneShotLatch(); // run the source asynchronously Thread runner = new Thread() { @Override public void run() { try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { latch.trigger(); } @Override public void markAsTemporarilyIdle() { } }); } catch (Throwable t) { t.printStackTrace(); error[0] = t; } } }; runner.start(); if (!latch.isTriggered()) { latch.await(); } final OperatorSubtaskState snapshot; synchronized (testHarness.getCheckpointLock()) { snapshot = testHarness.snapshot(0L, 0L); } OperatorSnapshotUtil.writeStateHandle( snapshot, "src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkGenerateSavepointVersion + "-snapshot"); monitoringFunction.cancel(); runner.join(); testHarness.close(); }
Example #19
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that the monitoring function in {@code PROCESS_ONCE} mode reports only the file
 * that existed when it ran.
 *
 * <p>A bootstrap file is created and is the only name put into the expected set. The
 * monitoring function is opened and run on a separate thread with a
 * {@code FileVerifyingSourceContext}; after {@code run} returns, the context is closed, which
 * (per the inline comment) triggers the latch. Once the latch has been triggered,
 * {@code NO_OF_FILES} further files are created but not added to the expected set. After the
 * thread has exited, the names seen by the context must equal the expected names, and all
 * created files are deleted.
 *
 * <p>NOTE(review): this snippet is collapsed onto two very long lines, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testProcessOnce() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final OneShotLatch latch = new OneShotLatch(); // create a single file in the directory Tuple2<org.apache.hadoop.fs.Path, String> bootstrap = createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line."); Assert.assertTrue(hdfs.exists(bootstrap.f0)); // the source is supposed to read only this file. final Set<String> filesToBeRead = new TreeSet<>(); filesToBeRead.add(bootstrap.f0.getName()); TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_ONCE); final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction); final Thread t = new Thread() { @Override public void run() { try { monitoringFunction.open(new Configuration()); monitoringFunction.run(context); // we would never arrive here if we were in // PROCESS_CONTINUOUSLY mode. // this will trigger the latch context.close(); } catch (Exception e) { Assert.fail(e.getMessage()); } } }; t.start(); if (!latch.isTriggered()) { latch.await(); } // create some additional files that should be processed in the case of PROCESS_CONTINUOUSLY final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> ignoredFile = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); filesCreated[i] = ignoredFile.f0; } // wait until the monitoring thread exits t.join(); Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray()); // finally delete the files created for the test. 
hdfs.delete(bootstrap.f0, false); for (org.apache.hadoop.fs.Path path: filesCreated) { hdfs.delete(path, false); } }
Example #20
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that a restored monitoring function carries over the global modification time.
 *
 * <p>One file is created and its modification time recorded. A
 * {@code ContinuousFileMonitoringFunction} ({@code PROCESS_CONTINUOUSLY} mode) is run inside a
 * test harness on a separate thread. The test waits until the first split has been collected,
 * then briefly acquires the source context's checkpoint lock before taking a snapshot. The
 * function is then cancelled, the runner thread joined and the harness closed.
 *
 * <p>A second function and harness are initialised from that snapshot. The test asserts that
 * the runner thread recorded no error and that the copy's
 * {@code getGlobalModificationTime()} equals the file's modification time, then deletes the
 * file.
 *
 * <p>NOTE(review): this snippet is collapsed onto two very long lines, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testFunctionRestore() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; org.apache.hadoop.fs.Path path = null; long fileModTime = Long.MIN_VALUE; for (int i = 0; i < 1; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); path = file.f0; fileModTime = hdfs.getFileStatus(file.f0).getModificationTime(); } TextInputFormat format = new TextInputFormat(new Path(testBasePath)); final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY); StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction); final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0); testHarness.open(); final Throwable[] error = new Throwable[1]; final OneShotLatch latch = new OneShotLatch(); final DummySourceContext sourceContext = new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { latch.trigger(); } }; // run the source asynchronously Thread runner = new Thread() { @Override public void run() { try { monitoringFunction.run(sourceContext); } catch (Throwable t) { t.printStackTrace(); error[0] = t; } } }; runner.start(); // first condition for the source to have updated its state: emit at least one element if (!latch.isTriggered()) { latch.await(); } // second condition for the source to have updated its state: it's not on the lock anymore, // this means it has processed all the splits and updated its state. 
synchronized (sourceContext.getCheckpointLock()) {} OperatorSubtaskState snapshot = testHarness.snapshot(0, 0); monitoringFunction.cancel(); runner.join(); testHarness.close(); final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY); StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy = new StreamSource<>(monitoringFunctionCopy); AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy = new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0); testHarnessCopy.initializeState(snapshot); testHarnessCopy.open(); Assert.assertNull(error[0]); Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime()); hdfs.delete(path, false); }
Example #21
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that the monitoring function in {@code PROCESS_CONTINUOUSLY} mode also picks up
 * files created after it has started.
 *
 * <p>A bootstrap file is created first and the monitoring function is opened and run on a
 * separate thread with a {@code FileVerifyingSourceContext}. Once the latch has been triggered,
 * {@code NO_OF_FILES} further files are created and their names added to the expected
 * (sorted) set. After the monitoring thread has exited, the names seen by the context must
 * equal the expected names, and all created files are deleted.
 *
 * <p>NOTE(review): this snippet is collapsed onto one very long line, so each {@code //}
 * comment swallows the code that follows it; restore the line breaks before compiling.
 */
@Test public void testProcessContinuously() throws Exception { String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/"; final OneShotLatch latch = new OneShotLatch(); // create a single file in the directory Tuple2<org.apache.hadoop.fs.Path, String> bootstrap = createFileAndFillWithData(testBasePath, "file", NO_OF_FILES + 1, "This is test line."); Assert.assertTrue(hdfs.exists(bootstrap.f0)); final Set<String> filesToBeRead = new TreeSet<>(); filesToBeRead.add(bootstrap.f0.getName()); TextInputFormat format = new TextInputFormat(new Path(testBasePath)); format.setFilesFilter(FilePathFilter.createDefaultFilter()); final ContinuousFileMonitoringFunction<String> monitoringFunction = createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY); final int totalNoOfFilesToBeRead = NO_OF_FILES + 1; // 1 for the bootstrap + NO_OF_FILES final FileVerifyingSourceContext context = new FileVerifyingSourceContext(latch, monitoringFunction, 1, totalNoOfFilesToBeRead); final Thread t = new Thread() { @Override public void run() { try { monitoringFunction.open(new Configuration()); monitoringFunction.run(context); } catch (Exception e) { Assert.fail(e.getMessage()); } } }; t.start(); if (!latch.isTriggered()) { latch.await(); } // create some additional files that will be processed in the case of PROCESS_CONTINUOUSLY final org.apache.hadoop.fs.Path[] filesCreated = new org.apache.hadoop.fs.Path[NO_OF_FILES]; for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> file = createFileAndFillWithData(testBasePath, "file", i, "This is test line."); filesCreated[i] = file.f0; filesToBeRead.add(file.f0.getName()); } // wait until the monitoring thread exits t.join(); Assert.assertArrayEquals(filesToBeRead.toArray(), context.getSeenFiles().toArray()); // finally delete the files created for the test. hdfs.delete(bootstrap.f0, false); for (org.apache.hadoop.fs.Path path: filesCreated) { hdfs.delete(path, false); } }
Example #22
Source File: FeedbackPropertiesMatchTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} over a generic data source that reads the path {@code "/"}
 * with a {@link TextInputFormat} and produces strings.
 *
 * @return the newly created source node
 */
private static DataSourceNode getSourceNode() {
    final TextInputFormat rootFormat = new TextInputFormat(new Path("/"));
    final OperatorInformation<String> stringOutput =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);

    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(rootFormat, stringOutput);
    return new DataSourceNode(source);
}
Example #23
Source File: ChannelTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} over a generic data source named {@code "source"} that reads
 * the path {@code "/ignored"} with a {@link TextInputFormat} and produces strings.
 *
 * @return the newly created source node
 */
private static final DataSourceNode getSourceNode() {
    final TextInputFormat ignoredPathFormat = new TextInputFormat(new Path("/ignored"));
    final OperatorInformation<String> stringOutput =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);

    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(ignoredPathFormat, stringOutput, "source");
    return new DataSourceNode(source);
}
Example #24
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Checks that the global modification time of a {@link ContinuousFileMonitoringFunction} is
 * carried over when a second function instance is initialized from a snapshot of the first.
 *
 * <p>The first instance runs on a helper thread until it has emitted at least one split; a
 * snapshot is then taken, the instance is cancelled, and a fresh instance is restored from the
 * snapshot. The restored instance must report the modification time of the single test file.
 *
 * @throws Exception if the test file cannot be created or deleted, a harness operation fails,
 *     or the test thread is interrupted
 */
@Test
public void testFunctionRestore() throws Exception {
    String testBasePath = hdfsURI + "/" + UUID.randomUUID() + "/";

    // a single file is enough: only its modification time is compared after the restore
    Tuple2<org.apache.hadoop.fs.Path, String> file =
        createFileAndFillWithData(testBasePath, "file", 0, "This is test line.");
    org.apache.hadoop.fs.Path path = file.f0;
    long fileModTime = hdfs.getFileStatus(path).getModificationTime();

    TextInputFormat format = new TextInputFormat(new Path(testBasePath));

    final ContinuousFileMonitoringFunction<String> monitoringFunction =
        createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src =
        new StreamSource<>(monitoringFunction);

    final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness =
        new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0);
    testHarness.open();

    // holder for a failure raised on the runner thread; read only after runner.join()
    final Throwable[] error = new Throwable[1];

    final OneShotLatch latch = new OneShotLatch();

    final DummySourceContext sourceContext = new DummySourceContext() {
        @Override
        public void collect(TimestampedFileInputSplit element) {
            latch.trigger();
        }
    };

    // run the source asynchronously
    Thread runner = new Thread() {
        @Override
        public void run() {
            try {
                monitoringFunction.run(sourceContext);
            } catch (Throwable t) {
                t.printStackTrace();
                error[0] = t;
            }
        }
    };
    runner.start();

    // first condition for the source to have updated its state: emit at least one element
    if (!latch.isTriggered()) {
        latch.await();
    }

    // second condition for the source to have updated its state: it's not on the lock anymore,
    // this means it has processed all the splits and updated its state.
    synchronized (sourceContext.getCheckpointLock()) {}

    OperatorSubtaskState snapshot = testHarness.snapshot(0, 0);
    monitoringFunction.cancel();
    runner.join();

    testHarness.close();

    final ContinuousFileMonitoringFunction<String> monitoringFunctionCopy =
        createTestContinuousFileMonitoringFunction(format, FileProcessingMode.PROCESS_CONTINUOUSLY);

    StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> srcCopy =
        new StreamSource<>(monitoringFunctionCopy);

    AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarnessCopy =
        new AbstractStreamOperatorTestHarness<>(srcCopy, 1, 1, 0);
    testHarnessCopy.initializeState(snapshot);
    testHarnessCopy.open();

    try {
        Assert.assertNull(error[0]);
        Assert.assertEquals(fileModTime, monitoringFunctionCopy.getGlobalModificationTime());
    } finally {
        // the restored harness was opened above, so release it even when an assertion fails
        testHarnessCopy.close();
    }

    hdfs.delete(path, false);
}
Example #25
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0 | 4 votes |
/** * Manually run this to write binary snapshot data. Remove @Ignore to run. */ @Ignore @Test public void writeMonitoringSourceSnapshot() throws Exception { File testFolder = tempFolder.newFolder(); long fileModTime = Long.MIN_VALUE; for (int i = 0; i < 1; i++) { Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line."); fileModTime = file.f0.lastModified(); } TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath())); final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL); StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction); final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0); testHarness.open(); final Throwable[] error = new Throwable[1]; final OneShotLatch latch = new OneShotLatch(); // run the source asynchronously Thread runner = new Thread() { @Override public void run() { try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { latch.trigger(); } @Override public void markAsTemporarilyIdle() { } }); } catch (Throwable t) { t.printStackTrace(); error[0] = t; } } }; runner.start(); if (!latch.isTriggered()) { latch.await(); } final OperatorSubtaskState snapshot; synchronized (testHarness.getCheckpointLock()) { snapshot = testHarness.snapshot(0L, 0L); } OperatorSnapshotUtil.writeStateHandle( snapshot, "src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkGenerateSavepointVersion + "-snapshot"); monitoringFunction.cancel(); runner.join(); testHarness.close(); }
Example #26
Source File: ContinuousFileProcessingITCase.java From flink with Apache License 2.0 | 4 votes |
@Test public void testProgram() throws Exception { /* * This test checks the interplay between the monitor and the reader * and also the failExternally() functionality. To test the latter we * set the parallelism to 1 so that we have the chaining between the sink, * which throws the SuccessException to signal the end of the test, and the * reader. * */ TextInputFormat format = new TextInputFormat(new Path(hdfsURI)); format.setFilePath(hdfsURI); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // create the stream execution environment with a parallelism > 1 to test final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(PARALLELISM); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, env.getParallelism(), INTERVAL); // the monitor has always DOP 1 DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction); Assert.assertEquals(1, splits.getParallelism()); ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format); TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format); // the readers can be multiple DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader); Assert.assertEquals(PARALLELISM, content.getParallelism()); // finally for the sink we set the parallelism to 1 so that we can verify the output TestingSinkFunction sink = new TestingSinkFunction(); content.addSink(sink).setParallelism(1); Thread job = new Thread() { @Override public void run() { try { env.execute("ContinuousFileProcessingITCase Job."); } catch (Exception e) { Throwable th = e; for (int depth = 0; depth < 20; depth++) { if (th instanceof SuccessException) { return; } else if (th.getCause() != null) { th = th.getCause(); } else { break; } } e.printStackTrace(); Assert.fail(e.getMessage()); } } }; job.start(); // The modification time of 
the last created file. long lastCreatedModTime = Long.MIN_VALUE; // create the files to be read for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> tmpFile; long modTime; do { // give it some time so that the files have // different modification timestamps. Thread.sleep(50); tmpFile = fillWithData(hdfsURI, "file", i, "This is test line."); modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime(); if (modTime <= lastCreatedModTime) { // delete the last created file to recreate it with a different timestamp hdfs.delete(tmpFile.f0, false); } } while (modTime <= lastCreatedModTime); lastCreatedModTime = modTime; // put the contents in the expected results list before the reader picks them // this is to guarantee that they are in before the reader finishes (avoid race conditions) expectedContents.put(i, tmpFile.f1); org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i); hdfs.rename(tmpFile.f0, file); Assert.assertTrue(hdfs.exists(file)); } // wait for the job to finish. job.join(); }
Example #27
Source File: ContinuousFileProcessingITCase.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
@Test public void testProgram() throws Exception { /* * This test checks the interplay between the monitor and the reader * and also the failExternally() functionality. To test the latter we * set the parallelism to 1 so that we have the chaining between the sink, * which throws the SuccessException to signal the end of the test, and the * reader. * */ TextInputFormat format = new TextInputFormat(new Path(hdfsURI)); format.setFilePath(hdfsURI); format.setFilesFilter(FilePathFilter.createDefaultFilter()); // create the stream execution environment with a parallelism > 1 to test final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(PARALLELISM); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, env.getParallelism(), INTERVAL); // the monitor has always DOP 1 DataStream<TimestampedFileInputSplit> splits = env.addSource(monitoringFunction); Assert.assertEquals(1, splits.getParallelism()); ContinuousFileReaderOperator<String> reader = new ContinuousFileReaderOperator<>(format); TypeInformation<String> typeInfo = TypeExtractor.getInputFormatTypes(format); // the readers can be multiple DataStream<String> content = splits.transform("FileSplitReader", typeInfo, reader); Assert.assertEquals(PARALLELISM, content.getParallelism()); // finally for the sink we set the parallelism to 1 so that we can verify the output TestingSinkFunction sink = new TestingSinkFunction(); content.addSink(sink).setParallelism(1); Thread job = new Thread() { @Override public void run() { try { env.execute("ContinuousFileProcessingITCase Job."); } catch (Exception e) { Throwable th = e; for (int depth = 0; depth < 20; depth++) { if (th instanceof SuccessException) { return; } else if (th.getCause() != null) { th = th.getCause(); } else { break; } } e.printStackTrace(); Assert.fail(e.getMessage()); } } }; job.start(); // The modification time of 
the last created file. long lastCreatedModTime = Long.MIN_VALUE; // create the files to be read for (int i = 0; i < NO_OF_FILES; i++) { Tuple2<org.apache.hadoop.fs.Path, String> tmpFile; long modTime; do { // give it some time so that the files have // different modification timestamps. Thread.sleep(50); tmpFile = fillWithData(hdfsURI, "file", i, "This is test line."); modTime = hdfs.getFileStatus(tmpFile.f0).getModificationTime(); if (modTime <= lastCreatedModTime) { // delete the last created file to recreate it with a different timestamp hdfs.delete(tmpFile.f0, false); } } while (modTime <= lastCreatedModTime); lastCreatedModTime = modTime; // put the contents in the expected results list before the reader picks them // this is to guarantee that they are in before the reader finishes (avoid race conditions) expectedContents.put(i, tmpFile.f1); org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(hdfsURI + "/file" + i); hdfs.rename(tmpFile.f0, file); Assert.assertTrue(hdfs.exists(file)); } // wait for the job to finish. job.join(); }
Example #28
Source File: ChannelTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} wrapping a generic data source named {@code "source"} that is
 * built from a {@link TextInputFormat} for the path {@code "/ignored"} and string type information.
 *
 * @return the newly created source node
 */
private static final DataSourceNode getSourceNode() {
    final TextInputFormat inputFormat = new TextInputFormat(new Path("/ignored"));
    final OperatorInformation<String> operatorInfo =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);
    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(inputFormat, operatorInfo, "source");
    return new DataSourceNode(source);
}
Example #29
Source File: FeedbackPropertiesMatchTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link DataSourceNode} wrapping a generic data source that is built from a
 * {@link TextInputFormat} for the path {@code "/"} and string type information.
 *
 * @return the newly created source node
 */
private static DataSourceNode getSourceNode() {
    final TextInputFormat inputFormat = new TextInputFormat(new Path("/"));
    final OperatorInformation<String> operatorInfo =
        new OperatorInformation<String>(BasicTypeInfo.STRING_TYPE_INFO);
    final GenericDataSourceBase<String, TextInputFormat> source =
        new GenericDataSourceBase<String, TextInputFormat>(inputFormat, operatorInfo);
    return new DataSourceNode(source);
}
Example #30
Source File: ContinuousFileProcessingMigrationTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Manually run this to write binary snapshot data. Remove @Ignore to run. */ @Ignore @Test public void writeMonitoringSourceSnapshot() throws Exception { File testFolder = tempFolder.newFolder(); long fileModTime = Long.MIN_VALUE; for (int i = 0; i < 1; i++) { Tuple2<File, String> file = createFileAndFillWithData(testFolder, "file", i, "This is test line."); fileModTime = file.f0.lastModified(); } TextInputFormat format = new TextInputFormat(new Path(testFolder.getAbsolutePath())); final ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_CONTINUOUSLY, 1, INTERVAL); StreamSource<TimestampedFileInputSplit, ContinuousFileMonitoringFunction<String>> src = new StreamSource<>(monitoringFunction); final AbstractStreamOperatorTestHarness<TimestampedFileInputSplit> testHarness = new AbstractStreamOperatorTestHarness<>(src, 1, 1, 0); testHarness.open(); final Throwable[] error = new Throwable[1]; final OneShotLatch latch = new OneShotLatch(); // run the source asynchronously Thread runner = new Thread() { @Override public void run() { try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { latch.trigger(); } @Override public void markAsTemporarilyIdle() { } }); } catch (Throwable t) { t.printStackTrace(); error[0] = t; } } }; runner.start(); if (!latch.isTriggered()) { latch.await(); } final OperatorSubtaskState snapshot; synchronized (testHarness.getCheckpointLock()) { snapshot = testHarness.snapshot(0L, 0L); } OperatorSnapshotUtil.writeStateHandle( snapshot, "src/test/resources/monitoring-function-migration-test-" + fileModTime + "-flink" + flinkGenerateSavepointVersion + "-snapshot"); monitoringFunction.cancel(); runner.join(); testHarness.close(); }