org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit Java Examples
The following examples show how to use
org.apache.flink.streaming.api.functions.source.TimestampedFileInputSplit.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TimestampedFileInputSplitTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies {@code equals} on {@link TimestampedFileInputSplit}: splits created from identical
 * constructor arguments compare equal, while changing the first argument (the modification
 * time) or the path yields a split that is not equal.
 */
@Test
public void testSplitEquality() {
    // Two splits created from exactly the same arguments.
    TimestampedFileInputSplit richFirstSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    TimestampedFileInputSplit richSecondSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    Assert.assertEquals(richFirstSplit, richSecondSplit);

    // Same as above except for the modification time (11 instead of 10).
    TimestampedFileInputSplit richModSecondSplit =
        new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
    Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

    // Same as the first split except for the path.
    TimestampedFileInputSplit richThirdSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    // assertEquals takes (expected, actual): the literal goes first so that a failure
    // message reports the expected and the actual value the right way round.
    Assert.assertEquals(10, richThirdSplit.getModificationTime());
    Assert.assertNotEquals(richFirstSplit, richThirdSplit);

    // An identical copy of the third split must be equal to it.
    TimestampedFileInputSplit richThirdSplitCopy =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
Example #2
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #3
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies {@code equals} on {@link TimestampedFileInputSplit}: splits created from identical
 * constructor arguments compare equal, while changing the first argument (the modification
 * time) or the path yields a split that is not equal.
 */
@Test
public void testSplitEquality() {
    // Two splits created from exactly the same arguments.
    TimestampedFileInputSplit richFirstSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    TimestampedFileInputSplit richSecondSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    Assert.assertEquals(richFirstSplit, richSecondSplit);

    // Same as above except for the modification time (11 instead of 10).
    TimestampedFileInputSplit richModSecondSplit =
        new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
    Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

    // Same as the first split except for the path.
    TimestampedFileInputSplit richThirdSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    // assertEquals takes (expected, actual): the literal goes first so that a failure
    // message reports the expected and the actual value the right way round.
    Assert.assertEquals(10, richThirdSplit.getModificationTime());
    Assert.assertNotEquals(richFirstSplit, richThirdSplit);

    // An identical copy of the third split must be equal to it.
    TimestampedFileInputSplit richThirdSplitCopy =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
Example #4
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Verifies {@code equals} on {@link TimestampedFileInputSplit}: splits created from identical
 * constructor arguments compare equal, while changing the first argument (the modification
 * time) or the path yields a split that is not equal.
 */
@Test
public void testSplitEquality() {
    // Two splits created from exactly the same arguments.
    TimestampedFileInputSplit richFirstSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    TimestampedFileInputSplit richSecondSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null);
    Assert.assertEquals(richFirstSplit, richSecondSplit);

    // Same as above except for the modification time (11 instead of 10).
    TimestampedFileInputSplit richModSecondSplit =
        new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null);
    Assert.assertNotEquals(richSecondSplit, richModSecondSplit);

    // Same as the first split except for the path.
    TimestampedFileInputSplit richThirdSplit =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    // assertEquals takes (expected, actual): the literal goes first so that a failure
    // message reports the expected and the actual value the right way round.
    Assert.assertEquals(10, richThirdSplit.getModificationTime());
    Assert.assertNotEquals(richFirstSplit, richThirdSplit);

    // An identical copy of the third split must be equal to it.
    TimestampedFileInputSplit richThirdSplitCopy =
        new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null);
    Assert.assertEquals(richThirdSplitCopy, richThirdSplit);
}
Example #5
Source File: ContinuousFileProcessingTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #6
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testInvalidPathSpecification() throws Exception { String invalidPath = "hdfs://" + hdfsCluster.getURI().getHost() + ":" + hdfsCluster.getNameNodePort() + "/invalid/"; TextInputFormat format = new TextInputFormat(new Path(invalidPath)); ContinuousFileMonitoringFunction<String> monitoringFunction = new ContinuousFileMonitoringFunction<>(format, FileProcessingMode.PROCESS_ONCE, 1, INTERVAL); try { monitoringFunction.run(new DummySourceContext() { @Override public void collect(TimestampedFileInputSplit element) { // we should never arrive here with an invalid path Assert.fail("Test passes with an invalid path."); } }); // we should never arrive here with an invalid path Assert.fail("Test passed with an invalid path."); } catch (FileNotFoundException e) { Assert.assertEquals("The provided file path " + format.getFilePath() + " does not exist.", e.getMessage()); } }
Example #7
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 6 votes |
/**
 * Builds a test harness around a {@code BlockingFileInputFormat}, optionally restores state,
 * opens it, feeds it the given splits and then runs mailbox steps until the format reports
 * that its first chunk has been processed.
 *
 * @param noOfTasks                forwarded to {@code getTestHarness}
 * @param taskIdx                  forwarded to {@code getTestHarness}
 * @param elementsBeforeCheckpoint forwarded to the {@code BlockingFileInputFormat} constructor
 * @param initState                state to initialize the harness with; skipped when {@code null}
 * @param splits                   splits to process; the array index of each split is used as
 *                                 its modification time; may be {@code null}
 * @return the harness paired with the format it reads through
 */
private HarnessWithFormat buildAndStart(
        int noOfTasks,
        int taskIdx,
        int elementsBeforeCheckpoint,
        @Nullable OperatorSubtaskState initState,
        FileInputSplit... splits) throws Exception {

    BlockingFileInputFormat format =
        new BlockingFileInputFormat(new Path("test"), sizeOfSplit, elementsBeforeCheckpoint);
    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
        getTestHarness(format, noOfTasks, taskIdx);

    harness.setup();
    if (initState != null) {
        harness.initializeState(initState);
    }
    harness.open();

    // A null varargs array is treated like an empty one.
    final int splitCount = (splits == null) ? 0 : splits.length;
    for (int position = 0; position < splitCount; position++) {
        TimestampedFileInputSplit timestamped = getTimestampedSplit(position, splits[position]);
        harness.processElement(new StreamRecord<>(timestamped));
    }

    HarnessWithFormat harnessWithFormat = new HarnessWithFormat(harness, format);
    // Keep stepping the mailbox until the first chunk has been processed.
    while (true) {
        if (format.isFirstChunkProcessed()) {
            return harnessWithFormat;
        }
        harnessWithFormat.mailboxProcessor.runMailboxStep();
    }
}
Example #8
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates an event-time test harness around a {@link ContinuousFileReaderOperator} that reads
 * through the given format.
 *
 * @param format    input format handed to the reader operator
 * @param noOfTasks passed to the harness constructor after {@code maxParallelism}
 * @param taksIdx   passed to the harness constructor as its last argument
 * @return the configured harness
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
        BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception {

    final ExecutionConfig executionConfig = new ExecutionConfig();

    ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
    readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), executionConfig);

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
        new OneInputStreamOperatorTestHarness<>(readerOperator, maxParallelism, noOfTasks, taksIdx);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    return harness;
}
Example #9
Source File: ContinuousFileProcessingRescalingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Wraps a plain {@link FileInputSplit} into a {@link TimestampedFileInputSplit} that carries
 * the given modification time and copies every other attribute from the original split.
 *
 * @param modTime modification time assigned to the new split
 * @param split   split to copy from; passed through {@code Preconditions.checkNotNull}
 * @return the timestamped copy
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    final FileInputSplit source = Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
        modTime,
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #10
Source File: ContinuousFileProcessingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link TimestampedFileInputSplit} into a plain {@link FileInputSplit} with the
 * same split number, path, start, length and hostnames.
 *
 * @param split split to convert; passed through {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} built from the attributes listed above
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    final TimestampedFileInputSplit source = Preconditions.checkNotNull(split);

    return new FileInputSplit(
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #11
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testSplitComparison() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null); // smaller mod time Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0); // lexicographically on the path Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0); // same mod time, same file so smaller split number first Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0); // smaller modification time first Assert.assertTrue(richThirdSplit.compareTo(richForthSplit) < 0); }
Example #12
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testIllegalArgument() { try { new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time } catch (Exception e) { if (!(e instanceof IllegalArgumentException)) { Assert.fail(e.getMessage()); } } }
Example #13
Source File: ContinuousFileProcessingRescalingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Creates an event-time test harness around a {@link ContinuousFileReaderOperator} that reads
 * through the given format.
 *
 * @param format    input format handed to the reader operator
 * @param noOfTasks passed to the harness constructor after {@code maxParallelism}
 * @param taksIdx   passed to the harness constructor as its last argument
 * @return the configured harness
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
        BlockingFileInputFormat format, int noOfTasks, int taksIdx) throws Exception {

    final ExecutionConfig executionConfig = new ExecutionConfig();

    ContinuousFileReaderOperator<String> readerOperator = new ContinuousFileReaderOperator<>(format);
    readerOperator.setOutputType(TypeExtractor.getInputFormatTypes(format), executionConfig);

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
        new OneInputStreamOperatorTestHarness<>(readerOperator, maxParallelism, noOfTasks, taksIdx);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    return harness;
}
Example #14
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link TimestampedFileInputSplit} into a plain {@link FileInputSplit} with the
 * same split number, path, start, length and hostnames.
 *
 * @param split split to convert; passed through {@code checkNotNull} first
 * @return a new {@link FileInputSplit} built from the attributes listed above
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    checkNotNull(split);

    final int splitNumber = split.getSplitNumber();
    final long start = split.getStart();
    final long length = split.getLength();
    final String[] hosts = split.getHostnames();

    return new FileInputSplit(splitNumber, split.getPath(), start, length, hosts);
}
Example #15
Source File: TimestampedFileInputSplitTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testSplitComparison() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null); // smaller mod time Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0); // lexicographically on the path Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0); // same mod time, same file so smaller split number first Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0); // smaller modification time first Assert.assertTrue(richThirdSplit.compareTo(richForthSplit) < 0); }
Example #16
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Wraps a plain {@link FileInputSplit} into a {@link TimestampedFileInputSplit} that carries
 * the given modification time and copies every other attribute from the original split.
 *
 * @param modTime modification time assigned to the new split
 * @param split   split to copy from; passed through {@code Preconditions.checkNotNull}
 * @return the timestamped copy
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    final FileInputSplit source = Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
        modTime,
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #17
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link TimestampedFileInputSplit} into a plain {@link FileInputSplit} with the
 * same split number, path, start, length and hostnames.
 *
 * @param split split to convert; passed through {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} built from the attributes listed above
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    final TimestampedFileInputSplit source = Preconditions.checkNotNull(split);

    return new FileInputSplit(
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #18
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Records the file name of the received split, triggers the latch once the number of seen
 * files equals {@code elementsBeforeNotifying} (unless it is already triggered), and cancels
 * the source once the number of seen files equals {@code elementsBeforeCanceling}.
 *
 * @param element the split emitted by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
    this.seenFiles.add(element.getPath().getName());

    final boolean notifyThresholdReached = seenFiles.size() == elementsBeforeNotifying;
    if (notifyThresholdReached && !latch.isTriggered()) {
        latch.trigger();
    }

    // The size is read again on purpose; it is not cached across the latch trigger.
    final boolean cancelThresholdReached = seenFiles.size() == elementsBeforeCanceling;
    if (cancelThresholdReached) {
        src.cancel();
    }
}
Example #19
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Looks up the modification time of the split's file through {@code hdfs} and asserts that it
 * is not smaller than the previously seen one and that it matches the expected value at the
 * current split position. An {@link IOException} during the lookup fails the test.
 *
 * @param element the split emitted by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
    try {
        org.apache.hadoop.fs.Path hadoopPath =
            new org.apache.hadoop.fs.Path(element.getPath().getPath());
        long modTime = hdfs.getFileStatus(hadoopPath).getModificationTime();

        // modification times must arrive in non-decreasing order
        Assert.assertTrue(modTime >= lastSeenModTime);
        Assert.assertEquals(expectedModificationTimes[splitCounter], modTime);

        lastSeenModTime = modTime;
        splitCounter++;
    } catch (IOException e) {
        Assert.fail(e.getMessage());
    }
}
Example #20
Source File: Utils.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a test harness around a {@link ContinuousFileReaderOperatorFactory} for the given
 * input format and sets the factory's output type.
 *
 * @param inputFormat     format handed to the reader operator factory
 * @param outTypeInfo     output type information set on the factory
 * @param executionConfig execution config used for the output type; when {@code null}, the
 *                        harness's own execution config is used instead
 * @param <OUT>           type of the records produced by the input format
 * @return the harness wrapping the reader operator factory
 */
public static <OUT> OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, OUT> createContinuousFileProcessingTestHarness(
        FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> outTypeInfo,
        ExecutionConfig executionConfig) throws Exception {

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, OUT> harness =
        new OneInputStreamOperatorTestHarness<>(new ContinuousFileReaderOperatorFactory<>(inputFormat));

    // Fall back to the harness's config when the caller did not supply one.
    ExecutionConfig effectiveConfig = executionConfig;
    if (effectiveConfig == null) {
        effectiveConfig = harness.getExecutionConfig();
    }

    harness.getOperatorFactory().setOutputType(outTypeInfo, effectiveConfig);
    return harness;
}
Example #21
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Wraps a plain {@link FileInputSplit} into a {@link TimestampedFileInputSplit} that carries
 * the given modification time and copies every other attribute from the original split.
 *
 * @param modTime modification time assigned to the new split
 * @param split   split to copy from; passed through {@code Preconditions.checkNotNull}
 * @return the timestamped copy
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    final FileInputSplit source = Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
        modTime,
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #22
Source File: HiveTableSource.java From flink with Apache License 2.0 | 5 votes |
private DataStream<RowData> createStreamSourceForNonPartitionTable( StreamExecutionEnvironment execEnv, TypeInformation<RowData> typeInfo, HiveTableInputFormat inputFormat, HiveTablePartition hiveTable) { HiveTableFileInputFormat fileInputFormat = new HiveTableFileInputFormat(inputFormat, hiveTable); Configuration configuration = new Configuration(); catalogTable.getOptions().forEach(configuration::setString); String consumeOrderStr = configuration.get(STREAMING_SOURCE_CONSUME_ORDER); ConsumeOrder consumeOrder = ConsumeOrder.getConsumeOrder(consumeOrderStr); if (consumeOrder != ConsumeOrder.CREATE_TIME_ORDER) { throw new UnsupportedOperationException( "Only " + ConsumeOrder.CREATE_TIME_ORDER + " is supported for non partition table."); } String consumeOffset = configuration.get(STREAMING_SOURCE_CONSUME_START_OFFSET); // to Local zone mills instead of UTC mills long currentReadTime = TimestampData.fromLocalDateTime(toLocalDateTime(consumeOffset)) .toTimestamp().getTime(); Duration monitorInterval = configuration.get(STREAMING_SOURCE_MONITOR_INTERVAL); ContinuousFileMonitoringFunction<RowData> monitoringFunction = new ContinuousFileMonitoringFunction<>( fileInputFormat, FileProcessingMode.PROCESS_CONTINUOUSLY, execEnv.getParallelism(), monitorInterval.toMillis(), currentReadTime); ContinuousFileReaderOperatorFactory<RowData, TimestampedFileInputSplit> factory = new ContinuousFileReaderOperatorFactory<>(fileInputFormat); String sourceName = "HiveFileMonitoringFunction"; SingleOutputStreamOperator<RowData> source = execEnv.addSource(monitoringFunction, sourceName) .transform("Split Reader: " + sourceName, typeInfo, factory); return new DataStreamSource<>(source); }
Example #23
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates an event-time test harness around a {@link ContinuousFileReaderOperatorFactory}
 * that reads through the given format.
 *
 * @param format    input format handed to the reader operator factory
 * @param noOfTasks passed to the harness constructor after {@code maxParallelism}
 * @param taskIdx   passed to the harness constructor as its last argument
 * @return the configured harness
 */
private OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> getTestHarness(
        BlockingFileInputFormat format, int noOfTasks, int taskIdx) throws Exception {

    ContinuousFileReaderOperatorFactory<String, TimestampedFileInputSplit> readerFactory =
        new ContinuousFileReaderOperatorFactory<>(
            format,
            TypeExtractor.getInputFormatTypes(format),
            new ExecutionConfig());

    OneInputStreamOperatorTestHarness<TimestampedFileInputSplit, String> harness =
        new OneInputStreamOperatorTestHarness<>(readerFactory, maxParallelism, noOfTasks, taskIdx);
    harness.setTimeCharacteristic(TimeCharacteristic.EventTime);

    return harness;
}
Example #24
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testSplitComparison() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null); // smaller mod time Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0); // lexicographically on the path Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0); // same mod time, same file so smaller split number first Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0); // smaller modification time first Assert.assertTrue(richThirdSplit.compareTo(richForthSplit) < 0); }
Example #25
Source File: TimestampedFileInputSplitTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testIllegalArgument() { try { new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time } catch (Exception e) { if (!(e instanceof IllegalArgumentException)) { Assert.fail(e.getMessage()); } } }
Example #26
Source File: ContinuousFileProcessingMigrationTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link TimestampedFileInputSplit} into a plain {@link FileInputSplit} with the
 * same split number, path, start, length and hostnames.
 *
 * @param split split to convert; passed through {@code checkNotNull} first
 * @return a new {@link FileInputSplit} built from the attributes listed above
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    checkNotNull(split);

    final int splitNumber = split.getSplitNumber();
    final long start = split.getStart();
    final long length = split.getLength();
    final String[] hosts = split.getHostnames();

    return new FileInputSplit(splitNumber, split.getPath(), start, length, hosts);
}
Example #27
Source File: StreamExecutionEnvironment.java From flink with Apache License 2.0 | 5 votes |
/**
 * Creates a file-based source consisting of a {@link ContinuousFileMonitoringFunction}
 * followed by a split-reader transformation.
 *
 * @param inputFormat    format used to read the files; must not be {@code null}
 * @param typeInfo       type information of the produced records; must not be {@code null}
 * @param sourceName     name of the source; must not be {@code null}
 * @param monitoringMode processing mode; must not be {@code null}
 * @param interval       monitoring interval; unless the mode is {@code PROCESS_ONCE} it must be
 *                       at least {@code ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL}
 * @param <OUT>          type of the produced records
 * @return the data stream source reading the files
 */
private <OUT> DataStreamSource<OUT> createFileInput(
        FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> typeInfo,
        String sourceName,
        FileProcessingMode monitoringMode,
        long interval) {

    Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
    Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
    Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
    Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

    // The minimum interval only applies when the path is monitored more than once.
    final boolean intervalAcceptable =
        monitoringMode.equals(FileProcessingMode.PROCESS_ONCE)
            || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL;
    Preconditions.checkArgument(
        intervalAcceptable,
        "The path monitoring interval cannot be less than "
            + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.");

    ContinuousFileMonitoringFunction<OUT> monitoringFunction =
        new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);

    ContinuousFileReaderOperatorFactory<OUT, TimestampedFileInputSplit> readerFactory =
        new ContinuousFileReaderOperatorFactory<>(inputFormat);

    String readerName = "Split Reader: " + sourceName;
    SingleOutputStreamOperator<OUT> source =
        addSource(monitoringFunction, sourceName).transform(readerName, typeInfo, readerFactory);

    return new DataStreamSource<>(source);
}
Example #28
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Converts a {@link TimestampedFileInputSplit} into a plain {@link FileInputSplit} with the
 * same split number, path, start, length and hostnames.
 *
 * @param split split to convert; passed through {@code Preconditions.checkNotNull}
 * @return a new {@link FileInputSplit} built from the attributes listed above
 */
private FileInputSplit createSplitFromTimestampedSplit(TimestampedFileInputSplit split) {
    final TimestampedFileInputSplit source = Preconditions.checkNotNull(split);

    return new FileInputSplit(
        source.getSplitNumber(),
        source.getPath(),
        source.getStart(),
        source.getLength(),
        source.getHostnames());
}
Example #29
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Records the file name of the received split, triggers the latch once the number of seen
 * files equals {@code elementsBeforeNotifying} (unless it is already triggered), and cancels
 * the source once the number of seen files equals {@code elementsBeforeCanceling}.
 *
 * @param element the split emitted by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
    this.seenFiles.add(element.getPath().getName());

    final boolean notifyThresholdReached = seenFiles.size() == elementsBeforeNotifying;
    if (notifyThresholdReached && !latch.isTriggered()) {
        latch.trigger();
    }

    // The size is read again on purpose; it is not cached across the latch trigger.
    final boolean cancelThresholdReached = seenFiles.size() == elementsBeforeCanceling;
    if (cancelThresholdReached) {
        src.cancel();
    }
}
Example #30
Source File: ContinuousFileProcessingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Looks up the modification time of the split's file through {@code hdfs} and asserts that it
 * is not smaller than the previously seen one and that it matches the expected value at the
 * current split position. An {@link IOException} during the lookup fails the test.
 *
 * @param element the split emitted by the source
 */
@Override
public void collect(TimestampedFileInputSplit element) {
    try {
        org.apache.hadoop.fs.Path hadoopPath =
            new org.apache.hadoop.fs.Path(element.getPath().getPath());
        long modTime = hdfs.getFileStatus(hadoopPath).getModificationTime();

        // modification times must arrive in non-decreasing order
        Assert.assertTrue(modTime >= lastSeenModTime);
        Assert.assertEquals(expectedModificationTimes[splitCounter], modTime);

        lastSeenModTime = modTime;
        splitCounter++;
    } catch (IOException e) {
        Assert.fail(e.getMessage());
    }
}