Java Code Examples for org.apache.flink.core.fs.FileInputSplit#getPath()
The following examples show how to use
org.apache.flink.core.fs.FileInputSplit#getPath().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestRowDataCsvInputFormat.java From flink with Apache License 2.0 | 6 votes |
/**
 * Opens the wrapped input format on the given split and prepares the reusable rows.
 *
 * <p>For every selected field whose name is a partition key, the value is taken from the
 * partition spec extracted from the split's path, converted to its internal representation
 * and stored in the row. A value equal to the default partition value is stored as null.
 *
 * @param split the file split to open
 * @throws IOException declared by the method; thrown e.g. by the wrapped format's open call
 */
@Override
public void open(FileInputSplit split) throws IOException {
    inputFormat.open(split);

    final Path splitPath = split.getPath();
    final LinkedHashMap<String, String> partitionSpec =
            PartitionPathUtils.extractPartitionSpecFromPath(splitPath);

    this.row = new GenericRowData(selectFields.length);
    for (int pos = 0; pos < selectFields.length; pos++) {
        final int fieldIndex = selectFields[pos];
        final String fieldName = fieldNames.get(fieldIndex);
        if (!partitionKeys.contains(fieldName)) {
            // non-partition fields are not filled in here
            continue;
        }

        String partitionValue = partitionSpec.get(fieldName);
        if (defaultPartValue.equals(partitionValue)) {
            // the default partition value stands for null
            partitionValue = null;
        }
        this.row.setField(
                pos,
                convertStringToInternal(partitionValue, fieldTypes.get(fieldIndex)));
    }

    this.csvRow = new Row(csvSelectConverters.size());
}
Example 2
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reads a four-record file as two splits with a buffer size of 8, which is smaller than
 * any of the records, and checks that all records are returned intact and in order.
 *
 * <p>NOTE(review): the second split receives the whole file's length as its fourth
 * constructor argument; if that argument is a length rather than an end offset, the split
 * extends past the end of the file - confirm this is intended.
 */
@Test
public void testReadRecordsLargerThanBuffer() throws IOException {
    final String content =
            "aaaaaaaaaaaaaaaaaaaaa\n"
                    + "bbbbbbbbbbbbbbbbbbbbbbbbb\n"
                    + "ccccccccccccccccccc\n"
                    + "ddddddddddddddddddddddddddddddddddd\n";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.setBufferSize(8);
    format.configure(config);

    final List<String> records = new ArrayList<>();

    // drain the first split
    format.open(firstPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // drain the second split
    format.open(secondPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    assertEquals(4, records.size());
    assertEquals(Arrays.asList(content.split("\n")), records);
}
Example 3
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Wraps a file split into a {@code TimestampedFileInputSplit} that carries the given
 * modification time and otherwise copies the split number, path, start, length and
 * host names of the original split.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; checked for null
 * @return the new timestamped split
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
            modTime,
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());
}
Example 4
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Reads a four-record file as two splits with a buffer size of 8, which is smaller than
 * any of the records, and checks that all records are returned intact and in order.
 *
 * <p>NOTE(review): the second split receives the whole file's length as its fourth
 * constructor argument; if that argument is a length rather than an end offset, the split
 * extends past the end of the file - confirm this is intended.
 */
@Test
public void testReadRecordsLargerThanBuffer() throws IOException {
    final String content =
            "aaaaaaaaaaaaaaaaaaaaa\n"
                    + "bbbbbbbbbbbbbbbbbbbbbbbbb\n"
                    + "ccccccccccccccccccc\n"
                    + "ddddddddddddddddddddddddddddddddddd\n";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.setBufferSize(8);
    format.configure(config);

    final List<String> records = new ArrayList<>();

    // drain the first split
    format.open(firstPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // drain the second split
    format.open(secondPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    assertEquals(4, records.size());
    assertEquals(Arrays.asList(content.split("\n")), records);
}
Example 5
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that records are read correctly when the boundary between two splits
 * falls in the middle of a record.
 */
@Test
public void testReadOverSplitBoundariesUnaligned() throws IOException {
    final String content = "value1\nvalue2\nvalue3";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.configure(config);

    // the first split is expected to yield the first two records
    format.open(firstPart);
    for (String expected : new String[] {"value1", "value2"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // the second split is expected to yield only the last record
    format.open(secondPart);
    for (String expected : new String[] {"value3"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();
}
Example 6
Source File: FileInputFormat.java From flink with Apache License 2.0 | 5 votes |
/** * Opens an input stream to the file defined in the input format. * The stream is positioned at the beginning of the given split. * <p> * The stream is actually opened in an asynchronous thread to make sure any interruptions to the thread * working on the input format do not reach the file system. */ @Override public void open(FileInputSplit fileSplit) throws IOException { this.currentSplit = fileSplit; this.splitStart = fileSplit.getStart(); this.splitLength = fileSplit.getLength(); if (LOG.isDebugEnabled()) { LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]"); } // open the split in an asynchronous thread final InputSplitOpenThread isot = new InputSplitOpenThread(fileSplit, this.openTimeout); isot.start(); try { this.stream = isot.waitForCompletion(); this.stream = decorateInputStream(this.stream, fileSplit); } catch (Throwable t) { throw new IOException("Error opening the Input Split " + fileSplit.getPath() + " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t); } // get FSDataInputStream if (this.splitStart != 0) { this.stream.seek(this.splitStart); } }
Example 7
Source File: OrcRowInputFormat.java From flink with Apache License 2.0 | 5 votes |
/**
 * Opens the given split by creating a new {@code OrcRowSplitReader} for the split's
 * path, start and length, using this format's configuration, schema, selected fields,
 * conjunct predicates and batch size.
 *
 * @param fileSplit the split to open
 * @throws IOException declared by the method; thrown e.g. when creating the reader fails
 */
@Override
public void open(FileInputSplit fileSplit) throws IOException {
    LOG.debug("Opening ORC file {}", fileSplit.getPath());

    this.reader = new OrcRowSplitReader(
            conf,
            schema,
            selectedFields,
            conjunctPredicates,
            batchSize,
            fileSplit.getPath(),
            fileSplit.getStart(),
            fileSplit.getLength());
}
Example 8
Source File: ContinuousFileProcessingRescalingTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Wraps a file split into a {@code TimestampedFileInputSplit} that carries the given
 * modification time and otherwise copies the split number, path, start, length and
 * host names of the original split.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; checked for null
 * @return the new timestamped split
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
            modTime,
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());
}
Example 9
Source File: FileInputFormat.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/** * Opens an input stream to the file defined in the input format. * The stream is positioned at the beginning of the given split. * <p> * The stream is actually opened in an asynchronous thread to make sure any interruptions to the thread * working on the input format do not reach the file system. */ @Override public void open(FileInputSplit fileSplit) throws IOException { this.currentSplit = fileSplit; this.splitStart = fileSplit.getStart(); this.splitLength = fileSplit.getLength(); if (LOG.isDebugEnabled()) { LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]"); } // open the split in an asynchronous thread final InputSplitOpenThread isot = new InputSplitOpenThread(fileSplit, this.openTimeout); isot.start(); try { this.stream = isot.waitForCompletion(); this.stream = decorateInputStream(this.stream, fileSplit); } catch (Throwable t) { throw new IOException("Error opening the Input Split " + fileSplit.getPath() + " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t); } // get FSDataInputStream if (this.splitStart != 0) { this.stream.seek(this.splitStart); } }
Example 10
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Verifies that records are read correctly when the boundary between two splits
 * falls in the middle of a record.
 */
@Test
public void testReadOverSplitBoundariesUnaligned() throws IOException {
    final String content = "value1\nvalue2\nvalue3";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.configure(config);

    // the first split is expected to yield the first two records
    format.open(firstPart);
    for (String expected : new String[] {"value1", "value2"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // the second split is expected to yield only the last record
    format.open(secondPart);
    for (String expected : new String[] {"value3"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();
}
Example 11
Source File: FileInputFormat.java From flink with Apache License 2.0 | 5 votes |
/** * Opens an input stream to the file defined in the input format. * The stream is positioned at the beginning of the given split. * <p> * The stream is actually opened in an asynchronous thread to make sure any interruptions to the thread * working on the input format do not reach the file system. */ @Override public void open(FileInputSplit fileSplit) throws IOException { this.currentSplit = fileSplit; this.splitStart = fileSplit.getStart(); this.splitLength = fileSplit.getLength(); if (LOG.isDebugEnabled()) { LOG.debug("Opening input split " + fileSplit.getPath() + " [" + this.splitStart + "," + this.splitLength + "]"); } // open the split in an asynchronous thread final InputSplitOpenThread isot = new InputSplitOpenThread(fileSplit, this.openTimeout); isot.start(); try { this.stream = isot.waitForCompletion(); this.stream = decorateInputStream(this.stream, fileSplit); } catch (Throwable t) { throw new IOException("Error opening the Input Split " + fileSplit.getPath() + " [" + splitStart + "," + splitLength + "]: " + t.getMessage(), t); } // get FSDataInputStream if (this.splitStart != 0) { this.stream.seek(this.splitStart); } }
Example 12
Source File: ContinuousFileProcessingRescalingTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Wraps a file split into a {@code TimestampedFileInputSplit} that carries the given
 * modification time and otherwise copies the split number, path, start, length and
 * host names of the original split.
 *
 * @param modTime the modification time to attach
 * @param split the split to copy from; checked for null
 * @return the new timestamped split
 */
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) {
    Preconditions.checkNotNull(split);

    return new TimestampedFileInputSplit(
            modTime,
            split.getSplitNumber(),
            split.getPath(),
            split.getStart(),
            split.getLength(),
            split.getHostnames());
}
Example 13
Source File: DelimitedInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Reads a four-record file as two splits with a buffer size of 8, which is smaller than
 * any of the records, and checks that all records are returned intact and in order.
 *
 * <p>NOTE(review): the second split receives the whole file's length as its fourth
 * constructor argument; if that argument is a length rather than an end offset, the split
 * extends past the end of the file - confirm this is intended.
 */
@Test
public void testReadRecordsLargerThanBuffer() throws IOException {
    final String content =
            "aaaaaaaaaaaaaaaaaaaaa\n"
                    + "bbbbbbbbbbbbbbbbbbbbbbbbb\n"
                    + "ccccccccccccccccccc\n"
                    + "ddddddddddddddddddddddddddddddddddd\n";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.setBufferSize(8);
    format.configure(config);

    final List<String> records = new ArrayList<>();

    // drain the first split
    format.open(firstPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // drain the second split
    format.open(secondPart);
    for (String rec = format.nextRecord(null); rec != null; rec = format.nextRecord(null)) {
        records.add(rec);
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    assertEquals(4, records.size());
    assertEquals(Arrays.asList(content.split("\n")), records);
}
Example 14
Source File: DelimitedInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
/**
 * Verifies that records are read correctly when the boundary between two splits
 * falls in the middle of a record.
 */
@Test
public void testReadOverSplitBoundariesUnaligned() throws IOException {
    final String content = "value1\nvalue2\nvalue3";
    final FileInputSplit wholeFile = createTempFile(content);

    final FileInputSplit firstPart = new FileInputSplit(
            0, wholeFile.getPath(), 0, wholeFile.getLength() / 2, wholeFile.getHostnames());
    final FileInputSplit secondPart = new FileInputSplit(
            1, wholeFile.getPath(), firstPart.getLength(), wholeFile.getLength(), wholeFile.getHostnames());

    final Configuration config = new Configuration();
    format.configure(config);

    // the first split is expected to yield the first two records
    format.open(firstPart);
    for (String expected : new String[] {"value1", "value2"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();

    // the second split is expected to yield only the last record
    format.open(secondPart);
    for (String expected : new String[] {"value3"}) {
        assertEquals(expected, format.nextRecord(null));
    }
    assertNull(format.nextRecord(null));
    assertTrue(format.reachedEnd());
    format.close();
}
Example 15
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the correct number of records is read when the split boundary is exact at the record boundary. */ @Test public void testReadWithBufferSizeIsMultiple() throws IOException { final String myString = "aaaaaaa\nbbbbbbb\nccccccc\nddddddd\n"; final FileInputSplit split = createTempFile(myString); FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames()); FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames()); final Configuration parameters = new Configuration(); format.setBufferSize(2 * ((int) split1.getLength())); format.configure(parameters); String next; int count = 0; // read split 1 format.open(split1); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } assertNull(format.nextRecord(null)); assertTrue(format.reachedEnd()); format.close(); // this one must have read one too many, because the next split will skipp the trailing remainder // which happens to be one full record assertEquals(3, count); // read split 2 format.open(split2); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } format.close(); assertEquals(4, count); }
Example 16
Source File: DelimitedInputFormatTest.java From flink with Apache License 2.0 | 4 votes |
/** * Tests that the correct number of records is read when the split boundary is exact at the record boundary. */ @Test public void testReadWithBufferSizeIsMultiple() throws IOException { final String myString = "aaaaaaa\nbbbbbbb\nccccccc\nddddddd\n"; final FileInputSplit split = createTempFile(myString); FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames()); FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames()); final Configuration parameters = new Configuration(); format.setBufferSize(2 * ((int) split1.getLength())); format.configure(parameters); String next; int count = 0; // read split 1 format.open(split1); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } assertNull(format.nextRecord(null)); assertTrue(format.reachedEnd()); format.close(); // this one must have read one too many, because the next split will skipp the trailing remainder // which happens to be one full record assertEquals(3, count); // read split 2 format.open(split2); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } format.close(); assertEquals(4, count); }
Example 17
Source File: DelimitedInputFormatTest.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
/** * Tests that the correct number of records is read when the split boundary is exact at the record boundary. */ @Test public void testReadWithBufferSizeIsMultiple() throws IOException { final String myString = "aaaaaaa\nbbbbbbb\nccccccc\nddddddd\n"; final FileInputSplit split = createTempFile(myString); FileInputSplit split1 = new FileInputSplit(0, split.getPath(), 0, split.getLength() / 2, split.getHostnames()); FileInputSplit split2 = new FileInputSplit(1, split.getPath(), split1.getLength(), split.getLength(), split.getHostnames()); final Configuration parameters = new Configuration(); format.setBufferSize(2 * ((int) split1.getLength())); format.configure(parameters); String next; int count = 0; // read split 1 format.open(split1); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } assertNull(format.nextRecord(null)); assertTrue(format.reachedEnd()); format.close(); // this one must have read one too many, because the next split will skipp the trailing remainder // which happens to be one full record assertEquals(3, count); // read split 2 format.open(split2); while ((next = format.nextRecord(null)) != null) { assertEquals(7, next.length()); count++; } format.close(); assertEquals(4, count); }