Java Code Examples for org.apache.flink.core.fs.FileSystem#getFileStatus()
The following examples show how to use org.apache.flink.core.fs.FileSystem#getFileStatus().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BinaryInputFormat.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
protected List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits List<FileStatus> files = new ArrayList<>(); for (Path filePath: getFilePaths()) { final FileSystem fs = filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(filePath); for (FileStatus partial : partials) { if (!partial.isDir()) { files.add(partial); } } } else { files.add(pathFile); } } return files; }
Example 2
Source File: FileCacheDirectoriesTest.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
@Test public void testDirectoryDownloadedFromBlob() throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID = new ExecutionAttemptID(); final String fileName = "test_file"; // copy / create the file final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry( fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true); Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); assertTrue(fileStatus.isDir()); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fs.exists(cacheFile)); final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath())); assertEquals(testFileContent, actualContent); }
Example 3
Source File: BinaryInputFormat.java From flink with Apache License 2.0 | 6 votes |
protected List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits List<FileStatus> files = new ArrayList<>(); for (Path filePath: getFilePaths()) { final FileSystem fs = filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(filePath); for (FileStatus partial : partials) { if (!partial.isDir()) { files.add(partial); } } } else { files.add(pathFile); } } return files; }
Example 4
Source File: FileCacheDirectoriesTest.java From flink with Apache License 2.0 | 6 votes |
@Test public void testDirectoryDownloadedFromBlob() throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID = new ExecutionAttemptID(); final String fileName = "test_file"; // copy / create the file final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry( fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true); Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); assertTrue(fileStatus.isDir()); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fs.exists(cacheFile)); final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath())); assertEquals(testFileContent, actualContent); }
Example 5
Source File: BinaryInputFormat.java From flink with Apache License 2.0 | 6 votes |
protected List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits List<FileStatus> files = new ArrayList<>(); for (Path filePath: getFilePaths()) { final FileSystem fs = filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(filePath); for (FileStatus partial : partials) { if (!partial.isDir()) { files.add(partial); } } } else { files.add(pathFile); } } return files; }
Example 6
Source File: FileCacheDirectoriesTest.java From flink with Apache License 2.0 | 6 votes |
private void testDirectoryDownloaded(DistributedCache.DistributedCacheEntry entry) throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID = new ExecutionAttemptID(); // copy / create the file final String fileName = "test_file"; Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); assertTrue(fileStatus.isDir()); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fs.exists(cacheFile)); final String actualContent = FileUtils.readFileUtf8(new File(cacheFile.getPath())); assertEquals(testFileContent, actualContent); }
Example 7
Source File: FileInputFormat.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
Example 8
Source File: FileCacheDirectoriesTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test public void testDirectoryCleanUp() throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); final String fileName = "test_file"; // copy / create the file final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry( fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true); Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1); fileCache.createTmpFile(fileName, entry, jobID, attemptID2); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID1); // still should be available assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID2); // still should be available, file will be deleted after cleanupInterval assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); // after a while, the file should disappear assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis); executorService.lastDeleteProcess.run(); assertFalse(fs.exists(dstPath)); assertFalse(fs.exists(cacheFile)); }
Example 9
Source File: FileInputFormat.java From flink with Apache License 2.0 | 5 votes |
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
Example 10
Source File: FileCacheDirectoriesTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDirectoryCleanUp() throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); final String fileName = "test_file"; // copy / create the file final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry( fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true); Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1); fileCache.createTmpFile(fileName, entry, jobID, attemptID2); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID1); // still should be available assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID2); // still should be available, file will be deleted after cleanupInterval assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); // after a while, the file should disappear assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis); executorService.lastDeleteProcess.run(); assertFalse(fs.exists(dstPath)); assertFalse(fs.exists(cacheFile)); }
Example 11
Source File: PartitionPathUtils.java From flink with Apache License 2.0 | 5 votes |
/**
 * Looks up the status of {@code path} and collects statuses below it through
 * {@code listStatusRecursively}, starting at level 0.
 *
 * @param path the path to start from
 * @param expectLevel the level value forwarded to {@code listStatusRecursively}
 * @param fs the file system to query
 * @return the collected statuses, or an empty array if an {@link IOException} occurred
 */
private static FileStatus[] getFileStatusRecurse(Path path, int expectLevel, FileSystem fs) {
    final ArrayList<FileStatus> collected = new ArrayList<>();
    try {
        listStatusRecursively(fs, fs.getFileStatus(path), 0, expectLevel, collected);
        return collected.toArray(new FileStatus[0]);
    } catch (IOException ignored) {
        // any I/O failure results in an empty array; partial results are discarded
        return new FileStatus[0];
    }
}
Example 12
Source File: FileInputFormat.java From flink with Apache License 2.0 | 5 votes |
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
Example 13
Source File: FileCacheDirectoriesTest.java From flink with Apache License 2.0 | 5 votes |
@Test public void testDirectoryCleanUp() throws Exception { JobID jobID = new JobID(); ExecutionAttemptID attemptID1 = new ExecutionAttemptID(); ExecutionAttemptID attemptID2 = new ExecutionAttemptID(); final String fileName = "test_file"; // copy / create the file final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry( fileName, false, InstantiationUtil.serializeObject(permanentBlobKey), true); Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1); fileCache.createTmpFile(fileName, entry, jobID, attemptID2); final Path dstPath = copyResult.get(); final FileSystem fs = dstPath.getFileSystem(); final FileStatus fileStatus = fs.getFileStatus(dstPath); final Path cacheFile = new Path(dstPath, "cacheFile"); assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID1); // still should be available assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); fileCache.releaseJob(jobID, attemptID2); // still should be available, file will be deleted after cleanupInterval assertTrue(fileStatus.isDir()); assertTrue(fs.exists(cacheFile)); // after a while, the file should disappear assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis); executorService.lastDeleteProcess.run(); assertFalse(fs.exists(dstPath)); assertFalse(fs.exists(cacheFile)); }