Java Code Examples for org.apache.flink.core.fs.FileSystem#listStatus()
The following examples show how to use org.apache.flink.core.fs.FileSystem#listStatus().
Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
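All of the examples share one basic pattern: resolve the FileSystem for a Path, call listStatus() on a directory, and handle a null result, which the examples below treat as "path does not exist". Here is a minimal, self-contained sketch of that pattern, assuming a local directory; the class name ListStatusSketch and the path /tmp/example-dir are illustrative only and appear in none of the projects below.

import java.io.IOException;

import org.apache.flink.core.fs.FileStatus;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class ListStatusSketch {

    public static void main(String[] args) throws IOException {
        // The URI scheme (file://, hdfs://, s3://, ...) selects the FileSystem implementation.
        Path dir = new Path("file:///tmp/example-dir");
        FileSystem fs = dir.getFileSystem();

        // As in the examples below, guard against a null result:
        // listStatus() may return null when the path does not exist.
        FileStatus[] statuses = fs.listStatus(dir);
        if (statuses == null) {
            throw new IOException("Directory does not exist: " + dir);
        }

        // Print each entry, marking sub-directories.
        for (FileStatus status : statuses) {
            System.out.println(status.getPath() + (status.isDir() ? " (directory)" : ""));
        }
    }
}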
Example 1
Source File: BinaryInputFormat.java From flink with Apache License 2.0
protected List<FileStatus> getFiles() throws IOException {
    // get all the files that are involved in the splits
    List<FileStatus> files = new ArrayList<>();

    for (Path filePath : getFilePaths()) {
        final FileSystem fs = filePath.getFileSystem();
        final FileStatus pathFile = fs.getFileStatus(filePath);

        if (pathFile.isDir()) {
            // input is directory. list all contained files
            final FileStatus[] partials = fs.listStatus(filePath);
            for (FileStatus partial : partials) {
                if (!partial.isDir()) {
                    files.add(partial);
                }
            }
        } else {
            files.add(pathFile);
        }
    }

    return files;
}
Example 2
Source File: StanfordTweetsDataSetInputFormat.java From flink-examples with MIT License
@Override
public TweetFileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    logger.info("Found {} files", statuses.length);

    List<TweetFileInputSplit> splits = new ArrayList<>();
    for (int i = 0; i < statuses.length; i++) {
        FileStatus status = statuses[i];
        String fileName = status.getPath().getName();
        if (fileName.endsWith("edges")) {
            splits.add(new TweetFileInputSplit(i, status.getPath()));
        }
    }
    logger.info("Result number of splits: {}", splits.size());
    return splits.toArray(new TweetFileInputSplit[splits.size()]);
}
Example 3
Source File: RocksDBIncrementalRestoreOperation.java From flink with Apache License 2.0
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {
    FileSystem fileSystem = source.getFileSystem();

    final FileStatus[] fileStatuses = fileSystem.listStatus(source);
    if (fileStatuses == null) {
        throw new IOException("Cannot list file statuses. Directory " + source + " does not exist.");
    }

    for (FileStatus fileStatus : fileStatuses) {
        final Path filePath = fileStatus.getPath();
        final String fileName = filePath.getName();
        File restoreFile = new File(source.getPath(), fileName);
        File targetFile = new File(instanceRocksDBPath, fileName);
        if (fileName.endsWith(SST_FILE_SUFFIX)) {
            // hard-link the immutable sst-files
            Files.createLink(targetFile.toPath(), restoreFile.toPath());
        } else {
            // true copy for all other files
            Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
    }
}
Example 4
Source File: FileUtils.java From flink with Apache License 2.0
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
    String relativePath = fileOrDirectory.getPath().replace(rootDir.getPath() + '/', "");
    if (fs.getFileStatus(fileOrDirectory).isDir()) {
        out.putNextEntry(new ZipEntry(relativePath + '/'));
        for (FileStatus containedFile : fs.listStatus(fileOrDirectory)) {
            addToZip(containedFile.getPath(), fs, rootDir, out);
        }
    } else {
        ZipEntry entry = new ZipEntry(relativePath);
        out.putNextEntry(entry);

        try (FSDataInputStream in = fs.open(fileOrDirectory)) {
            IOUtils.copyBytes(in, out, false);
        }
        out.closeEntry();
    }
}
Example 5
Source File: FileMonitoringFunction.java From flink with Apache License 2.0
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    List<String> files = new ArrayList<String>();

    FileStatus[] statuses = fileSystem.listStatus(new Path(path));
    if (statuses == null) {
        LOG.warn("Path does not exist: {}", path);
    } else {
        for (FileStatus status : statuses) {
            Path filePath = status.getPath();
            String fileName = filePath.getName();
            long modificationTime = status.getModificationTime();

            if (!isFiltered(fileName, modificationTime)) {
                files.add(filePath.toString());
                modificationTimes.put(fileName, modificationTime);
            }
        }
    }
    return files;
}
Example 6
Source File: PartitionTempFileManager.java From flink with Apache License 2.0
/**
 * Returns checkpoints whose keys are less than or equal to {@code toCpId}
 * in temporary base path.
 */
public static long[] headCheckpoints(FileSystem fs, Path basePath, long toCpId) throws IOException {
    List<Long> cps = new ArrayList<>();

    for (FileStatus taskStatus : fs.listStatus(basePath)) {
        String name = taskStatus.getPath().getName();
        if (isCheckpointDir(name)) {
            long currentCp = getCheckpointId(name);
            // collect paths whose checkpoint id is less than or equal to toCpId
            if (currentCp <= toCpId) {
                cps.add(currentCp);
            }
        }
    }

    return cps.stream().mapToLong(v -> v).toArray();
}
Example 7
Source File: RocksDBIncrementalRestoreOperation.java From Flink-CEPplus with Apache License 2.0
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {
    FileSystem fileSystem = source.getFileSystem();

    final FileStatus[] fileStatuses = fileSystem.listStatus(source);
    if (fileStatuses == null) {
        throw new IOException("Cannot list file statuses. Directory " + source + " does not exist.");
    }

    for (FileStatus fileStatus : fileStatuses) {
        final Path filePath = fileStatus.getPath();
        final String fileName = filePath.getName();
        File restoreFile = new File(source.getPath(), fileName);
        File targetFile = new File(instanceRocksDBPath, fileName);
        if (fileName.endsWith(SST_FILE_SUFFIX)) {
            // hard-link the immutable sst-files
            Files.createLink(targetFile.toPath(), restoreFile.toPath());
        } else {
            // true copy for all other files
            Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
    }
}
Example 8
Source File: PartitionTempFileManager.java From flink with Apache License 2.0
/**
 * Returns task temporary paths in this checkpoint.
 */
public static List<Path> listTaskTemporaryPaths(
        FileSystem fs, Path basePath, long checkpointId) throws Exception {
    List<Path> taskTmpPaths = new ArrayList<>();

    for (FileStatus taskStatus : fs.listStatus(new Path(basePath, checkpointName(checkpointId)))) {
        if (isTaskDir(taskStatus.getPath().getName())) {
            taskTmpPaths.add(taskStatus.getPath());
        }
    }

    return taskTmpPaths;
}
Example 9
Source File: ContinuousFileMonitoringFunction.java From Flink-CEPplus with Apache License 2.0
/**
 * Returns the paths of the files not yet processed.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {
    final FileStatus[] statuses;
    try {
        statuses = fileSystem.listStatus(path);
    } catch (IOException e) {
        // we may run into an IOException if files are moved while listing their status
        // delay the check for eligible files in this case
        return Collections.emptyMap();
    }

    if (statuses == null) {
        LOG.warn("Path does not exist: {}", path);
        return Collections.emptyMap();
    } else {
        Map<Path, FileStatus> files = new HashMap<>();
        // handle the new files
        for (FileStatus status : statuses) {
            if (!status.isDir()) {
                Path filePath = status.getPath();
                long modificationTime = status.getModificationTime();
                if (!shouldIgnore(filePath, modificationTime)) {
                    files.put(filePath, status);
                }
            } else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
                files.putAll(listEligibleFiles(fileSystem, status.getPath()));
            }
        }
        return files;
    }
}
Example 10
Source File: ContinuousFileMonitoringFunction.java From flink with Apache License 2.0
/**
 * Returns the paths of the files not yet processed.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {
    final FileStatus[] statuses;
    try {
        statuses = fileSystem.listStatus(path);
    } catch (IOException e) {
        // we may run into an IOException if files are moved while listing their status
        // delay the check for eligible files in this case
        return Collections.emptyMap();
    }

    if (statuses == null) {
        LOG.warn("Path does not exist: {}", path);
        return Collections.emptyMap();
    } else {
        Map<Path, FileStatus> files = new HashMap<>();
        // handle the new files
        for (FileStatus status : statuses) {
            if (!status.isDir()) {
                Path filePath = status.getPath();
                long modificationTime = status.getModificationTime();
                if (!shouldIgnore(filePath, modificationTime)) {
                    files.put(filePath, status);
                }
            } else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
                files.putAll(listEligibleFiles(fileSystem, status.getPath()));
            }
        }
        return files;
    }
}
Example 11
Source File: PartitionPathUtils.java From flink with Apache License 2.0
/**
 * List file status without hidden files.
 */
public static FileStatus[] listStatusWithoutHidden(FileSystem fs, Path dir) throws IOException {
    FileStatus[] statuses = fs.listStatus(dir);
    if (statuses == null) {
        return null;
    }
    return Arrays.stream(statuses)
            .filter(fileStatus -> !isHiddenFile(fileStatus))
            .toArray(FileStatus[]::new);
}
Example 12
Source File: FileUtils.java From flink with Apache License 2.0
private static void internalCopyDirectory(
        Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
    tFS.mkdirs(targetPath);

    FileStatus[] contents = sFS.listStatus(sourcePath);
    for (FileStatus content : contents) {
        String distPath = content.getPath().toString();
        if (content.isDir()) {
            if (distPath.endsWith("/")) {
                distPath = distPath.substring(0, distPath.length() - 1);
            }
        }
        String localPath = targetPath + distPath.substring(distPath.lastIndexOf("/"));
        copy(content.getPath(), new Path(localPath), executable);
    }
}
Example 13
Source File: HadoopSwiftFileSystemITCase.java From Flink-CEPplus with Apache License 2.0
@Test
public void testDirectoryListing() throws Exception {
    final Configuration conf = createConfiguration();

    FileSystem.initialize(conf);

    final Path directory = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR + "/testdir/");
    final FileSystem fs = directory.getFileSystem();

    // directory must not yet exist
    assertFalse(fs.exists(directory));

    try {
        // create directory
        assertTrue(fs.mkdirs(directory));

        // seems the file system does not assume existence of empty directories
        assertTrue(fs.exists(directory));

        // directory empty
        assertEquals(0, fs.listStatus(directory).length);

        // create some files
        final int numFiles = 3;
        for (int i = 0; i < numFiles; i++) {
            Path file = new Path(directory, "/file-" + i);
            try (FSDataOutputStream out = fs.create(file, FileSystem.WriteMode.NO_OVERWRITE);
                    OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
                writer.write("hello-" + i + "\n");
            }
        }

        FileStatus[] files = fs.listStatus(directory);
        assertNotNull(files);
        assertEquals(3, files.length);

        for (FileStatus status : files) {
            assertFalse(status.isDir());
        }

        // now that there are files, the directory must exist
        assertTrue(fs.exists(directory));
    } finally {
        // clean up
        fs.delete(directory, true);
    }

    // now directory must be gone
    assertFalse(fs.exists(directory));
}
Example 14
Source File: StanfordTweetsDataSetInputFormat.java From flink-examples with MIT License
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
    FileSystem fileSystem = getFileSystem();
    FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
    return new GraphStatistics(statuses.length);
}
Example 15
Source File: AbstractFileCheckpointStorageTestBase.java From flink with Apache License 2.0
/**
 * Validates that multiple checkpoints from different jobs with the same checkpoint ID do not
 * interfere with each other.
 */
@Test
public void testPersistMultipleMetadataOnlyCheckpoints() throws Exception {
    final FileSystem fs = FileSystem.getLocalFileSystem();
    final Path checkpointDir = new Path(tmp.newFolder().toURI());

    final long checkpointId = 177;

    final CheckpointStorage storage1 = createCheckpointStorage(checkpointDir);
    storage1.initializeBaseLocations();
    final CheckpointStorage storage2 = createCheckpointStorage(checkpointDir);
    storage2.initializeBaseLocations();

    final CheckpointStorageLocation loc1 = storage1.initializeLocationForCheckpoint(checkpointId);
    final CheckpointStorageLocation loc2 = storage2.initializeLocationForCheckpoint(checkpointId);

    final byte[] data1 = {77, 66, 55, 99, 88};
    final byte[] data2 = {1, 3, 2, 5, 4};

    final CompletedCheckpointStorageLocation completedLocation1;
    try (CheckpointMetadataOutputStream out = loc1.createMetadataOutputStream()) {
        out.write(data1);
        completedLocation1 = out.closeAndFinalizeCheckpoint();
    }
    final String result1 = completedLocation1.getExternalPointer();

    final CompletedCheckpointStorageLocation completedLocation2;
    try (CheckpointMetadataOutputStream out = loc2.createMetadataOutputStream()) {
        out.write(data2);
        completedLocation2 = out.closeAndFinalizeCheckpoint();
    }
    final String result2 = completedLocation2.getExternalPointer();

    // check that this went to a file, but in a nested directory structure

    // one directory per storage
    FileStatus[] files = fs.listStatus(checkpointDir);
    assertEquals(2, files.length);

    // in each per-storage directory, one for the checkpoint
    FileStatus[] job1Files = fs.listStatus(files[0].getPath());
    FileStatus[] job2Files = fs.listStatus(files[1].getPath());
    assertTrue(job1Files.length >= 1);
    assertTrue(job2Files.length >= 1);

    assertTrue(fs.exists(new Path(result1, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));
    assertTrue(fs.exists(new Path(result2, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));

    // check that both storages can resolve each other's contents
    validateContents(storage1.resolveCheckpoint(result1).getMetadataHandle(), data1);
    validateContents(storage1.resolveCheckpoint(result2).getMetadataHandle(), data2);
    validateContents(storage2.resolveCheckpoint(result1).getMetadataHandle(), data1);
    validateContents(storage2.resolveCheckpoint(result2).getMetadataHandle(), data2);
}
Example 16
Source File: BlobServerRecoveryTest.java From flink with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *     blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *     and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *     shared HA blob store to use
 *
 * @throws IOException
 *     in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}