Java Code Examples for org.apache.flink.core.fs.Path#getFileSystem()
The following examples show how to use org.apache.flink.core.fs.Path#getFileSystem(). You can go to the original project or source file by following the links above each example.
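Before the examples, a minimal usage sketch: Path#getFileSystem() resolves the FileSystem implementation that matches the path's URI scheme (local file system, HDFS, S3, and so on), and all subsequent I/O goes through the returned FileSystem. The class name and the demo path below are illustrative assumptions, not taken from any of the projects above.

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

// Hypothetical demo class, for illustration only.
public class GetFileSystemSketch {

    public static void main(String[] args) throws IOException {
        // The URI scheme decides which FileSystem implementation is returned.
        Path path = new Path("file:///tmp/flink-getfilesystem-demo.txt"); // assumed demo path

        // Resolve the file system for this path; throws IOException if the scheme is unsupported.
        FileSystem fs = path.getFileSystem();

        // All subsequent I/O goes through the resolved FileSystem.
        try (FSDataOutputStream out = fs.create(path, FileSystem.WriteMode.OVERWRITE);
                OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write("hello");
        }

        System.out.println("exists: " + fs.exists(path));

        // Clean up the demo file.
        fs.delete(path, false);
    }
}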
Example 1
Source File: BinaryInputFormat.java From flink with Apache License 2.0 | 6 votes |
protected List<FileStatus> getFiles() throws IOException {
    // get all the files that are involved in the splits
    List<FileStatus> files = new ArrayList<>();

    for (Path filePath : getFilePaths()) {
        final FileSystem fs = filePath.getFileSystem();
        final FileStatus pathFile = fs.getFileStatus(filePath);

        if (pathFile.isDir()) {
            // input is directory. list all contained files
            final FileStatus[] partials = fs.listStatus(filePath);
            for (FileStatus partial : partials) {
                if (!partial.isDir()) {
                    files.add(partial);
                }
            }
        } else {
            files.add(pathFile);
        }
    }
    return files;
}
Example 2
Source File: YarnFileStageTestS3ITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that nested directories are properly copied to the given S3 path (using the
 * appropriate file system) during resource uploads for YARN.
 *
 * @param scheme
 *         file system scheme
 * @param pathSuffix
 *         test path suffix which will be the test's target path
 */
private void testRecursiveUploadForYarn(String scheme, String pathSuffix) throws Exception {
    ++numRecursiveUploadTests;

    final Path basePath = new Path(S3TestCredentials.getTestBucketUriWithScheme(scheme) + TEST_DATA_DIR);
    final HadoopFileSystem fs = (HadoopFileSystem) basePath.getFileSystem();

    assumeFalse(fs.exists(basePath));

    try {
        final Path directory = new Path(basePath, pathSuffix);

        YarnFileStageTest.testCopyFromLocalRecursive(
            fs.getHadoopFileSystem(),
            new org.apache.hadoop.fs.Path(directory.toUri()),
            tempFolder,
            true);
    } finally {
        // clean up
        fs.delete(basePath, true);
    }
}
Example 3
Source File: RocksDBIncrementalRestoreOperation.java From flink with Apache License 2.0 | 6 votes |
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

    FileSystem fileSystem = source.getFileSystem();

    final FileStatus[] fileStatuses = fileSystem.listStatus(source);

    if (fileStatuses == null) {
        throw new IOException("Cannot list file statuses. Directory " + source + " does not exist.");
    }

    for (FileStatus fileStatus : fileStatuses) {
        final Path filePath = fileStatus.getPath();
        final String fileName = filePath.getName();
        File restoreFile = new File(source.getPath(), fileName);
        File targetFile = new File(instanceRocksDBPath, fileName);
        if (fileName.endsWith(SST_FILE_SUFFIX)) {
            // hard-link the immutable sst-files.
            Files.createLink(targetFile.toPath(), restoreFile.toPath());
        } else {
            // true copy for all other files.
            Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
    }
}
Example 4
Source File: MemoryBackendCheckpointStorage.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Creates a new MemoryBackendCheckpointStorage.
 *
 * @param jobId The ID of the job writing the checkpoints.
 * @param checkpointsBaseDirectory The directory to write checkpoints to. May be null,
 *                                 in which case this storage does not support durable persistence.
 * @param defaultSavepointLocation The default savepoint directory, or null, if none is set.
 * @param maxStateSize The maximum size of each individual piece of state.
 *
 * @throws IOException Thrown if a checkpoint base directory is configured and the
 *                     checkpoint directory cannot be created within that directory.
 */
public MemoryBackendCheckpointStorage(
        JobID jobId,
        @Nullable Path checkpointsBaseDirectory,
        @Nullable Path defaultSavepointLocation,
        int maxStateSize) throws IOException {

    super(jobId, defaultSavepointLocation);

    checkArgument(maxStateSize > 0);
    this.maxStateSize = maxStateSize;

    if (checkpointsBaseDirectory == null) {
        checkpointsDirectory = null;
        fileSystem = null;
    } else {
        this.fileSystem = checkpointsBaseDirectory.getFileSystem();
        this.checkpointsDirectory = getCheckpointDirectoryForJob(checkpointsBaseDirectory, jobId);

        fileSystem.mkdirs(checkpointsDirectory);
    }
}
Example 5
Source File: CheckpointStreamWithResultProvider.java From flink with Apache License 2.0 | 5 votes |
@Nonnull
static CheckpointStreamWithResultProvider createDuplicatingStream(
    @Nonnegative long checkpointId,
    @Nonnull CheckpointedStateScope checkpointedStateScope,
    @Nonnull CheckpointStreamFactory primaryStreamFactory,
    @Nonnull LocalRecoveryDirectoryProvider secondaryStreamDirProvider) throws IOException {

    CheckpointStreamFactory.CheckpointStateOutputStream primaryOut =
        primaryStreamFactory.createCheckpointStateOutputStream(checkpointedStateScope);

    try {
        File outFile = new File(
            secondaryStreamDirProvider.subtaskSpecificCheckpointDirectory(checkpointId),
            String.valueOf(UUID.randomUUID()));

        Path outPath = new Path(outFile.toURI());

        CheckpointStreamFactory.CheckpointStateOutputStream secondaryOut =
            new FileBasedStateOutputStream(outPath.getFileSystem(), outPath);

        return new CheckpointStreamWithResultProvider.PrimaryAndSecondaryStream(primaryOut, secondaryOut);
    } catch (IOException secondaryEx) {
        LOG.warn("Exception when opening secondary/local checkpoint output stream. " +
            "Continue only with the primary stream.", secondaryEx);
    }

    return new CheckpointStreamWithResultProvider.PrimaryStreamOnly(primaryOut);
}
Example 6
Source File: RocksDBIncrementalRestoreOperation.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
Example 7
Source File: FileUtils.java From flink with Apache License 2.0 | 5 votes |
public static Path compressDirectory(Path directory, Path target) throws IOException {
    FileSystem sourceFs = directory.getFileSystem();
    FileSystem targetFs = target.getFileSystem();

    try (ZipOutputStream out = new ZipOutputStream(targetFs.create(target, FileSystem.WriteMode.NO_OVERWRITE))) {
        addToZip(directory, sourceFs, directory.getParent(), out);
    }
    return target;
}
Example 8
Source File: RocksDBCheckpointIterator.java From bravo with Apache License 2.0 | 5 votes |
private void copyStateDataHandleData(
        Path restoreFilePath,
        StreamStateHandle remoteFileHandle) throws IOException {

    FileSystem restoreFileSystem = restoreFilePath.getFileSystem();

    FSDataInputStream inputStream = null;
    FSDataOutputStream outputStream = null;

    try {
        inputStream = remoteFileHandle.openInputStream();
        cancelStreamRegistry.registerCloseable(inputStream);

        outputStream = restoreFileSystem.create(restoreFilePath, FileSystem.WriteMode.OVERWRITE);
        cancelStreamRegistry.registerCloseable(outputStream);

        byte[] buffer = new byte[8 * 1024];
        while (true) {
            int numBytes = inputStream.read(buffer);
            if (numBytes == -1) {
                break;
            }
            outputStream.write(buffer, 0, numBytes);
        }
    } finally {
        if (cancelStreamRegistry.unregisterCloseable(inputStream)) {
            inputStream.close();
        }
        if (cancelStreamRegistry.unregisterCloseable(outputStream)) {
            outputStream.close();
        }
    }
}
Example 9
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static void unzipPythonLibrary(Path targetDir) throws IOException {
    FileSystem targetFs = targetDir.getFileSystem();
    ClassLoader classLoader = PythonPlanBinder.class.getClassLoader();
    try (ZipInputStream zis = new ZipInputStream(classLoader.getResourceAsStream("python-source.zip"))) {
        ZipEntry entry = zis.getNextEntry();
        while (entry != null) {
            String fileName = entry.getName();
            Path newFile = new Path(targetDir, fileName);
            if (entry.isDirectory()) {
                targetFs.mkdirs(newFile);
            } else {
                try {
                    LOG.debug("Unzipping to {}.", newFile);
                    FSDataOutputStream fsDataOutputStream = targetFs.create(newFile, FileSystem.WriteMode.NO_OVERWRITE);
                    IOUtils.copyBytes(zis, fsDataOutputStream, false);
                } catch (Exception e) {
                    zis.closeEntry();
                    throw new IOException("Failed to unzip flink python library.", e);
                }
            }

            zis.closeEntry();
            entry = zis.getNextEntry();
        }
        zis.closeEntry();
    }
}
Example 10
Source File: HDFSTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test that {@link FileUtils#deletePathIfEmpty(FileSystem, Path)} deletes the path if it is
 * empty. A path can only be empty if it is a directory which does not contain any
 * files/directories.
 */
@Test
public void testDeletePathIfEmpty() throws IOException {
    final Path basePath = new Path(hdfsURI);
    final Path directory = new Path(basePath, UUID.randomUUID().toString());
    final Path directoryFile = new Path(directory, UUID.randomUUID().toString());
    final Path singleFile = new Path(basePath, UUID.randomUUID().toString());

    FileSystem fs = basePath.getFileSystem();

    fs.mkdirs(directory);

    byte[] data = "HDFSTest#testDeletePathIfEmpty".getBytes(ConfigConstants.DEFAULT_CHARSET);

    for (Path file : Arrays.asList(singleFile, directoryFile)) {
        org.apache.flink.core.fs.FSDataOutputStream outputStream = fs.create(file, FileSystem.WriteMode.OVERWRITE);
        outputStream.write(data);
        outputStream.close();
    }

    // verify that the files have been created
    assertTrue(fs.exists(singleFile));
    assertTrue(fs.exists(directoryFile));

    // delete the single file
    assertFalse(FileUtils.deletePathIfEmpty(fs, singleFile));
    assertTrue(fs.exists(singleFile));

    // try to delete the non-empty directory
    assertFalse(FileUtils.deletePathIfEmpty(fs, directory));
    assertTrue(fs.exists(directory));

    // delete the file contained in the directory
    assertTrue(fs.delete(directoryFile, false));

    // now the deletion should work
    assertTrue(FileUtils.deletePathIfEmpty(fs, directory));
    assertFalse(fs.exists(directory));
}
Example 11
Source File: HadoopSwiftFileSystemITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testSimpleFileWriteAndRead() throws Exception {
    final Configuration conf = createConfiguration();
    final String testLine = "Hello Upload!";

    FileSystem.initialize(conf);

    final Path path = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR + "/test.txt");
    final FileSystem fs = path.getFileSystem();

    try {
        try (FSDataOutputStream out = fs.create(path, WriteMode.OVERWRITE);
                OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write(testLine);
        }

        try (FSDataInputStream in = fs.open(path);
                InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
                BufferedReader reader = new BufferedReader(ir)) {
            String line = reader.readLine();
            assertEquals(testLine, line);
        }
    } finally {
        fs.delete(path, false);
    }
}
Example 12
Source File: HadoopOSSFileSystemITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@BeforeClass
public static void setup() throws IOException {
    OSSTestCredentials.assumeCredentialsAvailable();

    final Configuration conf = new Configuration();
    conf.setString("fs.oss.endpoint", OSSTestCredentials.getOSSEndpoint());
    conf.setString("fs.oss.accessKeyId", OSSTestCredentials.getOSSAccessKey());
    conf.setString("fs.oss.accessKeySecret", OSSTestCredentials.getOSSSecretKey());
    FileSystem.initialize(conf);
    basePath = new Path(OSSTestCredentials.getTestBucketUri() + TEST_DATA_DIR);
    fs = basePath.getFileSystem();
    deadline = 0;
}
Example 13
Source File: FileCacheDirectoriesTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testDirectoryCleanUp() throws Exception {
    JobID jobID = new JobID();
    ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionAttemptID attemptID2 = new ExecutionAttemptID();

    final String fileName = "test_file";
    // copy / create the file
    final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(
        fileName,
        false,
        InstantiationUtil.serializeObject(permanentBlobKey),
        true);
    Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
    fileCache.createTmpFile(fileName, entry, jobID, attemptID2);

    final Path dstPath = copyResult.get();
    final FileSystem fs = dstPath.getFileSystem();
    final FileStatus fileStatus = fs.getFileStatus(dstPath);
    final Path cacheFile = new Path(dstPath, "cacheFile");
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    fileCache.releaseJob(jobID, attemptID1);
    // still should be available
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    fileCache.releaseJob(jobID, attemptID2);
    // still should be available, file will be deleted after cleanupInterval
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    // after a while, the file should disappear
    assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
    executorService.lastDeleteProcess.run();

    assertFalse(fs.exists(dstPath));
    assertFalse(fs.exists(cacheFile));
}
Example 14
Source File: MapRFsFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testMapRFsKind() throws Exception {
    final Path path = new Path("maprfs:///my/path");
    final FileSystem fs = path.getFileSystem();
    assertEquals(FileSystemKind.FILE_SYSTEM, fs.getKind());
}
Example 15
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
Example 16
Source File: FileOutputFormat.java From flink with Apache License 2.0 | 4 votes |
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (taskNumber < 0 || numTasks < 1) {
        throw new IllegalArgumentException("TaskNumber: " + taskNumber + ", numTasks: " + numTasks);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Opening stream for output (" + (taskNumber + 1) + "/" + numTasks + "). WriteMode=" + writeMode +
            ", OutputDirectoryMode=" + outputDirectoryMode);
    }

    Path p = this.outputFilePath;
    if (p == null) {
        throw new IOException("The file path is null.");
    }

    final FileSystem fs = p.getFileSystem();

    // if this is a local file system, we need to initialize the local output directory here
    if (!fs.isDistributedFS()) {
        if (numTasks == 1 && outputDirectoryMode == OutputDirectoryMode.PARONLY) {
            // output should go to a single file

            // prepare local output path. checks for write mode and removes existing files in case of OVERWRITE mode
            if (!fs.initOutPathLocalFS(p, writeMode, false)) {
                // output preparation failed! Cancel task.
                throw new IOException("Output path '" + p.toString() + "' could not be initialized. Canceling task...");
            }
        } else {
            // numTasks > 1 || outDirMode == OutputDirectoryMode.ALWAYS
            if (!fs.initOutPathLocalFS(p, writeMode, true)) {
                // output preparation failed! Cancel task.
                throw new IOException("Output directory '" + p.toString() + "' could not be created. Canceling task...");
            }
        }
    }

    // Suffix the path with the parallel instance index, if needed
    this.actualFilePath = (numTasks > 1 || outputDirectoryMode == OutputDirectoryMode.ALWAYS) ?
        p.suffix("/" + getDirectoryFileName(taskNumber)) : p;

    // create output file
    this.stream = fs.create(this.actualFilePath, writeMode);

    // at this point, the file creation must have succeeded, or an exception has been thrown
    this.fileCreated = true;
}
Example 17
Source File: SnapshotDirectory.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private SnapshotDirectory(@Nonnull Path directory) throws IOException {
    this(directory, directory.getFileSystem());
}
Example 18
Source File: RocksDBStateUploader.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private StreamStateHandle uploadLocalFileToCheckpointFs(
        Path filePath,
        CheckpointStreamFactory checkpointStreamFactory,
        CloseableRegistry closeableRegistry) throws IOException {
    FSDataInputStream inputStream = null;
    CheckpointStreamFactory.CheckpointStateOutputStream outputStream = null;

    try {
        final byte[] buffer = new byte[READ_BUFFER_SIZE];

        FileSystem backupFileSystem = filePath.getFileSystem();
        inputStream = backupFileSystem.open(filePath);
        closeableRegistry.registerCloseable(inputStream);

        outputStream = checkpointStreamFactory
            .createCheckpointStateOutputStream(CheckpointedStateScope.SHARED);
        closeableRegistry.registerCloseable(outputStream);

        while (true) {
            int numBytes = inputStream.read(buffer);

            if (numBytes == -1) {
                break;
            }

            outputStream.write(buffer, 0, numBytes);
        }

        StreamStateHandle result = null;
        if (closeableRegistry.unregisterCloseable(outputStream)) {
            result = outputStream.closeAndGetHandle();
            outputStream = null;
        }
        return result;

    } finally {
        if (closeableRegistry.unregisterCloseable(inputStream)) {
            IOUtils.closeQuietly(inputStream);
        }

        if (closeableRegistry.unregisterCloseable(outputStream)) {
            IOUtils.closeQuietly(outputStream);
        }
    }
}
Example 19
Source File: BlobServerRecoveryTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *         blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *         and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *         shared HA blob store to use
 *
 * @throws IOException
 *         in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}