Java Code Examples for org.apache.flink.core.fs.FileSystem#exists()
The following examples show how to use org.apache.flink.core.fs.FileSystem#exists(). Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
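Before the project examples, here is a minimal, self-contained sketch of the call itself. The path and the cleanup step are illustrative assumptions rather than code from any project below; the API calls themselves (Path#getFileSystem(), FileSystem#exists(Path), FileSystem#delete(Path, boolean)) are the standard Flink ones.

import java.io.IOException;

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class ExistsSketch {

    public static void main(String[] args) throws IOException {
        // hypothetical path; any supported scheme (file://, hdfs://, s3://, ...) would work
        Path path = new Path("file:///tmp/flink-exists-sketch");

        // resolve the FileSystem implementation backing this path's scheme
        FileSystem fs = path.getFileSystem();

        if (fs.exists(path)) {
            // recursive delete, mirroring the exists-then-delete pattern in the examples below
            fs.delete(path, true);
        }
    }
}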
Example 1
Source File: FileUtils.java From Flink-CEPplus with Apache License 2.0
/**
 * Copies all files from source to target and sets executable flag. Paths might be on different systems.
 *
 * @param sourcePath source path to copy from
 * @param targetPath target path to copy to
 * @param executable if target file should be executable
 * @throws IOException if the copy fails
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    // we unwrap the file system to get raw streams without safety net
    FileSystem sFS = FileSystem.getUnguardedFileSystem(sourcePath.toUri());
    FileSystem tFS = FileSystem.getUnguardedFileSystem(targetPath.toUri());
    if (!tFS.exists(targetPath)) {
        if (sFS.getFileStatus(sourcePath).isDir()) {
            internalCopyDirectory(sourcePath, targetPath, executable, sFS, tFS);
        } else {
            internalCopyFile(sourcePath, targetPath, executable, sFS, tFS);
        }
    }
}
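A hypothetical invocation of the helper above might look like the following; the concrete paths are assumptions for illustration. Note that the exists() guard means the copy is skipped entirely when the target path already exists, and that source and target may live on different file systems.

// hypothetical paths; FileUtils.copy resolves the file system for each scheme separately
FileUtils.copy(
    new Path("hdfs:///data/job/artifacts"), // source
    new Path("file:///tmp/artifacts"),      // target, only written if it does not exist yet
    false);                                 // do not set the executable flag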
Example 2
Source File: RocksDBIncrementalRestoreOperation.java From Flink-CEPplus with Apache License 2.0
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
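The exists-then-delete pair makes the cleanup a harmless no-op when the path was never created, and the swallowed IOException keeps a failed cleanup from masking the primary error. A sketch of how such a quiet cleanup is typically driven (the restore call and the path are hypothetical):

// hypothetical caller: best-effort removal of a temporary restore directory
Path temporaryRestorePath = new Path("file:///tmp/rocksdb-restore");
try {
    restoreFromLocalState(temporaryRestorePath); // hypothetical restore step that may throw
} finally {
    cleanUpPathQuietly(temporaryRestorePath);    // never throws, only logs a warning
}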
Example 3
Source File: TaskLocalStateStoreImpl.java From Flink-CEPplus with Apache License 2.0
/**
 * Helper method to delete a directory.
 */
private void deleteDirectory(File directory) throws IOException {
    Path path = new Path(directory.toURI());
    FileSystem fileSystem = path.getFileSystem();
    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }
}
Example 4
Source File: TaskLocalStateStoreImpl.java From flink with Apache License 2.0
/**
 * Helper method to delete a directory.
 */
private void deleteDirectory(File directory) throws IOException {
    Path path = new Path(directory.toURI());
    FileSystem fileSystem = path.getFileSystem();
    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }
}
Example 5
Source File: AbstractHadoopFileSystemITTest.java From flink with Apache License 2.0
public static void cleanupDirectoryWithRetry(FileSystem fs, Path path, long consistencyToleranceNS) throws IOException, InterruptedException {
    fs.delete(path, true);
    long deadline = System.nanoTime() + consistencyToleranceNS;
    while (fs.exists(path) && System.nanoTime() - deadline < 0) {
        fs.delete(path, true);
        Thread.sleep(50L);
    }
    Assert.assertFalse(fs.exists(path));
}
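On eventually consistent object stores, exists() may keep reporting a path for a short window after a successful delete; the retry loop above tolerates that window before asserting. A hypothetical call with a 30-second tolerance (the bucket path is an assumption; java.util.concurrent.TimeUnit converts the tolerance to the nanoseconds the method expects):

cleanupDirectoryWithRetry(fs, new Path("s3://my-bucket/tmp-output"), TimeUnit.SECONDS.toNanos(30));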
Example 6
Source File: FileUtils.java From flink with Apache License 2.0
/**
 * Copies all files from source to target and sets executable flag. Paths might be on different systems.
 *
 * @param sourcePath source path to copy from
 * @param targetPath target path to copy to
 * @param executable if target file should be executable
 * @throws IOException if the copy fails
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    // we unwrap the file system to get raw streams without safety net
    FileSystem sFS = FileSystem.getUnguardedFileSystem(sourcePath.toUri());
    FileSystem tFS = FileSystem.getUnguardedFileSystem(targetPath.toUri());
    if (!tFS.exists(targetPath)) {
        if (sFS.getFileStatus(sourcePath).isDir()) {
            internalCopyDirectory(sourcePath, targetPath, executable, sFS, tFS);
        } else {
            internalCopyFile(sourcePath, targetPath, executable, sFS, tFS);
        }
    }
}
Example 7
Source File: RocksDBIncrementalRestoreOperation.java From flink with Apache License 2.0
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
Example 8
Source File: RheemFileOutputFormat.java From rheem with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        if (taskNumber < 0 || numTasks < 1) {
            throw new IllegalArgumentException("TaskNumber: " + taskNumber + ", numTasks: " + numTasks);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Opening stream for output (" + (taskNumber + 1) + "/" + numTasks + "). WriteMode=" + writeMode +
                ", OutputDirectoryMode=" + outputDirectoryMode);
        }

        Path p = this.outputFilePath;
        if (p == null) {
            throw new IOException("The file path is null.");
        }

        final FileSystem fs = p.getFileSystem();
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        this.fileCreated = true;

        final SequenceFile.Writer.Option fileOption = SequenceFile.Writer.file(new org.apache.hadoop.fs.Path(p.toString()));
        final SequenceFile.Writer.Option keyClassOption = SequenceFile.Writer.keyClass(NullWritable.class);
        final SequenceFile.Writer.Option valueClassOption = SequenceFile.Writer.valueClass(BytesWritable.class);

        writer = SequenceFile.createWriter(new org.apache.hadoop.conf.Configuration(true), fileOption, keyClassOption, valueClassOption);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 9
Source File: ContinuousFileMonitoringFunction.java From flink with Apache License 2.0
@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
    Path p = new Path(path);
    FileSystem fileSystem = FileSystem.get(p.toUri());
    if (!fileSystem.exists(p)) {
        throw new FileNotFoundException("The provided file path " + path + " does not exist.");
    }

    checkpointLock = context.getCheckpointLock();
    switch (watchType) {
        case PROCESS_CONTINUOUSLY:
            while (isRunning) {
                synchronized (checkpointLock) {
                    monitorDirAndForwardSplits(fileSystem, context);
                }
                Thread.sleep(interval);
            }
            // here we do not need to set the running to false and the
            // globalModificationTime to Long.MAX_VALUE because to arrive here,
            // either close() or cancel() have already been called, so this
            // is already done.
            break;
        case PROCESS_ONCE:
            synchronized (checkpointLock) {
                // the following check guarantees that if we restart
                // after a failure and we managed to have a successful
                // checkpoint, we will not reprocess the directory.
                if (globalModificationTime == Long.MIN_VALUE) {
                    monitorDirAndForwardSplits(fileSystem, context);
                    globalModificationTime = Long.MAX_VALUE;
                }
                isRunning = false;
            }
            break;
        default:
            isRunning = false;
            throw new RuntimeException("Unknown WatchType" + watchType);
    }
}
Example 10
Source File: BlobServerRecoveryTest.java From flink with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *     blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *     and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *     shared HA blob store to use
 *
 * @throws IOException
 *     in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}
Example 11
Source File: PythonEnvUtils.java From flink with Apache License 2.0
/**
 * Prepares PythonEnvironment to start python process.
 *
 * @param pythonLibFiles The dependent Python files.
 * @return PythonEnvironment the Python environment which will be executed in Python process.
 */
public static PythonEnvironment preparePythonEnvironment(List<Path> pythonLibFiles) throws IOException {
    PythonEnvironment env = new PythonEnvironment();

    // 1. setup temporary local directory for the user files
    String tmpDir = System.getProperty("java.io.tmpdir") +
        File.separator + "pyflink" + File.separator + UUID.randomUUID();

    Path tmpDirPath = new Path(tmpDir);
    FileSystem fs = tmpDirPath.getFileSystem();
    if (fs.exists(tmpDirPath)) {
        fs.delete(tmpDirPath, true);
    }
    fs.mkdirs(tmpDirPath);

    env.workingDirectory = tmpDirPath.toString();

    StringBuilder pythonPathEnv = new StringBuilder();

    pythonPathEnv.append(env.workingDirectory);

    // 2. create symbolLink in the working directory for the pyflink dependency libs.
    List<java.nio.file.Path> pythonLibs = getLibFiles(FLINK_OPT_DIR_PYTHON);
    for (java.nio.file.Path libPath : pythonLibs) {
        java.nio.file.Path symbolicLinkFilePath = FileSystems.getDefault().getPath(
            env.workingDirectory, libPath.getFileName().toString());
        createSymbolicLinkForPyflinkLib(libPath, symbolicLinkFilePath);
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(symbolicLinkFilePath.toString());
    }

    // 3. copy relevant python files to tmp dir and set them in PYTHONPATH.
    for (Path pythonFile : pythonLibFiles) {
        String sourceFileName = pythonFile.getName();
        Path targetPath = new Path(tmpDirPath, sourceFileName);
        FileUtils.copy(pythonFile, targetPath, true);
        String targetFileNames = Files.walk(Paths.get(targetPath.toString()))
            .filter(Files::isRegularFile)
            .filter(f -> !f.toString().endsWith(".py"))
            .map(java.nio.file.Path::toString)
            .collect(Collectors.joining(File.pathSeparator));
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(targetFileNames);
    }

    // 4. add the parent directory to PYTHONPATH for files suffixed with .py
    String pyFileParents = Files.walk(Paths.get(tmpDirPath.toString()))
        .filter(file -> file.toString().endsWith(".py"))
        .map(java.nio.file.Path::getParent)
        .distinct()
        .map(java.nio.file.Path::toString)
        .collect(Collectors.joining(File.pathSeparator));
    if (!StringUtils.isNullOrWhitespaceOnly(pyFileParents)) {
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(pyFileParents);
    }

    env.pythonPath = pythonPathEnv.toString();
    return env;
}
Example 12
Source File: ContinuousFileMonitoringFunction.java From Flink-CEPplus with Apache License 2.0
@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
    Path p = new Path(path);
    FileSystem fileSystem = FileSystem.get(p.toUri());
    if (!fileSystem.exists(p)) {
        throw new FileNotFoundException("The provided file path " + path + " does not exist.");
    }

    checkpointLock = context.getCheckpointLock();
    switch (watchType) {
        case PROCESS_CONTINUOUSLY:
            while (isRunning) {
                synchronized (checkpointLock) {
                    monitorDirAndForwardSplits(fileSystem, context);
                }
                Thread.sleep(interval);
            }
            // here we do not need to set the running to false and the
            // globalModificationTime to Long.MAX_VALUE because to arrive here,
            // either close() or cancel() have already been called, so this
            // is already done.
            break;
        case PROCESS_ONCE:
            synchronized (checkpointLock) {
                // the following check guarantees that if we restart
                // after a failure and we managed to have a successful
                // checkpoint, we will not reprocess the directory.
                if (globalModificationTime == Long.MIN_VALUE) {
                    monitorDirAndForwardSplits(fileSystem, context);
                    globalModificationTime = Long.MAX_VALUE;
                }
                isRunning = false;
            }
            break;
        default:
            isRunning = false;
            throw new RuntimeException("Unknown WatchType" + watchType);
    }
}
Example 13
Source File: BlobServerRecoveryTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *     blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *     and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *     shared HA blob store to use
 *
 * @throws IOException
 *     in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}
Example 14
Source File: PythonStreamBinder.java From Flink-CEPplus with Apache License 2.0
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
Example 15
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}