Java Code Examples for org.apache.flink.core.fs.FileSystem#exists()
The following examples show how to use org.apache.flink.core.fs.FileSystem#exists(). Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
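Before the project examples, here is a minimal, self-contained sketch of the call itself. The path and the cleanup step are illustrative assumptions rather than code from any project below; the API calls themselves (Path#getFileSystem(), FileSystem#exists(Path), FileSystem#delete(Path, boolean)) are the standard Flink ones.

import java.io.IOException;

import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

public class ExistsSketch {

    public static void main(String[] args) throws IOException {
        // hypothetical path; any supported scheme (file://, hdfs://, s3://, ...) would work
        Path path = new Path("file:///tmp/flink-exists-sketch");

        // resolve the FileSystem implementation backing this path's scheme
        FileSystem fs = path.getFileSystem();

        if (fs.exists(path)) {
            // recursive delete, mirroring the exists-then-delete pattern in the examples below
            fs.delete(path, true);
        }
    }
}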
Example 1
Source File: FileUtils.java From Flink-CEPplus with Apache License 2.0
/**
 * Copies all files from source to target and sets executable flag. Paths might be on different systems.
 *
 * @param sourcePath source path to copy from
 * @param targetPath target path to copy to
 * @param executable if target file should be executable
 * @throws IOException if the copy fails
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    // we unwrap the file system to get raw streams without safety net
    FileSystem sFS = FileSystem.getUnguardedFileSystem(sourcePath.toUri());
    FileSystem tFS = FileSystem.getUnguardedFileSystem(targetPath.toUri());
    if (!tFS.exists(targetPath)) {
        if (sFS.getFileStatus(sourcePath).isDir()) {
            internalCopyDirectory(sourcePath, targetPath, executable, sFS, tFS);
        } else {
            internalCopyFile(sourcePath, targetPath, executable, sFS, tFS);
        }
    }
}
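A hypothetical invocation of the helper above might look like the following; the concrete paths are assumptions for illustration. Note that the exists() guard means the copy is skipped entirely when the target path already exists, and that source and target may live on different file systems.

// hypothetical paths; FileUtils.copy resolves the file system for each scheme separately
FileUtils.copy(
    new Path("hdfs:///data/job/artifacts"), // source
    new Path("file:///tmp/artifacts"),      // target, only written if it does not exist yet
    false);                                 // do not set the executable flag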
Example 2
Source File: RocksDBIncrementalRestoreOperation.java From Flink-CEPplus with Apache License 2.0
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
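The exists-then-delete pair makes the cleanup a harmless no-op when the path was never created, and the swallowed IOException keeps a failed cleanup from masking the primary error. A sketch of how such a quiet cleanup is typically driven (the restore call and the path are hypothetical):

// hypothetical caller: best-effort removal of a temporary restore directory
Path temporaryRestorePath = new Path("file:///tmp/rocksdb-restore");
try {
    restoreFromLocalState(temporaryRestorePath); // hypothetical restore step that may throw
} finally {
    cleanUpPathQuietly(temporaryRestorePath);    // never throws, only logs a warning
}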
Example 3
Source File: TaskLocalStateStoreImpl.java From Flink-CEPplus with Apache License 2.0
/**
 * Helper method to delete a directory.
 */
private void deleteDirectory(File directory) throws IOException {
    Path path = new Path(directory.toURI());
    FileSystem fileSystem = path.getFileSystem();
    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }
}
Example 4
Source File: TaskLocalStateStoreImpl.java From flink with Apache License 2.0
/**
 * Helper method to delete a directory.
 */
private void deleteDirectory(File directory) throws IOException {
    Path path = new Path(directory.toURI());
    FileSystem fileSystem = path.getFileSystem();
    if (fileSystem.exists(path)) {
        fileSystem.delete(path, true);
    }
}
Example 5
Source File: AbstractHadoopFileSystemITTest.java From flink with Apache License 2.0
public static void cleanupDirectoryWithRetry(FileSystem fs, Path path, long consistencyToleranceNS) throws IOException, InterruptedException {
    fs.delete(path, true);
    long deadline = System.nanoTime() + consistencyToleranceNS;
    while (fs.exists(path) && System.nanoTime() - deadline < 0) {
        fs.delete(path, true);
        Thread.sleep(50L);
    }
    Assert.assertFalse(fs.exists(path));
}
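On eventually consistent object stores, exists() may keep reporting a path for a short window after a successful delete; the retry loop above tolerates that window before asserting. A hypothetical call with a 30-second tolerance (the bucket path is an assumption; java.util.concurrent.TimeUnit converts the tolerance to the nanoseconds the method expects):

cleanupDirectoryWithRetry(fs, new Path("s3://my-bucket/tmp-output"), TimeUnit.SECONDS.toNanos(30));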
Example 6
Source File: FileUtils.java From flink with Apache License 2.0
/**
 * Copies all files from source to target and sets executable flag. Paths might be on different systems.
 *
 * @param sourcePath source path to copy from
 * @param targetPath target path to copy to
 * @param executable if target file should be executable
 * @throws IOException if the copy fails
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    // we unwrap the file system to get raw streams without safety net
    FileSystem sFS = FileSystem.getUnguardedFileSystem(sourcePath.toUri());
    FileSystem tFS = FileSystem.getUnguardedFileSystem(targetPath.toUri());
    if (!tFS.exists(targetPath)) {
        if (sFS.getFileStatus(sourcePath).isDir()) {
            internalCopyDirectory(sourcePath, targetPath, executable, sFS, tFS);
        } else {
            internalCopyFile(sourcePath, targetPath, executable, sFS, tFS);
        }
    }
}
Example 7
Source File: RocksDBIncrementalRestoreOperation.java From flink with Apache License 2.0
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
Example 8
Source File: RheemFileOutputFormat.java From rheem with Apache License 2.0
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    try {
        if (taskNumber < 0 || numTasks < 1) {
            throw new IllegalArgumentException("TaskNumber: " + taskNumber + ", numTasks: " + numTasks);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Opening stream for output (" + (taskNumber + 1) + "/" + numTasks + "). WriteMode=" + writeMode +
                ", OutputDirectoryMode=" + outputDirectoryMode);
        }

        Path p = this.outputFilePath;
        if (p == null) {
            throw new IOException("The file path is null.");
        }

        final FileSystem fs = p.getFileSystem();
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        this.fileCreated = true;

        final SequenceFile.Writer.Option fileOption = SequenceFile.Writer.file(new org.apache.hadoop.fs.Path(p.toString()));
        final SequenceFile.Writer.Option keyClassOption = SequenceFile.Writer.keyClass(NullWritable.class);
        final SequenceFile.Writer.Option valueClassOption = SequenceFile.Writer.valueClass(BytesWritable.class);

        writer = SequenceFile.createWriter(new org.apache.hadoop.conf.Configuration(true), fileOption, keyClassOption, valueClassOption);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Example 9
Source File: ContinuousFileMonitoringFunction.java From flink with Apache License 2.0
@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
    Path p = new Path(path);
    FileSystem fileSystem = FileSystem.get(p.toUri());
    if (!fileSystem.exists(p)) {
        throw new FileNotFoundException("The provided file path " + path + " does not exist.");
    }

    checkpointLock = context.getCheckpointLock();
    switch (watchType) {
        case PROCESS_CONTINUOUSLY:
            while (isRunning) {
                synchronized (checkpointLock) {
                    monitorDirAndForwardSplits(fileSystem, context);
                }
                Thread.sleep(interval);
            }
            // here we do not need to set the running to false and the
            // globalModificationTime to Long.MAX_VALUE because to arrive here,
            // either close() or cancel() have already been called, so this
            // is already done.
            break;
        case PROCESS_ONCE:
            synchronized (checkpointLock) {
                // the following check guarantees that if we restart
                // after a failure and we managed to have a successful
                // checkpoint, we will not reprocess the directory.
                if (globalModificationTime == Long.MIN_VALUE) {
                    monitorDirAndForwardSplits(fileSystem, context);
                    globalModificationTime = Long.MAX_VALUE;
                }
                isRunning = false;
            }
            break;
        default:
            isRunning = false;
            throw new RuntimeException("Unknown WatchType" + watchType);
    }
}
Example 10
Source File: BlobServerRecoveryTest.java From flink with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *     blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *     and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *     shared HA blob store to use
 *
 * @throws IOException
 *     in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}
Example 11
Source File: PythonEnvUtils.java From flink with Apache License 2.0
/**
 * Prepares PythonEnvironment to start python process.
 *
 * @param pythonLibFiles The dependent Python files.
 * @return PythonEnvironment the Python environment which will be executed in Python process.
 */
public static PythonEnvironment preparePythonEnvironment(List<Path> pythonLibFiles) throws IOException {
    PythonEnvironment env = new PythonEnvironment();

    // 1. setup temporary local directory for the user files
    String tmpDir = System.getProperty("java.io.tmpdir") +
        File.separator + "pyflink" + File.separator + UUID.randomUUID();

    Path tmpDirPath = new Path(tmpDir);
    FileSystem fs = tmpDirPath.getFileSystem();
    if (fs.exists(tmpDirPath)) {
        fs.delete(tmpDirPath, true);
    }
    fs.mkdirs(tmpDirPath);

    env.workingDirectory = tmpDirPath.toString();

    StringBuilder pythonPathEnv = new StringBuilder();

    pythonPathEnv.append(env.workingDirectory);

    // 2. create symbolLink in the working directory for the pyflink dependency libs.
    List<java.nio.file.Path> pythonLibs = getLibFiles(FLINK_OPT_DIR_PYTHON);
    for (java.nio.file.Path libPath : pythonLibs) {
        java.nio.file.Path symbolicLinkFilePath = FileSystems.getDefault().getPath(
            env.workingDirectory, libPath.getFileName().toString());
        createSymbolicLinkForPyflinkLib(libPath, symbolicLinkFilePath);
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(symbolicLinkFilePath.toString());
    }

    // 3. copy relevant python files to tmp dir and set them in PYTHONPATH.
    for (Path pythonFile : pythonLibFiles) {
        String sourceFileName = pythonFile.getName();
        Path targetPath = new Path(tmpDirPath, sourceFileName);
        FileUtils.copy(pythonFile, targetPath, true);
        String targetFileNames = Files.walk(Paths.get(targetPath.toString()))
            .filter(Files::isRegularFile)
            .filter(f -> !f.toString().endsWith(".py"))
            .map(java.nio.file.Path::toString)
            .collect(Collectors.joining(File.pathSeparator));
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(targetFileNames);
    }

    // 4. add the parent directory to PYTHONPATH for files suffixed with .py
    String pyFileParents = Files.walk(Paths.get(tmpDirPath.toString()))
        .filter(file -> file.toString().endsWith(".py"))
        .map(java.nio.file.Path::getParent)
        .distinct()
        .map(java.nio.file.Path::toString)
        .collect(Collectors.joining(File.pathSeparator));
    if (!StringUtils.isNullOrWhitespaceOnly(pyFileParents)) {
        pythonPathEnv.append(File.pathSeparator);
        pythonPathEnv.append(pyFileParents);
    }

    env.pythonPath = pythonPathEnv.toString();
    return env;
}
Example 12
Source File: ContinuousFileMonitoringFunction.java From Flink-CEPplus with Apache License 2.0
@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
    Path p = new Path(path);
    FileSystem fileSystem = FileSystem.get(p.toUri());
    if (!fileSystem.exists(p)) {
        throw new FileNotFoundException("The provided file path " + path + " does not exist.");
    }

    checkpointLock = context.getCheckpointLock();
    switch (watchType) {
        case PROCESS_CONTINUOUSLY:
            while (isRunning) {
                synchronized (checkpointLock) {
                    monitorDirAndForwardSplits(fileSystem, context);
                }
                Thread.sleep(interval);
            }
            // here we do not need to set the running to false and the
            // globalModificationTime to Long.MAX_VALUE because to arrive here,
            // either close() or cancel() have already been called, so this
            // is already done.
            break;
        case PROCESS_ONCE:
            synchronized (checkpointLock) {
                // the following check guarantees that if we restart
                // after a failure and we managed to have a successful
                // checkpoint, we will not reprocess the directory.
                if (globalModificationTime == Long.MIN_VALUE) {
                    monitorDirAndForwardSplits(fileSystem, context);
                    globalModificationTime = Long.MAX_VALUE;
                }
                isRunning = false;
            }
            break;
        default:
            isRunning = false;
            throw new RuntimeException("Unknown WatchType" + watchType);
    }
}
Example 13
Source File: BlobServerRecoveryTest.java From Flink-CEPplus with Apache License 2.0
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *     blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *     and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *     shared HA blob store to use
 *
 * @throws IOException
 *     in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}
Example 14
Source File: PythonStreamBinder.java From Flink-CEPplus with Apache License 2.0
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
Example 15
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}