Java Code Examples for org.apache.flink.core.fs.Path#getFileSystem()
The following examples show how to use org.apache.flink.core.fs.Path#getFileSystem(). You can go to the original project or source file by following the links above each example.
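Before the examples, a minimal usage sketch: Path#getFileSystem() resolves the FileSystem implementation that matches the path's URI scheme (local file system, HDFS, S3, and so on), and all subsequent I/O goes through the returned FileSystem. The class name and the demo path below are illustrative assumptions, not taken from any of the projects above.

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;

import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;

// Hypothetical demo class, for illustration only.
public class GetFileSystemSketch {

    public static void main(String[] args) throws IOException {
        // The URI scheme decides which FileSystem implementation is returned.
        Path path = new Path("file:///tmp/flink-getfilesystem-demo.txt"); // assumed demo path

        // Resolve the file system for this path; throws IOException if the scheme is unsupported.
        FileSystem fs = path.getFileSystem();

        // All subsequent I/O goes through the resolved FileSystem.
        try (FSDataOutputStream out = fs.create(path, FileSystem.WriteMode.OVERWRITE);
                OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write("hello");
        }

        System.out.println("exists: " + fs.exists(path));

        // Clean up the demo file.
        fs.delete(path, false);
    }
}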
Example 1
Source File: BinaryInputFormat.java From flink with Apache License 2.0 | 6 votes |
protected List<FileStatus> getFiles() throws IOException {
    // get all the files that are involved in the splits
    List<FileStatus> files = new ArrayList<>();

    for (Path filePath : getFilePaths()) {
        final FileSystem fs = filePath.getFileSystem();
        final FileStatus pathFile = fs.getFileStatus(filePath);

        if (pathFile.isDir()) {
            // input is directory. list all contained files
            final FileStatus[] partials = fs.listStatus(filePath);
            for (FileStatus partial : partials) {
                if (!partial.isDir()) {
                    files.add(partial);
                }
            }
        } else {
            files.add(pathFile);
        }
    }
    return files;
}
Example 2
Source File: YarnFileStageTestS3ITCase.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Verifies that nested directories are properly copied to the given S3 path (using the
 * appropriate file system) during resource uploads for YARN.
 *
 * @param scheme
 *         file system scheme
 * @param pathSuffix
 *         test path suffix which will be the test's target path
 */
private void testRecursiveUploadForYarn(String scheme, String pathSuffix) throws Exception {
    ++numRecursiveUploadTests;

    final Path basePath = new Path(S3TestCredentials.getTestBucketUriWithScheme(scheme) + TEST_DATA_DIR);
    final HadoopFileSystem fs = (HadoopFileSystem) basePath.getFileSystem();

    assumeFalse(fs.exists(basePath));

    try {
        final Path directory = new Path(basePath, pathSuffix);

        YarnFileStageTest.testCopyFromLocalRecursive(
            fs.getHadoopFileSystem(),
            new org.apache.hadoop.fs.Path(directory.toUri()),
            tempFolder,
            true);
    } finally {
        // clean up
        fs.delete(basePath, true);
    }
}
Example 3
Source File: RocksDBIncrementalRestoreOperation.java From flink with Apache License 2.0 | 6 votes |
/**
 * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from
 * a local state.
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {

    FileSystem fileSystem = source.getFileSystem();

    final FileStatus[] fileStatuses = fileSystem.listStatus(source);

    if (fileStatuses == null) {
        throw new IOException("Cannot list file statuses. Directory " + source + " does not exist.");
    }

    for (FileStatus fileStatus : fileStatuses) {
        final Path filePath = fileStatus.getPath();
        final String fileName = filePath.getName();
        File restoreFile = new File(source.getPath(), fileName);
        File targetFile = new File(instanceRocksDBPath, fileName);
        if (fileName.endsWith(SST_FILE_SUFFIX)) {
            // hard-link the immutable sst-files.
            Files.createLink(targetFile.toPath(), restoreFile.toPath());
        } else {
            // true copy for all other files.
            Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
        }
    }
}
Example 4
Source File: MemoryBackendCheckpointStorage.java From Flink-CEPplus with Apache License 2.0 | 6 votes |
/**
 * Creates a new MemoryBackendCheckpointStorage.
 *
 * @param jobId The ID of the job writing the checkpoints.
 * @param checkpointsBaseDirectory The directory to write checkpoints to. May be null,
 *                                 in which case this storage does not support durable persistence.
 * @param defaultSavepointLocation The default savepoint directory, or null, if none is set.
 * @param maxStateSize The maximum size of each individual piece of state.
 *
 * @throws IOException Thrown if a checkpoint base directory is configured and the
 *                     checkpoint directory cannot be created within that directory.
 */
public MemoryBackendCheckpointStorage(
        JobID jobId,
        @Nullable Path checkpointsBaseDirectory,
        @Nullable Path defaultSavepointLocation,
        int maxStateSize) throws IOException {

    super(jobId, defaultSavepointLocation);

    checkArgument(maxStateSize > 0);
    this.maxStateSize = maxStateSize;

    if (checkpointsBaseDirectory == null) {
        checkpointsDirectory = null;
        fileSystem = null;
    } else {
        this.fileSystem = checkpointsBaseDirectory.getFileSystem();
        this.checkpointsDirectory = getCheckpointDirectoryForJob(checkpointsBaseDirectory, jobId);

        fileSystem.mkdirs(checkpointsDirectory);
    }
}
Example 5
Source File: CheckpointStreamWithResultProvider.java From flink with Apache License 2.0 | 5 votes |
@Nonnull
static CheckpointStreamWithResultProvider createDuplicatingStream(
    @Nonnegative long checkpointId,
    @Nonnull CheckpointedStateScope checkpointedStateScope,
    @Nonnull CheckpointStreamFactory primaryStreamFactory,
    @Nonnull LocalRecoveryDirectoryProvider secondaryStreamDirProvider) throws IOException {

    CheckpointStreamFactory.CheckpointStateOutputStream primaryOut =
        primaryStreamFactory.createCheckpointStateOutputStream(checkpointedStateScope);

    try {
        File outFile = new File(
            secondaryStreamDirProvider.subtaskSpecificCheckpointDirectory(checkpointId),
            String.valueOf(UUID.randomUUID()));

        Path outPath = new Path(outFile.toURI());

        CheckpointStreamFactory.CheckpointStateOutputStream secondaryOut =
            new FileBasedStateOutputStream(outPath.getFileSystem(), outPath);

        return new CheckpointStreamWithResultProvider.PrimaryAndSecondaryStream(primaryOut, secondaryOut);
    } catch (IOException secondaryEx) {
        LOG.warn("Exception when opening secondary/local checkpoint output stream. " +
            "Continue only with the primary stream.", secondaryEx);
    }

    return new CheckpointStreamWithResultProvider.PrimaryStreamOnly(primaryOut);
}
Example 6
Source File: RocksDBIncrementalRestoreOperation.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        FileSystem fileSystem = path.getFileSystem();
        if (fileSystem.exists(path)) {
            fileSystem.delete(path, true);
        }
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
Example 7
Source File: FileUtils.java From flink with Apache License 2.0 | 5 votes |
public static Path compressDirectory(Path directory, Path target) throws IOException {
    FileSystem sourceFs = directory.getFileSystem();
    FileSystem targetFs = target.getFileSystem();

    try (ZipOutputStream out = new ZipOutputStream(targetFs.create(target, FileSystem.WriteMode.NO_OVERWRITE))) {
        addToZip(directory, sourceFs, directory.getParent(), out);
    }
    return target;
}
Example 8
Source File: RocksDBCheckpointIterator.java From bravo with Apache License 2.0 | 5 votes |
private void copyStateDataHandleData(
        Path restoreFilePath,
        StreamStateHandle remoteFileHandle) throws IOException {

    FileSystem restoreFileSystem = restoreFilePath.getFileSystem();

    FSDataInputStream inputStream = null;
    FSDataOutputStream outputStream = null;

    try {
        inputStream = remoteFileHandle.openInputStream();
        cancelStreamRegistry.registerCloseable(inputStream);

        outputStream = restoreFileSystem.create(restoreFilePath, FileSystem.WriteMode.OVERWRITE);
        cancelStreamRegistry.registerCloseable(outputStream);

        byte[] buffer = new byte[8 * 1024];
        while (true) {
            int numBytes = inputStream.read(buffer);
            if (numBytes == -1) {
                break;
            }
            outputStream.write(buffer, 0, numBytes);
        }
    } finally {
        if (cancelStreamRegistry.unregisterCloseable(inputStream)) {
            inputStream.close();
        }
        if (cancelStreamRegistry.unregisterCloseable(outputStream)) {
            outputStream.close();
        }
    }
}
Example 9
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
private static void unzipPythonLibrary(Path targetDir) throws IOException {
    FileSystem targetFs = targetDir.getFileSystem();
    ClassLoader classLoader = PythonPlanBinder.class.getClassLoader();
    try (ZipInputStream zis = new ZipInputStream(classLoader.getResourceAsStream("python-source.zip"))) {
        ZipEntry entry = zis.getNextEntry();
        while (entry != null) {
            String fileName = entry.getName();
            Path newFile = new Path(targetDir, fileName);
            if (entry.isDirectory()) {
                targetFs.mkdirs(newFile);
            } else {
                try {
                    LOG.debug("Unzipping to {}.", newFile);
                    FSDataOutputStream fsDataOutputStream = targetFs.create(newFile, FileSystem.WriteMode.NO_OVERWRITE);
                    IOUtils.copyBytes(zis, fsDataOutputStream, false);
                } catch (Exception e) {
                    zis.closeEntry();
                    throw new IOException("Failed to unzip flink python library.", e);
                }
            }

            zis.closeEntry();
            entry = zis.getNextEntry();
        }
        zis.closeEntry();
    }
}
Example 10
Source File: HDFSTest.java From flink with Apache License 2.0 | 5 votes |
/**
 * Test that {@link FileUtils#deletePathIfEmpty(FileSystem, Path)} deletes the path if it is
 * empty. A path can only be empty if it is a directory which does not contain any
 * files/directories.
 */
@Test
public void testDeletePathIfEmpty() throws IOException {
    final Path basePath = new Path(hdfsURI);
    final Path directory = new Path(basePath, UUID.randomUUID().toString());
    final Path directoryFile = new Path(directory, UUID.randomUUID().toString());
    final Path singleFile = new Path(basePath, UUID.randomUUID().toString());

    FileSystem fs = basePath.getFileSystem();

    fs.mkdirs(directory);

    byte[] data = "HDFSTest#testDeletePathIfEmpty".getBytes(ConfigConstants.DEFAULT_CHARSET);

    for (Path file : Arrays.asList(singleFile, directoryFile)) {
        org.apache.flink.core.fs.FSDataOutputStream outputStream = fs.create(file, FileSystem.WriteMode.OVERWRITE);
        outputStream.write(data);
        outputStream.close();
    }

    // verify that the files have been created
    assertTrue(fs.exists(singleFile));
    assertTrue(fs.exists(directoryFile));

    // delete the single file
    assertFalse(FileUtils.deletePathIfEmpty(fs, singleFile));
    assertTrue(fs.exists(singleFile));

    // try to delete the non-empty directory
    assertFalse(FileUtils.deletePathIfEmpty(fs, directory));
    assertTrue(fs.exists(directory));

    // delete the file contained in the directory
    assertTrue(fs.delete(directoryFile, false));

    // now the deletion should work
    assertTrue(FileUtils.deletePathIfEmpty(fs, directory));
    assertFalse(fs.exists(directory));
}
Example 11
Source File: HadoopSwiftFileSystemITCase.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testSimpleFileWriteAndRead() throws Exception {
    final Configuration conf = createConfiguration();
    final String testLine = "Hello Upload!";

    FileSystem.initialize(conf);

    final Path path = new Path("swift://" + CONTAINER + '.' + SERVICENAME + '/' + TEST_DATA_DIR + "/test.txt");
    final FileSystem fs = path.getFileSystem();

    try {
        try (FSDataOutputStream out = fs.create(path, WriteMode.OVERWRITE);
                OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
            writer.write(testLine);
        }

        try (FSDataInputStream in = fs.open(path);
                InputStreamReader ir = new InputStreamReader(in, StandardCharsets.UTF_8);
                BufferedReader reader = new BufferedReader(ir)) {
            String line = reader.readLine();
            assertEquals(testLine, line);
        }
    } finally {
        fs.delete(path, false);
    }
}
Example 12
Source File: HadoopOSSFileSystemITCase.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@BeforeClass
public static void setup() throws IOException {
    OSSTestCredentials.assumeCredentialsAvailable();

    final Configuration conf = new Configuration();
    conf.setString("fs.oss.endpoint", OSSTestCredentials.getOSSEndpoint());
    conf.setString("fs.oss.accessKeyId", OSSTestCredentials.getOSSAccessKey());
    conf.setString("fs.oss.accessKeySecret", OSSTestCredentials.getOSSSecretKey());
    FileSystem.initialize(conf);
    basePath = new Path(OSSTestCredentials.getTestBucketUri() + TEST_DATA_DIR);
    fs = basePath.getFileSystem();
    deadline = 0;
}
Example 13
Source File: FileCacheDirectoriesTest.java From Flink-CEPplus with Apache License 2.0 | 5 votes |
@Test
public void testDirectoryCleanUp() throws Exception {
    JobID jobID = new JobID();
    ExecutionAttemptID attemptID1 = new ExecutionAttemptID();
    ExecutionAttemptID attemptID2 = new ExecutionAttemptID();

    final String fileName = "test_file";
    // copy / create the file
    final DistributedCache.DistributedCacheEntry entry = new DistributedCache.DistributedCacheEntry(
        fileName,
        false,
        InstantiationUtil.serializeObject(permanentBlobKey),
        true);
    Future<Path> copyResult = fileCache.createTmpFile(fileName, entry, jobID, attemptID1);
    fileCache.createTmpFile(fileName, entry, jobID, attemptID2);

    final Path dstPath = copyResult.get();
    final FileSystem fs = dstPath.getFileSystem();
    final FileStatus fileStatus = fs.getFileStatus(dstPath);
    final Path cacheFile = new Path(dstPath, "cacheFile");
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    fileCache.releaseJob(jobID, attemptID1);
    // still should be available
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    fileCache.releaseJob(jobID, attemptID2);
    // still should be available, file will be deleted after cleanupInterval
    assertTrue(fileStatus.isDir());
    assertTrue(fs.exists(cacheFile));

    // after a while, the file should disappear
    assertEquals(CLEANUP_INTERVAL, executorService.lastDelayMillis);
    executorService.lastDeleteProcess.run();

    assertFalse(fs.exists(dstPath));
    assertFalse(fs.exists(cacheFile));
}
Example 14
Source File: MapRFsFactoryTest.java From flink with Apache License 2.0 | 5 votes |
@Test
public void testMapRFsKind() throws Exception {
    final Path path = new Path("maprfs:///my/path");
    final FileSystem fs = path.getFileSystem();
    assertEquals(FileSystemKind.FILE_SYSTEM, fs.getKind());
}
Example 15
Source File: PythonPlanBinder.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private static void deleteIfExists(Path path) throws IOException {
    FileSystem fs = path.getFileSystem();
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
Example 16
Source File: FileOutputFormat.java From flink with Apache License 2.0 | 4 votes |
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (taskNumber < 0 || numTasks < 1) {
        throw new IllegalArgumentException("TaskNumber: " + taskNumber + ", numTasks: " + numTasks);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("Opening stream for output (" + (taskNumber + 1) + "/" + numTasks + "). WriteMode=" + writeMode +
            ", OutputDirectoryMode=" + outputDirectoryMode);
    }

    Path p = this.outputFilePath;
    if (p == null) {
        throw new IOException("The file path is null.");
    }

    final FileSystem fs = p.getFileSystem();

    // if this is a local file system, we need to initialize the local output directory here
    if (!fs.isDistributedFS()) {
        if (numTasks == 1 && outputDirectoryMode == OutputDirectoryMode.PARONLY) {
            // output should go to a single file

            // prepare local output path. checks for write mode and removes existing files in case of OVERWRITE mode
            if (!fs.initOutPathLocalFS(p, writeMode, false)) {
                // output preparation failed! Cancel task.
                throw new IOException("Output path '" + p.toString() + "' could not be initialized. Canceling task...");
            }
        } else {
            // numTasks > 1 || outDirMode == OutputDirectoryMode.ALWAYS
            if (!fs.initOutPathLocalFS(p, writeMode, true)) {
                // output preparation failed! Cancel task.
                throw new IOException("Output directory '" + p.toString() + "' could not be created. Canceling task...");
            }
        }
    }

    // Suffix the path with the parallel instance index, if needed
    this.actualFilePath = (numTasks > 1 || outputDirectoryMode == OutputDirectoryMode.ALWAYS) ?
        p.suffix("/" + getDirectoryFileName(taskNumber)) : p;

    // create output file
    this.stream = fs.create(this.actualFilePath, writeMode);

    // at this point, the file creation must have succeeded, or an exception has been thrown
    this.fileCreated = true;
}
Example 17
Source File: SnapshotDirectory.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private SnapshotDirectory(@Nonnull Path directory) throws IOException {
    this(directory, directory.getFileSystem());
}
Example 18
Source File: RocksDBStateUploader.java From Flink-CEPplus with Apache License 2.0 | 4 votes |
private StreamStateHandle uploadLocalFileToCheckpointFs(
        Path filePath,
        CheckpointStreamFactory checkpointStreamFactory,
        CloseableRegistry closeableRegistry) throws IOException {
    FSDataInputStream inputStream = null;
    CheckpointStreamFactory.CheckpointStateOutputStream outputStream = null;

    try {
        final byte[] buffer = new byte[READ_BUFFER_SIZE];

        FileSystem backupFileSystem = filePath.getFileSystem();
        inputStream = backupFileSystem.open(filePath);
        closeableRegistry.registerCloseable(inputStream);

        outputStream = checkpointStreamFactory
            .createCheckpointStateOutputStream(CheckpointedStateScope.SHARED);
        closeableRegistry.registerCloseable(outputStream);

        while (true) {
            int numBytes = inputStream.read(buffer);

            if (numBytes == -1) {
                break;
            }

            outputStream.write(buffer, 0, numBytes);
        }

        StreamStateHandle result = null;
        if (closeableRegistry.unregisterCloseable(outputStream)) {
            result = outputStream.closeAndGetHandle();
            outputStream = null;
        }
        return result;

    } finally {
        if (closeableRegistry.unregisterCloseable(inputStream)) {
            IOUtils.closeQuietly(inputStream);
        }

        if (closeableRegistry.unregisterCloseable(outputStream)) {
            IOUtils.closeQuietly(outputStream);
        }
    }
}
Example 19
Source File: BlobServerRecoveryTest.java From flink with Apache License 2.0 | 4 votes |
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 *         blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 *         and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 *         shared HA blob store to use
 *
 * @throws IOException
 *         in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
    final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
    String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
    Random rand = new Random();

    try (
        BlobServer server0 = new BlobServer(config, blobStore);
        BlobServer server1 = new BlobServer(config, blobStore);
        // use VoidBlobStore as the HA store to force download from server[1]'s HA store
        BlobCacheService cache1 = new BlobCacheService(
            config, new VoidBlobStore(), new InetSocketAddress("localhost", server1.getPort())
        )) {

        server0.start();
        server1.start();

        // Random data
        byte[] expected = new byte[1024];
        rand.nextBytes(expected);
        byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);

        BlobKey[] keys = new BlobKey[2];
        BlobKey nonHAKey;

        // Put job-related HA data
        JobID[] jobId = new JobID[] { new JobID(), new JobID() };
        keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
        keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2

        // put non-HA data
        nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
        verifyKeyDifferentHashEquals(keys[1], nonHAKey);

        // check that the storage directory exists
        final Path blobServerPath = new Path(storagePath, "blob");
        FileSystem fs = blobServerPath.getFileSystem();
        assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));

        // Verify HA requests from cache1 (connected to server1) with no immediate access to the file
        verifyContents(cache1, jobId[0], keys[0], expected);
        verifyContents(cache1, jobId[1], keys[1], expected2);

        // Verify non-HA file is not accessible from server1
        verifyDeleted(cache1, jobId[0], nonHAKey);

        // Remove again
        server1.cleanupJob(jobId[0], true);
        server1.cleanupJob(jobId[1], true);

        // Verify everything is clean
        assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
        if (fs.exists(blobServerPath)) {
            final org.apache.flink.core.fs.FileStatus[] recoveryFiles = fs.listStatus(blobServerPath);
            ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
            for (org.apache.flink.core.fs.FileStatus file : recoveryFiles) {
                filenames.add(file.toString());
            }
            fail("Unclean state backend: " + filenames);
        }
    }
}