Java Code Examples for org.apache.hadoop.fs.FileStatus#isFile()
The following examples show how to use
org.apache.hadoop.fs.FileStatus#isFile().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HistoryFileManager.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Collects the regular files found directly under {@code path} that are
 * accepted by {@code pathFilter}.
 *
 * <p>The path is qualified against {@code fc} before listing. If the listing
 * raises {@link FileNotFoundException}, the error is logged and whatever was
 * collected so far (possibly nothing) is returned.
 *
 * @param path directory to list
 * @param fc file context used to qualify and list the path
 * @param pathFilter filter applied to the path of every regular file
 * @return the accepted file statuses, never {@code null}
 * @throws IOException on any listing failure other than a missing directory
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  final Path qualified = fc.makeQualified(path);
  final List<FileStatus> accepted = new ArrayList<FileStatus>();
  try {
    for (RemoteIterator<FileStatus> it = fc.listStatus(qualified); it.hasNext();) {
      final FileStatus candidate = it.next();
      final Path candidatePath = candidate.getPath();
      if (!candidate.isFile()) {
        continue;
      }
      if (pathFilter.accept(candidatePath)) {
        accepted.add(candidate);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + qualified, fe);
  }
  return accepted;
}
Example 2
Source File: HadoopSegmentPreprocessingJob.java From incubator-pinot with Apache License 2.0 | 6 votes |
/**
 * Looks for the first regular file in the input folder whose name ends with
 * {@code .avro} and returns the schema read from the Avro reader.
 *
 * <p>NOTE(review): the reader is opened with {@code getAvroReader(inputPathDir)},
 * i.e. on the directory rather than on the matched file; presumably that helper
 * locates the file itself -- confirm.
 *
 * @param inputPathDir Path to input directory
 * @return Input schema, or {@code null} when no matching file is listed
 * @throws IOException exception when accessing to IO
 */
private Schema getSchema(Path inputPathDir) throws IOException {
  FileSystem fs = FileSystem.get(new Configuration());
  for (FileStatus entry : fs.listStatus(inputPathDir)) {
    boolean avroFile = entry.isFile() && entry.getPath().getName().endsWith(".avro");
    if (!avroFile) {
      continue;
    }
    _logger.info("Extracting schema from " + entry.getPath());
    // Only the first match is inspected; the reader is closed before returning.
    try (DataFileStream<GenericRecord> reader = getAvroReader(inputPathDir)) {
      return reader.getSchema();
    }
  }
  return null;
}
Example 3
Source File: ParquetReader.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code ParquetReader} from the configured options.
 *
 * <p>When no {@code path} is set, the reader wraps the single configured
 * {@code file}. When {@code path} refers to a file, the reader wraps that one
 * file. Otherwise the path is listed with {@code HiddenFileFilter.INSTANCE}
 * and the reader wraps every listed entry.
 *
 * @return the new reader
 * @throws IOException if the file system cannot be queried
 */
public ParquetReader<T> build() throws IOException {
  ParquetReadOptions options = optionsBuilder.build();

  if (path == null) {
    return new ParquetReader<>(Collections.singletonList(file), options, getReadSupport());
  }

  FileSystem fs = path.getFileSystem(conf);
  FileStatus stat = fs.getFileStatus(path);

  if (stat.isFile()) {
    InputFile single = HadoopInputFile.fromStatus(stat, conf);
    return new ParquetReader<>(Collections.singletonList(single), options, getReadSupport());
  }

  List<InputFile> inputs = new ArrayList<>();
  FileStatus[] visible = fs.listStatus(path, HiddenFileFilter.INSTANCE);
  for (FileStatus child : visible) {
    inputs.add(HadoopInputFile.fromStatus(child, conf));
  }
  return new ParquetReader<T>(inputs, options, getReadSupport());
}
Example 4
Source File: FSFactory.java From paraflow with Apache License 2.0 | 6 votes |
public List<Path> listFiles(Path dirPath) { List<Path> files = new ArrayList<>(); FileStatus[] fileStatuses; if (this.fileSystem == null) { return ImmutableList.of(); } try { fileStatuses = this.fileSystem.listStatus(dirPath); if (fileStatuses != null) { for (FileStatus f : fileStatuses) { //avoid add empty file if (f.isFile() && f.getLen() > 0) { files.add(f.getPath()); } } } } catch (IOException e) { log.error(e); throw new PrestoException(PARAFLOW_HDFS_FILE_ERROR, e); } return files; }
Example 5
Source File: TestFileOutputCommitter.java From hadoop with Apache License 2.0 | 6 votes |
private void validateMapFileOutputContent( FileSystem fs, Path dir) throws IOException { // map output is a directory with index and data files Path expectedMapDir = new Path(dir, partFile); assert(fs.getFileStatus(expectedMapDir).isDirectory()); FileStatus[] files = fs.listStatus(expectedMapDir); int fileCount = 0; boolean dataFileFound = false; boolean indexFileFound = false; for (FileStatus f : files) { if (f.isFile()) { ++fileCount; if (f.getPath().getName().equals(MapFile.INDEX_FILE_NAME)) { indexFileFound = true; } else if (f.getPath().getName().equals(MapFile.DATA_FILE_NAME)) { dataFileFound = true; } } } assert(fileCount > 0); assert(dataFileFound && indexFileFound); }
Example 6
Source File: HistoryFileManager.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Collects the regular files found directly under {@code path} that are
 * accepted by {@code pathFilter}.
 *
 * <p>The path is qualified against {@code fc} before listing. If the listing
 * raises {@link FileNotFoundException}, the error is logged and whatever was
 * collected so far (possibly nothing) is returned.
 *
 * @param path directory to list
 * @param fc file context used to qualify and list the path
 * @param pathFilter filter applied to the path of every regular file
 * @return the accepted file statuses, never {@code null}
 * @throws IOException on any listing failure other than a missing directory
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  final Path qualified = fc.makeQualified(path);
  final List<FileStatus> accepted = new ArrayList<FileStatus>();
  try {
    for (RemoteIterator<FileStatus> it = fc.listStatus(qualified); it.hasNext();) {
      final FileStatus candidate = it.next();
      final Path candidatePath = candidate.getPath();
      if (!candidate.isFile()) {
        continue;
      }
      if (pathFilter.accept(candidatePath)) {
        accepted.add(candidate);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + qualified, fe);
  }
  return accepted;
}
Example 7
Source File: Configuration.java From laser with Apache License 2.0 | 6 votes |
/**
 * Loads every {@code *.properties} file directly under {@code path}, parses
 * each one into a {@code Collection} with a JSON object mapper, and registers
 * it in {@code mapper} under the file name with the {@code .properties}
 * suffix removed.
 *
 * <p>Each input stream is closed explicitly once its file has been parsed, so
 * the handle is released even when parsing fails.
 *
 * @param path directory containing the configuration files
 * @param fs file system used to list and open the files
 * @throws IOException if listing, opening, or parsing a file fails
 */
public synchronized void load(Path path, FileSystem fs) throws IOException {
  final ObjectMapper objectMapper = new ObjectMapper();
  FileStatus[] candidates = fs.listStatus(path, new GlobFilter("*.properties"));
  for (FileStatus file : candidates) {
    if (!file.isFile()) {
      continue;
    }
    Path p = file.getPath();
    Collection configuration;
    // Close the stream here instead of relying on the parser to do it.
    try (FSDataInputStream in = fs.open(p)) {
      configuration = objectMapper.readValue(in, Collection.class);
    }
    String fileName = p.getName();
    String collection = fileName.substring(0, fileName.lastIndexOf(".properties"));
    configuration.setCollecion(collection);
    mapper.put(collection, configuration);
  }
}
Example 8
Source File: AbstractViolationPolicyEnforcement.java From hbase with Apache License 2.0 | 5 votes |
/**
 * Returns the length of a single file on the filesystem. A failure to read the
 * file status is reported as a {@link SpaceLimitingException}, as the file may
 * violate a quota. A path that does not reference a file results in an
 * {@link IllegalArgumentException}.
 *
 * @param fs The FileSystem which the path refers to a file upon
 * @param path The path on the {@code fs} to a file whose size is being checked
 * @return The size in bytes of the file
 * @throws SpaceLimitingException if the file status cannot be obtained
 */
long getFileSize(FileSystem fs, String path) throws SpaceLimitingException {
  final Path target = new Path(Objects.requireNonNull(path));

  final FileStatus status;
  try {
    status = fs.getFileStatus(target);
  } catch (IOException e) {
    throw new SpaceLimitingException(
        getPolicyName(), "Could not verify length of file to bulk load: " + path, e);
  }

  if (status.isFile()) {
    return status.getLen();
  }
  throw new IllegalArgumentException(path + " is not a file.");
}
Example 9
Source File: SafeFileOutputCommitter.java From datawave with Apache License 2.0 | 5 votes |
/**
 * Returns an iterator over the paths of all files found at or below
 * {@code path}.
 *
 * <p>{@code fs.listFiles(path, true)} would also work, but it yields
 * LocatedFileStatus objects that carry block locations. This iterator only asks
 * for the plain FileStatus of each entry, which is all that is needed to tell
 * files from directories.
 *
 * <p>Entries are taken from the end of a work queue: a file becomes the next
 * result, anything else is listed and its children are appended to the queue.
 *
 * @param fs file system to walk
 * @param path root of the walk
 * @return A remote iterator of paths for file only
 */
protected RemoteIterator<Path> listFiles(final FileSystem fs, final Path path) {
  return new RemoteIterator<Path>() {
    private ArrayDeque<FileStatus> pending = new ArrayDeque<>();
    private Path upcoming = null;
    private boolean seeded = false;

    /** Puts the status of the root path on the queue on first use. */
    private void seedIfNeeded() throws IOException {
      if (seeded) {
        return;
      }
      pending.add(fs.getFileStatus(path));
      seeded = true;
    }

    @Override
    public boolean hasNext() throws FileNotFoundException, IOException {
      seedIfNeeded();
      while (upcoming == null && !pending.isEmpty()) {
        FileStatus entry = pending.removeLast();
        if (!entry.isFile()) {
          Collections.addAll(pending, fs.listStatus(entry.getPath()));
          continue;
        }
        upcoming = entry.getPath();
      }
      return upcoming != null;
    }

    @Override
    public Path next() throws FileNotFoundException, IOException {
      if (!hasNext()) {
        throw new java.util.NoSuchElementException("No more files under " + path);
      }
      Path result = upcoming;
      upcoming = null;
      return result;
    }
  };
}
Example 10
Source File: CephFileSystem.java From cephfs-hadoop with GNU Lesser General Public License v2.1 | 5 votes |
/**
 * Deletes a file or directory.
 *
 * <p>A file is unlinked directly. A directory has each of its entries deleted
 * through a recursive call before the directory itself is removed.
 *
 * @param path path to delete; made absolute before use
 * @param recursive whether a non-empty directory may be deleted
 * @return {@code true} if the path was removed; {@code false} if the path does
 *         not exist, the directory listing is {@code null}, or deleting any
 *         child returned {@code false}
 * @throws IOException if {@code recursive} is {@code false} and the directory
 *         is not empty, or on an underlying file system error
 */
public boolean delete(Path path, boolean recursive) throws IOException {
  path = makeAbsolute(path);

  /* path exists? */
  FileStatus status;
  try {
    status = getFileStatus(path);
  } catch (FileNotFoundException e) {
    return false;
  }

  /* we're done if its a file */
  if (status.isFile()) {
    ceph.unlink(path);
    return true;
  }

  /* get directory contents */
  FileStatus[] dirlist = listStatus(path);
  if (dirlist == null) {
    return false;
  }

  if (!recursive && dirlist.length > 0) {
    // The separating space was missing, which fused the path and the text.
    throw new IOException("Directory " + path.toString() + " is not empty.");
  }

  for (FileStatus fs : dirlist) {
    if (!delete(fs.getPath(), recursive)) {
      return false;
    }
  }

  ceph.rmdir(path);
  return true;
}
Example 11
Source File: FragmentMerger.java From indexr with Apache License 2.0 | 5 votes |
private void mergeFolder(Path path) throws IOException { logger.debug("mergeFolder: {}", path); FileStatus[] fileStatuses = fileSystem.listStatus(path); List<FileStatus> files = new ArrayList<>(); List<FileStatus> folders = new ArrayList<>(); for (int i = 0; i < fileStatuses.length; i++) { FileStatus fileStatus = fileStatuses[i]; if (!SegmentHelper.checkSegmentByPath(fileStatus.getPath())) { continue; } if (fileStatus.isFile()) { files.add(fileStatus); } else if (fileStatus.isDirectory()) { folders.add(fileStatus); } } //logger.debug("files: {}", files); //logger.debug("folders: {}", folders); // Merge files under current folder if (files.size() > 1) { mergeFiles(path, files); } // Merge sub folders for (FileStatus folder : folders) { mergeFolder(folder.getPath()); } }
Example 12
Source File: BasicFormatMatcher.java From Bats with Apache License 2.0 | 5 votes |
public boolean matches(DrillFileSystem fs, FileStatus status) throws IOException{ if (ranges.isEmpty() || status.isDirectory()) { return false; } // walk all the way down in the symlinks until a hard entry is reached FileStatus current = status; while (current.isSymlink()) { current = fs.getFileStatus(status.getSymlink()); } // if hard entry is not a file nor can it be a symlink then it is not readable simply deny matching. if (!current.isFile()) { return false; } final Range<Long> fileRange = Range.closedOpen( 0L, status.getLen()); try (FSDataInputStream is = fs.open(status.getPath())) { for(RangeMagics rMagic : ranges) { Range<Long> r = rMagic.range; if (!fileRange.encloses(r)) { continue; } int len = (int) (r.upperEndpoint() - r.lowerEndpoint()); byte[] bytes = new byte[len]; is.readFully(r.lowerEndpoint(), bytes); for (byte[] magic : rMagic.magics) { if (Arrays.equals(magic, bytes)) { return true; } } } } return false; }
Example 13
Source File: FileSystemUtil.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Decides whether a file status falls within the given file system object
 * {@link Scope}.
 *
 * @param status file status
 * @param scope file system objects scope
 * @return for {@code DIRECTORIES} whether the status is a directory, for
 *         {@code FILES} whether it is a file, {@code true} for {@code ALL},
 *         and {@code false} for any other scope
 */
private static boolean isStatusApplicable(FileStatus status, Scope scope) {
  final boolean applicable;
  switch (scope) {
    case DIRECTORIES:
      applicable = status.isDirectory();
      break;
    case FILES:
      applicable = status.isFile();
      break;
    case ALL:
      applicable = true;
      break;
    default:
      applicable = false;
      break;
  }
  return applicable;
}
Example 14
Source File: PseudoDistributedFileSystem.java From dremio-oss with Apache License 2.0 | 5 votes |
@Override protected Callable<FileStatus> newMapTask(final String address) throws IOException { // TODO Auto-generated method stub return new Callable<FileStatus>() { @Override public FileStatus call() throws Exception { // Only directories should be removed with a fork/join task FileStatus status = getDelegateFileSystem(address).getFileStatus(path); if (status.isFile()) { throw new FileNotFoundException("Directory not found: " + path); } return status; } }; }
Example 15
Source File: FileStatusTreeTraverser.java From circus-train with Apache License 2.0 | 5 votes |
/**
 * Returns the direct children of {@code root} for tree traversal.
 *
 * <p>A file has no children. For any other entry the path is listed; a
 * {@code null} or empty listing yields an empty result.
 *
 * @param root node whose children are requested
 * @return an immutable list of the listed child statuses, possibly empty
 * @throws CircusTrainException if listing the path fails; the underlying
 *         {@link IOException} is attached as the cause
 */
@Override
public Iterable<FileStatus> children(FileStatus root) {
  if (root.isFile()) {
    return ImmutableList.of();
  }
  try {
    FileStatus[] listStatus = fileSystem.listStatus(root.getPath());
    if (listStatus == null || listStatus.length == 0) {
      return ImmutableList.of();
    }
    return ImmutableList.copyOf(listStatus);
  } catch (IOException e) {
    // Keep the original failure as the cause so the root error is not lost.
    throw new CircusTrainException("Unable to list children for path: " + root.getPath(), e);
  }
}
Example 16
Source File: HttpFSFileSystem.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Maps a file status to its {@code FILE_TYPE}.
 *
 * <p>The checks run in the order file, directory, symlink; the first one that
 * holds decides the result.
 *
 * @param fileStatus status to classify
 * @return {@code FILE}, {@code DIRECTORY} or {@code SYMLINK}
 * @throws IllegalArgumentException if the status is none of the three
 */
public static FILE_TYPE getType(FileStatus fileStatus) {
  final FILE_TYPE type;
  if (fileStatus.isFile()) {
    type = FILE;
  } else if (fileStatus.isDirectory()) {
    type = DIRECTORY;
  } else if (fileStatus.isSymlink()) {
    type = SYMLINK;
  } else {
    throw new IllegalArgumentException("Could not determine filetype for: "
        + fileStatus.getPath());
  }
  return type;
}
Example 17
Source File: Type.java From examples with Apache License 2.0 | 4 votes |
/**
 * Tells whether the given status matches this type.
 *
 * @param stat status to test
 * @return {@code true} exactly when {@code stat} reports itself as a file
 */
public boolean matches(FileStatus stat) {
  final boolean regularFile = stat.isFile();
  return regularFile;
}
Example 18
Source File: SemiTransactionalHiveMetastore.java From presto with Apache License 2.0 | 4 votes |
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory, Set<String> queryIds, boolean deleteEmptyDirectories) { // don't delete hidden presto directories if (directory.getName().startsWith(".presto")) { return new RecursiveDeleteResult(false, ImmutableList.of()); } FileStatus[] allFiles; try { allFiles = fileSystem.listStatus(directory); } catch (IOException e) { ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder(); notDeletedItems.add(directory.toString() + "/**"); return new RecursiveDeleteResult(false, notDeletedItems.build()); } boolean allDescendentsDeleted = true; ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder(); for (FileStatus fileStatus : allFiles) { if (fileStatus.isFile()) { Path filePath = fileStatus.getPath(); String fileName = filePath.getName(); boolean eligible = false; // never delete presto dot files if (!fileName.startsWith(".presto")) { eligible = queryIds.stream().anyMatch(id -> fileName.startsWith(id) || fileName.endsWith(id)); } if (eligible) { if (!deleteIfExists(fileSystem, filePath, false)) { allDescendentsDeleted = false; notDeletedEligibleItems.add(filePath.toString()); } } else { allDescendentsDeleted = false; } } else if (fileStatus.isDirectory()) { RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(), queryIds, deleteEmptyDirectories); if (!subResult.isDirectoryNoLongerExists()) { allDescendentsDeleted = false; } if (!subResult.getNotDeletedEligibleItems().isEmpty()) { notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems()); } } else { allDescendentsDeleted = false; notDeletedEligibleItems.add(fileStatus.getPath().toString()); } } if (allDescendentsDeleted && deleteEmptyDirectories) { verify(notDeletedEligibleItems.build().isEmpty()); if (!deleteIfExists(fileSystem, directory, false)) { return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/")); } return new 
RecursiveDeleteResult(true, ImmutableList.of()); } return new RecursiveDeleteResult(false, notDeletedEligibleItems.build()); }
Example 19
Source File: COSAPIClient.java From stocator with Apache License 2.0 | 4 votes |
@Override public boolean rename(String hostName, String srcPath, String dstPath) throws IOException { LOG.debug("Rename path {} to {}", srcPath, dstPath); Path src = new Path(srcPath); Path dst = new Path(dstPath); String srcKey = pathToKey(src); String dstKey = pathToKey(dst); if (srcKey.isEmpty()) { throw new IOException("Rename failed " + srcPath + " to " + dstPath + " source is root directory"); } if (dstKey.isEmpty()) { throw new IOException("Rename failed " + srcPath + " to " + dstPath + " dest is root directory"); } // get the source file status; this raises a FNFE if there is no source // file. FileStatus srcStatus = getFileStatus(hostName, src, "rename"); if (srcKey.equals(dstKey)) { LOG.debug("rename: src and dest refer to the same file or directory: {}", dstPath); throw new IOException("source + " + srcPath + "and dest " + dstPath + " refer to the same file or directory"); } FileStatus dstStatus = null; try { dstStatus = getFileStatus(hostName, dst, "rename"); // if there is no destination entry, an exception is raised. // hence this code sequence can assume that there is something // at the end of the path; the only detail being what it is and // whether or not it can be the destination of the rename. if (srcStatus.isDirectory()) { if (dstStatus.isFile()) { throw new IOException("source + " + srcPath + "and dest " + dstPath + "source is a directory and dest is a file"); } // at this point the destination is an empty directory } else { // source is a file. 
The destination must be a directory, // empty or not if (dstStatus.isFile()) { throw new IOException("source + " + srcPath + "and dest " + dstPath + "Cannot rename onto an existing file"); } } } catch (FileNotFoundException e) { LOG.debug("rename: destination path {} not found", dstPath); } if (srcStatus.isFile()) { LOG.debug("rename: renaming file {} to {}", src, dst); long length = srcStatus.getLen(); if (dstStatus != null && dstStatus.isDirectory()) { String newDstKey = dstKey; if (!newDstKey.endsWith("/")) { newDstKey = newDstKey + "/"; } String filename = srcKey.substring(pathToKey(src.getParent()).length() + 1); newDstKey = newDstKey + filename; copyFile(srcKey, newDstKey, length); } else { copyFile(srcKey, dstKey, srcStatus.getLen()); } delete(hostName, src, false); } else { LOG.debug("rename: renaming file {} to {} failed. Source file is directory", src, dst); } if (!(src.getParent().equals(dst.getParent()))) { LOG.debug("{} is not equal to {}. Going to create directory {}",src.getParent(), dst.getParent(), src.getParent()); createDirectoryIfNecessary(hostName, src.getParent()); } return true; }
Example 20
Source File: FileOutputCommitter.java From hadoop with Apache License 2.0 | 4 votes |
/** * Merge two paths together. Anything in from will be moved into to, if there * are any name conflicts while merging the files or directories in from win. * @param fs the File System to use * @param from the path data is coming from. * @param to the path data is going to. * @throws IOException on any error */ private void mergePaths(FileSystem fs, final FileStatus from, final Path to) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Merging data from " + from + " to " + to); } FileStatus toStat; try { toStat = fs.getFileStatus(to); } catch (FileNotFoundException fnfe) { toStat = null; } if (from.isFile()) { if (toStat != null) { if (!fs.delete(to, true)) { throw new IOException("Failed to delete " + to); } } if (!fs.rename(from.getPath(), to)) { throw new IOException("Failed to rename " + from + " to " + to); } } else if (from.isDirectory()) { if (toStat != null) { if (!toStat.isDirectory()) { if (!fs.delete(to, true)) { throw new IOException("Failed to delete " + to); } renameOrMerge(fs, from, to); } else { //It is a directory so merge everything in the directories for (FileStatus subFrom : fs.listStatus(from.getPath())) { Path subTo = new Path(to, subFrom.getPath().getName()); mergePaths(fs, subFrom, subTo); } } } else { renameOrMerge(fs, from, to); } } }