Java Code Examples for org.apache.hadoop.fs.PathFilter#accept()
The following examples show how to use
org.apache.hadoop.fs.PathFilter#accept().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HDFSFile.java From incubator-iotdb with Apache License 2.0 | 6 votes |
private List<HDFSFile> listFiles(String fileFolder, PathFilter pathFilter) { List<HDFSFile> files = new ArrayList<>(); try { Path path = new Path(fileFolder); for (FileStatus fileStatus : fs.listStatus(path)) { Path filePath = fileStatus.getPath(); if (pathFilter.accept(filePath)) { HDFSFile file = new HDFSFile(filePath.toUri().toString()); files.add(file); } } } catch (IOException e) { logger.error("Failed to list files in {}. ", fileFolder); } return files; }
Example 2
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/** * Add files in the input path recursively into the results. * @param result * The List to store all files. * @param fs * The FileSystem. * @param path * The input path. * @param inputFilter * The input filter that can be used to filter files/dirs. * @throws IOException */ protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path, PathFilter inputFilter) throws IOException { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } }
Example 3
Source File: FileUtils.java From streamx with Apache License 2.0 | 6 votes |
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter) throws IOException { if (!storage.exists(path.toString())) { return new ArrayList<>(); } ArrayList<FileStatus> result = new ArrayList<>(); FileStatus[] statuses = storage.listStatus(path.toString()); for (FileStatus status : statuses) { if (status.isDirectory()) { result.addAll(traverseImpl(storage, status.getPath(), filter)); } else { if (filter.accept(status.getPath())) { result.add(status); } } } return result; }
Example 4
Source File: S3PartitionedOutputCommitter.java From s3committer with Apache License 2.0 | 6 votes |
@Override protected List<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException { PathFilter filter = HiddenPathFilter.get(); // get files on the local FS in the attempt path Path attemptPath = getTaskAttemptPath(context); FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration()); RemoteIterator<LocatedFileStatus> iter = attemptFS .listFiles(attemptPath, true /* recursive */ ); List<FileStatus> stats = Lists.newArrayList(); while (iter.hasNext()) { FileStatus stat = iter.next(); if (filter.accept(stat.getPath())) { stats.add(stat); } } return stats; }
Example 5
Source File: HistoryFileManager.java From big-c with Apache License 2.0 | 6 votes |
@VisibleForTesting protected static List<FileStatus> scanDirectory(Path path, FileContext fc, PathFilter pathFilter) throws IOException { path = fc.makeQualified(path); List<FileStatus> jhStatusList = new ArrayList<FileStatus>(); try { RemoteIterator<FileStatus> fileStatusIter = fc.listStatus(path); while (fileStatusIter.hasNext()) { FileStatus fileStatus = fileStatusIter.next(); Path filePath = fileStatus.getPath(); if (fileStatus.isFile() && pathFilter.accept(filePath)) { jhStatusList.add(fileStatus); } } } catch (FileNotFoundException fe) { LOG.error("Error while scanning directory " + path, fe); } return jhStatusList; }
Example 6
Source File: HistoryFileManager.java From hadoop with Apache License 2.0 | 6 votes |
@VisibleForTesting protected static List<FileStatus> scanDirectory(Path path, FileContext fc, PathFilter pathFilter) throws IOException { path = fc.makeQualified(path); List<FileStatus> jhStatusList = new ArrayList<FileStatus>(); try { RemoteIterator<FileStatus> fileStatusIter = fc.listStatus(path); while (fileStatusIter.hasNext()) { FileStatus fileStatus = fileStatusIter.next(); Path filePath = fileStatus.getPath(); if (fileStatus.isFile() && pathFilter.accept(filePath)) { jhStatusList.add(fileStatus); } } } catch (FileNotFoundException fe) { LOG.error("Error while scanning directory " + path, fe); } return jhStatusList; }
Example 7
Source File: GenerateData.java From hadoop with Apache License 2.0 | 6 votes |
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir) throws IOException { FileSystem fs = inputDir.getFileSystem(conf); // obtain input data file statuses long dataSize = 0; long fileCount = 0; RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true); PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter(); while (iter.hasNext()) { LocatedFileStatus lStatus = iter.next(); if (filter.accept(lStatus.getPath())) { dataSize += lStatus.getLen(); ++fileCount; } } // publish the plain data statistics LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize)); LOG.info("Total number of input data files : " + fileCount); return new DataStatistics(dataSize, fileCount, false); }
Example 8
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (!filter.accept(path)) { return false; } } return true; }
Example 9
Source File: FileAndDirectoryInputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (!filter.accept(path)) { return false; } } return true; }
Example 10
Source File: MemoryStorage.java From streamx with Apache License 2.0 | 5 votes |
@Override public FileStatus[] listStatus(String path, PathFilter filter) throws IOException { if (failure == Failure.listStatusFailure) { failure = Failure.noFailure; throw new IOException("listStatus failed."); } List<FileStatus> result = new ArrayList<>(); for (String key: data.keySet()) { if (key.startsWith(path) && filter.accept(new Path(key))) { FileStatus status = new FileStatus(data.get(key).size(), false, 1, 0, 0, 0, null, null, null, new Path(key)); result.add(status); } } return result.toArray(new FileStatus[result.size()]); }
Example 11
Source File: HCatMetadataServiceTest.java From hadoop-etl-udfs with MIT License | 5 votes |
@Override public FileStatus[] listStatus(Path path, PathFilter filter) throws IOException { List<FileStatus> subPaths = pathsAndContent.get(path.toString()); List<FileStatus> filteredSubPaths = new ArrayList<>(); for (FileStatus subPath : subPaths) { if (filter.accept(subPath.getPath())) { filteredSubPaths.add(fakeFileStatus(subPath.getPath().toString())); } } return filteredSubPaths.toArray(new FileStatus[filteredSubPaths.size()]); }
Example 12
Source File: ContainerFileSystem.java From dremio-oss with Apache License 2.0 | 5 votes |
@Override protected RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, final PathFilter filter) throws FileNotFoundException, IOException { final String container = getContainerName(f); final PathFilter alteredFilter = (path) -> { return filter.accept(transform(path, container)); }; return RemoteIterators.transform( ListAccessor.listLocatedFileStatus(getFileSystemForPath(f).fs(), pathWithoutContainer(f), alteredFilter), t -> new LocatedFileStatus(ContainerFileSystem.transform(t, container), t.getBlockLocations()) ); }
Example 13
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter, boolean recursive) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); for (int i=0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDirectory()) { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath()); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (recursive && stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
Example 14
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (!filter.accept(path)) { return false; } } return true; }
Example 15
Source File: CombineFileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (filter.accept(path)) { return true; } } return false; }
Example 16
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs, PathFilter inputFilter, boolean recursive) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); for (Path p: dirs) { FileSystem fs = p.getFileSystem(job); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDirectory()) { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath()); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (recursive && stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
Example 17
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter, boolean recursive) throws IOException { List<FileStatus> result = new ArrayList<FileStatus>(); List<IOException> errors = new ArrayList<IOException>(); for (int i=0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { if (globStat.isDirectory()) { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath()); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (recursive && stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } } else { result.add(globStat); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } return result; }
Example 18
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (!filter.accept(path)) { return false; } } return true; }
Example 19
Source File: JobHistoryUtils.java From hadoop with Apache License 2.0 | 5 votes |
private static List<FileStatus> listFilteredStatus(FileContext fc, Path root, PathFilter filter) throws IOException { List<FileStatus> fsList = remoteIterToList(fc.listStatus(root)); if (filter == null) { return fsList; } else { List<FileStatus> filteredList = new LinkedList<FileStatus>(); for (FileStatus fs : fsList) { if (filter.accept(fs.getPath())) { filteredList.add(fs); } } return filteredList; } }
Example 20
Source File: CombineFileInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
public boolean accept(Path path) { for (PathFilter filter : filters) { if (filter.accept(path)) { return true; } } return false; }