org.apache.hadoop.fs.PathFilter Java Examples
The following examples show how to use
org.apache.hadoop.fs.PathFilter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenerateData.java From hadoop with Apache License 2.0 | 6 votes |
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir) throws IOException { FileSystem fs = inputDir.getFileSystem(conf); // obtain input data file statuses long dataSize = 0; long fileCount = 0; RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true); PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter(); while (iter.hasNext()) { LocatedFileStatus lStatus = iter.next(); if (filter.accept(lStatus.getPath())) { dataSize += lStatus.getLen(); ++fileCount; } } // publish the plain data statistics LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize)); LOG.info("Total number of input data files : " + fileCount); return new DataStatistics(dataSize, fileCount, false); }
Example #2
Source File: FileUtils.java From streamx with Apache License 2.0 | 6 votes |
/**
 * Recursively collects the statuses of all files below {@code path} that the
 * filter accepts.
 *
 * <p>Directories are always descended into; the filter is applied to
 * non-directory entries only.
 *
 * @param storage storage used to test existence and list entries
 * @param path root of the traversal
 * @param filter filter applied to each non-directory entry's path
 * @return the accepted file statuses; empty when {@code path} does not exist
 * @throws IOException if a storage operation fails
 */
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
    throws IOException {
  final String location = path.toString();
  final ArrayList<FileStatus> collected = new ArrayList<>();
  if (!storage.exists(location)) {
    return collected;
  }

  for (FileStatus entry : storage.listStatus(location)) {
    final Path entryPath = entry.getPath();
    if (entry.isDirectory()) {
      collected.addAll(traverseImpl(storage, entryPath, filter));
    } else if (filter.accept(entryPath)) {
      collected.add(entry);
    }
  }
  return collected;
}
Example #3
Source File: S3PartitionedOutputCommitter.java From s3committer with Apache License 2.0 | 6 votes |
@Override protected List<FileStatus> getTaskOutput(TaskAttemptContext context) throws IOException { PathFilter filter = HiddenPathFilter.get(); // get files on the local FS in the attempt path Path attemptPath = getTaskAttemptPath(context); FileSystem attemptFS = attemptPath.getFileSystem(context.getConfiguration()); RemoteIterator<LocatedFileStatus> iter = attemptFS .listFiles(attemptPath, true /* recursive */ ); List<FileStatus> stats = Lists.newArrayList(); while (iter.hasNext()) { FileStatus stat = iter.next(); if (filter.accept(stat.getPath())) { stats.add(stat); } } return stats; }
Example #4
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Walks the input path recursively and appends every accepted file to the results.
 * Entries rejected by the filter are skipped entirely, so a rejected directory
 * is not descended into.
 *
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs.
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  final RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(path);
  while (children.hasNext()) {
    final LocatedFileStatus child = children.next();
    final Path childPath = child.getPath();
    if (!inputFilter.accept(childPath)) {
      continue;
    }
    if (!child.isDirectory()) {
      result.add(child);
      continue;
    }
    addInputPathRecursively(result, fs, childPath, inputFilter);
  }
}
Example #5
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Collects, depth-first, every file below the input path that passes the filter.
 * The filter is consulted for directories as well: a directory it rejects is
 * neither added nor traversed.
 *
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs.
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);
      listing.hasNext();) {
    final LocatedFileStatus entry = listing.next();
    final boolean accepted = inputFilter.accept(entry.getPath());
    if (accepted && entry.isDirectory()) {
      addInputPathRecursively(result, fs, entry.getPath(), inputFilter);
    } else if (accepted) {
      result.add(entry);
    }
  }
}
Example #6
Source File: CombineFileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Registers a new pool made of the given filters.
 * A pathname can satisfy any one of the specified filters.
 * A split cannot have files from different pools.
 *
 * @param filters the filters that make up the new pool
 */
protected void createPool(PathFilter... filters) {
  final MultiPathFilter pool = new MultiPathFilter();
  for (int i = 0; i < filters.length; i++) {
    pool.add(filters[i]);
  }
  pools.add(pool);
}
Example #7
Source File: HadoopFileSystemWrapper.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Delegates the glob to the underlying file system while a wait recorder
 * obtained from the operator stats is open.
 *
 * @param pathPattern pattern passed through to the underlying file system
 * @param filter filter passed through to the underlying file system
 * @return whatever the underlying file system returns for the glob
 * @throws IOException if the delegate fails; an {@code FSError} is rethrown
 *         via {@code propagateFSError}
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
  try (WaitRecorder recorder = OperatorStats.getWaitRecorder(operatorStats)) {
    final FileStatus[] matches = underlyingFs.globStatus(pathPattern, filter);
    return matches;
  } catch (FSError fsError) {
    throw propagateFSError(fsError);
  }
}
Example #8
Source File: CombineFileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Builds one pool from the supplied filters and appends it to the known pools.
 * A pathname can satisfy any one of the specified filters.
 * A split cannot have files from different pools.
 *
 * @param filters the filters to combine into a single pool
 */
protected void createPool(PathFilter... filters) {
  final MultiPathFilter combined = new MultiPathFilter();
  int index = 0;
  while (index < filters.length) {
    combined.add(filters[index]);
    index++;
  }
  pools.add(combined);
}
Example #9
Source File: BaseHoplogTestCase.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
/**
 * Lists the entries of a region/bucket directory whose file name ends with the
 * given type suffix.
 *
 * @param fs file system to list
 * @param regionAndBucket directory name resolved against {@code testDataDir}
 * @param type suffix a file name must end with to be returned
 * @return the matching file statuses
 * @throws IOException if the listing fails
 */
protected FileStatus[] getBucketHoplogs(FileSystem fs, String regionAndBucket,
    final String type) throws IOException {
  final Path bucketDir = new Path(testDataDir, regionAndBucket);
  final PathFilter suffixFilter = new PathFilter() {
    @Override
    public boolean accept(Path file) {
      final String fileName = file.getName();
      return fileName.endsWith(type);
    }
  };
  return fs.listStatus(bucketDir, suffixFilter);
}
Example #10
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Accepts a path only when every contained filter accepts it.
 * Evaluation stops at the first filter that rejects the path.
 *
 * @param path the path to test
 * @return {@code true} if no filter rejects the path (also when there are no filters)
 */
public boolean accept(Path path) {
  for (PathFilter member : filters) {
    if (member.accept(path)) {
      continue;
    }
    return false;
  }
  return true;
}
Example #11
Source File: CombineFileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Accepts a path as soon as any contained filter accepts it.
 *
 * @param path the path to test
 * @return {@code true} if at least one filter accepts the path;
 *         {@code false} otherwise (also when there are no filters)
 */
public boolean accept(Path path) {
  for (PathFilter member : filters) {
    if (!member.accept(path)) {
      continue;
    }
    return true;
  }
  return false;
}
Example #12
Source File: FileAndDirectoryInputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
/**
 * Tests a path against all contained filters; the first rejection ends the check.
 *
 * @param path the path to test
 * @return {@code false} if any filter rejects the path, {@code true} otherwise
 */
public boolean accept(Path path) {
  for (PathFilter each : filters) {
    final boolean passes = each.accept(path);
    if (!passes) {
      return false;
    }
  }
  return true;
}
Example #13
Source File: FileSystemUtil.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Combines the given filters into a single filter that accepts a path only
 * when all of them accept it.
 * If given array of filters is empty, will return {@link #DUMMY_FILTER}.
 *
 * @param filters array of filters
 * @return one filter that combines all given filters
 */
public static PathFilter mergeFilters(PathFilter... filters) {
  if (filters.length == 0) {
    return DUMMY_FILTER;
  }

  return path -> {
    // stop at the first rejecting filter, as a short-circuiting all-match does
    for (PathFilter member : filters) {
      if (!member.accept(path)) {
        return false;
      }
    }
    return true;
  };
}
Example #14
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Resolves each input path (glob) and gathers the matching file statuses.
 *
 * <p>A glob match that is a directory contributes its filter-accepted children;
 * accepted child directories are expanded recursively only when
 * {@code recursive} is set, otherwise they are added as-is. A non-directory
 * match is added directly. Problems are collected per input path and reported
 * together once all paths have been processed.
 *
 * @param job job context supplying the configuration
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether accepted sub-directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} wrapping all collected
 *         errors when a path does not exist or a pattern matches nothing,
 *         or any I/O failure from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> collected = new ArrayList<FileStatus>();
  final List<IOException> failures = new ArrayList<IOException>();

  for (Path dir : dirs) {
    final FileSystem fs = dir.getFileSystem(job.getConfiguration());
    final FileStatus[] globbed = fs.globStatus(dir, inputFilter);
    if (globbed == null) {
      failures.add(new IOException("Input path does not exist: " + dir));
      continue;
    }
    if (globbed.length == 0) {
      failures.add(new IOException("Input Pattern " + dir + " matches 0 files"));
      continue;
    }

    for (FileStatus match : globbed) {
      if (!match.isDirectory()) {
        collected.add(match);
        continue;
      }
      final RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(match.getPath());
      while (children.hasNext()) {
        final LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(collected, fs, child.getPath(), inputFilter);
        } else {
          collected.add(child);
        }
      }
    }
  }

  if (!failures.isEmpty()) {
    throw new InvalidInputException(failures);
  }
  return collected;
}
Example #15
Source File: JobHistoryUtils.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Lists the entries directly under {@code root}, optionally narrowed by a filter.
 *
 * @param fc file context used for the listing
 * @param root directory to list
 * @param filter filter applied to each entry's path; when {@code null} the
 *        full listing is returned unfiltered
 * @return the (possibly filtered) list of file statuses
 * @throws IOException if the listing fails
 */
private static List<FileStatus> listFilteredStatus(FileContext fc, Path root,
    PathFilter filter) throws IOException {
  final List<FileStatus> listing = remoteIterToList(fc.listStatus(root));
  if (filter == null) {
    return listing;
  }

  final List<FileStatus> accepted = new LinkedList<FileStatus>();
  for (FileStatus entry : listing) {
    if (!filter.accept(entry.getPath())) {
      continue;
    }
    accepted.add(entry);
  }
  return accepted;
}
Example #16
Source File: FileSystemUtil.java From Bats with Apache License 2.0 | 5 votes |
/**
 * Lists the file statuses directly under a path, keeping those that match the
 * requested {@link Scope}.
 *
 * @param fs file system
 * @param path path to file or directory
 * @param scope file system objects scope
 * @param suppressExceptions when {@code true}, any exception raised while listing
 *        is logged at debug level and an empty list is returned instead
 * @param filter filter to be applied
 * @return list of file statuses
 * @throws IOException if listing fails and exceptions are not suppressed
 */
private static List<FileStatus> listNonRecursive(FileSystem fs, Path path, Scope scope,
    boolean suppressExceptions, PathFilter filter) throws IOException {
  try {
    return Stream.of(fs.listStatus(path, filter))
        .filter(candidate -> isStatusApplicable(candidate, scope))
        .collect(Collectors.toList());
  } catch (Exception e) {
    if (!suppressExceptions) {
      throw e;
    }
    logger.debug("Exception during listing file statuses", e);
    return Collections.emptyList();
  }
}
Example #17
Source File: FileIterator.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
/**
 * Creates an iterator that starts out over the single given split and sets up
 * the input filter: the hidden-file filter plus the job's own filter when one
 * is configured.
 *
 * @param inSplit the split the iteration starts from
 * @param context task attempt context supplying the configuration
 */
public FileIterator(FileSplit inSplit, TaskAttemptContext context) {
  conf = context.getConfiguration();
  fileDirSplits = new LinkedList<FileSplit>();

  // the initial iterator walks a list holding only the incoming split
  final LinkedList<FileSplit> initialSplits = new LinkedList<FileSplit>();
  initialSplits.add(inSplit);
  iterator = initialSplits.iterator();

  final PathFilter jobFilter = getInputPathFilter();
  final List<PathFilter> chain = new ArrayList<PathFilter>();
  chain.add(FileAndDirectoryInputFormat.hiddenFileFilter);
  if (jobFilter != null) {
    chain.add(jobFilter);
  }
  inputFilter = new FileAndDirectoryInputFormat.MultiPathFilter(chain);
}
Example #18
Source File: ListHDFS.java From localization_nifi with Apache License 2.0 | 5 votes |
/**
 * Builds a filter that accepts a path when its file name fully matches the
 * regular expression taken from the {@code FILE_FILTER} property.
 *
 * @param context process context the property value is read from
 * @return a filter matching file names against the compiled pattern
 */
private PathFilter createPathFilter(final ProcessContext context) {
  final String regex = context.getProperty(FILE_FILTER).getValue();
  final Pattern namePattern = Pattern.compile(regex);

  return new PathFilter() {
    @Override
    public boolean accept(Path path) {
      final String fileName = path.getName();
      return namePattern.matcher(fileName).matches();
    }
  };
}
Example #19
Source File: LocatedFileStatusFetcher.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Stores the arguments this callable works with.
 *
 * @param fs file system kept for later use
 * @param fileStatus file status kept for later use
 * @param recursive recursion flag kept for later use
 * @param inputFilter path filter kept for later use
 */
ProcessInputDirCallable(
    FileSystem fs,
    FileStatus fileStatus,
    boolean recursive,
    PathFilter inputFilter) {
  // plain field capture; the assignments are independent of one another
  this.inputFilter = inputFilter;
  this.recursive = recursive;
  this.fileStatus = fileStatus;
  this.fs = fs;
}
Example #20
Source File: AvroUtil.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
/**
 * Get the schema of AVRO files stored in a directory.
 *
 * <p>When {@code path} is a directory, the schema is read from the first listed
 * entry whose name does not start with {@code "_"} or {@code "."}; otherwise it
 * is read from {@code path} itself.
 *
 * @param path an Avro file, or a directory containing Avro files
 * @param conf configuration used to resolve the file system and open the file
 * @return the schema of the inspected file, or {@code null} when {@code path}
 *         is a directory with no eligible entries
 * @throws IOException if the file system cannot be accessed or the file
 *         cannot be opened as an Avro data file
 */
public static Schema getAvroSchema(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  Path fileToTest;
  if (fs.isDirectory(path)) {
    FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
      @Override
      public boolean accept(Path p) {
        String name = p.getName();
        return !name.startsWith("_") && !name.startsWith(".");
      }
    });
    if (fileStatuses.length == 0) {
      return null;
    }
    fileToTest = fileStatuses[0].getPath();
  } else {
    fileToTest = path;
  }

  SeekableInput input = new FsInput(fileToTest, conf);
  FileReader<GenericRecord> fileReader = null;
  try {
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    fileReader = DataFileReader.openReader(input, reader);
    return fileReader.getSchema();
  } finally {
    // Always release the open file, including when opening the reader or
    // reading the schema fails. If no reader was created, close the raw
    // input directly.
    if (fileReader != null) {
      fileReader.close();
    } else {
      input.close();
    }
  }
}
Example #21
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Expands every input path (glob) into file statuses on the calling thread.
 *
 * <p>For a glob match that is a directory, its children accepted by the filter
 * are added; an accepted child directory is walked recursively only when
 * {@code recursive} is set and is otherwise added as-is. A match that is not a
 * directory is added directly. Errors are gathered across all input paths and
 * thrown together at the end.
 *
 * @param job job configuration used to resolve file systems
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether accepted sub-directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} carrying every collected
 *         error when a path is missing or a pattern matches nothing, or any
 *         I/O failure from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> collected = new ArrayList<FileStatus>();
  final List<IOException> failures = new ArrayList<IOException>();

  for (int i = 0; i < dirs.length; i++) {
    final Path dir = dirs[i];
    final FileSystem fs = dir.getFileSystem(job);
    final FileStatus[] globbed = fs.globStatus(dir, inputFilter);
    if (globbed == null) {
      failures.add(new IOException("Input path does not exist: " + dir));
      continue;
    }
    if (globbed.length == 0) {
      failures.add(new IOException("Input Pattern " + dir + " matches 0 files"));
      continue;
    }

    for (FileStatus match : globbed) {
      if (!match.isDirectory()) {
        collected.add(match);
        continue;
      }
      final RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(match.getPath());
      while (children.hasNext()) {
        final LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(collected, fs, child.getPath(), inputFilter);
        } else {
          collected.add(child);
        }
      }
    }
  }

  if (!failures.isEmpty()) {
    throw new InvalidInputException(failures);
  }
  return collected;
}
Example #22
Source File: FileAndDirectoryInputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
protected List<FileStatus> listStatus(JobContext job ) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } // get tokens for all the required FileSystems.. TokenCache.obtainTokensForNamenodes(job.getCredentials(), dirs, job.getConfiguration()); // Whether we need to recursive look into the directory structure boolean recursive = getInputDirRecursive(job); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); List<FileStatus> result = simpleListStatus(job, dirs, inputFilter, recursive); LOG.info("Total input paths to process : " + result.size()); return result; }
Example #23
Source File: ContainerFileSystem.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Lists located statuses from the file system that backs the given path's
 * container.
 *
 * <p>The listing runs against the path without its container. The caller's
 * filter is applied to each path after it has been transformed with the
 * container name, and every returned status is likewise rebuilt from the
 * transformed status and its block locations.
 *
 * @param f path to list, including its container
 * @param filter filter evaluated against container-transformed paths
 * @return iterator over the transformed located statuses
 * @throws FileNotFoundException declared by the overridden method
 * @throws IOException if the listing fails
 */
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f, final PathFilter filter)
    throws FileNotFoundException, IOException {
  final String container = getContainerName(f);
  final PathFilter containerAwareFilter =
      innerPath -> filter.accept(transform(innerPath, container));

  return RemoteIterators.transform(
      ListAccessor.listLocatedFileStatus(
          getFileSystemForPath(f).fs(), pathWithoutContainer(f), containerAwareFilter),
      inner -> new LocatedFileStatus(
          ContainerFileSystem.transform(inner, container), inner.getBlockLocations()));
}
Example #24
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Instantiates the PathFilter class configured for the input paths, if any.
 *
 * @param conf job configuration the filter class is read from
 * @return the PathFilter instance set for the job, NULL if none has been set.
 */
public static PathFilter getInputPathFilter(JobConf conf) {
  final Class<? extends PathFilter> configured = conf.getClass(
      org.apache.hadoop.mapreduce.lib.input.FileInputFormat.PATHFILTER_CLASS,
      null, PathFilter.class);
  if (configured == null) {
    return null;
  }
  return ReflectionUtils.newInstance(configured, conf);
}
Example #25
Source File: CombineFileInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
/**
 * Checks whether at least one contained filter accepts the path; the first
 * accepting filter ends the check.
 *
 * @param path the path to test
 * @return {@code true} if any filter accepts the path, {@code false} otherwise
 */
public boolean accept(Path path) {
  for (PathFilter each : filters) {
    final boolean passes = each.accept(path);
    if (passes) {
      return true;
    }
  }
  return false;
}
Example #26
Source File: CombineFileInputFormat.java From aliyun-maxcompute-data-collectors with Apache License 2.0 | 5 votes |
/**
 * Renders the contained filters as a bracketed list.
 *
 * <p>Each filter is followed by a comma, including the last one, e.g.
 * {@code [a,b,]}; an empty filter collection yields {@code []}.
 *
 * @return the string form of the contained filters
 */
public String toString() {
  // The buffer never leaves this method, so the unsynchronized StringBuilder
  // is sufficient; the produced text is unchanged.
  StringBuilder buf = new StringBuilder();
  buf.append("[");
  for (PathFilter f : filters) {
    buf.append(f);
    buf.append(",");
  }
  buf.append("]");
  return buf.toString();
}
Example #27
Source File: FileAndDirectoryInputFormat.java From marklogic-contentpump with Apache License 2.0 | 5 votes |
/**
 * Adds every file below {@code path} that the filter lets through to the
 * result list, descending into the directories that the filtered listing
 * returns.
 *
 * @param result list receiving the file statuses
 * @param fs file system to list
 * @param path directory to start from
 * @param inputFilter filter passed to each directory listing
 * @throws IOException if a listing fails
 */
protected void simpleAddInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (FileStatus entry : fs.listStatus(path, inputFilter)) {
    if (!entry.isDirectory()) {
      result.add(entry);
      continue;
    }
    simpleAddInputPathRecursively(result, fs, entry.getPath(), inputFilter);
  }
}
Example #28
Source File: LocatedFileStatusFetcher.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Captures the constructor arguments in the corresponding fields.
 *
 * @param fs file system to remember
 * @param fileStatus file status to remember
 * @param recursive recursion flag to remember
 * @param inputFilter path filter to remember
 */
ProcessInputDirCallable(FileSystem fs, FileStatus fileStatus,
    boolean recursive, PathFilter inputFilter) {
  // what to work on
  this.fileStatus = fileStatus;
  this.fs = fs;
  // how to work on it
  this.inputFilter = inputFilter;
  this.recursive = recursive;
}
Example #29
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Requires all contained filters to accept the path.
 * The loop exits on the first filter that rejects it.
 *
 * @param path the path to test
 * @return {@code true} when no filter rejects the path, {@code false} otherwise
 */
public boolean accept(Path path) {
  for (PathFilter required : filters) {
    final boolean rejected = !required.accept(path);
    if (rejected) {
      return false;
    }
  }
  return true;
}
Example #30
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Lists the input files for the given paths sequentially.
 *
 * <p>Each path is globbed with the input filter. Directory matches are listed
 * and their filter-accepted children added; accepted child directories are
 * expanded recursively when {@code recursive} is set and added unchanged
 * otherwise. Non-directory matches are added directly. All per-path errors are
 * accumulated and raised together after the last path.
 *
 * @param job job configuration used to resolve file systems
 * @param dirs input paths or glob patterns
 * @param inputFilter filter applied to glob matches and directory children
 * @param recursive whether accepted sub-directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} holding the accumulated
 *         errors when a path does not exist or a pattern matches no files,
 *         or any I/O failure from the file system
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> found = new ArrayList<FileStatus>();
  final List<IOException> problems = new ArrayList<IOException>();

  for (Path inputPath : dirs) {
    final FileSystem fs = inputPath.getFileSystem(job);
    final FileStatus[] matches = fs.globStatus(inputPath, inputFilter);

    if (matches == null) {
      problems.add(new IOException("Input path does not exist: " + inputPath));
      continue;
    }
    if (matches.length == 0) {
      problems.add(new IOException("Input Pattern " + inputPath + " matches 0 files"));
      continue;
    }

    for (FileStatus match : matches) {
      if (!match.isDirectory()) {
        found.add(match);
        continue;
      }
      for (RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(match.getPath());
          listing.hasNext();) {
        final LocatedFileStatus entry = listing.next();
        if (!inputFilter.accept(entry.getPath())) {
          continue;
        }
        final boolean descend = recursive && entry.isDirectory();
        if (descend) {
          addInputPathRecursively(found, fs, entry.getPath(), inputFilter);
        } else {
          found.add(entry);
        }
      }
    }
  }

  if (!problems.isEmpty()) {
    throw new InvalidInputException(problems);
  }
  return found;
}