Java Code Examples for org.apache.hadoop.fs.FileSystem#listLocatedStatus()
The following examples show how to use
org.apache.hadoop.fs.FileSystem#listLocatedStatus().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Walks the tree rooted at {@code path} and appends every accepted file to
 * {@code result}. An entry rejected by {@code inputFilter} is skipped; a
 * rejected directory is therefore never descended into.
 *
 * @param result
 *          list that receives the files found
 * @param fs
 *          file system used to list {@code path}
 * @param path
 *          directory to start from
 * @param inputFilter
 *          filter applied to every entry (file or directory) before it is used
 * @throws IOException
 *           if listing a directory fails
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    if (!inputFilter.accept(entry.getPath())) {
      continue;
    }
    if (entry.isDirectory()) {
      // Accepted directory: recurse with the same filter.
      addInputPathRecursively(result, fs, entry.getPath(), inputFilter);
    } else {
      result.add(entry);
    }
  }
}
Example 2
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Collects, depth first, all files below {@code path} that pass
 * {@code inputFilter} and stores them in {@code result}. The filter is also
 * applied to directories, so a rejected directory is not traversed.
 *
 * @param result
 *          destination list for the located files
 * @param fs
 *          the file system to query
 * @param path
 *          the directory whose contents are listed
 * @param inputFilter
 *          filter deciding which files and directories are considered
 * @throws IOException
 *           if the file system listing fails
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(path);
      it.hasNext();) {
    LocatedFileStatus child = it.next();
    if (inputFilter.accept(child.getPath())) {
      if (!child.isDirectory()) {
        result.add(child);
      } else {
        addInputPathRecursively(result, fs, child.getPath(), inputFilter);
      }
    }
  }
}
Example 3
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Recursively gathers the files under {@code path} into {@code result}.
 * Every listed entry must be accepted by {@code inputFilter}; accepted
 * directories are descended into, accepted files are added.
 *
 * @param result
 *          the list that accumulates all files
 * @param fs
 *          the file system
 * @param path
 *          the input path to list
 * @param inputFilter
 *          the filter applied to both files and directories
 * @throws IOException
 *           if a listing call fails
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);
  while (listing.hasNext()) {
    LocatedFileStatus current = listing.next();
    boolean accepted = inputFilter.accept(current.getPath());
    if (accepted && current.isDirectory()) {
      addInputPathRecursively(result, fs, current.getPath(), inputFilter);
    } else if (accepted) {
      result.add(current);
    }
  }
}
Example 4
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Adds every file found beneath {@code path} to {@code result}, descending
 * into sub-directories. Entries that {@code inputFilter} rejects are ignored,
 * including whole directories.
 *
 * @param result
 *          output list of files
 * @param fs
 *          file system on which {@code path} lives
 * @param path
 *          root of the traversal
 * @param inputFilter
 *          filter for files and directories
 * @throws IOException
 *           if the directory cannot be listed
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  for (RemoteIterator<LocatedFileStatus> cursor = fs.listLocatedStatus(path);
      cursor.hasNext();) {
    LocatedFileStatus item = cursor.next();
    if (!inputFilter.accept(item.getPath())) {
      continue;
    }
    if (item.isDirectory()) {
      addInputPathRecursively(result, fs, item.getPath(), inputFilter);
      continue;
    }
    result.add(item);
  }
}
Example 5
Source File: IncrementalCheckpointManagerTest.java From attic-apex-malhar with Apache License 2.0 | 6 votes |
/**
 * Checks that deleting time buckets up to 200 leaves the application's
 * {@code bucket_data} directory empty.
 *
 * <p>The directory must contain at least one entry after
 * {@code testTransferWindowFiles()} has run (presumably that call populates
 * it; confirm against that test), and must contain none after the purge.
 *
 * @throws IOException if a file system operation fails
 * @throws InterruptedException declared by the original test signature
 */
@Test
public void testPurge() throws IOException, InterruptedException {
  // newInstance() hands back an instance owned by this test, so close it
  // when done instead of leaking it.
  try (FileSystem fileSystem = FileSystem.newInstance(new Configuration())) {
    testTransferWindowFiles();

    Path bucketData = new Path(testMeta.applicationPath + "/bucket_data");
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listLocatedStatus(bucketData);
    Assert.assertTrue(iterator.hasNext());

    testMeta.managedStateContext.getBucketsFileSystem().deleteTimeBucketsLessThanEqualTo(200);

    // List again: the purge is expected to have removed everything.
    iterator = fileSystem.listLocatedStatus(bucketData);
    if (iterator.hasNext()) {
      Assert.fail("All buckets should be deleted");
    }
  }
}
Example 6
Source File: RaidNode.java From RDFS with Apache License 2.0 | 6 votes |
public static List<LocatedFileStatus> listDirectoryRaidLocatedFileStatus( Configuration conf, FileSystem srcFs, Path p) throws IOException { long minFileSize = conf.getLong(MINIMUM_RAIDABLE_FILESIZE_KEY, MINIMUM_RAIDABLE_FILESIZE); List<LocatedFileStatus> lfs = new ArrayList<LocatedFileStatus>(); RemoteIterator<LocatedFileStatus> iter = srcFs.listLocatedStatus(p); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (stat.isDir()) { return null; } // We don't raid too small files if (stat.getLen() < minFileSize) { continue; } lfs.add(stat); } if (lfs.size() == 0) return null; return lfs; }
Example 7
Source File: CachingDirectoryLister.java From presto with Apache License 2.0 | 5 votes |
/**
 * Returns the located file statuses under {@code path}.
 *
 * A cached listing for {@code path} is served directly. Otherwise the file
 * system is listed; the result is wrapped in a caching iterator only when
 * the table's schema/table name matches one of {@code tablePrefixes}.
 */
@Override
public RemoteIterator<LocatedFileStatus> list(FileSystem fs, Table table, Path path)
        throws IOException
{
    List<LocatedFileStatus> cached = cache.getIfPresent(path);
    if (cached != null) {
        return simpleRemoteIterator(cached);
    }

    RemoteIterator<LocatedFileStatus> listing = fs.listLocatedStatus(path);
    boolean cacheable = tablePrefixes.stream()
            .anyMatch(prefix -> prefix.matches(table.getSchemaTableName()));
    return cacheable ? cachingRemoteIterator(listing, path) : listing;
}
Example 8
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Resolves every input path (glob) in {@code dirs} on the calling thread and
 * returns the matching files.
 *
 * A glob match that is a plain file is added as is. For a matching directory,
 * each child accepted by {@code inputFilter} is added, or traversed via
 * {@code addInputPathRecursively} when {@code recursive} is set and the child
 * is a directory. Paths that do not exist or match nothing are collected and
 * reported together at the end.
 *
 * @throws InvalidInputException if at least one input path was missing or
 *         matched no files
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> found = new ArrayList<FileStatus>();
  List<IOException> problems = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job);
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      problems.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (matches.length == 0) {
      problems.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }

    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        found.add(globStat);
        continue;
      }
      RemoteIterator<LocatedFileStatus> children =
          fs.listLocatedStatus(globStat.getPath());
      while (children.hasNext()) {
        LocatedFileStatus child = children.next();
        if (!inputFilter.accept(child.getPath())) {
          continue;
        }
        if (recursive && child.isDirectory()) {
          addInputPathRecursively(found, fs, child.getPath(), inputFilter);
        } else {
          found.add(child);
        }
      }
    }
  }

  if (!problems.isEmpty()) {
    throw new InvalidInputException(problems);
  }
  return found;
}
Example 9
Source File: FileInputFormat.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Lists the input files for {@code dirs} sequentially.
 *
 * Each entry of {@code dirs} is expanded with {@code globStatus}. Matching
 * files are added directly; for matching directories, the children accepted
 * by {@code inputFilter} are added, and child directories are descended into
 * only when {@code recursive} is true. Missing paths and empty patterns are
 * accumulated and thrown as one exception after all paths were processed.
 *
 * @throws InvalidInputException if any input path does not exist or matches
 *         zero files
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> collected = new ArrayList<FileStatus>();
  List<IOException> failures = new ArrayList<IOException>();

  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      failures.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (matches.length == 0) {
      failures.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }

    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        collected.add(globStat);
        continue;
      }
      for (RemoteIterator<LocatedFileStatus> it =
          fs.listLocatedStatus(globStat.getPath()); it.hasNext();) {
        LocatedFileStatus entry = it.next();
        if (!inputFilter.accept(entry.getPath())) {
          continue;
        }
        if (recursive && entry.isDirectory()) {
          addInputPathRecursively(collected, fs, entry.getPath(), inputFilter);
        } else {
          collected.add(entry);
        }
      }
    }
  }

  if (!failures.isEmpty()) {
    throw new InvalidInputException(failures);
  }
  return collected;
}
Example 10
Source File: SpillServiceImpl.java From dremio-oss with Apache License 2.0 | 5 votes |
private void sweep(String spillDir, long targetTime) { try { final Path spillDirPath = new Path(spillDir); FileSystem fileSystem = spillDirPath.getFileSystem(SPILLING_CONFIG); RemoteIterator<LocatedFileStatus> files = fileSystem.listLocatedStatus(spillDirPath); while (files.hasNext()) { LocatedFileStatus st = files.next(); if (st.getModificationTime() <= targetTime) { fileSystem.delete(st.getPath(), true); } } } catch (IOException e) { // exception silently ignored. Directory will be revisited at the next sweep } }
Example 11
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Single-threaded expansion of the job's input paths into file statuses.
 *
 * For each path the glob is evaluated with {@code inputFilter}. Files that
 * match are returned unchanged. Directories that match are listed one level
 * deep, and their accepted children are added; when {@code recursive} is
 * true, accepted child directories are walked with
 * {@code addInputPathRecursively} instead of being added themselves.
 *
 * @throws InvalidInputException carrying one {@code IOException} per input
 *         path that was missing or matched nothing
 */
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> files = new ArrayList<FileStatus>();
  final List<IOException> errors = new ArrayList<IOException>();

  for (int idx = 0; idx < dirs.length; idx++) {
    final Path p = dirs[idx];
    final FileSystem fs = p.getFileSystem(job);
    final FileStatus[] matches = fs.globStatus(p, inputFilter);

    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
      continue;
    }
    if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
      continue;
    }

    for (FileStatus globStat : matches) {
      if (!globStat.isDirectory()) {
        files.add(globStat);
        continue;
      }
      RemoteIterator<LocatedFileStatus> listing =
          fs.listLocatedStatus(globStat.getPath());
      while (listing.hasNext()) {
        LocatedFileStatus member = listing.next();
        if (inputFilter.accept(member.getPath())) {
          boolean descend = recursive && member.isDirectory();
          if (descend) {
            addInputPathRecursively(files, fs, member.getPath(), inputFilter);
          } else {
            files.add(member);
          }
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return files;
}
Example 12
Source File: FileInputFormat.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Builds the list of input files for {@code dirs} without using worker
 * threads.
 *
 * Every path is glob-expanded under {@code inputFilter}. A matching file is
 * added directly. A matching directory contributes its accepted children;
 * an accepted child directory is expanded recursively only if
 * {@code recursive} is true, otherwise it is added like a file. Errors for
 * missing or empty inputs are gathered and raised once all paths are done.
 *
 * @throws InvalidInputException if one or more input paths do not exist or
 *         match no files
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  final List<FileStatus> output = new ArrayList<FileStatus>();
  final List<IOException> errors = new ArrayList<IOException>();

  for (Path p : dirs) {
    final FileSystem fs = p.getFileSystem(job.getConfiguration());
    final FileStatus[] matches = fs.globStatus(p, inputFilter);

    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (!globStat.isDirectory()) {
          output.add(globStat);
          continue;
        }
        RemoteIterator<LocatedFileStatus> kids =
            fs.listLocatedStatus(globStat.getPath());
        while (kids.hasNext()) {
          LocatedFileStatus kid = kids.next();
          if (!inputFilter.accept(kid.getPath())) {
            continue;
          }
          if (recursive && kid.isDirectory()) {
            addInputPathRecursively(output, fs, kid.getPath(), inputFilter);
          } else {
            output.add(kid);
          }
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return output;
}
Example 13
Source File: WALInputFormat.java From hbase with Apache License 2.0 | 5 votes |
private List<FileStatus> getFiles(FileSystem fs, Path dir, long startTime, long endTime) throws IOException { List<FileStatus> result = new ArrayList<>(); LOG.debug("Scanning " + dir.toString() + " for WAL files"); RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(dir); if (!iter.hasNext()) return Collections.emptyList(); while (iter.hasNext()) { LocatedFileStatus file = iter.next(); if (file.isDirectory()) { // recurse into sub directories result.addAll(getFiles(fs, file.getPath(), startTime, endTime)); } else { String name = file.getPath().toString(); int idx = name.lastIndexOf('.'); if (idx > 0) { try { long fileStartTime = Long.parseLong(name.substring(idx+1)); if (fileStartTime <= endTime) { LOG.info("Found: " + file); result.add(file); } } catch (NumberFormatException x) { idx = 0; } } if (idx == 0) { LOG.warn("File " + name + " does not appear to be an WAL file. Skipping..."); } } } return result; }
Example 14
Source File: FileInputFormat.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Recursively adds the files below {@code path} to {@code result}. The
 * listing of each directory is restricted by {@code inputFilter}, so only
 * accepted files are added and only accepted directories are traversed.
 *
 * @param result
 *          list that receives the files together with their block locations
 * @param fs
 *          the file system to list
 * @param path
 *          the directory to start from
 * @param inputFilter
 *          filter handed to each listing call
 * @throws IOException
 *           if a listing call fails
 */
protected void addLocatedInputPathRecursively(List<LocatedFileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> entries =
      fs.listLocatedStatus(path, inputFilter);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    if (!entry.isDir()) {
      result.add(entry);
      continue;
    }
    addLocatedInputPathRecursively(result, fs, entry.getPath(), inputFilter);
  }
}
Example 15
Source File: PlacementMonitor.java From RDFS with Apache License 2.0 | 5 votes |
LocatedFileStatus getLocatedFileStatus( FileSystem fs, Path p) throws IOException { HashMap<String, LocatedFileStatus> cache = locatedFileStatusCache.get(); LocatedFileStatus result = cache.get(p.toUri().getPath()); if (result != null) { return result; } Path parent = p.getParent(); String parentPath = parent.toUri().getPath(); //If we already did listlocatedStatus on parent path, //it means path p doesn't exist, we don't need to list again if (cache.containsKey(parentPath) && cache.get(parentPath) == null) { return null; } RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(parent); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); cache.put(stat.getPath().toUri().getPath(), stat); } // trick: add parent path to the cache with value = null cache.put(parentPath, null); result = cache.get(p.toUri().getPath()); // This may still return null return result; }
Example 16
Source File: FileInputFormat.java From RDFS with Apache License 2.0 | 4 votes |
/** List input directories. * Subclasses may override to, e.g., select only files matching a regular * expression. * * @param job the job to list input paths for * @return array of LocatedFileStatus objects * @throws IOException if zero items. */ protected List<LocatedFileStatus> listLocatedStatus(JobContext job ) throws IOException { Path[] dirs = getInputPaths(job); if (dirs.length == 0) { throw new IOException("No input paths specified in job"); } List<IOException> errors = new ArrayList<IOException>(); // creates a MultiPathFilter with the hiddenFileFilter and the // user provided one (if any). List<PathFilter> filters = new ArrayList<PathFilter>(); filters.add(hiddenFileFilter); PathFilter jobFilter = getInputPathFilter(job); if (jobFilter != null) { filters.add(jobFilter); } PathFilter inputFilter = new MultiPathFilter(filters); List<LocatedFileStatus> result = new ArrayList<LocatedFileStatus>(); for (int i=0; i < dirs.length; ++i) { Path p = dirs[i]; FileSystem fs = p.getFileSystem(job.getConfiguration()); FileStatus[] matches = fs.globStatus(p, inputFilter); if (matches == null) { errors.add(new IOException("Input path does not exist: " + p)); } else if (matches.length == 0) { errors.add(new IOException("Input Pattern " + p + " matches 0 files")); } else { for (FileStatus globStat: matches) { for(RemoteIterator<LocatedFileStatus> itor = fs.listLocatedStatus(globStat.getPath(), inputFilter); itor.hasNext();) { result.add(itor.next()); } } } } if (!errors.isEmpty()) { throw new InvalidInputException(errors); } LOG.info("Total input paths to process : " + result.size()); return result; }