Java Code Examples for org.apache.hadoop.fs.RemoteIterator#hasNext()
The following examples show how to use org.apache.hadoop.fs.RemoteIterator#hasNext(). Each example comes from an open-source project; the source file, project, and license are noted above each snippet.
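All of the examples share one idiom, so a minimal sketch of it may help before diving in. Unlike java.util.Iterator, both RemoteIterator#hasNext() and RemoteIterator#next() declare IOException, because each call may page results from a remote NameNode or object store. The FileSystem and Path below are assumed to be already configured, and the class name is illustrative.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
  public static void printFiles(FileSystem fs, Path dir) throws IOException {
    // listFiles returns a RemoteIterator; the boolean selects recursive listing
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
    while (it.hasNext()) { // may throw IOException, unlike java.util.Iterator
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath());
    }
  }
}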
Example 1
Source File: FSAgent.java From Bats with Apache License 2.0
public List<String> listFiles(String dir) throws IOException {
  List<String> files = new ArrayList<>();
  Path path = new Path(dir);
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs.getPath().getName());
  }
  return files;
}
Example 2
Source File: GenerateData.java From hadoop with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir)
    throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);

  return new DataStatistics(dataSize, fileCount, false);
}
Example 3
Source File: ResourceLocalizationService.java From hadoop with Apache License 2.0
private void cleanUpFilesPerUserDir(FileContext lfs, DeletionService del,
    Path userDirPath) throws IOException {
  RemoteIterator<FileStatus> userDirStatus = lfs.listStatus(userDirPath);
  FileDeletionTask dependentDeletionTask =
      del.createFileDeletionTask(null, userDirPath, new Path[] {});
  if (userDirStatus != null && userDirStatus.hasNext()) {
    List<FileDeletionTask> deletionTasks = new ArrayList<FileDeletionTask>();
    while (userDirStatus.hasNext()) {
      FileStatus status = userDirStatus.next();
      String owner = status.getOwner();
      FileDeletionTask deletionTask =
          del.createFileDeletionTask(owner, null, new Path[] { status.getPath() });
      deletionTask.addFileDeletionTaskDependency(dependentDeletionTask);
      deletionTasks.add(deletionTask);
    }
    for (FileDeletionTask task : deletionTasks) {
      del.scheduleFileDeletionTask(task);
    }
  } else {
    del.scheduleFileDeletionTask(dependentDeletionTask);
  }
}
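Example 3 is notable for using hasNext() not only for loop control but also as an emptiness test on the listing, to decide which deletion task to schedule. That test can be factored into a tiny helper; the sketch below is illustrative and not part of the example above.

import java.io.IOException;
import org.apache.hadoop.fs.RemoteIterator;

final class IteratorChecks {
  private IteratorChecks() {
  }

  // True when the listing is absent or yields no entries; hasNext() itself
  // may throw IOException, so the caller must handle it.
  static <T> boolean isEmpty(RemoteIterator<T> it) throws IOException {
    return it == null || !it.hasNext();
  }
}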
Example 4
Source File: FileInputFormat.java From hadoop with Apache License 2.0
/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs.
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
  while (iter.hasNext()) {
    LocatedFileStatus stat = iter.next();
    if (inputFilter.accept(stat.getPath())) {
      if (stat.isDirectory()) {
        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
      } else {
        result.add(stat);
      }
    }
  }
}
Example 5
Source File: TestEncryptionZones.java From hadoop with Apache License 2.0
/**
 * Checks that an encryption zone with the specified keyName and path (if not
 * null) is present.
 *
 * @throws IOException if a matching zone could not be found
 */
public void assertZonePresent(String keyName, String path) throws IOException {
  final RemoteIterator<EncryptionZone> it = dfsAdmin.listEncryptionZones();
  boolean match = false;
  while (it.hasNext()) {
    EncryptionZone zone = it.next();
    boolean matchKey = (keyName == null);
    boolean matchPath = (path == null);
    if (keyName != null && zone.getKeyName().equals(keyName)) {
      matchKey = true;
    }
    if (path != null && zone.getPath().equals(path)) {
      matchPath = true;
    }
    if (matchKey && matchPath) {
      match = true;
      break;
    }
  }
  assertTrue("Did not find expected encryption zone with keyName " + keyName +
      " path " + path, match);
}
Example 6
Source File: CryptoAdmin.java From hadoop with Apache License 2.0
@Override
public int run(Configuration conf, List<String> args) throws IOException {
  if (!args.isEmpty()) {
    System.err.println("Can't understand argument: " + args.get(0));
    return 1;
  }

  final DistributedFileSystem dfs = AdminHelper.getDFS(conf);
  try {
    final TableListing listing = new TableListing.Builder()
        .addField("").addField("", true)
        .wrapWidth(AdminHelper.MAX_LINE_WIDTH).hideHeaders().build();
    final RemoteIterator<EncryptionZone> it = dfs.listEncryptionZones();
    while (it.hasNext()) {
      EncryptionZone ez = it.next();
      listing.addRow(ez.getPath(), ez.getKeyName());
    }
    System.out.println(listing.toString());
  } catch (IOException e) {
    System.err.println(prettifyException(e));
    return 2;
  }
  return 0;
}
Example 7
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CacheDirectiveEntry> iter = dfs.listCacheDirectives(
        new CacheDirectiveInfo.Builder().
            setPool(directive.getPool()).
            setPath(directive.getPath()).
            build());
    while (iter.hasNext()) {
      CacheDirectiveInfo result = iter.next().getInfo();
      if ((result.getId() == id) &&
          (result.getReplication().shortValue() == newReplication)) {
        return true;
      }
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 8
Source File: FSAgent.java From Bats with Apache License 2.0
public List<LocatedFileStatus> listFilesInfo(String dir) throws IOException {
  List<LocatedFileStatus> files = new ArrayList<>();
  Path path = new Path(dir);
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs);
  }
  return files;
}
Example 9
Source File: HDFSResourceStore.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void visitFolderImpl(String folderPath, boolean recursive, VisitFilter filter,
    boolean loadContent, Visitor visitor) throws IOException {
  Path p = getRealHDFSPath(folderPath);
  if (!fs.exists(p) || !fs.isDirectory(p)) {
    return;
  }
  String fsPathPrefix = p.toUri().getPath();
  String resPathPrefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";

  RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, recursive);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    if (status.isDirectory())
      continue;

    String path = status.getPath().toUri().getPath();
    if (!path.startsWith(fsPathPrefix))
      throw new IllegalStateException(
          "File path " + path + " is supposed to start with " + fsPathPrefix);

    String resPath = resPathPrefix + path.substring(fsPathPrefix.length() + 1);

    if (filter.matches(resPath, status.getModificationTime())) {
      RawResource raw;
      if (loadContent)
        raw = new RawResource(resPath, status.getModificationTime(), fs.open(status.getPath()));
      else
        raw = new RawResource(resPath, status.getModificationTime());

      try {
        visitor.visit(raw);
      } finally {
        raw.close();
      }
    }
  }
}
Example 10
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (!iter.hasNext()) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 11
Source File: PrestoS3FileSystem.java From presto with Apache License 2.0
@Override
public FileStatus[] listStatus(Path path) throws IOException {
  STATS.newListStatusCall();
  List<LocatedFileStatus> list = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> iterator = listLocatedStatus(path);
  while (iterator.hasNext()) {
    list.add(iterator.next());
  }
  return toArray(list, LocatedFileStatus.class);
}
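This example, like Examples 1 and 8 before it, does nothing with the iterator except drain it into a list. Under no assumptions beyond the RemoteIterator contract, that pattern generalizes to a helper like the sketch below; the class and method names are illustrative, not taken from any of the projects above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.RemoteIterator;

final class RemoteIteratorUtil {
  private RemoteIteratorUtil() {
  }

  // Drains any RemoteIterator into a List, propagating the IOException
  // that hasNext() or next() may throw.
  static <T> List<T> toList(RemoteIterator<T> it) throws IOException {
    List<T> result = new ArrayList<>();
    while (it.hasNext()) {
      result.add(it.next());
    }
    return result;
  }
}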
Example 12
Source File: TestLogAggregationService.java From hadoop with Apache License 2.0
private int numOfLogsAvailable(LogAggregationService logAggregationService,
    ApplicationId appId, boolean sizeLimited, String lastLogFile)
    throws IOException {
  Path appLogDir = logAggregationService.getRemoteAppLogDir(appId, this.user);
  RemoteIterator<FileStatus> nodeFiles = null;
  try {
    Path qualifiedLogDir =
        FileContext.getFileContext(this.conf).makeQualified(appLogDir);
    nodeFiles =
        FileContext.getFileContext(qualifiedLogDir.toUri(), this.conf)
            .listStatus(appLogDir);
  } catch (FileNotFoundException fnf) {
    return -1;
  }
  int count = 0;
  while (nodeFiles.hasNext()) {
    FileStatus status = nodeFiles.next();
    String filename = status.getPath().getName();
    if (filename.contains(LogAggregationUtils.TMP_FILE_SUFFIX)
        || (lastLogFile != null && filename.contains(lastLogFile)
            && sizeLimited)) {
      return -1;
    }
    if (filename.contains(LogAggregationUtils
        .getNodeString(logAggregationService.getNodeId()))) {
      count++;
    }
  }
  return count;
}
Example 13
Source File: FSStorageAgent.java From Bats with Apache License 2.0
@Override
public long[] getWindowIds(int operatorId) throws IOException {
  Path lPath = new Path(path + Path.SEPARATOR + String.valueOf(operatorId));
  try {
    FileStatus status = fileContext.getFileStatus(lPath);
    if (!status.isDirectory()) {
      throw new RuntimeException("Checkpoint location is not a directory");
    }
  } catch (FileNotFoundException ex) {
    // During initialization the checkpoint directory may not exist.
    fileContext.mkdir(lPath, FsPermission.getDirDefault(), true);
  }

  RemoteIterator<FileStatus> fileStatusRemoteIterator = fileContext.listStatus(lPath);
  List<Long> lwindows = new ArrayList<>();
  while (fileStatusRemoteIterator.hasNext()) {
    FileStatus fileStatus = fileStatusRemoteIterator.next();
    String name = fileStatus.getPath().getName();
    if (name.equals(TMP_FILE)) {
      continue;
    }
    lwindows.add(STATELESS_CHECKPOINT_WINDOW_ID.equals(name) ?
        Stateless.WINDOW_ID : Long.parseLong(name, 16));
  }
  long[] windowIds = new long[lwindows.size()];
  for (int i = 0; i < windowIds.length; i++) {
    windowIds[i] = lwindows.get(i);
  }
  return windowIds;
}
Example 14
Source File: HdfsIOBenchmark.java From incubator-crail with Apache License 2.0
void browseDir() throws Exception {
  System.out.println("reading enumerate dir, path " + path);
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  // benchmark
  System.out.println("starting benchmark...");
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
  while (iter.hasNext()) {
    LocatedFileStatus status = iter.next();
    System.out.println(status.getPath());
  }
  fs.close();
}
Example 15
Source File: FileInputFormat.java From hadoop with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
Example 16
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (iter.hasNext() && iter.next().getInfo().getLimit() == 99) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 17
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(
    String partitionDir, JobConf job, boolean isRecursive, boolean directoriesOnly,
    int partitionId) {
  final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>();
  final Path rootLocation = new Path(partitionDir);
  try {
    // TODO: DX-16001 - make async configurable for Hive.
    final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job);

    if (fs.exists(rootLocation)) {
      final FileStatus rootStatus = fs.getFileStatus(rootLocation);
      if (rootStatus.isDirectory()) {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(true)
            .build());

        final RemoteIterator<LocatedFileStatus> statuses =
            isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false);
        while (statuses.hasNext()) {
          LocatedFileStatus fileStatus = statuses.next();
          final Path filePath = fileStatus.getPath();
          if (fileStatus.isDirectory()) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(true)
                .build());
          } else if (fileStatus.isFile() && !directoriesOnly) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(false)
                .build());
          }
        }
      } else {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(false)
            .build());
      }
      return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder()
          .setPartitionId(partitionId)
          .setPartitionRootDir(fs.makeQualified(rootLocation).toString())
          .addAllCachedEntities(cachedEntities)
          .build();
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example 18
Source File: TraceBuilder.java From hadoop with Apache License 2.0
/**
 * Processes the input file/folder argument. If the input is a file,
 * then it is directly considered for further processing by TraceBuilder.
 * If the input is a folder, then all the history logs in the
 * input folder are considered for further processing.
 *
 * If isRecursive is true, then the input path is recursively scanned
 * for job history logs for further processing by TraceBuilder.
 *
 * NOTE: If the input represents a globbed path, then it is first flattened
 *       and then the individual paths represented by the globbed input
 *       path are considered for further processing.
 *
 * @param input        input path, possibly globbed
 * @param conf         configuration
 * @param isRecursive  whether to recursively traverse the input paths to
 *                     find history logs
 * @return the input history log files' paths
 * @throws FileNotFoundException
 * @throws IOException
 */
static List<Path> processInputArgument(String input, Configuration conf,
    boolean isRecursive) throws FileNotFoundException, IOException {
  Path inPath = new Path(input);
  FileSystem fs = inPath.getFileSystem(conf);
  FileStatus[] inStatuses = fs.globStatus(inPath);

  List<Path> inputPaths = new LinkedList<Path>();

  if (inStatuses == null || inStatuses.length == 0) {
    return inputPaths;
  }

  for (FileStatus inStatus : inStatuses) {
    Path thisPath = inStatus.getPath();
    if (inStatus.isDirectory()) {
      // Find list of files in this path(recursively if -recursive option
      // is specified).
      List<FileStatus> historyLogs = new ArrayList<FileStatus>();

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(thisPath, isRecursive);
      while (iter.hasNext()) {
        LocatedFileStatus child = iter.next();
        String fileName = child.getPath().getName();

        if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
          historyLogs.add(child);
        }
      }

      if (historyLogs.size() > 0) {
        // Add the sorted history log file names in this path to the
        // inputPaths list
        FileStatus[] sortableNames =
            historyLogs.toArray(new FileStatus[historyLogs.size()]);
        Arrays.sort(sortableNames, new HistoryLogsComparator());

        for (FileStatus historyLog : sortableNames) {
          inputPaths.add(historyLog.getPath());
        }
      }
    } else {
      inputPaths.add(thisPath);
    }
  }

  return inputPaths;
}
Example 19
Source File: LogCLIHelpers.java From hadoop with Apache License 2.0
@Private
@VisibleForTesting
public int dumpAContainersLogs(String appId, String containerId,
    String nodeId, String jobOwner) throws IOException {
  ApplicationId applicationId = ConverterUtils.toApplicationId(appId);
  List<Path> remoteAppLogDirs = AggregatedLogsBlock.getRemoteAppLogDirs(
      getConf(), applicationId, jobOwner);
  String remoteAppLogDir = StringUtils.join(remoteAppLogDirs, ",");
  RemoteIterator<FileStatus> nodeFiles;
  try {
    nodeFiles = AggregatedLogsBlock.getFileListAtRemoteAppDir(getConf(),
        remoteAppLogDirs, applicationId, jobOwner);
  } catch (FileNotFoundException fnf) {
    logDirNotExist(remoteAppLogDir.toString());
    return -1;
  }
  boolean foundContainerLogs = false;
  while (nodeFiles.hasNext()) {
    FileStatus thisNodeFile = nodeFiles.next();
    String fileName = thisNodeFile.getPath().getName();
    if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
        && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
      AggregatedLogFormat.LogReader reader = null;
      try {
        reader = new AggregatedLogFormat.LogReader(getConf(),
            thisNodeFile.getPath());
        if (dumpAContainerLogs(containerId, reader, System.out,
            thisNodeFile.getModificationTime()) > -1) {
          foundContainerLogs = true;
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  if (!foundContainerLogs) {
    containerLogNotFound(containerId);
    return -1;
  }
  return 0;
}