Java Code Examples for org.apache.hadoop.fs.RemoteIterator#hasNext()
The following examples show how to use org.apache.hadoop.fs.RemoteIterator#hasNext(). Each example comes from an open-source project; the source file, project, and license are noted above each snippet.
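All of the examples share one idiom, so a minimal sketch of it may help before diving in. Unlike java.util.Iterator, both RemoteIterator#hasNext() and RemoteIterator#next() declare IOException, because each call may page results from a remote NameNode or object store. The FileSystem and Path below are assumed to be already configured, and the class name is illustrative.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
  public static void printFiles(FileSystem fs, Path dir) throws IOException {
    // listFiles returns a RemoteIterator; the boolean selects recursive listing
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
    while (it.hasNext()) { // may throw IOException, unlike java.util.Iterator
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath());
    }
  }
}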
Example 1
Source File: FSAgent.java From Bats with Apache License 2.0
public List<String> listFiles(String dir) throws IOException {
  List<String> files = new ArrayList<>();
  Path path = new Path(dir);
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs.getPath().getName());
  }
  return files;
}
Example 2
Source File: GenerateData.java From hadoop with Apache License 2.0
static DataStatistics publishPlainDataStatistics(Configuration conf, Path inputDir)
    throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);

  return new DataStatistics(dataSize, fileCount, false);
}
Example 3
Source File: ResourceLocalizationService.java From hadoop with Apache License 2.0
private void cleanUpFilesPerUserDir(FileContext lfs, DeletionService del,
    Path userDirPath) throws IOException {
  RemoteIterator<FileStatus> userDirStatus = lfs.listStatus(userDirPath);
  FileDeletionTask dependentDeletionTask =
      del.createFileDeletionTask(null, userDirPath, new Path[] {});
  if (userDirStatus != null && userDirStatus.hasNext()) {
    List<FileDeletionTask> deletionTasks = new ArrayList<FileDeletionTask>();
    while (userDirStatus.hasNext()) {
      FileStatus status = userDirStatus.next();
      String owner = status.getOwner();
      FileDeletionTask deletionTask =
          del.createFileDeletionTask(owner, null, new Path[] { status.getPath() });
      deletionTask.addFileDeletionTaskDependency(dependentDeletionTask);
      deletionTasks.add(deletionTask);
    }
    for (FileDeletionTask task : deletionTasks) {
      del.scheduleFileDeletionTask(task);
    }
  } else {
    del.scheduleFileDeletionTask(dependentDeletionTask);
  }
}
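Example 3 is notable for using hasNext() not only for loop control but also as an emptiness test on the listing, to decide which deletion task to schedule. That test can be factored into a tiny helper; the sketch below is illustrative and not part of the example above.

import java.io.IOException;
import org.apache.hadoop.fs.RemoteIterator;

final class IteratorChecks {
  private IteratorChecks() {
  }

  // True when the listing is absent or yields no entries; hasNext() itself
  // may throw IOException, so the caller must handle it.
  static <T> boolean isEmpty(RemoteIterator<T> it) throws IOException {
    return it == null || !it.hasNext();
  }
}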
Example 4
Source File: FileInputFormat.java From hadoop with Apache License 2.0
/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs.
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
  while (iter.hasNext()) {
    LocatedFileStatus stat = iter.next();
    if (inputFilter.accept(stat.getPath())) {
      if (stat.isDirectory()) {
        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
      } else {
        result.add(stat);
      }
    }
  }
}
Example 5
Source File: TestEncryptionZones.java From hadoop with Apache License 2.0
/**
 * Checks that an encryption zone with the specified keyName and path (if not
 * null) is present.
 *
 * @throws IOException if a matching zone could not be found
 */
public void assertZonePresent(String keyName, String path) throws IOException {
  final RemoteIterator<EncryptionZone> it = dfsAdmin.listEncryptionZones();
  boolean match = false;
  while (it.hasNext()) {
    EncryptionZone zone = it.next();
    boolean matchKey = (keyName == null);
    boolean matchPath = (path == null);
    if (keyName != null && zone.getKeyName().equals(keyName)) {
      matchKey = true;
    }
    if (path != null && zone.getPath().equals(path)) {
      matchPath = true;
    }
    if (matchKey && matchPath) {
      match = true;
      break;
    }
  }
  assertTrue("Did not find expected encryption zone with keyName " + keyName +
      " path " + path, match);
}
Example 6
Source File: CryptoAdmin.java From hadoop with Apache License 2.0
@Override
public int run(Configuration conf, List<String> args) throws IOException {
  if (!args.isEmpty()) {
    System.err.println("Can't understand argument: " + args.get(0));
    return 1;
  }

  final DistributedFileSystem dfs = AdminHelper.getDFS(conf);
  try {
    final TableListing listing = new TableListing.Builder()
        .addField("").addField("", true)
        .wrapWidth(AdminHelper.MAX_LINE_WIDTH).hideHeaders().build();
    final RemoteIterator<EncryptionZone> it = dfs.listEncryptionZones();
    while (it.hasNext()) {
      EncryptionZone ez = it.next();
      listing.addRow(ez.getPath(), ez.getKeyName());
    }
    System.out.println(listing.toString());
  } catch (IOException e) {
    System.err.println(prettifyException(e));
    return 2;
  }
  return 0;
}
Example 7
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CacheDirectiveEntry> iter = dfs.listCacheDirectives(
        new CacheDirectiveInfo.Builder().
            setPool(directive.getPool()).
            setPath(directive.getPath()).
            build());
    while (iter.hasNext()) {
      CacheDirectiveInfo result = iter.next().getInfo();
      if ((result.getId() == id) &&
          (result.getReplication().shortValue() == newReplication)) {
        return true;
      }
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 8
Source File: FSAgent.java From Bats with Apache License 2.0
public List<LocatedFileStatus> listFilesInfo(String dir) throws IOException {
  List<LocatedFileStatus> files = new ArrayList<>();
  Path path = new Path(dir);
  FileStatus fileStatus = fileSystem.getFileStatus(path);
  if (!fileStatus.isDirectory()) {
    throw new FileNotFoundException("Cannot read directory " + dir);
  }
  RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
  while (it.hasNext()) {
    LocatedFileStatus lfs = it.next();
    files.add(lfs);
  }
  return files;
}
Example 9
Source File: HDFSResourceStore.java From kylin-on-parquet-v2 with Apache License 2.0
@Override
protected void visitFolderImpl(String folderPath, boolean recursive, VisitFilter filter,
    boolean loadContent, Visitor visitor) throws IOException {
  Path p = getRealHDFSPath(folderPath);
  if (!fs.exists(p) || !fs.isDirectory(p)) {
    return;
  }
  String fsPathPrefix = p.toUri().getPath();
  String resPathPrefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";

  RemoteIterator<LocatedFileStatus> it = fs.listFiles(p, recursive);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    if (status.isDirectory())
      continue;

    String path = status.getPath().toUri().getPath();
    if (!path.startsWith(fsPathPrefix))
      throw new IllegalStateException(
          "File path " + path + " is supposed to start with " + fsPathPrefix);

    String resPath = resPathPrefix + path.substring(fsPathPrefix.length() + 1);

    if (filter.matches(resPath, status.getModificationTime())) {
      RawResource raw;
      if (loadContent)
        raw = new RawResource(resPath, status.getModificationTime(), fs.open(status.getPath()));
      else
        raw = new RawResource(resPath, status.getModificationTime());

      try {
        visitor.visit(raw);
      } finally {
        raw.close();
      }
    }
  }
}
Example 10
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (!iter.hasNext()) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 11
Source File: PrestoS3FileSystem.java From presto with Apache License 2.0
@Override
public FileStatus[] listStatus(Path path) throws IOException {
  STATS.newListStatusCall();
  List<LocatedFileStatus> list = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> iterator = listLocatedStatus(path);
  while (iterator.hasNext()) {
    list.add(iterator.next());
  }
  return toArray(list, LocatedFileStatus.class);
}
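This example, like Examples 1 and 8 before it, does nothing with the iterator except drain it into a list. Under no assumptions beyond the RemoteIterator contract, that pattern generalizes to a helper like the sketch below; the class and method names are illustrative, not taken from any of the projects above.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.RemoteIterator;

final class RemoteIteratorUtil {
  private RemoteIteratorUtil() {
  }

  // Drains any RemoteIterator into a List, propagating the IOException
  // that hasNext() or next() may throw.
  static <T> List<T> toList(RemoteIterator<T> it) throws IOException {
    List<T> result = new ArrayList<>();
    while (it.hasNext()) {
      result.add(it.next());
    }
    return result;
  }
}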
Example 12
Source File: TestLogAggregationService.java From hadoop with Apache License 2.0
private int numOfLogsAvailable(LogAggregationService logAggregationService,
    ApplicationId appId, boolean sizeLimited, String lastLogFile)
    throws IOException {
  Path appLogDir = logAggregationService.getRemoteAppLogDir(appId, this.user);
  RemoteIterator<FileStatus> nodeFiles = null;
  try {
    Path qualifiedLogDir =
        FileContext.getFileContext(this.conf).makeQualified(appLogDir);
    nodeFiles =
        FileContext.getFileContext(qualifiedLogDir.toUri(), this.conf)
            .listStatus(appLogDir);
  } catch (FileNotFoundException fnf) {
    return -1;
  }
  int count = 0;
  while (nodeFiles.hasNext()) {
    FileStatus status = nodeFiles.next();
    String filename = status.getPath().getName();
    if (filename.contains(LogAggregationUtils.TMP_FILE_SUFFIX)
        || (lastLogFile != null && filename.contains(lastLogFile)
            && sizeLimited)) {
      return -1;
    }
    if (filename.contains(LogAggregationUtils
        .getNodeString(logAggregationService.getNodeId()))) {
      count++;
    }
  }
  return count;
}
Example 13
Source File: FSStorageAgent.java From Bats with Apache License 2.0
@Override
public long[] getWindowIds(int operatorId) throws IOException {
  Path lPath = new Path(path + Path.SEPARATOR + String.valueOf(operatorId));
  try {
    FileStatus status = fileContext.getFileStatus(lPath);
    if (!status.isDirectory()) {
      throw new RuntimeException("Checkpoint location is not a directory");
    }
  } catch (FileNotFoundException ex) {
    // During initialization the checkpoint directory may not exist.
    fileContext.mkdir(lPath, FsPermission.getDirDefault(), true);
  }

  RemoteIterator<FileStatus> fileStatusRemoteIterator = fileContext.listStatus(lPath);
  List<Long> lwindows = new ArrayList<>();
  while (fileStatusRemoteIterator.hasNext()) {
    FileStatus fileStatus = fileStatusRemoteIterator.next();
    String name = fileStatus.getPath().getName();
    if (name.equals(TMP_FILE)) {
      continue;
    }
    lwindows.add(STATELESS_CHECKPOINT_WINDOW_ID.equals(name) ?
        Stateless.WINDOW_ID : Long.parseLong(name, 16));
  }
  long[] windowIds = new long[lwindows.size()];
  for (int i = 0; i < windowIds.length; i++) {
    windowIds[i] = lwindows.get(i);
  }
  return windowIds;
}
Example 14
Source File: HdfsIOBenchmark.java From incubator-crail with Apache License 2.0
void browseDir() throws Exception {
  System.out.println("reading enumerate dir, path " + path);
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);

  // benchmark
  System.out.println("starting benchmark...");
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
  while (iter.hasNext()) {
    LocatedFileStatus status = iter.next();
    System.out.println(status.getPath());
  }
  fs.close();
}
Example 15
Source File: FileInputFormat.java From hadoop with Apache License 2.0
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
Example 16
Source File: TestRetryCacheWithHA.java From hadoop with Apache License 2.0
@Override
boolean checkNamenodeBeforeReturn() throws Exception {
  for (int i = 0; i < CHECKTIMES; i++) {
    RemoteIterator<CachePoolEntry> iter = dfs.listCachePools();
    if (iter.hasNext() && iter.next().getInfo().getLimit() == 99) {
      return true;
    }
    Thread.sleep(1000);
  }
  return false;
}
Example 17
Source File: HiveMetadataUtils.java From dremio-oss with Apache License 2.0
public static HiveReaderProto.FileSystemPartitionUpdateKey getFSBasedUpdateKey(
    String partitionDir, JobConf job, boolean isRecursive, boolean directoriesOnly,
    int partitionId) {
  final List<HiveReaderProto.FileSystemCachedEntity> cachedEntities = new ArrayList<>();
  final Path rootLocation = new Path(partitionDir);
  try {
    // TODO: DX-16001 - make async configurable for Hive.
    final HadoopFileSystemWrapper fs = new HadoopFileSystemWrapper(rootLocation, job);

    if (fs.exists(rootLocation)) {
      final FileStatus rootStatus = fs.getFileStatus(rootLocation);
      if (rootStatus.isDirectory()) {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(true)
            .build());

        final RemoteIterator<LocatedFileStatus> statuses =
            isRecursive ? fs.listFiles(rootLocation, true) : fs.listFiles(rootLocation, false);
        while (statuses.hasNext()) {
          LocatedFileStatus fileStatus = statuses.next();
          final Path filePath = fileStatus.getPath();
          if (fileStatus.isDirectory()) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(true)
                .build());
          } else if (fileStatus.isFile() && !directoriesOnly) {
            cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
                .setPath(PathUtils.relativePath(filePath, rootLocation))
                .setLastModificationTime(fileStatus.getModificationTime())
                .setIsDir(false)
                .build());
          }
        }
      } else {
        cachedEntities.add(HiveReaderProto.FileSystemCachedEntity.newBuilder()
            .setPath(EMPTY_STRING)
            .setLastModificationTime(rootStatus.getModificationTime())
            .setIsDir(false)
            .build());
      }
      return HiveReaderProto.FileSystemPartitionUpdateKey.newBuilder()
          .setPartitionId(partitionId)
          .setPartitionRootDir(fs.makeQualified(rootLocation).toString())
          .addAllCachedEntities(cachedEntities)
          .build();
    }
    return null;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example 18
Source File: TraceBuilder.java From hadoop with Apache License 2.0
/**
 * Processes the input file/folder argument. If the input is a file,
 * then it is directly considered for further processing by TraceBuilder.
 * If the input is a folder, then all the history logs in the
 * input folder are considered for further processing.
 *
 * If isRecursive is true, then the input path is recursively scanned
 * for job history logs for further processing by TraceBuilder.
 *
 * NOTE: If the input represents a globbed path, then it is first flattened
 *       and then the individual paths represented by the globbed input
 *       path are considered for further processing.
 *
 * @param input        input path, possibly globbed
 * @param conf         configuration
 * @param isRecursive  whether to recursively traverse the input paths to
 *                     find history logs
 * @return the input history log files' paths
 * @throws FileNotFoundException
 * @throws IOException
 */
static List<Path> processInputArgument(String input, Configuration conf,
    boolean isRecursive) throws FileNotFoundException, IOException {
  Path inPath = new Path(input);
  FileSystem fs = inPath.getFileSystem(conf);
  FileStatus[] inStatuses = fs.globStatus(inPath);

  List<Path> inputPaths = new LinkedList<Path>();

  if (inStatuses == null || inStatuses.length == 0) {
    return inputPaths;
  }

  for (FileStatus inStatus : inStatuses) {
    Path thisPath = inStatus.getPath();
    if (inStatus.isDirectory()) {
      // Find list of files in this path(recursively if -recursive option
      // is specified).
      List<FileStatus> historyLogs = new ArrayList<FileStatus>();

      RemoteIterator<LocatedFileStatus> iter = fs.listFiles(thisPath, isRecursive);
      while (iter.hasNext()) {
        LocatedFileStatus child = iter.next();
        String fileName = child.getPath().getName();

        if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
          historyLogs.add(child);
        }
      }

      if (historyLogs.size() > 0) {
        // Add the sorted history log file names in this path to the
        // inputPaths list
        FileStatus[] sortableNames =
            historyLogs.toArray(new FileStatus[historyLogs.size()]);
        Arrays.sort(sortableNames, new HistoryLogsComparator());

        for (FileStatus historyLog : sortableNames) {
          inputPaths.add(historyLog.getPath());
        }
      }
    } else {
      inputPaths.add(thisPath);
    }
  }

  return inputPaths;
}
Example 19
Source File: LogCLIHelpers.java From hadoop with Apache License 2.0
@Private
@VisibleForTesting
public int dumpAContainersLogs(String appId, String containerId,
    String nodeId, String jobOwner) throws IOException {
  ApplicationId applicationId = ConverterUtils.toApplicationId(appId);
  List<Path> remoteAppLogDirs = AggregatedLogsBlock.getRemoteAppLogDirs(
      getConf(), applicationId, jobOwner);
  String remoteAppLogDir = StringUtils.join(remoteAppLogDirs, ",");
  RemoteIterator<FileStatus> nodeFiles;
  try {
    nodeFiles = AggregatedLogsBlock.getFileListAtRemoteAppDir(getConf(),
        remoteAppLogDirs, applicationId, jobOwner);
  } catch (FileNotFoundException fnf) {
    logDirNotExist(remoteAppLogDir.toString());
    return -1;
  }
  boolean foundContainerLogs = false;
  while (nodeFiles.hasNext()) {
    FileStatus thisNodeFile = nodeFiles.next();
    String fileName = thisNodeFile.getPath().getName();
    if (fileName.contains(LogAggregationUtils.getNodeString(nodeId))
        && !fileName.endsWith(LogAggregationUtils.TMP_FILE_SUFFIX)) {
      AggregatedLogFormat.LogReader reader = null;
      try {
        reader = new AggregatedLogFormat.LogReader(getConf(),
            thisNodeFile.getPath());
        if (dumpAContainerLogs(containerId, reader, System.out,
            thisNodeFile.getModificationTime()) > -1) {
          foundContainerLogs = true;
        }
      } finally {
        if (reader != null) {
          reader.close();
        }
      }
    }
  }
  if (!foundContainerLogs) {
    containerLogNotFound(containerId);
    return -1;
  }
  return 0;
}