Java Code Examples for org.apache.hadoop.fs.FileSystem#listStatus()
The following examples show how to use org.apache.hadoop.fs.FileSystem#listStatus(). Each example is drawn from an open-source project; the source file, license, and community vote count are noted above the code.
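Before the project examples, here is a minimal sketch of the call they all revolve around. The directory path and the bare Configuration below are placeholders, not taken from any of the projects that follow: listStatus(Path) returns one FileStatus per entry of a directory.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListStatusSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder directory; point this at a real path on your cluster or local FS.
    Path dir = new Path("/tmp/example");
    FileSystem fs = dir.getFileSystem(new Configuration());

    // listStatus returns one FileStatus per entry of the directory
    // (or a single-element array if the path refers to a file).
    for (FileStatus status : fs.listStatus(dir)) {
      System.out.println(status.getPath() + " " +
          (status.isDirectory() ? "<dir>" : status.getLen() + " bytes"));
    }
  }
}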
Example 1
Source File: TestUtils.java From systemds with Apache License 2.0 | 6 votes |
public static boolean readDMLBoolean(String filePath) {
  try {
    Boolean b = null;
    Path outDirectory = new Path(filePath);
    FileSystem fs = IOUtilFunctions.getFileSystem(outDirectory, conf);
    String line;
    FileStatus[] outFiles = fs.listStatus(outDirectory);
    for (FileStatus file : outFiles) {
      FSDataInputStream fsout = fs.open(file.getPath());
      try (BufferedReader outIn = new BufferedReader(new InputStreamReader(fsout))) {
        while ((line = outIn.readLine()) != null) {
          // only 1 scalar value in file
          b = Boolean.valueOf(Boolean.parseBoolean(line));
        }
      }
    }
    return b.booleanValue();
  }
  catch (IOException e) {
    assertTrue("could not read from file " + filePath, false);
  }
  return _AssertOccured;
}
Example 2
Source File: BlurOutputCommitter.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // look through all the shards for attempts that need to be cleaned up.
  // also find all the attempts that are finished
  // then rename all the attempts jobs to commits
  LOG.info("Commiting Job [{0}]", jobContext.getJobID());
  Configuration configuration = jobContext.getConfiguration();
  Path tableOutput = BlurOutputFormat.getOutputPath(configuration);
  LOG.info("TableOutput path [{0}]", tableOutput);
  makeSureNoEmptyShards(configuration, tableOutput);
  FileSystem fileSystem = tableOutput.getFileSystem(configuration);
  for (FileStatus fileStatus : fileSystem.listStatus(tableOutput)) {
    LOG.info("Checking file status [{0}] with path [{1}]", fileStatus, fileStatus.getPath());
    if (isShard(fileStatus)) {
      commitOrAbortJob(jobContext, fileStatus.getPath(), true);
    }
  }
  LOG.info("Commiting Complete [{0}]", jobContext.getJobID());
}
Example 3
Source File: DistCp.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Go to the directory we created for the chunk files.
 * The chunk files are named 0, 1, 2, 3, ...
 * For example, if a file File1 is chopped into 3 chunks,
 * then we should have a directory /File1_chunkfiles containing three files:
 * /File1_chunkfiles/0, /File1_chunkfiles/1, /File1_chunkfiles/2.
 * The returned array contains the paths of those chunks in sorted order.
 * We can also detect missing chunks by checking the chunk file names:
 * for example, if we only have /File1_chunkfiles/0 and /File1_chunkfiles/2,
 * we know that /File1_chunkfiles/1 is missing.
 * @param chunkFileDir the directory named filename_chunkfiles
 * @return the paths to all the chunk files in the chunkFileDir
 * @throws IOException
 */
private static Path[] getChunkFilePaths(Configuration conf, JobConf jobConf,
    final Arguments args, Path chunkFileDir, int chunkNum) throws IOException {
  FileSystem dstfs = args.dst.getFileSystem(conf);
  FileStatus[] chunkFileStatus = dstfs.listStatus(chunkFileDir);
  HashSet<String> chunkFilePathSet = new HashSet<String>(chunkFileStatus.length);
  for (FileStatus chunkfs : chunkFileStatus) {
    chunkFilePathSet.add(chunkfs.getPath().toUri().getPath());
  }
  Path[] chunkFilePaths = new Path[chunkNum];
  for (int i = 0; i < chunkNum; ++i) {
    // make sure we add the chunk files in order; chunk files are named by number
    Path chunkFile = new Path(chunkFileDir, Integer.toString(i));
    // make sure the chunk file is not missing
    if (chunkFilePathSet.contains(chunkFile.toUri().getPath()))
      chunkFilePaths[i] = chunkFile;
    else
      throw new IOException("Chunk File: " + chunkFile.toUri().getPath()
          + " doesn't exist!");
  }
  return chunkFilePaths;
}
Example 4
Source File: TestJoinQuery.java From tajo with Apache License 2.0 | 6 votes |
protected static List<Path> getPartitionPathList(FileSystem fs, Path path) throws Exception {
  FileStatus[] files = fs.listStatus(path);
  List<Path> paths = new ArrayList<>();
  if (files != null) {
    for (FileStatus eachFile : files) {
      if (eachFile.isFile()) {
        paths.add(path);
        return paths;
      } else {
        paths.addAll(getPartitionPathList(fs, eachFile.getPath()));
      }
    }
  }
  return paths;
}
Example 5
Source File: HadoopSegmentPreprocessingJob.java From incubator-pinot with Apache License 2.0 | 6 votes |
/**
 * Finds the avro file in the input folder, and returns its avro schema
 * @param inputPathDir Path to input directory
 * @return Input schema
 * @throws IOException if an I/O error occurs while accessing the input directory
 */
private Schema getSchema(Path inputPathDir) throws IOException {
  FileSystem fs = FileSystem.get(new Configuration());
  Schema avroSchema = null;
  for (FileStatus fileStatus : fs.listStatus(inputPathDir)) {
    if (fileStatus.isFile() && fileStatus.getPath().getName().endsWith(".avro")) {
      _logger.info("Extracting schema from " + fileStatus.getPath());
      try (DataFileStream<GenericRecord> dataStreamReader = getAvroReader(inputPathDir)) {
        avroSchema = dataStreamReader.getSchema();
      }
      break;
    }
  }
  return avroSchema;
}
Example 6
Source File: FileBasedOutputSizeReader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Returns the total size of output files in bytes
 * @param sto POStore
 * @param conf configuration
 */
@Override
public long getOutputSize(POStore sto, Configuration conf) throws IOException {
  if (!supports(sto, conf)) {
    log.warn("'" + sto.getStoreFunc().getClass().getCanonicalName()
        + "' is not supported by " + getClass().getCanonicalName());
    return -1;
  }
  long bytes = 0;
  Path p = new Path(getLocationUri(sto));
  FileSystem fs = p.getFileSystem(conf);
  FileStatus[] lst = fs.listStatus(p);
  if (lst != null) {
    for (FileStatus status : lst) {
      bytes += status.getLen();
    }
  }
  return bytes;
}
Example 7
Source File: JobClient.java From RDFS with Apache License 2.0 | 6 votes |
/**
 * Checks if the job directory is clean and has all the required components
 * for (re) starting the job
 */
public static boolean isJobDirValid(Path jobDirPath, FileSystem fs) throws IOException {
  FileStatus[] contents = fs.listStatus(jobDirPath);
  int matchCount = 0;
  if (contents != null && contents.length >= 2) {
    for (FileStatus status : contents) {
      if ("job.xml".equals(status.getPath().getName())) {
        ++matchCount;
      }
      if ("job.split".equals(status.getPath().getName())) {
        ++matchCount;
      }
    }
    if (matchCount == 2) {
      return true;
    }
  }
  return false;
}
Example 8
Source File: TestDataJoin.java From RDFS with Apache License 2.0 | 6 votes |
private static void confirmOutput(Path out, JobConf job, int srcs) throws IOException {
  FileSystem fs = out.getFileSystem(job);
  FileStatus[] outlist = fs.listStatus(out);
  assertEquals(1, outlist.length);
  assertTrue(0 < outlist[0].getLen());
  FSDataInputStream in = fs.open(outlist[0].getPath());
  LineRecordReader rr = new LineRecordReader(in, 0, Integer.MAX_VALUE, job);
  LongWritable k = new LongWritable();
  Text v = new Text();
  int count = 0;
  while (rr.next(k, v)) {
    String[] vals = v.toString().split("\t");
    assertEquals(srcs + 1, vals.length);
    int[] ivals = new int[vals.length];
    for (int i = 0; i < vals.length; ++i)
      ivals[i] = Integer.parseInt(vals[i]);
    assertEquals(0, ivals[0] % (srcs * srcs));
    for (int i = 1; i < vals.length; ++i) {
      assertEquals((ivals[i] - (i - 1)) * srcs, 10 * ivals[0]);
    }
    ++count;
  }
  assertEquals(4, count);
}
Example 9
Source File: ReaderTextCSV.java From systemds with Apache License 2.0 | 5 votes |
@SuppressWarnings("unchecked")
private static MatrixBlock readCSVMatrixFromHDFS(Path path, JobConf job, FileSystem fs,
    MatrixBlock dest, long rlen, long clen, int blen, boolean hasHeader, String delim,
    boolean fill, double fillValue)
  throws IOException, DMLRuntimeException
{
  //prepare file paths in alphanumeric order
  ArrayList<Path> files = new ArrayList<>();
  if(fs.isDirectory(path)) {
    for(FileStatus stat : fs.listStatus(path, IOUtilFunctions.hiddenFileFilter))
      files.add(stat.getPath());
    Collections.sort(files);
  }
  else
    files.add(path);

  //determine matrix size via additional pass if required
  if( dest == null ) {
    dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue);
    clen = dest.getNumColumns();
  }

  //actual read of individual files
  long lnnz = 0;
  MutableInt row = new MutableInt(0);
  for(int fileNo = 0; fileNo < files.size(); fileNo++) {
    lnnz += readCSVMatrixFromInputStream(fs.open(files.get(fileNo)), path.toString(), dest,
      row, rlen, clen, blen, hasHeader, delim, fill, fillValue, fileNo == 0);
  }

  //post processing
  dest.setNonZeros( lnnz );

  return dest;
}
Example 10
Source File: TestDeleteMobTable.java From hbase with Apache License 2.0 | 5 votes |
private int countMobFiles(TableName tn, String familyName) throws IOException {
  FileSystem fs = TEST_UTIL.getTestFileSystem();
  Path mobFileDir = MobUtils.getMobFamilyPath(TEST_UTIL.getConfiguration(), tn, familyName);
  if (fs.exists(mobFileDir)) {
    return fs.listStatus(mobFileDir).length;
  }
  return 0;
}
Example 11
Source File: StorageUtil.java From tajo with Apache License 2.0 | 5 votes |
/**
 * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*".
 *
 * This method finds the maximum sequence number from existing data files through the above patterns.
 * If it cannot find any matched file or the maximum number, it will return -1.
 *
 * @param fs
 * @param path
 * @param recursive
 * @return The maximum sequence number
 * @throws java.io.IOException
 */
public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException {
  if (!fs.isDirectory(path)) {
    return -1;
  }

  FileStatus[] files = fs.listStatus(path);
  if (files == null || files.length == 0) {
    return -1;
  }

  int maxValue = -1;
  for (FileStatus eachFile : files) {
    // In the case of partition table, return largest value within all partition dirs.
    int value;
    if (eachFile.isDirectory() && recursive) {
      value = getMaxFileSequence(fs, eachFile.getPath(), recursive);
      if (value > maxValue) {
        maxValue = value;
      }
    } else {
      if (eachFile.getPath().getName().matches(fileNamePatternV08) ||
          eachFile.getPath().getName().matches(fileNamePatternV09)) {
        value = getSequence(eachFile.getPath().getName());
        if (value > maxValue) {
          maxValue = value;
        }
      }
    }
  }
  return maxValue;
}
Example 12
Source File: JobLauncherUtils.java From incubator-gobblin with Apache License 2.0 | 5 votes |
/**
 * Cleanup staging data of all tasks of a job.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

  String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

  Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
  logger.info("Cleaning up staging directory " + jobStagingPath);
  HadoopUtils.deletePath(fs, jobStagingPath, true);

  if (fs.exists(jobStagingPath.getParent()) && fs.listStatus(jobStagingPath.getParent()).length == 0) {
    logger.info("Deleting directory " + jobStagingPath.getParent());
    HadoopUtils.deletePath(fs, jobStagingPath.getParent(), true);
  }

  Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
  logger.info("Cleaning up output directory " + jobOutputPath);
  HadoopUtils.deletePath(fs, jobOutputPath, true);

  if (fs.exists(jobOutputPath.getParent()) && fs.listStatus(jobOutputPath.getParent()).length == 0) {
    logger.info("Deleting directory " + jobOutputPath.getParent());
    HadoopUtils.deletePath(fs, jobOutputPath.getParent(), true);
  }

  if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)) {
    if (state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
      Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
      log.info("Cleaning up err directory : " + jobErrPath);
      HadoopUtils.deleteIfExists(fs, jobErrPath, true);
    }
  }
}
Example 13
Source File: TableShardCountCollapser.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private Path[] getPaths() throws IOException {
  FileSystem fileSystem = path.getFileSystem(getConf());
  FileStatus[] listStatus = fileSystem.listStatus(path);
  SortedSet<Path> shards = new TreeSet<Path>();
  for (FileStatus status : listStatus) {
    Path shardPath = status.getPath();
    if (shardPath.getName().startsWith(BlurConstants.SHARD_PREFIX)) {
      shards.add(shardPath);
    }
  }
  return shards.toArray(new Path[shards.size()]);
}
Example 14
Source File: IndexImporter.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
private Map<Path, Path> toMap(FileSystem fileSystem, Set<Path> inuseDirs) throws IOException {
  Map<Path, Path> result = new TreeMap<Path, Path>();
  for (Path p : inuseDirs) {
    if (!fileSystem.isFile(p)) {
      FileStatus[] listStatus = fileSystem.listStatus(p);
      for (FileStatus status : listStatus) {
        result.put(status.getPath(), p);
      }
    }
  }
  return result;
}
Example 15
Source File: MapReduceRunner.java From halvade with GNU General Public License v3.0 | 4 votes |
protected int runPass1RNAJob(Configuration pass1Conf, String tmpOutDir)
    throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
  HalvadeConf.setIsPass2(pass1Conf, false);
  HalvadeResourceManager.setJobResources(halvadeOpts, pass1Conf,
      HalvadeResourceManager.RNA_SHMEM_PASS1, halvadeOpts.nodes == 1, halvadeOpts.useBamInput);
  int pass2Reduces = HalvadeResourceManager.getPass2Reduces(halvadeOpts);
  halvadeOpts.splitChromosomes(pass1Conf, pass2Reduces);
  HalvadeConf.setPass2Suffix(pass1Conf, pass2suffix);

  Job pass1Job = Job.getInstance(pass1Conf, "Halvade pass 1 RNA pipeline");
  pass1Job.addCacheArchive(new URI(halvadeOpts.halvadeBinaries));
  pass1Job.setJarByClass(be.ugent.intec.halvade.hadoop.mapreduce.HalvadeMapper.class);
  // set pass 2 suffix so only this job finds it!
  FileSystem fs = FileSystem.get(new URI(halvadeOpts.in), pass1Conf);
  try {
    if (fs.getFileStatus(new Path(halvadeOpts.in)).isDirectory()) {
      // add every file in directory
      FileStatus[] files = fs.listStatus(new Path(halvadeOpts.in));
      for (FileStatus file : files) {
        if (!file.isDirectory()) {
          FileInputFormat.addInputPath(pass1Job, file.getPath());
        }
      }
    } else {
      FileInputFormat.addInputPath(pass1Job, new Path(halvadeOpts.in));
    }
  } catch (IOException | IllegalArgumentException e) {
    Logger.EXCEPTION(e);
  }

  FileSystem outFs = FileSystem.get(new URI(tmpOutDir), pass1Conf);
  boolean skipPass1 = false;
  if (outFs.exists(new Path(tmpOutDir))) {
    // check if genome already exists
    skipPass1 = outFs.exists(new Path(tmpOutDir + "/_SUCCESS"));
    if (skipPass1)
      Logger.DEBUG("pass1 genome already created, skipping pass 1");
    else {
      Logger.INFO("The output directory \'" + tmpOutDir + "\' already exists.");
      Logger.INFO("ERROR: Please remove this directory before trying again.");
      System.exit(-2);
    }
  }
  if (!skipPass1) {
    FileOutputFormat.setOutputPath(pass1Job, new Path(tmpOutDir));
    pass1Job.setMapperClass(be.ugent.intec.halvade.hadoop.mapreduce.StarAlignPassXMapper.class);

    pass1Job.setInputFormatClass(HalvadeTextInputFormat.class);
    pass1Job.setMapOutputKeyClass(GenomeSJ.class);
    pass1Job.setMapOutputValueClass(Text.class);

    pass1Job.setSortComparatorClass(GenomeSJSortComparator.class);
    pass1Job.setGroupingComparatorClass(GenomeSJGroupingComparator.class);
    pass1Job.setNumReduceTasks(1);
    pass1Job.setReducerClass(be.ugent.intec.halvade.hadoop.mapreduce.RebuildStarGenomeReducer.class);
    pass1Job.setOutputKeyClass(LongWritable.class);
    pass1Job.setOutputValueClass(Text.class);

    return runTimedJob(pass1Job, "Halvade pass 1 Job");
  } else
    return 0;
}
Example 16
Source File: TestDistCh.java From hadoop with Apache License 2.0 | 4 votes |
public void testDistCh() throws Exception {
  final Configuration conf = new Configuration();

  conf.set(CapacitySchedulerConfiguration.PREFIX + CapacitySchedulerConfiguration.ROOT + "."
      + CapacitySchedulerConfiguration.QUEUES, "default");
  conf.set(CapacitySchedulerConfiguration.PREFIX + CapacitySchedulerConfiguration.ROOT + ".default."
      + CapacitySchedulerConfiguration.CAPACITY, "100");
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).format(true).build();

  final FileSystem fs = cluster.getFileSystem();
  final FsShell shell = new FsShell(conf);

  try {
    final FileTree tree = new FileTree(fs, "testDistCh");
    final FileStatus rootstatus = fs.getFileStatus(tree.rootdir);

    runLsr(shell, tree.root, 0);

    final String[] args = new String[NUN_SUBS];
    final ChPermissionStatus[] newstatus = new ChPermissionStatus[NUN_SUBS];

    args[0] = "/test/testDistCh/sub0:sub1::";
    newstatus[0] = new ChPermissionStatus(rootstatus, "sub1", "", "");

    args[1] = "/test/testDistCh/sub1::sub2:";
    newstatus[1] = new ChPermissionStatus(rootstatus, "", "sub2", "");

    args[2] = "/test/testDistCh/sub2:::437";
    newstatus[2] = new ChPermissionStatus(rootstatus, "", "", "437");

    args[3] = "/test/testDistCh/sub3:sub1:sub2:447";
    newstatus[3] = new ChPermissionStatus(rootstatus, "sub1", "sub2", "447");

    args[4] = "/test/testDistCh/sub4::sub5:437";
    newstatus[4] = new ChPermissionStatus(rootstatus, "", "sub5", "437");

    args[5] = "/test/testDistCh/sub5:sub1:sub5:";
    newstatus[5] = new ChPermissionStatus(rootstatus, "sub1", "sub5", "");

    args[6] = "/test/testDistCh/sub6:sub3::437";
    newstatus[6] = new ChPermissionStatus(rootstatus, "sub3", "", "437");

    System.out.println("args=" + Arrays.asList(args).toString().replace(",", ",\n "));
    System.out.println("newstatus=" + Arrays.asList(newstatus).toString().replace(",", ",\n "));

    //run DistCh
    new DistCh(MiniMRClientClusterFactory.create(this.getClass(), 2, conf).getConfig()).run(args);
    runLsr(shell, tree.root, 0);

    //check results
    for (int i = 0; i < NUN_SUBS; i++) {
      Path sub = new Path(tree.root + "/sub" + i);
      checkFileStatus(newstatus[i], fs.getFileStatus(sub));
      for (FileStatus status : fs.listStatus(sub)) {
        checkFileStatus(newstatus[i], status);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
Example 17
Source File: GenericMRLoadGenerator.java From big-c with Apache License 2.0 | 4 votes |
public int run(String[] argv) throws Exception {
  Job job = Job.getInstance(getConf());
  job.setJarByClass(GenericMRLoadGenerator.class);
  job.setMapperClass(SampleMapper.class);
  job.setReducerClass(SampleReducer.class);
  if (!parseArgs(argv, job)) {
    return -1;
  }

  Configuration conf = job.getConfiguration();
  if (null == FileOutputFormat.getOutputPath(job)) {
    // No output dir? No writes
    job.setOutputFormatClass(NullOutputFormat.class);
  }

  if (0 == FileInputFormat.getInputPaths(job).length) {
    // No input dir? Generate random data
    System.err.println("No input path; ignoring InputFormat");
    confRandom(job);
  } else if (null != conf.getClass(INDIRECT_INPUT_FORMAT, null)) {
    // specified IndirectInputFormat? Build src list
    JobClient jClient = new JobClient(conf);
    Path tmpDir = new Path("/tmp");
    Random r = new Random();
    Path indirInputFile = new Path(tmpDir,
        Integer.toString(r.nextInt(Integer.MAX_VALUE), 36) + "_files");
    conf.set(INDIRECT_INPUT_FILE, indirInputFile.toString());
    SequenceFile.Writer writer = SequenceFile.createWriter(
        tmpDir.getFileSystem(conf), conf, indirInputFile,
        LongWritable.class, Text.class,
        SequenceFile.CompressionType.NONE);
    try {
      for (Path p : FileInputFormat.getInputPaths(job)) {
        FileSystem fs = p.getFileSystem(conf);
        Stack<Path> pathstack = new Stack<Path>();
        pathstack.push(p);
        while (!pathstack.empty()) {
          for (FileStatus stat : fs.listStatus(pathstack.pop())) {
            if (stat.isDirectory()) {
              if (!stat.getPath().getName().startsWith("_")) {
                pathstack.push(stat.getPath());
              }
            } else {
              writer.sync();
              writer.append(new LongWritable(stat.getLen()),
                  new Text(stat.getPath().toUri().toString()));
            }
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 + " seconds.");

  return ret;
}
Example 18
Source File: Examples.java From datafu with Apache License 2.0 | 4 votes |
private int countOutputFolders(Path path) throws IOException {
  FileSystem fs = getFileSystem();
  return fs.listStatus(path, PathUtils.nonHiddenPathFilter).length;
}
Example 19
Source File: TestHBaseTable.java From tajo with Apache License 2.0 | 4 votes |
@Test
public void testInsertIntoLocation() throws Exception {
  executeString(
      "CREATE TABLE hbase_mapped_table (rk text, col1 text, col2 text) TABLESPACE cluster1 " +
      "USING hbase WITH ('table'='hbase_table', 'columns'=':key,col1:a,col2:', " +
      "'hbase.split.rowkeys'='010,040,060,080')").close();

  assertTableExists("hbase_mapped_table");

  try {
    // create test table
    Schema schema = SchemaBuilder.builder()
        .add("id", Type.TEXT)
        .add("name", Type.TEXT)
        .add("comment", Type.TEXT)
        .build();
    List<String> datas = new ArrayList<>();
    DecimalFormat df = new DecimalFormat("000");
    for (int i = 99; i >= 0; i--) {
      datas.add(df.format(i) + "|value" + i + "|comment-" + i);
    }
    TajoTestingCluster.createTable(conf, getCurrentDatabase() + ".base_table",
        schema, datas.toArray(new String[datas.size()]), 2);

    executeString("insert into location '/tmp/hfile_test' " +
        "select id, name, comment from base_table ").close();

    FileSystem fs = testingCluster.getDefaultFileSystem();
    Path path = new Path("/tmp/hfile_test");
    assertTrue(fs.exists(path));

    FileStatus[] files = fs.listStatus(path);
    assertNotNull(files);
    assertEquals(2, files.length);

    int index = 0;
    for (FileStatus eachFile : files) {
      assertEquals("/tmp/hfile_test/part-01-00000" + index + "-00" + index,
          eachFile.getPath().toUri().getPath());
      for (FileStatus subFile : fs.listStatus(eachFile.getPath())) {
        assertTrue(subFile.isFile());
        assertTrue(subFile.getLen() > 0);
      }
      index++;
    }
  } finally {
    executeString("DROP TABLE base_table PURGE").close();
    executeString("DROP TABLE hbase_mapped_table PURGE").close();
  }
}
Example 20
Source File: TestMRRJobsDAGApi.java From tez with Apache License 2.0 | 4 votes |
@Test(timeout = 60000)
public void testHistoryLogging() throws IOException, InterruptedException,
    TezException, ClassNotFoundException, YarnException {
  SleepProcessorConfig spConf = new SleepProcessorConfig(1);

  DAG dag = DAG.create("TezSleepProcessorHistoryLogging");
  Vertex vertex = Vertex.create("SleepVertex", ProcessorDescriptor.create(
      SleepProcessor.class.getName()).setUserPayload(spConf.toUserPayload()), 2,
      Resource.newInstance(1024, 1));
  dag.addVertex(vertex);

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  Path remoteStagingDir = remoteFs.makeQualified(new Path("/tmp", String.valueOf(random
      .nextInt(100000))));
  remoteFs.mkdirs(remoteStagingDir);
  tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

  FileSystem localFs = FileSystem.getLocal(tezConf);
  Path historyLogDir = new Path(TEST_ROOT_DIR, "testHistoryLogging");
  localFs.mkdirs(historyLogDir);

  tezConf.set(TezConfiguration.TEZ_SIMPLE_HISTORY_LOGGING_DIR,
      localFs.makeQualified(historyLogDir).toString());

  tezConf.setBoolean(TezConfiguration.TEZ_AM_SESSION_MODE, false);
  TezClient tezSession = TezClient.create("TezSleepProcessorHistoryLogging", tezConf);
  tezSession.start();

  DAGClient dagClient = tezSession.submitDAG(dag);

  DAGStatus dagStatus = dagClient.getDAGStatus(null);
  while (!dagStatus.isCompleted()) {
    LOG.info("Waiting for job to complete. Sleeping for 500ms." + " Current state: "
        + dagStatus.getState());
    Thread.sleep(500l);
    dagStatus = dagClient.getDAGStatus(null);
  }
  assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());

  FileStatus historyLogFileStatus = null;
  for (FileStatus fileStatus : localFs.listStatus(historyLogDir)) {
    if (fileStatus.isDirectory()) {
      continue;
    }
    Path p = fileStatus.getPath();
    if (p.getName().startsWith(SimpleHistoryLoggingService.LOG_FILE_NAME_PREFIX)) {
      historyLogFileStatus = fileStatus;
      break;
    }
  }
  Assert.assertNotNull(historyLogFileStatus);
  Assert.assertTrue(historyLogFileStatus.getLen() > 0);
  tezSession.stop();
}