Java Code Examples for org.apache.hadoop.fs.FileSystem#delete()
The following examples show how to use org.apache.hadoop.fs.FileSystem#delete().
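Before the project-specific examples, here is a minimal, self-contained sketch of the call itself. It is an illustration only, not taken from any of the projects below; the class name and path are hypothetical, and the boolean argument requests recursive deletion (it must be true to remove a non-empty directory).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemDeleteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);          // file system resolved from the default configuration
    Path target = new Path("/tmp/example-output"); // hypothetical path, used only for illustration
    // delete() returns true if the path was removed; the second argument enables
    // recursive deletion and must be true when the target is a non-empty directory.
    boolean deleted = fs.delete(target, true);
    System.out.println("Deleted " + target + "? " + deleted);
  }
}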
Example 1
Source File: MRAppMaster.java From big-c with Apache License 2.0
/**
 * clean up staging directories for the job.
 * @throws IOException
 */
public void cleanupStagingDir() throws IOException {
  /* make sure we clean the staging files */
  String jobTempDir = null;
  FileSystem fs = getFileSystem(getConfig());
  try {
    if (!keepJobFiles(new JobConf(getConfig()))) {
      jobTempDir = getConfig().get(MRJobConfig.MAPREDUCE_JOB_DIR);
      if (jobTempDir == null) {
        LOG.warn("Job Staging directory is null");
        return;
      }
      Path jobTempDirPath = new Path(jobTempDir);
      LOG.info("Deleting staging directory " + FileSystem.getDefaultUri(getConfig()) + " " + jobTempDir);
      fs.delete(jobTempDirPath, true);
    }
  } catch (IOException io) {
    LOG.error("Failed to cleanup staging dir " + jobTempDir, io);
  }
}
Example 2
Source File: NativeAzureFileSystemBaseTest.java From hadoop with Apache License 2.0
@Test
public void testCopyFromLocalFileSystem() throws Exception {
  Path localFilePath = new Path(System.getProperty("test.build.data", "azure_test"));
  FileSystem localFs = FileSystem.get(new Configuration());
  localFs.delete(localFilePath, true);
  try {
    writeString(localFs, localFilePath, "Testing");
    Path dstPath = new Path("copiedFromLocal");
    assertTrue(FileUtil.copy(localFs, localFilePath, fs, dstPath, false, fs.getConf()));
    assertTrue(fs.exists(dstPath));
    assertEquals("Testing", readString(fs, dstPath));
    fs.delete(dstPath, true);
  } finally {
    localFs.delete(localFilePath, true);
  }
}
Example 3
Source File: BaseTestHBaseFsck.java From hbase with Apache License 2.0
public void deleteTableDir(TableName table) throws IOException {
  Path rootDir = CommonFSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  Path p = CommonFSUtils.getTableDir(rootDir, table);
  HBaseFsck.debugLsr(conf, p);
  boolean success = fs.delete(p, true);
  LOG.info("Deleted " + p + " successfully? " + success);
}
Example 4
Source File: HoplogUtilJUnitTest.java From gemfirexd-oss with Apache License 2.0
@Override
protected void tearDown() throws Exception {
  FileSystem fs = hdfsStore.getFileSystem();
  Path cleanUpIntervalPath = new Path(hdfsStore.getHomeDir(), HoplogConfig.CLEAN_UP_INTERVAL_FILE_NAME);
  if (fs.exists(cleanUpIntervalPath)) {
    fs.delete(cleanUpIntervalPath, true);
  }
  super.tearDown();
}
Example 5
Source File: VisualJob.java From multimedia-indexing with Apache License 2.0
private Job createJob(String inputPath, String outputPath) throws Exception {
  Configuration conf = getConf();
  Job job = new Job(conf);
  job.setJarByClass(VisualJob.class);
  job.setNumReduceTasks(90);

  FileSystem fs = FileSystem.get(new URI(outputPath), conf);
  if (fs.exists(new Path(outputPath))) {
    fs.delete(new Path(outputPath), true);
  }

  FileInputFormat.setInputPaths(job, new Path(inputPath));
  FileOutputFormat.setOutputPath(job, new Path(outputPath));
  FileOutputFormat.setCompressOutput(job, true);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(FloatArrayWritable.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setMapperClass(VisualThreadedMapper.class);
  job.setReducerClass(VisualReducer.class);
  return job;
}
Example 6
Source File: TestFileSystems.java From incubator-tajo with Apache License 2.0
public Path getTestDir(FileSystem fs, String dir) throws IOException {
  Path path = new Path(dir);
  if (fs.exists(path)) {
    fs.delete(path, true);
  }
  fs.mkdirs(path);
  return fs.makeQualified(path);
}
Example 7
Source File: HdfsDirFile.java From spliceengine with GNU Affero General Public License v3.0
@Override
public boolean delete() {
  try {
    FileSystem fs = getFileSystem();
    return fs.delete(new Path(path), false);
  } catch (IOException e) {
    LOG.error(String.format("An exception occurred while deleting the path '%s'.", path), e);
    return false;
  }
}
Example 8
Source File: TestJobInProgress.java From RDFS with Apache License 2.0
@SuppressWarnings("unchecked") JobConf configure(Class MapClass,Class ReduceClass, int maps, int reducers, boolean locality) throws Exception { JobConf jobConf = mrCluster.createJobConf(); final Path inDir = new Path("./failjob/input"); final Path outDir = new Path("./failjob/output"); String input = "Test failing job.\n One more line"; FileSystem inFs = inDir.getFileSystem(jobConf); FileSystem outFs = outDir.getFileSystem(jobConf); outFs.delete(outDir, true); if (!inFs.mkdirs(inDir)) { throw new IOException("create directory failed" + inDir.toString()); } DataOutputStream file = inFs.create(new Path(inDir, "part-0")); file.writeBytes(input); file.close(); jobConf.setJobName("failmaptask"); if (locality) { jobConf.setInputFormat(TextInputFormat.class); } else { jobConf.setInputFormat(UtilsForTests.RandomInputFormat.class); } jobConf.setOutputKeyClass(Text.class); jobConf.setOutputValueClass(Text.class); jobConf.setMapperClass(MapClass); jobConf.setCombinerClass(ReduceClass); jobConf.setReducerClass(ReduceClass); FileInputFormat.setInputPaths(jobConf, inDir); FileOutputFormat.setOutputPath(jobConf, outDir); jobConf.setNumMapTasks(maps); jobConf.setNumReduceTasks(reducers); return jobConf; }
Example 9
Source File: TestLocalModeWithNewApis.java From hadoop with Apache License 2.0
@Test
public void testNewApis() throws Exception {
  Random r = new Random(System.currentTimeMillis());
  Path tmpBaseDir = new Path("/tmp/wc-" + r.nextInt());
  final Path inDir = new Path(tmpBaseDir, "input");
  final Path outDir = new Path(tmpBaseDir, "output");
  String input = "The quick brown fox\nhas many silly\nred fox sox\n";
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  {
    DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
    file.writeBytes(input);
    file.close();
  }

  Job job = Job.getInstance(conf, "word count");
  job.setJarByClass(TestLocalModeWithNewApis.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setCombinerClass(IntSumReducer.class);
  job.setReducerClass(IntSumReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileInputFormat.addInputPath(job, inDir);
  FileOutputFormat.setOutputPath(job, outDir);
  assertEquals(job.waitForCompletion(true), true);
  String output = readOutput(outDir, conf);
  assertEquals("The\t1\nbrown\t1\nfox\t2\nhas\t1\nmany\t1\n" +
      "quick\t1\nred\t1\nsilly\t1\nsox\t1\n", output);

  outFs.delete(tmpBaseDir, true);
}
Example 10
Source File: AbstractPutHDFSRecord.java From nifi with Apache License 2.0
/**
 * Attempts to rename srcFile to destFile up to 10 times, with a 200ms sleep in between each attempt.
 *
 * If the file has not been renamed after 10 attempts, a FailureException is thrown.
 *
 * @param fileSystem the file system where the files are located
 * @param srcFile the source file
 * @param destFile the destination file to rename the source to
 * @throws IOException if IOException happens while attempting to rename
 * @throws InterruptedException if renaming is interrupted
 * @throws FailureException if the file couldn't be renamed after 10 attempts
 */
protected void rename(final FileSystem fileSystem, final Path srcFile, final Path destFile)
    throws IOException, InterruptedException, FailureException {
  boolean renamed = false;
  for (int i = 0; i < 10; i++) { // try to rename multiple times.
    if (fileSystem.rename(srcFile, destFile)) {
      renamed = true;
      break; // rename was successful
    }
    Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
  }
  if (!renamed) {
    fileSystem.delete(srcFile, false);
    throw new FailureException("Could not rename file " + srcFile + " to its final filename");
  }
}
Example 11
Source File: HFilePerformanceEvaluation.java From hbase with Apache License 2.0
/**
 * Write a test HFile with the given codec & cipher
 * @param conf
 * @param fs
 * @param mf
 * @param codec "none", "lzo", "gz", "snappy"
 * @param cipher "none", "aes"
 * @throws Exception
 */
private void runWriteBenchmark(Configuration conf, FileSystem fs, Path mf, String codec,
    String cipher) throws Exception {
  if (fs.exists(mf)) {
    fs.delete(mf, true);
  }

  runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT, codec, cipher),
      ROW_COUNT, codec, getCipherName(conf, cipher));
}
Example 12
Source File: DistributedCacheUtilImplOSDependentTest.java From pentaho-hadoop-shims with Apache License 2.0
@Test
public void findFiles_hdfs_native() throws Exception {
  DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl();

  // Copy the contents of test folder
  FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
  Path root = new Path( "bin/test/stageArchiveForCacheTest" );
  Configuration conf = new Configuration();
  FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem( conf );
  Path dest = new Path( root, "org/pentaho/mapreduce/" );
  try {
    try {
      ch.stageForCache( source, fs, dest, true );

      List<Path> files = ch.findFiles( fs, dest, null );
      assertEquals( 6, files.size() );

      files = ch.findFiles( fs, dest, Pattern.compile( ".*jar$" ) );
      assertEquals( 2, files.size() );

      files = ch.findFiles( fs, dest, Pattern.compile( ".*folder$" ) );
      assertEquals( 1, files.size() );
    } finally {
      fs.delete( root, true );
    }
  } finally {
    source.delete( new AllFileSelector() );
  }
}
Example 13
Source File: TestJavaSerialization.java From big-c with Apache License 2.0
private void cleanAndCreateInput(FileSystem fs) throws IOException {
  fs.delete(INPUT_FILE, true);
  fs.delete(OUTPUT_DIR, true);

  OutputStream os = fs.create(INPUT_FILE);

  Writer wr = new OutputStreamWriter(os);
  wr.write("b a\n");
  wr.close();
}
Example 14
Source File: TestRecoveredEdits.java From hbase with Apache License 2.0
private void testReplayWorksWithMemoryCompactionPolicy(MemoryCompactionPolicy policy)
    throws IOException {
  Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
  // Set it so we flush every 1M or so. That's a lot.
  conf.setInt(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 1024 * 1024);
  conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY, String.valueOf(policy).toLowerCase());
  // The file of recovered edits has a column family of 'meta'.
  final String columnFamily = "meta";
  byte[][] columnFamilyAsByteArray = new byte[][] { Bytes.toBytes(columnFamily) };
  TableDescriptor tableDescriptor = TableDescriptorBuilder
      .newBuilder(TableName.valueOf(testName.getMethodName())).setColumnFamily(
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(columnFamily)).build())
      .build();
  RegionInfo hri = RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
  final String encodedRegionName = hri.getEncodedName();
  Path hbaseRootDir = TEST_UTIL.getDataTestDir();
  FileSystem fs = FileSystem.get(TEST_UTIL.getConfiguration());
  Path tableDir = CommonFSUtils.getTableDir(hbaseRootDir, tableDescriptor.getTableName());
  HRegionFileSystem hrfs =
      new HRegionFileSystem(TEST_UTIL.getConfiguration(), fs, tableDir, hri);
  if (fs.exists(hrfs.getRegionDir())) {
    LOG.info("Region directory already exists. Deleting.");
    fs.delete(hrfs.getRegionDir(), true);
  }
  HRegion region = HBaseTestingUtility
      .createRegionAndWAL(hri, hbaseRootDir, conf, tableDescriptor, blockCache);
  assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
  List<String> storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
  // There should be no store files.
  assertTrue(storeFiles.isEmpty());
  region.close();
  Path regionDir = FSUtils.getRegionDirFromRootDir(hbaseRootDir, hri);
  Path recoveredEditsDir = WALSplitUtil.getRegionDirRecoveredEditsDir(regionDir);
  // This is a little fragile getting this path to a file of 10M of edits.
  Path recoveredEditsFile = new Path(
      System.getProperty("test.build.classes", "target/test-classes"), "0000000000000016310");
  // Copy this file under the region's recovered.edits dir so it is replayed on reopen.
  Path destination = new Path(recoveredEditsDir, recoveredEditsFile.getName());
  fs.copyToLocalFile(recoveredEditsFile, destination);
  assertTrue(fs.exists(destination));
  // Now the file 0000000000000016310 is under recovered.edits, reopen the region to replay.
  region = HRegion.openHRegion(region, null);
  assertEquals(encodedRegionName, region.getRegionInfo().getEncodedName());
  storeFiles = region.getStoreFileList(columnFamilyAsByteArray);
  // Our 0000000000000016310 is 10MB. Most of the edits are for one region. Let's assume that if
  // we flush at 1MB, that there are at least 3 flushed files that are there because of the
  // replay of edits.
  if (policy == MemoryCompactionPolicy.EAGER || policy == MemoryCompactionPolicy.ADAPTIVE) {
    assertTrue("Files count=" + storeFiles.size(), storeFiles.size() >= 1);
  } else {
    assertTrue("Files count=" + storeFiles.size(), storeFiles.size() > 10);
  }
  // Now verify all edits made it into the region.
  int count = verifyAllEditsMadeItIn(fs, conf, recoveredEditsFile, region);
  LOG.info("Checked " + count + " edits made it in");
}
Example 15
Source File: TestHDFSEventSink.java From mt-flume with Apache License 2.0
@Test
public void testSlowAppendFailure() throws InterruptedException,
    LifecycleException, EventDeliveryException, IOException {
  LOG.debug("Starting...");
  final String fileName = "FlumeData";
  final long rollCount = 5;
  final long batchSize = 2;
  final int numBatches = 2;
  String newPath = testPath + "/singleBucket";
  int i = 1, j = 1;

  // clear the test directory
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Path dirPath = new Path(newPath);
  fs.delete(dirPath, true);
  fs.mkdirs(dirPath);

  // create HDFS sink with slow writer
  HDFSBadWriterFactory badWriterFactory = new HDFSBadWriterFactory();
  sink = new HDFSEventSink(badWriterFactory);

  Context context = new Context();
  context.put("hdfs.path", newPath);
  context.put("hdfs.filePrefix", fileName);
  context.put("hdfs.rollCount", String.valueOf(rollCount));
  context.put("hdfs.batchSize", String.valueOf(batchSize));
  context.put("hdfs.fileType", HDFSBadWriterFactory.BadSequenceFileType);
  context.put("hdfs.callTimeout", Long.toString(1000));
  Configurables.configure(sink, context);

  Channel channel = new MemoryChannel();
  Configurables.configure(channel, context);

  sink.setChannel(channel);
  sink.start();

  Calendar eventDate = Calendar.getInstance();

  // push the event batches into channel
  for (i = 0; i < numBatches; i++) {
    Transaction txn = channel.getTransaction();
    txn.begin();
    for (j = 1; j <= batchSize; j++) {
      Event event = new SimpleEvent();
      eventDate.clear();
      eventDate.set(2011, i, i, i, 0); // yy mm dd
      event.getHeaders().put("timestamp",
          String.valueOf(eventDate.getTimeInMillis()));
      event.getHeaders().put("hostname", "Host" + i);
      event.getHeaders().put("slow", "1500");
      event.setBody(("Test." + i + "." + j).getBytes());
      channel.put(event);
    }
    txn.commit();
    txn.close();

    // execute sink to process the events
    Status status = sink.process();

    // verify that the append returned backoff due to timeout
    Assert.assertEquals(status, Status.BACKOFF);
  }

  sink.stop();
}
Example 16
Source File: FlowSort.java From MapReduce-Demo with MIT License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  // Set HDFS configuration
  String namenode_ip = "192.168.17.10";
  String hdfs = "hdfs://" + namenode_ip + ":9000";
  Configuration conf = new Configuration();
  conf.set("fs.defaultFS", hdfs);
  conf.set("mapreduce.app-submission.cross-platform", "true");

  // Set job configuration
  Job job = Job.getInstance(conf, "FlowSort");
  job.setJarByClass(FlowSort.class);
  job.setJar("export\\FlowSort.jar");
  // Mapper
  job.setMapperClass(SortMapper.class);
  job.setMapOutputKeyClass(MySortKey.class);
  job.setMapOutputValueClass(Text.class);
  // Reducer
  job.setReducerClass(SortReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(MySortKey.class);

  // Job input and output paths
  String dataDir = "/workspace/flowStatistics/output/part-r-00000"; // input data
  String outputDir = "/workspace/flowStatistics/output_sort";       // output directory
  Path inPath = new Path(hdfs + dataDir);
  Path outPath = new Path(hdfs + outputDir);
  FileInputFormat.addInputPath(job, inPath);
  FileOutputFormat.setOutputPath(job, outPath);
  FileSystem fs = FileSystem.get(conf);
  if (fs.exists(outPath)) {
    fs.delete(outPath, true);
  }

  // Run the job
  System.out.println("Job: FlowSort is running...");
  if (job.waitForCompletion(true)) {
    System.out.println("success!");
    System.exit(0);
  } else {
    System.out.println("failed!");
    System.exit(1);
  }
}
Example 17
Source File: Regression.java From ml-ease with Apache License 2.0
@Override
public void run() throws Exception {
  JobConfig config = super.getJobConfig();
  Path outBasePath = new Path(config.get(OUTPUT_BASE_PATH));
  JobConf conf = super.createJobConf();
  if (config.getBoolean("force.output.overwrite", false)) {
    FileSystem fs = outBasePath.getFileSystem(conf);
    fs.delete(outBasePath, true);
  }

  String prepareOutputPath = outBasePath + "/tmp-data";

  // first run the preparation job
  JobConfig configPrepare = JobConfig.clone(config);
  configPrepare.put(AbstractAvroJob.OUTPUT_PATH, prepareOutputPath);
  RegressionPrepare regressionPrepareJob = new RegressionPrepare("Regression-Prepare", configPrepare);
  regressionPrepareJob.run();

  // now start running the regression train using admm
  JobConfig configTrain = JobConfig.clone(config);
  configTrain.put(AbstractAvroJob.INPUT_PATHS, prepareOutputPath);
  RegressionAdmmTrain regressionAdmmTrainJob = new RegressionAdmmTrain("Regression-Admm-Train", configTrain);
  regressionAdmmTrainJob.run();

  // now test
  if (config.containsKey(TEST_PATH)) {
    JobConfig configTest = JobConfig.clone(config);
    configTest.put(AbstractAvroJob.INPUT_PATHS, config.get(TEST_PATH));
    configTest.put(RegressionTest.MODEL_BASE_PATH, outBasePath.toString());
    String outTestBasePath = outBasePath.toString() + "/test";
    configTest.put(RegressionTest.OUTPUT_BASE_PATH, outTestBasePath);
    RegressionTest regressionTestJob = new RegressionTest("Regression-Test", configTest);
    regressionTestJob.run();

    // compute test loglikelihood
    JobConfig configTestLoglik = JobConfig.clone(config);
    configTestLoglik.put(RegressionTestLoglik.INPUT_BASE_PATHS, outTestBasePath);
    configTestLoglik.put(RegressionTestLoglik.OUTPUT_BASE_PATH, outTestBasePath);
    RegressionTestLoglik regressionTestLoglikJob = new RegressionTestLoglik("Regression-Test-Loglik", configTestLoglik);
    regressionTestLoglikJob.run();
  }
}
Example 18
Source File: TrainingSparkRunner.java From ambiverse-nlu with Apache License 2.0
private void binaryEvaluation(DataFrame predictions, String output, TrainingSettings trainingSettings)
    throws IOException {

  FileSystem fs = FileSystem.get(new Configuration());
  Path evalPath = new Path(output + "binary_evaluation_" + trainingSettings.getClassificationMethod() + ".txt");
  fs.delete(evalPath, true);
  FSDataOutputStream fsdos = fs.create(evalPath);

  BinaryClassificationMetrics metrics = new BinaryClassificationMetrics(predictions
      .select("rawPrediction", "label")
      .javaRDD()
      .map((Row row) -> {
        Vector vector = row.getAs("rawPrediction");
        Double label = row.getAs("label");
        return new Tuple2<Object, Object>(vector.apply(1), label);
      }).rdd());

  // Precision by threshold
  JavaRDD<Tuple2<Object, Object>> precision = metrics.precisionByThreshold().toJavaRDD();
  IOUtils.write("\nPrecision by threshold: " + precision.collect(), fsdos);

  // Recall by threshold
  JavaRDD<Tuple2<Object, Object>> recall = metrics.recallByThreshold().toJavaRDD();
  IOUtils.write("\nRecall by threshold: " + recall.collect(), fsdos);

  // F Score by threshold
  JavaRDD<Tuple2<Object, Object>> f1Score = metrics.fMeasureByThreshold().toJavaRDD();
  IOUtils.write("\nF1 Score by threshold: " + f1Score.collect(), fsdos);

  JavaRDD<Tuple2<Object, Object>> f2Score = metrics.fMeasureByThreshold(2.0).toJavaRDD();
  IOUtils.write("\nF2 Score by threshold: " + f2Score.collect(), fsdos);

  // Precision-recall curve
  JavaRDD<Tuple2<Object, Object>> prc = metrics.pr().toJavaRDD();
  IOUtils.write("\nPrecision-recall curve: " + prc.collect(), fsdos);

  // Thresholds
  JavaRDD<Double> thresholds = precision.map(t -> new Double(t._1().toString()));

  // ROC Curve
  JavaRDD<Tuple2<Object, Object>> roc = metrics.roc().toJavaRDD();
  IOUtils.write("\nROC curve: " + roc.collect(), fsdos);

  // AUPRC
  IOUtils.write("\nArea under precision-recall curve = " + metrics.areaUnderPR(), fsdos);

  // AUROC
  IOUtils.write("\nArea under ROC = " + metrics.areaUnderROC(), fsdos);

  fsdos.flush();
  IOUtils.closeQuietly(fsdos);
}
Example 19
Source File: TestHFileCleaner.java From hbase with Apache License 2.0
@Test
public void testOnConfigurationChange() throws Exception {
  // constants
  final int ORIGINAL_THROTTLE_POINT = 512 * 1024;
  final int ORIGINAL_QUEUE_INIT_SIZE = 512;
  final int UPDATE_THROTTLE_POINT = 1024; // small enough to change large/small check
  final int UPDATE_QUEUE_INIT_SIZE = 1024;
  final int LARGE_FILE_NUM = 5;
  final int SMALL_FILE_NUM = 20;
  final int LARGE_THREAD_NUM = 2;
  final int SMALL_THREAD_NUM = 4;
  final long THREAD_TIMEOUT_MSEC = 30 * 1000L;
  final long THREAD_CHECK_INTERVAL_MSEC = 500L;

  Configuration conf = UTIL.getConfiguration();
  // no cleaner policies = delete all files
  conf.setStrings(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS, "");
  conf.setInt(HFileCleaner.HFILE_DELETE_THROTTLE_THRESHOLD, ORIGINAL_THROTTLE_POINT);
  conf.setInt(HFileCleaner.LARGE_HFILE_QUEUE_INIT_SIZE, ORIGINAL_QUEUE_INIT_SIZE);
  conf.setInt(HFileCleaner.SMALL_HFILE_QUEUE_INIT_SIZE, ORIGINAL_QUEUE_INIT_SIZE);
  Server server = new DummyServer();
  Path archivedHfileDir =
      new Path(UTIL.getDataTestDirOnTestFS(), HConstants.HFILE_ARCHIVE_DIRECTORY);

  // setup the cleaner
  FileSystem fs = UTIL.getDFSCluster().getFileSystem();
  final HFileCleaner cleaner = new HFileCleaner(1000, server, conf, fs, archivedHfileDir, POOL);
  Assert.assertEquals(ORIGINAL_THROTTLE_POINT, cleaner.getThrottlePoint());
  Assert.assertEquals(ORIGINAL_QUEUE_INIT_SIZE, cleaner.getLargeQueueInitSize());
  Assert.assertEquals(ORIGINAL_QUEUE_INIT_SIZE, cleaner.getSmallQueueInitSize());
  Assert.assertEquals(HFileCleaner.DEFAULT_HFILE_DELETE_THREAD_TIMEOUT_MSEC,
      cleaner.getCleanerThreadTimeoutMsec());
  Assert.assertEquals(HFileCleaner.DEFAULT_HFILE_DELETE_THREAD_CHECK_INTERVAL_MSEC,
      cleaner.getCleanerThreadCheckIntervalMsec());

  // clean up archive directory and create files for testing
  fs.delete(archivedHfileDir, true);
  fs.mkdirs(archivedHfileDir);
  createFilesForTesting(LARGE_FILE_NUM, SMALL_FILE_NUM, fs, archivedHfileDir);

  // call cleaner, run as daemon to test the interrupt-at-middle case
  Thread t = new Thread() {
    @Override
    public void run() {
      cleaner.chore();
    }
  };
  t.setDaemon(true);
  t.start();
  // wait until file clean started
  while (cleaner.getNumOfDeletedSmallFiles() == 0) {
    Thread.yield();
  }

  // trigger configuration change
  Configuration newConf = new Configuration(conf);
  newConf.setInt(HFileCleaner.HFILE_DELETE_THROTTLE_THRESHOLD, UPDATE_THROTTLE_POINT);
  newConf.setInt(HFileCleaner.LARGE_HFILE_QUEUE_INIT_SIZE, UPDATE_QUEUE_INIT_SIZE);
  newConf.setInt(HFileCleaner.SMALL_HFILE_QUEUE_INIT_SIZE, UPDATE_QUEUE_INIT_SIZE);
  newConf.setInt(HFileCleaner.LARGE_HFILE_DELETE_THREAD_NUMBER, LARGE_THREAD_NUM);
  newConf.setInt(HFileCleaner.SMALL_HFILE_DELETE_THREAD_NUMBER, SMALL_THREAD_NUM);
  newConf.setLong(HFileCleaner.HFILE_DELETE_THREAD_TIMEOUT_MSEC, THREAD_TIMEOUT_MSEC);
  newConf.setLong(HFileCleaner.HFILE_DELETE_THREAD_CHECK_INTERVAL_MSEC,
      THREAD_CHECK_INTERVAL_MSEC);

  LOG.debug("File deleted from large queue: " + cleaner.getNumOfDeletedLargeFiles()
      + "; from small queue: " + cleaner.getNumOfDeletedSmallFiles());
  cleaner.onConfigurationChange(newConf);

  // check values after change
  Assert.assertEquals(UPDATE_THROTTLE_POINT, cleaner.getThrottlePoint());
  Assert.assertEquals(UPDATE_QUEUE_INIT_SIZE, cleaner.getLargeQueueInitSize());
  Assert.assertEquals(UPDATE_QUEUE_INIT_SIZE, cleaner.getSmallQueueInitSize());
  Assert.assertEquals(LARGE_THREAD_NUM + SMALL_THREAD_NUM, cleaner.getCleanerThreads().size());
  Assert.assertEquals(THREAD_TIMEOUT_MSEC, cleaner.getCleanerThreadTimeoutMsec());
  Assert.assertEquals(THREAD_CHECK_INTERVAL_MSEC, cleaner.getCleanerThreadCheckIntervalMsec());

  // make sure no cost when onConfigurationChange called with no change
  List<Thread> oldThreads = cleaner.getCleanerThreads();
  cleaner.onConfigurationChange(newConf);
  List<Thread> newThreads = cleaner.getCleanerThreads();
  Assert.assertArrayEquals(oldThreads.toArray(), newThreads.toArray());

  // wait until clean done and check
  t.join();
  LOG.debug("File deleted from large queue: " + cleaner.getNumOfDeletedLargeFiles()
      + "; from small queue: " + cleaner.getNumOfDeletedSmallFiles());
  Assert.assertTrue(
      "Should delete more than " + LARGE_FILE_NUM + " files from large queue but actually "
          + cleaner.getNumOfDeletedLargeFiles(),
      cleaner.getNumOfDeletedLargeFiles() > LARGE_FILE_NUM);
  Assert.assertTrue(
      "Should delete less than " + SMALL_FILE_NUM + " files from small queue but actually "
          + cleaner.getNumOfDeletedSmallFiles(),
      cleaner.getNumOfDeletedSmallFiles() < SMALL_FILE_NUM);
}
Example 20
Source File: GCRegionProcedure.java From hbase with Apache License 2.0
@Override
protected Flow executeFromState(MasterProcedureEnv env, GCRegionState state)
    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
  if (LOG.isTraceEnabled()) {
    LOG.trace(this + " execute state=" + state);
  }
  MasterServices masterServices = env.getMasterServices();
  try {
    switch (state) {
      case GC_REGION_PREPARE:
        // Nothing to do to prepare.
        setNextState(GCRegionState.GC_REGION_ARCHIVE);
        break;
      case GC_REGION_ARCHIVE:
        MasterFileSystem mfs = masterServices.getMasterFileSystem();
        FileSystem fs = mfs.getFileSystem();
        if (HFileArchiver.exists(masterServices.getConfiguration(), fs, getRegion())) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Archiving region=" + getRegion().getShortNameToLog());
          }
          HFileArchiver.archiveRegion(masterServices.getConfiguration(), fs, getRegion());
        }
        FileSystem walFs = mfs.getWALFileSystem();
        // Cleanup the directories on WAL filesystem also
        Path regionWALDir = CommonFSUtils.getWALRegionDir(env.getMasterConfiguration(),
            getRegion().getTable(), getRegion().getEncodedName());
        if (walFs.exists(regionWALDir)) {
          if (!walFs.delete(regionWALDir, true)) {
            LOG.debug("Failed to delete {}", regionWALDir);
          }
        }
        Path wrongRegionWALDir = CommonFSUtils.getWrongWALRegionDir(env.getMasterConfiguration(),
            getRegion().getTable(), getRegion().getEncodedName());
        if (walFs.exists(wrongRegionWALDir)) {
          if (!walFs.delete(wrongRegionWALDir, true)) {
            LOG.debug("Failed to delete {}", wrongRegionWALDir);
          }
        }
        setNextState(GCRegionState.GC_REGION_PURGE_METADATA);
        break;
      case GC_REGION_PURGE_METADATA:
        // TODO: Purge metadata before removing from HDFS? This ordering is copied
        // from CatalogJanitor.
        AssignmentManager am = masterServices.getAssignmentManager();
        if (am != null) {
          if (am.getRegionStates() != null) {
            am.getRegionStates().deleteRegion(getRegion());
          }
        }
        MetaTableAccessor.deleteRegionInfo(masterServices.getConnection(), getRegion());
        masterServices.getServerManager().removeRegion(getRegion());
        FavoredNodesManager fnm = masterServices.getFavoredNodesManager();
        if (fnm != null) {
          fnm.deleteFavoredNodesForRegions(Lists.newArrayList(getRegion()));
        }
        return Flow.NO_MORE_STATE;
      default:
        throw new UnsupportedOperationException(this + " unhandled state=" + state);
    }
  } catch (IOException ioe) {
    // TODO: This is going to spew log? Add retry backoff
    LOG.warn("Error trying to GC " + getRegion().getShortNameToLog() + "; retrying...", ioe);
  }
  return Flow.HAS_MORE_STATE;
}