Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#getNameNode()
The following examples show how to use
org.apache.hadoop.hdfs.MiniDFSCluster#getNameNode().
The source project and file are noted above each example.
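Before the examples, here is a minimal sketch of the pattern they all share: build a MiniDFSCluster, wait for it to become active, obtain the NameNode via getNameNode(), and shut the cluster down in a finally block. This sketch is not taken from any of the projects below; it assumes a Hadoop 2.x test classpath (the hadoop-hdfs test artifact), and the class name is made up for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;

// Hypothetical class name; illustrative only.
public class MiniDfsGetNameNodeSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    try {
      // Start an in-process HDFS cluster with a single NameNode and DataNode.
      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
      cluster.waitActive();

      // getNameNode() returns the (only) NameNode; with federated or HA
      // topologies, getNameNode(index) selects a specific one instead.
      NameNode nn = cluster.getNameNode();
      System.out.println("NameNode RPC address: " + nn.getNameNodeAddress());
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}

The real examples below configure topology, storage directories, and startup options explicitly rather than relying on the builder defaults shown here.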
Example 1
Source File: TestStartup.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * start with -importCheckpoint option and verify that the files are in
 * separate directories and of the right length
 * @throws IOException
 */
private void checkNameNodeFiles() throws IOException {
  // start namenode with import option
  LOG.info("-- about to start DFS cluster");
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(config)
        .format(false)
        .manageDataDfsDirs(false)
        .manageNameDfsDirs(false)
        .startupOption(IMPORT).build();
    cluster.waitActive();
    LOG.info("--NN started with checkpoint option");
    NameNode nn = cluster.getNameNode();
    assertNotNull(nn);

    // Verify that image file sizes did not change.
    FSImage image = nn.getFSImage();
    verifyDifferentDirs(image, this.fsimageLength, this.editsLength);
  } finally {
    if (cluster != null)
      cluster.shutdown();
  }
}
Example 2
Source File: TestPlacementMonitor.java From RDFS with Apache License 2.0 | 5 votes |
private void setupCluster() throws IOException {
  setupConf();
  // start the cluster with one datanode
  cluster = new MiniDFSCluster(conf, 6, true, racks, hosts);
  cluster.waitActive();
  fs = cluster.getFileSystem();
  placementMonitor = new PlacementMonitor(conf);
  placementMonitor.start();
  blockMover = placementMonitor.blockMover;
  namenode = cluster.getNameNode();
  datanodes = namenode.getDatanodeReport(DatanodeReportType.LIVE);
}
Example 3
Source File: HdfsSortedOplogOrganizerJUnitTest.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
public void testActiveReaderWithNameNodeHA() throws Exception {
  deleteMiniClusterDir();
  int nn1port = AvailablePortHelper.getRandomAvailableTCPPort();
  int nn2port = AvailablePortHelper.getRandomAvailableTCPPort();

  MiniDFSCluster cluster = initMiniHACluster(nn1port, nn2port);
  initClientHAConf(nn1port, nn2port);

  HDFSStoreImpl store1 = (HDFSStoreImpl) hsf.create("Store-1");
  regionfactory.setHDFSStoreName(store1.getName());
  Region<Object, Object> region1 = regionfactory.create("region-1");
  HdfsRegionManager regionManager1 = ((LocalRegion)region1).getHdfsRegionManager();

  HdfsSortedOplogOrganizer organizer = new HdfsSortedOplogOrganizer(regionManager1, 0);
  ArrayList<TestEvent> items = new ArrayList<TestEvent>();
  for (int i = 100000; i < 101000; i++) {
    items.add(new TestEvent(("" + i), (i + " some string " + i)));
  }
  organizer.flush(items.iterator(), items.size());
  organizer.getSortedOplogs().get(0).get().getReader();

  TestUtils.addExpectedException("java.io.EOFException");
  NameNode nnode2 = cluster.getNameNode(1);
  assertTrue(nnode2.isStandbyState());
  cluster.shutdownNameNode(0);
  cluster.transitionToActive(1);
  assertFalse(nnode2.isStandbyState());

  for (int i = 100000; i < 100500; i++) {
    byte[] keyBytes1 = BlobHelper.serializeToBlob("" + i);
    assertEquals(i + " some string " + i, organizer.read(keyBytes1).getValue());
  }
  TestUtils.removeExpectedException("java.io.EOFException");

  region1.destroyRegion();
  store1.destroy();
  cluster.shutdown();
  FileUtils.deleteDirectory(new File("hdfs-test-cluster"));
}
Example 4
Source File: TestRecount.java From RDFS with Apache License 2.0 | 5 votes |
@Before
public void setup() throws IOException {
  dfscluster = new MiniDFSCluster(conf, 0, true, null);
  dfscluster.waitActive();
  fs = dfscluster.getFileSystem();
  namesystem = dfscluster.getNameNode().namesystem;
}
Example 5
Source File: TestDataNodeMultipleRegistrations.java From hadoop with Apache License 2.0 | 5 votes |
@Test
public void testClusterIdMismatch() throws Exception {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2))
      .build();
  try {
    cluster.waitActive();

    DataNode dn = cluster.getDataNodes().get(0);
    BPOfferService[] bposs = dn.getAllBpOs();
    LOG.info("dn bpos len (should be 2):" + bposs.length);
    Assert.assertEquals("should've registered with two namenodes", bposs.length, 2);

    // add another namenode
    cluster.addNameNode(conf, 9938);
    Thread.sleep(500); // lets wait for the registration to happen
    bposs = dn.getAllBpOs();
    LOG.info("dn bpos len (should be 3):" + bposs.length);
    Assert.assertEquals("should've registered with three namenodes", bposs.length, 3);

    // change cluster id and another Namenode
    StartupOption.FORMAT.setClusterId("DifferentCID");
    cluster.addNameNode(conf, 9948);
    NameNode nn4 = cluster.getNameNode(3);
    assertNotNull("cannot create nn4", nn4);

    Thread.sleep(500); // lets wait for the registration to happen
    bposs = dn.getAllBpOs();
    LOG.info("dn bpos len (still should be 3):" + bposs.length);
    Assert.assertEquals("should've registered with three namenodes", 3, bposs.length);
  } finally {
    cluster.shutdown();
  }
}
Example 6
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0 | 5 votes |
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR + 1, true, null);
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    FSNamesystem namesystem = cluster.getNameNode().namesystem;
    Block b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = namesystem.blocksMap.nodeIterator(b).next();
    namesystem.addToInvalidates(b, dn, true);
    namesystem.blocksMap.removeNode(b, dn);

    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1 + REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
}
Example 7
Source File: TestBookKeeperAsHASharedDir.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * NameNode should load the edits correctly if the applicable edits are
 * present in the BKJM.
 */
@Test
public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil
        .createJournalURI("/correctEditLogSelection").toString());
    BKJMUtil.addJournalManagerDefinition(conf);

    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0)
        .manageNameDfsSharedDirs(false).build();
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);
    cluster.waitActive();
    cluster.transitionToActive(0);
    nn1.getRpcServer().rollEditLog(); // Roll Edits from current Active.
    // Transition to standby current active gracefully.
    cluster.transitionToStandby(0);
    // Make the other Active and Roll edits multiple times
    cluster.transitionToActive(1);
    nn2.getRpcServer().rollEditLog();
    nn2.getRpcServer().rollEditLog();
    // Now One more failover. So NN1 should be able to failover successfully.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 8
Source File: TestBackupNode.java From hadoop with Apache License 2.0 | 5 votes |
private void testBNInSync(MiniDFSCluster cluster, final BackupNode backup,
    int testIdx) throws Exception {

  final NameNode nn = cluster.getNameNode();
  final FileSystem fs = cluster.getFileSystem();

  // Do a bunch of namespace operations, make sure they're replicated
  // to the BN.
  for (int i = 0; i < 10; i++) {
    final String src = "/test_" + testIdx + "_" + i;
    LOG.info("Creating " + src + " on NN");
    Path p = new Path(src);
    assertTrue(fs.mkdirs(p));

    GenericTestUtils.waitFor(new Supplier<Boolean>() {
      @Override
      public Boolean get() {
        LOG.info("Checking for " + src + " on BN");
        try {
          boolean hasFile = backup.getNamesystem().getFileInfo(src, false) != null;
          boolean txnIdMatch =
              backup.getRpcServer().getTransactionID() ==
              nn.getRpcServer().getTransactionID();
          return hasFile && txnIdMatch;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      }
    }, 30, 10000);
  }

  assertStorageDirsMatch(nn, backup);
}
Example 9
Source File: HdfsSortedOplogOrganizerJUnitTest.java From gemfirexd-oss with Apache License 2.0 | 5 votes |
public void testNewReaderWithNameNodeHA() throws Exception {
  deleteMiniClusterDir();
  int nn1port = AvailablePortHelper.getRandomAvailableTCPPort();
  int nn2port = AvailablePortHelper.getRandomAvailableTCPPort();

  MiniDFSCluster cluster = initMiniHACluster(nn1port, nn2port);
  initClientHAConf(nn1port, nn2port);

  HDFSStoreImpl store1 = (HDFSStoreImpl) hsf.create("Store-1");
  regionfactory.setHDFSStoreName(store1.getName());
  Region<Object, Object> region1 = regionfactory.create("region-1");
  HdfsRegionManager regionManager1 = ((LocalRegion)region1).getHdfsRegionManager();

  HoplogOrganizer<SortedHoplogPersistedEvent> organizer = doRead(regionManager1);
  organizer.close();

  TestUtils.addExpectedException("java.io.EOFException");
  NameNode nnode2 = cluster.getNameNode(1);
  assertTrue(nnode2.isStandbyState());
  cluster.shutdownNameNode(0);
  cluster.transitionToActive(1);
  assertFalse(nnode2.isStandbyState());

  organizer = new HdfsSortedOplogOrganizer(regionManager1, 0);
  byte[] keyBytes1 = BlobHelper.serializeToBlob("1");
  byte[] keyBytes3 = BlobHelper.serializeToBlob("3");
  byte[] keyBytes4 = BlobHelper.serializeToBlob("4");
  assertEquals("2-1", organizer.read(keyBytes1).getValue());
  assertEquals("3-3", organizer.read(keyBytes3).getValue());
  assertEquals("1-4", organizer.read(keyBytes4).getValue());
  TestUtils.removeExpectedException("java.io.EOFException");

  region1.destroyRegion();
  store1.destroy();
  cluster.shutdown();
  FileUtils.deleteDirectory(new File("hdfs-test-cluster"));
}
Example 10
Source File: TestEditLogTailer.java From hadoop with Apache License 2.0 | 4 votes |
@Test
public void testTailer() throws IOException, InterruptedException,
    ServiceFailedException {
  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(0)
      .build();
  cluster.waitActive();

  cluster.transitionToActive(0);

  NameNode nn1 = cluster.getNameNode(0);
  NameNode nn2 = cluster.getNameNode(1);
  try {
    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test", "test",
              new FsPermission((short)00755)), true);
    }

    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }

    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test", "test",
              new FsPermission((short)00755)), true);
    }

    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }
  } finally {
    cluster.shutdown();
  }
}
Example 11
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0 | 4 votes |
public void testUnderReplicationWithDecommissionDataNode() throws Exception {
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)1;
  File f = new File(HOST_FILE_PATH);
  if (f.exists()) {
    f.delete();
  }
  conf.set("dfs.hosts.exclude", HOST_FILE_PATH);
  LOG.info("Start the cluster");
  final MiniDFSCluster cluster =
      new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
        namesystem.heartbeats.toArray(
            new DatanodeDescriptor[REPLICATION_FACTOR]);
    assertEquals(1, datanodes.length);

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile2");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // shutdown the datanode
    DataNodeProperties dnprop = shutdownDataNode(cluster, datanodes[0]);
    assertEquals(1, namesystem.getMissingBlocksCount()); // one missing block
    assertEquals(0, namesystem.getNonCorruptUnderReplicatedBlocks());

    // Make the only datanode to be decommissioned
    LOG.info("Decommission the datanode " + dnprop);
    addToExcludeFile(namesystem.getConf(), datanodes);
    namesystem.refreshNodes(namesystem.getConf());

    // bring up the datanode
    cluster.restartDataNode(dnprop);

    // Wait for block report
    LOG.info("wait for its block report to come in");
    NumberReplicas num;
    long startTime = System.currentTimeMillis();
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
      Thread.sleep(1000);
      LOG.info("live: " + num.liveReplicas()
          + "Decom: " + num.decommissionedReplicas());
    } while (num.decommissionedReplicas() != 1
        && System.currentTimeMillis() - startTime < 30000);

    assertEquals("Decommissioning Replicas doesn't reach 1",
        1, num.decommissionedReplicas());
    assertEquals(1, namesystem.getNonCorruptUnderReplicatedBlocks());
    assertEquals(0, namesystem.getMissingBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
Example 12
Source File: TestEditLogTailer.java From big-c with Apache License 2.0 | 4 votes |
@Test
public void testTailer() throws IOException, InterruptedException,
    ServiceFailedException {
  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(0)
      .build();
  cluster.waitActive();

  cluster.transitionToActive(0);

  NameNode nn1 = cluster.getNameNode(0);
  NameNode nn2 = cluster.getNameNode(1);
  try {
    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test", "test",
              new FsPermission((short)00755)), true);
    }

    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }

    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test", "test",
              new FsPermission((short)00755)), true);
    }

    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);

    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }
  } finally {
    cluster.shutdown();
  }
}
Example 13
Source File: TestWebHDFSForHA.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Make sure the WebHdfsFileSystem will retry based on RetriableException when
 * rpcServer is null in NamenodeWebHdfsMethods while NameNode starts up.
 */
@Test (timeout=120000)
public void testRetryWhileNNStartup() throws Exception {
  final Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
  MiniDFSCluster cluster = null;
  final Map<String, Boolean> resultMap = new HashMap<String, Boolean>();

  try {
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
        .numDataNodes(0).build();
    HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
    cluster.waitActive();
    cluster.transitionToActive(0);

    final NameNode namenode = cluster.getNameNode(0);
    final NamenodeProtocols rpcServer = namenode.getRpcServer();
    Whitebox.setInternalState(namenode, "rpcServer", null);

    new Thread() {
      @Override
      public void run() {
        boolean result = false;
        FileSystem fs = null;
        try {
          fs = FileSystem.get(WEBHDFS_URI, conf);
          final Path dir = new Path("/test");
          result = fs.mkdirs(dir);
        } catch (IOException e) {
          result = false;
        } finally {
          IOUtils.cleanup(null, fs);
        }
        synchronized (TestWebHDFSForHA.this) {
          resultMap.put("mkdirs", result);
          TestWebHDFSForHA.this.notifyAll();
        }
      }
    }.start();

    Thread.sleep(1000);
    Whitebox.setInternalState(namenode, "rpcServer", rpcServer);
    synchronized (this) {
      while (!resultMap.containsKey("mkdirs")) {
        this.wait();
      }
      Assert.assertTrue(resultMap.get("mkdirs"));
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 14
Source File: TestBookKeeperAsHASharedDir.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Test that two namenodes can't continue as primary
 */
@Test
public void testMultiplePrimariesStarted() throws Exception {
  Path p1 = new Path("/testBKJMMultiplePrimary");

  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        BKJMUtil.createJournalURI("/hotfailoverMultiple").toString());
    BKJMUtil.addJournalManagerDefinition(conf);

    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .manageNameDfsSharedDirs(false)
        .checkExitOnShutdown(false)
        .build();
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);
    cluster.waitActive();
    cluster.transitionToActive(0);

    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(p1);
    nn1.getRpcServer().rollEditLog();
    cluster.transitionToActive(1);
    fs = cluster.getFileSystem(0); // get the older active server.

    try {
      fs.delete(p1, true);
      fail("Log update on older active should cause it to exit");
    } catch (RemoteException re) {
      assertTrue(re.getClassName().contains("ExitException"));
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 15
Source File: TestListCorruptFileBlocks.java From hadoop with Apache License 2.0 | 4 votes |
@Test (timeout=300000)
public void testlistCorruptFileBlocks() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1); // datanode scans directories
  FileSystem fs = null;

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil util = new DFSTestUtil.Builder().
        setName("testGetCorruptFiles").setNumFiles(3).setMaxLevels(1).
        setMaxSize(1024).build();
    util.createFiles(fs, "/corruptData");

    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> corruptFileBlocks =
        namenode.getNamesystem().listCorruptFileBlocks("/corruptData", null);
    int numCorrupt = corruptFileBlocks.size();
    assertTrue(numCorrupt == 0);
    // delete the blocks
    String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i = 0; i < 4; i++) {
      for (int j = 0; j <= 1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
            data_dir);
        if (metadataFiles == null)
          continue;
        // assertTrue("Blocks do not exist in data-dir", (blocks != null) &&
        // (blocks.length > 0));
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          LOG.info("Deliberately removing file " + blockFile.getName());
          assertTrue("Cannot remove file.", blockFile.delete());
          LOG.info("Deliberately removing file " + metadataFile.getName());
          assertTrue("Cannot remove file.", metadataFile.delete());
          // break;
        }
      }
    }

    int count = 0;
    corruptFileBlocks = namenode.getNamesystem().
        listCorruptFileBlocks("/corruptData", null);
    numCorrupt = corruptFileBlocks.size();
    while (numCorrupt < 3) {
      Thread.sleep(1000);
      corruptFileBlocks = namenode.getNamesystem()
          .listCorruptFileBlocks("/corruptData", null);
      numCorrupt = corruptFileBlocks.size();
      count++;
      if (count > 30)
        break;
    }
    // Validate we get all the corrupt files
    LOG.info("Namenode has bad files. " + numCorrupt);
    assertTrue(numCorrupt == 3);
    // test the paging here
    FSNamesystem.CorruptFileBlockInfo[] cfb = corruptFileBlocks
        .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
    // now get the 2nd and 3rd file that is corrupt
    String[] cookie = new String[]{"1"};
    Collection<FSNamesystem.CorruptFileBlockInfo> nextCorruptFileBlocks =
        namenode.getNamesystem()
            .listCorruptFileBlocks("/corruptData", cookie);
    FSNamesystem.CorruptFileBlockInfo[] ncfb = nextCorruptFileBlocks
        .toArray(new FSNamesystem.CorruptFileBlockInfo[0]);
    numCorrupt = nextCorruptFileBlocks.size();
    assertTrue(numCorrupt == 2);
    assertTrue(ncfb[0].block.getBlockName()
        .equalsIgnoreCase(cfb[1].block.getBlockName()));

    corruptFileBlocks = namenode.getNamesystem()
        .listCorruptFileBlocks("/corruptData", cookie);
    numCorrupt = corruptFileBlocks.size();
    assertTrue(numCorrupt == 0);
    // Do a listing on a dir which doesn't have any corrupt blocks and
    // validate
    util.createFiles(fs, "/goodData");
    corruptFileBlocks =
        namenode.getNamesystem().listCorruptFileBlocks("/goodData", null);
    numCorrupt = corruptFileBlocks.size();
    assertTrue(numCorrupt == 0);
    util.cleanup(fs, "/corruptData");
    util.cleanup(fs, "/goodData");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 16
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();

    // Look into the block manager on the active node for the block
    // under construction.
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);

    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);

    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(),  // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true),  // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject());    // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));

    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported", t);

    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 17
Source File: TestDataNodeMultipleRegistrations.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * starts single nn and single dn and verifies registration and handshake
 *
 * @throws IOException
 */
@Test
public void testFedSingleNN() throws IOException {
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nameNodePort(9927).build();
  try {
    NameNode nn1 = cluster.getNameNode();
    assertNotNull("cannot create nn1", nn1);

    String bpid1 = FSImageTestUtil.getFSImage(nn1).getBlockPoolID();
    String cid1 = FSImageTestUtil.getFSImage(nn1).getClusterID();
    int lv1 = FSImageTestUtil.getFSImage(nn1).getLayoutVersion();
    LOG.info("nn1: lv=" + lv1 + ";cid=" + cid1 + ";bpid=" + bpid1 + ";uri="
        + nn1.getNameNodeAddress());

    // check number of vlumes in fsdataset
    DataNode dn = cluster.getDataNodes().get(0);
    final Map<String, Object> volInfos = dn.data.getVolumeInfoMap();
    Assert.assertTrue("No volumes in the fsdataset", volInfos.size() > 0);
    int i = 0;
    for (Map.Entry<String, Object> e : volInfos.entrySet()) {
      LOG.info("vol " + i++ + ") " + e.getKey() + ": " + e.getValue());
    }
    // number of volumes should be 2 - [data1, data2]
    assertEquals("number of volumes is wrong", 2, volInfos.size());

    for (BPOfferService bpos : dn.getAllBpOs()) {
      LOG.info("reg: bpid=" + "; name=" + bpos.bpRegistration + "; sid="
          + bpos.bpRegistration.getDatanodeUuid() + "; nna=" +
          getNNSocketAddress(bpos));
    }

    // try block report
    BPOfferService bpos1 = dn.getAllBpOs()[0];
    bpos1.triggerBlockReportForTests();

    assertEquals("wrong nn address", getNNSocketAddress(bpos1),
        nn1.getNameNodeAddress());
    assertEquals("wrong bpid", bpos1.getBlockPoolId(), bpid1);
    assertEquals("wrong cid", dn.getClusterId(), cid1);
    cluster.shutdown();

    // Ensure all the BPOfferService threads are shutdown
    assertEquals(0, dn.getAllBpOs().length);
    cluster = null;
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 18
Source File: TestBackupNode.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Ensure that the backupnode will tail edits from the NN
 * and keep in sync, even while the NN rolls, checkpoints
 * occur, etc.
 */
@Test
public void testBackupNodeTailsEdits() throws Exception {
  Configuration conf = new HdfsConfiguration();
  HAUtil.setAllowStandbyReads(conf, true);
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  BackupNode backup = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(0).build();
    fileSys = cluster.getFileSystem();
    backup = startBackupNode(conf, StartupOption.BACKUP, 1);

    BackupImage bnImage = (BackupImage) backup.getFSImage();
    testBNInSync(cluster, backup, 1);

    // Force a roll -- BN should roll with NN.
    NameNode nn = cluster.getNameNode();
    NamenodeProtocols nnRpc = nn.getRpcServer();
    nnRpc.rollEditLog();
    assertEquals(bnImage.getEditLog().getCurSegmentTxId(),
        nn.getFSImage().getEditLog().getCurSegmentTxId());

    // BN should stay in sync after roll
    testBNInSync(cluster, backup, 2);

    long nnImageBefore =
        nn.getFSImage().getStorage().getMostRecentCheckpointTxId();
    // BN checkpoint
    backup.doCheckpoint();

    // NN should have received a new image
    long nnImageAfter =
        nn.getFSImage().getStorage().getMostRecentCheckpointTxId();

    assertTrue("nn should have received new checkpoint. before: " +
        nnImageBefore + " after: " + nnImageAfter,
        nnImageAfter > nnImageBefore);

    // BN should stay in sync after checkpoint
    testBNInSync(cluster, backup, 3);

    // Stop BN
    StorageDirectory sd = bnImage.getStorage().getStorageDir(0);
    backup.stop();
    backup = null;

    // When shutting down the BN, it shouldn't finalize logs that are
    // still open on the NN
    EditLogFile editsLog = FSImageTestUtil.findLatestEditsLog(sd);
    assertEquals(editsLog.getFirstTxId(),
        nn.getFSImage().getEditLog().getCurSegmentTxId());
    assertTrue("Should not have finalized " + editsLog,
        editsLog.isInProgress());

    // do some edits
    assertTrue(fileSys.mkdirs(new Path("/edit-while-bn-down")));

    // start a new backup node
    backup = startBackupNode(conf, StartupOption.BACKUP, 1);

    testBNInSync(cluster, backup, 4);
    assertNotNull(backup.getNamesystem().getFileInfo("/edit-while-bn-down", false));
  } finally {
    LOG.info("Shutting down...");
    if (backup != null) backup.stop();
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }

  assertStorageDirsMatch(cluster.getNameNode(), backup);
}
Example 19
Source File: TestNNStorageRetentionFunctional.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Test case where two directories are configured as NAME_AND_EDITS
 * and one of them fails to save storage. Since the edits and image
 * failure states are decoupled, the failure of image saving should
 * not prevent the purging of logs from that dir.
 */
@Test
public void testPurgingWithNameEditsDirAfterFailure() throws Exception {
  MiniDFSCluster cluster = null;
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0);

  File sd0 = new File(TEST_ROOT_DIR, "nn0");
  File sd1 = new File(TEST_ROOT_DIR, "nn1");
  File cd0 = new File(sd0, "current");
  File cd1 = new File(sd1, "current");
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
      Joiner.on(",").join(sd0, sd1));

  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(0)
        .manageNameDfsDirs(false)
        .format(true).build();
    NameNode nn = cluster.getNameNode();

    doSaveNamespace(nn);
    LOG.info("After first save, images 0 and 2 should exist in both dirs");
    assertGlobEquals(cd0, "fsimage_\\d*",
        getImageFileName(0), getImageFileName(2));
    assertGlobEquals(cd1, "fsimage_\\d*",
        getImageFileName(0), getImageFileName(2));
    assertGlobEquals(cd0, "edits_.*",
        getFinalizedEditsFileName(1, 2),
        getInProgressEditsFileName(3));
    assertGlobEquals(cd1, "edits_.*",
        getFinalizedEditsFileName(1, 2),
        getInProgressEditsFileName(3));

    doSaveNamespace(nn);
    LOG.info("After second save, image 0 should be purged, "
        + "and image 4 should exist in both.");
    assertGlobEquals(cd0, "fsimage_\\d*",
        getImageFileName(2), getImageFileName(4));
    assertGlobEquals(cd1, "fsimage_\\d*",
        getImageFileName(2), getImageFileName(4));
    assertGlobEquals(cd0, "edits_.*",
        getFinalizedEditsFileName(3, 4),
        getInProgressEditsFileName(5));
    assertGlobEquals(cd1, "edits_.*",
        getFinalizedEditsFileName(3, 4),
        getInProgressEditsFileName(5));

    LOG.info("Failing first storage dir by chmodding it");
    assertEquals(0, FileUtil.chmod(cd0.getAbsolutePath(), "000"));
    doSaveNamespace(nn);
    LOG.info("Restoring accessibility of first storage dir");
    assertEquals(0, FileUtil.chmod(cd0.getAbsolutePath(), "755"));

    LOG.info("nothing should have been purged in first storage dir");
    assertGlobEquals(cd0, "fsimage_\\d*",
        getImageFileName(2), getImageFileName(4));
    assertGlobEquals(cd0, "edits_.*",
        getFinalizedEditsFileName(3, 4),
        getInProgressEditsFileName(5));

    LOG.info("fsimage_2 should be purged in second storage dir");
    assertGlobEquals(cd1, "fsimage_\\d*",
        getImageFileName(4), getImageFileName(6));
    assertGlobEquals(cd1, "edits_.*",
        getFinalizedEditsFileName(5, 6),
        getInProgressEditsFileName(7));

    LOG.info("On next save, we should purge logs from the failed dir," +
        " but not images, since the image directory is in failed state.");
    doSaveNamespace(nn);
    assertGlobEquals(cd1, "fsimage_\\d*",
        getImageFileName(6), getImageFileName(8));
    assertGlobEquals(cd1, "edits_.*",
        getFinalizedEditsFileName(7, 8),
        getInProgressEditsFileName(9));
    assertGlobEquals(cd0, "fsimage_\\d*",
        getImageFileName(2), getImageFileName(4));
    assertGlobEquals(cd0, "edits_.*",
        getInProgressEditsFileName(9));
  } finally {
    FileUtil.chmod(cd0.getAbsolutePath(), "755");

    LOG.info("Shutting down...");
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 20
Source File: TestBlockTokenWithDFS.java From big-c with Apache License 2.0 | 4 votes |
/**
 * testing that WRITE operation can handle token expiration when
 * re-establishing pipeline is needed
 */
@Test
public void testWrite() throws Exception {
  MiniDFSCluster cluster = null;
  int numDataNodes = 2;
  Configuration conf = getConf(numDataNodes);

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    final NameNode nn = cluster.getNameNode();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second)
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
    Path fileToWrite = new Path(FILE_TO_WRITE);
    FileSystem fs = cluster.getFileSystem();

    FSDataOutputStream stm = writeFile(fs, fileToWrite, (short) numDataNodes,
        BLOCK_SIZE);
    // write a partial block
    int mid = rawData.length - 1;
    stm.write(rawData, 0, mid);
    stm.hflush();

    /*
     * wait till token used in stm expires
     */
    Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
    while (!SecurityTestUtil.isBlockTokenExpired(token)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    // remove a datanode to force re-establishing pipeline
    cluster.stopDataNode(0);
    // write the rest of the file
    stm.write(rawData, mid, rawData.length - mid);
    stm.close();
    // check if write is successful
    FSDataInputStream in4 = fs.open(fileToWrite);
    assertTrue(checkFile1(in4));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}