Java Code Examples for org.apache.hadoop.hdfs.DFSTestUtil#getFirstBlock()
The following examples show how to use
org.apache.hadoop.hdfs.DFSTestUtil#getFirstBlock().
Each example is taken from an open-source project; the source file, originating project,
and license are noted above each snippet.
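Before the project examples, here is a minimal sketch of the common pattern they all share: write a small file on a MiniDFSCluster, fetch its first block with DFSTestUtil.getFirstBlock(), and then drive assertions or replication checks against the returned ExtendedBlock. The test name and file path below are placeholders for illustration, not taken from any of the projects listed.

// Minimal sketch (hypothetical test): obtain the first block of a freshly
// written file so it can be inspected or manipulated by test helpers.
@Test
public void testGetFirstBlockSketch() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  try {
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    Path filePath = new Path("/sketchFile");

    // Write a one-block file with replication factor 1 and wait for it to replicate.
    DFSTestUtil.createFile(fs, filePath, 1L, (short) 1, 0L);
    DFSTestUtil.waitReplication(fs, filePath, (short) 1);

    // getFirstBlock returns the ExtendedBlock of the file's first block, which
    // the examples below pass to helpers such as waitForReplication,
    // corruptReplica, or the block manager.
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertNotNull(block);
  } finally {
    cluster.shutdown();
  }
}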
Example 1
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testSufficientlySingleReplBlockUsesNewRack() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 1;
  final Path filePath = new Path("/testFile");
  String racks[] = {"/rack1", "/rack1", "/rack1", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    // Create a file with one block with a replication factor of 1
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 0);

    REPLICATION_FACTOR = 2;
    NameNodeAdapter.setReplication(ns, "/testFile", REPLICATION_FACTOR);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 2
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new Datanode.
 *
 * The test strategy:
 *   Bring up Cluster with 3 DataNodes
 *   Create a file of replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (corrupt replica should not be removed since number of good replicas
 *     (2) is less than replication factor (3))
 *   Start a new data node
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 3
Source File: TestUnderReplicatedBlocks.java From hadoop-gpu with Apache License 2.0
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR+1, true, null);
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    FSNamesystem namesystem = cluster.getNameNode().namesystem;
    Block b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = namesystem.blocksMap.nodeIterator(b).next();
    namesystem.addToInvalidates(b, dn);
    namesystem.blocksMap.removeNode(b, dn);

    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
}
Example 4
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new Datanode.
 *
 * The test strategy:
 *   Bring up Cluster with 3 DataNodes
 *   Create a file of replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (corrupt replica should not be removed since number of good replicas
 *     (2) is less than replication factor (3))
 *   Start a new data node
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 5
Source File: TestBlocksWithNotEnoughRacks.java From big-c with Apache License 2.0
@Test
public void testReduceReplFactorRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Decrease the replication factor, make sure the deleted replica
    // was not the one that lived on the rack with only one replica,
    // ie we should still have 2 racks after reducing the repl factor.
    REPLICATION_FACTOR = 2;
    NameNodeAdapter.setReplication(ns, "/testFile", REPLICATION_FACTOR);

    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 6
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testSufficientlyReplBlocksUsesNewRack() throws Exception {
  Configuration conf = getConf();
  final short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  // All datanodes are on the same rack
  String racks[] = {"/rack1", "/rack1", "/rack1"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  try {
    // Create a file with one block with a replication factor of 3
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 0);

    // Add a new datanode on a different rack
    String newRacks[] = {"/rack2"};
    cluster.startDataNodes(conf, 1, true, null, newRacks);
    cluster.waitActive();

    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 7
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testNodeDecomissionRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");

  // Configure an excludes file
  FileSystem localFileSys = FileSystem.getLocal(conf);
  Path workingDir = localFileSys.getWorkingDirectory();
  Path dir = new Path(workingDir, "build/test/data/temp/decommission");
  Path excludeFile = new Path(dir, "exclude");
  Path includeFile = new Path(dir, "include");
  assertTrue(localFileSys.mkdirs(dir));
  DFSTestUtil.writeFile(localFileSys, excludeFile, "");
  DFSTestUtil.writeFile(localFileSys, includeFile, "");
  conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
  conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath());

  // Two blocks and four racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Decommission one of the hosts with the block, this should cause
    // the block to get replicated to another host on the same rack,
    // otherwise the rack policy is violated.
    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(filePath), 0, Long.MAX_VALUE);
    String name = locs[0].getNames()[0];
    DFSTestUtil.writeFile(localFileSys, excludeFile, name);
    ns.getBlockManager().getDatanodeManager().refreshNodes(conf);
    DFSTestUtil.waitForDecommission(fs, name);

    // Check the block still has sufficient # replicas across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 8
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();

    // Look into the block manager on the active node for the block
    // under construction.
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " + expectedPrimary);

    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);

    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));

    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported", t);

    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 9
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testReduceReplFactorDueToRejoinRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack
  String racks[] = {"/rack1", "/rack1", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Make the last (cross rack) datanode look like it failed
    // to heartbeat by stopping it and calling removeDatanode.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(3, datanodes.size());
    DataNode dataNode = datanodes.get(2);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(2);
    dm.removeDatanode(dnId);

    // The block gets re-replicated to another datanode so it has a
    // sufficient # replicas, but not across racks, so there should
    // be 1 rack, and 1 needed replica (even though there are 2 hosts
    // available and only 2 replicas required).
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 1);

    // Start the "failed" datanode, which has a replica so the block is
    // now over-replicated and therefore a replica should be removed but
    // not on the restarted datanode as that would violate the rack policy.
    String rack2[] = {"/rack2"};
    cluster.startDataNodes(conf, 1, true, null, rack2);
    cluster.waitActive();

    // The block now has sufficient # replicas, across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 10
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testReplDueToNodeFailRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack
  String racks[] = {"/rack1", "/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
  try {
    // Create a file with one block with a replication factor of 3
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Make the last datanode look like it failed to heartbeat by
    // calling removeDatanode and stopping it.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    int idx = datanodes.size() - 1;
    DataNode dataNode = datanodes.get(idx);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(idx);
    dm.removeDatanode(dnId);

    // The block should still have sufficient # replicas, across racks.
    // The last node may not have contained a replica, but if it did
    // it should have been replicated within the same rack.
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Fail the last datanode again, it's also on rack2 so there is
    // only 1 rack for all the replicas
    datanodes = cluster.getDataNodes();
    idx = datanodes.size() - 1;
    dataNode = datanodes.get(idx);
    dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(idx);
    dm.removeDatanode(dnId);

    // Make sure we have enough live replicas even though we are
    // short one rack and therefore need one replica
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 1);
  } finally {
    cluster.shutdown();
  }
}
Example 11
Source File: TestBlocksWithNotEnoughRacks.java From big-c with Apache License 2.0
@Test
public void testReduceReplFactorDueToRejoinRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack
  String racks[] = {"/rack1", "/rack1", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Make the last (cross rack) datanode look like it failed
    // to heartbeat by stopping it and calling removeDatanode.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(3, datanodes.size());
    DataNode dataNode = datanodes.get(2);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(2);
    dm.removeDatanode(dnId);

    // The block gets re-replicated to another datanode so it has a
    // sufficient # replicas, but not across racks, so there should
    // be 1 rack, and 1 needed replica (even though there are 2 hosts
    // available and only 2 replicas required).
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 1);

    // Start the "failed" datanode, which has a replica so the block is
    // now over-replicated and therefore a replica should be removed but
    // not on the restarted datanode as that would violate the rack policy.
    String rack2[] = {"/rack2"};
    cluster.startDataNodes(conf, 1, true, null, rack2);
    cluster.waitActive();

    // The block now has sufficient # replicas, across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 12
Source File: TestBlocksWithNotEnoughRacks.java From big-c with Apache License 2.0
@Test
public void testCorruptBlockRereplicatedAcrossRacks() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  int fileLen = 512;
  final Path filePath = new Path("/testFile");
  // Datanodes are spread across two racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    // Create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, fileLen, REPLICATION_FACTOR, 1L);
    final String fileContent = DFSTestUtil.readFile(fs, filePath);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Corrupt a replica of the block
    int dnToCorrupt = DFSTestUtil.firstDnWithBlock(cluster, b);
    assertTrue(cluster.corruptReplica(dnToCorrupt, b));

    // Restart the datanode so blocks are re-scanned, and the corrupt
    // block is detected.
    cluster.restartDataNode(dnToCorrupt);

    // Wait for the namenode to notice the corrupt replica
    DFSTestUtil.waitCorruptReplicas(fs, ns, filePath, b, 1);

    // The rack policy is still respected
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Ensure all replicas are valid (the corrupt replica may not
    // have been cleaned up yet).
    for (int i = 0; i < racks.length; i++) {
      String blockContent = cluster.readBlockOnDataNode(i, b);
      if (blockContent != null && i != dnToCorrupt) {
        assertEquals("Corrupt replica", fileContent, blockContent);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
Example 13
Source File: TestUnderReplicatedBlocks.java From big-c with Apache License 2.0
/**
 * The test verifies the number of outstanding replication requests for a
 * given DN shouldn't exceed the limit set by configuration property
 * dfs.namenode.replication.max-streams-hard-limit.
 * The test does the following:
 * 1. Create a mini cluster with 2 DNs. Set large heartbeat interval so that
 *    replication requests won't be picked by any DN right away.
 * 2. Create a file with 10 blocks and replication factor 2. Thus each
 *    of the 2 DNs has one replica of each block.
 * 3. Add a DN to the cluster for later replication.
 * 4. Remove a DN that has data.
 * 5. Ask BlockManager to compute the replication work. This will assign
 *    replication requests to the only DN that has data.
 * 6. Make sure the number of pending replication requests of that DN doesn't
 *    exceed the limit.
 * @throws Exception
 */
@Test(timeout=60000) // 1 min timeout
public void testNumberOfBlocksToBeReplicated() throws Exception {
  Configuration conf = new HdfsConfiguration();

  conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);

  // Large value to make sure the pending replication request can stay in
  // DatanodeDescriptor.replicateBlocks before test timeout.
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 100);

  // Make sure BlockManager can pull all blocks from UnderReplicatedBlocks via
  // chooseUnderReplicatedBlocks at once.
  conf.setInt(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION, 5);

  int NUM_OF_BLOCKS = 10;
  final short REP_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
      REP_FACTOR).build();
  try {
    // create a file with 10 blocks with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, NUM_OF_BLOCKS, REP_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REP_FACTOR);

    cluster.startDataNodes(conf, 1, true, null, null, null, null);

    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    Iterator<DatanodeStorageInfo> storageInfos =
        bm.blocksMap.getStorages(b.getLocalBlock())
        .iterator();
    DatanodeDescriptor firstDn = storageInfos.next().getDatanodeDescriptor();
    DatanodeDescriptor secondDn = storageInfos.next().getDatanodeDescriptor();

    bm.getDatanodeManager().removeDatanode(firstDn);

    assertEquals(NUM_OF_BLOCKS, bm.getUnderReplicatedNotMissingBlocks());
    bm.computeDatanodeWork();

    assertTrue("The number of blocks to be replicated should be less than "
        + "or equal to " + bm.replicationStreamsHardLimit,
        secondDn.getNumberOfBlocksToBeReplicated()
        <= bm.replicationStreamsHardLimit);
  } finally {
    cluster.shutdown();
  }
}
Example 14
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. The above condition should hold
 * true as long as there is one good replica. This test verifies that.
 *
 * The test strategy:
 *   Bring up Cluster with 2 DataNodes
 *   Create a file of replication factor 2
 *   Corrupt one replica of a block of the file
 *   Verify that there is one good replica and 1 corrupt replica
 *     (corrupt replica should not be removed since number of good
 *     replicas (1) is less than replication factor (2)).
 *   Set the replication factor to 1
 *   Verify that the corrupt replica is removed.
 *     (corrupt replica should be removed since number of good
 *     replicas (1) is equal to replication factor (1))
 */
@Test(timeout=20000)
public void testWithReplicationFactorAsOne() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    namesystem.setReplication(fileName.toString(), (short) 1);

    // wait for 3 seconds so that all block reports are processed.
    for (int i = 0; i < 10; i++) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
      if (countReplicas(namesystem, block).corruptReplicas() == 0) {
        break;
      }
    }

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 15
Source File: TestBlocksWithNotEnoughRacks.java From big-c with Apache License 2.0
@Test
public void testNodeDecomissionRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");

  // Configure an excludes file
  FileSystem localFileSys = FileSystem.getLocal(conf);
  Path workingDir = localFileSys.getWorkingDirectory();
  Path dir = new Path(workingDir, "build/test/data/temp/decommission");
  Path excludeFile = new Path(dir, "exclude");
  Path includeFile = new Path(dir, "include");
  assertTrue(localFileSys.mkdirs(dir));
  DFSTestUtil.writeFile(localFileSys, excludeFile, "");
  DFSTestUtil.writeFile(localFileSys, includeFile, "");
  conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
  conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath());

  // Two blocks and four racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Decommission one of the hosts with the block, this should cause
    // the block to get replicated to another host on the same rack,
    // otherwise the rack policy is violated.
    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(filePath), 0, Long.MAX_VALUE);
    String name = locs[0].getNames()[0];
    DFSTestUtil.writeFile(localFileSys, excludeFile, name);
    ns.getBlockManager().getDatanodeManager().refreshNodes(conf);
    DFSTestUtil.waitForDecommission(fs, name);

    // Check the block still has sufficient # replicas across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 16
Source File: TestFsck.java From big-c with Apache License 2.0
/**
 * Test for blockIdCK with block corruption
 */
@Test
public void testBlockIdCKCorruption() throws Exception {
  short NUM_DN = 1;
  final long blockSize = 512;
  Random random = new Random();
  DFSClient dfsClient;
  LocatedBlocks blocks;
  ExtendedBlock block;
  short repFactor = 1;
  String [] racks = {"/rack1"};
  String [] hosts = {"host1"};

  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  // Set short retry timeouts so this test runs faster
  conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);

  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
        .racks(racks).build();

    assertNotNull("Failed Cluster Creation", cluster);
    cluster.waitClusterUp();
    dfs = cluster.getFileSystem();
    assertNotNull("Failed to get FileSystem", dfs);

    DFSTestUtil util = new DFSTestUtil.Builder().
        setName(getClass().getSimpleName()).setNumFiles(1).build();
    //create files
    final String pathString = new String("/testfile");
    final Path path = new Path(pathString);
    util.createFile(dfs, path, 1024, repFactor, 1000L);
    util.waitReplication(dfs, path, repFactor);
    StringBuilder sb = new StringBuilder();
    for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
      sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
    }
    String[] bIds = sb.toString().split(" ");

    //make sure block is healthy before we corrupt it
    String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));

    // corrupt replicas
    block = DFSTestUtil.getFirstBlock(dfs, path);
    File blockFile = cluster.getBlockFile(0, block);
    if (blockFile != null && blockFile.exists()) {
      RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
      FileChannel channel = raFile.getChannel();
      String badString = "BADBAD";
      int rand = random.nextInt((int) channel.size()/2);
      raFile.seek(rand);
      raFile.write(badString.getBytes());
      raFile.close();
    }

    util.waitCorruptReplicas(dfs, cluster.getNamesystem(), path, block, 1);

    outStr = runFsck(conf, 1, false, "/", "-blockId", block.getBlockName());
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 17
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testNodeDecomissionWithOverreplicationRespectsRackPolicy()
    throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 5;
  final Path filePath = new Path("/testFile");

  // Configure an excludes file
  FileSystem localFileSys = FileSystem.getLocal(conf);
  Path workingDir = localFileSys.getWorkingDirectory();
  Path dir = new Path(workingDir, "build/test/data/temp/decommission");
  Path excludeFile = new Path(dir, "exclude");
  Path includeFile = new Path(dir, "include");
  assertTrue(localFileSys.mkdirs(dir));
  DFSTestUtil.writeFile(localFileSys, excludeFile, "");
  DFSTestUtil.writeFile(localFileSys, includeFile, "");
  conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath());
  conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());

  // All hosts are on two racks, only one host on /rack2
  String racks[] = {"/rack1", "/rack2", "/rack1", "/rack1", "/rack1"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  try {
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Lower the replication factor so the blocks are over replicated
    REPLICATION_FACTOR = 2;
    fs.setReplication(filePath, REPLICATION_FACTOR);

    // Decommission one of the hosts with the block that is not on
    // the lone host on rack2 (if we decommission that host it would
    // be impossible to respect the rack policy).
    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(filePath), 0, Long.MAX_VALUE);
    for (String top : locs[0].getTopologyPaths()) {
      if (!top.startsWith("/rack2")) {
        String name = top.substring("/rack1".length()+1);
        DFSTestUtil.writeFile(localFileSys, excludeFile, name);
        ns.getBlockManager().getDatanodeManager().refreshNodes(conf);
        DFSTestUtil.waitForDecommission(fs, name);
        break;
      }
    }

    // Check the block still has sufficient # replicas across racks,
    // ie we didn't remove the replica on the host on /rack1.
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
Example 18
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. The above condition should hold
 * true as long as there is one good replica. This test verifies that.
 *
 * The test strategy:
 *   Bring up Cluster with 2 DataNodes
 *   Create a file of replication factor 2
 *   Corrupt one replica of a block of the file
 *   Verify that there is one good replica and 1 corrupt replica
 *     (corrupt replica should not be removed since number of good
 *     replicas (1) is less than replication factor (2)).
 *   Set the replication factor to 1
 *   Verify that the corrupt replica is removed.
 *     (corrupt replica should be removed since number of good
 *     replicas (1) is equal to replication factor (1))
 */
@Test(timeout=20000)
public void testWithReplicationFactorAsOne() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    namesystem.setReplication(fileName.toString(), (short) 1);

    // wait for 3 seconds so that all block reports are processed.
    for (int i = 0; i < 10; i++) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
      if (countReplicas(namesystem, block).corruptReplicas() == 0) {
        break;
      }
    }

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 19
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0
public void testUnderReplicationWithDecommissionDataNode() throws Exception {
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)1;
  File f = new File(HOST_FILE_PATH);
  if (f.exists()) {
    f.delete();
  }
  conf.set("dfs.hosts.exclude", HOST_FILE_PATH);
  LOG.info("Start the cluster");
  final MiniDFSCluster cluster =
      new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
        namesystem.heartbeats.toArray(
            new DatanodeDescriptor[REPLICATION_FACTOR]);
    assertEquals(1, datanodes.length);

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile2");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // shutdown the datanode
    DataNodeProperties dnprop = shutdownDataNode(cluster, datanodes[0]);
    assertEquals(1, namesystem.getMissingBlocksCount()); // one missing block
    assertEquals(0, namesystem.getNonCorruptUnderReplicatedBlocks());

    // Make the only datanode to be decommissioned
    LOG.info("Decommission the datanode " + dnprop);
    addToExcludeFile(namesystem.getConf(), datanodes);
    namesystem.refreshNodes(namesystem.getConf());

    // bring up the datanode
    cluster.restartDataNode(dnprop);

    // Wait for block report
    LOG.info("wait for its block report to come in");
    NumberReplicas num;
    long startTime = System.currentTimeMillis();
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
      Thread.sleep(1000);
      LOG.info("live: " + num.liveReplicas()
          + "Decom: " + num.decommissionedReplicas());
    } while (num.decommissionedReplicas() != 1
        && System.currentTimeMillis() - startTime < 30000);
    assertEquals("Decommissioning Replicas doesn't reach 1",
        1, num.decommissionedReplicas());
    assertEquals(1, namesystem.getNonCorruptUnderReplicatedBlocks());
    assertEquals(0, namesystem.getMissingBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
Example 20
Source File: TestPendingCorruptDnMessages.java From hadoop with Apache License 2.0
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();

  try {
    cluster.transitionToActive(0);

    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));

    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));

    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));

    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }

    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);

    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));

    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId
          + " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));

    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());

    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}