Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#restartDataNode()
The following examples show how to use
org.apache.hadoop.hdfs.MiniDFSCluster#restartDataNode() .
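All of the examples below share the same basic pattern: stop a DataNode with stopDataNode(), keep the returned MiniDFSCluster.DataNodeProperties handle, and hand it back to restartDataNode() (optionally with a second boolean argument to keep the same transfer port). The following is a minimal sketch of that pattern, not taken from any of the projects listed; the cluster size and configuration are illustrative assumptions, and MiniDFSCluster requires the hadoop-hdfs test artifacts on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class RestartDataNodeSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Node count is an arbitrary choice for illustration.
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    try {
      cluster.waitActive();
      // Stop DataNode 0 and keep its properties so it can be brought back.
      DataNodeProperties dnProps = cluster.stopDataNode(0);
      // ... exercise the cluster while the node is down ...
      // Restart the same DataNode; several examples below instead use the
      // (dnProps, true) overload to reuse the same transfer port.
      cluster.restartDataNode(dnProps);
      cluster.waitActive();
    } finally {
      cluster.shutdown();
    }
  }
}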
Example 1
Source File: TestPendingCorruptDnMessages.java From hadoop with Apache License 2.0
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
Example 2
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *     replicas (2) is less than the replication factor (3))
 *   Start a new datanode
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 3
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs,
    final Path fileName, int dnIndex, ExtendedBlock block) throws IOException {
  // corrupt the block on datanode dnIndex
  // the indexes change once the nodes are restarted,
  // but the data directory will not change
  assertTrue(cluster.corruptReplica(dnIndex, block));

  DataNodeProperties dnProps = cluster.stopDataNode(0);

  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait up to one minute for deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }

  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
Example 4
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs,
    final Path fileName, int dnIndex, ExtendedBlock block) throws IOException {
  // corrupt the block on datanode dnIndex
  // the indexes change once the nodes are restarted,
  // but the data directory will not change
  assertTrue(cluster.corruptReplica(dnIndex, block));

  DataNodeProperties dnProps = cluster.stopDataNode(0);

  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait up to one minute for deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }

  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
Example 5
Source File: TestPendingCorruptDnMessages.java From big-c with Apache License 2.0
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
Example 6
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *     replicas (2) is less than the replication factor (3))
 *   Start a new datanode
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 7
Source File: TestStandbyIsHot.java From big-c with Apache License 2.0
/**
 * Regression test for HDFS-2795:
 *  - Start an HA cluster with a DN.
 *  - Write several blocks to the FS with replication 1.
 *  - Shutdown the DN.
 *  - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
 *  - Restart the DN.
 * In the bug, the standby node would only very slowly notice the blocks returning
 * to the cluster.
 */
@Test(timeout=60000)
public void testDatanodeRestarts() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  // We read from the standby to watch block locations
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .build();
  try {
    NameNode nn0 = cluster.getNameNode(0);
    NameNode nn1 = cluster.getNameNode(1);

    cluster.transitionToActive(0);

    // Create 5 blocks.
    DFSTestUtil.createFile(cluster.getFileSystem(0),
        TEST_FILE_PATH, 5*1024, (short)1, 1L);

    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);

    // Stop the DN.
    DataNode dn = cluster.getDataNodes().get(0);
    String dnName = dn.getDatanodeId().getXferAddr();
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // Make sure both NNs register it as dead.
    BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
    BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);

    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());

    // The SBN will not have any blocks in its neededReplication queue
    // since the SBN doesn't process replication.
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());

    LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has no replicas",
        0, locs.get(0).getLocations().length);

    cluster.restartDataNode(dnProps);
    // Wait for both NNs to re-register the DN.
    cluster.waitActive(0);
    cluster.waitActive(1);

    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());

    locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has replicas again",
        1, locs.get(0).getLocations().length);
  } finally {
    cluster.shutdown();
  }
}
Example 8
Source File: TestNodeCount.java From RDFS with Apache License 2.0
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new Configuration();
  conf.setInt("dfs.replication.interval", 10);
  final short REPLICATION_FACTOR = (short)2;
  final MiniDFSCluster cluster =
    new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // keep a copy of all datanode descriptors
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
      namesystem.heartbeats.toArray(new DatanodeDescriptor[REPLICATION_FACTOR]);

    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive(false);

    LOG.info("Bringing down first DN");
    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      datanode.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }

    LOG.info("Waiting for block to be replicated");
    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    LOG.info("Restarting first datanode");
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive(false);

    LOG.info("Waiting for excess replicas to be detected");
    // check if the excess replica is detected
    waitForExcessReplicasToChange(namesystem, block, 1);

    LOG.info("Finding a non-excess node");
    // find a non-excess node
    Iterator<DatanodeDescriptor> iter = namesystem.blocksMap.nodeIterator(block);
    DatanodeDescriptor nonExcessDN = null;
    while (iter.hasNext()) {
      DatanodeDescriptor dn = iter.next();
      Collection<Block> blocks = namesystem.excessReplicateMap.get(dn.getStorageID());
      if (blocks == null || !blocks.contains(block)) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);

    LOG.info("Stopping non-excess node: " + nonExcessDN);
    // bring down the non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      nonExcessDN.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }

    LOG.info("Waiting for live replicas to hit repl factor");
    // The block should be replicated
    NumberReplicas num;
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
    } while (num.liveReplicas() != REPLICATION_FACTOR);

    LOG.info("Restarting first DN");
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive(false);

    // check if the excess replica is detected
    LOG.info("Waiting for excess replicas to be detected");
    waitForExcessReplicasToChange(namesystem, block, 2);
  } finally {
    cluster.shutdown();
  }
}
Example 9
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0
public void testUnderReplicationWithDecommissionDataNode() throws Exception {
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)1;
  File f = new File(HOST_FILE_PATH);
  if (f.exists()) {
    f.delete();
  }
  conf.set("dfs.hosts.exclude", HOST_FILE_PATH);
  LOG.info("Start the cluster");
  final MiniDFSCluster cluster =
    new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
      namesystem.heartbeats.toArray(
          new DatanodeDescriptor[REPLICATION_FACTOR]);
    assertEquals(1, datanodes.length);

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile2");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // shutdown the datanode
    DataNodeProperties dnprop = shutdownDataNode(cluster, datanodes[0]);
    assertEquals(1, namesystem.getMissingBlocksCount()); // one missing block
    assertEquals(0, namesystem.getNonCorruptUnderReplicatedBlocks());

    // Make the only datanode decommissioned
    LOG.info("Decommission the datanode " + dnprop);
    addToExcludeFile(namesystem.getConf(), datanodes);
    namesystem.refreshNodes(namesystem.getConf());

    // bring up the datanode
    cluster.restartDataNode(dnprop);

    // Wait for block report
    LOG.info("wait for its block report to come in");
    NumberReplicas num;
    long startTime = System.currentTimeMillis();
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
      Thread.sleep(1000);
      LOG.info("live: " + num.liveReplicas()
          + " decommissioned: " + num.decommissionedReplicas());
    } while (num.decommissionedReplicas() != 1 &&
        System.currentTimeMillis() - startTime < 30000);

    assertEquals("Decommissioning Replicas doesn't reach 1",
        1, num.decommissionedReplicas());
    assertEquals(1, namesystem.getNonCorruptUnderReplicatedBlocks());
    assertEquals(0, namesystem.getMissingBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
Example 10
Source File: TestDataNodeMultipleRegistrations.java From big-c with Apache License 2.0
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
  MiniDFSNNTopology top = new MiniDFSNNTopology()
    .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
      .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
      .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));

  top.setFederation(true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top)
      .numDataNodes(0).build();
  try {
    cluster.startDataNodes(conf, 1, true, null, null);
    // let the initialization complete
    Thread.sleep(10000);

    DataNode dn = cluster.getDataNodes().get(0);
    assertTrue("Datanode should be running", dn.isDatanodeUp());
    assertEquals("BPOfferService should be running", 1,
        dn.getAllBpOs().length);
    DataNodeProperties dnProp = cluster.stopDataNode(0);

    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();
    Configuration nn1 = cluster.getConfiguration(0);
    Configuration nn2 = cluster.getConfiguration(1);

    // setting up an invalid cluster
    StartupOption.FORMAT.setClusterId("cluster-2");
    DFSTestUtil.formatNameNode(nn1);
    MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1),
        FSNamesystem.getNamespaceDirs(nn2), nn2);
    cluster.restartNameNode(0, false);
    cluster.restartNameNode(1, false);
    cluster.restartDataNode(dnProp);

    // let the initialization complete
    Thread.sleep(10000);

    dn = cluster.getDataNodes().get(0);
    assertFalse("Datanode should have shutdown as only service failed",
        dn.isDatanodeUp());
  } finally {
    cluster.shutdown();
  }
}
Example 11
Source File: TestOverReplicatedBlocks.java From big-c with Apache License 2.0
/**
 * Test that processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas but keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove the block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait for one minute for deletion to succeed
    for (int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode =
        DataNodeTestUtils.getDNRegistrationForBP(
            cluster.getDataNodes().get(2), blockPoolId);

    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set the live datanodes' remaining space to be 0
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1;
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // the corrupt one won't be chosen to be the excess one
        // without 4910 the number of live replicas would be 0: the block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
Example 12
Source File: TestBlocksWithNotEnoughRacks.java From big-c with Apache License 2.0
@Test
public void testCorruptBlockRereplicatedAcrossRacks() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  int fileLen = 512;
  final Path filePath = new Path("/testFile");
  // Datanodes are spread across two racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, fileLen, REPLICATION_FACTOR, 1L);
    final String fileContent = DFSTestUtil.readFile(fs, filePath);

    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Corrupt a replica of the block
    int dnToCorrupt = DFSTestUtil.firstDnWithBlock(cluster, b);
    assertTrue(cluster.corruptReplica(dnToCorrupt, b));

    // Restart the datanode so blocks are re-scanned, and the corrupt
    // block is detected.
    cluster.restartDataNode(dnToCorrupt);

    // Wait for the namenode to notice the corrupt replica
    DFSTestUtil.waitCorruptReplicas(fs, ns, filePath, b, 1);

    // The rack policy is still respected
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Ensure all replicas are valid (the corrupt replica may not
    // have been cleaned up yet).
    for (int i = 0; i < racks.length; i++) {
      String blockContent = cluster.readBlockOnDataNode(i, b);
      if (blockContent != null && i != dnToCorrupt) {
        assertEquals("Corrupt replica", fileContent, blockContent);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
Example 13
Source File: TestNodeCount.java From big-c with Apache License 2.0
@Test
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster =
    new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    final FileSystem fs = cluster.getFileSystem();

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // keep a copy of all datanode descriptors
    final DatanodeDescriptor[] datanodes = hm.getDatanodes();

    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();

    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());

    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), datanode.getXferAddr());

    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if the excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
      checkTimeout("excess replicas not detected");
    }

    // find a non-excess node
    DatanodeDescriptor nonExcessDN = null;
    for (DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
      final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
      Collection<Block> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid());
      if (blocks == null || !blocks.contains(block.getLocalBlock())) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);

    // bring down the non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), nonExcessDN.getXferAddr());

    // The block should be replicated
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
      checkTimeout("live replica count not correct", 1000);
    }

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if the excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
      checkTimeout("excess replica count not equal to 2");
    }
  } finally {
    cluster.shutdown();
  }
}
Example 14
Source File: TestNetworkTopology.java From hadoop with Apache License 2.0
@Test(timeout=180000)
public void testInvalidNetworkTopologiesNotCachedInHdfs() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    // bad rack topology
    String racks[] = { "/a/b", "/c" };
    String hosts[] = { "foo1.example.com", "foo2.example.com" };
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).
      racks(racks).hosts(hosts).build();
    cluster.waitActive();

    NamenodeProtocols nn = cluster.getNameNodeRpc();
    Assert.assertNotNull(nn);

    // Wait for one DataNode to register.
    // The other DataNode will not be able to register because of the rack mismatch.
    DatanodeInfo[] info;
    while (true) {
      info = nn.getDatanodeReport(DatanodeReportType.LIVE);
      Assert.assertFalse(info.length == 2);
      if (info.length == 1) {
        break;
      }
      Thread.sleep(1000);
    }

    // Set the network topology of the other node to match the network
    // topology of the node that came up.
    int validIdx = info[0].getHostName().equals(hosts[0]) ? 0 : 1;
    int invalidIdx = validIdx == 1 ? 0 : 1;
    StaticMapping.addNodeToRack(hosts[invalidIdx], racks[validIdx]);
    LOG.info("datanode " + validIdx + " came up with network location " +
      info[0].getNetworkLocation());

    // Restart the DN with the invalid topology and wait for it to register.
    cluster.restartDataNode(invalidIdx);
    Thread.sleep(5000);
    while (true) {
      info = nn.getDatanodeReport(DatanodeReportType.LIVE);
      if (info.length == 2) {
        break;
      }
      if (info.length == 0) {
        LOG.info("got no valid DNs");
      } else if (info.length == 1) {
        LOG.info("got one valid DN: " + info[0].getHostName() +
            " (at " + info[0].getNetworkLocation() + ")");
      }
      Thread.sleep(1000);
    }
    Assert.assertEquals(info[0].getNetworkLocation(),
        info[1].getNetworkLocation());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 15
Source File: TestNetworkTopology.java From big-c with Apache License 2.0
@Test(timeout=180000)
public void testInvalidNetworkTopologiesNotCachedInHdfs() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    // bad rack topology
    String racks[] = { "/a/b", "/c" };
    String hosts[] = { "foo1.example.com", "foo2.example.com" };
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).
      racks(racks).hosts(hosts).build();
    cluster.waitActive();

    NamenodeProtocols nn = cluster.getNameNodeRpc();
    Assert.assertNotNull(nn);

    // Wait for one DataNode to register.
    // The other DataNode will not be able to register because of the rack mismatch.
    DatanodeInfo[] info;
    while (true) {
      info = nn.getDatanodeReport(DatanodeReportType.LIVE);
      Assert.assertFalse(info.length == 2);
      if (info.length == 1) {
        break;
      }
      Thread.sleep(1000);
    }

    // Set the network topology of the other node to match the network
    // topology of the node that came up.
    int validIdx = info[0].getHostName().equals(hosts[0]) ? 0 : 1;
    int invalidIdx = validIdx == 1 ? 0 : 1;
    StaticMapping.addNodeToRack(hosts[invalidIdx], racks[validIdx]);
    LOG.info("datanode " + validIdx + " came up with network location " +
      info[0].getNetworkLocation());

    // Restart the DN with the invalid topology and wait for it to register.
    cluster.restartDataNode(invalidIdx);
    Thread.sleep(5000);
    while (true) {
      info = nn.getDatanodeReport(DatanodeReportType.LIVE);
      if (info.length == 2) {
        break;
      }
      if (info.length == 0) {
        LOG.info("got no valid DNs");
      } else if (info.length == 1) {
        LOG.info("got one valid DN: " + info[0].getHostName() +
            " (at " + info[0].getNetworkLocation() + ")");
      }
      Thread.sleep(1000);
    }
    Assert.assertEquals(info[0].getNetworkLocation(),
        info[1].getNetworkLocation());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 16
Source File: TestDataNodeMultipleRegistrations.java From hadoop with Apache License 2.0
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
  MiniDFSNNTopology top = new MiniDFSNNTopology()
    .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
      .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
      .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));

  top.setFederation(true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top)
      .numDataNodes(0).build();
  try {
    cluster.startDataNodes(conf, 1, true, null, null);
    // let the initialization complete
    Thread.sleep(10000);

    DataNode dn = cluster.getDataNodes().get(0);
    assertTrue("Datanode should be running", dn.isDatanodeUp());
    assertEquals("BPOfferService should be running", 1,
        dn.getAllBpOs().length);
    DataNodeProperties dnProp = cluster.stopDataNode(0);

    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();
    Configuration nn1 = cluster.getConfiguration(0);
    Configuration nn2 = cluster.getConfiguration(1);

    // setting up an invalid cluster
    StartupOption.FORMAT.setClusterId("cluster-2");
    DFSTestUtil.formatNameNode(nn1);
    MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1),
        FSNamesystem.getNamespaceDirs(nn2), nn2);
    cluster.restartNameNode(0, false);
    cluster.restartNameNode(1, false);
    cluster.restartDataNode(dnProp);

    // let the initialization complete
    Thread.sleep(10000);

    dn = cluster.getDataNodes().get(0);
    assertFalse("Datanode should have shutdown as only service failed",
        dn.isDatanodeUp());
  } finally {
    cluster.shutdown();
  }
}
Example 17
Source File: TestOverReplicatedBlocks.java From RDFS with Apache License 2.0
/**
 * Test that processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas but keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove the block scanner log to trigger block scanning
    File scanLog = new File(cluster.getBlockDirectory("data1").getParent(),
        "dncp_block_verification.log.curr");
    scanLog.delete();

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    final DatanodeID corruptDataNode =
      cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set the live datanodes' remaining space to be 0
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }

    // decrease the replication factor to 1;
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);

    // the corrupt one won't be chosen to be the excess one
    // without 4910 the number of live replicas would be 0: the block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());

    // Test the case when multiple calls to setReplication still succeed.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(
        fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);

    // wait up to one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are handled asynchronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 18
Source File: TestBlocksWithNotEnoughRacks.java From hadoop with Apache License 2.0
@Test
public void testCorruptBlockRereplicatedAcrossRacks() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  int fileLen = 512;
  final Path filePath = new Path("/testFile");
  // Datanodes are spread across two racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, fileLen, REPLICATION_FACTOR, 1L);
    final String fileContent = DFSTestUtil.readFile(fs, filePath);

    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Corrupt a replica of the block
    int dnToCorrupt = DFSTestUtil.firstDnWithBlock(cluster, b);
    assertTrue(cluster.corruptReplica(dnToCorrupt, b));

    // Restart the datanode so blocks are re-scanned, and the corrupt
    // block is detected.
    cluster.restartDataNode(dnToCorrupt);

    // Wait for the namenode to notice the corrupt replica
    DFSTestUtil.waitCorruptReplicas(fs, ns, filePath, b, 1);

    // The rack policy is still respected
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Ensure all replicas are valid (the corrupt replica may not
    // have been cleaned up yet).
    for (int i = 0; i < racks.length; i++) {
      String blockContent = cluster.readBlockOnDataNode(i, b);
      if (blockContent != null && i != dnToCorrupt) {
        assertEquals("Corrupt replica", fileContent, blockContent);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
Example 19
Source File: TestNodeCount.java From hadoop with Apache License 2.0
@Test
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster =
    new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    final FileSystem fs = cluster.getFileSystem();

    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // keep a copy of all datanode descriptors
    final DatanodeDescriptor[] datanodes = hm.getDatanodes();

    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();

    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());

    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), datanode.getXferAddr());

    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if the excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
      checkTimeout("excess replicas not detected");
    }

    // find a non-excess node
    DatanodeDescriptor nonExcessDN = null;
    for (DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
      final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
      Collection<Block> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid());
      if (blocks == null || !blocks.contains(block.getLocalBlock())) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);

    // bring down the non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), nonExcessDN.getXferAddr());

    // The block should be replicated
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
      checkTimeout("live replica count not correct", 1000);
    }

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if the excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
      checkTimeout("excess replica count not equal to 2");
    }
  } finally {
    cluster.shutdown();
  }
}
Example 20
Source File: TestOverReplicatedBlocks.java From hadoop-gpu with Apache License 2.0
/**
 * Test that processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas but keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);

    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove the block scanner log to trigger block scanning
    File scanLog = new File(System.getProperty("test.build.data"),
        "dfs/data/data1/current/dncp_block_verification.log.curr");
    // wait for one minute for deletion to succeed
    for (int i = 0; !scanLog.delete(); i++) {
      assertTrue("Could not delete log file in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }

    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);

    final DatanodeID corruptDataNode =
      cluster.getDataNodes().get(2).dnRegistration;
    final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
    synchronized (namesystem.heartbeats) {
      // set the live datanodes' remaining space to be 0
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 0);
        }
      }

      // decrease the replication factor to 1;
      namesystem.setReplication(fileName.toString(), (short)1);

      // the corrupt one won't be chosen to be the excess one
      // without 4910 the number of live replicas would be 0: the block gets lost
      assertEquals(1, namesystem.countNodes(block).liveReplicas());
    }
  } finally {
    cluster.shutdown();
  }
}