org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties Java Examples
The following examples show how to use
org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties.
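Most of the examples below follow the same pattern: stop a DataNode in a running MiniDFSCluster, keep the returned DataNodeProperties handle (which carries the stopped node's configuration and storage directories), optionally adjust its conf, and later pass it back to restartDataNode so the node rejoins the cluster with the same identity. The sketch below illustrates that pattern in isolation; it is a minimal, hedged example, and the class name, cluster size, and comments are illustrative assumptions rather than code taken from any of the listed projects.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class DataNodeRestartSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Illustrative cluster size; real tests pick whatever topology they need.
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(3)
        .build();
    try {
      cluster.waitActive();
      // Stop the first DataNode; the returned handle remembers its
      // configuration and data directories.
      DataNodeProperties dnProps = cluster.stopDataNode(0);
      // ... exercise the cluster while the node is down ...
      // Bring the same DataNode back; the boolean asks the cluster to
      // keep the node's original transfer port.
      cluster.restartDataNode(dnProps, true);
      cluster.waitActive();
    } finally {
      cluster.shutdown();
    }
  }
}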
Example #1
Source File: TestAsyncLogRolling.java From hbase with Apache License 2.0 | 6 votes |
@Test
public void testLogRollOnDatanodeDeath() throws IOException, InterruptedException {
  dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);
  tableName = getName();
  Table table = createTestTable(tableName);
  TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
  doPut(table, 1);
  server = TEST_UTIL.getRSForFirstRegionInTable(table.getName());
  RegionInfo hri = server.getRegions(table.getName()).get(0).getRegionInfo();
  AsyncFSWAL wal = (AsyncFSWAL) server.getWAL(hri);
  int numRolledLogFiles = AsyncFSWALProvider.getNumRolledLogFiles(wal);
  DatanodeInfo[] dnInfos = wal.getPipeline();
  DataNodeProperties dnProp = TEST_UTIL.getDFSCluster().stopDataNode(dnInfos[0].getName());
  TEST_UTIL.getDFSCluster().restartDataNode(dnProp);
  doPut(table, 2);
  assertEquals(numRolledLogFiles + 1, AsyncFSWALProvider.getNumRolledLogFiles(wal));
}
Example #2
Source File: TestFanOutOneBlockAsyncDFSOutput.java From hbase with Apache License 2.0 | 6 votes |
@Test
public void testConnectToDatanodeFailed() throws IOException, ClassNotFoundException,
    NoSuchMethodException, IllegalAccessException, InvocationTargetException,
    InterruptedException, NoSuchFieldException {
  Field xceiverServerDaemonField = DataNode.class.getDeclaredField("dataXceiverServer");
  xceiverServerDaemonField.setAccessible(true);
  Class<?> xceiverServerClass =
      Class.forName("org.apache.hadoop.hdfs.server.datanode.DataXceiverServer");
  Method numPeersMethod = xceiverServerClass.getDeclaredMethod("getNumPeers");
  numPeersMethod.setAccessible(true);
  // make one datanode broken
  DataNodeProperties dnProp = CLUSTER.stopDataNode(0);
  Path f = new Path("/test");
  EventLoop eventLoop = EVENT_LOOP_GROUP.next();
  try (FanOutOneBlockAsyncDFSOutput output = FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS,
      f, true, false, (short) 3, FS.getDefaultBlockSize(), eventLoop, CHANNEL_CLASS)) {
    // should exclude the dead dn when retry so here we only have 2 DNs in pipeline
    assertEquals(2, output.getPipeline().length);
  } finally {
    CLUSTER.restartDataNode(dnProp);
  }
}
Example #3
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0 | 5 votes |
private DataNodeProperties shutdownDataNode(MiniDFSCluster cluster, DatanodeDescriptor datanode) {
  LOG.info("shutdown datanode: " + datanode.getName());
  DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
  FSNamesystem namesystem = cluster.getNameNode().namesystem;
  // make sure that NN detects that the datanode is down
  synchronized (namesystem.heartbeats) {
    datanode.setLastUpdate(0); // mark it dead
    namesystem.heartbeatCheck();
  }
  return dnprop;
}
Example #4
Source File: TestDataTransferKeepalive.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Test for the case where the client begins to read a long block, but doesn't
 * read bytes off the stream quickly. The datanode should time out sending the
 * chunks and the transceiver should die, even if it has a long keepalive.
 */
@Test(timeout=300000)
public void testSlowReader() throws Exception {
  // Set a client socket cache expiry time much longer than
  // the datanode-side expiration time.
  final long CLIENT_EXPIRY_MS = 600000L;
  Configuration clientConf = new Configuration(conf);
  clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
  clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
  DistributedFileSystem fs =
      (DistributedFileSystem)FileSystem.get(cluster.getURI(), clientConf);
  // Restart the DN with a shorter write timeout.
  DataNodeProperties props = cluster.stopDataNode(0);
  props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY, WRITE_TIMEOUT);
  props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 120000);
  assertTrue(cluster.restartDataNode(props, true));
  dn = cluster.getDataNodes().get(0);
  // Wait for heartbeats to avoid a startup race where we
  // try to write the block while the DN is still starting.
  cluster.triggerHeartbeats();
  DFSTestUtil.createFile(fs, TEST_FILE, 1024*1024*8L, (short)1, 0L);
  FSDataInputStream stm = fs.open(TEST_FILE);
  stm.read();
  assertXceiverCount(1);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    public Boolean get() {
      // DN should time out in sendChunks, and this should force
      // the xceiver to exit.
      return getXceiverCountWithoutServer() == 0;
    }
  }, 500, 50000);
  IOUtils.closeStream(stm);
}
Example #5
Source File: TestRollingUpgrade.java From big-c with Apache License 2.0 | 5 votes |
private static void rollbackRollingUpgrade(Path foo, Path bar, Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DataNodeProperties dnprop = cluster.stopDataNode(0);
  cluster.restartNameNode("-rollingUpgrade", "rollback");
  cluster.restartDataNode(dnprop, true);
  final DistributedFileSystem dfs = cluster.getFileSystem();
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertFalse(dfs.exists(bar));
  AppendTestUtil.checkFullFile(dfs, file, data.length, data);
}
Example #6
Source File: TestDataTransferKeepalive.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Test for the case where the client begins to read a long block, but doesn't
 * read bytes off the stream quickly. The datanode should time out sending the
 * chunks and the transceiver should die, even if it has a long keepalive.
 */
@Test(timeout=300000)
public void testSlowReader() throws Exception {
  // Set a client socket cache expiry time much longer than
  // the datanode-side expiration time.
  final long CLIENT_EXPIRY_MS = 600000L;
  Configuration clientConf = new Configuration(conf);
  clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
  clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
  DistributedFileSystem fs =
      (DistributedFileSystem)FileSystem.get(cluster.getURI(), clientConf);
  // Restart the DN with a shorter write timeout.
  DataNodeProperties props = cluster.stopDataNode(0);
  props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY, WRITE_TIMEOUT);
  props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY, 120000);
  assertTrue(cluster.restartDataNode(props, true));
  dn = cluster.getDataNodes().get(0);
  // Wait for heartbeats to avoid a startup race where we
  // try to write the block while the DN is still starting.
  cluster.triggerHeartbeats();
  DFSTestUtil.createFile(fs, TEST_FILE, 1024*1024*8L, (short)1, 0L);
  FSDataInputStream stm = fs.open(TEST_FILE);
  stm.read();
  assertXceiverCount(1);
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    public Boolean get() {
      // DN should time out in sendChunks, and this should force
      // the xceiver to exit.
      return getXceiverCountWithoutServer() == 0;
    }
  }, 500, 50000);
  IOUtils.closeStream(stm);
}
Example #7
Source File: TestFileAppend4.java From RDFS with Apache License 2.0 | 5 votes |
private void runDNRestartCorruptType(CorruptionType corrupt) throws Exception {
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);
    file1 = new Path("/dnDeath.dat");
    // write 1/2 block & close
    stm = fs1.create(file1, true, 1024, rep, 4096);
    AppendTestUtil.write(stm, 0, 1024);
    stm.sync();
    loseLeases(fs1);
    DFSOutputStream dfso = (DFSOutputStream)stm.getWrappedStream();
    dfso.abortForTests();
    // close the primary DN
    DataNodeProperties badDN = cluster.stopDataNode(0);
    // Truncate the block on the primary DN
    corruptDataNode(0, corrupt);
    // Start the DN back up
    cluster.restartDataNode(badDN);
    // Recover the lease
    FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(fs1.getConf());
    recoverFile(fs2);
    assertFileSize(fs2, 1024);
    checkFile(fs2, 1024);
  } finally {
    // explicitly do not shut down fs1, since it's been frozen up by
    // killing the DataStreamer and not allowing recovery
    cluster.shutdown();
  }
}
Example #8
Source File: TestFileAppend4.java From RDFS with Apache License 2.0 | 5 votes |
/**
 * Test that the restart of a DN and the subsequent pipeline recovery do not cause
 * a file to become prematurely considered "complete", when it's a fresh file
 * with no .append() called.
 */
public void testNotPrematurelyCompleteWithFailureNotReopened() throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  NameNode nn = cluster.getNameNode();
  FileSystem fs1 = cluster.getFileSystem();
  try {
    short rep = 3; // replication
    file1 = new Path("/delayedReceiveBlock");
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, 64*1024*1024);
    LOG.info("======== Writing");
    AppendTestUtil.write(stm, 0, 1024*1024);
    LOG.info("======== Waiting for a block allocation");
    waitForBlockReplication(fs1, "/delayedReceiveBlock", 0, 3000);
    LOG.info("======== Checking not complete");
    assertFalse(NameNodeAdapter.checkFileProgress(nn.namesystem, "/delayedReceiveBlock", true));
    // Stop one of the DNs, don't restart
    MiniDFSCluster.DataNodeProperties dnprops = cluster.stopDataNode(0);
    // Write some more data
    AppendTestUtil.write(stm, 0, 1024*1024);
    // Make sure we don't see the file as complete
    LOG.info("======== Checking progress");
    assertFalse(NameNodeAdapter.checkFileProgress(nn.namesystem, "/delayedReceiveBlock", true));
    LOG.info("======== Closing");
    stm.close();
  } finally {
    LOG.info("======== Cleaning up");
    fs1.close();
    cluster.shutdown();
  }
}
Example #9
Source File: TestDecommissioningStatus.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());
  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();
  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
Example #10
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0 | 5 votes |
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
    int dnIndex, ExtendedBlock block) throws IOException {
  // corrupt the block on datanode dnIndex
  // the indexes change once the nodes are restarted,
  // but the data directory will not change
  assertTrue(cluster.corruptReplica(dnIndex, block));
  DataNodeProperties dnProps = cluster.stopDataNode(0);
  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait for one minute for deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }
  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
Example #11
Source File: TestProcessCorruptBlocks.java From hadoop with Apache License 2.0 | 5 votes |
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new Datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *      replicas (2) is less than the replication factor (3))
 *   Start a new data node
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);
    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());
    cluster.restartDataNode(dnPropsFourth);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example #12
Source File: TestPendingCorruptDnMessages.java From hadoop with Apache License 2.0 | 5 votes |
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
Example #13
Source File: TestPendingCorruptDnMessages.java From big-c with Apache License 2.0 | 5 votes |
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
Example #14
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0 | 5 votes |
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new Datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *      replicas (2) is less than the replication factor (3))
 *   Start a new data node
 *   Verify that a new replica is created and the corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);
    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());
    cluster.restartDataNode(dnPropsFourth);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);
    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example #15
Source File: TestRollingUpgrade.java From hadoop with Apache License 2.0 | 5 votes |
private static void rollbackRollingUpgrade(Path foo, Path bar, Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DataNodeProperties dnprop = cluster.stopDataNode(0);
  cluster.restartNameNode("-rollingUpgrade", "rollback");
  cluster.restartDataNode(dnprop, true);
  final DistributedFileSystem dfs = cluster.getFileSystem();
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertFalse(dfs.exists(bar));
  AppendTestUtil.checkFullFile(dfs, file, data.length, data);
}
Example #16
Source File: TestProcessCorruptBlocks.java From big-c with Apache License 2.0 | 5 votes |
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
    int dnIndex, ExtendedBlock block) throws IOException {
  // corrupt the block on datanode dnIndex
  // the indexes change once the nodes are restarted,
  // but the data directory will not change
  assertTrue(cluster.corruptReplica(dnIndex, block));
  DataNodeProperties dnProps = cluster.stopDataNode(0);
  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait for one minute for deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }
  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
Example #17
Source File: TestDecommissioningStatus.java From big-c with Apache License 2.0 | 5 votes |
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(), false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());
  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();
  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
Example #18
Source File: TestFileAppend4.java From RDFS with Apache License 2.0 | 4 votes |
public void testFullClusterPowerLoss() throws Exception {
  cluster = new MiniDFSCluster(conf, 2, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    short rep = 2; // replication
    assertTrue(BLOCK_SIZE%4 == 0);
    file1 = new Path("/dnDeath.dat");
    // write 1/2 block & close
    stm = fs1.create(file1, true, 1024, rep, 4096);
    AppendTestUtil.write(stm, 0, 1024);
    stm.sync();
    loseLeases(fs1);
    DFSOutputStream dfso = (DFSOutputStream)stm.getWrappedStream();
    dfso.abortForTests();
    // close the DNs
    DataNodeProperties badDN = cluster.stopDataNode(0);
    DataNodeProperties badDN2 = cluster.stopDataNode(0); // what was 1 is now 0
    assertNotNull(badDN);
    assertNotNull(badDN2);
    // Truncate one of them as if its journal got corrupted
    corruptDataNode(0, CorruptionType.TRUNCATE_BLOCK_HALF);
    // Start the DN back up
    cluster.restartDataNode(badDN);
    cluster.restartDataNode(badDN2);
    // Wait for a heartbeat to make sure we get the initial block
    // report of the replicasBeingWritten
    cluster.waitForDNHeartbeat(0, 10000);
    cluster.waitForDNHeartbeat(1, 10000);
    // Recover the lease
    FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(fs1.getConf());
    recoverFile(fs2);
    assertFileSize(fs2, 512);
    checkFile(fs2, 512);
  } finally {
    // explicitly do not shut down fs1, since it's been frozen up by
    // killing the DataStreamer and not allowing recovery
    cluster.shutdown();
  }
}
Example #19
Source File: TestPendingInvalidateBlock.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Test whether we can delay the deletion of unknown blocks in DataNode's
 * first several block reports.
 */
@Test
public void testPendingDeleteUnknownBlocks() throws Exception {
  final int fileNum = 5; // 5 files
  final Path[] files = new Path[fileNum];
  final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
  // create a group of files, each file contains 1 block
  for (int i = 0; i < fileNum; i++) {
    files[i] = new Path("/file" + i);
    DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
  }
  // wait until all DataNodes have replicas
  waitForReplication();
  for (int i = REPLICATION - 1; i >= 0; i--) {
    dnprops[i] = cluster.stopDataNode(i);
  }
  Thread.sleep(2000);
  // delete 2 files, we still have 3 files remaining so that we can cover
  // every DN storage
  for (int i = 0; i < 2; i++) {
    dfs.delete(files[i], true);
  }
  // restart NameNode
  cluster.restartNameNode(false);
  InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox
      .getInternalState(cluster.getNamesystem().getBlockManager(), "invalidateBlocks");
  InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
  Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
  Whitebox.setInternalState(cluster.getNamesystem().getBlockManager(),
      "invalidateBlocks", mockIb);
  Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
  // restart DataNodes
  for (int i = 0; i < REPLICATION; i++) {
    cluster.restartDataNode(dnprops[i], true);
  }
  cluster.waitActive();
  for (int i = 0; i < REPLICATION; i++) {
    DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
  }
  Thread.sleep(2000);
  // make sure we have received block reports by checking the total block #
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());
  cluster.restartNameNode(true);
  Thread.sleep(6000);
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
}
Example #20
Source File: TestDataNodeMultipleRegistrations.java From big-c with Apache License 2.0 | 4 votes |
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
  MiniDFSNNTopology top = new MiniDFSNNTopology()
      .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
          .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
          .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));
  top.setFederation(true);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top)
      .numDataNodes(0).build();
  try {
    cluster.startDataNodes(conf, 1, true, null, null);
    // let the initialization be complete
    Thread.sleep(10000);
    DataNode dn = cluster.getDataNodes().get(0);
    assertTrue("Datanode should be running", dn.isDatanodeUp());
    assertEquals("BPOfferService should be running", 1, dn.getAllBpOs().length);
    DataNodeProperties dnProp = cluster.stopDataNode(0);
    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();
    Configuration nn1 = cluster.getConfiguration(0);
    Configuration nn2 = cluster.getConfiguration(1);
    // setting up invalid cluster
    StartupOption.FORMAT.setClusterId("cluster-2");
    DFSTestUtil.formatNameNode(nn1);
    MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1),
        FSNamesystem.getNamespaceDirs(nn2), nn2);
    cluster.restartNameNode(0, false);
    cluster.restartNameNode(1, false);
    cluster.restartDataNode(dnProp);
    // let the initialization be complete
    Thread.sleep(10000);
    dn = cluster.getDataNodes().get(0);
    assertFalse("Datanode should have shutdown as only service failed",
        dn.isDatanodeUp());
  } finally {
    cluster.shutdown();
  }
}
Example #21
Source File: TestOverReplicatedBlocks.java From big-c with Apache License 2.0 | 4 votes |
/**
 * Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait for one minute for deletion to succeed
    for (int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = DataNodeTestUtils.getDNRegistrationForBP(
        cluster.getDataNodes().get(2), blockPoolId);
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanodes' remaining space to be 0
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }
        // decrease the replication factor to 1;
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);
        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
Example #22
Source File: TestFileAppend4.java From RDFS with Apache License 2.0 | 4 votes |
/**
 * Test that when a DN starts up with bbws from a file that got
 * removed or finalized when it was down, the block gets deleted.
 */
public void testBBWCleanupOnStartup() throws Throwable {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int) BLOCK_SIZE / 2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE % 4 == 0);
    file1 = new Path("/bbwCleanupOnStartup.dat");
    // write 1/2 block & sync
    stm = fs1.create(file1, true, (int) BLOCK_SIZE * 2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.sync();
    String dataDirs = cluster.getDataNodes().get(0).getConf().get("dfs.data.dir");
    // close one of the datanodes
    MiniDFSCluster.DataNodeProperties dnprops = cluster.stopDataNode(0);
    stm.close();
    List<File> bbwFilesAfterShutdown = getBBWFiles(dataDirs);
    assertEquals(1, bbwFilesAfterShutdown.size());
    assertTrue(cluster.restartDataNode(dnprops));
    List<File> bbwFilesAfterRestart = null;
    // Wait up to 10 heartbeats for the files to get removed - it should
    // really happen after just a couple.
    for (int i = 0; i < 10; i++) {
      LOG.info("Waiting for heartbeat #" + i + " after DN restart");
      cluster.waitForDNHeartbeat(0, 10000);
      // Check if it has been deleted
      bbwFilesAfterRestart = getBBWFiles(dataDirs);
      if (bbwFilesAfterRestart.size() == 0) {
        break;
      }
    }
    assertEquals(0, bbwFilesAfterRestart.size());
  } finally {
    fs1.close();
    cluster.shutdown();
  }
}
Example #23
Source File: TestPendingCorruptDnMessages.java From big-c with Apache License 2.0 | 4 votes |
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();
  try {
    cluster.transitionToActive(0);
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));
    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }
    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));
    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));
    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
Example #24
Source File: TestFileAppend4.java From RDFS with Apache License 2.0 | 4 votes |
/**
 * Test that the restart of a DN and the subsequent pipeline recovery do not cause
 * a file to become prematurely considered "complete". (ie that the block
 * synchronization as part of pipeline recovery doesn't add the block to the
 * nodes taking part in recovery)
 */
public void testNotPrematurelyCompleteWithFailure() throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    int halfBlock = (int)BLOCK_SIZE/2;
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);
    file1 = new Path("/delayedReceiveBlock");
    // write 1/2 block & close
    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, BLOCK_SIZE);
    AppendTestUtil.write(stm, 0, halfBlock);
    stm.close();
    NameNode nn = cluster.getNameNode();
    LOG.info("======== Appending");
    stm = fs1.append(file1);
    LOG.info("======== Writing");
    AppendTestUtil.write(stm, 0, halfBlock/4);
    // restart one of the datanodes and wait for a few of its heartbeats
    // so that it will report the recovered replica
    MiniDFSCluster.DataNodeProperties dnprops = cluster.stopDataNode(0);
    stm.sync();
    assertTrue(cluster.restartDataNode(dnprops));
    for (int i = 0; i < 2; i++) {
      cluster.waitForDNHeartbeat(0, 3000);
    }
    AppendTestUtil.write(stm, 0, halfBlock/4);
    LOG.info("======== Checking progress");
    assertFalse(NameNodeAdapter.checkFileProgress(nn.namesystem, "/delayedReceiveBlock", true));
    LOG.info("======== Closing");
    stm.close();
  } finally {
    LOG.info("======== Cleaning up");
    fs1.close();
    cluster.shutdown();
  }
}
Example #25
Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0 | 4 votes |
public void testUnderReplicationWithDecommissionDataNode() throws Exception {
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)1;
  File f = new File(HOST_FILE_PATH);
  if (f.exists()) {
    f.delete();
  }
  conf.set("dfs.hosts.exclude", HOST_FILE_PATH);
  LOG.info("Start the cluster");
  final MiniDFSCluster cluster =
      new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
        namesystem.heartbeats.toArray(new DatanodeDescriptor[REPLICATION_FACTOR]);
    assertEquals(1, datanodes.length);
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile2");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    // shutdown the datanode
    DataNodeProperties dnprop = shutdownDataNode(cluster, datanodes[0]);
    assertEquals(1, namesystem.getMissingBlocksCount()); // one missing block
    assertEquals(0, namesystem.getNonCorruptUnderReplicatedBlocks());
    // Make the only datanode to be decommissioned
    LOG.info("Decommission the datanode " + dnprop);
    addToExcludeFile(namesystem.getConf(), datanodes);
    namesystem.refreshNodes(namesystem.getConf());
    // bring up the datanode
    cluster.restartDataNode(dnprop);
    // Wait for block report
    LOG.info("wait for its block report to come in");
    NumberReplicas num;
    long startTime = System.currentTimeMillis();
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
      Thread.sleep(1000);
      LOG.info("live: " + num.liveReplicas() + "Decom: " + num.decommissionedReplicas());
    } while (num.decommissionedReplicas() != 1 &&
        System.currentTimeMillis() - startTime < 30000);
    assertEquals("Decommissioning Replicas doesn't reach 1",
        1, num.decommissionedReplicas());
    assertEquals(1, namesystem.getNonCorruptUnderReplicatedBlocks());
    assertEquals(0, namesystem.getMissingBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
Example #26
Source File: TestNodeCount.java From RDFS with Apache License 2.0 | 4 votes |
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new Configuration();
  conf.setInt("dfs.replication.interval", 10);
  final short REPLICATION_FACTOR = (short)2;
  final MiniDFSCluster cluster =
      new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    // keep a copy of all datanode descriptors
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
        namesystem.heartbeats.toArray(new DatanodeDescriptor[REPLICATION_FACTOR]);
    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive(false);
    LOG.info("Bringing down first DN");
    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      datanode.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }
    LOG.info("Waiting for block to be replicated");
    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    LOG.info("Restarting first datanode");
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive(false);
    LOG.info("Waiting for excess replicas to be detected");
    // check if excessive replica is detected
    waitForExcessReplicasToChange(namesystem, block, 1);
    LOG.info("Finding a non-excess node");
    // find out a non-excess node
    Iterator<DatanodeDescriptor> iter = namesystem.blocksMap.nodeIterator(block);
    DatanodeDescriptor nonExcessDN = null;
    while (iter.hasNext()) {
      DatanodeDescriptor dn = iter.next();
      Collection<Block> blocks = namesystem.excessReplicateMap.get(dn.getStorageID());
      if (blocks == null || !blocks.contains(block)) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);
    LOG.info("Stopping non-excess node: " + nonExcessDN);
    // bring down non excessive datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      nonExcessDN.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }
    LOG.info("Waiting for live replicas to hit repl factor");
    // The block should be replicated
    NumberReplicas num;
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
    } while (num.liveReplicas() != REPLICATION_FACTOR);
    LOG.info("Restarting first DN");
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive(false);
    // check if excessive replica is detected
    LOG.info("Waiting for excess replicas to be detected");
    waitForExcessReplicasToChange(namesystem, block, 2);
  } finally {
    cluster.shutdown();
  }
}
Example #27
Source File: TestOverReplicatedBlocks.java From RDFS with Apache License 2.0 | 4 votes |
/**
 * Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanLog = new File(cluster.getBlockDirectory("data1").getParent(),
        "dncp_block_verification.log.curr");
    // wait for one minute for deletion to succeed
    scanLog.delete();
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    final DatanodeID corruptDataNode =
        cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set live datanodes' remaining space to be 0
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }
    // decrease the replication factor to 1;
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);
    // corrupt one won't be chosen to be excess one
    // without 4910 the number of live replicas would be 0: block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());
    // Test the case when multiple calls to setReplication still succeed.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);
    // wait up to one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are being handled asynchronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example #28
Source File: TestNodeCount.java From hadoop-gpu with Apache License 2.0 | 4 votes |
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)2;
  final MiniDFSCluster cluster =
      new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    // keep a copy of all datanode descriptors
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
        namesystem.heartbeats.toArray(new DatanodeDescriptor[REPLICATION_FACTOR]);
    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      datanode.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }
    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    // check if excessive replica is detected
    NumberReplicas num = null;
    do {
      synchronized (namesystem) {
        num = namesystem.countNodes(block);
      }
    } while (num.excessReplicas() == 0);
    // find out a non-excess node
    Iterator<DatanodeDescriptor> iter = namesystem.blocksMap.nodeIterator(block);
    DatanodeDescriptor nonExcessDN = null;
    while (iter.hasNext()) {
      DatanodeDescriptor dn = iter.next();
      Collection<Block> blocks = namesystem.excessReplicateMap.get(dn.getStorageID());
      if (blocks == null || !blocks.contains(block)) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN != null);
    // bring down non excessive datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getName());
    // make sure that NN detects that the datanode is down
    synchronized (namesystem.heartbeats) {
      nonExcessDN.setLastUpdate(0); // mark it dead
      namesystem.heartbeatCheck();
    }
    // The block should be replicated
    do {
      num = namesystem.countNodes(block);
    } while (num.liveReplicas() != REPLICATION_FACTOR);
    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    // check if excessive replica is detected
    do {
      num = namesystem.countNodes(block);
    } while (num.excessReplicas() == 2);
  } finally {
    cluster.shutdown();
  }
}
Example #29
Source File: TestOverReplicatedBlocks.java From hadoop-gpu with Apache License 2.0 | 4 votes |
/**
 * Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that it won't treat corrupt replicas as valid ones,
 * thus preventing the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanLog = new File(System.getProperty("test.build.data"),
        "dfs/data/data1/current/dncp_block_verification.log.curr");
    // wait for one minute for deletion to succeed
    for (int i = 0; !scanLog.delete(); i++) {
      assertTrue("Could not delete log file in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    final DatanodeID corruptDataNode = cluster.getDataNodes().get(2).dnRegistration;
    final FSNamesystem namesystem = FSNamesystem.getFSNamesystem();
    synchronized (namesystem.heartbeats) {
      // set live datanodes' remaining space to be 0
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 0);
        }
      }
      // decrease the replication factor to 1;
      namesystem.setReplication(fileName.toString(), (short)1);
      // corrupt one won't be chosen to be excess one
      // without 4910 the number of live replicas would be 0: block gets lost
      assertEquals(1, namesystem.countNodes(block).liveReplicas());
    }
  } finally {
    cluster.shutdown();
  }
}
Example #30
Source File: TestPendingInvalidateBlock.java From hadoop with Apache License 2.0 | 4 votes |
/**
 * Test whether we can delay the deletion of unknown blocks in DataNode's
 * first several block reports.
 */
@Test
public void testPendingDeleteUnknownBlocks() throws Exception {
  final int fileNum = 5; // 5 files
  final Path[] files = new Path[fileNum];
  final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
  // create a group of files, each file contains 1 block
  for (int i = 0; i < fileNum; i++) {
    files[i] = new Path("/file" + i);
    DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
  }
  // wait until all DataNodes have replicas
  waitForReplication();
  for (int i = REPLICATION - 1; i >= 0; i--) {
    dnprops[i] = cluster.stopDataNode(i);
  }
  Thread.sleep(2000);
  // delete 2 files, we still have 3 files remaining so that we can cover
  // every DN storage
  for (int i = 0; i < 2; i++) {
    dfs.delete(files[i], true);
  }
  // restart NameNode
  cluster.restartNameNode(false);
  InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox
      .getInternalState(cluster.getNamesystem().getBlockManager(), "invalidateBlocks");
  InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
  Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
  Whitebox.setInternalState(cluster.getNamesystem().getBlockManager(),
      "invalidateBlocks", mockIb);
  Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
  // restart DataNodes
  for (int i = 0; i < REPLICATION; i++) {
    cluster.restartDataNode(dnprops[i], true);
  }
  cluster.waitActive();
  for (int i = 0; i < REPLICATION; i++) {
    DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
  }
  Thread.sleep(2000);
  // make sure we have received block reports by checking the total block #
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());
  cluster.restartNameNode(true);
  Thread.sleep(6000);
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
}