Java Code Examples for org.apache.hadoop.hdfs.AppendTestUtil#check()
The following examples show how to use org.apache.hadoop.hdfs.AppendTestUtil#check().
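For orientation, here is a minimal sketch of the write/hflush/check pattern that recurs throughout the examples below, assuming an HDFS FileSystem handle fs (for instance from a MiniDFSCluster in a test harness); the class name AppendCheckSketch and the helper writeAndVerify are illustrative, not part of Hadoop. AppendTestUtil.write() fills the stream with a deterministic byte pattern, and AppendTestUtil.check() re-reads the file and verifies that its first length bytes match that pattern.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;

public class AppendCheckSketch {
  // Hypothetical helper: write `length` pattern bytes to `path`, then
  // verify them with AppendTestUtil.check(). Assumes `fs` points at a
  // running HDFS instance (e.g. a MiniDFSCluster in a test).
  static void writeAndVerify(FileSystem fs, Path path, int length)
      throws IOException {
    FSDataOutputStream stm = fs.create(path, true);
    try {
      // write() emits a deterministic byte pattern starting at offset 0,
      // which check() expects to find when it re-reads the file.
      AppendTestUtil.write(stm, 0, length);
      // hflush() pushes the buffered bytes to the datanodes so that
      // readers (and check()) can see them.
      stm.hflush();
    } finally {
      stm.close();
    }
    // Re-open the file, verify its length, and compare every byte
    // against the expected pattern.
    AppendTestUtil.check(fs, path, length);
  }
}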
Example 1
Source File: TestPipelinesFailover.java From hadoop (identical in big-c) with Apache License 2.0
@Override
public void doAnAction() throws Exception {
  FSDataOutputStream stm = fs.create(path, true);
  try {
    AppendTestUtil.write(stm, 0, 100);
    stm.hflush();
    loopRecoverLease(fsOtherUser, path);
    AppendTestUtil.check(fs, path, 100);
  } finally {
    try {
      stm.close();
    } catch (IOException e) {
      // should expect this since we lost the lease
    }
  }
}
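Example 1 relies on two names defined elsewhere in TestPipelinesFailover: fsOtherUser (a FileSystem handle for a second user, so lease recovery is not short-circuited by lease ownership) and loopRecoverLease(). A plausible sketch of such a helper, built on the public DistributedFileSystem.recoverLease() API; the class name, retry bound, and sleep interval are assumptions, not the test's actual implementation:

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class LeaseRecoverySketch {
  // Hypothetical reconstruction: poll recoverLease() until the NameNode
  // reports the file closed. recoverLease() returns false while block
  // recovery is still in progress.
  static void loopRecoverLease(FileSystem fsOtherUser, Path path)
      throws IOException, InterruptedException {
    DistributedFileSystem dfs = (DistributedFileSystem) fsOtherUser;
    for (int i = 0; i < 30; i++) { // retry bound is an assumption
      if (dfs.recoverLease(path)) {
        return;
      }
      Thread.sleep(1000);
    }
    throw new IOException("Lease was not recovered for " + path);
  }
}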
Example 2
Source File: TestDFSConcurrentFileOperations.java From RDFS with Apache License 2.0
public void testLeaseRecoveryOnTrashedFile() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong("dfs.block.size", blockSize);
  init(conf);

  String src = "/file-1";
  String dst = "/file-2";
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);
  FSDataOutputStream fos = fs.create(srcPath);

  AppendTestUtil.write(fos, 0, writeSize);
  fos.sync();

  // renaming a file out from under a client will cause close to fail
  // and result in the lease remaining while the blocks are finalized on
  // the DNs
  fs.rename(srcPath, dstPath);

  try {
    fos.close();
    fail("expected IOException");
  } catch (IOException e) {
    // expected
  }

  FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(conf);
  AppendTestUtil.recoverFile(cluster, fs2, dstPath);
  AppendTestUtil.check(fs2, dstPath, writeSize);
}
Example 3
Source File: TestPipelinesFailover.java From hadoop (identical in big-c) with Apache License 2.0
private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    int sizeWritten = 0;

    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;

    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)

    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 4
Source File: TestPipelinesFailover.java From hadoop (identical in big-c) with Apache License 2.0
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(5)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);

    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));

    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    cluster.stopDataNode(1);

    // write a third block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF * 2, BLOCK_AND_A_HALF);
    stm.hflush();

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 5
Source File: TestPipelinesFailover.java From hadoop (identical in big-c) with Apache License 2.0
/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);

    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 6
Source File: TestPipelinesFailover.java From hadoop (identical in big-c) with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();

    // Look into the block manager on the active node for the block
    // under construction.
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);

    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);

    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(),  // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true),  // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(),  // new targets
        (String[]) Mockito.anyObject());     // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));

    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported", t);

    // Now, if we try again to recover the block, it should succeed on the
    // new active.
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 7
Source File: TestDNFencing.java From hadoop (identical in big-c) with Apache License 2.0
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();

  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but will be
    // queued on the StandbyNode.
    // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
    // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
    //
    cluster.triggerBlockReports();
    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW messages is postponed. In this case, they are even overwritten
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See
    // BPServiceActor#addPendingReplicationBlockInfo
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " +
      cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}