Java Code Examples for org.apache.hadoop.hdfs.AppendTestUtil#write()
The following examples show how to use org.apache.hadoop.hdfs.AppendTestUtil#write(). Each example names its source file and the project it was taken from.
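Before the per-project examples, here is a minimal, self-contained sketch of the call pattern they all share: write a deterministic byte pattern with AppendTestUtil.write(), flush it to the DataNodes, and verify it with AppendTestUtil.check(). This sketch is illustrative and not taken from any of the projects below; it assumes a JUnit test with the hadoop-hdfs test jar on the classpath, and that write(OutputStream, int offset, int length) and check(FileSystem, Path, long length) carry their usual Hadoop test-utility semantics (check re-reads the file and compares it against the pattern that write produced).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.Test;

public class AppendTestUtilWriteSketch {

  @Test
  public void testWriteAndCheck() throws Exception {
    Configuration conf = new Configuration();
    // A single-DataNode cluster is enough for a plain write/check cycle.
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    try {
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      Path path = new Path("/append-test-util-sketch.dat");

      // Write 100 bytes of AppendTestUtil's deterministic pattern
      // starting at file offset 0, then flush them out to the pipeline.
      FSDataOutputStream out = fs.create(path, true);
      try {
        AppendTestUtil.write(out, 0, 100);
        out.hflush();
      } finally {
        out.close();
      }

      // Re-read the file and verify that the first 100 bytes match
      // the pattern that write() produced.
      AppendTestUtil.check(fs, path, 100);
    } finally {
      cluster.shutdown();
    }
  }
}

The examples below use this same write/hflush/check cycle, but inject a failure (NameNode failover, DataNode death, a delayed block report, or a lost lease) between the write and the check.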
Example 1
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0
@Override
public void doAnAction() throws Exception {
  FSDataOutputStream stm = fs.create(path, true);
  try {
    AppendTestUtil.write(stm, 0, 100);
    stm.hflush();
    loopRecoverLease(fsOtherUser, path);
    AppendTestUtil.check(fs, path, 100);
  } finally {
    try {
      stm.close();
    } catch (IOException e) {
      // should expect this since we lost the lease
    }
  }
}
Example 2
Source File: TestDFSConcurrentFileOperations.java From RDFS with Apache License 2.0
public void testLeaseRecoveryOnTrashedFile() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong("dfs.block.size", blockSize);
  init(conf);

  String src = "/file-1";
  String dst = "/file-2";
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);
  FSDataOutputStream fos = fs.create(srcPath);

  AppendTestUtil.write(fos, 0, writeSize);
  fos.sync();

  // renaming a file out from under a client will cause close to fail
  // and result in the lease remaining while the blocks are finalized on
  // the DNs
  fs.rename(srcPath, dstPath);

  try {
    fos.close();
    fail("expected IOException");
  } catch (IOException e) {
    // expected
  }

  FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(conf);
  AppendTestUtil.recoverFile(cluster, fs2, dstPath);
  AppendTestUtil.check(fs2, dstPath, writeSize);
}
Example 3
Source File: TestDNFencing.java From hadoop with Apache License 2.0
/**
 * Regression test for HDFS-2742. The issue in this bug was:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - Standby queues the RBW state in PendingDatanodeMessages
 * - Standby processes edit logs during failover. Before fixing
 *   this bug, it was mistakenly applying the RBW reported state
 *   after the block had been completed, causing the block to get
 *   marked corrupt. Instead, we should now be applying the RBW
 *   message on OP_ADD, and then the FINALIZED message on OP_CLOSE.
 */
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Block report will include the RBW replica, but will be
    // queued on the StandbyNode.
    cluster.triggerBlockReports();
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
Example 4
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(5)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);

    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));

    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    cluster.stopDataNode(1);

    AppendTestUtil.write(stm, BLOCK_AND_A_HALF * 2, BLOCK_AND_A_HALF);
    stm.hflush();

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 5
Source File: TestDFSIsUnderConstruction.java From RDFS with Apache License 2.0
public void testSecondLastBlockNotReceived() throws Exception {
  String fileName = "/testSecondLastBlockNotReceived";
  Path growingFile = new Path(fileName);
  FSDataInputStream fis = null;
  FSDataOutputStream fos =
      fs.create(growingFile, false, 1024, (short) 1, 1024);
  try {
    int fileLength = 2096;
    AppendTestUtil.write(fos, 0, fileLength);
    fos.sync();

    fis = fs.open(growingFile);
    for (int i = 0; i < fileLength; i++) {
      fis.read();
    }
    fis.close();

    FSNamesystem fsns = cluster.getNameNode().namesystem;
    INode[] inodes = fsns.dir.getExistingPathINodes(fileName);
    BlockInfo[] bis = ((INodeFile) (inodes[inodes.length - 1])).getBlocks();
    bis[bis.length - 2].setNumBytes(1);

    try {
      fis = fs.open(growingFile);
      TestCase.fail();
    } catch (IOException e) {
    }

    bis[bis.length - 2].setNumBytes(1024);
    bis[bis.length - 1].setNumBytes(1);

    fis = fs.open(growingFile);
    for (int i = 0; i < fileLength; i++) {
      fis.read();
    }
  } finally {
    if (fos != null) {
      fos.close();
    }
    if (fis != null) {
      fis.close();
    }
  }
}
Example 6
Source File: BlockReportTestBase.java From big-c with Apache License 2.0
/**
 * Test for the case where one of the DNs in the pipeline is in the
 * process of doing a block report exactly when the block is closed.
 * In this case, the block report becomes delayed until after the
 * block is marked completed on the NN, and hence it reports an RBW
 * replica for a COMPLETE block. Such a report should not be marked
 * corrupt.
 * This is a regression test for HDFS-2791.
 */
@Test(timeout=300000)
public void testOneReplicaRbwReportArrivesAfterBlockCompleted()
    throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  final String METHOD_NAME = GenericTestUtils.getMethodName();
  Path filePath = new Path("/" + METHOD_NAME + ".dat");

  // Start a second DN for this test -- we're checking
  // what happens when one of the DNs is slowed for some reason.
  REPL_FACTOR = 2;
  startDNandWait(null, false);

  NameNode nn = cluster.getNameNode();

  FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Set up a spy so that we can delay the block report coming
    // from this node.
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn);

    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());

    // Force a block report to be generated. The block report will have
    // an RBW replica in it. Wait for the RPC to be sent, but block
    // it before it gets to the NN.
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();
  } finally {
    IOUtils.closeStream(out);
  }

  // Now that the stream is closed, the NN will have the block in COMPLETE
  // state.
  delayer.proceed();
  brFinished.await();

  // Verify that no replicas are marked corrupt, and that the
  // file is still readable.
  BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
  assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
  DFSTestUtil.readFile(fs, filePath);

  // Ensure that the file is readable even from the DN that we futzed with.
  cluster.stopDataNode(1);
  DFSTestUtil.readFile(fs, filePath);
}
Example 7
Source File: TestDNFencing.java From big-c with Apache License 2.0
/**
 * Another regression test for HDFS-2742. This tests the following sequence:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - The block report is delayed in reaching the standby.
 * - The file is closed.
 * - The standby processes the OP_ADD and OP_CLOSE operations before
 *   the RBW block report arrives.
 * - The standby should not mark the block as corrupt.
 */
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn2);

    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());

    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  delayer.proceed();
  brFinished.await();

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
Example 8
Source File: TestDNFencing.java From big-c with Apache License 2.0
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();

  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but will be
    // queued on the StandbyNode.
    // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
    // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
    //
    cluster.triggerBlockReports();

    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW messages is postponed. In this case, they are even overwritten
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See
    // BPServiceActor#addPendingReplicationBlockInfo
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " +
      cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);

  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());

  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
Example 9
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();

    // Look into the block manager on the active node for the block
    // under construction.
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);

    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);

    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));

    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported", t);

    // Now, if we try again to recover the block, it should succeed on the
    // new active.
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE / 2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 10
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0
/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);

    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 11
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0
private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    int sizeWritten = 0;

    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;

    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)

    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}