Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#getNamesystem()
The following examples show how to use org.apache.hadoop.hdfs.MiniDFSCluster#getNamesystem().
Each example is drawn from an open-source project; the source file, originating project, and license are noted above it.
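All of the examples below follow the same basic pattern: build a MiniDFSCluster, wait for it to become active, obtain the NameNode's FSNamesystem via getNamesystem(), exercise NameNode internals through it, and shut the cluster down. The following minimal sketch illustrates that pattern; the class name and the specific assertion are illustrative assumptions, not taken from any single example on this page.

import static org.junit.Assert.assertNotNull;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.junit.Test;

public class MiniDFSClusterGetNamesystemSketch {
  @Test
  public void testGetNamesystem() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .build();
    try {
      cluster.waitActive();
      // getNamesystem() exposes the NameNode's FSNamesystem so tests can
      // inspect internal state (block manager, edit log, block pool id, ...).
      FSNamesystem namesystem = cluster.getNamesystem();
      assertNotNull(namesystem.getBlockPoolId());
    } finally {
      cluster.shutdown();
    }
  }
}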
Example 1
Source File: TestOverReplicatedBlocks.java (from big-c, Apache License 2.0)
/**
 * Test over replicated block should get invalidated when decreasing the
 * replication for a partial block.
 */
@Test
public void testInvalidateOverReplicatedBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
      .build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    FileSystem fs = cluster.getFileSystem();
    Path p = new Path(MiniDFSCluster.getBaseDirectory(), "/foo1");
    FSDataOutputStream out = fs.create(p, (short) 2);
    out.writeBytes("HDFS-3119: " + p);
    out.hsync();
    fs.setReplication(p, (short) 1);
    out.close();
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, p);
    assertEquals("Expected only one live replica for the block", 1,
        bm.countNodes(block.getLocalBlock()).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}
Example 2
Source File: TestEditLog.java (from hadoop, Apache License 2.0)
/**
 * Test case for loading a very simple edit log from a format
 * prior to the inclusion of edit transaction IDs in the log.
 */
@Test
public void testPreTxidEditLogWithEdits() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();

    long numEdits = testLoad(HADOOP20_SOME_EDITS, namesystem);
    assertEquals(3, numEdits);
    // Sanity check the edit
    HdfsFileStatus fileInfo = namesystem.getFileInfo("/myfile", false);
    assertEquals("supergroup", fileInfo.getGroup());
    assertEquals(3, fileInfo.getReplication());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 3
Source File: TestEditLog.java (from big-c, Apache License 2.0)
/**
 * Test case for loading a very simple edit log from a format
 * prior to the inclusion of edit transaction IDs in the log.
 */
@Test
public void testPreTxidEditLogWithEdits() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build();
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();

    long numEdits = testLoad(HADOOP20_SOME_EDITS, namesystem);
    assertEquals(3, numEdits);
    // Sanity check the edit
    HdfsFileStatus fileInfo = namesystem.getFileInfo("/myfile", false);
    assertEquals("supergroup", fileInfo.getGroup());
    assertEquals(3, fileInfo.getReplication());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 4
Source File: TestProcessCorruptBlocks.java (from big-c, Apache License 2.0)
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. In this test, the above
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new Datanode.
 * The test strategy:
 *   Bring up Cluster with 3 DataNodes
 *   Create a file of replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (corrupt replica should not be removed since number of good replicas
 *     (2) is less than replication factor (3))
 *   Start a new data node
 *   Verify that a new replica is created and corrupt replica is removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);
  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
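This example (and several of the replica-counting examples below) relies on a countReplicas helper that is not shown on this page. A minimal sketch of such a helper, assuming it simply delegates to the BlockManager obtained from the FSNamesystem under test, might look like this:

// Assumed helper (not copied from this page): counts live/corrupt/excess
// replicas of a block by asking the FSNamesystem's BlockManager.
private static NumberReplicas countReplicas(final FSNamesystem namesystem,
    ExtendedBlock block) {
  return namesystem.getBlockManager().countNodes(block.getLocalBlock());
}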
Example 5
Source File: TestEditLog.java (from big-c, Apache License 2.0)
/**
 * Test what happens with the following sequence:
 *
 *  Thread A writes edit
 *  Thread B calls logSyncAll
 *           calls close() on stream
 *  Thread A calls logSync
 *
 * This sequence is legal and can occur if enterSafeMode() is closely
 * followed by saveNamespace.
 */
@Test
public void testBatchedSyncWithClosedLogs() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  ExecutorService threadA = Executors.newSingleThreadExecutor();
  ExecutorService threadB = Executors.newSingleThreadExecutor();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();

    // Log an edit from thread A
    doLogEdit(threadA, editLog, "thread-a 1");
    assertEquals("logging edit without syncing should do not affect txid",
        1, editLog.getSyncTxId());

    // logSyncAll in Thread B
    doCallLogSyncAll(threadB, editLog);
    assertEquals("logSyncAll should sync thread A's transaction",
        2, editLog.getSyncTxId());

    // Close edit log
    editLog.close();

    // Ask thread A to finish sync (which should be a no-op)
    doCallLogSync(threadA, editLog);
  } finally {
    threadA.shutdown();
    threadB.shutdown();
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }
}
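The edit-log batching examples (this one and the testSyncBatching examples below) call doLogEdit, doCallLogSync, and doCallLogSyncAll helpers that are not shown on this page. A sketch of what such helpers could look like, assuming each one runs a single edit-log operation on the supplied single-thread executor and blocks until it completes, is:

// Assumed helpers (not copied from this page): run one edit-log operation
// on the given executor and wait for it, so each "thread" makes exactly one call.
private void doLogEdit(ExecutorService exec, final FSEditLog log,
    final String filename) throws Exception {
  exec.submit(new Callable<Void>() {
    @Override
    public Void call() {
      log.logSetReplication(filename, (short) 1);
      return null;
    }
  }).get();
}

private void doCallLogSync(ExecutorService exec, final FSEditLog log)
    throws Exception {
  exec.submit(new Callable<Void>() {
    @Override
    public Void call() {
      log.logSync();
      return null;
    }
  }).get();
}

private void doCallLogSyncAll(ExecutorService exec, final FSEditLog log)
    throws Exception {
  exec.submit(new Callable<Void>() {
    @Override
    public Void call() throws Exception {
      log.logSyncAll();
      return null;
    }
  }).get();
}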
Example 6
Source File: TestRBWBlockInvalidation.java (from big-c, Apache License 2.0)
/**
 * Test when a block's replica is removed from RBW folder in one of the
 * datanode, namenode should ask to invalidate that corrupted block and
 * schedule replication for one more replica for that under replicated block.
 */
@Test(timeout=600000)
public void testBlockInvalidationWhenRBWReplicaMissedInDN()
    throws IOException, InterruptedException {
  // This test cannot pass on Windows due to file locking enforcement. It will
  // reject the attempt to delete the block file from the RBW folder.
  assumeTrue(!Path.WINDOWS);

  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 300);
  conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
      .build();
  FSDataOutputStream out = null;
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/TestRBWBlockInvalidation", "foo1");
    out = fs.create(testPath, (short) 2);
    out.writeBytes("HDFS-3157: " + testPath);
    out.hsync();
    cluster.startDataNodes(conf, 1, true, null, null, null);
    String bpid = namesystem.getBlockPoolId();
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, testPath);
    Block block = blk.getLocalBlock();
    DataNode dn = cluster.getDataNodes().get(0);

    // Delete partial block and its meta information from the RBW folder
    // of first datanode.
    File blockFile = DataNodeTestUtils.getBlockFile(dn, bpid, block);
    File metaFile = DataNodeTestUtils.getMetaFile(dn, bpid, block);
    assertTrue("Could not delete the block file from the RBW folder",
        blockFile.delete());
    assertTrue("Could not delete the block meta file from the RBW folder",
        metaFile.delete());

    out.close();

    int liveReplicas = 0;
    while (true) {
      if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) < 2) {
        // This confirms we have a corrupt replica
        LOG.info("Live Replicas after corruption: " + liveReplicas);
        break;
      }
      Thread.sleep(100);
    }
    assertEquals("There should be less than 2 replicas in the "
        + "liveReplicasMap", 1, liveReplicas);

    while (true) {
      if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) > 1) {
        // Wait till the live replica count becomes equal to Replication Factor
        LOG.info("Live Replicas after Rereplication: " + liveReplicas);
        break;
      }
      Thread.sleep(100);
    }
    assertEquals("There should be two live replicas", 2, liveReplicas);

    while (true) {
      Thread.sleep(100);
      if (countReplicas(namesystem, blk).corruptReplicas() == 0) {
        LOG.info("Corrupt Replicas becomes 0");
        break;
      }
    }
  } finally {
    if (out != null) {
      out.close();
    }
    cluster.shutdown();
  }
}
Example 7
Source File: TestHAMetrics.java (from hadoop, Apache License 2.0)
@Test(timeout = 300000)
public void testHAMetrics() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();

    FSNamesystem nn0 = cluster.getNamesystem(0);
    FSNamesystem nn1 = cluster.getNamesystem(1);

    assertEquals(nn0.getHAState(), "standby");
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals(nn1.getHAState(), "standby");
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToActive(0);
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus");
    final Long ltt1 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > 0", ltt1 > 0);

    assertEquals("active", nn0.getHAState());
    assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
    assertEquals("standby", nn1.getHAState());
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToStandby(0);
    final Long ltt2 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1);

    cluster.transitionToActive(1);

    assertEquals("standby", nn0.getHAState());
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals("active", nn1.getHAState());
    assertEquals(0, nn1.getMillisSinceLastLoadedEdits());

    Thread.sleep(2000); // make sure standby gets a little out-of-date
    assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.createFile(fs, new Path("/foo"), 10, (short)1, 1L);

    assertTrue(0 < nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
        cluster.getNameNode(0));

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
    // Since we just waited for the standby to catch up, the time since we
    // last loaded edits should be very low.
    assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
        newMillisSinceLastLoadedEdits,
        millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
  } finally {
    IOUtils.cleanup(LOG, fs);
    cluster.shutdown();
  }
}
Example 8
Source File: TestEditLog.java (from big-c, Apache License 2.0)
@Test
public void testSyncBatching() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  ExecutorService threadA = Executors.newSingleThreadExecutor();
  ExecutorService threadB = Executors.newSingleThreadExecutor();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();

    assertEquals("should start with only the BEGIN_LOG_SEGMENT txn synced",
        1, editLog.getSyncTxId());

    // Log an edit from thread A
    doLogEdit(threadA, editLog, "thread-a 1");
    assertEquals("logging edit without syncing should do not affect txid",
        1, editLog.getSyncTxId());

    // Log an edit from thread B
    doLogEdit(threadB, editLog, "thread-b 1");
    assertEquals("logging edit without syncing should do not affect txid",
        1, editLog.getSyncTxId());

    // Now ask to sync edit from B, which should sync both edits.
    doCallLogSync(threadB, editLog);
    assertEquals("logSync from second thread should bump txid up to 3",
        3, editLog.getSyncTxId());

    // Now ask to sync edit from A, which was already batched in - thus
    // it should increment the batch count metric
    doCallLogSync(threadA, editLog);
    assertEquals("logSync from first thread shouldn't change txid",
        3, editLog.getSyncTxId());

    // Should have incremented the batch count exactly once
    assertCounter("TransactionsBatchedInSync", 1L,
        getMetrics("NameNodeActivity"));
  } finally {
    threadA.shutdown();
    threadB.shutdown();
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }
}
Example 9
Source File: TestRBWBlockInvalidation.java (from hadoop, Apache License 2.0)
/**
 * Test when a block's replica is removed from RBW folder in one of the
 * datanode, namenode should ask to invalidate that corrupted block and
 * schedule replication for one more replica for that under replicated block.
 */
@Test(timeout=600000)
public void testBlockInvalidationWhenRBWReplicaMissedInDN()
    throws IOException, InterruptedException {
  // This test cannot pass on Windows due to file locking enforcement. It will
  // reject the attempt to delete the block file from the RBW folder.
  assumeTrue(!Path.WINDOWS);

  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 2);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 300);
  conf.setLong(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
  conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
      .build();
  FSDataOutputStream out = null;
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    FileSystem fs = cluster.getFileSystem();
    Path testPath = new Path("/tmp/TestRBWBlockInvalidation", "foo1");
    out = fs.create(testPath, (short) 2);
    out.writeBytes("HDFS-3157: " + testPath);
    out.hsync();
    cluster.startDataNodes(conf, 1, true, null, null, null);
    String bpid = namesystem.getBlockPoolId();
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, testPath);
    Block block = blk.getLocalBlock();
    DataNode dn = cluster.getDataNodes().get(0);

    // Delete partial block and its meta information from the RBW folder
    // of first datanode.
    File blockFile = DataNodeTestUtils.getBlockFile(dn, bpid, block);
    File metaFile = DataNodeTestUtils.getMetaFile(dn, bpid, block);
    assertTrue("Could not delete the block file from the RBW folder",
        blockFile.delete());
    assertTrue("Could not delete the block meta file from the RBW folder",
        metaFile.delete());

    out.close();

    int liveReplicas = 0;
    while (true) {
      if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) < 2) {
        // This confirms we have a corrupt replica
        LOG.info("Live Replicas after corruption: " + liveReplicas);
        break;
      }
      Thread.sleep(100);
    }
    assertEquals("There should be less than 2 replicas in the "
        + "liveReplicasMap", 1, liveReplicas);

    while (true) {
      if ((liveReplicas = countReplicas(namesystem, blk).liveReplicas()) > 1) {
        // Wait till the live replica count becomes equal to Replication Factor
        LOG.info("Live Replicas after Rereplication: " + liveReplicas);
        break;
      }
      Thread.sleep(100);
    }
    assertEquals("There should be two live replicas", 2, liveReplicas);

    while (true) {
      Thread.sleep(100);
      if (countReplicas(namesystem, blk).corruptReplicas() == 0) {
        LOG.info("Corrupt Replicas becomes 0");
        break;
      }
    }
  } finally {
    if (out != null) {
      out.close();
    }
    cluster.shutdown();
  }
}
Example 10
Source File: TestHAMetrics.java (from big-c, Apache License 2.0)
@Test(timeout = 300000)
public void testHAMetrics() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();

    FSNamesystem nn0 = cluster.getNamesystem(0);
    FSNamesystem nn1 = cluster.getNamesystem(1);

    assertEquals(nn0.getHAState(), "standby");
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals(nn1.getHAState(), "standby");
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToActive(0);
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus");
    final Long ltt1 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > 0", ltt1 > 0);

    assertEquals("active", nn0.getHAState());
    assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
    assertEquals("standby", nn1.getHAState());
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToStandby(0);
    final Long ltt2 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1);

    cluster.transitionToActive(1);

    assertEquals("standby", nn0.getHAState());
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals("active", nn1.getHAState());
    assertEquals(0, nn1.getMillisSinceLastLoadedEdits());

    Thread.sleep(2000); // make sure standby gets a little out-of-date
    assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.createFile(fs, new Path("/foo"), 10, (short)1, 1L);

    assertTrue(0 < nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
        cluster.getNameNode(0));

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
    // Since we just waited for the standby to catch up, the time since we
    // last loaded edits should be very low.
    assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
        newMillisSinceLastLoadedEdits,
        millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
  } finally {
    IOUtils.cleanup(LOG, fs);
    cluster.shutdown();
  }
}
Example 11
Source File: TestEditLog.java (from hadoop, Apache License 2.0)
/**
 * Test creating a directory with lots and lots of edit log segments
 */
@Test
public void testManyEditLogSegments() throws IOException {
  final int NUM_EDIT_LOG_ROLLS = 1000;
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();
    for (int i = 0; i < NUM_EDIT_LOG_ROLLS; i++) {
      editLog.logSetReplication("fakefile" + i, (short)(i % 3));
      assertExistsInStorageDirs(
          cluster, NameNodeDirType.EDITS,
          NNStorage.getInProgressEditsFileName((i * 3) + 1));
      editLog.logSync();
      editLog.rollEditLog();
      assertExistsInStorageDirs(
          cluster, NameNodeDirType.EDITS,
          NNStorage.getFinalizedEditsFileName((i * 3) + 1, (i * 3) + 3));
    }
    editLog.close();
  } finally {
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }

  // How long does it take to read through all these edit logs?
  long startTime = Time.now();
  try {
    cluster = new MiniDFSCluster.Builder(conf).
        numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
  long endTime = Time.now();
  double delta = ((float)(endTime - startTime)) / 1000.0;
  LOG.info(String.format("loaded %d edit log segments in %.2f seconds",
      NUM_EDIT_LOG_ROLLS, delta));
}
Example 12
Source File: TestEditLog.java (from hadoop, Apache License 2.0)
@Test
public void testEditChecksum() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
  cluster.waitActive();
  fileSys = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  FSImage fsimage = namesystem.getFSImage();
  final FSEditLog editLog = fsimage.getEditLog();
  fileSys.mkdirs(new Path("/tmp"));

  Iterator<StorageDirectory> iter = fsimage.getStorage().
      dirIterator(NameNodeDirType.EDITS);
  LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
  while (iter.hasNext()) {
    sds.add(iter.next());
  }
  editLog.close();
  cluster.shutdown();

  for (StorageDirectory sd : sds) {
    File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
    assertTrue(editFile.exists());

    long fileLen = editFile.length();
    LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
    rwf.seek(fileLen-4); // seek to checksum bytes
    int b = rwf.readInt();
    rwf.seek(fileLen-4);
    rwf.writeInt(b+1);
    rwf.close();
  }

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
    fail("should not be able to start");
  } catch (IOException e) {
    // expected
    assertNotNull("Cause of exception should be ChecksumException", e.getCause());
    assertEquals("Cause of exception should be ChecksumException",
        ChecksumException.class, e.getCause().getClass());
  }
}
Example 13
Source File: TestEditLog.java (from big-c, Apache License 2.0)
@Test
public void testEditChecksum() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
  cluster.waitActive();
  fileSys = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  FSImage fsimage = namesystem.getFSImage();
  final FSEditLog editLog = fsimage.getEditLog();
  fileSys.mkdirs(new Path("/tmp"));

  Iterator<StorageDirectory> iter = fsimage.getStorage().
      dirIterator(NameNodeDirType.EDITS);
  LinkedList<StorageDirectory> sds = new LinkedList<StorageDirectory>();
  while (iter.hasNext()) {
    sds.add(iter.next());
  }
  editLog.close();
  cluster.shutdown();

  for (StorageDirectory sd : sds) {
    File editFile = NNStorage.getFinalizedEditsFile(sd, 1, 3);
    assertTrue(editFile.exists());

    long fileLen = editFile.length();
    LOG.debug("Corrupting Log File: " + editFile + " len: " + fileLen);
    RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
    rwf.seek(fileLen-4); // seek to checksum bytes
    int b = rwf.readInt();
    rwf.seek(fileLen-4);
    rwf.writeInt(b+1);
    rwf.close();
  }

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).format(false).build();
    fail("should not be able to start");
  } catch (IOException e) {
    // expected
    assertNotNull("Cause of exception should be ChecksumException", e.getCause());
    assertEquals("Cause of exception should be ChecksumException",
        ChecksumException.class, e.getCause().getClass());
  }
}
Example 14
Source File: TestEditLog.java (from hadoop, Apache License 2.0)
@Test
public void testSyncBatching() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  ExecutorService threadA = Executors.newSingleThreadExecutor();
  ExecutorService threadB = Executors.newSingleThreadExecutor();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();

    assertEquals("should start with only the BEGIN_LOG_SEGMENT txn synced",
        1, editLog.getSyncTxId());

    // Log an edit from thread A
    doLogEdit(threadA, editLog, "thread-a 1");
    assertEquals("logging edit without syncing should do not affect txid",
        1, editLog.getSyncTxId());

    // Log an edit from thread B
    doLogEdit(threadB, editLog, "thread-b 1");
    assertEquals("logging edit without syncing should do not affect txid",
        1, editLog.getSyncTxId());

    // Now ask to sync edit from B, which should sync both edits.
    doCallLogSync(threadB, editLog);
    assertEquals("logSync from second thread should bump txid up to 3",
        3, editLog.getSyncTxId());

    // Now ask to sync edit from A, which was already batched in - thus
    // it should increment the batch count metric
    doCallLogSync(threadA, editLog);
    assertEquals("logSync from first thread shouldn't change txid",
        3, editLog.getSyncTxId());

    // Should have incremented the batch count exactly once
    assertCounter("TransactionsBatchedInSync", 1L,
        getMetrics("NameNodeActivity"));
  } finally {
    threadA.shutdown();
    threadB.shutdown();
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }
}
Example 15
Source File: TestFSEditLogLoader.java (from hadoop, Apache License 2.0)
@Test
public void testDisplayRecentEditLogOpCodes() throws IOException {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES)
      .enableManagedDfsDirsRedundancy(false).build();
  cluster.waitActive();
  fileSys = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  FSImage fsimage = namesystem.getFSImage();
  for (int i = 0; i < 20; i++) {
    fileSys.mkdirs(new Path("/tmp/tmp" + i));
  }
  StorageDirectory sd = fsimage.getStorage().dirIterator(NameNodeDirType.EDITS).next();
  cluster.shutdown();

  File editFile = FSImageTestUtil.findLatestEditsLog(sd).getFile();
  assertTrue("Should exist: " + editFile, editFile.exists());

  // Corrupt the edits file.
  long fileLen = editFile.length();
  RandomAccessFile rwf = new RandomAccessFile(editFile, "rw");
  rwf.seek(fileLen - 40);
  for (int i = 0; i < 20; i++) {
    rwf.write(FSEditLogOpCodes.OP_DELETE.getOpCode());
  }
  rwf.close();

  StringBuilder bld = new StringBuilder();
  bld.append("^Error replaying edit log at offset \\d+. ");
  bld.append("Expected transaction ID was \\d+\n");
  bld.append("Recent opcode offsets: (\\d+\\s*){4}$");
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES)
        .enableManagedDfsDirsRedundancy(false).format(false).build();
    fail("should not be able to start");
  } catch (IOException e) {
    assertTrue("error message contains opcodes message",
        e.getMessage().matches(bld.toString()));
  }
}
Example 16
Source File: TestINodeFile.java (from big-c, Apache License 2.0)
/**
 * FSDirectory#unprotectedSetQuota creates a new INodeDirectoryWithQuota to
 * replace the original INodeDirectory. Before HDFS-4243, the parent field of
 * all the children INodes of the target INodeDirectory is not changed to
 * point to the new INodeDirectoryWithQuota. This testcase tests this
 * scenario.
 */
@Test
public void testGetFullPathNameAfterSetQuota() throws Exception {
  long fileLen = 1024;
  replication = 3;
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(replication).build();
    cluster.waitActive();
    FSNamesystem fsn = cluster.getNamesystem();
    FSDirectory fsdir = fsn.getFSDirectory();
    DistributedFileSystem dfs = cluster.getFileSystem();

    // Create a file for test
    final Path dir = new Path("/dir");
    final Path file = new Path(dir, "file");
    DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L);

    // Check the full path name of the INode associating with the file
    INode fnode = fsdir.getINode(file.toString());
    assertEquals(file.toString(), fnode.getFullPathName());

    // Call FSDirectory#unprotectedSetQuota which calls
    // INodeDirectory#replaceChild
    dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10);
    INodeDirectory dirNode = getDir(fsdir, dir);
    assertEquals(dir.toString(), dirNode.getFullPathName());
    assertTrue(dirNode.isWithQuota());

    final Path newDir = new Path("/newdir");
    final Path newFile = new Path(newDir, "file");
    // Also rename dir
    dfs.rename(dir, newDir, Options.Rename.OVERWRITE);
    // /dir/file now should be renamed to /newdir/file
    fnode = fsdir.getINode(newFile.toString());
    // getFullPathName can return correct result only if the parent field of
    // child node is set correctly
    assertEquals(newFile.toString(), fnode.getFullPathName());
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 17
Source File: TestOverReplicatedBlocks.java (from hadoop, Apache License 2.0)
/**
 * The test verifies that replica for deletion is chosen on a node,
 * with the oldest heartbeat, when this heartbeat is larger than the
 * tolerable heartbeat interval.
 * It creates a file with several blocks and replication 4.
 * The last DN is configured to send heartbeats rarely.
 *
 * Test waits until the tolerable heartbeat interval expires, and reduces
 * replication of the file. All replica deletions should be scheduled for the
 * last node. No replicas will actually be deleted, since last DN doesn't
 * send heartbeats.
 */
@Test
public void testChooseReplicaToDelete() throws Exception {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
    cluster.startDataNodes(conf, 1, true, null, null, null);
    DataNode lastDN = cluster.getDataNodes().get(3);
    DatanodeRegistration dnReg = DataNodeTestUtils.getDNRegistrationForBP(
        lastDN, namesystem.getBlockPoolId());
    String lastDNid = dnReg.getDatanodeUuid();

    final Path fileName = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short)4, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)4);

    // Wait for tolerable number of heartbeats plus one
    DatanodeDescriptor nodeInfo = null;
    long lastHeartbeat = 0;
    long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000 *
        (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
    do {
      nodeInfo = namesystem.getBlockManager().getDatanodeManager()
          .getDatanode(dnReg);
      lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
    } while (monotonicNow() - lastHeartbeat < waitTime);
    fs.setReplication(fileName, (short)3);

    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(fileName), 0, Long.MAX_VALUE);

    // All replicas for deletion should be scheduled on lastDN.
    // And should not actually be deleted, because lastDN does not heartbeat.
    namesystem.readLock();
    Collection<Block> dnBlocks =
        namesystem.getBlockManager().excessReplicateMap.get(lastDNid);
    assertEquals("Replicas on node " + lastDNid + " should have been deleted",
        SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks.size());
    namesystem.readUnlock();
    for (BlockLocation location : locs)
      assertEquals("Block should still have 4 replicas",
          4, location.getNames().length);
  } finally {
    if (fs != null) fs.close();
    if (cluster != null) cluster.shutdown();
  }
}
Example 18
Source File: TestEditLog.java (from big-c, Apache License 2.0)
/**
 * Test creating a directory with lots and lots of edit log segments
 */
@Test
public void testManyEditLogSegments() throws IOException {
  final int NUM_EDIT_LOG_ROLLS = 1000;
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    FSImage fsimage = namesystem.getFSImage();
    final FSEditLog editLog = fsimage.getEditLog();
    for (int i = 0; i < NUM_EDIT_LOG_ROLLS; i++) {
      editLog.logSetReplication("fakefile" + i, (short)(i % 3));
      assertExistsInStorageDirs(
          cluster, NameNodeDirType.EDITS,
          NNStorage.getInProgressEditsFileName((i * 3) + 1));
      editLog.logSync();
      editLog.rollEditLog();
      assertExistsInStorageDirs(
          cluster, NameNodeDirType.EDITS,
          NNStorage.getFinalizedEditsFileName((i * 3) + 1, (i * 3) + 3));
    }
    editLog.close();
  } finally {
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }

  // How long does it take to read through all these edit logs?
  long startTime = Time.now();
  try {
    cluster = new MiniDFSCluster.Builder(conf).
        numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
  long endTime = Time.now();
  double delta = ((float)(endTime - startTime)) / 1000.0;
  LOG.info(String.format("loaded %d edit log segments in %.2f seconds",
      NUM_EDIT_LOG_ROLLS, delta));
}
Example 19
Source File: TestEditLogRace.java (from big-c, Apache License 2.0)
/**
 * Tests saving fs image while transactions are ongoing.
 */
@Test
public void testSaveNamespace() throws Exception {
  // start a cluster
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;

  AtomicReference<Throwable> caughtErr = new AtomicReference<Throwable>();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final NamenodeProtocols nn = cluster.getNameNodeRpc();

    FSImage fsimage = namesystem.getFSImage();
    FSEditLog editLog = fsimage.getEditLog();

    startTransactionWorkers(nn, caughtErr);

    for (int i = 0; i < NUM_SAVE_IMAGE && caughtErr.get() == null; i++) {
      try {
        Thread.sleep(20);
      } catch (InterruptedException ignored) {}

      LOG.info("Save " + i + ": entering safe mode");
      namesystem.enterSafeMode(false);

      // Verify edit logs before the save
      // They should start with the first edit after the checkpoint
      long logStartTxId = fsimage.getStorage().getMostRecentCheckpointTxId() + 1;
      verifyEditLogs(namesystem, fsimage,
          NNStorage.getInProgressEditsFileName(logStartTxId),
          logStartTxId);

      LOG.info("Save " + i + ": saving namespace");
      namesystem.saveNamespace();
      LOG.info("Save " + i + ": leaving safemode");

      long savedImageTxId = fsimage.getStorage().getMostRecentCheckpointTxId();

      // Verify that edit logs post save got finalized and aren't corrupt
      verifyEditLogs(namesystem, fsimage,
          NNStorage.getFinalizedEditsFileName(logStartTxId, savedImageTxId),
          logStartTxId);

      // The checkpoint id should be 1 less than the last written ID, since
      // the log roll writes the "BEGIN" transaction to the new log.
      assertEquals(fsimage.getStorage().getMostRecentCheckpointTxId(),
          editLog.getLastWrittenTxId() - 1);

      namesystem.leaveSafeMode();
      LOG.info("Save " + i + ": complete");
    }
  } finally {
    stopTransactionWorkers();
    if (caughtErr.get() != null) {
      throw new RuntimeException(caughtErr.get());
    }
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }
}
Example 20
Source File: TestHeartbeatHandling.java (from big-c, Apache License 2.0)
/**
 * Test if
 * {@link FSNamesystem#handleHeartbeat}
 * can pick up replication and/or invalidate requests and observes the max
 * limit
 */
@Test
public void testHeartbeat() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final HeartbeatManager hm = namesystem.getBlockManager()
        .getDatanodeManager().getHeartbeatManager();
    final String poolId = namesystem.getBlockPoolId();
    final DatanodeRegistration nodeReg =
        DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
    final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
    final String storageID = DatanodeStorage.generateUuid();
    dd.updateStorage(new DatanodeStorage(storageID));

    final int REMAINING_BLOCKS = 1;
    final int MAX_REPLICATE_LIMIT =
        conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
    final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
    final int MAX_INVALIDATE_BLOCKS = 2*MAX_INVALIDATE_LIMIT+REMAINING_BLOCKS;
    final int MAX_REPLICATE_BLOCKS = 2*MAX_REPLICATE_LIMIT+REMAINING_BLOCKS;
    final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)};

    try {
      namesystem.writeLock();
      synchronized(hm) {
        for (int i=0; i<MAX_REPLICATE_BLOCKS; i++) {
          dd.addBlockToBeReplicated(
              new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP),
              ONE_TARGET);
        }
        DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd,
            namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);

        ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
        for (int i=0; i<MAX_INVALIDATE_BLOCKS; i++) {
          blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
        }
        dd.addBlocksToBeInvalidated(blockList);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(0, cmds.length);
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}