org.apache.hadoop.hdfs.protocol.RecoveryInProgressException Java Examples
The following examples show how to use
org.apache.hadoop.hdfs.protocol.RecoveryInProgressException.
You can go to the original project or source file by following the links above each example.
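Common to most of the examples below: HDFS raises RecoveryInProgressException on the server side, so clients typically see it wrapped in an org.apache.hadoop.ipc.RemoteException and must compare the wrapped class name before retrying. A minimal sketch of that pattern, assuming a caller that simply backs off and retries (the helper name, retry count, and sleep interval are illustrative, not taken from any example below):

// Types used: FileSystem, FSDataOutputStream, Path (org.apache.hadoop.fs),
// RecoveryInProgressException (org.apache.hadoop.hdfs.protocol),
// RemoteException (org.apache.hadoop.ipc), IOException (java.io).
static FSDataOutputStream appendWithRetry(FileSystem fs, Path p, int maxRetries)
    throws IOException, InterruptedException {
  for (int i = 0; i < maxRetries; i++) {
    try {
      return fs.append(p);
    } catch (RemoteException re) {
      // The server-side exception arrives wrapped; unwrap it by class name.
      if (RecoveryInProgressException.class.getName().equals(re.getClassName())) {
        Thread.sleep(1000); // assumed back-off; tune for the workload
      } else {
        throw re;
      }
    }
  }
  throw new IOException("Recovery still in progress after " + maxRetries + " attempts: " + p);
}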
Example #1
Source File: TestReadWhileWriting.java From hadoop with Apache License 2.0
/** Try opening a file for append. */
private static FSDataOutputStream append(FileSystem fs, Path p) throws Exception {
  for(int i = 0; i < 10; i++) {
    try {
      return fs.append(p);
    } catch(RemoteException re) {
      if (re.getClassName().equals(RecoveryInProgressException.class.getName())) {
        AppendTestUtil.LOG.info("Will sleep and retry, i=" + i + ", p=" + p, re);
        Thread.sleep(1000);
      } else
        throw re;
    }
  }
  throw new IOException("Cannot append to " + p);
}
Example #2
Source File: TestReadWhileWriting.java From big-c with Apache License 2.0
/** Try opening a file for append. */
private static FSDataOutputStream append(FileSystem fs, Path p) throws Exception {
  for(int i = 0; i < 10; i++) {
    try {
      return fs.append(p);
    } catch(RemoteException re) {
      if (re.getClassName().equals(RecoveryInProgressException.class.getName())) {
        AppendTestUtil.LOG.info("Will sleep and retry, i=" + i + ", p=" + p, re);
        Thread.sleep(1000);
      } else
        throw re;
    }
  }
  throw new IOException("Cannot append to " + p);
}
Example #3
Source File: TestBlockRecovery.java From hadoop with Apache License 2.0
/**
 * BlockRecoveryFI_05. One DN throws RecoveryInProgressException.
 *
 * @throws IOException
 *           in case of an error
 */
@Test
public void testRecoveryInProgressException()
    throws IOException, InterruptedException {
  if(LOG.isDebugEnabled()) {
    LOG.debug("Running " + GenericTestUtils.getMethodName());
  }
  DataNode spyDN = spy(dn);
  doThrow(new RecoveryInProgressException("Replica recovery is in progress")).
      when(spyDN).initReplicaRecovery(any(RecoveringBlock.class));
  Daemon d = spyDN.recoverBlocks("fake NN", initRecoveringBlocks());
  d.join();
  verify(spyDN, never()).syncBlock(
      any(RecoveringBlock.class), anyListOf(BlockRecord.class));
}
Example #4
Source File: TestBlockRecovery.java From big-c with Apache License 2.0
/**
 * BlockRecoveryFI_05. One DN throws RecoveryInProgressException.
 *
 * @throws IOException
 *           in case of an error
 */
@Test
public void testRecoveryInProgressException()
    throws IOException, InterruptedException {
  if(LOG.isDebugEnabled()) {
    LOG.debug("Running " + GenericTestUtils.getMethodName());
  }
  DataNode spyDN = spy(dn);
  doThrow(new RecoveryInProgressException("Replica recovery is in progress")).
      when(spyDN).initReplicaRecovery(any(RecoveringBlock.class));
  Daemon d = spyDN.recoverBlocks("fake NN", initRecoveringBlocks());
  d.join();
  verify(spyDN, never()).syncBlock(
      any(RecoveringBlock.class), anyListOf(BlockRecord.class));
}
Example #5
Source File: FsDatasetImpl.java From hadoop with Apache License 2.0
/** static version of {@link #initReplicaRecovery(RecoveringBlock)}. */
static ReplicaRecoveryInfo initReplicaRecovery(String bpid, ReplicaMap map,
    Block block, long recoveryId, long xceiverStopTimeout) throws IOException {
  final ReplicaInfo replica = map.get(bpid, block.getBlockId());
  LOG.info("initReplicaRecovery: " + block + ", recoveryId=" + recoveryId
      + ", replica=" + replica);

  //check replica
  if (replica == null) {
    return null;
  }

  //stop writer if there is any
  if (replica instanceof ReplicaInPipeline) {
    final ReplicaInPipeline rip = (ReplicaInPipeline)replica;
    rip.stopWriter(xceiverStopTimeout);

    //check replica bytes on disk.
    if (rip.getBytesOnDisk() < rip.getVisibleLength()) {
      throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
          + " getBytesOnDisk() < getVisibleLength(), rip=" + rip);
    }

    //check the replica's files
    checkReplicaFiles(rip);
  }

  //check generation stamp
  if (replica.getGenerationStamp() < block.getGenerationStamp()) {
    throw new IOException(
        "replica.getGenerationStamp() < block.getGenerationStamp(), block="
        + block + ", replica=" + replica);
  }

  //check recovery id
  if (replica.getGenerationStamp() >= recoveryId) {
    throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
        + " replica.getGenerationStamp() >= recoveryId = " + recoveryId
        + ", block=" + block + ", replica=" + replica);
  }

  //check RUR
  final ReplicaUnderRecovery rur;
  if (replica.getState() == ReplicaState.RUR) {
    rur = (ReplicaUnderRecovery)replica;
    if (rur.getRecoveryID() >= recoveryId) {
      throw new RecoveryInProgressException(
          "rur.getRecoveryID() >= recoveryId = " + recoveryId
          + ", block=" + block + ", rur=" + rur);
    }
    final long oldRecoveryID = rur.getRecoveryID();
    rur.setRecoveryID(recoveryId);
    LOG.info("initReplicaRecovery: update recovery id for " + block
        + " from " + oldRecoveryID + " to " + recoveryId);
  } else {
    rur = new ReplicaUnderRecovery(replica, recoveryId);
    map.add(bpid, rur);
    LOG.info("initReplicaRecovery: changing replica state for " + block
        + " from " + replica.getState() + " to " + rur.getState());
  }
  return rur.createInfo();
}
Example #6
Source File: DataNode.java From hadoop with Apache License 2.0
/** Recover a block */
private void recoverBlock(RecoveringBlock rBlock) throws IOException {
  ExtendedBlock block = rBlock.getBlock();
  String blookPoolId = block.getBlockPoolId();
  DatanodeID[] datanodeids = rBlock.getLocations();
  List<BlockRecord> syncList = new ArrayList<BlockRecord>(datanodeids.length);
  int errorCount = 0;

  //check generation stamps
  for(DatanodeID id : datanodeids) {
    try {
      BPOfferService bpos = blockPoolManager.get(blookPoolId);
      DatanodeRegistration bpReg = bpos.bpRegistration;
      InterDatanodeProtocol datanode = bpReg.equals(id)?
          this: DataNode.createInterDataNodeProtocolProxy(id, getConf(),
              dnConf.socketTimeout, dnConf.connectToDnViaHostname);
      ReplicaRecoveryInfo info = callInitReplicaRecovery(datanode, rBlock);
      if (info != null &&
          info.getGenerationStamp() >= block.getGenerationStamp() &&
          info.getNumBytes() > 0) {
        syncList.add(new BlockRecord(id, datanode, info));
      }
    } catch (RecoveryInProgressException ripE) {
      InterDatanodeProtocol.LOG.warn(
          "Recovery for replica " + block + " on data-node " + id
          + " is already in progress. Recovery id = "
          + rBlock.getNewGenerationStamp() + " is aborted.", ripE);
      return;
    } catch (IOException e) {
      ++errorCount;
      InterDatanodeProtocol.LOG.warn(
          "Failed to obtain replica info for block (=" + block
          + ") from datanode (=" + id + ")", e);
    }
  }

  if (errorCount == datanodeids.length) {
    throw new IOException("All datanodes failed: block=" + block
        + ", datanodeids=" + Arrays.asList(datanodeids));
  }

  syncBlock(rBlock, syncList);
}
Example #7
Source File: FsDatasetImpl.java From big-c with Apache License 2.0
/** static version of {@link #initReplicaRecovery(RecoveringBlock)}. */
static ReplicaRecoveryInfo initReplicaRecovery(String bpid, ReplicaMap map,
    Block block, long recoveryId, long xceiverStopTimeout) throws IOException {
  final ReplicaInfo replica = map.get(bpid, block.getBlockId());
  LOG.info("initReplicaRecovery: " + block + ", recoveryId=" + recoveryId
      + ", replica=" + replica);

  //check replica
  if (replica == null) {
    return null;
  }

  //stop writer if there is any
  if (replica instanceof ReplicaInPipeline) {
    final ReplicaInPipeline rip = (ReplicaInPipeline)replica;
    rip.stopWriter(xceiverStopTimeout);

    //check replica bytes on disk.
    if (rip.getBytesOnDisk() < rip.getVisibleLength()) {
      throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
          + " getBytesOnDisk() < getVisibleLength(), rip=" + rip);
    }

    //check the replica's files
    checkReplicaFiles(rip);
  }

  //check generation stamp
  if (replica.getGenerationStamp() < block.getGenerationStamp()) {
    throw new IOException(
        "replica.getGenerationStamp() < block.getGenerationStamp(), block="
        + block + ", replica=" + replica);
  }

  //check recovery id
  if (replica.getGenerationStamp() >= recoveryId) {
    throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
        + " replica.getGenerationStamp() >= recoveryId = " + recoveryId
        + ", block=" + block + ", replica=" + replica);
  }

  //check RUR
  final ReplicaUnderRecovery rur;
  if (replica.getState() == ReplicaState.RUR) {
    rur = (ReplicaUnderRecovery)replica;
    if (rur.getRecoveryID() >= recoveryId) {
      throw new RecoveryInProgressException(
          "rur.getRecoveryID() >= recoveryId = " + recoveryId
          + ", block=" + block + ", rur=" + rur);
    }
    final long oldRecoveryID = rur.getRecoveryID();
    rur.setRecoveryID(recoveryId);
    LOG.info("initReplicaRecovery: update recovery id for " + block
        + " from " + oldRecoveryID + " to " + recoveryId);
  } else {
    rur = new ReplicaUnderRecovery(replica, recoveryId);
    map.add(bpid, rur);
    LOG.info("initReplicaRecovery: changing replica state for " + block
        + " from " + replica.getState() + " to " + rur.getState());
  }
  return rur.createInfo();
}
Example #8
Source File: DataNode.java From big-c with Apache License 2.0
/** Recover a block */
private void recoverBlock(RecoveringBlock rBlock) throws IOException {
  ExtendedBlock block = rBlock.getBlock();
  String blookPoolId = block.getBlockPoolId();
  DatanodeID[] datanodeids = rBlock.getLocations();
  List<BlockRecord> syncList = new ArrayList<BlockRecord>(datanodeids.length);
  int errorCount = 0;

  //check generation stamps
  for(DatanodeID id : datanodeids) {
    try {
      BPOfferService bpos = blockPoolManager.get(blookPoolId);
      DatanodeRegistration bpReg = bpos.bpRegistration;
      InterDatanodeProtocol datanode = bpReg.equals(id)?
          this: DataNode.createInterDataNodeProtocolProxy(id, getConf(),
              dnConf.socketTimeout, dnConf.connectToDnViaHostname);
      ReplicaRecoveryInfo info = callInitReplicaRecovery(datanode, rBlock);
      if (info != null &&
          info.getGenerationStamp() >= block.getGenerationStamp() &&
          info.getNumBytes() > 0) {
        syncList.add(new BlockRecord(id, datanode, info));
      }
    } catch (RecoveryInProgressException ripE) {
      InterDatanodeProtocol.LOG.warn(
          "Recovery for replica " + block + " on data-node " + id
          + " is already in progress. Recovery id = "
          + rBlock.getNewGenerationStamp() + " is aborted.", ripE);
      return;
    } catch (IOException e) {
      ++errorCount;
      InterDatanodeProtocol.LOG.warn(
          "Failed to obtain replica info for block (=" + block
          + ") from datanode (=" + id + ")", e);
    }
  }

  if (errorCount == datanodeids.length) {
    throw new IOException("All datanodes failed: block=" + block
        + ", datanodeids=" + Arrays.asList(datanodeids));
  }

  syncBlock(rBlock, syncList);
}
Example #9
Source File: HoodieLogFormatWriter.java From hudi with Apache License 2.0
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e)
    throws IOException, InterruptedException {
  if (e.getMessage().contains(APPEND_UNAVAILABLE_EXCEPTION_MESSAGE)) {
    // This issue happens when all replicas for a file are down and/or being decommissioned.
    // The fs.append() API could append to the last block for a file. If the last block is full, a new block is
    // appended to. In a scenario when a lot of DN's are decommissioned, it can happen that DN's holding all
    // replicas for a block/file are decommissioned together. During this process, all these blocks will start to
    // get replicated to other active DataNodes but this process might take time (can be of the order of few
    // hours). During this time, if a fs.append() API is invoked for a file whose last block is eligible to be
    // appended to, then the NN will throw an exception saying that it couldn't find any active replica with the
    // last block. Find more information here : https://issues.apache.org/jira/browse/HDFS-6325
    LOG.warn("Failed to open an append stream to the log file. Opening a new log file..", e);
    // Rollover the current log file (since cannot get a stream handle) and create new one
    this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
    createNewFile();
  } else if (e.getClassName().contentEquals(AlreadyBeingCreatedException.class.getName())) {
    LOG.warn("Another task executor writing to the same log file(" + logFile + ". Rolling over");
    // Rollover the current log file (since cannot get a stream handle) and create new one
    this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
    createNewFile();
  } else if (e.getClassName().contentEquals(RecoveryInProgressException.class.getName())
      && (fs instanceof DistributedFileSystem)) {
    // this happens when either another task executor writing to this file died or
    // data node is going down. Note that we can only try to recover lease for a DistributedFileSystem.
    // ViewFileSystem unfortunately does not support this operation
    LOG.warn("Trying to recover log on path " + path);
    if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
      LOG.warn("Recovered lease on path " + path);
      // try again
      this.output = fs.append(path, bufferSize);
    } else {
      LOG.warn("Failed to recover lease on path " + path);
      throw new HoodieException(e);
    }
  } else {
    // When fs.append() has failed and an exception is thrown, by closing the output stream
    // we shall force hdfs to release the lease on the log file. When Spark retries this task (with
    // new attemptId, say taskId.1) it will be able to acquire lease on the log file (as output stream was
    // closed properly by taskId.0).
    //
    // If close() call were to fail throwing an exception, our best bet is to rollover to a new log file.
    try {
      close();
      // output stream has been successfully closed and lease on the log file has been released,
      // before throwing an exception for the append failure.
      throw new HoodieIOException("Failed to append to the output stream ", e);
    } catch (Exception ce) {
      LOG.warn("Failed to close the output stream for " + fs.getClass().getName()
          + " on path " + path + ". Rolling over to a new log file.");
      this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
      createNewFile();
    }
  }
}
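Note on the lease-recovery branch above: FSUtils.recoverDFSFileLease is a Hudi helper whose body is not shown here. As a rough sketch, a recovery loop of this kind is typically built on the public DistributedFileSystem.recoverLease and isFileClosed calls; the helper name, retry count, and sleep interval below are assumptions for illustration, not Hudi's actual implementation:

// Hypothetical lease-recovery helper; not Hudi's implementation.
// recoverLease(path) asks the NameNode to start lease recovery and returns true once the
// file is closed; isFileClosed(path) polls for completion of that recovery.
static boolean recoverLeaseWithRetry(DistributedFileSystem dfs, Path path)
    throws IOException, InterruptedException {
  boolean recovered = dfs.recoverLease(path);
  for (int i = 0; !recovered && i < 10; i++) { // assumed bound of roughly ten seconds
    Thread.sleep(1000);                        // give block recovery time to finish
    recovered = dfs.isFileClosed(path);
  }
  return recovered;
}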