org.apache.hadoop.hdfs.protocol.RecoveryInProgressException Java Examples
The following examples show how to use
org.apache.hadoop.hdfs.protocol.RecoveryInProgressException.
You can go to the original project or source file by following the links above each example.
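Common to most of the examples below: HDFS raises RecoveryInProgressException on the server side, so clients typically see it wrapped in an org.apache.hadoop.ipc.RemoteException and must compare the wrapped class name before retrying. A minimal sketch of that pattern, assuming a caller that simply backs off and retries (the helper name, retry count, and sleep interval are illustrative, not taken from any example below):

// Types used: FileSystem, FSDataOutputStream, Path (org.apache.hadoop.fs),
// RecoveryInProgressException (org.apache.hadoop.hdfs.protocol),
// RemoteException (org.apache.hadoop.ipc), IOException (java.io).
static FSDataOutputStream appendWithRetry(FileSystem fs, Path p, int maxRetries)
    throws IOException, InterruptedException {
  for (int i = 0; i < maxRetries; i++) {
    try {
      return fs.append(p);
    } catch (RemoteException re) {
      // The server-side exception arrives wrapped; unwrap it by class name.
      if (RecoveryInProgressException.class.getName().equals(re.getClassName())) {
        Thread.sleep(1000); // assumed back-off; tune for the workload
      } else {
        throw re;
      }
    }
  }
  throw new IOException("Recovery still in progress after " + maxRetries + " attempts: " + p);
}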
Example #1
Source File: TestReadWhileWriting.java From hadoop with Apache License 2.0
/** Try opening a file for append. */
private static FSDataOutputStream append(FileSystem fs, Path p) throws Exception {
  for(int i = 0; i < 10; i++) {
    try {
      return fs.append(p);
    } catch(RemoteException re) {
      if (re.getClassName().equals(RecoveryInProgressException.class.getName())) {
        AppendTestUtil.LOG.info("Will sleep and retry, i=" + i + ", p=" + p, re);
        Thread.sleep(1000);
      } else
        throw re;
    }
  }
  throw new IOException("Cannot append to " + p);
}
Example #2
Source File: TestReadWhileWriting.java From big-c with Apache License 2.0
/** Try opening a file for append. */
private static FSDataOutputStream append(FileSystem fs, Path p) throws Exception {
  for(int i = 0; i < 10; i++) {
    try {
      return fs.append(p);
    } catch(RemoteException re) {
      if (re.getClassName().equals(RecoveryInProgressException.class.getName())) {
        AppendTestUtil.LOG.info("Will sleep and retry, i=" + i + ", p=" + p, re);
        Thread.sleep(1000);
      } else
        throw re;
    }
  }
  throw new IOException("Cannot append to " + p);
}
Example #3
Source File: TestBlockRecovery.java From hadoop with Apache License 2.0
/**
 * BlockRecoveryFI_05. One DN throws RecoveryInProgressException.
 *
 * @throws IOException
 *           in case of an error
 */
@Test
public void testRecoveryInProgressException()
    throws IOException, InterruptedException {
  if(LOG.isDebugEnabled()) {
    LOG.debug("Running " + GenericTestUtils.getMethodName());
  }
  DataNode spyDN = spy(dn);
  doThrow(new RecoveryInProgressException("Replica recovery is in progress")).
      when(spyDN).initReplicaRecovery(any(RecoveringBlock.class));
  Daemon d = spyDN.recoverBlocks("fake NN", initRecoveringBlocks());
  d.join();
  verify(spyDN, never()).syncBlock(
      any(RecoveringBlock.class), anyListOf(BlockRecord.class));
}
Example #4
Source File: TestBlockRecovery.java From big-c with Apache License 2.0
/**
 * BlockRecoveryFI_05. One DN throws RecoveryInProgressException.
 *
 * @throws IOException
 *           in case of an error
 */
@Test
public void testRecoveryInProgressException()
    throws IOException, InterruptedException {
  if(LOG.isDebugEnabled()) {
    LOG.debug("Running " + GenericTestUtils.getMethodName());
  }
  DataNode spyDN = spy(dn);
  doThrow(new RecoveryInProgressException("Replica recovery is in progress")).
      when(spyDN).initReplicaRecovery(any(RecoveringBlock.class));
  Daemon d = spyDN.recoverBlocks("fake NN", initRecoveringBlocks());
  d.join();
  verify(spyDN, never()).syncBlock(
      any(RecoveringBlock.class), anyListOf(BlockRecord.class));
}
Example #5
Source File: FsDatasetImpl.java From hadoop with Apache License 2.0
/** static version of {@link #initReplicaRecovery(RecoveringBlock)}. */
static ReplicaRecoveryInfo initReplicaRecovery(String bpid, ReplicaMap map,
    Block block, long recoveryId, long xceiverStopTimeout) throws IOException {
  final ReplicaInfo replica = map.get(bpid, block.getBlockId());
  LOG.info("initReplicaRecovery: " + block + ", recoveryId=" + recoveryId
      + ", replica=" + replica);

  //check replica
  if (replica == null) {
    return null;
  }

  //stop writer if there is any
  if (replica instanceof ReplicaInPipeline) {
    final ReplicaInPipeline rip = (ReplicaInPipeline)replica;
    rip.stopWriter(xceiverStopTimeout);

    //check replica bytes on disk.
    if (rip.getBytesOnDisk() < rip.getVisibleLength()) {
      throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
          + " getBytesOnDisk() < getVisibleLength(), rip=" + rip);
    }

    //check the replica's files
    checkReplicaFiles(rip);
  }

  //check generation stamp
  if (replica.getGenerationStamp() < block.getGenerationStamp()) {
    throw new IOException(
        "replica.getGenerationStamp() < block.getGenerationStamp(), block="
        + block + ", replica=" + replica);
  }

  //check recovery id
  if (replica.getGenerationStamp() >= recoveryId) {
    throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
        + " replica.getGenerationStamp() >= recoveryId = " + recoveryId
        + ", block=" + block + ", replica=" + replica);
  }

  //check RUR
  final ReplicaUnderRecovery rur;
  if (replica.getState() == ReplicaState.RUR) {
    rur = (ReplicaUnderRecovery)replica;
    if (rur.getRecoveryID() >= recoveryId) {
      throw new RecoveryInProgressException(
          "rur.getRecoveryID() >= recoveryId = " + recoveryId
          + ", block=" + block + ", rur=" + rur);
    }
    final long oldRecoveryID = rur.getRecoveryID();
    rur.setRecoveryID(recoveryId);
    LOG.info("initReplicaRecovery: update recovery id for " + block
        + " from " + oldRecoveryID + " to " + recoveryId);
  } else {
    rur = new ReplicaUnderRecovery(replica, recoveryId);
    map.add(bpid, rur);
    LOG.info("initReplicaRecovery: changing replica state for " + block
        + " from " + replica.getState() + " to " + rur.getState());
  }
  return rur.createInfo();
}
Example #6
Source File: DataNode.java From hadoop with Apache License 2.0
/** Recover a block */
private void recoverBlock(RecoveringBlock rBlock) throws IOException {
  ExtendedBlock block = rBlock.getBlock();
  String blookPoolId = block.getBlockPoolId();
  DatanodeID[] datanodeids = rBlock.getLocations();
  List<BlockRecord> syncList = new ArrayList<BlockRecord>(datanodeids.length);
  int errorCount = 0;

  //check generation stamps
  for(DatanodeID id : datanodeids) {
    try {
      BPOfferService bpos = blockPoolManager.get(blookPoolId);
      DatanodeRegistration bpReg = bpos.bpRegistration;
      InterDatanodeProtocol datanode = bpReg.equals(id)?
          this: DataNode.createInterDataNodeProtocolProxy(id, getConf(),
              dnConf.socketTimeout, dnConf.connectToDnViaHostname);
      ReplicaRecoveryInfo info = callInitReplicaRecovery(datanode, rBlock);
      if (info != null &&
          info.getGenerationStamp() >= block.getGenerationStamp() &&
          info.getNumBytes() > 0) {
        syncList.add(new BlockRecord(id, datanode, info));
      }
    } catch (RecoveryInProgressException ripE) {
      InterDatanodeProtocol.LOG.warn(
          "Recovery for replica " + block + " on data-node " + id
          + " is already in progress. Recovery id = "
          + rBlock.getNewGenerationStamp() + " is aborted.", ripE);
      return;
    } catch (IOException e) {
      ++errorCount;
      InterDatanodeProtocol.LOG.warn(
          "Failed to obtain replica info for block (=" + block
          + ") from datanode (=" + id + ")", e);
    }
  }

  if (errorCount == datanodeids.length) {
    throw new IOException("All datanodes failed: block=" + block
        + ", datanodeids=" + Arrays.asList(datanodeids));
  }

  syncBlock(rBlock, syncList);
}
Example #7
Source File: FsDatasetImpl.java From big-c with Apache License 2.0
/** static version of {@link #initReplicaRecovery(RecoveringBlock)}. */
static ReplicaRecoveryInfo initReplicaRecovery(String bpid, ReplicaMap map,
    Block block, long recoveryId, long xceiverStopTimeout) throws IOException {
  final ReplicaInfo replica = map.get(bpid, block.getBlockId());
  LOG.info("initReplicaRecovery: " + block + ", recoveryId=" + recoveryId
      + ", replica=" + replica);

  //check replica
  if (replica == null) {
    return null;
  }

  //stop writer if there is any
  if (replica instanceof ReplicaInPipeline) {
    final ReplicaInPipeline rip = (ReplicaInPipeline)replica;
    rip.stopWriter(xceiverStopTimeout);

    //check replica bytes on disk.
    if (rip.getBytesOnDisk() < rip.getVisibleLength()) {
      throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
          + " getBytesOnDisk() < getVisibleLength(), rip=" + rip);
    }

    //check the replica's files
    checkReplicaFiles(rip);
  }

  //check generation stamp
  if (replica.getGenerationStamp() < block.getGenerationStamp()) {
    throw new IOException(
        "replica.getGenerationStamp() < block.getGenerationStamp(), block="
        + block + ", replica=" + replica);
  }

  //check recovery id
  if (replica.getGenerationStamp() >= recoveryId) {
    throw new IOException("THIS IS NOT SUPPOSED TO HAPPEN:"
        + " replica.getGenerationStamp() >= recoveryId = " + recoveryId
        + ", block=" + block + ", replica=" + replica);
  }

  //check RUR
  final ReplicaUnderRecovery rur;
  if (replica.getState() == ReplicaState.RUR) {
    rur = (ReplicaUnderRecovery)replica;
    if (rur.getRecoveryID() >= recoveryId) {
      throw new RecoveryInProgressException(
          "rur.getRecoveryID() >= recoveryId = " + recoveryId
          + ", block=" + block + ", rur=" + rur);
    }
    final long oldRecoveryID = rur.getRecoveryID();
    rur.setRecoveryID(recoveryId);
    LOG.info("initReplicaRecovery: update recovery id for " + block
        + " from " + oldRecoveryID + " to " + recoveryId);
  } else {
    rur = new ReplicaUnderRecovery(replica, recoveryId);
    map.add(bpid, rur);
    LOG.info("initReplicaRecovery: changing replica state for " + block
        + " from " + replica.getState() + " to " + rur.getState());
  }
  return rur.createInfo();
}
Example #8
Source File: DataNode.java From big-c with Apache License 2.0
/** Recover a block */
private void recoverBlock(RecoveringBlock rBlock) throws IOException {
  ExtendedBlock block = rBlock.getBlock();
  String blookPoolId = block.getBlockPoolId();
  DatanodeID[] datanodeids = rBlock.getLocations();
  List<BlockRecord> syncList = new ArrayList<BlockRecord>(datanodeids.length);
  int errorCount = 0;

  //check generation stamps
  for(DatanodeID id : datanodeids) {
    try {
      BPOfferService bpos = blockPoolManager.get(blookPoolId);
      DatanodeRegistration bpReg = bpos.bpRegistration;
      InterDatanodeProtocol datanode = bpReg.equals(id)?
          this: DataNode.createInterDataNodeProtocolProxy(id, getConf(),
              dnConf.socketTimeout, dnConf.connectToDnViaHostname);
      ReplicaRecoveryInfo info = callInitReplicaRecovery(datanode, rBlock);
      if (info != null &&
          info.getGenerationStamp() >= block.getGenerationStamp() &&
          info.getNumBytes() > 0) {
        syncList.add(new BlockRecord(id, datanode, info));
      }
    } catch (RecoveryInProgressException ripE) {
      InterDatanodeProtocol.LOG.warn(
          "Recovery for replica " + block + " on data-node " + id
          + " is already in progress. Recovery id = "
          + rBlock.getNewGenerationStamp() + " is aborted.", ripE);
      return;
    } catch (IOException e) {
      ++errorCount;
      InterDatanodeProtocol.LOG.warn(
          "Failed to obtain replica info for block (=" + block
          + ") from datanode (=" + id + ")", e);
    }
  }

  if (errorCount == datanodeids.length) {
    throw new IOException("All datanodes failed: block=" + block
        + ", datanodeids=" + Arrays.asList(datanodeids));
  }

  syncBlock(rBlock, syncList);
}
Example #9
Source File: HoodieLogFormatWriter.java From hudi with Apache License 2.0
private void handleAppendExceptionOrRecoverLease(Path path, RemoteException e)
    throws IOException, InterruptedException {
  if (e.getMessage().contains(APPEND_UNAVAILABLE_EXCEPTION_MESSAGE)) {
    // This issue happens when all replicas for a file are down and/or being decommissioned.
    // The fs.append() API could append to the last block for a file. If the last block is full, a new block is
    // appended to. In a scenario when a lot of DN's are decommissioned, it can happen that DN's holding all
    // replicas for a block/file are decommissioned together. During this process, all these blocks will start to
    // get replicated to other active DataNodes but this process might take time (can be of the order of few
    // hours). During this time, if a fs.append() API is invoked for a file whose last block is eligible to be
    // appended to, then the NN will throw an exception saying that it couldn't find any active replica with the
    // last block. Find more information here : https://issues.apache.org/jira/browse/HDFS-6325
    LOG.warn("Failed to open an append stream to the log file. Opening a new log file..", e);
    // Rollover the current log file (since cannot get a stream handle) and create new one
    this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
    createNewFile();
  } else if (e.getClassName().contentEquals(AlreadyBeingCreatedException.class.getName())) {
    LOG.warn("Another task executor writing to the same log file(" + logFile + ". Rolling over");
    // Rollover the current log file (since cannot get a stream handle) and create new one
    this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
    createNewFile();
  } else if (e.getClassName().contentEquals(RecoveryInProgressException.class.getName())
      && (fs instanceof DistributedFileSystem)) {
    // this happens when either another task executor writing to this file died or
    // data node is going down. Note that we can only try to recover lease for a DistributedFileSystem.
    // ViewFileSystem unfortunately does not support this operation
    LOG.warn("Trying to recover log on path " + path);
    if (FSUtils.recoverDFSFileLease((DistributedFileSystem) fs, path)) {
      LOG.warn("Recovered lease on path " + path);
      // try again
      this.output = fs.append(path, bufferSize);
    } else {
      LOG.warn("Failed to recover lease on path " + path);
      throw new HoodieException(e);
    }
  } else {
    // When fs.append() has failed and an exception is thrown, by closing the output stream
    // we shall force hdfs to release the lease on the log file. When Spark retries this task (with
    // new attemptId, say taskId.1) it will be able to acquire lease on the log file (as output stream was
    // closed properly by taskId.0).
    //
    // If close() call were to fail throwing an exception, our best bet is to rollover to a new log file.
    try {
      close();
      // output stream has been successfully closed and lease on the log file has been released,
      // before throwing an exception for the append failure.
      throw new HoodieIOException("Failed to append to the output stream ", e);
    } catch (Exception ce) {
      LOG.warn("Failed to close the output stream for " + fs.getClass().getName()
          + " on path " + path + ". Rolling over to a new log file.");
      this.logFile = logFile.rollOver(fs, rolloverLogWriteToken);
      createNewFile();
    }
  }
}
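Note on the lease-recovery branch above: FSUtils.recoverDFSFileLease is a Hudi helper whose body is not shown here. As a rough sketch, a recovery loop of this kind is typically built on the public DistributedFileSystem.recoverLease and isFileClosed calls; the helper name, retry count, and sleep interval below are assumptions for illustration, not Hudi's actual implementation:

// Hypothetical lease-recovery helper; not Hudi's implementation.
// recoverLease(path) asks the NameNode to start lease recovery and returns true once the
// file is closed; isFileClosed(path) polls for completion of that recovery.
static boolean recoverLeaseWithRetry(DistributedFileSystem dfs, Path path)
    throws IOException, InterruptedException {
  boolean recovered = dfs.recoverLease(path);
  for (int i = 0; !recovered && i < 10; i++) { // assumed bound of roughly ten seconds
    Thread.sleep(1000);                        // give block recovery time to finish
    recovered = dfs.isFileClosed(path);
  }
  return recovered;
}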