org.apache.ratis.proto.RaftProtos Java Examples
The following examples show how to use
org.apache.ratis.proto.RaftProtos.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0 | 6 votes |
/**
 * Pre-processes an incoming client request before it is written to the log.
 *
 * <p>The message content is converted to an {@code OMRequest}, the Raft group
 * id of the request is checked against this state machine's group id, and the
 * request is passed to the handler for validation.
 *
 * @param raftClientRequest the client request to start a transaction for
 * @return the transaction context for the request; when validation fails with
 *         an {@code IOException}, a context carrying that exception is
 *         returned instead
 * @throws IOException thrown by the state machine while validating
 */
@Override
public TransactionContext startTransaction(
    RaftClientRequest raftClientRequest) throws IOException {
  final ByteString content = raftClientRequest.getMessage().getContent();
  final OMRequest request =
      OMRatisHelper.convertByteStringToOMRequest(content);

  Preconditions.checkArgument(
      raftClientRequest.getRaftGroupId().equals(raftGroupId));

  IOException validationFailure = null;
  try {
    handler.validateRequest(request);
  } catch (IOException ioe) {
    validationFailure = ioe;
  }

  if (validationFailure == null) {
    return handleStartTransactionRequests(raftClientRequest, request);
  }

  // Validation failed: hand back a context that records the failure.
  final TransactionContext failed = TransactionContext.newBuilder()
      .setClientRequest(raftClientRequest)
      .setStateMachine(this)
      .setServerRole(RaftProtos.RaftPeerRole.LEADER)
      .build();
  failed.setException(validationFailure);
  return failed;
}
Example #2
Source File: TestGrpcServerMetrics.java From incubator-ratis with Apache License 2.0 | 6 votes |
/**
 * For both heartbeat and non-heartbeat append-entries requests, checks that
 * the log-appender latency timer starts with a max of zero and reports a max
 * above 1000 after a request has been timed across a one-second sleep.
 */
@Test
public void testGrpcLogAppenderLatencyTimer() throws Exception {
  final boolean[] heartbeatFlags = {true, false};
  for (boolean heartbeat : heartbeatFlags) {
    final RaftProtos.AppendEntriesRequestProto.Builder builder =
        RaftProtos.AppendEntriesRequestProto.newBuilder();
    if (!heartbeat) {
      // the non-heartbeat case carries one (empty) log entry
      builder.addEntries(RaftProtos.LogEntryProto.newBuilder().build());
    }
    final GrpcLogAppender.AppendEntriesRequest request =
        new GrpcLogAppender.AppendEntriesRequest(
            builder.build(), followerId, grpcServerMetrics);

    final String timerName = String.format(
        RATIS_GRPC_METRICS_LOG_APPENDER_LATENCY
            + GrpcServerMetrics.getHeartbeatSuffix(heartbeat),
        followerId.toString());

    Assert.assertEquals(0L,
        ratisMetricRegistry.timer(timerName).getSnapshot().getMax());

    request.startRequestTimer();
    Thread.sleep(1000L);
    request.stopRequestTimer();

    final long maxLatency =
        ratisMetricRegistry.timer(timerName).getSnapshot().getMax();
    Assert.assertTrue(maxLatency > 1000L);
  }
}
Example #3
Source File: TestRaftServerNoLeaderTimeout.java From incubator-ratis with Apache License 2.0 | 6 votes |
@Test public void testLeaderElectionDetection() throws Exception { RaftTestUtil.waitForLeader(cluster); final TimeDuration noLeaderTimeout = RaftServerConfigKeys.Notification.noLeaderTimeout(cluster.getProperties()); RaftServerImpl healthyFollower = cluster.getFollowers().get(1); RaftServerImpl failedFollower = cluster.getFollowers().get(0); // fail the leader and one of the followers to that quorum is not present // for next leader election to succeed. cluster.killServer(failedFollower.getId()); cluster.killServer(cluster.getLeader().getId()); // Wait to ensure that leader election is triggered and also state machine callback is triggered noLeaderTimeout.sleep(); noLeaderTimeout.sleep(); RaftProtos.RoleInfoProto roleInfoProto = SimpleStateMachine4Testing.get(healthyFollower).getLeaderElectionTimeoutInfo(); Assert.assertNotNull(roleInfoProto); Assert.assertEquals(roleInfoProto.getRole(), RaftProtos.RaftPeerRole.CANDIDATE); final long noLeaderTimeoutMs = noLeaderTimeout.toLong(TimeUnit.MILLISECONDS); Assert.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > noLeaderTimeoutMs); }
Example #4
Source File: InstallSnapshotNotificationTests.java From incubator-ratis with Apache License 2.0 | 6 votes |
/**
 * Handles an install-snapshot notification by copying the leader's snapshot
 * file into this state machine's directory.
 *
 * <p>When no leader snapshot info has been set, the call is delegated to the
 * superclass.
 *
 * @param roleInfoProto role information passed through to the superclass
 * @param termIndex term-index passed through to the superclass
 * @return a future completed with the leader snapshot's term-index, or the
 *         result of {@code JavaUtils.completeExceptionally} when the copy
 *         fails with an {@code IOException}
 */
@Override
public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader(
    RaftProtos.RoleInfoProto roleInfoProto, TermIndex termIndex) {
  final SingleFileSnapshotInfo snapshot =
      (SingleFileSnapshotInfo) leaderSnapshotInfoRef.get();
  LOG.info("{}: leaderSnapshotInfo = {}", getId(), snapshot);

  if (snapshot == null) {
    return super.notifyInstallSnapshotFromLeader(roleInfoProto, termIndex);
  }

  try {
    final Path source = snapshot.getFile().getPath();
    final File target = new File(getSMdir(), source.getFileName().toString());
    Files.copy(source, target.toPath());
    return CompletableFuture.completedFuture(snapshot.getTermIndex());
  } catch (IOException e) {
    LOG.error("Failed notifyInstallSnapshotFromLeader", e);
    return JavaUtils.completeExceptionally(e);
  }
}
Example #5
Source File: StateMachineShutdownTests.java From incubator-ratis with Apache License 2.0 | 6 votes |
/**
 * Applies a transaction, first waiting on {@code objectToWait} when
 * {@code blockOnApply} is set.
 *
 * @param trx the transaction whose log entry supplies the term and index
 * @return a future already completed with a "done" message
 * @throws RuntimeException wrapping the {@link InterruptedException} if the
 *         thread is interrupted while waiting
 */
@Override
public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
  CompletableFuture<Message> future = new CompletableFuture<>();
  if (blockOnApply) {
    synchronized (objectToWait) {
      try {
        objectToWait.wait();
      } catch (InterruptedException e) {
        // Restore the interrupt flag for code further up the stack and keep
        // the original exception as the cause instead of dropping it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
    }
  }
  RaftProtos.LogEntryProto entry = trx.getLogEntry();
  updateLastAppliedTermIndex(entry.getTerm(), entry.getIndex());
  future.complete(new RaftTestUtil.SimpleMessage("done"));
  return future;
}
Example #6
Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0 | 6 votes |
/** * Leader OM has purged entries from its log. To catch up, OM must download * the latest checkpoint from the leader OM and install it. * @param roleInfoProto the leader node information * @param firstTermIndexInLog TermIndex of the first append entry available * in the Leader's log. * @return the last term index included in the installed snapshot. */ @Override public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader( RaftProtos.RoleInfoProto roleInfoProto, TermIndex firstTermIndexInLog) { String leaderNodeId = RaftPeerId.valueOf(roleInfoProto.getSelf().getId()) .toString(); LOG.info("Received install snapshot notificaiton form OM leader: {} with " + "term index: {}", leaderNodeId, firstTermIndexInLog); if (!roleInfoProto.getRole().equals(RaftProtos.RaftPeerRole.LEADER)) { // A non-leader Ratis server should not send this notification. LOG.error("Received Install Snapshot notification from non-leader OM " + "node: {}. Ignoring the notification.", leaderNodeId); return completeExceptionally(new OMException("Received notification to " + "install snaphost from non-leader OM node", OMException.ResultCodes.RATIS_ERROR)); } CompletableFuture<TermIndex> future = CompletableFuture.supplyAsync( () -> ozoneManager.installSnapshot(leaderNodeId), installSnapshotExecutor); return future; }
Example #7
Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0 | 6 votes |
/**
 * Builds the request-type dependent retry policy for write and watch
 * requests from the Ratis client configuration.
 *
 * <p>Table mapping exception type to the retry policy used for that
 * exception in write and watch requests:
 * <pre>
 * ---------------------------------------------------------------------------
 * | Exception                   | RetryPolicy for     | RetryPolicy for     |
 * |                             | Write request       | Watch request       |
 * |-------------------------------------------------------------------------|
 * | NotReplicatedException      | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | GroupMismatchException      | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | StateMachineException       | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | TimeoutIOException          | EXPONENTIAL_BACKOFF | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | ResourceUnavailableException| EXPONENTIAL_BACKOFF | EXPONENTIAL_BACKOFF |
 * |-------------------------------------------------------------------------|
 * | Others                      | MULTILINEAR_RANDOM  | MULTILINEAR_RANDOM  |
 * |                             | _RETRY              | _RETRY              |
 * ---------------------------------------------------------------------------
 * </pre>
 *
 * @param conf configuration source the {@code RatisClientConfig} is read from
 * @return the retry policy with per-type policies and timeouts for WRITE
 *         and WATCH requests
 */
public static RetryPolicy createRetryPolicy(ConfigurationSource conf) {
  final RatisClientConfig clientConfig =
      OzoneConfiguration.of(conf).getObject(RatisClientConfig.class);

  final ExponentialBackoffRetry backoff =
      createExponentialBackoffPolicy(clientConfig);
  final MultipleLinearRandomRetry multilinear =
      MultipleLinearRandomRetry.parseCommaSeparated(
          clientConfig.getMultilinearPolicy());

  final long writeTimeoutMs = clientConfig.getWriteRequestTimeoutInMs();
  final long watchTimeoutMs = clientConfig.getWatchRequestTimeoutInMs();

  // The two request types differ only in the last argument.
  final RetryPolicy writePolicy =
      createExceptionDependentPolicy(backoff, multilinear, backoff);
  final RetryPolicy watchPolicy =
      createExceptionDependentPolicy(backoff, multilinear,
          RetryPolicies.noRetry());

  return RequestTypeDependentRetryPolicy.newBuilder()
      .setRetryPolicy(RaftProtos.RaftClientRequestProto.TypeCase.WRITE,
          writePolicy)
      .setRetryPolicy(RaftProtos.RaftClientRequestProto.TypeCase.WATCH,
          watchPolicy)
      .setTimeout(RaftProtos.RaftClientRequestProto.TypeCase.WRITE,
          TimeDuration.valueOf(writeTimeoutMs, TimeUnit.MILLISECONDS))
      .setTimeout(RaftProtos.RaftClientRequestProto.TypeCase.WATCH,
          TimeDuration.valueOf(watchTimeoutMs, TimeUnit.MILLISECONDS))
      .build();
}
Example #8
Source File: TestRaftServerLeaderElectionTimeout.java From ratis with Apache License 2.0 | 6 votes |
@Test public void testLeaderElectionDetection() throws Exception { RaftTestUtil.waitForLeader(cluster); long leaderElectionTimeout = RaftServerConfigKeys. leaderElectionTimeout(cluster.getProperties()).toIntExact(TimeUnit.MILLISECONDS); RaftServerImpl healthyFollower = cluster.getFollowers().get(1); RaftServerImpl failedFollower = cluster.getFollowers().get(0); // fail the leader and one of the followers to that quorum is not present // for next leader election to succeed. cluster.killServer(failedFollower.getId()); cluster.killServer(cluster.getLeader().getId()); // Wait to ensure that leader election is triggered and also state machine callback is triggered Thread.sleep( leaderElectionTimeout * 2); RaftProtos.RoleInfoProto roleInfoProto = SimpleStateMachine4Testing.get(healthyFollower).getLeaderElectionTimeoutInfo(); Assert.assertNotNull(roleInfoProto); Assert.assertEquals(roleInfoProto.getRole(), RaftProtos.RaftPeerRole.CANDIDATE); Assert.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > leaderElectionTimeout); }
Example #9
Source File: XceiverClientRatis.java From hadoop-ozone with Apache License 2.0 | 6 votes |
private void updateCommitInfosMap( Collection<RaftProtos.CommitInfoProto> commitInfoProtos) { // if the commitInfo map is empty, just update the commit indexes for each // of the servers if (commitInfoMap.isEmpty()) { commitInfoProtos.forEach(proto -> commitInfoMap .put(RatisHelper.toDatanodeId(proto.getServer()), proto.getCommitIndex())); // In case the commit is happening 2 way, just update the commitIndex // for the servers which have been successfully updating the commit // indexes. This is important because getReplicatedMinCommitIndex() // should always return the min commit index out of the nodes which have // been replicating data successfully. } else { commitInfoProtos.forEach(proto -> commitInfoMap .computeIfPresent(RatisHelper.toDatanodeId(proto.getServer()), (address, index) -> { index = proto.getCommitIndex(); return index; })); } }
Example #10
Source File: TestRaftStream.java From ratis with Apache License 2.0 | 5 votes |
private void checkLog(RaftLog raftLog, long expectedCommittedIndex, Supplier<byte[]> s) throws IOException { long committedIndex = raftLog.getLastCommittedIndex(); Assert.assertEquals(expectedCommittedIndex, committedIndex); // check the log content TermIndex[] entries = raftLog.getEntries(1, expectedCommittedIndex + 1); for (TermIndex entry : entries) { RaftProtos.LogEntryProto log = raftLog.get(entry.getIndex()); byte[] logData = log.getStateMachineLogEntry().getLogData().toByteArray(); byte[] expected = s.get(); LOG.info("log " + entry + " " + log.getLogEntryBodyCase() + " " + StringUtils.bytes2HexString(logData)); Assert.assertEquals(expected.length, logData.length); Assert.assertArrayEquals(expected, logData); } }
Example #11
Source File: TestRaftServerSlownessDetection.java From ratis with Apache License 2.0 | 5 votes |
@Test public void testSlownessDetection() throws Exception { RaftTestUtil.waitForLeader(cluster); long slownessTimeout = RaftServerConfigKeys.Rpc .slownessTimeout(cluster.getProperties()).toIntExact(TimeUnit.MILLISECONDS); RaftServerImpl failedFollower = cluster.getFollowers().get(0); // fail the node and wait for the callback to be triggered cluster.killServer(failedFollower.getId()); Thread.sleep( slownessTimeout * 2); // Followers should not get any failed not notification for (RaftServerImpl followerServer : cluster.getFollowers()) { Assert.assertNull(SimpleStateMachine4Testing.get(followerServer).getSlownessInfo()); } // the leader should get notification that the follower has failed now RaftProtos.RoleInfoProto roleInfoProto = SimpleStateMachine4Testing.get(cluster.getLeader()).getSlownessInfo(); Assert.assertNotNull(roleInfoProto); List<RaftProtos.ServerRpcProto> followers = roleInfoProto.getLeaderInfo().getFollowerInfoList(); //Assert that the node shutdown is lagging behind for (RaftProtos.ServerRpcProto serverProto : followers) { if (RaftPeerId.valueOf(serverProto.getId().getId()).equals(failedFollower.getId())) { Assert.assertTrue(serverProto.getLastRpcElapsedTimeMs() > slownessTimeout); } } }
Example #12
Source File: XceiverServerRatis.java From hadoop-ozone with Apache License 2.0 | 5 votes |
/**
 * Triggers a pipeline close for the given group after a transaction failure,
 * with a message naming this datanode and the given role.
 *
 * @param groupId the Raft group whose pipeline is to be closed
 * @param role the role included in the close message
 */
void handleApplyTransactionFailure(RaftGroupId groupId,
    RaftProtos.RaftPeerRole role) {
  final UUID datanodeId = RatisHelper.toDatanodeId(getServer().getId());
  final String reason = new StringBuilder()
      .append("Ratis Transaction failure in datanode ")
      .append(datanodeId)
      .append(" with role ")
      .append(role)
      .append(" .Triggering pipeline close action.")
      .toString();
  triggerPipelineClose(groupId, reason,
      ClosePipelineInfo.Reason.STATEMACHINE_TRANSACTION_FAILED, true);
}
Example #13
Source File: MetaStateMachine.java From ratis with Apache License 2.0 | 5 votes |
@Override public TransactionContext applyTransactionSerial(TransactionContext trx) { RaftProtos.LogEntryProto x = trx.getLogEntry(); MetaSMRequestProto req = null; try { req = MetaSMRequestProto.parseFrom(x.getStateMachineLogEntry().getLogData()); } catch (InvalidProtocolBufferException e) { e.printStackTrace(); } switch (req.getTypeCase()) { case REGISTERREQUEST: LogServiceRegisterLogRequestProto r = req.getRegisterRequest(); LogName logname = LogServiceProtoUtil.toLogName(r.getLogname()); RaftGroup rg = MetaServiceProtoUtil.toRaftGroup(r.getRaftGroup()); map.put(logname, rg); LOG.info("Log {} registered at {} with group {} ", logname, getId(), rg ); break; case UNREGISTERREQUEST: LogServiceUnregisterLogRequestProto unregReq = req.getUnregisterRequest(); logname = LogServiceProtoUtil.toLogName(unregReq.getLogname()); map.remove(logname); break; case PINGREQUEST: LogServicePingRequestProto pingRequest = req.getPingRequest(); RaftPeer peer = MetaServiceProtoUtil.toRaftPeer(pingRequest.getPeer()); if (peers.contains(peer)) { //Do Nothing, that's just heartbeat } else { peers.add(peer); avail.add(new PeerGroups(peer)); } break; default: } return super.applyTransactionSerial(trx); }
Example #14
Source File: RaftServerMetrics.java From incubator-ratis with Apache License 2.0 | 5 votes |
/**
 * Registers a gauge that reports the commit index cached for the given peer.
 * The gauge reads {@code commitInfoCache} each time it is evaluated and
 * yields {@code 0} while no commit info is cached for the peer.
 *
 * @param peerId the peer whose commit index is tracked
 */
public void addPeerCommitIndexGauge(RaftPeerId peerId) {
  final String gaugeName =
      String.format(LEADER_METRIC_PEER_COMMIT_INDEX, peerId);

  registry.gauge(gaugeName, () -> () -> {
    final RaftProtos.CommitInfoProto cached = commitInfoCache.get(peerId);
    return cached == null ? 0L : cached.getCommitIndex();
  });
}
Example #15
Source File: BaseStateMachine.java From incubator-ratis with Apache License 2.0 | 5 votes |
@Override public CompletableFuture<Message> applyTransaction(TransactionContext trx) { // return the same message contained in the entry RaftProtos.LogEntryProto entry = Objects.requireNonNull(trx.getLogEntry()); updateLastAppliedTermIndex(entry.getTerm(), entry.getIndex()); return CompletableFuture.completedFuture( Message.valueOf(trx.getLogEntry().getStateMachineLogEntry().getLogData())); }
Example #16
Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0 | 5 votes |
/**
 * Creates the policy from the per-type retry policies and timeouts.
 *
 * @param map retry policy for each request type; exposed through an
 *            unmodifiable view
 * @param timeoutMap timeout for each request type; stored as given
 */
private RequestTypeDependentRetryPolicy(
    EnumMap<RaftProtos.RaftClientRequestProto.TypeCase, RetryPolicy> map,
    EnumMap<RaftProtos.RaftClientRequestProto.TypeCase, TimeDuration> timeoutMap) {
  this.retryPolicyMap = Collections.unmodifiableMap(map);
  this.timeoutMap = timeoutMap;
  // The string form is built on demand: "ClassName{TYPE->policy, ...}".
  this.myString = () -> {
    final StringBuilder b = new StringBuilder(getClass().getSimpleName()).append("{");
    map.forEach((key, value) -> b.append(key).append("->").append(value).append(", "));
    // Drop the trailing ", " only when an entry was appended; with an
    // empty map, truncating would cut into the class name itself.
    if (!map.isEmpty()) {
      b.setLength(b.length() - 2);
    }
    return b.append("}").toString();
  };
}
Example #17
Source File: LogStateMachine.java From ratis with Apache License 2.0 | 5 votes |
private CompletableFuture<Message> processAppendRequest(TransactionContext trx, LogServiceRequestProto logProto) { final LogEntryProto entry = trx.getLogEntry(); AppendLogEntryRequestProto proto = logProto.getAppendRequest(); final long index = entry.getIndex(); long total = 0; Throwable t = verifyState(State.OPEN); if (t == null) { try (final AutoCloseableLock writeLock = writeLock()) { List<byte[]> entries = LogServiceProtoUtil.toListByteArray(proto.getDataList()); for (byte[] bb : entries) { total += bb.length; } this.length += total; // TODO do we need this for other write request (close, sync) updateLastAppliedTermIndex(entry.getTerm(), index); } } List<Long> ids = new ArrayList<Long>(); ids.add(index); final CompletableFuture<Message> f = CompletableFuture.completedFuture( Message.valueOf(LogServiceProtoUtil.toAppendLogReplyProto(ids, t).toByteString())); final RaftProtos.RaftPeerRole role = trx.getServerRole(); LOG.debug("{}:{}-{}: {} new length {}", role, getId(), index, proto, length); if (LOG.isTraceEnabled()) { LOG.trace("{}-{}: variables={}", getId(), index, length); } return f; }
Example #18
Source File: LogStateMachine.java From incubator-ratis with Apache License 2.0 | 5 votes |
private CompletableFuture<Message> processAppendRequest(TransactionContext trx, LogServiceRequestProto logProto) { final LogEntryProto entry = trx.getLogEntry(); AppendLogEntryRequestProto proto = logProto.getAppendRequest(); final long index = entry.getIndex(); long newSize = 0; Throwable t = verifyState(State.OPEN); final List<Long> ids = new ArrayList<Long>(); if (t == null) { try (AutoCloseableLock writeLock = writeLock()) { List<byte[]> entries = LogServiceProtoUtil.toListByteArray(proto.getDataList()); for (byte[] bb : entries) { ids.add(this.length); newSize += bb.length; this.length++; } this.dataRecordsSize += newSize; // TODO do we need this for other write request (close, sync) updateLastAppliedTermIndex(entry.getTerm(), index); } } final CompletableFuture<Message> f = CompletableFuture.completedFuture( Message.valueOf(LogServiceProtoUtil.toAppendLogReplyProto(ids, t).toByteString())); final RaftProtos.RaftPeerRole role = trx.getServerRole(); if (LOG.isTraceEnabled()) { LOG.trace("{}:{}-{}: {} new length {}", role, getId(), index, TextFormat.shortDebugString(proto), dataRecordsSize); } return f; }
Example #19
Source File: CounterStateMachine.java From incubator-ratis with Apache License 2.0 | 5 votes |
/** * Apply the INCREMENT command by incrementing the counter object. * * @param trx the transaction context * @return the message containing the updated counter value */ @Override public CompletableFuture<Message> applyTransaction(TransactionContext trx) { final RaftProtos.LogEntryProto entry = trx.getLogEntry(); //check if the command is valid String logData = entry.getStateMachineLogEntry().getLogData() .toString(Charset.defaultCharset()); if (!logData.equals("INCREMENT")) { return CompletableFuture.completedFuture( Message.valueOf("Invalid Command")); } //update the last applied term and index final long index = entry.getIndex(); updateLastAppliedTermIndex(entry.getTerm(), index); //actual execution of the command: increment the counter counter.incrementAndGet(); //return the new value of the counter to the client final CompletableFuture<Message> f = CompletableFuture.completedFuture(Message.valueOf(counter.toString())); //if leader, log the incremented value and it's log index if (trx.getServerRole() == RaftProtos.RaftPeerRole.LEADER) { LOG.info("{}: Increment to {}", index, counter.toString()); } return f; }
Example #20
Source File: TestRaftServerWithGrpc.java From incubator-ratis with Apache License 2.0 | 5 votes |
/**
 * Sends one request of each kind (write, read-only, stale-read, watch ALL,
 * watch MAJORITY) to the cluster and checks after each that the matching
 * timer in the leader's server metrics has a positive count.
 *
 * @param cluster the running cluster to send requests to
 */
void testRaftClientRequestMetrics(MiniRaftClusterWithGrpc cluster)
    throws IOException, ExecutionException, InterruptedException {
  final RaftServerImpl leader = RaftTestUtil.waitForLeader(cluster);
  final RaftServerMetrics metrics = leader.getRaftServerMetrics();

  try (final RaftClient client = cluster.createClient()) {
    final CompletableFuture<RaftClientReply> write =
        client.sendAsync(new SimpleMessage("testing"));
    Assert.assertTrue(write.get().isSuccess());
    Assert.assertTrue(
        metrics.getTimer(RAFT_CLIENT_WRITE_REQUEST).getCount() > 0);

    final CompletableFuture<RaftClientReply> read =
        client.sendReadOnlyAsync(new SimpleMessage("testing"));
    Assert.assertTrue(read.get().isSuccess());
    Assert.assertTrue(
        metrics.getTimer(RAFT_CLIENT_READ_REQUEST).getCount() > 0);

    final CompletableFuture<RaftClientReply> staleRead =
        client.sendStaleReadAsync(new SimpleMessage("testing"), 0, leader.getId());
    Assert.assertTrue(staleRead.get().isSuccess());
    Assert.assertTrue(
        metrics.getTimer(RAFT_CLIENT_STALE_READ_REQUEST).getCount() > 0);

    final CompletableFuture<RaftClientReply> watchAll =
        client.sendWatchAsync(0, RaftProtos.ReplicationLevel.ALL);
    Assert.assertTrue(watchAll.get().isSuccess());
    final String watchAllTimer = String.format(RAFT_CLIENT_WATCH_REQUEST, "-ALL");
    Assert.assertTrue(metrics.getTimer(watchAllTimer).getCount() > 0);

    final CompletableFuture<RaftClientReply> watchMajority =
        client.sendWatchAsync(0, RaftProtos.ReplicationLevel.MAJORITY);
    Assert.assertTrue(watchMajority.get().isSuccess());
    final String watchMajorityTimer = String.format(RAFT_CLIENT_WATCH_REQUEST, "");
    Assert.assertTrue(metrics.getTimer(watchMajorityTimer).getCount() > 0);
  }
}
Example #21
Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0 | 5 votes |
/**
 * Builds the transaction context for a client request.
 *
 * <p>The context is bound to this state machine, uses the LEADER server
 * role, and takes the content of the request message as its log data.
 *
 * @param raftClientRequest the client request the context is built for
 * @param omRequest the converted OM request (not used by this method)
 * @return the newly built transaction context
 */
private TransactionContext handleStartTransactionRequests(
    RaftClientRequest raftClientRequest, OMRequest omRequest) {
  return TransactionContext.newBuilder()
      .setClientRequest(raftClientRequest)
      .setStateMachine(this)
      .setServerRole(RaftProtos.RaftPeerRole.LEADER)
      .setLogData(raftClientRequest.getMessage().getContent())
      .build();
}
Example #22
Source File: BaseLogParser.java From hadoop-ozone with Apache License 2.0 | 5 votes |
/**
 * Dumps the configured Ratis log segment file, rendering state-machine log
 * entries with the given function.
 *
 * <p>Any exception is reported on standard output and not rethrown.
 *
 * @param smLogToStr converts a state-machine log entry to its string form
 */
public void parseRatisLogs(
    Function<RaftProtos.StateMachineLogEntryProto, String> smLogToStr) {
  try {
    ParseRatisLog.Builder builder = new ParseRatisLog.Builder();
    builder.setSegmentFile(segmentFile);
    builder.setSMLogToString(smLogToStr);
    ParseRatisLog prl = builder.build();
    prl.dumpSegmentFile();
  } catch (Exception e) {
    // The leading space keeps the class name from running into "failed".
    System.out.println(DatanodeRatisLogParser.class.getSimpleName()
        + " failed with exception " + e);
  }
}
Example #23
Source File: XceiverServerRatis.java From hadoop-ozone with Apache License 2.0 | 5 votes |
/**
 * Builds a failure message from the reported role information and triggers
 * a pipeline close with reason PIPELINE_FAILED.
 *
 * <p>For a CANDIDATE the message states how long the node has been in
 * candidate state; for a LEADER it lists every follower whose last-RPC
 * elapsed time exceeds {@code nodeFailureTimeoutMs}.
 *
 * @param groupId the Raft group whose pipeline is to be closed
 * @param roleInfoProto role information of the reporting node
 * @throws IllegalStateException if the role is neither CANDIDATE nor LEADER
 */
private void handlePipelineFailure(RaftGroupId groupId,
    RoleInfoProto roleInfoProto) {
  final UUID datanode = RatisHelper.toDatanodeId(roleInfoProto.getSelf());
  final RaftPeerId id = RaftPeerId.valueOf(roleInfoProto.getSelf().getId());

  final String msg;
  switch (roleInfoProto.getRole()) {
  case CANDIDATE:
    msg = datanode + " is in candidate state for "
        + roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() + "ms";
    break;
  case LEADER:
    final StringBuilder report = new StringBuilder();
    report.append(datanode).append(" has not seen follower/s");
    for (RaftProtos.ServerRpcProto follower
        : roleInfoProto.getLeaderInfo().getFollowerInfoList()) {
      if (follower.getLastRpcElapsedTimeMs() <= nodeFailureTimeoutMs) {
        continue;
      }
      report.append(" ")
          .append(RatisHelper.toDatanodeId(follower.getId()))
          .append(" for ")
          .append(follower.getLastRpcElapsedTimeMs())
          .append("ms");
    }
    msg = report.toString();
    break;
  default:
    LOG.error("unknown state: {}", roleInfoProto.getRole());
    throw new IllegalStateException("node" + id + " is in illegal role "
        + roleInfoProto.getRole());
  }

  triggerPipelineClose(groupId, msg,
      ClosePipelineInfo.Reason.PIPELINE_FAILED, false);
}
Example #24
Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0 | 4 votes |
/**
 * Sets the retry policy for the given request type.
 *
 * <p>A type may only be configured once: the value previously stored for
 * the type is asserted to be null. Note that the new policy is stored
 * before that check runs.
 *
 * @param type the request type to configure
 * @param policy the retry policy to use for that type
 * @return this builder
 */
public Builder setRetryPolicy(
    RaftProtos.RaftClientRequestProto.TypeCase type, RetryPolicy policy) {
  final RetryPolicy existing = retryPolicyMap.put(type, policy);
  Preconditions.assertNull(existing,
      () -> "The retryPolicy for type " + type + " is already set to " + existing);
  return this;
}
Example #25
Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0 | 4 votes |
/**
 * Sets the timeout for the given request type.
 *
 * <p>A type may only be configured once: the value previously stored for
 * the type is asserted to be null. Note that the new timeout is stored
 * before that check runs.
 *
 * @param type the request type to configure
 * @param timeout the timeout to use for that type
 * @return this builder
 */
public Builder setTimeout(
    RaftProtos.RaftClientRequestProto.TypeCase type, TimeDuration timeout) {
  final TimeDuration existing = timeoutMap.put(type, timeout);
  Preconditions.assertNull(existing,
      () -> "The timeout for type " + type + " is already set to " + existing);
  return this;
}
Example #26
Source File: MetaStateMachine.java From incubator-ratis with Apache License 2.0 | 4 votes |
@Override public TransactionContext applyTransactionSerial(TransactionContext trx) { RaftProtos.LogEntryProto x = trx.getLogEntry(); MetaSMRequestProto req = null; try { req = MetaSMRequestProto.parseFrom(x.getStateMachineLogEntry().getLogData()); } catch (InvalidProtocolBufferException e) { e.printStackTrace(); } switch (req.getTypeCase()) { case REGISTERREQUEST: LogServiceRegisterLogRequestProto r = req.getRegisterRequest(); LogName logname = LogServiceProtoUtil.toLogName(r.getLogname()); RaftGroup rg = MetaServiceProtoUtil.toRaftGroup(r.getRaftGroup()); rg.getPeers().stream().forEach(raftPeer -> { Set<LogName> logNames; if(!peerLogs.containsKey(raftPeer)) { logNames = new HashSet<>(); peerLogs.put(raftPeer, logNames); } else { logNames = peerLogs.get(raftPeer); } logNames.add(logname); }); map.put(logname, rg); LOG.info("Log {} registered at {} with group {} ", logname, getId(), rg ); break; case UNREGISTERREQUEST: LogServiceUnregisterLogRequestProto unregReq = req.getUnregisterRequest(); logname = LogServiceProtoUtil.toLogName(unregReq.getLogname()); map.remove(logname); break; case PINGREQUEST: LogServicePingRequestProto pingRequest = req.getPingRequest(); RaftPeer peer = MetaServiceProtoUtil.toRaftPeer(pingRequest.getPeer()); //If Set<RaftPeer> contains peer then do nothing as that's just heartbeat else add the peer to the set. if (!peers.contains(peer)) { peers.add(peer); avail.add(new PeerGroups(peer)); heartbeatInfo.put(peer, System.currentTimeMillis()); } break; case HEARTBEATREQUEST: MetaServiceProtos.LogServiceHeartbeatRequestProto heartbeatRequest = req.getHeartbeatRequest(); RaftPeer heartbeatPeer = MetaServiceProtoUtil.toRaftPeer(heartbeatRequest.getPeer()); heartbeatInfo.put(heartbeatPeer, System.currentTimeMillis()); break; default: } return super.applyTransactionSerial(trx); }
Example #27
Source File: StateMachineShutdownTests.java From incubator-ratis with Apache License 2.0 | 4 votes |
@Test public void testStateMachineShutdownWaitsForApplyTxn() throws Exception { final RaftProperties prop = getProperties(); prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY, StateMachineWithConditionalWait.class, StateMachine.class); final MiniRaftCluster cluster = newCluster(3); cluster.start(); RaftTestUtil.waitForLeader(cluster); RaftServerImpl leader = cluster.getLeader(); RaftPeerId leaderId = leader.getId(); //Unblock leader and one follower ((StateMachineWithConditionalWait)leader.getStateMachine()) .unBlockApplyTxn(); ((StateMachineWithConditionalWait)cluster. getFollowers().get(0).getStateMachine()).unBlockApplyTxn(); cluster.getLeaderAndSendFirstMessage(true); try (final RaftClient client = cluster.createClient(leaderId)) { client.send(new RaftTestUtil.SimpleMessage("message")); RaftClientReply reply = client.send( new RaftTestUtil.SimpleMessage("message2")); long logIndex = reply.getLogIndex(); //Confirm that followers have committed RaftClientReply watchReply = client.sendWatch( logIndex, RaftProtos.ReplicationLevel.ALL_COMMITTED); watchReply.getCommitInfos().forEach( val -> Assert.assertTrue(val.getCommitIndex() >= logIndex)); RaftServerImpl secondFollower = cluster.getFollowers().get(1); // Second follower is blocked in apply transaction Assert.assertTrue( secondFollower.getState().getLastAppliedIndex() < logIndex); // Now shutdown the follower in a separate thread Thread t = new Thread(() -> secondFollower.shutdown(true)); t.start(); // The second follower should still be blocked in apply transaction Assert.assertTrue( secondFollower.getState().getLastAppliedIndex() < logIndex); // Now unblock the second follower ((StateMachineWithConditionalWait) secondFollower.getStateMachine()) .unBlockApplyTxn(); // Now wait for the thread t.join(5000); Assert.assertEquals( secondFollower.getState().getLastAppliedIndex(), logIndex); cluster.shutdown(); } }
Example #28
Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0 | 4 votes |
/**
 * Finds the smallest commit index among the given commit infos.
 *
 * @param commitInfos the commit information to scan
 * @return the minimum commit index, or {@code null} when the collection is
 *         empty
 */
public static Long getMinReplicatedIndex(
    Collection<RaftProtos.CommitInfoProto> commitInfos) {
  Long smallest = null;
  for (RaftProtos.CommitInfoProto info : commitInfos) {
    final long commitIndex = info.getCommitIndex();
    if (smallest == null || commitIndex < smallest) {
      smallest = commitIndex;
    }
  }
  return smallest;
}
Example #29
Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0 | 4 votes |
/**
 * Converts a Raft peer proto to a datanode id by way of the
 * {@code RaftPeerId} built from the proto's id.
 *
 * @param peerId the peer proto to convert
 * @return the datanode id for that peer
 */
public static UUID toDatanodeId(RaftProtos.RaftPeerProto peerId) {
  final RaftPeerId raftPeerId = RaftPeerId.valueOf(peerId.getId());
  return toDatanodeId(raftPeerId);
}
Example #30
Source File: XceiverClientRatis.java From hadoop-ozone with Apache License 2.0 | 4 votes |
@Override public XceiverClientReply watchForCommit(long index) throws InterruptedException, ExecutionException, TimeoutException, IOException { long commitIndex = getReplicatedMinCommitIndex(); XceiverClientReply clientReply = new XceiverClientReply(null); if (commitIndex >= index) { // return the min commit index till which the log has been replicated to // all servers clientReply.setLogIndex(commitIndex); return clientReply; } RaftClientReply reply; try { CompletableFuture<RaftClientReply> replyFuture = getClient() .sendWatchAsync(index, RaftProtos.ReplicationLevel.ALL_COMMITTED); replyFuture.get(); } catch (Exception e) { Throwable t = HddsClientUtils.checkForException(e); LOG.warn("3 way commit failed on pipeline {}", pipeline, e); if (t instanceof GroupMismatchException) { throw e; } reply = getClient() .sendWatchAsync(index, RaftProtos.ReplicationLevel.MAJORITY_COMMITTED) .get(); List<RaftProtos.CommitInfoProto> commitInfoProtoList = reply.getCommitInfos().stream() .filter(i -> i.getCommitIndex() < index) .collect(Collectors.toList()); commitInfoProtoList.parallelStream().forEach(proto -> { UUID address = RatisHelper.toDatanodeId(proto.getServer()); addDatanodetoReply(address, clientReply); // since 3 way commit has failed, the updated map from now on will // only store entries for those datanodes which have had successful // replication. commitInfoMap.remove(address); LOG.info( "Could not commit index {} on pipeline {} to all the nodes. " + "Server {} has failed. Committed by majority.", index, pipeline, address); }); } clientReply.setLogIndex(index); return clientReply; }