org.apache.ratis.proto.RaftProtos Java Examples

Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0

6 votes

/**
 * Pre-processes an incoming client request before it is written to the log.
 *
 * <p>The request payload is decoded into an {@code OMRequest}, the request's
 * Raft group id is checked against this state machine's group id, and the
 * request is passed to the handler for validation. When validation throws an
 * {@link IOException}, a transaction context carrying that exception is
 * returned instead of propagating it.
 *
 * @param raftClientRequest the client request to start a transaction for
 * @return the transaction context for the request; it carries the validation
 *         exception when validation failed
 * @throws IOException thrown by the state machine while validating
 */
@Override
public TransactionContext startTransaction(
    RaftClientRequest raftClientRequest) throws IOException {
  final OMRequest omRequest = OMRatisHelper.convertByteStringToOMRequest(
      raftClientRequest.getMessage().getContent());

  final boolean sameGroup =
      raftClientRequest.getRaftGroupId().equals(raftGroupId);
  Preconditions.checkArgument(sameGroup);

  try {
    handler.validateRequest(omRequest);
  } catch (IOException validationFailure) {
    // Hand the failure back through the context rather than throwing it.
    final TransactionContext failed = TransactionContext.newBuilder()
        .setClientRequest(raftClientRequest)
        .setStateMachine(this)
        .setServerRole(RaftProtos.RaftPeerRole.LEADER)
        .build();
    failed.setException(validationFailure);
    return failed;
  }
  return handleStartTransactionRequests(raftClientRequest, omRequest);
}

Source File: TestGrpcServerMetrics.java From incubator-ratis with Apache License 2.0

6 votes

@Test
public void testGrpcLogAppenderLatencyTimer() throws Exception {
  // Run once for a heartbeat (no entries) and once for an append that
  // carries a single log entry.
  final boolean[] heartbeatModes = {true, false};
  for (boolean heartbeat : heartbeatModes) {
    final RaftProtos.AppendEntriesRequestProto.Builder builder =
        RaftProtos.AppendEntriesRequestProto.newBuilder();
    if (!heartbeat) {
      builder.addEntries(RaftProtos.LogEntryProto.newBuilder().build());
    }
    final GrpcLogAppender.AppendEntriesRequest request =
        new GrpcLogAppender.AppendEntriesRequest(builder.build(), followerId,
            grpcServerMetrics);

    final String timerName = String.format(
        RATIS_GRPC_METRICS_LOG_APPENDER_LATENCY
            + GrpcServerMetrics.getHeartbeatSuffix(heartbeat),
        followerId.toString());

    // Nothing has been timed yet for this follower/mode.
    Assert.assertEquals(0L,
        ratisMetricRegistry.timer(timerName).getSnapshot().getMax());

    request.startRequestTimer();
    Thread.sleep(1000L);
    request.stopRequestTimer();

    Assert.assertTrue(
        ratisMetricRegistry.timer(timerName).getSnapshot().getMax() > 1000L);
  }
}

Source File: TestRaftServerNoLeaderTimeout.java From incubator-ratis with Apache License 2.0

6 votes

/**
 * Kills the leader and one follower so that no quorum remains, then checks
 * that the surviving follower's state machine received the no-leader
 * notification while in the CANDIDATE role.
 */
@Test
public void testLeaderElectionDetection() throws Exception {
  RaftTestUtil.waitForLeader(cluster);
  final TimeDuration noLeaderTimeout = RaftServerConfigKeys.Notification.noLeaderTimeout(cluster.getProperties());

  RaftServerImpl healthyFollower = cluster.getFollowers().get(1);
  RaftServerImpl failedFollower = cluster.getFollowers().get(0);
  // Fail the leader and one of the followers so that no quorum is present
  // for the next leader election to succeed.
  cluster.killServer(failedFollower.getId());
  cluster.killServer(cluster.getLeader().getId());

  // Wait to ensure that leader election is triggered and also state machine callback is triggered
  noLeaderTimeout.sleep();
  noLeaderTimeout.sleep();

  RaftProtos.RoleInfoProto roleInfoProto =
      SimpleStateMachine4Testing.get(healthyFollower).getLeaderElectionTimeoutInfo();
  Assert.assertNotNull(roleInfoProto);

  // JUnit's assertEquals takes (expected, actual); keeping that order makes
  // the failure message report the right values.
  Assert.assertEquals(RaftProtos.RaftPeerRole.CANDIDATE, roleInfoProto.getRole());
  final long noLeaderTimeoutMs = noLeaderTimeout.toLong(TimeUnit.MILLISECONDS);
  Assert.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > noLeaderTimeoutMs);
}

Source File: InstallSnapshotNotificationTests.java From incubator-ratis with Apache License 2.0

6 votes

/**
 * Installs the leader's snapshot by copying its single snapshot file into
 * this state machine's directory. Falls back to the superclass behaviour
 * when no leader snapshot has been recorded.
 *
 * @param roleInfoProto role information passed with the notification
 * @param termIndex term index passed with the notification
 * @return a future completed with the leader snapshot's term index, or
 *         completed exceptionally when the copy fails
 */
@Override
public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader(
    RaftProtos.RoleInfoProto roleInfoProto,
    TermIndex termIndex) {
  final SingleFileSnapshotInfo leaderSnapshot =
      (SingleFileSnapshotInfo) leaderSnapshotInfoRef.get();
  LOG.info("{}: leaderSnapshotInfo = {}", getId(), leaderSnapshot);
  if (leaderSnapshot == null) {
    return super.notifyInstallSnapshotFromLeader(roleInfoProto, termIndex);
  }

  try {
    final Path source = leaderSnapshot.getFile().getPath();
    final String fileName = source.getFileName().toString();
    final File target = new File(getSMdir(), fileName);
    Files.copy(source, target.toPath());
  } catch (IOException e) {
    LOG.error("Failed notifyInstallSnapshotFromLeader", e);
    return JavaUtils.completeExceptionally(e);
  }
  return CompletableFuture.completedFuture(leaderSnapshot.getTermIndex());
}

Source File: StateMachineShutdownTests.java From incubator-ratis with Apache License 2.0

6 votes

/**
 * Applies a transaction, optionally blocking first.
 *
 * <p>While {@code blockOnApply} is set, the calling thread waits on
 * {@code objectToWait} until it is notified. Afterwards the last applied
 * term/index is advanced to the entry's term/index.
 *
 * @param trx the transaction to apply
 * @return an already-completed future holding a "done" message
 * @throws RuntimeException if the thread is interrupted while waiting; the
 *         interrupt flag is restored and the cause is preserved
 */
@Override
public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
  if (blockOnApply) {
    synchronized (objectToWait) {
      try {
        objectToWait.wait();
      } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up can see it,
        // and keep the original exception as the cause.
        Thread.currentThread().interrupt();
        throw new RuntimeException(
            "Interrupted while blocked in applyTransaction", e);
      }
    }
  }
  RaftProtos.LogEntryProto entry = trx.getLogEntry();
  updateLastAppliedTermIndex(entry.getTerm(), entry.getIndex());
  return CompletableFuture.completedFuture(
      new RaftTestUtil.SimpleMessage("done"));
}

Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0

6 votes

/**
 * Leader OM has purged entries from its log. To catch up, OM must download
 * the latest checkpoint from the leader OM and install it.
 *
 * <p>The notification is rejected when the sender's role is not LEADER.
 * Otherwise the snapshot installation runs asynchronously on
 * {@code installSnapshotExecutor}.
 *
 * @param roleInfoProto the leader node information
 * @param firstTermIndexInLog TermIndex of the first append entry available
 *                           in the Leader's log.
 * @return the last term index included in the installed snapshot, or a
 *         future completed exceptionally with an {@code OMException} when
 *         the notification did not come from a leader.
 */
@Override
public CompletableFuture<TermIndex> notifyInstallSnapshotFromLeader(
    RaftProtos.RoleInfoProto roleInfoProto, TermIndex firstTermIndexInLog) {

  String leaderNodeId = RaftPeerId.valueOf(roleInfoProto.getSelf().getId())
      .toString();

  LOG.info("Received install snapshot notification from OM leader: {} with " +
          "term index: {}", leaderNodeId, firstTermIndexInLog);

  if (!roleInfoProto.getRole().equals(RaftProtos.RaftPeerRole.LEADER)) {
    // A non-leader Ratis server should not send this notification.
    LOG.error("Received Install Snapshot notification from non-leader OM " +
        "node: {}. Ignoring the notification.", leaderNodeId);
    return completeExceptionally(new OMException("Received notification to " +
        "install snapshot from non-leader OM node",
        OMException.ResultCodes.RATIS_ERROR));
  }

  // Installation may take a while, so run it off the calling thread.
  return CompletableFuture.supplyAsync(
      () -> ozoneManager.installSnapshot(leaderNodeId),
      installSnapshotExecutor);
}

Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0

6 votes

/**
 * Builds the request-type dependent retry policy used by the Ratis client.
 *
 * <p>Mapping from exception type to the retry policy applied for write and
 * watch requests:
 * ---------------------------------------------------------------------------
 * |        Exception            | RetryPolicy for     | RetryPolicy for     |
 * |                             | Write request       | Watch request       |
 * |-------------------------------------------------------------------------|
 * | NotReplicatedException      | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | GroupMismatchException      | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | StateMachineException       | NO_RETRY            | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | TimeoutIOException          | EXPONENTIAL_BACKOFF | NO_RETRY            |
 * |-------------------------------------------------------------------------|
 * | ResourceUnavailableException| EXPONENTIAL_BACKOFF | EXPONENTIAL_BACKOFF |
 * |-------------------------------------------------------------------------|
 * | Others                      | MULTILINEAR_RANDOM  | MULTILINEAR_RANDOM  |
 * |                             | _RETRY              | _RETRY              |
 * ---------------------------------------------------------------------------
 *
 * @param conf configuration from which the {@code RatisClientConfig} is read
 * @return the retry policy with per-type policies and timeouts for WRITE and
 *         WATCH requests
 */
public static RetryPolicy createRetryPolicy(ConfigurationSource conf) {
  final RatisClientConfig clientConfig =
      OzoneConfiguration.of(conf).getObject(RatisClientConfig.class);
  final ExponentialBackoffRetry backoff =
      createExponentialBackoffPolicy(clientConfig);
  final MultipleLinearRandomRetry multilinear = MultipleLinearRandomRetry
      .parseCommaSeparated(clientConfig.getMultilinearPolicy());

  final TimeDuration writeTimeout = TimeDuration.valueOf(
      clientConfig.getWriteRequestTimeoutInMs(), TimeUnit.MILLISECONDS);
  final TimeDuration watchTimeout = TimeDuration.valueOf(
      clientConfig.getWatchRequestTimeoutInMs(), TimeUnit.MILLISECONDS);

  // WRITE and WATCH differ only in the last argument: exponential backoff
  // for writes, no retry for watches.
  final RetryPolicy writePolicy = createExceptionDependentPolicy(
      backoff, multilinear, backoff);
  final RetryPolicy watchPolicy = createExceptionDependentPolicy(
      backoff, multilinear, RetryPolicies.noRetry());

  return RequestTypeDependentRetryPolicy.newBuilder()
      .setRetryPolicy(RaftProtos.RaftClientRequestProto.TypeCase.WRITE,
          writePolicy)
      .setRetryPolicy(RaftProtos.RaftClientRequestProto.TypeCase.WATCH,
          watchPolicy)
      .setTimeout(RaftProtos.RaftClientRequestProto.TypeCase.WRITE,
          writeTimeout)
      .setTimeout(RaftProtos.RaftClientRequestProto.TypeCase.WATCH,
          watchTimeout)
      .build();
}

Source File: TestRaftServerLeaderElectionTimeout.java From ratis with Apache License 2.0

6 votes

/**
 * Kills the leader and one follower so that no quorum remains, then checks
 * that the surviving follower's state machine was told about the leader
 * election timeout while in the CANDIDATE role.
 */
@Test
public void testLeaderElectionDetection() throws Exception {
  RaftTestUtil.waitForLeader(cluster);
  long leaderElectionTimeout = RaftServerConfigKeys.
      leaderElectionTimeout(cluster.getProperties()).toIntExact(TimeUnit.MILLISECONDS);

  RaftServerImpl healthyFollower = cluster.getFollowers().get(1);
  RaftServerImpl failedFollower = cluster.getFollowers().get(0);
  // Fail the leader and one of the followers so that no quorum is present
  // for the next leader election to succeed.
  cluster.killServer(failedFollower.getId());
  cluster.killServer(cluster.getLeader().getId());

  // Wait to ensure that leader election is triggered and also state machine callback is triggered
  Thread.sleep(leaderElectionTimeout * 2);

  RaftProtos.RoleInfoProto roleInfoProto =
      SimpleStateMachine4Testing.get(healthyFollower).getLeaderElectionTimeoutInfo();
  Assert.assertNotNull(roleInfoProto);

  // JUnit's assertEquals takes (expected, actual); keeping that order makes
  // the failure message report the right values.
  Assert.assertEquals(RaftProtos.RaftPeerRole.CANDIDATE, roleInfoProto.getRole());
  Assert.assertTrue(roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs() > leaderElectionTimeout);
}

Source File: XceiverClientRatis.java From hadoop-ozone with Apache License 2.0

6 votes

/**
 * Refreshes {@code commitInfoMap} from the given commit infos.
 *
 * <p>When the map is empty every server's commit index is inserted. When it
 * already has entries, only servers that are still present in the map are
 * updated.
 *
 * @param commitInfoProtos commit information reported for each server
 */
private void updateCommitInfosMap(
    Collection<RaftProtos.CommitInfoProto> commitInfoProtos) {
  if (!commitInfoMap.isEmpty()) {
    // In case the commit is happening 2 way, just update the commitIndex
    // for the servers which have been successfully updating the commit
    // indexes. This is important because getReplicatedMinCommitIndex()
    // should always return the min commit index out of the nodes which have
    // been replicating data successfully.
    for (RaftProtos.CommitInfoProto info : commitInfoProtos) {
      commitInfoMap.computeIfPresent(
          RatisHelper.toDatanodeId(info.getServer()),
          (address, index) -> info.getCommitIndex());
    }
    return;
  }

  // The map is empty: record the commit index of every reported server.
  for (RaftProtos.CommitInfoProto info : commitInfoProtos) {
    commitInfoMap.put(RatisHelper.toDatanodeId(info.getServer()),
        info.getCommitIndex());
  }
}

Source File: TestRaftStream.java From ratis with Apache License 2.0

5 votes

/**
 * Verifies the committed index of the log and compares the data of each
 * entry in the index range [1, expectedCommittedIndex + 1) passed to
 * {@code getEntries} with the next array produced by the supplier.
 *
 * @param raftLog the log to inspect
 * @param expectedCommittedIndex the committed index the log must report
 * @param s supplies the expected bytes, one call per entry
 * @throws IOException if reading the log fails
 */
private void checkLog(RaftLog raftLog, long expectedCommittedIndex,
    Supplier<byte[]> s) throws IOException {
  Assert.assertEquals(expectedCommittedIndex, raftLog.getLastCommittedIndex());

  // check the log content
  final TermIndex[] termIndices = raftLog.getEntries(1, expectedCommittedIndex + 1);
  for (int i = 0; i < termIndices.length; i++) {
    final TermIndex entry = termIndices[i];
    final RaftProtos.LogEntryProto log = raftLog.get(entry.getIndex());
    final byte[] actual = log.getStateMachineLogEntry().getLogData().toByteArray();
    final byte[] expected = s.get();
    LOG.info("log " + entry + " " + log.getLogEntryBodyCase() + " " + StringUtils.bytes2HexString(actual));
    Assert.assertEquals(expected.length, actual.length);
    Assert.assertArrayEquals(expected, actual);
  }
}

Source File: TestRaftServerSlownessDetection.java From ratis with Apache License 2.0

5 votes

/**
 * Kills one follower and checks that only the leader's state machine gets a
 * slowness notification, and that the killed follower is reported with an
 * elapsed RPC time above the slowness timeout.
 */
@Test
public void testSlownessDetection() throws Exception {
  RaftTestUtil.waitForLeader(cluster);
  final long slownessTimeout = RaftServerConfigKeys.Rpc
      .slownessTimeout(cluster.getProperties()).toIntExact(TimeUnit.MILLISECONDS);
  final RaftServerImpl failedFollower = cluster.getFollowers().get(0);

  // fail the node and wait for the callback to be triggered
  cluster.killServer(failedFollower.getId());
  Thread.sleep(slownessTimeout * 2);

  // Followers should not get any slowness notification
  cluster.getFollowers().forEach(follower ->
      Assert.assertNull(SimpleStateMachine4Testing.get(follower).getSlownessInfo()));

  // the leader should get notification that the follower has failed now
  final RaftProtos.RoleInfoProto leaderRoleInfo =
      SimpleStateMachine4Testing.get(cluster.getLeader()).getSlownessInfo();
  Assert.assertNotNull(leaderRoleInfo);

  // Assert that the node that was shut down is lagging behind
  final List<RaftProtos.ServerRpcProto> followerInfos =
      leaderRoleInfo.getLeaderInfo().getFollowerInfoList();
  followerInfos.stream()
      .filter(info -> RaftPeerId.valueOf(info.getId().getId()).equals(failedFollower.getId()))
      .forEach(info -> Assert.assertTrue(info.getLastRpcElapsedTimeMs() > slownessTimeout));
}

Source File: XceiverServerRatis.java From hadoop-ozone with Apache License 2.0

5 votes

/**
 * Triggers a pipeline close for the given group with reason
 * {@code STATEMACHINE_TRANSACTION_FAILED}, using a message that names this
 * datanode and the given role.
 *
 * @param groupId the Raft group passed on to the pipeline close
 * @param role the role included in the failure message
 */
void handleApplyTransactionFailure(RaftGroupId groupId,
    RaftProtos.RaftPeerRole role) {
  final UUID datanodeId = RatisHelper.toDatanodeId(getServer().getId());
  final String reasonText = "Ratis Transaction failure in datanode "
      + datanodeId + " with role " + role
      + " .Triggering pipeline close action.";
  triggerPipelineClose(groupId, reasonText,
      ClosePipelineInfo.Reason.STATEMACHINE_TRANSACTION_FAILED, true);
}

Source File: MetaStateMachine.java From ratis with Apache License 2.0

5 votes

/**
 * Applies a metadata request stored in the transaction's log entry.
 *
 * <p>Register requests add the log name and its Raft group to {@code map},
 * unregister requests remove the log name, and ping requests record a peer
 * that has not been seen before. An entry whose payload cannot be parsed is
 * logged and skipped; previously it caused a NullPointerException.
 *
 * @param trx the transaction whose log entry carries the request
 * @return the result of the superclass implementation
 */
@Override
public TransactionContext applyTransactionSerial(TransactionContext trx) {
    final RaftProtos.LogEntryProto entry = trx.getLogEntry();
    final MetaSMRequestProto req;
    try {
        req = MetaSMRequestProto.parseFrom(entry.getStateMachineLogEntry().getLogData());
    } catch (InvalidProtocolBufferException e) {
        // Without a parsed request there is nothing to apply; report it and
        // leave the state untouched instead of dereferencing null below.
        LOG.error("Failed to parse MetaSMRequestProto from log entry at index {}",
            entry.getIndex(), e);
        return super.applyTransactionSerial(trx);
    }
    switch (req.getTypeCase()) {
        case REGISTERREQUEST: {
            LogServiceRegisterLogRequestProto r = req.getRegisterRequest();
            LogName logname = LogServiceProtoUtil.toLogName(r.getLogname());
            RaftGroup rg = MetaServiceProtoUtil.toRaftGroup(r.getRaftGroup());
            map.put(logname, rg);
            LOG.info("Log {} registered at {} with group {} ", logname, getId(), rg );
            break;
        }
        case UNREGISTERREQUEST: {
            LogServiceUnregisterLogRequestProto unregReq = req.getUnregisterRequest();
            LogName logname = LogServiceProtoUtil.toLogName(unregReq.getLogname());
            map.remove(logname);
            break;
        }
        case PINGREQUEST: {
            LogServicePingRequestProto pingRequest = req.getPingRequest();
            RaftPeer peer = MetaServiceProtoUtil.toRaftPeer(pingRequest.getPeer());
            // A ping from an already known peer is just a heartbeat.
            if (!peers.contains(peer)) {
                peers.add(peer);
                avail.add(new PeerGroups(peer));
            }
            break;
        }
        default:
    }
    return super.applyTransactionSerial(trx);
}

Source File: RaftServerMetrics.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Registers a gauge that reports the commit index cached for the given peer.
 * The gauge reads {@code commitInfoCache} on every evaluation and reports 0
 * when the cache has no entry for the peer.
 *
 * @param peerId the peer whose commit index is tracked
 */
public void addPeerCommitIndexGauge(RaftPeerId peerId) {
  final String gaugeName =
      String.format(LEADER_METRIC_PEER_COMMIT_INDEX, peerId);
  registry.gauge(gaugeName, () -> () -> {
    final RaftProtos.CommitInfoProto cached = commitInfoCache.get(peerId);
    return cached == null ? 0L : cached.getCommitIndex();
  });
}

Source File: BaseStateMachine.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Default apply: advances the last applied term/index to the entry's
 * term/index and echoes the entry's state machine log data back as the
 * reply message.
 *
 * @param trx the transaction to apply; its log entry must not be null
 * @return an already-completed future holding the echoed message
 */
@Override
public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
  final RaftProtos.LogEntryProto logEntry =
      Objects.requireNonNull(trx.getLogEntry());
  updateLastAppliedTermIndex(logEntry.getTerm(), logEntry.getIndex());

  // return the same message contained in the entry
  final Message echoed =
      Message.valueOf(logEntry.getStateMachineLogEntry().getLogData());
  return CompletableFuture.completedFuture(echoed);
}

Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Creates the policy from the per-type retry policies and timeouts.
 *
 * <p>The retry policy map is exposed through an unmodifiable view. The
 * string form is computed lazily as {@code ClassName{TYPE->policy, ...}}.
 *
 * @param map retry policy for each request type
 * @param timeoutMap timeout for each request type
 */
private RequestTypeDependentRetryPolicy(
    EnumMap<RaftProtos.RaftClientRequestProto.TypeCase, RetryPolicy> map,
    EnumMap<RaftProtos.RaftClientRequestProto.TypeCase, TimeDuration> timeoutMap) {
  this.retryPolicyMap = Collections.unmodifiableMap(map);
  this.timeoutMap = timeoutMap;
  this.myString = () -> {
    final StringBuilder b = new StringBuilder(getClass().getSimpleName()).append("{");
    map.forEach((key, value) -> b.append(key).append("->").append(value).append(", "));
    if (!map.isEmpty()) {
      // Drop the trailing ", ". With an empty map nothing was appended, and
      // truncating would cut into the class name and opening brace.
      b.setLength(b.length() - 2);
    }
    return b.append("}").toString();
  };
}

Source File: LogStateMachine.java From ratis with Apache License 2.0

5 votes

/**
 * Handles an append request. When the state check for {@code State.OPEN}
 * reports no error, the total byte size of the appended records is added to
 * {@code length} under the write lock and the last applied term/index is
 * advanced. The reply carries the entry's log index and any state error.
 *
 * @param trx the transaction being applied
 * @param logProto the request containing the append payload
 * @return an already-completed future holding the append reply
 */
private CompletableFuture<Message> processAppendRequest(TransactionContext trx,
    LogServiceRequestProto logProto) {

  final LogEntryProto logEntry = trx.getLogEntry();
  final AppendLogEntryRequestProto appendRequest = logProto.getAppendRequest();
  final long index = logEntry.getIndex();
  final Throwable stateError = verifyState(State.OPEN);
  if (stateError == null) {
    try (final AutoCloseableLock ignored = writeLock()) {
      long appendedBytes = 0;
      for (byte[] record : LogServiceProtoUtil.toListByteArray(appendRequest.getDataList())) {
        appendedBytes += record.length;
      }
      this.length += appendedBytes;
      // TODO do we need this for other write request (close, sync)
      updateLastAppliedTermIndex(logEntry.getTerm(), index);
    }
  }

  final List<Long> ids = new ArrayList<>();
  ids.add(index);
  final Message reply = Message.valueOf(
      LogServiceProtoUtil.toAppendLogReplyProto(ids, stateError).toByteString());
  final CompletableFuture<Message> result = CompletableFuture.completedFuture(reply);

  final RaftProtos.RaftPeerRole role = trx.getServerRole();
  LOG.debug("{}:{}-{}: {} new length {}", role, getId(), index, appendRequest, length);
  if (LOG.isTraceEnabled()) {
    LOG.trace("{}-{}: variables={}", getId(), index, length);
  }
  return result;
}

Source File: LogStateMachine.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Handles an append request. When the state check for {@code State.OPEN}
 * reports no error, each appended record is assigned the current value of
 * {@code length} as its id, {@code length} is incremented per record, and
 * the records' total byte size is added to {@code dataRecordsSize}, all
 * under the write lock. The reply carries the assigned ids and any state
 * error.
 *
 * @param trx the transaction being applied
 * @param logProto the request containing the append payload
 * @return an already-completed future holding the append reply
 */
private CompletableFuture<Message> processAppendRequest(TransactionContext trx,
    LogServiceRequestProto logProto) {

  final LogEntryProto logEntry = trx.getLogEntry();
  final AppendLogEntryRequestProto appendRequest = logProto.getAppendRequest();
  final long index = logEntry.getIndex();
  final Throwable stateError = verifyState(State.OPEN);
  final List<Long> recordIds = new ArrayList<>();
  if (stateError == null) {
    try (AutoCloseableLock ignored = writeLock()) {
      long appendedBytes = 0;
      final List<byte[]> records =
          LogServiceProtoUtil.toListByteArray(appendRequest.getDataList());
      for (byte[] record : records) {
        recordIds.add(this.length);
        this.length++;
        appendedBytes += record.length;
      }
      this.dataRecordsSize += appendedBytes;
      // TODO do we need this for other write request (close, sync)
      updateLastAppliedTermIndex(logEntry.getTerm(), index);
    }
  }

  final Message reply = Message.valueOf(
      LogServiceProtoUtil.toAppendLogReplyProto(recordIds, stateError).toByteString());
  final CompletableFuture<Message> result = CompletableFuture.completedFuture(reply);

  final RaftProtos.RaftPeerRole role = trx.getServerRole();
  if (LOG.isTraceEnabled()) {
    LOG.trace("{}:{}-{}: {} new length {}", role, getId(), index,
        TextFormat.shortDebugString(appendRequest), dataRecordsSize);
  }
  return result;
}

Source File: CounterStateMachine.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Applies a log entry to the counter. Only the {@code INCREMENT} command is
 * accepted; any other payload yields an "Invalid Command" reply and leaves
 * the counter and the last applied index untouched.
 *
 * @param trx the transaction context
 * @return the message containing the updated counter value
 */
@Override
public CompletableFuture<Message> applyTransaction(TransactionContext trx) {
  final RaftProtos.LogEntryProto logEntry = trx.getLogEntry();

  // Decode the command and reject anything other than INCREMENT.
  final String command = logEntry.getStateMachineLogEntry().getLogData()
      .toString(Charset.defaultCharset());
  final boolean isIncrement = command.equals("INCREMENT");
  if (!isIncrement) {
    return CompletableFuture.completedFuture(
        Message.valueOf("Invalid Command"));
  }

  // Record the entry as applied before executing it.
  final long logIndex = logEntry.getIndex();
  updateLastAppliedTermIndex(logEntry.getTerm(), logIndex);

  // Execute the command.
  counter.incrementAndGet();

  // Reply with the counter's new value.
  final Message reply = Message.valueOf(counter.toString());
  final CompletableFuture<Message> result =
      CompletableFuture.completedFuture(reply);

  // Only the leader logs the incremented value and its log index.
  if (RaftProtos.RaftPeerRole.LEADER == trx.getServerRole()) {
    LOG.info("{}: Increment to {}", logIndex, counter.toString());
  }

  return result;
}

Source File: TestRaftServerWithGrpc.java From incubator-ratis with Apache License 2.0

5 votes

/**
 * Sends one request of each kind (write, read-only, stale read, watch ALL,
 * watch MAJORITY) to the cluster and checks after each one that the
 * leader's matching request timer has a non-zero count.
 *
 * @param cluster the running cluster to exercise
 */
void testRaftClientRequestMetrics(MiniRaftClusterWithGrpc cluster) throws IOException,
    ExecutionException, InterruptedException {
  final RaftServerImpl leader = RaftTestUtil.waitForLeader(cluster);
  final RaftServerMetrics metrics = leader.getRaftServerMetrics();

  try (final RaftClient client = cluster.createClient()) {
    // write
    final CompletableFuture<RaftClientReply> write = client.sendAsync(new SimpleMessage("testing"));
    Assert.assertTrue(write.get().isSuccess());
    Assert.assertTrue(metrics.getTimer(RAFT_CLIENT_WRITE_REQUEST).getCount() > 0);

    // read-only
    final CompletableFuture<RaftClientReply> read = client.sendReadOnlyAsync(new SimpleMessage("testing"));
    Assert.assertTrue(read.get().isSuccess());
    Assert.assertTrue(metrics.getTimer(RAFT_CLIENT_READ_REQUEST).getCount() > 0);

    // stale read served by the leader
    final CompletableFuture<RaftClientReply> staleRead = client.sendStaleReadAsync(
        new SimpleMessage("testing"), 0, leader.getId());
    Assert.assertTrue(staleRead.get().isSuccess());
    Assert.assertTrue(metrics.getTimer(RAFT_CLIENT_STALE_READ_REQUEST).getCount() > 0);

    // watch with replication level ALL
    final CompletableFuture<RaftClientReply> watchAll =
        client.sendWatchAsync(0, RaftProtos.ReplicationLevel.ALL);
    Assert.assertTrue(watchAll.get().isSuccess());
    final String watchAllTimer = String.format(RAFT_CLIENT_WATCH_REQUEST, "-ALL");
    Assert.assertTrue(metrics.getTimer(watchAllTimer).getCount() > 0);

    // watch with replication level MAJORITY
    final CompletableFuture<RaftClientReply> watchMajority =
        client.sendWatchAsync(0, RaftProtos.ReplicationLevel.MAJORITY);
    Assert.assertTrue(watchMajority.get().isSuccess());
    final String watchMajorityTimer = String.format(RAFT_CLIENT_WATCH_REQUEST, "");
    Assert.assertTrue(metrics.getTimer(watchMajorityTimer).getCount() > 0);
  }
}

Source File: OzoneManagerStateMachine.java From hadoop-ozone with Apache License 2.0

5 votes

/**
 * Builds the transaction context for a client request, using the request's
 * message content as the log data and LEADER as the server role.
 *
 * @param raftClientRequest the client request the context is created for
 * @param omRequest the decoded request; not read by this method
 * @return TransactionContext
 */
private TransactionContext handleStartTransactionRequests(
    RaftClientRequest raftClientRequest, OMRequest omRequest) {

  final TransactionContext context = TransactionContext.newBuilder()
      .setClientRequest(raftClientRequest)
      .setStateMachine(this)
      .setServerRole(RaftProtos.RaftPeerRole.LEADER)
      .setLogData(raftClientRequest.getMessage().getContent())
      .build();
  return context;
}

Source File: BaseLogParser.java From hadoop-ozone with Apache License 2.0

5 votes

/**
 * Parses the configured Ratis log segment file and dumps its content.
 *
 * <p>Any exception raised while building the parser or dumping the segment
 * is reported on standard output rather than propagated.
 *
 * @param smLogToStr converts a state machine log entry to its printable form
 */
public void parseRatisLogs(
    Function<RaftProtos.StateMachineLogEntryProto, String> smLogToStr) {
  try {
    ParseRatisLog.Builder builder = new ParseRatisLog.Builder();
    builder.setSegmentFile(segmentFile);
    builder.setSMLogToString(smLogToStr);

    ParseRatisLog prl = builder.build();
    prl.dumpSegmentFile();
  } catch (Exception e) {
    // The leading space keeps the class name from running into "failed".
    System.out.println(DatanodeRatisLogParser.class.getSimpleName()
        + " failed with exception " + e.toString());
  }
}

Source File: XceiverServerRatis.java From hadoop-ozone with Apache License 2.0

5 votes

/**
 * Builds a failure description from the reporting node's role information
 * and triggers a pipeline close with reason {@code PIPELINE_FAILED}.
 *
 * <p>A candidate reports how long it has been without a leader; a leader
 * lists the followers whose last RPC is older than
 * {@code nodeFailureTimeoutMs}.
 *
 * @param groupId the Raft group passed on to the pipeline close
 * @param roleInfoProto role information of the reporting node
 * @throws IllegalStateException if the role is neither CANDIDATE nor LEADER
 */
private void handlePipelineFailure(RaftGroupId groupId,
    RoleInfoProto roleInfoProto) {
  final UUID datanode = RatisHelper.toDatanodeId(roleInfoProto.getSelf());
  final RaftPeerId id = RaftPeerId.valueOf(roleInfoProto.getSelf().getId());
  final String msg;
  switch (roleInfoProto.getRole()) {
  case CANDIDATE: {
    final long elapsedMs =
        roleInfoProto.getCandidateInfo().getLastLeaderElapsedTimeMs();
    msg = datanode + " is in candidate state for " + elapsedMs + "ms";
    break;
  }
  case LEADER: {
    final StringBuilder description = new StringBuilder();
    description.append(datanode).append(" has not seen follower/s");
    for (RaftProtos.ServerRpcProto follower
        : roleInfoProto.getLeaderInfo().getFollowerInfoList()) {
      if (follower.getLastRpcElapsedTimeMs() <= nodeFailureTimeoutMs) {
        // This follower responded recently enough; leave it out.
        continue;
      }
      description.append(" ")
          .append(RatisHelper.toDatanodeId(follower.getId()))
          .append(" for ")
          .append(follower.getLastRpcElapsedTimeMs())
          .append("ms");
    }
    msg = description.toString();
    break;
  }
  default:
    LOG.error("unknown state: {}", roleInfoProto.getRole());
    throw new IllegalStateException("node" + id + " is in illegal role "
        + roleInfoProto.getRole());
  }

  triggerPipelineClose(groupId, msg,
      ClosePipelineInfo.Reason.PIPELINE_FAILED, false);
}

Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0

4 votes

/**
 * Set the given policy for the given type.
 * A policy may be set only once per type; the assertion checks that no
 * policy was registered for the type before.
 *
 * @param type the request type
 * @param policy the retry policy to use for that type
 * @return this builder
 */
public Builder setRetryPolicy(RaftProtos.RaftClientRequestProto.TypeCase type, RetryPolicy policy) {
  final RetryPolicy existing = retryPolicyMap.put(type, policy);
  Preconditions.assertNull(existing,
      () -> "The retryPolicy for type " + type + " is already set to " + existing);
  return this;
}

Source File: RequestTypeDependentRetryPolicy.java From incubator-ratis with Apache License 2.0

4 votes

/**
 * Set the given timeout for the given type.
 * A timeout may be set only once per type; the assertion checks that no
 * timeout was registered for the type before.
 *
 * @param type the request type
 * @param timeout the timeout to use for that type
 * @return this builder
 */
public Builder setTimeout(RaftProtos.RaftClientRequestProto.TypeCase type, TimeDuration timeout) {
  final TimeDuration existing = timeoutMap.put(type, timeout);
  Preconditions.assertNull(existing,
      () -> "The timeout for type " + type + " is already set to " + existing);
  return this;
}

Source File: MetaStateMachine.java From incubator-ratis with Apache License 2.0

4 votes

/**
 * Applies a metadata request stored in the transaction's log entry.
 *
 * <p>Register requests record the log name for every peer of its Raft group
 * and add the group to {@code map}; unregister requests remove the log name
 * from {@code map}; ping requests record a peer that has not been seen
 * before; heartbeat requests refresh the peer's timestamp. An entry whose
 * payload cannot be parsed is logged and skipped; previously it caused a
 * NullPointerException.
 *
 * @param trx the transaction whose log entry carries the request
 * @return the result of the superclass implementation
 */
@Override
public TransactionContext applyTransactionSerial(TransactionContext trx) {
    final RaftProtos.LogEntryProto entry = trx.getLogEntry();
    final MetaSMRequestProto req;
    try {
        req = MetaSMRequestProto.parseFrom(entry.getStateMachineLogEntry().getLogData());
    } catch (InvalidProtocolBufferException e) {
        // Without a parsed request there is nothing to apply; report it and
        // leave the state untouched instead of dereferencing null below.
        LOG.error("Failed to parse MetaSMRequestProto from log entry at index {}",
            entry.getIndex(), e);
        return super.applyTransactionSerial(trx);
    }
    switch (req.getTypeCase()) {
        case REGISTERREQUEST: {
            LogServiceRegisterLogRequestProto r = req.getRegisterRequest();
            LogName logname = LogServiceProtoUtil.toLogName(r.getLogname());
            RaftGroup rg = MetaServiceProtoUtil.toRaftGroup(r.getRaftGroup());
            // Remember, for each peer of the group, that it hosts this log.
            rg.getPeers().forEach(raftPeer ->
                peerLogs.computeIfAbsent(raftPeer, p -> new HashSet<>()).add(logname));
            map.put(logname, rg);

            LOG.info("Log {} registered at {} with group {} ", logname, getId(), rg );
            break;
        }
        case UNREGISTERREQUEST: {
            LogServiceUnregisterLogRequestProto unregReq = req.getUnregisterRequest();
            LogName logname = LogServiceProtoUtil.toLogName(unregReq.getLogname());
            map.remove(logname);
            break;
        }
        case PINGREQUEST: {
            LogServicePingRequestProto pingRequest = req.getPingRequest();
            RaftPeer peer = MetaServiceProtoUtil.toRaftPeer(pingRequest.getPeer());
            //If Set<RaftPeer> contains peer then do nothing as that's just heartbeat else add the peer to the set.
            if (!peers.contains(peer)) {
                peers.add(peer);
                avail.add(new PeerGroups(peer));
                heartbeatInfo.put(peer,  System.currentTimeMillis());
            }
            break;
        }
        case HEARTBEATREQUEST: {
            MetaServiceProtos.LogServiceHeartbeatRequestProto heartbeatRequest = req.getHeartbeatRequest();
            RaftPeer heartbeatPeer = MetaServiceProtoUtil.toRaftPeer(heartbeatRequest.getPeer());
            heartbeatInfo.put(heartbeatPeer,  System.currentTimeMillis());
            break;
        }
        default:
    }
    return super.applyTransactionSerial(trx);
}

Source File: StateMachineShutdownTests.java From incubator-ratis with Apache License 2.0

4 votes

/**
 * Checks that shutting down a server whose state machine is blocked in
 * applyTransaction does not complete the apply early: the blocked follower's
 * last applied index stays below the written index until it is unblocked,
 * and equals it once the shutdown thread has been joined.
 */
@Test
public void testStateMachineShutdownWaitsForApplyTxn() throws Exception {
  final RaftProperties prop = getProperties();
  prop.setClass(MiniRaftCluster.STATEMACHINE_CLASS_KEY,
      StateMachineWithConditionalWait.class, StateMachine.class);
  final MiniRaftCluster cluster = newCluster(3);
  cluster.start();
  RaftTestUtil.waitForLeader(cluster);
  RaftServerImpl leader = cluster.getLeader();
  RaftPeerId leaderId = leader.getId();

  //Unblock leader and one follower
  ((StateMachineWithConditionalWait)leader.getStateMachine())
      .unBlockApplyTxn();
  ((StateMachineWithConditionalWait)cluster.
      getFollowers().get(0).getStateMachine()).unBlockApplyTxn();

  cluster.getLeaderAndSendFirstMessage(true);

  try (final RaftClient client = cluster.createClient(leaderId)) {
    client.send(new RaftTestUtil.SimpleMessage("message"));
    RaftClientReply reply = client.send(
            new RaftTestUtil.SimpleMessage("message2"));

    long logIndex = reply.getLogIndex();
    //Confirm that followers have committed
    RaftClientReply watchReply = client.sendWatch(
            logIndex, RaftProtos.ReplicationLevel.ALL_COMMITTED);
    watchReply.getCommitInfos().forEach(
            val -> Assert.assertTrue(val.getCommitIndex() >= logIndex));
    RaftServerImpl secondFollower = cluster.getFollowers().get(1);
    // Second follower is blocked in apply transaction
    Assert.assertTrue(
            secondFollower.getState().getLastAppliedIndex()
                    < logIndex);

    // Now shutdown the follower in a separate thread
    Thread t = new Thread(() -> secondFollower.shutdown(true));
    t.start();

    // The second follower should still be blocked in apply transaction
    Assert.assertTrue(
            secondFollower.getState().getLastAppliedIndex()
                    < logIndex);

    // Now unblock the second follower
    ((StateMachineWithConditionalWait) secondFollower.getStateMachine())
            .unBlockApplyTxn();

    // Now wait for the thread
    t.join(5000);
    // JUnit's assertEquals takes (expected, actual); keeping that order
    // makes the failure message report the right values.
    Assert.assertEquals(
            logIndex,
            secondFollower.getState().getLastAppliedIndex());

    cluster.shutdown();
  }
}

Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0

4 votes

/**
 * Returns the smallest commit index among the given commit infos.
 *
 * @param commitInfos the commit information to scan
 * @return the minimum commit index, or {@code null} when the collection is
 *         empty
 */
public static Long getMinReplicatedIndex(
    Collection<RaftProtos.CommitInfoProto> commitInfos) {
  return commitInfos.stream()
      .map(info -> info.getCommitIndex())
      .min(Long::compare)
      .orElse(null);
}

Source File: RatisHelper.java From hadoop-ozone with Apache License 2.0

4 votes

/**
 * Converts a Raft peer proto to a datanode id by wrapping the proto's id in
 * a {@code RaftPeerId} and delegating to the {@code RaftPeerId} overload.
 *
 * @param peerId the peer proto
 * @return the datanode id for that peer
 */
public static UUID toDatanodeId(RaftProtos.RaftPeerProto peerId) {
  final RaftPeerId raftPeerId = RaftPeerId.valueOf(peerId.getId());
  return toDatanodeId(raftPeerId);
}

Source File: XceiverClientRatis.java From hadoop-ozone with Apache License 2.0

4 votes

/**
 * Waits until the given log index is committed.
 *
 * <p>Returns immediately when the replicated minimum commit index already
 * covers {@code index}. Otherwise it first watches for ALL_COMMITTED; if
 * that fails for a reason other than a group mismatch, it falls back to
 * MAJORITY_COMMITTED, adds every server whose commit index is still below
 * {@code index} to the reply, and removes that server from
 * {@code commitInfoMap}.
 *
 * @param index the log index to wait for
 * @return the reply carrying the log index and any datanodes added during
 *         the fallback
 */
@Override
public XceiverClientReply watchForCommit(long index)
    throws InterruptedException, ExecutionException, TimeoutException,
    IOException {
  long commitIndex = getReplicatedMinCommitIndex();
  XceiverClientReply clientReply = new XceiverClientReply(null);
  if (commitIndex >= index) {
    // return the min commit index till which the log has been replicated to
    // all servers
    clientReply.setLogIndex(commitIndex);
    return clientReply;
  }
  try {
    getClient()
        .sendWatchAsync(index, RaftProtos.ReplicationLevel.ALL_COMMITTED)
        .get();
  } catch (Exception e) {
    Throwable t = HddsClientUtils.checkForException(e);
    LOG.warn("3 way commit failed on pipeline {}", pipeline, e);
    if (t instanceof GroupMismatchException) {
      throw e;
    }
    final RaftClientReply reply = getClient()
        .sendWatchAsync(index, RaftProtos.ReplicationLevel.MAJORITY_COMMITTED)
        .get();
    // Iterate sequentially: the body mutates the shared clientReply and
    // commitInfoMap, and a parallel stream would do that from several
    // threads at once.
    reply.getCommitInfos().stream()
        .filter(i -> i.getCommitIndex() < index)
        .forEach(proto -> {
          UUID address = RatisHelper.toDatanodeId(proto.getServer());
          addDatanodetoReply(address, clientReply);
          // since 3 way commit has failed, the updated map from now on  will
          // only store entries for those datanodes which have had successful
          // replication.
          commitInfoMap.remove(address);
          LOG.info(
              "Could not commit index {} on pipeline {} to all the nodes. " +
              "Server {} has failed. Committed by majority.",
              index, pipeline, address);
        });
  }
  clientReply.setLogIndex(index);
  return clientReply;
}

org.apache.ratis.proto.RaftProtos Java Examples