Java Code Examples for org.apache.helix.model.CurrentState#getEndTime()

The following examples show how to use org.apache.helix.model.CurrentState#getEndTime(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestAbnormalStatesResolver.java    From helix with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the end time recorded in the current state of the first instance that the
 * external view reports in {@code state} for {@code partition}.
 *
 * @param ev external view to inspect
 * @param partition partition whose state map and end time are read
 * @param state state value the hosting instance must report for the partition
 * @return the end time stored in that instance's current state for the partition
 * @throws java.util.NoSuchElementException if no instance reports {@code state}, or no
 *         participant in {@code _participants} has the matching instance name
 */
private long getTopStateUpdateTime(ExternalView ev, String partition, String state) {
  // First instance in the partition's state map whose value equals the requested state.
  String hostName = ev.getStateMap(partition).entrySet().stream()
      .filter(mapping -> mapping.getValue().equals(state))
      .findFirst()
      .get()
      .getKey();

  // The participant object is needed for its session ID, which is part of the property key.
  MockParticipantManager host = Arrays.stream(_participants)
      .filter(candidate -> candidate.getInstanceName().equals(hostName))
      .findFirst()
      .get();

  HelixDataAccessor dataAccessor = _controller.getHelixDataAccessor();
  PropertyKey.Builder keys = dataAccessor.keyBuilder();
  CurrentState hostState = dataAccessor.getProperty(
      keys.currentState(host.getInstanceName(), host.getSessionId(), ev.getResourceName()));
  return hostState.getEndTime(partition);
}
 
Example 2
Source File: TopStateHandoffReportStage.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * When we find a top state missing of the given partition, we find out when it started to miss
 * top state, then we record it in cache.
 *
 * <p>Nothing is done if a missing record already exists for the partition. Otherwise the start
 * time is resolved in the following order:
 * <ol>
 *   <li>from the current state (or offline time) of the instance last known to hold the top
 *       state;</li>
 *   <li>from the creation time of pending messages whose target state is the top state;</li>
 *   <li>from the current system time.</li>
 * </ol>
 *
 * @param cache cluster data cache
 * @param resourceName resource name
 * @param partition partition of the given resource
 * @param topState top state name
 * @param currentStateOutput current state output
 */
private void reportTopStateMissing(ResourceControllerDataProvider cache, String resourceName, Partition partition,
    String topState, CurrentStateOutput currentStateOutput) {
  Map<String, Map<String, MissingTopStateRecord>> missingTopStateMap = cache.getMissingTopStateMap();
  Map<String, Map<String, String>> lastTopStateMap = cache.getLastTopStateLocationMap();
  String partitionName = partition.getPartitionName();

  Map<String, MissingTopStateRecord> recordedForResource = missingTopStateMap.get(resourceName);
  if (recordedForResource != null && recordedForResource.containsKey(partitionName)) {
    // a previous missing has been already recorded
    return;
  }

  long startTime = TopStateHandoffReportStage.TIMESTAMP_NOT_RECORDED;
  long fromTopStateUserLatency = DEFAULT_HANDOFF_USER_LATENCY;
  boolean isGraceful = true;

  // 1. try to find the previous topstate missing event for the startTime.
  String missingStateInstance = null;
  Map<String, String> lastTopStateLocations = lastTopStateMap.get(resourceName);
  if (lastTopStateLocations != null) {
    missingStateInstance = lastTopStateLocations.get(partitionName);
  }

  if (missingStateInstance != null) {
    Map<String, LiveInstance> liveInstances = cache.getLiveInstances();
    if (liveInstances.containsKey(missingStateInstance)) {
      CurrentState currentState = cache.getCurrentState(missingStateInstance,
          liveInstances.get(missingStateInstance).getEphemeralOwner()).get(resourceName);
      String previousState =
          currentState == null ? null : currentState.getPreviousState(partitionName);

      if (previousState != null && previousState.equalsIgnoreCase(topState)) {
        // Update the latest start time only from top state to other state transition.
        // At the beginning, the start time should be -1 (not recorded). If something happens,
        // either the instance is not alive or the instance just started for that partition,
        // Helix does not know the previous start time or end time. So we count from current.
        //
        // Previous state being top state does not mean that the resource has only one top state
        // (i.e. Online/Offline). So Helix has to find the latest start time as the starting point.
        long fromTopStateStartTime = currentState.getStartTime(partitionName);
        if (fromTopStateStartTime > startTime) {
          startTime = fromTopStateStartTime;
          fromTopStateUserLatency = currentState.getEndTime(partitionName) - startTime;
        }
      } // Else no related state transition history found, use current time as the missing start time.
    } else {
      // If the previous topState holder is no longer alive, the offline time is used as start time.
      // Also, if we observe a top state missing and the previous top state node is gone, the
      // top state handoff is not graceful
      isGraceful = false;
      // Single lookup; a missing (or null) offline time simply leaves startTime untouched.
      Long offlineTime = cache.getInstanceOfflineTimeMap().get(missingStateInstance);
      if (offlineTime != null) {
        startTime = Math.max(startTime, offlineTime);
      }
    }
  }

  // 2. if no previous top state records, it's either resource just created or there is a
  // controller leadership change. Check any pending message that are created for top state
  // transition. Assume this is graceful top state handoff as if the from top state instance
  // crashed, we are not recording such message
  if (startTime == TopStateHandoffReportStage.TIMESTAMP_NOT_RECORDED) {
    for (Message message : currentStateOutput.getPendingMessageMap(resourceName, partition)
        .values()) {
      // Only messages that match the current session ID will be recorded in the map.
      // So no need to redundantly check here.
      if (message.getToState().equals(topState)) {
        startTime = Math.max(startTime, message.getCreateTimeStamp());
      }
    }
  }

  // 3. if no clue about previous top state or any related pending message, it could be
  //    a. resource just created
  //    b. controller leader switch (actual hand off could be either graceful or non graceful)
  //
  // Use the current system time as missing top state start time and assume always graceful
  // TODO: revise this case and see if this case can be better addressed
  if (startTime == TopStateHandoffReportStage.TIMESTAMP_NOT_RECORDED) {
    LogUtil.logWarn(LOG, _eventId,
        "Cannot confirm top state missing start time. Use the current system time as the start time.");
    startTime = System.currentTimeMillis();
  }

  missingTopStateMap.computeIfAbsent(resourceName, key -> new HashMap<>())
      .put(partitionName, new MissingTopStateRecord(startTime, fromTopStateUserLatency, isGraceful));
}
 
Example 3
Source File: TopStateHandoffReportStage.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a top state that is observed again in this pipeline run after a missing record was
 * stored for the partition: reports the handoff duration (when it qualifies) and removes the
 * missing record.
 *
 * @param cache cluster data cache
 * @param stateMap state map of the given partition of the given resource
 * @param resourceName resource name
 * @param partition partition of the resource
 * @param clusterStatusMonitor monitor object; stats are not reported when it is null
 * @param threshold top state handoff threshold
 * @param topState name of the top state
 */
private void reportTopStateComesBack(ResourceControllerDataProvider cache, Map<String, String> stateMap, String resourceName,
    Partition partition, ClusterStatusMonitor clusterStatusMonitor, long threshold,
    String topState) {
  Map<String, Map<String, MissingTopStateRecord>> missingTopStateMap =
      cache.getMissingTopStateMap();
  String partitionName = partition.getPartitionName();
  MissingTopStateRecord record = missingTopStateMap.get(resourceName).get(partitionName);
  long handOffStartTime = record.getStartTimeStamp();
  long fromTopStateUserLatency = record.getUserLatency();

  // Scan every instance in the state map; among those currently in the top state, keep the
  // smallest end time together with the user latency measured on that same instance.
  long handOffEndTime = Long.MAX_VALUE;
  long toTopStateUserLatency = DEFAULT_HANDOFF_USER_LATENCY;
  Map<String, LiveInstance> liveInstances = cache.getLiveInstances();
  for (String instanceName : stateMap.keySet()) {
    String sessionId = liveInstances.get(instanceName).getEphemeralOwner();
    CurrentState currentState = cache.getCurrentState(instanceName, sessionId).get(resourceName);
    if (!currentState.getState(partitionName).equalsIgnoreCase(topState)) {
      continue;
    }
    long endTime = currentState.getEndTime(partitionName);
    if (endTime <= handOffEndTime) {
      handOffEndTime = endTime;
      toTopStateUserLatency = endTime - currentState.getStartTime(partitionName);
    }
  }

  long elapsed = handOffEndTime - handOffStartTime;
  if (handOffStartTime > 0 && elapsed <= threshold) {
    long helixLatency = elapsed - fromTopStateUserLatency - toTopStateUserLatency;
    // During a controller leader switch the previous master information may be lost and the
    // missing-top-state start time approximated with the current time, so the estimated
    // duration can be off; the larger of the two values is reported as the duration.
    long duration = Math.max(elapsed, helixLatency);
    boolean isGraceful = record.isGracefulHandoff();
    logMissingTopStateInfo(duration, helixLatency, isGraceful, partitionName);

    if (clusterStatusMonitor != null) {
      clusterStatusMonitor.updateMissingTopStateDurationStats(resourceName, duration,
          helixLatency, isGraceful, true);
    }
  }

  // The missing record is cleared regardless of whether the handoff was reported.
  removeFromStatsMap(missingTopStateMap, resourceName, partition);
}
 
Example 4
Source File: InstanceMessagesCache.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the state of the host that should forward {@code relayMessage} and, depending on what
 * is found, either stamps a relay time on the relay message or marks it expired immediately.
 *
 * <p>Nothing is changed when the partition on the relay host is still in the hosted message's
 * from-state.
 *
 * @param relayMessage the cached relay message to update
 * @param liveInstanceMap live instances keyed by instance name
 * @param currentStateMap current states keyed by instance name, then session ID, then resource
 *        name
 * @param hostedMessage the message that carries the relay message; its target instance is the
 *        relay host
 */
private void checkRelayHost(Message relayMessage, Map<String, LiveInstance> liveInstanceMap,
    Map<String, Map<String, Map<String, CurrentState>>> currentStateMap, Message hostedMessage) {

  long currentTime = System.currentTimeMillis();

  String sessionId = hostedMessage.getTgtSessionId();
  String relayInstance = hostedMessage.getTgtName();
  String resourceName = hostedMessage.getResourceName();
  String partitionName = hostedMessage.getPartitionName();

  if (!liveInstanceMap.containsKey(relayInstance)) {
    // If the p2p forwarding host is no longer live, we should not remove the relay message immediately
    // since we do not know whether the relay message was forwarded before the instance went offline.
    setMessageRelayTime(relayMessage, currentTime);
    return;
  }
  String instanceSessionId = liveInstanceMap.get(relayInstance).getEphemeralOwner();
  if (!instanceSessionId.equals(sessionId)) {
    // Argument order follows the placeholders: hosted message ID first, relay message ID second.
    LOG.info("Relay instance sessionId {} does not match sessionId {} in hosted message {}, "
            + "set relay message {} to be expired.", instanceSessionId, sessionId,
        hostedMessage.getMsgId(), relayMessage.getId());
    setMessageRelayTime(relayMessage, currentTime);
    return;
  }

  Map<String, Map<String, CurrentState>> instanceCurrentStateMap =
      currentStateMap.get(relayInstance);
  if (instanceCurrentStateMap == null || !instanceCurrentStateMap.containsKey(sessionId)) {
    LOG.warn(
        "CurrentStateMap null for {}, session {}, set relay messages {} to be expired. Hosted message {}.",
        relayInstance, sessionId, relayMessage.getId(), hostedMessage.getId());
    setMessageRelayTime(relayMessage, currentTime);
    return;
  }

  Map<String, CurrentState> sessionCurrentStateMap = instanceCurrentStateMap.get(sessionId);
  CurrentState currentState = sessionCurrentStateMap.get(resourceName);

  if (currentState == null) {
    LOG.info("No currentState found for {} on {}, set relay message {} to be expired.",
        resourceName, relayInstance, relayMessage.getId());
    setMessageRelayTime(relayMessage, currentTime);
    return;
  }

  String partitionState = currentState.getState(partitionName);
  String targetState = hostedMessage.getToState();
  String fromState = hostedMessage.getFromState();

  // The relay host partition state has been changed after relay message was created.
  if (!fromState.equals(partitionState)) {
    // If the partition on the relay host turned to ERROR while transited from top state,
    // we can remove the cached relay message right away since participant won't forward the relay message anyway.
    if (HelixDefinedState.ERROR.name().equals(partitionState) && fromState
        .equals(currentState.getPreviousState(partitionName))) {
      LOG.info("Partition {} got to ERROR from the top state, "
              + "expiring relay message {} immediately. Hosted message {}.", partitionName,
          relayMessage.getId(), hostedMessage.getId());
      relayMessage.setExpired(true);
      return;
    }

    // If the partition completed the transition, set the relay time to be the actual time when state transition completed.
    if (targetState.equals(partitionState) && fromState
        .equals(currentState.getPreviousState(partitionName))) {
      // The relay host already completed the state transition.
      long completeTime = currentState.getEndTime(partitionName);
      if (completeTime > relayMessage.getCreateTimeStamp()) {
        setMessageRelayTime(relayMessage, completeTime);
        // A completed transition is an expected outcome, so it is logged at info level like
        // the other branches that set the relay time.
        LOG.info("Target state for partition {} matches the hosted message's target state, "
            + "set relay message {} to be expired.", partitionName, relayMessage.getId());
        return;
      }
    }

    // For all other situations, set relay time to be current time.
    setMessageRelayTime(relayMessage, currentTime);
    // the state has been changed after it completed the required state transition (maybe another state-transition happened).
    LOG.info("Current state {} for partition {} does not match hosted message's from state, "
            + "set relay message {} to be expired.", partitionState, partitionName,
        relayMessage.getId());
  }
}
 
Example 5
Source File: CurrentStateSnapshot.java    From helix with Apache License 2.0 4 votes vote down vote up
/**
 * Collects, for every updated current-state key, the end times of partitions whose end time
 * moved forward compared with the previous snapshot.
 *
 * @return a map from property key to (partition name to new end time); keys with no qualifying
 *         partition are omitted, and the map is empty when either the updated-key collection or
 *         the previous state map is null
 */
public Map<PropertyKey, Map<String, Long>> getNewCurrentStateEndTimes() {
  Map<PropertyKey, Map<String, Long>> endTimeMap = new HashMap<>();
  // Without updated keys or a previous state map there is nothing to compare against, so no
  // update is treated as a "recent" change.
  if (_updatedStateKeys == null || _prevStateMap == null) {
    return endTimeMap;
  }

  int driftCnt = 0; // number of clock-drift cases seen; only the first one is logged
  for (PropertyKey propertyKey : _updatedStateKeys) {
    CurrentState prevState = _prevStateMap.get(propertyKey);
    CurrentState curState = _properties.get(propertyKey);

    Map<String, Long> partitionUpdateEndTimes = new HashMap<>();
    for (String partition : curState.getPartitionStateMap().keySet()) {
      long newEndTime = curState.getEndTime(partition);
      boolean movedForward = prevState == null || prevState.getEndTime(partition) < newEndTime;
      // An end time of -1 is never recorded, even when there is no previous state; otherwise
      // the statePropagation latency calculation in RoutingTableProvider would spit out
      // extremely large metrics.
      if (movedForward && newEndTime != -1) {
        partitionUpdateEndTimes.put(partition, newEndTime);
      } else if (prevState != null && prevState.getEndTime(partition) > newEndTime) {
        // The end time went backwards, which can happen due to clock drift.
        // This inner loop runs over every partition of every updated key, which can be a large
        // space, so only the first occurrence is logged to avoid flooding the log. If clock
        // drift turns out to be common, more logging or a metric can be considered.
        if (driftCnt < 1) {
          LOG.warn(
              "clock drift. partition:" + partition + " curState:" + curState.getState(partition) + " prevState: "
                  + prevState.getState(partition));
        }
        driftCnt++;
      }
    }

    if (!partitionUpdateEndTimes.isEmpty()) {
      endTimeMap.put(propertyKey, partitionUpdateEndTimes);
    }
  }
  return endTimeMap;
}