Java Code Examples for org.apache.solr.common.cloud.Replica#isActive()

The following examples show how to use org.apache.solr.common.cloud.Replica#isActive(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. You may also check out the related API usage on the sidebar.
Example 1
Source File: AbstractFullDistribZkTestBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds and logs a per-replica summary for the given collection. Replicas for which
 * {@code isActive} is false are reported as such; for every other replica the shard leader is
 * flagged and {@code logReplicationDetails} is invoked to append to the summary.
 *
 * @param collectionName name of the collection to inspect
 * @param zkStateReader reader used to update and read the cluster state
 */
protected void logReplicaTypesReplicationInfo(String collectionName, ZkStateReader zkStateReader) throws KeeperException, InterruptedException, IOException {
  log.info("## Collecting extra Replica.Type information of the cluster");
  zkStateReader.updateLiveNodes();
  StringBuilder summary = new StringBuilder();
  zkStateReader.forceUpdateCollection(collectionName);
  DocCollection docCollection = zkStateReader.getClusterState().getCollection(collectionName);
  for (Slice shard : docCollection.getSlices()) {
    Replica shardLeader = shard.getLeader();
    for (Replica replica : shard.getReplicas()) {
      // The live-node set is re-read for every replica, exactly as the cluster state reports it now.
      boolean active = replica.isActive(zkStateReader.getClusterState().getLiveNodes());
      if (active) {
        if (replica.equals(shardLeader)) {
          summary.append(String.format(Locale.ROOT, "Replica %s is leader%s", replica.getName(), System.lineSeparator()));
        }
        logReplicationDetails(replica, summary);
      } else {
        summary.append(String.format(Locale.ROOT, "Replica %s not in liveNodes or is not active%s", replica.getName(), System.lineSeparator()));
      }
    }
  }
  log.info("Summary of the cluster: {}", summary);
}
 
Example 2
Source File: MiniSolrCloudCluster.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a predicate that holds when the collection state is non-null, has exactly
 * {@code expectedShards} slices, and the total number of replicas reporting active against the
 * supplied live nodes equals {@code expectedReplicas}.
 *
 * @param expectedShards required number of slices
 * @param expectedReplicas required total number of active replicas across all slices
 * @return the predicate described above
 */
public static CollectionStatePredicate expectedShardsAndActiveReplicas(int expectedShards, int expectedReplicas) {
  return (liveNodes, collectionState) -> {
    if (collectionState == null || collectionState.getSlices().size() != expectedShards) {
      return false;
    }

    int activeCount = 0;
    for (Slice shard : collectionState) {
      for (Replica candidate : shard) {
        activeCount += candidate.isActive(liveNodes) ? 1 : 0;
      }
    }
    return activeCount == expectedReplicas;
  };
}
 
Example 3
Source File: RebalanceLeaders.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the cluster state (at most 600 rounds, 100 ms apart) while there are pending operations.
 * Whenever an active replica carries both the preferred-leader and the leader property and its
 * slice is still pending, the slice is removed from {@code pendingOps} and recorded as a success.
 * After polling, {@code addAnyFailures()} is called.
 */
private void checkLeaderStatus() throws InterruptedException, KeeperException {
  int round = 0;
  while (!pendingOps.isEmpty() && round < 600) {
    ClusterState currentState = coreContainer.getZkController().getClusterState();
    Set<String> live = currentState.getLiveNodes();
    DocCollection coll = currentState.getCollection(collectionName);
    for (Slice slice : coll.getSlices()) {
      for (Replica replica : slice.getReplicas()) {
        boolean activePreferred = replica.isActive(live) && replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false);
        if (!activePreferred
            || !replica.getBool(LEADER_PROP, false)
            || !pendingOps.containsKey(slice.getName())) {
          continue;
        }
        // The preferred leader is now the actual leader: record the successful change
        // and stop looking at the remaining replicas of this slice.
        pendingOps.remove(slice.getName());
        addToSuccesses(slice, replica);
        break;
      }
    }
    TimeUnit.MILLISECONDS.sleep(100);
    coreContainer.getZkController().getZkStateReader().forciblyRefreshAllClusterStateSlow();
    ++round;
  }
  addAnyFailures();
}
 
Example 4
Source File: TestPullReplica.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a predicate that holds when the collection state is non-null and the number of active
 * replicas of each type (NRT, TLOG, PULL) matches the corresponding expected count exactly.
 *
 * @param numNrtReplicas expected number of active NRT replicas
 * @param numTlogReplicas expected number of active TLOG replicas
 * @param numPullReplicas expected number of active PULL replicas
 */
private CollectionStatePredicate activeReplicaCount(int numNrtReplicas, int numTlogReplicas, int numPullReplicas) {
  return (liveNodes, collectionState) -> {
    if (collectionState == null) {
      return false;
    }
    int nrtActive = 0;
    int tlogActive = 0;
    int pullActive = 0;
    for (Slice shard : collectionState) {
      for (Replica candidate : shard) {
        if (!candidate.isActive(liveNodes)) {
          continue; // only active replicas are counted
        }
        switch (candidate.getType()) {
          case NRT:
            nrtActive++;
            break;
          case TLOG:
            tlogActive++;
            break;
          case PULL:
            pullActive++;
            break;
          default:
            throw new AssertionError("Unexpected replica type");
        }
      }
    }
    return numNrtReplicas == nrtActive && numTlogReplicas == tlogActive && numPullReplicas == pullActive;
  };
}
 
Example 5
Source File: TestTlogReplica.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For every replica that is currently active, repeatedly runs {@code query} directly against
 * that replica until it reports {@code numDocs} hits. A single time budget of {@code timeout}
 * seconds is shared by all replicas; once it is exhausted the last assertion failure is rethrown.
 *
 * @param numDocs expected number of matching documents
 * @param replicas replicas to check; inactive ones are skipped
 * @param query query string sent to each replica
 * @param timeout overall time budget in seconds
 */
private void waitForNumDocsInAllReplicas(int numDocs, Collection<Replica> replicas, String query, int timeout) throws IOException, SolrServerException, InterruptedException {
  TimeOut deadline = new TimeOut(timeout, TimeUnit.SECONDS, TimeSource.NANO_TIME);
  for (Replica replica : replicas) {
    boolean active = replica.isActive(cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes());
    if (active) {
      try (HttpSolrClient replicaClient = getHttpSolrClient(replica.getCoreUrl())) {
        boolean upToDate = false;
        while (!upToDate) {
          try {
            assertEquals("Replica " + replica.getName() + " not up to date after " + timeout + " seconds",
                numDocs, replicaClient.query(new SolrQuery(query)).getResults().getNumFound());
            upToDate = true;
          } catch (AssertionError mismatch) {
            // Retry until the shared deadline passes, then surface the assertion failure.
            if (deadline.hasTimedOut()) {
              throw mismatch;
            }
            Thread.sleep(100);
          }
        }
      }
    }
  }
}
 
Example 6
Source File: TestTlogReplica.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a predicate that is true only for a non-null collection state whose active replicas,
 * tallied per replica type, equal the requested NRT, TLOG and PULL counts.
 *
 * @param numNrtReplicas expected number of active NRT replicas
 * @param numTlogReplicas expected number of active TLOG replicas
 * @param numPullReplicas expected number of active PULL replicas
 */
private CollectionStatePredicate activeReplicaCount(int numNrtReplicas, int numTlogReplicas, int numPullReplicas) {
  return (liveNodes, collectionState) -> {
    if (collectionState == null) {
      return false;
    }
    int nrtSeen = 0;
    int tlogSeen = 0;
    int pullSeen = 0;
    for (Slice shard : collectionState) {
      for (Replica rep : shard) {
        boolean active = rep.isActive(liveNodes);
        if (active) {
          switch (rep.getType()) {
            case TLOG:
              tlogSeen++;
              break;
            case PULL:
              pullSeen++;
              break;
            case NRT:
              nrtSeen++;
              break;
            default:
              throw new AssertionError("Unexpected replica type");
          }
        }
      }
    }
    boolean nrtMatches = numNrtReplicas == nrtSeen;
    boolean tlogMatches = numTlogReplicas == tlogSeen;
    boolean pullMatches = numPullReplicas == pullSeen;
    return nrtMatches && tlogMatches && pullMatches;
  };
}
 
Example 7
Source File: TestRebalanceLeaders.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the collection every 100 ms until every replica of every slice reports active, and
 * fails the test if that has not happened once the {@code timeoutMs} budget is used up.
 */
private void checkAllReplicasActive() throws KeeperException, InterruptedException {
  TimeOut deadline = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
  while (!deadline.hasTimedOut()) {
    forceUpdateCollectionStatus();
    DocCollection coll = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION_NAME);
    Set<String> live = cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes();
    boolean everyReplicaActive = true;
    for (Slice shard : coll.getSlices()) {
      for (Replica replica : shard.getReplicas()) {
        // Non-short-circuit accumulation: every replica is still queried.
        everyReplicaActive &= replica.isActive(live);
      }
    }
    if (everyReplicaActive) {
      return;
    }
    TimeUnit.MILLISECONDS.sleep(100);
  }
  fail("timed out waiting for all replicas to become active");
}
 
Example 8
Source File: TestPullReplicaErrorHandling.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a predicate that holds when the collection state is non-null and its active replicas
 * split into exactly {@code numWriter} NRT, {@code numActive} TLOG and {@code numPassive} PULL
 * replicas.
 *
 * @param numWriter expected number of active NRT replicas
 * @param numActive expected number of active TLOG replicas
 * @param numPassive expected number of active PULL replicas
 */
private CollectionStatePredicate activeReplicaCount(int numWriter, int numActive, int numPassive) {
  return (liveNodes, collectionState) -> {
    if (collectionState == null) {
      return false;
    }
    int nrtCount = 0;
    int tlogCount = 0;
    int pullCount = 0;
    for (Slice shard : collectionState) {
      for (Replica candidate : shard) {
        if (!candidate.isActive(liveNodes)) {
          continue; // inactive replicas do not count towards any bucket
        }
        switch (candidate.getType()) {
          case NRT:
            nrtCount++;
            break;
          case TLOG:
            tlogCount++;
            break;
          case PULL:
            pullCount++;
            break;
          default:
            throw new AssertionError("Unexpected replica type");
        }
      }
    }
    return numWriter == nrtCount && numActive == tlogCount && numPassive == pullCount;
  };
}
 
Example 9
Source File: TestPolicyCloud.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a predicate that holds when the collection is non-null, has
 * {@code expectedSliceCount} slices, every replica is active and located on
 * {@code expectedNodeName}, and the total number of replicas equals
 * {@code expectedReplicaCount}.
 */
private static CollectionStatePredicate expectAllReplicasOnSpecificNode
  (final String expectedNodeName,
   final int expectedSliceCount,
   final int expectedReplicaCount) {

  return (liveNodes, collection) -> {
    if (collection == null) {
      return false;
    }
    if (collection.getSlices().size() != expectedSliceCount) {
      return false;
    }
    int seenReplicas = 0;
    for (Slice shard : collection) {
      for (Replica candidate : shard) {
        // A single inactive or misplaced replica is enough to reject the state.
        if (!candidate.isActive(liveNodes) || !expectedNodeName.equals(candidate.getNodeName())) {
          return false;
        }
        seenReplicas++;
      }
    }
    return seenReplicas == expectedReplicaCount;
  };
}
 
Example 10
Source File: SolrCloudTestCase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the replicas of all slices that are active with respect to {@code liveNodes}, logs the
 * count next to the expectation, and reports whether the two are equal.
 *
 * @param expectedReplicas expected total number of active replicas
 * @param liveNodes live node names passed to {@code Replica.isActive}
 * @param collectionState collection whose slices and replicas are inspected
 * @return {@code true} if the active replica count equals {@code expectedReplicas}
 */
private static boolean compareActiveReplicaCountsForShards(int expectedReplicas, Set<String> liveNodes, DocCollection collectionState) {
  int activeCount = 0;
  for (Slice shard : collectionState) {
    for (Replica candidate : shard) {
      if (!candidate.isActive(liveNodes)) {
        continue;
      }
      activeCount++;
    }
  }

  log.info("active replica count: {} expected replica count: {}", activeCount, expectedReplicas);

  return expectedReplicas == activeCount;
}
 
Example 11
Source File: LeaderRecoveryWatcher.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the latch down and returns {@code true} (stop watching) as soon as waiting is pointless
 * or satisfied: the collection or shard is gone, or some replica other than {@code replicaId}
 * that is not of type PULL, matches {@code targetCore} when one is set, and is active exists.
 * Otherwise returns {@code false} so the watch stays registered.
 */
@Override
public boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
  // A deleted collection or a removed shard both mean there is nothing left to wait for.
  Slice slice = collectionState == null ? null : collectionState.getSlice(shardId);
  if (slice == null) {
    latch.countDown();
    return true;
  }
  for (Replica candidate : slice.getReplicas()) {
    // Any other replica will do - it does not have to be the one being moved - as long as
    // it is active and eligible to become leader.
    if (candidate.getName().equals(replicaId)) {
      continue;
    }
    if (candidate.getType().equals(Replica.Type.PULL)) {
      continue; // PULL replicas are not eligible for leader election
    }
    String candidateCore = candidate.getStr(ZkStateReader.CORE_NAME_PROP);
    boolean coreMismatch = targetCore != null && !targetCore.equals(candidateCore);
    if (coreMismatch) {
      continue;
    }
    if (candidate.isActive(liveNodes)) {
      // recovered - stop waiting
      latch.countDown();
      return true;
    }
  }
  // keep the watch so we are called again once the new replica recovers
  return false;
}
 
Example 12
Source File: TestRebalanceLeaders.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Polls the collection every 100 ms until no replica located on one of the given Jetty nodes is
 * reported active, and fails the test if that does not happen within {@code timeoutMs}.
 *
 * @param downJettys Jetty instances whose replicas are expected to become inactive
 */
private void checkReplicasInactive(List<JettySolrRunner> downJettys) throws KeeperException, InterruptedException {
  TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
  DocCollection docCollection = null;
  Set<String> liveNodes = null;

  // Build the "host:port_solr" names that are compared against Replica.getNodeName().
  Set<String> downJettyNodes = new TreeSet<>();
  for (JettySolrRunner jetty : downJettys) {
    downJettyNodes.add(jetty.getBaseUrl().getHost() + ":" + jetty.getBaseUrl().getPort() + "_solr");
  }
  while (timeout.hasTimedOut() == false) {
    forceUpdateCollectionStatus();
    docCollection = cluster.getSolrClient().getZkStateReader().getClusterState().getCollection(COLLECTION_NAME);
    liveNodes = cluster.getSolrClient().getZkStateReader().getClusterState().getLiveNodes();
    boolean expectedInactive = true;

    for (Slice slice : docCollection.getSlices()) {
      for (Replica rep : slice.getReplicas()) {
        if (downJettyNodes.contains(rep.getNodeName()) == false) {
          continue; // We are on a live node
        }
        // A replica on an allegedly down node is reported as active.
        if (rep.isActive(liveNodes)) {
          expectedInactive = false;
        }
      }
    }
    if (expectedInactive) {
      return;
    }
    TimeUnit.MILLISECONDS.sleep(100);
  }
  // docCollection is still null if the timeout expired before the first poll. Concatenating it
  // directly (instead of calling toString()) keeps the real failure message rather than an NPE.
  fail("timed out waiting for all replicas to become inactive: livenodes: " + liveNodes +
      " Collection state: " + docCollection);
}
 
Example 13
Source File: RebalanceLeaders.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Acts on the first replica of {@code slice} that carries the preferred-leader property; all
 * other replicas are skipped, and the method returns after handling that one replica.
 * <ul>
 *   <li>If it already has the leader property, that is recorded via
 *       {@code addAlreadyLeaderToResults} and nothing else happens.</li>
 *   <li>If it is not active, that is recorded via {@code addInactiveToResults}.</li>
 *   <li>If {@code electionQueueInBadState} returns true, the method returns without further action.</li>
 *   <li>Otherwise the replica is made first watcher when it is not already, the slice is added
 *       to {@code pendingOps}, the current leader's election node is asked to rejoin the
 *       election queue, and the method waits for that node to change.</li>
 * </ul>
 *
 * @param slice the slice whose preferred leader should become the actual leader
 */
private void ensurePreferredIsLeader(Slice slice) throws KeeperException, InterruptedException {
  for (Replica replica : slice.getReplicas()) {
    // Tell the replica to become the leader if we're the preferred leader AND active AND not the leader already
    if (replica.getBool(SliceMutator.PREFERRED_LEADER_PROP, false) == false) {
      continue;
    }
    // OK, we are the preferred leader, are we the actual leader?
    if (replica.getBool(LEADER_PROP, false)) {
      //We're a preferred leader, but we're _also_ the leader, don't need to do anything.
      addAlreadyLeaderToResults(slice, replica);
      return; // already the leader, do nothing.
    }
    ZkStateReader zkStateReader = coreContainer.getZkController().getZkStateReader();
    // We're the preferred leader, but someone else is leader. Only become leader if we're active.
    if (replica.isActive(zkStateReader.getClusterState().getLiveNodes()) == false) {
      addInactiveToResults(slice, replica);
      return; // Don't try to become the leader if we're not active!
    }

    List<String> electionNodes = OverseerTaskProcessor.getSortedElectionNodes(zkStateReader.getZkClient(),
        ZkStateReader.getShardLeadersElectPath(collectionName, slice.getName()));

    if (electionQueueInBadState(electionNodes, slice, replica)) {
      return;
    }

    // Replica is the preferred leader but not the actual leader, do something about that.
    // "Something" is
    // 1> if the preferred leader isn't first in line, tell it to re-queue itself.
    // 2> tell the actual leader to re-queue itself.

    // Ok, the sorting for election nodes is a bit strange. If the sequence numbers are the same, then the whole
    // string is used, but that sorts nodes with the same sequence number by their session IDs from ZK.
    // While this is determinate, it's not quite what we need, so re-queue nodes that aren't us and are
    // watching the leader node..


    // NOTE(review): get(1) requires at least two election nodes; presumably
    // electionQueueInBadState() rejects shorter queues - confirm.
    String firstWatcher = electionNodes.get(1);

    if (LeaderElector.getNodeName(firstWatcher).equals(replica.getName()) == false) {
      makeReplicaFirstWatcher(slice, replica);
    }

    // This replica should be the leader at the end of the day, so let's record that information to check at the end
    pendingOps.put(slice.getName(), replica.getName());
    // Index 0 of the sorted election nodes is treated as the current leader's entry.
    String leaderElectionNode = electionNodes.get(0);
    String coreName = slice.getReplica(LeaderElector.getNodeName(leaderElectionNode)).getStr(CORE_NAME_PROP);
    rejoinElectionQueue(slice, leaderElectionNode, coreName, false);
    waitForNodeChange(slice, leaderElectionNode);

    return; // Done with this slice, skip the rest of the replicas.
  }
}
 
Example 14
Source File: CloudUtil.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link CollectionStatePredicate} that is satisfied once a collection has the
 * expected number of shards and every checked shard has the expected number of active replicas.
 * <p>Note: for shards marked as inactive the current Solr behavior is that replicas remain active.
 * {@link org.apache.solr.cloud.autoscaling.sim.SimCloudManager} follows this behavior.</p>
 * @param expectedShards expected number of shards
 * @param expectedReplicas expected number of active replicas per shard
 * @param withInactive if true then inactive shards are counted as well
 * @param requireLeaders if true then every shard that is not inactive must have a leader
 */
public static CollectionStatePredicate clusterShape(int expectedShards, int expectedReplicas, boolean withInactive,
                                                    boolean requireLeaders) {
  return (liveNodes, collectionState) -> {
    if (collectionState == null) {
      log.debug("-- null collection");
      return false;
    }
    Collection<Slice> slices;
    if (withInactive) {
      slices = collectionState.getSlices();
    } else {
      slices = collectionState.getActiveSlices();
    }
    if (slices.size() != expectedShards) {
      if (log.isDebugEnabled()) {
        log.debug("-- wrong number of slices for collection {}, expected={}, found={}: {}", collectionState.getName(), expectedShards, collectionState.getSlices().size(), collectionState.getSlices());
      }
      return false;
    }
    Set<String> leaderless = new HashSet<>();
    for (Slice slice : slices) {
      boolean missingLeader = requireLeaders && slice.getState() != Slice.State.INACTIVE && slice.getLeader() == null;
      if (missingLeader) {
        leaderless.add(slice.getName());
        continue;
      }
      // once a leaderless shard was seen the result is already false; only keep collecting names
      if (!leaderless.isEmpty()) {
        continue;
      }
      int activeCount = 0;
      for (Replica replica : slice) {
        if (replica.isActive(liveNodes)) {
          activeCount++;
        }
      }
      if (activeCount == expectedReplicas) {
        continue;
      }
      if (log.isDebugEnabled()) {
        log.debug("-- wrong number of active replicas for collection {} in slice {}, expected={}, found={}", collectionState.getName(), slice.getName(), expectedReplicas, activeCount);
      }
      return false;
    }
    if (!leaderless.isEmpty()) {
      log.info("-- shards without leaders: {}", leaderless);
      return false;
    }
    return true;
  };
}
 
Example 15
Source File: ActiveReplicaWatcher.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Tracks the replicas named in {@code replicaIds} / {@code solrCoreNames} until each one has
 * been seen active. Every newly active replica is moved to {@code activeReplicas} and, when a
 * latch is set, counts it down once. If the collection was deleted, the latch is counted down
 * for everything still outstanding and both sets are cleared.
 *
 * @param liveNodes live node names passed to {@code Replica.isActive}
 * @param collectionState current collection state, or {@code null} if the collection was deleted
 * @return {@code true} when nothing is left to wait for (the watcher is done), {@code false}
 *         when replicas are still outstanding or the call repeated an already seen zkVersion
 */
@Override
public synchronized boolean onStateChanged(Set<String> liveNodes, DocCollection collectionState) {
  if (log.isDebugEnabled()) {
    log.debug("-- onStateChanged@{}: replicaIds={}, solrCoreNames={} {}\ncollectionState {}"
        , Long.toHexString(hashCode()), replicaIds, solrCoreNames
        , (latch != null ? "\nlatch count=" + latch.getCount() : "")
        , collectionState); // logOk
  }
  if (collectionState == null) { // collection has been deleted - don't wait
    if (log.isDebugEnabled()) {
      log.debug("-- collection deleted, decrementing latch by {} ", replicaIds.size() + solrCoreNames.size()); // logOk
    }
    if (latch != null) {
      for (int i = 0; i < replicaIds.size() + solrCoreNames.size(); i++) {
        latch.countDown();
      }
    }
    replicaIds.clear();
    solrCoreNames.clear();
    return true;
  }
  if (replicaIds.isEmpty() && solrCoreNames.isEmpty()) {
    log.debug("-- already done, exiting...");
    return true;
  }
  if (collectionState.getZNodeVersion() == lastZkVersion) {
    log.debug("-- spurious call with already seen zkVersion= {}, ignoring...", lastZkVersion);
    return false;
  }
  lastZkVersion = collectionState.getZNodeVersion();

  for (Slice slice : collectionState.getSlices()) {
    for (Replica replica : slice.getReplicas()) {
      if (replicaIds.contains(replica.getName())) {
        if (replica.isActive(liveNodes)) {
          activeReplicas.add(replica);
          replicaIds.remove(replica.getName());
          if (latch != null) {
            latch.countDown();
          }
        }
      } else if (solrCoreNames.contains(replica.getStr(ZkStateReader.CORE_NAME_PROP))) {
        if (replica.isActive(liveNodes)) {
          activeReplicas.add(replica);
          solrCoreNames.remove(replica.getStr(ZkStateReader.CORE_NAME_PROP));
          if (latch != null) {
            latch.countDown();
          }
        }
      }
    }
  }
  if (log.isDebugEnabled()) {
    // The latch is optional everywhere else in this method; dereferencing it unguarded here
    // would throw an NPE whenever debug logging is on and no latch was supplied.
    Object latchCount = latch != null ? Long.valueOf(latch.getCount()) : "n/a";
    log.debug("-- {} now latchcount={}", Long.toHexString(hashCode()), latchCount); // logOk
  }
  return replicaIds.isEmpty() && solrCoreNames.isEmpty();
}
 
Example 16
Source File: AutoscalingHistoryHandlerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Polls the collection state up to 300 times, one second apart, until every replica located on
 * a live node is active and every slice has a leader. Replicas on nodes that are no longer live
 * are ignored; a collection without replicas never counts as recovered. Fails the test if the
 * condition is not met within the allotted attempts.
 *
 * @param collection name of the collection to wait for
 */
private static void waitForRecovery(String collection) throws Exception {
  log.info("Waiting for recovery of {}", collection);
  boolean recovered = false;
  boolean allActive = true;
  boolean hasLeaders = true;
  DocCollection collState = null;
  int attempt = 0;
  while (attempt++ < 300) {
    ClusterState state = solrClient.getZkStateReader().getClusterState();
    collState = getCollectionState(collection);
    log.debug("###### {}", collState);
    Collection<Replica> replicas = collState.getReplicas();
    hasLeaders = true;
    if (replicas == null || replicas.isEmpty()) {
      allActive = false;
    } else {
      allActive = true;
      for (Replica r : replicas) {
        if (!state.getLiveNodes().contains(r.getNodeName())) {
          log.info("Replica no longer on a live node, ignoring: {}", r);
          continue;
        }
        if (!r.isActive(state.getLiveNodes())) {
          log.info("Not active: {}", r);
          allActive = false;
        }
      }
    }
    for (Slice slice : collState.getSlices()) {
      hasLeaders &= slice.getLeader() != null;
    }
    if (allActive && hasLeaders) {
      recovered = true;
      break;
    }
    log.info("--- waiting, allActive={}, hasLeaders={}", allActive, hasLeaders);
    Thread.sleep(1000);
  }
  assertTrue("replica never fully recovered: allActive=" + allActive + ", hasLeaders=" + hasLeaders + ", collState=" + collState, recovered);

}