Java Code Examples for org.apache.solr.common.cloud.Replica#State
The following examples show how to use
org.apache.solr.common.cloud.Replica#State.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OverseerCollectionMessageHandler.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Sends a request to the replicas of every slice of the collection named in {@code message}.
 *
 * @param message        carries the collection name under {@code NAME}
 * @param params         request parameters passed on to each slice command
 * @param results        receives the processed shard responses
 * @param stateMatcher   passed through to {@code sliceCmd} to select replicas by state
 * @param asyncId        id used to create the request tracker
 * @param okayExceptions passed through to response processing
 * @return the replicas that the per-slice commands reported as not live for receiving the request
 */
List<Replica> collectionCmd(ZkNodeProps message, ModifiableSolrParams params,
                            NamedList<Object> results, Replica.State stateMatcher,
                            String asyncId, Set<String> okayExceptions) {
  log.info("Executing Collection Cmd={}, asyncId={}", params, asyncId);

  final String targetCollection = message.getStr(NAME);
  @SuppressWarnings("deprecation")
  ShardHandler handler = shardHandlerFactory.getShardHandler(
      overseer.getCoreContainer().getUpdateShardHandler().getDefaultHttpClient());

  final ClusterState currentState = zkStateReader.getClusterState();
  final DocCollection docCollection = currentState.getCollection(targetCollection);

  final List<Replica> skippedReplicas = new ArrayList<>();
  final ShardRequestTracker tracker = new ShardRequestTracker(asyncId);
  for (Slice eachSlice : docCollection.getSlices()) {
    List<Replica> notLiveInSlice = tracker.sliceCmd(currentState, params, stateMatcher, eachSlice, handler);
    skippedReplicas.addAll(notLiveInSlice);
  }

  tracker.processResponses(results, handler, false, null, okayExceptions);
  return skippedReplicas;
}
Example 2
Source File: HttpPartitionTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Collects the replicas of one shard whose state is ACTIVE or RECOVERING.
 * Only the active slices of the collection are inspected, and replicas are
 * keyed by name while they are gathered.
 *
 * @param testCollectionName collection to look up in the current cluster state
 * @param shardId            name of the slice whose replicas are wanted
 * @return a new list holding the matching replicas
 */
protected List<Replica> getActiveOrRecoveringReplicas(String testCollectionName, String shardId) throws Exception {
  Map<String,Replica> matchesByName = new HashMap<>();

  ZkStateReader stateReader = cloudClient.getZkStateReader();
  ClusterState cs = stateReader.getClusterState();
  assertNotNull(cs);

  for (Slice candidate : cs.getCollection(testCollectionName).getActiveSlices()) {
    if (!candidate.getName().equals(shardId)) {
      continue;
    }
    for (Replica member : candidate.getReplicas()) {
      final Replica.State memberState = member.getState();
      final boolean wanted = memberState == Replica.State.ACTIVE || memberState == Replica.State.RECOVERING;
      if (wanted) {
        matchesByName.put(member.getName(), member);
      }
    }
  }

  return new ArrayList<>(matchesByName.values());
}
Example 3
Source File: OverseerCollectionMessageHandler.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Send request to all replicas of a slice * @return List of replicas which is not live for receiving the request */ public List<Replica> sliceCmd(ClusterState clusterState, ModifiableSolrParams params, Replica.State stateMatcher, Slice slice, ShardHandler shardHandler) { List<Replica> notLiveReplicas = new ArrayList<>(); for (Replica replica : slice.getReplicas()) { if ((stateMatcher == null || Replica.State.getState(replica.getStr(ZkStateReader.STATE_PROP)) == stateMatcher)) { if (clusterState.liveNodesContain(replica.getStr(ZkStateReader.NODE_NAME_PROP))) { // For thread safety, only simple clone the ModifiableSolrParams ModifiableSolrParams cloneParams = new ModifiableSolrParams(); cloneParams.add(params); cloneParams.set(CoreAdminParams.CORE, replica.getStr(ZkStateReader.CORE_NAME_PROP)); sendShardRequest(replica.getStr(ZkStateReader.NODE_NAME_PROP), cloneParams, shardHandler); } else { notLiveReplicas.add(replica); } } } return notLiveReplicas; }
Example 4
Source File: ChaosMonkey.java From lucene-solr with Apache License 2.0 | 6 votes |
private int checkIfKillIsLegal(String sliceName, int numActive) throws KeeperException, InterruptedException { for (CloudJettyRunner cloudJetty : shardToJetty.get(sliceName)) { // get latest cloud state zkStateReader.forceUpdateCollection(collection); DocCollection docCollection = zkStateReader.getClusterState().getCollection(collection); Slice slice = docCollection.getSlice(sliceName); ZkNodeProps props = slice.getReplicasMap().get(cloudJetty.coreNodeName); if (props == null) { throw new RuntimeException("shard name " + cloudJetty.coreNodeName + " not found in " + slice.getReplicasMap().keySet()); } final Replica.State state = Replica.State.getState(props.getStr(ZkStateReader.STATE_PROP)); final String nodeName = props.getStr(ZkStateReader.NODE_NAME_PROP); if (cloudJetty.jetty.isRunning() && state == Replica.State.ACTIVE && zkStateReader.getClusterState().liveNodesContain(nodeName)) { numActive++; } } return numActive; }
Example 5
Source File: ChaosMonkey.java From lucene-solr with Apache License 2.0 | 5 votes |
private boolean canKillIndexer(String sliceName) throws KeeperException, InterruptedException { int numIndexersFoundInShard = 0; for (CloudJettyRunner cloudJetty : shardToJetty.get(sliceName)) { // get latest cloud state zkStateReader.forceUpdateCollection(collection); DocCollection docCollection = zkStateReader.getClusterState().getCollection(collection); Slice slice = docCollection.getSlice(sliceName); ZkNodeProps props = slice.getReplicasMap().get(cloudJetty.coreNodeName); if (props == null) { throw new RuntimeException("shard name " + cloudJetty.coreNodeName + " not found in " + slice.getReplicasMap().keySet()); } final Replica.State state = Replica.State.getState(props.getStr(ZkStateReader.STATE_PROP)); final Replica.Type replicaType = Replica.Type.valueOf(props.getStr(ZkStateReader.REPLICA_TYPE)); final String nodeName = props.getStr(ZkStateReader.NODE_NAME_PROP); if (cloudJetty.jetty.isRunning() && state == Replica.State.ACTIVE && (replicaType == Replica.Type.TLOG || replicaType == Replica.Type.NRT) && zkStateReader.getClusterState().liveNodesContain(nodeName)) { numIndexersFoundInShard++; } } return numIndexersFoundInShard > 1; }
Example 6
Source File: AbstractDistribZkTestBase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Waits, for up to 15000 milliseconds, until the named replica exists in the given shard
 * of the collection and is in {@code expectedState}.
 *
 * @param reader        state reader used to wait on the collection
 * @param collection    collection name
 * @param shard         slice name inside the collection
 * @param coreNodeName  key of the replica in the slice's replica map
 * @param expectedState state the replica must reach
 */
public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard,
                                       String coreNodeName, Replica.State expectedState)
    throws InterruptedException, TimeoutException {
  log.info("verifyReplicaStatus ({}) shard={} coreNodeName={}", collection, shard, coreNodeName);
  reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS, (collectionState) -> {
    // Not satisfied until the collection and the slice are both visible
    if (collectionState == null || collectionState.getSlice(shard) == null) {
      return false;
    }
    Replica target = collectionState.getSlice(shard).getReplicasMap().get(coreNodeName);
    return target != null && target.getState() == expectedState;
  });
}
Example 7
Source File: ReplicaInfo.java From lucene-solr with Apache License 2.0 | 5 votes |
public Replica.State getState() { if (variables.get(ZkStateReader.STATE_PROP) != null) { return Replica.State.getState((String) variables.get(ZkStateReader.STATE_PROP)); } else { // default to ACTIVE variables.put(ZkStateReader.STATE_PROP, Replica.State.ACTIVE.toString()); return Replica.State.ACTIVE; } }
Example 8
Source File: SimClusterStateProvider.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writes {@code state} into the variables of every replica registered for the node.
 * For any state other than ACTIVE the leader property is removed as well.
 * Each touched replica's collection name is added to {@code changedCollections}.
 */
private void setReplicaStates(String nodeId, Replica.State state, Set<String> changedCollections) {
  @SuppressWarnings({"unchecked"})
  List<ReplicaInfo> replicas = nodeReplicaMap.computeIfAbsent(nodeId, Utils.NEW_SYNCHRONIZED_ARRAYLIST_FUN);
  final boolean dropLeaderFlag = state != Replica.State.ACTIVE;
  synchronized (replicas) {
    for (ReplicaInfo info : replicas) {
      info.getVariables().put(ZkStateReader.STATE_PROP, state.toString());
      if (dropLeaderFlag) {
        info.getVariables().remove(ZkStateReader.LEADER_PROP);
      }
      changedCollections.add(info.getCollection());
    }
  }
}
Example 9
Source File: HealthCheckHandlerTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code CloudDescriptor} from the given collection, core node name and shard id,
 * then sets its registered flag and last published state.
 */
private CloudDescriptor mockCD(String collection, String name, String shardId, boolean registered, Replica.State state) {
  final Properties descriptorProps = new Properties();
  descriptorProps.put(CoreDescriptor.CORE_SHARD, shardId);
  descriptorProps.put(CoreDescriptor.CORE_COLLECTION, collection);
  descriptorProps.put(CoreDescriptor.CORE_NODE_NAME, name);

  final CloudDescriptor descriptor = new CloudDescriptor(null, name, descriptorProps);
  descriptor.setHasRegistered(registered);
  descriptor.setLastPublished(state);
  return descriptor;
}
Example 10
Source File: TestPullReplica.java From lucene-solr with Apache License 2.0 | 5 votes |
@Ignore("Ignore until I figure out a way to reliably record state transitions") public void testPullReplicaStates() throws Exception { // Validate that pull replicas go through the correct states when starting, stopping, reconnecting CollectionAdminRequest.createCollection(collectionName, "conf", 1, 1, 0, 0) .setMaxShardsPerNode(100) .process(cluster.getSolrClient()); // cluster.getSolrClient().getZkStateReader().registerCore(collectionName); //TODO: Is this needed? waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 0)); addDocs(500); List<Replica.State> statesSeen = new ArrayList<>(3); cluster.getSolrClient().registerCollectionStateWatcher(collectionName, (liveNodes, collectionState) -> { Replica r = collectionState.getSlice("shard1").getReplica("core_node2"); log.info("CollectionStateWatcher state change: {}", r); if (r == null) { return false; } statesSeen.add(r.getState()); if (log.isInfoEnabled()) { log.info("CollectionStateWatcher saw state: {}", r.getState()); } return r.getState() == Replica.State.ACTIVE; }); CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.PULL).process(cluster.getSolrClient()); waitForState("Replica not added", collectionName, activeReplicaCount(1, 0, 1)); zkClient().printLayoutToStream(System.out); if (log.isInfoEnabled()) { log.info("Saw states: {}", Arrays.toString(statesSeen.toArray())); } assertEquals("Expecting DOWN->RECOVERING->ACTIVE but saw: " + Arrays.toString(statesSeen.toArray()), 3, statesSeen.size()); assertEquals("Expecting DOWN->RECOVERING->ACTIVE but saw: " + Arrays.toString(statesSeen.toArray()), Replica.State.DOWN, statesSeen.get(0)); assertEquals("Expecting DOWN->RECOVERING->ACTIVE but saw: " + Arrays.toString(statesSeen.toArray()), Replica.State.RECOVERING, statesSeen.get(0)); assertEquals("Expecting DOWN->RECOVERING->ACTIVE but saw: " + Arrays.toString(statesSeen.toArray()), Replica.State.ACTIVE, statesSeen.get(0)); }
Example 11
Source File: ZookeeperInfoHandler.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * Walk the collection state JSON object to see if it has any replicas that match * the state the user is filtering by. */ @SuppressWarnings("unchecked") final boolean matchesStatusFilter(Map<String, Object> collectionState, Set<String> liveNodes) { if (filterType != FilterType.status || filter == null || filter.length() == 0) return true; // no status filter, so all match boolean isHealthy = true; // means all replicas for all shards active boolean hasDownedShard = false; // means one or more shards is down boolean replicaInRecovery = false; Map<String, Object> shards = (Map<String, Object>) collectionState.get("shards"); for (Object o : shards.values()) { boolean hasActive = false; Map<String, Object> shard = (Map<String, Object>) o; Map<String, Object> replicas = (Map<String, Object>) shard.get("replicas"); for (Object value : replicas.values()) { Map<String, Object> replicaState = (Map<String, Object>) value; Replica.State coreState = Replica.State.getState((String) replicaState.get(ZkStateReader.STATE_PROP)); String nodeName = (String) replicaState.get("node_name"); // state can lie to you if the node is offline, so need to reconcile with live_nodes too if (!liveNodes.contains(nodeName)) coreState = Replica.State.DOWN; // not on a live node, so must be down if (coreState == Replica.State.ACTIVE) { hasActive = true; // assumed no replicas active and found one that is for this shard } else { if (coreState == Replica.State.RECOVERING) { replicaInRecovery = true; } isHealthy = false; // assumed healthy and found one replica that is not } } if (!hasActive) hasDownedShard = true; // this is bad } if ("healthy".equals(filter)) { return isHealthy; } else if ("degraded".equals(filter)) { return !hasDownedShard && !isHealthy; // means no shards offline but not 100% healthy either } else if ("downed_shard".equals(filter)) { return hasDownedShard; } else if (Replica.State.getState(filter) == Replica.State.RECOVERING) { return !isHealthy && replicaInRecovery; } return 
true; }
Example 12
Source File: ZkController.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Publishes the state of a core by delegating to the four-argument overload,
 * asking it to update the last published state and not to force the publish.
 */
public void publish(final CoreDescriptor cd, final Replica.State state) throws Exception {
  final boolean updateLastState = true;
  final boolean forcePublish = false;
  publish(cd, state, updateLastState, forcePublish);
}
Example 13
Source File: ZkController.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * Publish core state to overseer. */ public void publish(final CoreDescriptor cd, final Replica.State state, boolean updateLastState, boolean forcePublish) throws Exception { if (!forcePublish) { try (SolrCore core = cc.getCore(cd.getName())) { if (core == null || core.isClosed()) { return; } } } MDCLoggingContext.setCoreDescriptor(cc, cd); try { String collection = cd.getCloudDescriptor().getCollectionName(); log.debug("publishing state={}", state); // System.out.println(Thread.currentThread().getStackTrace()[3]); Integer numShards = cd.getCloudDescriptor().getNumShards(); if (numShards == null) { // XXX sys prop hack log.debug("numShards not found on descriptor - reading it from system property"); numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP); } assert collection != null && collection.length() > 0; String shardId = cd.getCloudDescriptor().getShardId(); String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); Map<String,Object> props = new HashMap<>(); props.put(Overseer.QUEUE_OPERATION, "state"); props.put(ZkStateReader.STATE_PROP, state.toString()); props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl()); props.put(ZkStateReader.CORE_NAME_PROP, cd.getName()); props.put(ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles()); props.put(ZkStateReader.NODE_NAME_PROP, getNodeName()); props.put(ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId()); props.put(ZkStateReader.COLLECTION_PROP, collection); props.put(ZkStateReader.REPLICA_TYPE, cd.getCloudDescriptor().getReplicaType().toString()); props.put(ZkStateReader.FORCE_SET_STATE_PROP, "false"); if (numShards != null) { props.put(ZkStateReader.NUM_SHARDS_PROP, numShards.toString()); } if (coreNodeName != null) { props.put(ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); } try (SolrCore core = cc.getCore(cd.getName())) { if (core != null && state == Replica.State.ACTIVE) { ensureRegisteredSearcher(core); } if (core != null && core.getDirectoryFactory().isSharedStorage()) { 
if (core.getDirectoryFactory().isSharedStorage()) { props.put(ZkStateReader.SHARED_STORAGE_PROP, "true"); props.put("dataDir", core.getDataDir()); UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); if (ulog != null) { props.put("ulogDir", ulog.getLogDir()); } } } } catch (SolrCoreInitializationException ex) { // The core had failed to initialize (in a previous request, not this one), hence nothing to do here. if (log.isInfoEnabled()) { log.info("The core '{}' had failed to initialize before.", cd.getName()); } } // pull replicas are excluded because their terms are not considered if (state == Replica.State.RECOVERING && cd.getCloudDescriptor().getReplicaType() != Type.PULL) { // state is used by client, state of replica can change from RECOVERING to DOWN without needed to finish recovery // by calling this we will know that a replica actually finished recovery or not getShardTerms(collection, shardId).startRecovering(coreNodeName); } if (state == Replica.State.ACTIVE && cd.getCloudDescriptor().getReplicaType() != Type.PULL) { getShardTerms(collection, shardId).doneRecovering(coreNodeName); } ZkNodeProps m = new ZkNodeProps(props); if (updateLastState) { cd.getCloudDescriptor().setLastPublished(state); } overseerJobQueue.offer(Utils.toJSON(m)); } finally { MDCLoggingContext.clear(); } }
Example 14
Source File: OverseerCollectionMessageHandler.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Convenience overload that runs the collection command with an empty set of
 * okay exceptions.
 *
 * @return whatever the six-argument overload returns
 */
private List<Replica> collectionCmd(ZkNodeProps message, ModifiableSolrParams params,
                                    NamedList<Object> results, Replica.State stateMatcher, String asyncId) {
  final Set<String> noOkayExceptions = Collections.emptySet();
  return collectionCmd(message, params, results, stateMatcher, asyncId, noOkayExceptions);
}
Example 15
Source File: CoreAdminRequest.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the replica state currently held by this object.
 */
public Replica.State getState() {
  return this.state;
}
Example 16
Source File: CloudDescriptor.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the value of the {@code lastPublished} field.
 */
public Replica.State getLastPublished() {
  return this.lastPublished;
}
Example 17
Source File: CloudDescriptor.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Stores {@code state} in the {@code lastPublished} field.
 */
public void setLastPublished(Replica.State state) {
  this.lastPublished = state;
}
Example 18
Source File: SimCloudManager.java From lucene-solr with Apache License 2.0 | 4 votes |
public String dumpClusterState(boolean withCollections) throws Exception { StringBuilder sb = new StringBuilder(); sb.append("#######################################\n"); sb.append("############ CLUSTER STATE ############\n"); sb.append("#######################################\n"); sb.append("## Live nodes:\t\t").append(getLiveNodesSet().size()).append("\n"); int emptyNodes = 0; int maxReplicas = 0; int minReplicas = Integer.MAX_VALUE; Map<String, Map<Replica.State, AtomicInteger>> replicaStates = new TreeMap<>(); int numReplicas = 0; for (String node : getLiveNodesSet().get()) { List<ReplicaInfo> replicas = getSimClusterStateProvider().simGetReplicaInfos(node); numReplicas += replicas.size(); if (replicas.size() > maxReplicas) { maxReplicas = replicas.size(); } if (minReplicas > replicas.size()) { minReplicas = replicas.size(); } for (ReplicaInfo ri : replicas) { replicaStates.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>()) .computeIfAbsent(ri.getState(), s -> new AtomicInteger()) .incrementAndGet(); } if (replicas.isEmpty()) { emptyNodes++; } } if (minReplicas == Integer.MAX_VALUE) { minReplicas = 0; } sb.append("## Empty nodes:\t").append(emptyNodes).append("\n"); Set<String> deadNodes = getSimNodeStateProvider().simGetDeadNodes(); sb.append("## Dead nodes:\t\t").append(deadNodes.size()).append("\n"); deadNodes.forEach(n -> sb.append("##\t\t").append(n).append("\n")); sb.append("## Collections:\n"); clusterStateProvider.simGetCollectionStats().forEach((coll, stats) -> { sb.append("## * ").append(coll).append('\n'); stats.forEach((k, v) -> { sb.append("## ").append(k).append("\t").append(v).append("\n"); }); }); if (withCollections) { ClusterState state = clusterStateProvider.getClusterState(); state.forEachCollection(coll -> sb.append(coll.toString()).append("\n")); } sb.append("## Max replicas per node:\t").append(maxReplicas).append("\n"); sb.append("## Min replicas per node:\t").append(minReplicas).append("\n"); sb.append("## Total 
replicas:\t\t").append(numReplicas).append("\n"); replicaStates.forEach((c, map) -> { AtomicInteger repCnt = new AtomicInteger(); map.forEach((s, cnt) -> repCnt.addAndGet(cnt.get())); sb.append("## * ").append(c).append("\t\t").append(repCnt.get()).append("\n"); map.forEach((s, cnt) -> sb.append("##\t\t- ").append(String.format(Locale.ROOT, "%-12s %4d", s, cnt.get())).append("\n")); }); sb.append("######### Solr op counts ##########\n"); simGetOpCounts().forEach((k, cnt) -> sb.append("##\t\t- ").append(String.format(Locale.ROOT, "%-14s %4d", k, cnt.get())).append("\n")); sb.append("######### Autoscaling event counts ###########\n"); Map<String, Map<String, AtomicInteger>> counts = simGetEventCounts(); counts.forEach((trigger, map) -> { sb.append("## * Trigger: ").append(trigger).append("\n"); map.forEach((s, cnt) -> sb.append("##\t\t- ").append(String.format(Locale.ROOT, "%-11s %4d", s, cnt.get())).append("\n")); }); return sb.toString(); }
Example 19
Source File: HttpPartitionTest.java From lucene-solr with Apache License 2.0 | 4 votes |
protected void waitToSeeReplicasActive(String testCollectionName, String shardId, Set<String> replicasToCheck, int maxWaitSecs) throws Exception { final RTimer timer = new RTimer(); ZkStateReader zkr = cloudClient.getZkStateReader(); zkr.forceUpdateCollection(testCollectionName); ClusterState cs = zkr.getClusterState(); boolean allReplicasUp = false; long waitMs = 0L; long maxWaitMs = maxWaitSecs * 1000L; while (waitMs < maxWaitMs && !allReplicasUp) { cs = cloudClient.getZkStateReader().getClusterState(); assertNotNull(cs); final DocCollection docCollection = cs.getCollectionOrNull(testCollectionName); assertNotNull(docCollection); Slice shard = docCollection.getSlice(shardId); assertNotNull("No Slice for "+shardId, shard); allReplicasUp = true; // assume true // wait to see all replicas are "active" for (Replica replica : shard.getReplicas()) { if (!replicasToCheck.contains(replica.getName())) continue; final Replica.State state = replica.getState(); if (state != Replica.State.ACTIVE) { if (log.isInfoEnabled()) { log.info("Replica {} is currently {}", replica.getName(), state); } allReplicasUp = false; } } if (!allReplicasUp) { try { Thread.sleep(200L); } catch (Exception ignoreMe) {} waitMs += 200L; } } // end while if (!allReplicasUp) fail("Didn't see replicas "+ replicasToCheck + " come up within " + maxWaitMs + " ms! ClusterState: " + printClusterStateInfo(testCollectionName)); if (log.isInfoEnabled()) { log.info("Took {} ms to see replicas [{}] become active.", timer.getTime(), replicasToCheck); } }
Example 20
Source File: CoreAdminRequest.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Stores the given replica state in this object's {@code state} field.
 */
public void setState(Replica.State state) {
  this.state = state;
}