Java Code Examples for org.apache.solr.common.cloud.DocRouter#Range
The following examples show how to use org.apache.solr.common.cloud.DocRouter#Range.
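For orientation before the examples: a DocRouter.Range is a closed interval [min, max] of 32-bit document hash values, and DocRouter can produce and partition such ranges. The following minimal, standalone sketch (the class name RangeDemo is invented for illustration; the API calls are the same ones the examples below rely on) shows the basic operations:

import java.util.List;

import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.PlainIdRouter;

public class RangeDemo {
  public static void main(String[] args) {
    DocRouter router = new PlainIdRouter();
    // The full hash range spans all 32-bit values.
    DocRouter.Range full = router.fullRange();
    // Partition it into two contiguous sub-ranges, as SPLITSHARD does.
    List<DocRouter.Range> halves = router.partitionRange(2, full);
    for (DocRouter.Range r : halves) {
      // Ranges print their bounds in hex, e.g. "80000000-ffffffff".
      System.out.println(r + " subset of full: " + r.isSubsetOf(full));
    }
  }
}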
Example 1
Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0
private String createHashRangeFq() {
  if (routedByJoinKey) {
    ClusterState clusterState = searcher.getCore().getCoreContainer().getZkController().getClusterState();
    CloudDescriptor desc = searcher.getCore().getCoreDescriptor().getCloudDescriptor();
    Slice slice = clusterState.getCollection(desc.getCollectionName()).getSlicesMap().get(desc.getShardId());
    DocRouter.Range range = slice.getRange();

    // In CompositeIdRouter, the routing prefix only affects the top 16 bits
    int min = range.min & 0xffff0000;
    int max = range.max | 0x0000ffff;

    return String.format(Locale.ROOT, "{!hash_range f=%s l=%d u=%d}", fromField, min, max);
  } else {
    return null;
  }
}
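To make the masking in Example 1 concrete: clearing the low 16 bits of range.min and filling the low 16 bits of range.max widens the slice's range to cover every hash that shares its top-16-bit routing prefix. A small standalone sketch (class name and sample bounds invented for illustration):

public class HashRangeMaskDemo {
  public static void main(String[] args) {
    // Suppose a slice owns the hash range [0x3a8d1200, 0x3a8deeff].
    int sliceMin = 0x3a8d1200;
    int sliceMax = 0x3a8deeff;
    // Keep only the top 16 bits (the CompositeIdRouter prefix bits) of min,
    // and extend max to the end of its prefix bucket.
    int min = sliceMin & 0xffff0000; // 0x3a8d0000
    int max = sliceMax | 0x0000ffff; // 0x3a8dffff
    System.out.printf("min=%08x max=%08x%n", min, max);
  }
}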
Example 2
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static String toSplitString(Collection<DocRouter.Range> splits) throws Exception {
  if (splits == null) {
    return null;
  }
  StringBuilder sb = new StringBuilder();
  for (DocRouter.Range range : splits) {
    if (sb.length() > 0) {
      sb.append(",");
    }
    sb.append(range);
  }
  return sb.toString();
}
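Since DocRouter.Range#toString renders a range as hex bounds, this helper yields a comma-separated string of the form the SPLITSHARD "ranges" parameter expects. A hedged usage sketch (assuming the splits come from partitionRange as in the orientation example above):

// Hypothetical usage: serialize two halves of the full hash range.
DocRouter router = new PlainIdRouter();
List<DocRouter.Range> splits = router.partitionRange(2, router.fullRange());
System.out.println(toSplitString(splits)); // e.g. "80000000-ffffffff,0-7fffffff"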
Example 3
Source File: DistributedZkUpdateProcessor.java From lucene-solr with Apache License 2.0
/** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */
protected boolean amISubShardLeader(DocCollection coll, Slice parentSlice, String id, SolrInputDocument doc) throws InterruptedException {
  // Am I the leader of a shard in "construction/recovery" state?
  String myShardId = cloudDesc.getShardId();
  Slice mySlice = coll.getSlice(myShardId);
  final Slice.State state = mySlice.getState();
  if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
    Replica myLeader = zkController.getZkStateReader().getLeaderRetry(collection, myShardId);
    boolean amILeader = myLeader.getName().equals(cloudDesc.getCoreNodeName());
    if (amILeader) {
      // Does the document belong to my hash range as well?
      DocRouter.Range myRange = mySlice.getRange();
      if (myRange == null) myRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (parentSlice != null) {
        boolean isSubset = parentSlice.getRange() != null && myRange.isSubsetOf(parentSlice.getRange());
        return isSubset && coll.getRouter().isTargetSlice(id, doc, req.getParams(), myShardId, coll);
      } else {
        // delete by query case -- as long as I am a sub shard leader we're fine
        return true;
      }
    }
  }
  return false;
}
Example 4
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void splitShard(String collection, String shardId, List<DocRouter.Range> subRanges, String splitKey, boolean offline) throws SolrServerException, IOException {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("action", CollectionParams.CollectionAction.SPLITSHARD.toString());
  params.set("timing", "true");
  params.set("offline", String.valueOf(offline));
  params.set("collection", collection);
  if (shardId != null) {
    params.set("shard", shardId);
  }
  if (subRanges != null) {
    StringBuilder ranges = new StringBuilder();
    for (int i = 0; i < subRanges.size(); i++) {
      DocRouter.Range subRange = subRanges.get(i);
      ranges.append(subRange.toString());
      if (i < subRanges.size() - 1) ranges.append(",");
    }
    params.set("ranges", ranges.toString());
  }
  if (splitKey != null) {
    params.set("split.key", splitKey);
  }
  @SuppressWarnings({"rawtypes"})
  SolrRequest request = new QueryRequest(params);
  request.setPath("/admin/collections");

  String baseUrl = ((HttpSolrClient) shardToJetty.get(SHARD1).get(0).client.getSolrClient()).getBaseURL();
  baseUrl = baseUrl.substring(0, baseUrl.length() - "collection1".length());

  try (HttpSolrClient baseServer = getHttpSolrClient(baseUrl, 30000, 60000 * 5)) {
    NamedList<Object> rsp = baseServer.request(request);
    if (log.isInfoEnabled()) {
      log.info("Shard split response: {}", Utils.toJSONString(rsp));
    }
  }
}
Example 5
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void indexAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id, int n, Set<String> documentIds) throws Exception {
  index("id", id, "n_ti", n);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1) {
    docCounts[idx]++;
    documentIds.add(String.valueOf(id));
  }
}
Example 6
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
public static int getHashRangeIdx(DocRouter router, List<DocRouter.Range> ranges, String id) {
  int hash = 0;
  if (router instanceof HashBasedRouter) {
    HashBasedRouter hashBasedRouter = (HashBasedRouter) router;
    hash = hashBasedRouter.sliceHash(id, null, null, null);
  }
  for (int i = 0; i < ranges.size(); i++) {
    DocRouter.Range range = ranges.get(i);
    if (range.includes(hash)) return i;
  }
  return -1;
}
Example 7
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void deleteAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id) throws Exception {
  controlClient.deleteById(id);
  cloudClient.deleteById(id);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1) {
    docCounts[idx]--;
  }
}
Example 8
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByPaths(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()),
        null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);

    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should not be present in split index2", 0, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should be present in split index2", 1, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index2 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
Example 9
Source File: OverseerCollectionMessageHandler.java From lucene-solr with Apache License 2.0
DocRouter.Range intersect(DocRouter.Range a, DocRouter.Range b) {
  if (a == null || b == null || !a.overlaps(b)) {
    return null;
  } else if (a.isSubsetOf(b))
    return a;
  else if (b.isSubsetOf(a))
    return b;
  else if (b.includes(a.max)) {
    return new DocRouter.Range(b.min, a.max);
  } else {
    return new DocRouter.Range(a.min, b.max);
  }
}
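To illustrate the partial-overlap branches, a short hedged sketch (the bounds are invented for the example):

// a = [0, 50] and b = [20, 100] overlap, but neither is a subset of the other.
// b.includes(a.max) holds, so intersect returns new DocRouter.Range(b.min, a.max) = [20, 50].
DocRouter.Range a = new DocRouter.Range(0, 50);
DocRouter.Range b = new DocRouter.Range(20, 100);
DocRouter.Range overlap = intersect(a, b); // overlap.toString() -> "14-32" (hex for 20-50)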
Example 10
Source File: SplitOp.java From lucene-solr with Apache License 2.0
/**
 * Returns a list of range counts sorted by the range lower bound, using the indexed "id" field
 * (i.e. the terms are full IDs, not just prefixes)
 */
static Collection<RangeCount> getHashHistogramFromId(SolrIndexSearcher searcher, String idField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();

  TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), idField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  byte sep = (byte) CompositeIdRouter.SEPARATOR.charAt(0);
  TermsEnum termsEnum = terms.iterator();
  BytesRef currPrefix = new BytesRef(); // prefix of the previous "id" term
  int bucketCount = 0; // count of the number of docs in the current bucket

  // We're going to iterate over all terms, so do the minimum amount of work per term.
  // Terms are sorted, so all terms sharing a prefix will be grouped together. The extra work
  // is really just limited to stepping over all the terms in the id field.
  for (;;) {
    BytesRef term = termsEnum.next();

    // compare to current prefix bucket and see if this new term shares the same prefix
    if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) {
      if (StringHelper.startsWith(term, currPrefix)) {
        bucketCount++; // use 1 since we are dealing with unique ids
        continue;
      }
    }

    // At this point the prefix did not match, so if we had a bucket we were working on, record it.
    if (currPrefix.length > 0) {
      numPrefixes++;
      sumBuckets += bucketCount;
      String currPrefixStr = currPrefix.utf8ToString();
      DocRouter.Range range = router.getSearchRangeSingle(currPrefixStr, null, collection);
      RangeCount rangeCount = new RangeCount(range, bucketCount);
      bucketCount = 0;
      RangeCount prev = counts.put(rangeCount.range, rangeCount);
      if (prev != null) {
        // we hit a hash collision, so add the buckets together.
        rangeCount.count += prev.count;
        numCollisions++;
      }
    }

    // if the current term is null, we ran out of values
    if (term == null) break;

    // find the new prefix (if any)

    // resize if needed
    if (currPrefix.length < term.length) {
      currPrefix.bytes = new byte[term.length + 10];
    }

    // Copy the bytes up to and including the separator, and set the length if the separator is found.
    // If there was no separator, then length remains 0 and it's the indicator that we have no prefix bucket
    currPrefix.length = 0;
    for (int i = 0; i < term.length; i++) {
      byte b = term.bytes[i + term.offset];
      currPrefix.bytes[i] = b;
      if (b == sep) {
        currPrefix.length = i + 1;
        bucketCount++;
        break;
      }
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}",
        idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
  }

  return counts.values();
}
Example 11
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByRouteKey(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  File indexDir = createTempDir().toFile();

  CompositeIdRouter r1 = new CompositeIdRouter();
  String splitKey = "sea-line!";
  String key2 = "soul-raising!";
  // murmur2 has a collision on the above two keys
  assertEquals(r1.keyHashRange(splitKey), r1.keyHashRange(key2));

  /*
  More strings with collisions on murmur2 for future reference:
  "Drava" "dessert spoon"
  "Bighorn" "pleasure lover"
  "attributable to" "second edition"
  "sea-line" "soul-raising"
  "lift direction" "testimony meeting"
   */

  for (int i = 0; i < 10; i++) {
    assertU(adoc("id", splitKey + i));
    assertU(adoc("id", key2 + i));
  }
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==20");

  DocRouter.Range splitKeyRange = r1.keyHashRange(splitKey);

  LocalSolrQueryRequest request = null;
  Directory directory = null;
  try {
    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir.getAbsolutePath()), null,
        Lists.newArrayList(splitKeyRange), new CompositeIdRouter(), null, splitKey, splitMethod);
    doSplit(command);
    directory = h.getCore().getDirectoryFactory().get(indexDir.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("split index has wrong number of documents", 10, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = null;
  } finally {
    if (request != null) {
      request.close();
    }
    if (directory != null) {
      h.getCore().getDirectoryFactory().release(directory);
    }
  }
}
Example 12
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByCores(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  // add three docs and 1 delete
  String id1 = "dorothy";
  assertU(adoc("id", id1));
  String id2 = "kansas";
  assertU(adoc("id", id2));
  String id3 = "wizard";
  assertU(adoc("id", id3));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==3");
  assertU(delI("wizard"));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==2");

  List<DocRouter.Range> ranges = getRanges(id1, id2);

  SolrCore core1 = null, core2 = null;
  try {
    core1 = h.getCoreContainer().create("split1",
        ImmutableMap.of("dataDir", indexDir1.getAbsolutePath(), "configSet", "cloud-minimal"));
    core2 = h.getCoreContainer().create("split2",
        ImmutableMap.of("dataDir", indexDir2.getAbsolutePath(), "configSet", "cloud-minimal"));

    LocalSolrQueryRequest request = null;
    try {
      request = lrf.makeRequest("q", "dummy");
      SolrQueryResponse rsp = new SolrQueryResponse();
      SplitIndexCommand command = new SplitIndexCommand(request, rsp, null,
          Lists.newArrayList(core1, core2), ranges, new PlainIdRouter(), null, null, splitMethod);
      doSplit(command);
    } finally {
      if (request != null) request.close();
    }

    @SuppressWarnings("resource")
    final EmbeddedSolrServer server1 = new EmbeddedSolrServer(h.getCoreContainer(), "split1");
    @SuppressWarnings("resource")
    final EmbeddedSolrServer server2 = new EmbeddedSolrServer(h.getCoreContainer(), "split2");
    server1.commit(true, true);
    server2.commit(true, true);
    assertEquals("id:dorothy should be present in split index1", 1,
        server1.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should not be present in split index1", 0,
        server1.query(new SolrQuery("id:kansas")).getResults().getNumFound());
    assertEquals("id:dorothy should not be present in split index2", 0,
        server2.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should be present in split index2", 1,
        server2.query(new SolrQuery("id:kansas")).getResults().getNumFound());
  } finally {
    h.getCoreContainer().unload("split2");
    h.getCoreContainer().unload("split1");
  }
}
Example 13
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitDeletes(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");
    assertU(delI(id2)); // delete id2
    assertU(commit());

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()),
        null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);

    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals(0, reader.numDocs()); // should be empty
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
Example 14
Source File: ClusterStateMutator.java From lucene-solr with Apache License 2.0
@SuppressWarnings({"unchecked"}) public ZkWriteCommand createCollection(ClusterState clusterState, ZkNodeProps message) { String cName = message.getStr(NAME); log.debug("building a new cName: {}", cName); if (clusterState.hasCollection(cName)) { log.warn("Collection {} already exists. exit", cName); return ZkStateWriter.NO_OP; } Map<String, Object> routerSpec = DocRouter.getRouterSpec(message); String routerName = routerSpec.get(NAME) == null ? DocRouter.DEFAULT_NAME : (String) routerSpec.get(NAME); DocRouter router = DocRouter.getDocRouter(routerName); Object messageShardsObj = message.get("shards"); Map<String, Slice> slices; if (messageShardsObj instanceof Map) { // we are being explicitly told the slice data (e.g. coll restore) slices = Slice.loadAllFromMap(cName, (Map<String, Object>)messageShardsObj); } else { List<String> shardNames = new ArrayList<>(); if (router instanceof ImplicitDocRouter) { getShardNames(shardNames, message.getStr("shards", DocRouter.DEFAULT_NAME)); } else { int numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, -1); if (numShards < 1) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "numShards is a required parameter for 'compositeId' router"); getShardNames(numShards, shardNames); } List<DocRouter.Range> ranges = router.partitionRange(shardNames.size(), router.fullRange());//maybe null slices = new LinkedHashMap<>(); for (int i = 0; i < shardNames.size(); i++) { String sliceName = shardNames.get(i); Map<String, Object> sliceProps = new LinkedHashMap<>(1); sliceProps.put(Slice.RANGE, ranges == null ? null : ranges.get(i)); slices.put(sliceName, new Slice(sliceName, null, sliceProps,cName)); } } Map<String, Object> collectionProps = new HashMap<>(); for (Map.Entry<String, Object> e : OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.entrySet()) { Object val = message.get(e.getKey()); if (val == null) { val = OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.get(e.getKey()); } if (val != null) collectionProps.put(e.getKey(), val); } collectionProps.put(DocCollection.DOC_ROUTER, routerSpec); if (message.getStr("fromApi") == null) { collectionProps.put("autoCreated", "true"); } DocCollection newCollection = new DocCollection(cName, slices, collectionProps, router, -1); return new ZkWriteCommand(cName, newCollection); }
Example 15
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
  int totalCount = 0;
  RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
  RangeCount last = null; // keep track of what the last range is

  // Remove counts that don't overlap with currentRange (can happen if someone overrode document routing)
  List<RangeCount> counts = new ArrayList<>(rawCounts.size());
  for (RangeCount rangeCount : rawCounts) {
    if (!rangeCount.range.overlaps(currentRange)) {
      continue;
    }
    totalCount += rangeCount.count;
    if (biggest == null || rangeCount.count > biggest.count) {
      biggest = rangeCount;
    }
    counts.add(rangeCount);
    last = rangeCount;
  }

  if (counts.size() == 0) {
    // we don't have any data to go off of, so do the split the normal way
    return null;
  }

  List<DocRouter.Range> targetRanges = new ArrayList<>();

  if (counts.size() == 1) {
    // We have a single range, so we should split it.
    // Currently, we only split a prefix/bucket when we have just one, but this could be changed/controlled
    // in the future via an allowedSizeDifference parameter (i.e. if just separating prefix buckets results in
    // too large of an imbalance, allow splitting within a prefix)

    // It may already be a partial range, so figure that out
    int lower = Math.max(last.range.min, currentRange.min);
    int upper = Math.min(last.range.max, currentRange.max);
    int mid = lower + (upper - lower) / 2;
    if (mid == lower || mid == upper) {
      // shard too small... this should pretty much never happen, but use default split logic if it does.
      return null;
    }

    // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
    DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, mid);
    DocRouter.Range upperRange = new DocRouter.Range(mid + 1, currentRange.max);
    targetRanges.add(lowerRange);
    targetRanges.add(upperRange);

    return targetRanges;
  }

  // We have at least two ranges, so we want to partition the ranges
  // and avoid splitting any individual range.
  // The "middle" bucket we are going to find will be included with the lower range and excluded from the upper range.

  int targetCount = totalCount / 2;
  RangeCount middle = null;
  RangeCount prev = null;
  int currCount = 0;
  for (RangeCount rangeCount : counts) {
    currCount += rangeCount.count;
    if (currCount >= targetCount) { // this should at least be true on the last range
      middle = rangeCount;
      break;
    }
    prev = rangeCount;
  }

  // check if using the range before the middle one would make a better split point
  int overError = currCount - targetCount; // error if we include middle in first split
  int underError = targetCount - (currCount - middle.count); // error if we include middle in second split
  if (underError < overError) {
    middle = prev;
  }

  // The middle should never be the last, since that means that we won't actually do a split.
  // Minimising the error (above) should already ensure this never happens.
  assert middle != last;

  // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
  DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, middle.range.max);
  DocRouter.Range upperRange = new DocRouter.Range(middle.range.max + 1, currentRange.max);
  targetRanges.add(lowerRange);
  targetRanges.add(upperRange);

  return targetRanges;
}
Example 16
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();
  TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numTriLevel = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    numPrefixes++;

    String termStr = term.utf8ToString();
    int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
    // truncate to first separator since we don't support multiple levels currently
    // NOTE: this does not currently work for tri-level composite ids since the number of bits
    // allocated to the first ID is 16 for a 2 part id and 8 for a 3 part id!
    if (firstSep != termStr.length() - 1 && firstSep > 0) {
      numTriLevel++;
      termStr = termStr.substring(0, firstSep + 1);
    }

    DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
    int numDocs = termsEnum.docFreq();
    sumBuckets += numDocs;

    RangeCount rangeCount = new RangeCount(range, numDocs);

    RangeCount prev = counts.put(rangeCount.range, rangeCount);
    if (prev != null) {
      // we hit a hash collision or truncated a prefix to first level, so add the buckets together.
      rangeCount.count += prev.count;
      numCollisions++;
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numTriLevel={} numCollisions={}",
        timer.getTime(), counts.size(), sumBuckets, numPrefixes, numTriLevel, numCollisions);
  }

  return counts.values();
}
Example 17
Source File: SplitOp.java From lucene-solr with Apache License 2.0
/** Associates a hash range with the number of documents counted in it. */
public RangeCount(DocRouter.Range range, int count) {
  this.range = range;
  this.count = count;
}
Example 18
Source File: SplitHandlerTest.java From lucene-solr with Apache License 2.0
public void doRandomSplitRecommendation(Random rand) throws Exception {
  int low = 0;
  int high = 0;

  while (high - low < 10) {
    low = randomBound(rand);
    high = randomBound(rand);
    if (low > high) {
      int tmp = low;
      low = high;
      high = tmp;
    }
  }

  DocRouter.Range curr = new DocRouter.Range(low, high);

  int maxRanges = rand.nextInt(20);

  int start = low;

  // bucket can start before or after
  if (rand.nextBoolean()) {
    start += rand.nextInt(200) - 100;
    if (start > low) { // underflow
      start = Integer.MIN_VALUE;
    }
  }

  List<SplitOp.RangeCount> counts = new ArrayList<>(maxRanges);
  for (;;) {
    int end = start + rand.nextInt(100) + 1;
    if (end < start) { // overflow
      end = Integer.MAX_VALUE;
    }
    counts.add(new SplitOp.RangeCount(new DocRouter.Range(start, end), rand.nextInt(1000) + 1));
    if (counts.size() >= maxRanges) break;
    if (counts.size() == maxRanges / 2 && rand.nextBoolean()) {
      // transition toward the end of the range (more boundary cases for large ranges)
      start = high - rand.nextInt(100);
      start = Math.max(start, end + 1);
    } else {
      start = end + 1;
    }
    if (rand.nextBoolean()) {
      start += rand.nextInt(100);
    }
    if (start < end) { // overflow
      break;
    }
  }

  try {
    Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
    verifyContiguous(results, curr);
  } catch (Throwable e) {
    // System.err.println(e);
  }
}
Example 19
Source File: DistributedZkUpdateProcessor.java From lucene-solr with Apache License 2.0
private void doDefensiveChecks(DistribPhase phase) {
  boolean isReplayOrPeersync = (updateCommand.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0;
  if (isReplayOrPeersync) return;

  String from = req.getParams().get(DISTRIB_FROM);

  DocCollection docCollection = clusterState.getCollection(collection);
  Slice mySlice = docCollection.getSlice(cloudDesc.getShardId());
  boolean localIsLeader = cloudDesc.isLeader();
  if (DistribPhase.FROMLEADER == phase && localIsLeader && from != null) { // from will be null on log replay
    String fromShard = req.getParams().get(DISTRIB_FROM_PARENT);
    if (fromShard != null) {
      if (mySlice.getState() == Slice.State.ACTIVE) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but we are in active state");
      }
      // shard splitting case -- check ranges to see if we are a sub-shard
      Slice fromSlice = docCollection.getSlice(fromShard);
      DocRouter.Range parentRange = fromSlice.getRange();
      if (parentRange == null) parentRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (mySlice.getRange() != null && !mySlice.getRange().isSubsetOf(parentRange)) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but parent hash range is not superset of my range");
      }
    } else {
      String fromCollection = req.getParams().get(DISTRIB_FROM_COLLECTION); // is it because of a routing rule?
      if (fromCollection == null) {
        log.error("Request says it is coming from leader, but we are the leader: {}", req.getParamString());
        SolrException solrExc = new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from leader, but we are the leader");
        solrExc.setMetadata("cause", "LeaderChanged");
        throw solrExc;
      }
    }
  }

  int count = 0;
  while (((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) && count < 5) {
    count++;
    // re-getting localIsLeader since we published to ZK first before setting localIsLeader value
    localIsLeader = cloudDesc.isLeader();
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }

  if ((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) {
    log.error("ClusterState says we are the leader, but locally we don't think so");
    throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
        "ClusterState says we are the leader (" + zkController.getBaseUrl() + "/" + req.getCore().getName()
            + "), but locally we don't think so. Request came from " + from);
  }
}
Example 20
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
public void splitByRouteFieldTest() throws Exception {
  log.info("Starting testSplitWithRouteField");
  String collectionName = "routeFieldColl";
  int numShards = 4;
  int replicationFactor = 2;
  int maxShardsPerNode = (((numShards * replicationFactor) / getCommonCloudSolrClient()
      .getZkStateReader().getClusterState().getLiveNodes().size())) + 1;

  HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
  String shard_fld = "shard_s";
  try (CloudSolrClient client = createCloudClient(null)) {
    Map<String, Object> props = Utils.makeMap(
        REPLICATION_FACTOR, replicationFactor,
        MAX_SHARDS_PER_NODE, maxShardsPerNode,
        OverseerCollectionMessageHandler.NUM_SLICES, numShards,
        "router.field", shard_fld);

    createCollection(collectionInfos, collectionName, props, client);
  }

  List<Integer> list = collectionInfos.get(collectionName);
  checkForCollection(collectionName, list, null);

  waitForRecoveriesToFinish(false);

  String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);

  try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {

    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = clusterState.getCollection(collectionName).getRouter();
    Slice shard1 = clusterState.getCollection(collectionName).getSlice(SHARD1);
    DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
    final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
    final int[] docCounts = new int[ranges.size()];

    for (int i = 100; i <= 200; i++) {
      String shardKey = "" + (char) ('a' + (i % 26)); // See comment in ShardRoutingTest for hash distribution

      collectionClient.add(getDoc(id, i, "n_ti", i, shard_fld, shardKey));
      int idx = getHashRangeIdx(router, ranges, shardKey);
      if (idx != -1) {
        docCounts[idx]++;
      }
    }

    for (int i = 0; i < docCounts.length; i++) {
      int docCount = docCounts[i];
      log.info("Shard shard1_{} docCount = {}", i, docCount);
    }

    collectionClient.commit();

    trySplit(collectionName, null, SHARD1, 3);

    waitForRecoveriesToFinish(collectionName, false);

    assertEquals(docCounts[0], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals(docCounts[1], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_1")).getResults().getNumFound());
  }
}