Java Code Examples for org.apache.solr.common.cloud.DocRouter#Range
The following examples show how to use org.apache.solr.common.cloud.DocRouter#Range.
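For orientation before the examples: a DocRouter.Range is a closed interval [min, max] of 32-bit document hash values, and DocRouter can produce and partition such ranges. The following minimal, standalone sketch (the class name RangeDemo is invented for illustration; the API calls are the same ones the examples below rely on) shows the basic operations:

import java.util.List;

import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.PlainIdRouter;

public class RangeDemo {
  public static void main(String[] args) {
    DocRouter router = new PlainIdRouter();
    // The full hash range spans all 32-bit values.
    DocRouter.Range full = router.fullRange();
    // Partition it into two contiguous sub-ranges, as SPLITSHARD does.
    List<DocRouter.Range> halves = router.partitionRange(2, full);
    for (DocRouter.Range r : halves) {
      // Ranges print their bounds in hex, e.g. "80000000-ffffffff".
      System.out.println(r + " subset of full: " + r.isSubsetOf(full));
    }
  }
}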
Example 1
Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0
private String createHashRangeFq() {
  if (routedByJoinKey) {
    ClusterState clusterState = searcher.getCore().getCoreContainer().getZkController().getClusterState();
    CloudDescriptor desc = searcher.getCore().getCoreDescriptor().getCloudDescriptor();
    Slice slice = clusterState.getCollection(desc.getCollectionName()).getSlicesMap().get(desc.getShardId());
    DocRouter.Range range = slice.getRange();

    // In CompositeIdRouter, the routing prefix only affects the top 16 bits
    int min = range.min & 0xffff0000;
    int max = range.max | 0x0000ffff;

    return String.format(Locale.ROOT, "{!hash_range f=%s l=%d u=%d}", fromField, min, max);
  } else {
    return null;
  }
}
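To make the masking in Example 1 concrete: clearing the low 16 bits of range.min and filling the low 16 bits of range.max widens the slice's range to cover every hash that shares its top-16-bit routing prefix. A small standalone sketch (class name and sample bounds invented for illustration):

public class HashRangeMaskDemo {
  public static void main(String[] args) {
    // Suppose a slice owns the hash range [0x3a8d1200, 0x3a8deeff].
    int sliceMin = 0x3a8d1200;
    int sliceMax = 0x3a8deeff;
    // Keep only the top 16 bits (the CompositeIdRouter prefix bits) of min,
    // and extend max to the end of its prefix bucket.
    int min = sliceMin & 0xffff0000; // 0x3a8d0000
    int max = sliceMax | 0x0000ffff; // 0x3a8dffff
    System.out.printf("min=%08x max=%08x%n", min, max);
  }
}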
Example 2
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static String toSplitString(Collection<DocRouter.Range> splits) throws Exception {
  if (splits == null) {
    return null;
  }
  StringBuilder sb = new StringBuilder();
  for (DocRouter.Range range : splits) {
    if (sb.length() > 0) {
      sb.append(",");
    }
    sb.append(range);
  }
  return sb.toString();
}
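Since DocRouter.Range#toString renders a range as hex bounds, this helper yields a comma-separated string of the form the SPLITSHARD "ranges" parameter expects. A hedged usage sketch (assuming the splits come from partitionRange as in the orientation example above):

// Hypothetical usage: serialize two halves of the full hash range.
DocRouter router = new PlainIdRouter();
List<DocRouter.Range> splits = router.partitionRange(2, router.fullRange());
System.out.println(toSplitString(splits)); // e.g. "80000000-ffffffff,0-7fffffff"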
Example 3
Source File: DistributedZkUpdateProcessor.java From lucene-solr with Apache License 2.0
/** For {@link org.apache.solr.common.params.CollectionParams.CollectionAction#SPLITSHARD} */
protected boolean amISubShardLeader(DocCollection coll, Slice parentSlice, String id, SolrInputDocument doc) throws InterruptedException {
  // Am I the leader of a shard in "construction/recovery" state?
  String myShardId = cloudDesc.getShardId();
  Slice mySlice = coll.getSlice(myShardId);
  final Slice.State state = mySlice.getState();
  if (state == Slice.State.CONSTRUCTION || state == Slice.State.RECOVERY) {
    Replica myLeader = zkController.getZkStateReader().getLeaderRetry(collection, myShardId);
    boolean amILeader = myLeader.getName().equals(cloudDesc.getCoreNodeName());
    if (amILeader) {
      // Does the document belong to my hash range as well?
      DocRouter.Range myRange = mySlice.getRange();
      if (myRange == null) myRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (parentSlice != null) {
        boolean isSubset = parentSlice.getRange() != null && myRange.isSubsetOf(parentSlice.getRange());
        return isSubset && coll.getRouter().isTargetSlice(id, doc, req.getParams(), myShardId, coll);
      } else {
        // delete by query case -- as long as I am a sub shard leader we're fine
        return true;
      }
    }
  }
  return false;
}
Example 4
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void splitShard(String collection, String shardId, List<DocRouter.Range> subRanges, String splitKey, boolean offline) throws SolrServerException, IOException {
  ModifiableSolrParams params = new ModifiableSolrParams();
  params.set("action", CollectionParams.CollectionAction.SPLITSHARD.toString());
  params.set("timing", "true");
  params.set("offline", String.valueOf(offline));
  params.set("collection", collection);
  if (shardId != null) {
    params.set("shard", shardId);
  }
  if (subRanges != null) {
    StringBuilder ranges = new StringBuilder();
    for (int i = 0; i < subRanges.size(); i++) {
      DocRouter.Range subRange = subRanges.get(i);
      ranges.append(subRange.toString());
      if (i < subRanges.size() - 1) ranges.append(",");
    }
    params.set("ranges", ranges.toString());
  }
  if (splitKey != null) {
    params.set("split.key", splitKey);
  }
  @SuppressWarnings({"rawtypes"})
  SolrRequest request = new QueryRequest(params);
  request.setPath("/admin/collections");

  String baseUrl = ((HttpSolrClient) shardToJetty.get(SHARD1).get(0).client.getSolrClient()).getBaseURL();
  baseUrl = baseUrl.substring(0, baseUrl.length() - "collection1".length());

  try (HttpSolrClient baseServer = getHttpSolrClient(baseUrl, 30000, 60000 * 5)) {
    NamedList<Object> rsp = baseServer.request(request);
    if (log.isInfoEnabled()) {
      log.info("Shard split response: {}", Utils.toJSONString(rsp));
    }
  }
}
Example 5
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void indexAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id, int n, Set<String> documentIds) throws Exception {
  index("id", id, "n_ti", n);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1) {
    docCounts[idx]++;
    documentIds.add(String.valueOf(id));
  }
}
Example 6
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
public static int getHashRangeIdx(DocRouter router, List<DocRouter.Range> ranges, String id) {
  int hash = 0;
  if (router instanceof HashBasedRouter) {
    HashBasedRouter hashBasedRouter = (HashBasedRouter) router;
    hash = hashBasedRouter.sliceHash(id, null, null, null);
  }
  for (int i = 0; i < ranges.size(); i++) {
    DocRouter.Range range = ranges.get(i);
    if (range.includes(hash)) return i;
  }
  return -1;
}
Example 7
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
protected void deleteAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id) throws Exception {
  controlClient.deleteById(id);
  cloudClient.deleteById(id);

  int idx = getHashRangeIdx(router, ranges, id);
  if (idx != -1) {
    docCounts[idx]--;
  }
}
Example 8
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByPaths(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()),
        null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);

    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should not be present in split index2", 0, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should be present in split index2", 1, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index2 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
Example 9
Source File: OverseerCollectionMessageHandler.java From lucene-solr with Apache License 2.0
DocRouter.Range intersect(DocRouter.Range a, DocRouter.Range b) {
  if (a == null || b == null || !a.overlaps(b)) {
    return null;
  } else if (a.isSubsetOf(b))
    return a;
  else if (b.isSubsetOf(a))
    return b;
  else if (b.includes(a.max)) {
    return new DocRouter.Range(b.min, a.max);
  } else {
    return new DocRouter.Range(a.min, b.max);
  }
}
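To illustrate the partial-overlap branches, a short hedged sketch (the bounds are invented for the example):

// a = [0, 50] and b = [20, 100] overlap, but neither is a subset of the other.
// b.includes(a.max) holds, so intersect returns new DocRouter.Range(b.min, a.max) = [20, 50].
DocRouter.Range a = new DocRouter.Range(0, 50);
DocRouter.Range b = new DocRouter.Range(20, 100);
DocRouter.Range overlap = intersect(a, b); // overlap.toString() -> "14-32" (hex for 20-50)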
Example 10
Source File: SplitOp.java From lucene-solr with Apache License 2.0
/**
 * Returns a list of range counts sorted by the range lower bound, using the indexed "id" field
 * (i.e. the terms are full IDs, not just prefixes)
 */
static Collection<RangeCount> getHashHistogramFromId(SolrIndexSearcher searcher, String idField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();

  TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), idField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  byte sep = (byte) CompositeIdRouter.SEPARATOR.charAt(0);
  TermsEnum termsEnum = terms.iterator();
  BytesRef currPrefix = new BytesRef(); // prefix of the previous "id" term
  int bucketCount = 0; // count of the number of docs in the current bucket

  // We're going to iterate over all terms, so do the minimum amount of work per term.
  // Terms are sorted, so all terms sharing a prefix will be grouped together. The extra work
  // is really just limited to stepping over all the terms in the id field.
  for (;;) {
    BytesRef term = termsEnum.next();

    // compare to current prefix bucket and see if this new term shares the same prefix
    if (term != null && term.length >= currPrefix.length && currPrefix.length > 0) {
      if (StringHelper.startsWith(term, currPrefix)) {
        bucketCount++; // use 1 since we are dealing with unique ids
        continue;
      }
    }

    // At this point the prefix did not match, so if we had a bucket we were working on, record it.
    if (currPrefix.length > 0) {
      numPrefixes++;
      sumBuckets += bucketCount;
      String currPrefixStr = currPrefix.utf8ToString();
      DocRouter.Range range = router.getSearchRangeSingle(currPrefixStr, null, collection);
      RangeCount rangeCount = new RangeCount(range, bucketCount);
      bucketCount = 0;
      RangeCount prev = counts.put(rangeCount.range, rangeCount);
      if (prev != null) {
        // we hit a hash collision, so add the buckets together.
        rangeCount.count += prev.count;
        numCollisions++;
      }
    }

    // if the current term is null, we ran out of values
    if (term == null) break;

    // find the new prefix (if any)

    // resize if needed
    if (currPrefix.length < term.length) {
      currPrefix.bytes = new byte[term.length + 10];
    }

    // Copy the bytes up to and including the separator, and set the length if the separator is found.
    // If there was no separator, then length remains 0 and it's the indicator that we have no prefix bucket
    currPrefix.length = 0;
    for (int i = 0; i < term.length; i++) {
      byte b = term.bytes[i + term.offset];
      currPrefix.bytes[i] = b;
      if (b == sep) {
        currPrefix.length = i + 1;
        bucketCount++;
        break;
      }
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram from idField {}: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numCollisions={}",
        idField, timer.getTime(), counts.size(), sumBuckets, numPrefixes, numCollisions);
  }

  return counts.values();
}
Example 11
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByRouteKey(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  File indexDir = createTempDir().toFile();

  CompositeIdRouter r1 = new CompositeIdRouter();
  String splitKey = "sea-line!";
  String key2 = "soul-raising!";
  // murmur2 has a collision on the above two keys
  assertEquals(r1.keyHashRange(splitKey), r1.keyHashRange(key2));

  /*
  More strings with collisions on murmur2 for future reference:
  "Drava" "dessert spoon"
  "Bighorn" "pleasure lover"
  "attributable to" "second edition"
  "sea-line" "soul-raising"
  "lift direction" "testimony meeting"
   */

  for (int i = 0; i < 10; i++) {
    assertU(adoc("id", splitKey + i));
    assertU(adoc("id", key2 + i));
  }
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==20");

  DocRouter.Range splitKeyRange = r1.keyHashRange(splitKey);

  LocalSolrQueryRequest request = null;
  Directory directory = null;
  try {
    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir.getAbsolutePath()), null,
        Lists.newArrayList(splitKeyRange), new CompositeIdRouter(), null, splitKey, splitMethod);
    doSplit(command);
    directory = h.getCore().getDirectoryFactory().get(indexDir.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("split index has wrong number of documents", 10, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
    directory = null;
  } finally {
    if (request != null) {
      request.close();
    }
    if (directory != null) {
      h.getCore().getDirectoryFactory().release(directory);
    }
  }
}
Example 12
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitByCores(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  // add three docs and 1 delete
  String id1 = "dorothy";
  assertU(adoc("id", id1));
  String id2 = "kansas";
  assertU(adoc("id", id2));
  String id3 = "wizard";
  assertU(adoc("id", id3));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==3");
  assertU(delI("wizard"));
  assertU(commit());
  assertJQ(req("q", "*:*"), "/response/numFound==2");

  List<DocRouter.Range> ranges = getRanges(id1, id2);

  SolrCore core1 = null, core2 = null;
  try {
    core1 = h.getCoreContainer().create("split1",
        ImmutableMap.of("dataDir", indexDir1.getAbsolutePath(), "configSet", "cloud-minimal"));
    core2 = h.getCoreContainer().create("split2",
        ImmutableMap.of("dataDir", indexDir2.getAbsolutePath(), "configSet", "cloud-minimal"));

    LocalSolrQueryRequest request = null;
    try {
      request = lrf.makeRequest("q", "dummy");
      SolrQueryResponse rsp = new SolrQueryResponse();
      SplitIndexCommand command = new SplitIndexCommand(request, rsp, null,
          Lists.newArrayList(core1, core2), ranges, new PlainIdRouter(), null, null, splitMethod);
      doSplit(command);
    } finally {
      if (request != null) request.close();
    }

    @SuppressWarnings("resource")
    final EmbeddedSolrServer server1 = new EmbeddedSolrServer(h.getCoreContainer(), "split1");
    @SuppressWarnings("resource")
    final EmbeddedSolrServer server2 = new EmbeddedSolrServer(h.getCoreContainer(), "split2");
    server1.commit(true, true);
    server2.commit(true, true);
    assertEquals("id:dorothy should be present in split index1", 1,
        server1.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should not be present in split index1", 0,
        server1.query(new SolrQuery("id:kansas")).getResults().getNumFound());
    assertEquals("id:dorothy should not be present in split index2", 0,
        server2.query(new SolrQuery("id:dorothy")).getResults().getNumFound());
    assertEquals("id:kansas should be present in split index2", 1,
        server2.query(new SolrQuery("id:kansas")).getResults().getNumFound());
  } finally {
    h.getCoreContainer().unload("split2");
    h.getCoreContainer().unload("split1");
  }
}
Example 13
Source File: SolrIndexSplitterTest.java From lucene-solr with Apache License 2.0
private void doTestSplitDeletes(SolrIndexSplitter.SplitMethod splitMethod) throws Exception {
  LocalSolrQueryRequest request = null;
  try {
    // add two docs
    String id1 = "dorothy";
    assertU(adoc("id", id1));
    String id2 = "kansas";
    assertU(adoc("id", id2));
    assertU(commit());
    assertJQ(req("q", "*:*"), "/response/numFound==2");
    assertU(delI(id2)); // delete id2
    assertU(commit());

    // find minHash/maxHash hash ranges
    List<DocRouter.Range> ranges = getRanges(id1, id2);

    request = lrf.makeRequest("q", "dummy");
    SolrQueryResponse rsp = new SolrQueryResponse();
    SplitIndexCommand command = new SplitIndexCommand(request, rsp,
        Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()),
        null, ranges, new PlainIdRouter(), null, null, splitMethod);
    doSplit(command);

    Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    DirectoryReader reader = DirectoryReader.open(directory);
    assertEquals("id:dorothy should be present in split index1", 1, reader.docFreq(new Term("id", "dorothy")));
    assertEquals("id:kansas should not be present in split index1", 0, reader.docFreq(new Term("id", "kansas")));
    assertEquals("split index1 should have only one document", 1, reader.numDocs());
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);

    directory = h.getCore().getDirectoryFactory().get(indexDir2.getAbsolutePath(),
        DirectoryFactory.DirContext.DEFAULT, h.getCore().getSolrConfig().indexConfig.lockType);
    reader = DirectoryReader.open(directory);
    assertEquals(0, reader.numDocs()); // should be empty
    reader.close();
    h.getCore().getDirectoryFactory().release(directory);
  } finally {
    if (request != null) request.close(); // decrefs the searcher
  }
}
Example 14
Source File: ClusterStateMutator.java From lucene-solr with Apache License 2.0
@SuppressWarnings({"unchecked"}) public ZkWriteCommand createCollection(ClusterState clusterState, ZkNodeProps message) { String cName = message.getStr(NAME); log.debug("building a new cName: {}", cName); if (clusterState.hasCollection(cName)) { log.warn("Collection {} already exists. exit", cName); return ZkStateWriter.NO_OP; } Map<String, Object> routerSpec = DocRouter.getRouterSpec(message); String routerName = routerSpec.get(NAME) == null ? DocRouter.DEFAULT_NAME : (String) routerSpec.get(NAME); DocRouter router = DocRouter.getDocRouter(routerName); Object messageShardsObj = message.get("shards"); Map<String, Slice> slices; if (messageShardsObj instanceof Map) { // we are being explicitly told the slice data (e.g. coll restore) slices = Slice.loadAllFromMap(cName, (Map<String, Object>)messageShardsObj); } else { List<String> shardNames = new ArrayList<>(); if (router instanceof ImplicitDocRouter) { getShardNames(shardNames, message.getStr("shards", DocRouter.DEFAULT_NAME)); } else { int numShards = message.getInt(ZkStateReader.NUM_SHARDS_PROP, -1); if (numShards < 1) throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "numShards is a required parameter for 'compositeId' router"); getShardNames(numShards, shardNames); } List<DocRouter.Range> ranges = router.partitionRange(shardNames.size(), router.fullRange());//maybe null slices = new LinkedHashMap<>(); for (int i = 0; i < shardNames.size(); i++) { String sliceName = shardNames.get(i); Map<String, Object> sliceProps = new LinkedHashMap<>(1); sliceProps.put(Slice.RANGE, ranges == null ? null : ranges.get(i)); slices.put(sliceName, new Slice(sliceName, null, sliceProps,cName)); } } Map<String, Object> collectionProps = new HashMap<>(); for (Map.Entry<String, Object> e : OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.entrySet()) { Object val = message.get(e.getKey()); if (val == null) { val = OverseerCollectionMessageHandler.COLLECTION_PROPS_AND_DEFAULTS.get(e.getKey()); } if (val != null) collectionProps.put(e.getKey(), val); } collectionProps.put(DocCollection.DOC_ROUTER, routerSpec); if (message.getStr("fromApi") == null) { collectionProps.put("autoCreated", "true"); } DocCollection newCollection = new DocCollection(cName, slices, collectionProps, router, -1); return new ZkWriteCommand(cName, newCollection); }
Example 15
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
  int totalCount = 0;
  RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
  RangeCount last = null; // keep track of what the last range is

  // Remove counts that don't overlap with currentRange (can happen if someone overrode document routing)
  List<RangeCount> counts = new ArrayList<>(rawCounts.size());
  for (RangeCount rangeCount : rawCounts) {
    if (!rangeCount.range.overlaps(currentRange)) {
      continue;
    }
    totalCount += rangeCount.count;
    if (biggest == null || rangeCount.count > biggest.count) {
      biggest = rangeCount;
    }
    counts.add(rangeCount);
    last = rangeCount;
  }

  if (counts.size() == 0) {
    // we don't have any data to go off of, so do the split the normal way
    return null;
  }

  List<DocRouter.Range> targetRanges = new ArrayList<>();

  if (counts.size() == 1) {
    // We have a single range, so we should split it.
    // Currently, we only split a prefix/bucket when we have just one, but this could be changed/controlled
    // in the future via an allowedSizeDifference parameter (i.e. if just separating prefix buckets results in
    // too large of an imbalance, allow splitting within a prefix)

    // It may already be a partial range, so figure that out
    int lower = Math.max(last.range.min, currentRange.min);
    int upper = Math.min(last.range.max, currentRange.max);
    int mid = lower + (upper - lower) / 2;
    if (mid == lower || mid == upper) {
      // shard too small... this should pretty much never happen, but use default split logic if it does.
      return null;
    }

    // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
    DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, mid);
    DocRouter.Range upperRange = new DocRouter.Range(mid + 1, currentRange.max);
    targetRanges.add(lowerRange);
    targetRanges.add(upperRange);

    return targetRanges;
  }

  // We have at least two ranges, so we want to partition the ranges
  // and avoid splitting any individual range.
  // The "middle" bucket we are going to find will be included with the lower range and excluded from the upper range.

  int targetCount = totalCount / 2;
  RangeCount middle = null;
  RangeCount prev = null;
  int currCount = 0;
  for (RangeCount rangeCount : counts) {
    currCount += rangeCount.count;
    if (currCount >= targetCount) { // this should at least be true on the last range
      middle = rangeCount;
      break;
    }
    prev = rangeCount;
  }

  // check if using the range before the middle one would make a better split point
  int overError = currCount - targetCount; // error if we include middle in first split
  int underError = targetCount - (currCount - middle.count); // error if we include middle in second split
  if (underError < overError) {
    middle = prev;
  }

  // The middle should never be the last, since that means that we won't actually do a split.
  // Minimising the error (above) should already ensure this never happens.
  assert middle != last;

  // Make sure to include the shard's current range in the new ranges so we don't create useless empty shards.
  DocRouter.Range lowerRange = new DocRouter.Range(currentRange.min, middle.range.max);
  DocRouter.Range upperRange = new DocRouter.Range(middle.range.max + 1, currentRange.max);
  targetRanges.add(lowerRange);
  targetRanges.add(upperRange);

  return targetRanges;
}
Example 16
Source File: SplitOp.java From lucene-solr with Apache License 2.0
static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
  RTimer timer = new RTimer();
  TreeMap<DocRouter.Range, RangeCount> counts = new TreeMap<>();

  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
  if (terms == null) {
    return counts.values();
  }

  int numPrefixes = 0;
  int numTriLevel = 0;
  int numCollisions = 0;
  long sumBuckets = 0;

  TermsEnum termsEnum = terms.iterator();
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    numPrefixes++;

    String termStr = term.utf8ToString();
    int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
    // truncate to first separator since we don't support multiple levels currently
    // NOTE: this does not currently work for tri-level composite ids since the number of bits
    // allocated to the first ID is 16 for a 2 part id and 8 for a 3 part id!
    if (firstSep != termStr.length() - 1 && firstSep > 0) {
      numTriLevel++;
      termStr = termStr.substring(0, firstSep + 1);
    }

    DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
    int numDocs = termsEnum.docFreq();
    sumBuckets += numDocs;

    RangeCount rangeCount = new RangeCount(range, numDocs);

    RangeCount prev = counts.put(rangeCount.range, rangeCount);
    if (prev != null) {
      // we hit a hash collision or truncated a prefix to first level, so add the buckets together.
      rangeCount.count += prev.count;
      numCollisions++;
    }
  }

  if (log.isInfoEnabled()) {
    log.info("Split histogram: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numTriLevel={} numCollisions={}",
        timer.getTime(), counts.size(), sumBuckets, numPrefixes, numTriLevel, numCollisions);
  }

  return counts.values();
}
Example 17
Source File: SplitOp.java From lucene-solr with Apache License 2.0
/** Associates a hash range with the number of documents counted in it. */
public RangeCount(DocRouter.Range range, int count) {
  this.range = range;
  this.count = count;
}
Example 18
Source File: SplitHandlerTest.java From lucene-solr with Apache License 2.0
public void doRandomSplitRecommendation(Random rand) throws Exception {
  int low = 0;
  int high = 0;

  while (high - low < 10) {
    low = randomBound(rand);
    high = randomBound(rand);
    if (low > high) {
      int tmp = low;
      low = high;
      high = tmp;
    }
  }

  DocRouter.Range curr = new DocRouter.Range(low, high);

  int maxRanges = rand.nextInt(20);

  int start = low;

  // bucket can start before or after
  if (rand.nextBoolean()) {
    start += rand.nextInt(200) - 100;
    if (start > low) { // underflow
      start = Integer.MIN_VALUE;
    }
  }

  List<SplitOp.RangeCount> counts = new ArrayList<>(maxRanges);
  for (;;) {
    int end = start + rand.nextInt(100) + 1;
    if (end < start) { // overflow
      end = Integer.MAX_VALUE;
    }
    counts.add(new SplitOp.RangeCount(new DocRouter.Range(start, end), rand.nextInt(1000) + 1));
    if (counts.size() >= maxRanges) break;
    if (counts.size() == maxRanges / 2 && rand.nextBoolean()) {
      // transition toward the end of the range (more boundary cases for large ranges)
      start = high - rand.nextInt(100);
      start = Math.max(start, end + 1);
    } else {
      start = end + 1;
    }
    if (rand.nextBoolean()) {
      start += rand.nextInt(100);
    }
    if (start < end) { // overflow
      break;
    }
  }

  try {
    Collection<DocRouter.Range> results = SplitOp.getSplits(counts, curr);
    verifyContiguous(results, curr);
  } catch (Throwable e) {
    // System.err.println(e);
  }
}
Example 19
Source File: DistributedZkUpdateProcessor.java From lucene-solr with Apache License 2.0
private void doDefensiveChecks(DistribPhase phase) {
  boolean isReplayOrPeersync = (updateCommand.getFlags() & (UpdateCommand.REPLAY | UpdateCommand.PEER_SYNC)) != 0;
  if (isReplayOrPeersync) return;

  String from = req.getParams().get(DISTRIB_FROM);

  DocCollection docCollection = clusterState.getCollection(collection);
  Slice mySlice = docCollection.getSlice(cloudDesc.getShardId());
  boolean localIsLeader = cloudDesc.isLeader();
  if (DistribPhase.FROMLEADER == phase && localIsLeader && from != null) { // from will be null on log replay
    String fromShard = req.getParams().get(DISTRIB_FROM_PARENT);
    if (fromShard != null) {
      if (mySlice.getState() == Slice.State.ACTIVE) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but we are in active state");
      }
      // shard splitting case -- check ranges to see if we are a sub-shard
      Slice fromSlice = docCollection.getSlice(fromShard);
      DocRouter.Range parentRange = fromSlice.getRange();
      if (parentRange == null) parentRange = new DocRouter.Range(Integer.MIN_VALUE, Integer.MAX_VALUE);
      if (mySlice.getRange() != null && !mySlice.getRange().isSubsetOf(parentRange)) {
        throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from parent shard leader but parent hash range is not superset of my range");
      }
    } else {
      String fromCollection = req.getParams().get(DISTRIB_FROM_COLLECTION); // is it because of a routing rule?
      if (fromCollection == null) {
        log.error("Request says it is coming from leader, but we are the leader: {}", req.getParamString());
        SolrException solrExc = new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
            "Request says it is coming from leader, but we are the leader");
        solrExc.setMetadata("cause", "LeaderChanged");
        throw solrExc;
      }
    }
  }

  int count = 0;
  while (((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) && count < 5) {
    count++;
    // re-getting localIsLeader since we published to ZK first before setting localIsLeader value
    localIsLeader = cloudDesc.isLeader();
    try {
      Thread.sleep(500);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
    }
  }

  if ((isLeader && !localIsLeader) || (isSubShardLeader && !localIsLeader)) {
    log.error("ClusterState says we are the leader, but locally we don't think so");
    throw new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE,
        "ClusterState says we are the leader (" + zkController.getBaseUrl() + "/" + req.getCore().getName()
            + "), but locally we don't think so. Request came from " + from);
  }
}
Example 20
Source File: ShardSplitTest.java From lucene-solr with Apache License 2.0
public void splitByRouteFieldTest() throws Exception {
  log.info("Starting testSplitWithRouteField");
  String collectionName = "routeFieldColl";
  int numShards = 4;
  int replicationFactor = 2;
  int maxShardsPerNode = (((numShards * replicationFactor) / getCommonCloudSolrClient()
      .getZkStateReader().getClusterState().getLiveNodes().size())) + 1;

  HashMap<String, List<Integer>> collectionInfos = new HashMap<>();
  String shard_fld = "shard_s";
  try (CloudSolrClient client = createCloudClient(null)) {
    Map<String, Object> props = Utils.makeMap(
        REPLICATION_FACTOR, replicationFactor,
        MAX_SHARDS_PER_NODE, maxShardsPerNode,
        OverseerCollectionMessageHandler.NUM_SLICES, numShards,
        "router.field", shard_fld);

    createCollection(collectionInfos, collectionName, props, client);
  }

  List<Integer> list = collectionInfos.get(collectionName);
  checkForCollection(collectionName, list, null);

  waitForRecoveriesToFinish(false);

  String url = getUrlFromZk(getCommonCloudSolrClient().getZkStateReader().getClusterState(), collectionName);

  try (HttpSolrClient collectionClient = getHttpSolrClient(url)) {

    ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
    final DocRouter router = clusterState.getCollection(collectionName).getRouter();
    Slice shard1 = clusterState.getCollection(collectionName).getSlice(SHARD1);
    DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
    final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
    final int[] docCounts = new int[ranges.size()];

    for (int i = 100; i <= 200; i++) {
      String shardKey = "" + (char) ('a' + (i % 26)); // See comment in ShardRoutingTest for hash distribution

      collectionClient.add(getDoc(id, i, "n_ti", i, shard_fld, shardKey));
      int idx = getHashRangeIdx(router, ranges, shardKey);
      if (idx != -1) {
        docCounts[idx]++;
      }
    }

    for (int i = 0; i < docCounts.length; i++) {
      int docCount = docCounts[i];
      log.info("Shard shard1_{} docCount = {}", i, docCount);
    }

    collectionClient.commit();

    trySplit(collectionName, null, SHARD1, 3);

    waitForRecoveriesToFinish(collectionName, false);

    assertEquals(docCounts[0], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_0")).getResults().getNumFound());
    assertEquals(docCounts[1], collectionClient.query(new SolrQuery("*:*").setParam("shards", "shard1_1")).getResults().getNumFound());
  }
}