org.apache.lucene.search.TopFieldCollector Java Examples

Source File: AlfrescoReRankQParserPlugin.java From SearchServices with GNU Lesser General Public License v3.0

6 votes

/**
 * Creates a re-rank collector and the first-pass collector it delegates to.
 *
 * <p>The first-pass collector is sized to {@code max(reRankDocs, length)}. If the command carries
 * no sort, a {@code TopScoreDocCollector} is created; otherwise the sort is rewritten against
 * {@code searcher} and a {@code TopFieldCollector} is created for it.
 *
 * @param reRankDocs      stored on the collector; also a lower bound for the first-pass size
 * @param length          stored on the collector; also a lower bound for the first-pass size
 * @param reRankQuery     stored on the collector
 * @param reRankWeight    stored on the collector
 * @param cmd             command whose sort (possibly {@code null}) selects the first-pass collector
 * @param searcher        stored on the collector and used to rewrite the sort
 * @param boostedPriority stored on the collector
 * @param scale           stored on the collector
 * @throws IOException propagated from rewriting the sort or creating the collector
 */
public ReRankCollector(int reRankDocs,
                       int length,
                       Query reRankQuery,
                       double reRankWeight,
                       QueryCommand cmd,
                       IndexSearcher searcher,
                       Map<BytesRef, Integer> boostedPriority,
                       boolean scale) throws IOException {
    super(null);

    // Plain state captured from the arguments.
    this.reRankDocs = reRankDocs;
    this.length = length;
    this.reRankQuery = reRankQuery;
    this.reRankWeight = reRankWeight;
    this.searcher = searcher;
    this.boostedPriority = boostedPriority;
    this.scale = scale;

    // Both collector flavours are sized identically.
    final int numHits = Math.max(reRankDocs, length);
    final Sort requestedSort = cmd.getSort();
    if (requestedSort != null) {
        final Sort rewritten = requestedSort.rewrite(searcher);
        this.mainCollector = TopFieldCollector.create(rewritten, numHits, null, false, true, true);
    } else {
        this.mainCollector = TopScoreDocCollector.create(numHits, null);
    }
}

Source File: LuceneOrderedDocCollector.java From crate with Apache License 2.0

6 votes

/**
 * Runs a follow-up search that continues after {@code lastDoc}.
 *
 * <p>Returns {@code empty()} without searching when {@code exhausted()} reports true. Otherwise
 * registers {@code batchSize * FIELD_DOC_SIZE} bytes with {@code ramAccounting} and delegates
 * to {@code doSearch} with a fresh {@code TopFieldCollector}.
 *
 * @return the rows produced by {@code doSearch}, or {@code empty()} when exhausted
 * @throws IOException propagated from the underlying search
 */
private KeyIterable<ShardId, Row> searchMore() throws IOException {
    if (exhausted()) {
        LOGGER.trace("searchMore but EXHAUSTED");
        return empty();
    }

    if (LOGGER.isDebugEnabled()) {
        LOGGER.debug("searchMore from [{}]", lastDoc);
    }

    ramAccounting.addBytes(batchSize * FIELD_DOC_SIZE);
    return doSearch(
        // trailing 0: do not process any hits
        TopFieldCollector.create(sort, batchSize, lastDoc, 0),
        minScore,
        query(lastDoc)
    );
}

Source File: LuceneOrderedDocCollector.java From crate with Apache License 2.0

6 votes

/**
 * Runs the first search of this collector.
 *
 * <p>When the batch size exceeds {@code OPTIMIZE_BATCH_SIZE_THRESHOLD} and has not been reduced
 * before, it is capped once to the number of matching documents plus one. All collector
 * expressions are then started and given the scorer, memory for the batch is registered with
 * {@code ramAccounting}, and the search is delegated to {@code doSearch}.
 *
 * @return the rows produced by {@code doSearch}
 * @throws IOException propagated from counting or searching
 */
private KeyIterable<ShardId, Row> initialSearch() throws IOException {
    final boolean shouldShrinkBatch = !batchSizeReduced && batchSize > OPTIMIZE_BATCH_SIZE_THRESHOLD;
    if (shouldShrinkBatch) {
        // The flag is raised before counting so the reduction is attempted only once.
        batchSizeReduced = true;
        // + 1 because TopFieldCollector doesn't work with size=0 and we need to set the `exhausted` flag properly.
        final int matchesPlusOne = searcher.count(query) + 1;
        batchSize = Math.min(batchSize, matchesPlusOne);
    }

    for (LuceneCollectorExpression<?> collectorExpression : expressions) {
        collectorExpression.startCollect(collectorContext);
        collectorExpression.setScorer(scorer);
    }

    ramAccounting.addBytes(batchSize * FIELD_DOC_SIZE);
    return doSearch(
        // trailing 0: do not process any hits
        TopFieldCollector.create(sort, batchSize, 0),
        minScore,
        query
    );
}

Source File: Grouping.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the first-pass collector: a top-docs collector restricted to the documents matching
 * {@code query}.
 *
 * <p>A field-sorted collector is used when {@code withinGroupSort} is set and differs from
 * {@code Sort.RELEVANCE}; in that case a {@code MaxScoreCollector} is added alongside it when
 * {@code needScores} is true. Otherwise a score-sorted collector is used.
 *
 * @return the filter collector, which is also stored in {@code collector}
 * @throws IOException propagated from the searcher or collector creation
 */
@Override
protected Collector createFirstPassCollector() throws IOException {
  final DocSet matchingDocs = searcher.getDocSet(query);
  final int numToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
  final boolean sortedByField = withinGroupSort != null && !withinGroupSort.equals(Sort.RELEVANCE);

  final Collector delegate;
  if (sortedByField) {
    topCollector = TopFieldCollector.create(searcher.weightSort(withinGroupSort), numToCollect, Integer.MAX_VALUE);
    if (!needScores) {
      delegate = topCollector;
    } else {
      // The max score is tracked by a separate collector running next to the field-sorted one.
      maxScoreCollector = new MaxScoreCollector();
      delegate = MultiCollector.wrap(topCollector, maxScoreCollector);
    }
  } else {
    topCollector = TopScoreDocCollector.create(numToCollect, Integer.MAX_VALUE);
    delegate = topCollector;
  }

  collector = new FilterCollector(matchingDocs, delegate);
  return collector;
}

Source File: Grouping.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Turns the collected top docs into a single {@code GroupDocs} and publishes it.
 *
 * <p>The max score is taken from the first hit when sorting by relevance (NaN if there are no
 * hits), from {@code maxScoreCollector} after populating scores when sorting by field with
 * {@code needScores}, and is NaN otherwise. The result goes to {@code mainResult} when
 * {@code main} is set, or into the common response otherwise.
 *
 * @throws IOException propagated from score population or document list creation
 */
@Override
protected void finish() throws IOException {
  final TopDocs collected = topCollector.topDocs();

  final float maxScore;
  if (withinGroupSort != null && !withinGroupSort.equals(Sort.RELEVANCE)) {
    if (needScores) {
      // use top-level query to populate the scores
      TopFieldCollector.populateScores(collected.scoreDocs, searcher, Grouping.this.query);
      maxScore = maxScoreCollector.getMaxScore();
    } else {
      maxScore = Float.NaN;
    }
  } else if (collected.scoreDocs.length == 0) {
    maxScore = Float.NaN;
  } else {
    // Relevance order: the first hit carries the highest score.
    maxScore = collected.scoreDocs[0].score;
  }

  GroupDocs<String> groupDocs = new GroupDocs<>(Float.NaN, maxScore, collected.totalHits, collected.scoreDocs, query.toString(), null);
  if (!main) {
    NamedList rsp = commonResponse();
    addDocList(rsp, groupDocs);
    return;
  }
  mainResult = getDocList(groupDocs);
}

Source File: TopGroupsFieldCommand.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Materialises {@code topGroups} after collection has finished.
 *
 * <p>With no first-phase groups an empty {@code TopGroups} is stored. Otherwise the groups come
 * from {@code secondPassCollector}, converted through {@code GroupConverter.fromMutable} when the
 * field's type reports a number type. When {@code needScores} is set, scores are populated for
 * every group.
 *
 * @param searcher searcher used to populate scores
 * @throws IOException propagated from the second-pass collector or score population
 */
@Override
@SuppressWarnings({"unchecked", "rawtypes"})
public void postCollect(IndexSearcher searcher) throws IOException {
  if (firstPhaseGroups.isEmpty()) {
    topGroups = new TopGroups<>(groupSort.getSort(), withinGroupSort.getSort(), 0, 0, new GroupDocs[0], Float.NaN);
    return;
  }

  final boolean numericField = field.getType().getNumberType() != null;
  if (!numericField) {
    topGroups = secondPassCollector.getTopGroups(0);
  } else {
    topGroups = GroupConverter.fromMutable(field, secondPassCollector.getTopGroups(0));
  }

  if (!needScores) {
    return;
  }
  for (GroupDocs<?> groupDocs : topGroups.groups) {
    TopFieldCollector.populateScores(groupDocs.scoreDocs, searcher, query);
  }
}

Source File: QueryCommand.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the collector chain for this command.
 *
 * <p>A field-sorted collector is used when {@code sort} is set and differs from
 * {@code Sort.RELEVANCE}, paired with a {@code MaxScoreCollector} when {@code needScores} is
 * true; otherwise a score-sorted collector is used. The result is wrapped in a
 * {@code FilterCollector} over {@code docSet}.
 *
 * @return a single-element list holding the filter collector
 * @throws IOException declared by the overridden method
 */
@Override
public List<Collector> create() throws IOException {
  final boolean sortedByField = sort != null && !sort.equals(Sort.RELEVANCE);

  final Collector delegate;
  if (sortedByField) {
    topDocsCollector = TopFieldCollector.create(sort, docsToCollect, Integer.MAX_VALUE);
    if (!needScores) {
      delegate = topDocsCollector;
    } else {
      maxScoreCollector = new MaxScoreCollector();
      delegate = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
    }
  } else {
    topDocsCollector = TopScoreDocCollector.create(docsToCollect, Integer.MAX_VALUE);
    delegate = topDocsCollector;
  }

  filterCollector = new FilterCollector(docSet, delegate);
  return Arrays.asList((Collector) filterCollector);
}

Source File: ReRankCollector.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates a re-rank collector and the first-pass collector it delegates to.
 *
 * <p>The first-pass collector is sized to {@code max(reRankDocs, length)} and receives
 * {@code cmd.getMinExactCount()} as its last argument. Without a sort on the command a
 * {@code TopScoreDocCollector} is used and {@code this.sort} stays {@code null}; otherwise the
 * sort is rewritten against {@code searcher}, stored, and used for a {@code TopFieldCollector}.
 *
 * @param reRankDocs          stored on the collector; also a lower bound for the first-pass size
 * @param length              stored on the collector; also a lower bound for the first-pass size
 * @param reRankQueryRescorer stored on the collector
 * @param cmd                 command supplying the query, the sort and the min exact count
 * @param searcher            stored on the collector and used to rewrite the sort
 * @param boostedPriority     stored on the collector
 * @throws IOException propagated from rewriting the sort
 */
@SuppressWarnings({"unchecked"})
public ReRankCollector(int reRankDocs,
    int length,
    Rescorer reRankQueryRescorer,
    QueryCommand cmd,
    IndexSearcher searcher,
    Set<BytesRef> boostedPriority) throws IOException {
  super(null);
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.searcher = searcher;
  this.reRankQueryRescorer = reRankQueryRescorer;
  this.query = cmd.getQuery();

  final int numHits = Math.max(reRankDocs, length);
  final Sort requestedSort = cmd.getSort();
  if (requestedSort != null) {
    final Sort rewritten = requestedSort.rewrite(searcher);
    this.sort = rewritten;
    //scores are needed for Rescorer (regardless of whether sort needs it)
    this.mainCollector = TopFieldCollector.create(rewritten, numHits, cmd.getMinExactCount());
  } else {
    this.sort = null;
    this.mainCollector = TopScoreDocCollector.create(numHits, cmd.getMinExactCount());
  }
}

Source File: LuceneOrderedDocCollector.java From crate with Apache License 2.0

5 votes

/**
 * Executes {@code query} with the given collector and converts the hits to rows.
 *
 * <p>The collector is wrapped in a {@code MinimumScoreCollector} when {@code minScore} is
 * non-null, and always in a {@code KillableCollector} bound to {@code raiseIfKilled}. Scores are
 * populated on the resulting hits when {@code doDocsScores} is set.
 *
 * @param topFieldCollector collector that gathers the top hits
 * @param minScore          minimum score to apply, or {@code null} for none
 * @param query             query to execute
 * @return the hits converted by {@code scoreDocToIterable}
 * @throws IOException propagated from the search or score population
 */
private KeyIterable<ShardId, Row> doSearch(TopFieldCollector topFieldCollector,
                                           Float minScore,
                                           Query query) throws IOException {
    final Collector scoreFiltered = minScore == null
        ? topFieldCollector
        : new MinimumScoreCollector(topFieldCollector, minScore);
    searcher.search(query, new KillableCollector(scoreFiltered, this::raiseIfKilled));

    final ScoreDoc[] hits = topFieldCollector.topDocs().scoreDocs;
    if (doDocsScores) {
        TopFieldCollector.populateScores(hits, searcher, query);
    }
    return scoreDocToIterable(hits);
}

Source File: BasicStorageTest.java From lumongo with Apache License 2.0

5 votes

/**
 * Runs {@code q} sorted by the {@code "category"} field, fetches the {@code "uid"} of every
 * returned hit, and reports the total hit count.
 *
 * <p>Search and fetch durations are measured into locals that are intentionally unused.
 *
 * @param indexReader reader to search
 * @param count       number of top hits to collect
 * @param q           query to execute
 * @return the collector's total hit count
 * @throws IOException propagated from searching or loading documents
 */
private static int runQuery(IndexReader indexReader, int count, Query q) throws IOException {
	final long searchStarted = System.currentTimeMillis();
	IndexSearcher searcher = new IndexSearcher(indexReader);

	Sort byCategory = new Sort();
	byCategory.setSort(new SortedSetSortField("category", false));

	TopFieldCollector collector = TopFieldCollector.create(byCategory, count, null, true, true, true);
	searcher.search(q, collector);

	ScoreDoc[] hits = collector.topDocs().scoreDocs;
	int totalHits = collector.getTotalHits();
	@SuppressWarnings("unused") long searchTime = System.currentTimeMillis() - searchStarted;

	final long fetchStarted = System.currentTimeMillis();

	// Load every hit so that the fetch phase is actually exercised.
	List<String> ids = new ArrayList<>();
	for (int i = 0; i < hits.length; i++) {
		ids.add(searcher.doc(hits[i].doc).get("uid"));
	}
	@SuppressWarnings("unused") long fetchTime = System.currentTimeMillis() - fetchStarted;

	return totalHits;
}

Source File: BlurFieldCollector.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Unwraps {@code SlowCollector} and {@code StopExecutionCollector} layers until the underlying
 * {@code TopFieldCollector} is reached.
 *
 * @param collector the possibly wrapped collector
 * @return the innermost {@code TopFieldCollector}
 * @throws RuntimeException if a layer is neither a known wrapper nor a {@code TopFieldCollector}
 */
private TopFieldCollector getTopFieldCollector(Collector collector) {
  Collector current = collector;
  while (true) {
    if (current instanceof SlowCollector) {
      current = ((SlowCollector) current).getCollector();
    } else if (current instanceof StopExecutionCollector) {
      current = ((StopExecutionCollector) current).getCollector();
    } else if (current instanceof TopFieldCollector) {
      return (TopFieldCollector) current;
    } else {
      throw new RuntimeException("Collector type [" + current + "] not supported.");
    }
  }
}

Source File: BlurFieldCollector.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Creates a new {@code TopFieldCollector} wrapped in a {@code StopExecutionCollector} bound to
 * {@code _running}, additionally wrapped in a {@code SlowCollector} when {@code _runSlow} is set.
 *
 * @return the outermost collector of the chain
 * @throws IOException propagated from collector creation
 */
@Override
public Collector newCollector() throws IOException {
  final TopFieldCollector topFieldCollector =
      TopFieldCollector.create(_sort, _numHitsToCollect, _after, true, true, false, true);
  final Collector stoppable = new StopExecutionCollector(topFieldCollector, _running);
  return _runSlow ? new SlowCollector(stoppable) : stoppable;
}

Source File: TestNumericRangeQuery64.java From lucene-solr with Apache License 2.0

5 votes

/**
 * test for constant score + boolean query + filter, the other tests only use the constant score mode.
 *
 * <p>Runs the same closed long range twice, once per rewrite method, and checks the hit count
 * together with the field value of the first and the last hit in index order.
 */
private void testRange(int precisionStep) throws Exception {
  final String field = "field" + precisionStep;
  final int count = 3000;
  final long lower = (distance*3/2)+startOffset;
  final long upper = lower + count*distance + (distance/3);
  LegacyNumericRangeQuery<Long> q = LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);

  for (byte pass = 0; pass < 2; pass++) {
    TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);

    // Select the rewrite method for this pass; the label is appended to assertion messages.
    final String type;
    if (pass == 0) {
      type = " (constant score filter rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    } else if (pass == 1) {
      type = " (constant score boolean rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
    } else {
      return;
    }

    searcher.search(q, collector);
    ScoreDoc[] sd = collector.topDocs().scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count"+type, count, sd.length );

    Document first = searcher.doc(sd[0].doc);
    assertEquals("First doc"+type, 2*distance+startOffset, first.getField(field).numericValue().longValue() );
    Document last = searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc"+type, (1+count)*distance+startOffset, last.getField(field).numericValue().longValue() );
  }
}

Source File: TestNumericRangeQuery32.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks an int range with no upper bound ({@code null}), first with the final boolean flag of
 * {@code newIntRange} set to {@code true} and then to {@code false}. Both runs must return the
 * same hit count and the same first and last field values.
 */
private void testRightOpenRange(int precisionStep) throws Exception {
  final String field = "field" + precisionStep;
  final int count = 3000;
  final int lower = (count-1)*distance + (distance/3) +startOffset;

  // The flag is presumably "max inclusive"; with a null upper bound both values must behave alike.
  for (boolean lastFlag : new boolean[] {true, false}) {
    LegacyNumericRangeQuery<Integer> q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, null, true, lastFlag);
    TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);
    searcher.search(q, collector);

    ScoreDoc[] sd = collector.topDocs().scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", noDocs-count, sd.length );

    Document first = searcher.doc(sd[0].doc);
    assertEquals("First doc", count*distance+startOffset, first.getField(field).numericValue().intValue());
    Document last = searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc", (noDocs-1)*distance+startOffset, last.getField(field).numericValue().intValue());
  }
}

Source File: TestNumericRangeQuery32.java From lucene-solr with Apache License 2.0

5 votes

/**
 * test for both constant score and boolean query, the other tests only use the constant score mode.
 *
 * <p>Runs the same closed int range twice, once per rewrite method, and checks the hit count
 * together with the field value of the first and the last hit in index order.
 */
private void testRange(int precisionStep) throws Exception {
  final String field = "field" + precisionStep;
  final int count = 3000;
  final int lower = (distance*3/2)+startOffset;
  final int upper = lower + count*distance + (distance/3);
  LegacyNumericRangeQuery<Integer> q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);

  for (byte pass = 0; pass < 2; pass++) {
    TopFieldCollector collector = TopFieldCollector.create(Sort.INDEXORDER, noDocs, Integer.MAX_VALUE);

    // Select the rewrite method for this pass; the label is appended to assertion messages.
    final String type;
    if (pass == 0) {
      type = " (constant score filter rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
    } else if (pass == 1) {
      type = " (constant score boolean rewrite)";
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
    } else {
      return;
    }

    searcher.search(q, collector);
    ScoreDoc[] sd = collector.topDocs().scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count"+type, count, sd.length );

    Document first = searcher.doc(sd[0].doc);
    assertEquals("First doc"+type, 2*distance+startOffset, first.getField(field).numericValue().intValue());
    Document last = searcher.doc(sd[sd.length-1].doc);
    assertEquals("Last doc"+type, (1+count)*distance+startOffset, last.getField(field).numericValue().intValue());
  }
}

Source File: QueryCommand.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Captures the collected top docs into {@code topDocs} and, when {@code needScores} is set,
 * populates their scores using {@code mainQuery}.
 *
 * @param searcher searcher used to populate scores
 * @throws IOException propagated from score population
 */
@Override
public void postCollect(IndexSearcher searcher) throws IOException {
  topDocs = topDocsCollector.topDocs();
  if (!needScores) {
    return;
  }
  // use mainQuery to populate the scores
  TopFieldCollector.populateScores(topDocs.scoreDocs, searcher, mainQuery);
}

Source File: Grouping.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Populates scores for the documents of every group in {@code result} when {@code needScores}
 * is set; does nothing otherwise.
 *
 * @throws IOException propagated from score population
 */
protected void populateScoresIfNecessary() throws IOException {
  if (!needScores) {
    return;
  }
  for (GroupDocs<?> groupDocs : result.groups) {
    TopFieldCollector.populateScores(groupDocs.scoreDocs, searcher, query);
  }
}

Source File: TopGroupsCollector.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Configures the per-group collector supplier.
 *
 * <p>Scores are needed when max scores are requested or when the sort itself needs scores. If
 * {@code withinGroupSort} is the {@code Sort.RELEVANCE} instance, groups collect with a
 * {@code TopScoreDocCollector}; otherwise with a {@code TopFieldCollector}, plus a
 * {@code MaxScoreCollector} when {@code getMaxScores} is true.
 *
 * @param withinGroupSort sort applied to documents inside each group
 * @param maxDocsPerGroup number of top documents to keep per group
 * @param getMaxScores    whether a max score should be tracked per group
 */
TopDocsReducer(Sort withinGroupSort,
               int maxDocsPerGroup, boolean getMaxScores) {
  this.needsScores = getMaxScores || withinGroupSort.needsScores();
  // Identity comparison: only the shared RELEVANCE instance selects the score-sorted collector.
  if (withinGroupSort != Sort.RELEVANCE) {
    supplier = () -> new TopDocsAndMaxScoreCollector(
        false,
        TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE), // TODO: disable exact counts?
        getMaxScores ? new MaxScoreCollector() : null);
  } else {
    supplier = () -> new TopDocsAndMaxScoreCollector(true, TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE), null);
  }
}

Source File: BlockGroupingCollector.java From lucene-solr with Apache License 2.0

4 votes

/** Returns the grouped results.  Returns null if the
 *  number of groups collected is &lt;= groupOffset.
 *
 *  <p><b>NOTE</b>: This collector is unable to compute
 *  the groupValue per group so it will always be null.
 *  This is normally not a problem, as you can obtain the
 *  value just like you obtain other values for each
 *  matching document (eg, via stored fields, via
 *  DocValues, etc.)
 *
 *  @param withinGroupSort The {@link Sort} used to sort
 *    documents within each group.
 *  @param groupOffset Which group to start from
 *  @param withinGroupOffset Which document to start from
 *    within each group
 *  @param maxDocsPerGroup How many top documents to keep
 *     within each group.
 *  @return the top groups, or null if groupOffset is not
 *     smaller than the number of collected groups
 *  @throws IllegalArgumentException if withinGroupSort is
 *     {@link Sort#RELEVANCE} but scores were not collected
 *  @throws IOException propagated from the per-group collectors
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {

  //if (queueFull) {
  //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
  //}
  // Flush the group that was still being accumulated, if any.
  if (subDocUpto != 0) {
    processGroup();
  }
  if (groupOffset >= groupQueue.size()) {
    return null;
  }
  int totalGroupedHitCount = 0;

  // Replays the stored doc/score pairs into the per-group collectors.
  final ScoreAndDoc fakeScorer = new ScoreAndDoc();

  // Seed with negative infinity, not Float.MIN_VALUE: MIN_VALUE is the smallest POSITIVE float,
  // so it would win over a true maximum of 0 (or any negative score).
  float maxScore = Float.NEGATIVE_INFINITY;

  @SuppressWarnings({"unchecked","rawtypes"})
  final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
  // Groups are popped from the queue and stored from the last slot towards the first.
  for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
    final OneGroup og = groupQueue.pop();

    // At this point we hold all docs w/ in each group,
    // unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort.equals(Sort.RELEVANCE)) {
      // Sort by score
      if (!needsScores) {
        throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
      }
      collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
    } else {
      // Sort by fields
      collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
    }

    float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
    LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
    leafCollector.setScorer(fakeScorer);
    for(int docIDX=0;docIDX<og.count;docIDX++) {
      final int doc = og.docs[docIDX];
      fakeScorer.doc = doc;
      if (needsScores) {
        fakeScorer.score = og.scores[docIDX];
        groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
      }
      leafCollector.collect(doc);
    }
    totalGroupedHitCount += og.count;

    final Object[] groupSortValues;

    groupSortValues = new Comparable<?>[comparators.length];
    for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
      groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);

    // TODO: we could aggregate scores across children
    // by Sum/Avg instead of passing NaN:
    groups[downTo] = new GroupDocs<>(Float.NaN,
                                           groupMaxScore,
                                           new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
                                           topDocs.scoreDocs,
                                           null,
                                           groupSortValues);
    // When scores are not tracked groupMaxScore is NaN, and Math.max propagates NaN.
    maxScore = Math.max(maxScore, groupMaxScore);
  }

  /*
  while (groupQueue.size() != 0) {
    final OneGroup og = groupQueue.pop();
    //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
    totalGroupedHitCount += og.count;
  }
  */

  return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
                                     withinGroupSort.getSort(),
                                     totalHitCount, totalGroupedHitCount, groups, maxScore),
                       totalGroupCount);
}

Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0

4 votes

/**
 * Returns up to {@code num} transactions whose documents match the cascade-flag range query,
 * sorted by {@code FIELD_TXID}.
 *
 * <p>The search runs through a {@code TxnCacheFilter} built over {@code cleanCascadeCache}. Every
 * returned transaction id is also put into {@code cleanCascadeCache} with a {@code null} value.
 * The searcher reference taken from the core is released in all cases.
 *
 * @param num maximum number of transactions to return
 * @return the matching transactions with id and commit time set
 * @throws IOException propagated from searching or loading documents
 */
@Override
public List<Transaction> getCascades(int num) throws IOException
{
    RefCounted<SolrIndexSearcher> searcherRef = null;
    try
    {
        searcherRef = this.core.getSearcher();
        SolrIndexSearcher searcher = searcherRef.get();

        TopFieldCollector byTxId = TopFieldCollector.create(
                new Sort(new SortField(FIELD_TXID, SortField.Type.LONG)), num, null, false, false, false);

        LegacyNumericRangeQuery cascadeFlagQuery = LegacyNumericRangeQuery.newIntRange(FIELD_CASCADE_FLAG, 1, 1, true, true);

        // The cache filter sits in front of the top-docs collector.
        DelegatingCollector cacheFilter = new TxnCacheFilter(cleanCascadeCache);
        cacheFilter.setLastDelegate(byTxId);

        searcher.search(cascadeFlagQuery, cacheFilter);
        ScoreDoc[] hits = byTxId.topDocs().scoreDocs;

        // Only these two stored fields are loaded for each hit.
        Set<String> fieldsToLoad = new HashSet<>();
        fieldsToLoad.add(FIELD_S_TXID);
        fieldsToLoad.add(FIELD_S_TXCOMMITTIME);

        List<Transaction> transactions = new ArrayList<>(hits.length);
        for (int i = 0; i < hits.length; i++)
        {
            Transaction transaction = new Transaction();
            Document doc = searcher.doc(hits[i].doc, fieldsToLoad);

            long txnId = doc.getField(FIELD_S_TXID).numericValue().longValue();
            cleanCascadeCache.put(txnId, null);
            transaction.setId(txnId);

            long commitTimeMs = doc.getField(FIELD_S_TXCOMMITTIME).numericValue().longValue();
            transaction.setCommitTimeMs(commitTimeMs);

            transactions.add(transaction);
        }

        return transactions;
    }
    finally
    {
        if (searcherRef != null)
        {
            searcherRef.decref();
        }
    }
}

org.apache.lucene.search.TopFieldCollector Java Examples