org.apache.lucene.search.LeafCollector Java Examples
The following examples show how to use
org.apache.lucene.search.LeafCollector.
You can vote up the examples you like or vote down the ones you don't like,
and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: EarlyTerminatingCollector.java From lucene-solr with Apache License 2.0 | 6 votes |
// Returns a leaf collector that wraps the delegate's collector and aborts the
// whole search with EarlyTerminatingCollectorException once maxDocsToCollect
// hits have been collected across all segments.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  prevReaderCumulativeSize += currentReaderSize; // not current any more
  // NOTE(review): maxDoc() - 1 — presumably chosen so prevReaderCumulativeSize
  // + (doc + 1) does not over-count the scanned doc space; confirm against
  // EarlyTerminatingCollectorException's contract.
  currentReaderSize = context.reader().maxDoc() - 1;
  return new FilterLeafCollector(super.getLeafCollector(context)) {
    @Override
    public void collect(int doc) throws IOException {
      super.collect(doc);
      numCollected++;
      if (maxDocsToCollect <= numCollected) {
        // The exception carries the number collected and an approximate
        // absolute position within the whole index.
        throw new EarlyTerminatingCollectorException
          (numCollected, prevReaderCumulativeSize + (doc + 1));
      }
    }
  };
}
Example #2
Source File: EarlyTerminatingSortingCollector.java From lucene-solr with Apache License 2.0 | 6 votes |
// If the segment is index-sorted compatibly with the requested sort, wrap the
// delegate so collection of this leaf stops (CollectionTerminatedException)
// after numDocsToCollect hits; otherwise collect normally. Throws
// IllegalStateException when the segment sort cannot satisfy the query sort.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  Sort segmentSort = context.reader().getMetaData().getSort();
  if (segmentSort != null && canEarlyTerminate(sort, segmentSort) == false) {
    throw new IllegalStateException("Cannot early terminate with sort order " + sort + " if segments are sorted with " + segmentSort);
  }

  if (segmentSort != null) {
    // segment is sorted, can early-terminate
    return new FilterLeafCollector(super.getLeafCollector(context)) {
      private int numCollected;

      @Override
      public void collect(int doc) throws IOException {
        super.collect(doc);
        if (++numCollected >= numDocsToCollect) {
          terminatedEarly.set(true);
          // Lucene catches this and simply moves on to the next leaf.
          throw new CollectionTerminatedException();
        }
      }
    };
  } else {
    return super.getLeafCollector(context);
  }
}
Example #3
Source File: RankQueryTestPlugin.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a leaf collector that records one ScoreDoc per collected document,
 * using the numeric doc value of field "sort_i" as the score (0 when the
 * document has no value for that field).
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int base = context.docBase; // converts segment-local ids to global ids
  final NumericDocValues values = DocValues.getNumeric(context.reader(), "sort_i");
  return new LeafCollector() {

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // scores are unused; the "sort_i" doc value serves as the score
    }

    @Override // fix: annotation was missing on this LeafCollector method
    public void collect(int doc) throws IOException {
      long value;
      if (values.advanceExact(doc)) {
        value = values.longValue();
      } else {
        value = 0; // documents without the field sort with value 0
      }
      list.add(new ScoreDoc(doc + base, (float) value));
    }
  };
}
Example #4
Source File: ExportQParserPlugin.java From lucene-solr with Apache License 2.0 | 6 votes |
// Allocates a per-segment FixedBitSet sized to the segment's maxDoc, stores it
// at the segment's ordinal in this.sets, and returns a collector that marks
// each collected doc in the set while counting total hits.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final FixedBitSet set = new FixedBitSet(context.reader().maxDoc());
  this.sets[context.ord] = set;
  return new LeafCollector() {

    @Override
    public void setScorer(Scorable scorer) throws IOException {
      // scoring is not needed; only the matching doc ids are recorded
    }

    @Override
    public void collect(int docId) throws IOException {
      ++totalHits;
      set.set(docId);
    }
  };
}
Example #5
Source File: SimpleFacets.java From lucene-solr with Apache License 2.0 | 6 votes |
// Wraps the given collector so that a single-valued numeric field without
// docValues is read through an uninverted ("insane") view of each leaf reader;
// any other kind of field passes the collector through unchanged.
private Collector getInsanityWrapper(final String field, Collector collector) {
  SchemaField sf = searcher.getSchema().getFieldOrNull(field);
  if (sf != null && !sf.hasDocValues() && !sf.multiValued() && sf.getType().getNumberType() != null) {
    // it's a single-valued numeric field: we must currently create insanity :(
    // there isn't a GroupedFacetCollector that works on numerics right now...
    return new FilterCollector(collector) {
      @Override
      public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
        // substitute the wrapped reader's context for the real one
        LeafReader insane = Insanity.wrapInsanity(context.reader(), field);
        return in.getLeafCollector(insane.getContext());
      }
    };
  } else {
    return collector;
  }
}
Example #6
Source File: GenericTermsCollector.java From lucene-solr with Apache License 2.0 | 6 votes |
// Adapts a TermsCollector to the GenericTermsCollector interface by pure
// delegation; per-term scores are not supported by the wrapped collector.
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      // plain TermsCollector does not track scores
      throw new UnsupportedOperationException("scores are not available for "+collector);
    }
  };
}
Example #7
Source File: StatisHelper.java From HongsCORE with MIT License | 6 votes |
// Re-resolves the per-field doc-values views for the new segment, choosing the
// accessor from the field's group flags, then keeps collecting on this object.
// NOTE(review): groups[i][0] >= 1 appears to select numeric vs. string fields
// and groups[i][1] == 1 multi- vs. single-valued; the "%"/"#" field-name
// prefixes presumably encode this in the index schema — confirm upstream.
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
  LeafReader reader = lrc.reader();
  for (int i = 0; i < fields.length; i++) {
    if (groups[i][0] >= 1) {
      if (groups[i][1] == 1) {
        values[i] = reader.getSortedNumericDocValues("%"+fields[i]);
      } else {
        values[i] = reader.getNumericDocValues("#"+fields[i]);
      }
    } else {
      if (groups[i][1] == 1) {
        values[i] = reader.getSortedSetDocValues("%"+fields[i]);
      } else {
        values[i] = reader.getSortedDocValues("#"+fields[i]);
      }
    }
  }
  return this;
}
Example #8
Source File: DrillSidewaysScorer.java From lucene-solr with Apache License 2.0 | 6 votes |
// Delivers a fully-matching hit (collectDocID) to the main collector, the
// optional drill-down collector, and every dimension's sideways collector.
private void collectHit(LeafCollector collector, DocsAndCost[] dims) throws IOException {
  //if (DEBUG) {
  //  System.out.println("      hit");
  //}

  collector.collect(collectDocID);
  if (drillDownCollector != null) {
    drillDownLeafCollector.collect(collectDocID);
  }

  // TODO: we could "fix" faceting of the sideways counts
  // to do this "union" (of the drill down hits) in the
  // end instead:

  // Tally sideways counts:
  for (DocsAndCost dim : dims) {
    dim.sidewaysLeafCollector.collect(collectDocID);
  }
}
Example #9
Source File: DocSetUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Pushes every doc of a globally-sorted DocSet through the collector,
 * advancing to the matching leaf (and fetching its leaf collector) lazily as
 * the doc ids cross segment boundaries. Throws IllegalStateException if the
 * input turns out not to be sorted.
 */
public static void collectSortedDocSet(DocSet docs, IndexReader reader, Collector collector) throws IOException {
  // TODO add SortedDocSet sub-interface and take that.
  // TODO collectUnsortedDocSet: iterate segment, then all docSet per segment.

  final List<LeafReaderContext> leaves = reader.leaves();
  final Iterator<LeafReaderContext> ctxIt = leaves.iterator();
  int segBase = 0;
  int segMax;
  int adjustedMax = 0; // exclusive global upper bound of the current segment
  LeafReaderContext ctx = null;
  LeafCollector leafCollector = null;
  for (DocIterator docsIt = docs.iterator(); docsIt.hasNext(); ) {
    final int doc = docsIt.nextDoc();
    if (doc >= adjustedMax) {
      // doc belongs to a later segment: skip forward until we find it
      do {
        ctx = ctxIt.next();
        segBase = ctx.docBase;
        segMax = ctx.reader().maxDoc();
        adjustedMax = segBase + segMax;
      } while (doc >= adjustedMax);
      leafCollector = collector.getLeafCollector(ctx);
    }
    if (doc < segBase) {
      // a doc below the current segment base means the input was unsorted
      throw new IllegalStateException("algorithm expects sorted DocSet but wasn't: " + docs.getClass());
    }
    leafCollector.collect(doc - segBase);  // per-seg collectors
  }
}
Example #10
Source File: FilterCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
// Counts every collected doc in matches, but forwards to the delegate only
// those whose global doc id passes the filter.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int docBase = context.docBase;
  return new FilterLeafCollector(super.getLeafCollector(context)) {
    @Override
    public void collect(int doc) throws IOException {
      matches++;
      // the filter works on global doc ids, so rebase the segment-local id
      if (filter.exists(doc + docBase)) {
        super.collect(doc);
      }
    }
  };
}
Example #11
Source File: GlobalOrdinalsCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
// Resolves the field's sorted doc values for this segment; when an ordinal
// map exists (multi-segment case), segment-local ordinals are translated to
// global ordinals before collection.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
  if (ordinalMap != null) {
    LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
    return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
  } else {
    // single segment: segment ords are already global
    return new SegmentOrdinalCollector(docTermOrds);
  }
}
Example #12
Source File: GlobalOrdinalsWithScoreCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
// Same leaf-selection logic as the score-less global-ordinals collector:
// translate segment ordinals through the ordinal map when one is present,
// otherwise collect on raw segment ordinals.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  SortedDocValues docTermOrds = DocValues.getSorted(context.reader(), field);
  if (ordinalMap != null) {
    LongValues segmentOrdToGlobalOrdLookup = ordinalMap.getGlobalOrds(context.ord);
    return new OrdinalMapCollector(docTermOrds, segmentOrdToGlobalOrdLookup);
  } else {
    // single segment: segment ords are already global
    return new SegmentOrdinalCollector(docTermOrds);
  }
}
Example #13
Source File: StatisHelper.java From HongsCORE with MIT License | 5 votes |
// Re-resolves the per-field numeric doc-values views for the new segment and
// keeps collecting on this object itself.
// NOTE(review): groups[i][1] == 1 presumably marks multi-valued fields; the
// "%"/"#" field-name prefixes look schema-specific — confirm with the indexer.
@Override
public LeafCollector getLeafCollector(LeafReaderContext lrc) throws IOException {
  LeafReader reader = lrc.reader();
  for (int i = 0; i < fields.length; i++) {
    if (groups[i][1] == 1) {
      values[i] = reader.getSortedNumericDocValues("%"+fields[i]);
    } else {
      values[i] = reader.getNumericDocValues("#"+fields[i]);
    }
  }
  return this;
}
Example #14
Source File: CompletionScorer.java From lucene-solr with Apache License 2.0 | 5 votes |
// Performs suggestion lookup instead of doc-at-a-time scoring; requires the
// collector to be a TopSuggestDocsCollector and rejects anything else.
@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
  if (!(collector instanceof TopSuggestDocsCollector)) {
    throw new IllegalArgumentException("collector is not of type TopSuggestDocsCollector");
  }
  suggester.lookup(this, acceptDocs, ((TopSuggestDocsCollector) collector));
  return max;
}
Example #15
Source File: LuceneDocIdCollector.java From incubator-pinot with Apache License 2.0 | 5 votes |
@Override public LeafCollector getLeafCollector(LeafReaderContext context) { return new LeafCollector() { @Override public void setScorer(Scorable scorer) throws IOException { // we don't use scoring, so this is NO-OP } @Override public void collect(int doc) throws IOException { _docIds.add(_docIdTranslator.getPinotDocId(doc)); } }; }
Example #16
Source File: RealtimeLuceneDocIdCollector.java From incubator-pinot with Apache License 2.0 | 5 votes |
@Override public LeafCollector getLeafCollector(LeafReaderContext context) { return new LeafCollector() { @Override public void setScorer(Scorable scorer) throws IOException { // we don't use scoring, so this is NO-OP } @Override public void collect(int doc) throws IOException { _docIds.add(doc); } }; }
Example #17
Source File: SolrOwnerQuery.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Start a fresh bit set sized to this segment, remember it in the
  // per-segment list, and keep collecting on this same object.
  final FixedBitSet segmentSet = new FixedBitSet(context.reader().maxDoc());
  set = segmentSet;
  sets.add(segmentSet);
  return this;
}
Example #18
Source File: SolrAuthoritySetQuery.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Allocate one bit set per segment, track it, and collect on this object.
  final FixedBitSet perSegmentBits = new FixedBitSet(context.reader().maxDoc());
  set = perSegmentBits;
  sets.add(perSegmentBits);
  return this;
}
Example #19
Source File: FilteredCollector.java From Elasticsearch with Apache License 2.0 | 5 votes |
// Wraps the delegate's leaf collector so only docs accepted by the filter
// scorer (exposed as sequential-access Bits) are forwarded.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final Scorer filterScorer = filter.scorer(context);
  final LeafCollector in = collector.getLeafCollector(context);
  // NOTE(review): filterScorer may be null when nothing matches this segment;
  // presumably Lucene.asSequentialAccessBits handles that — confirm.
  final Bits bits = Lucene.asSequentialAccessBits(context.reader().maxDoc(), filterScorer);
  return new FilterLeafCollector(in) {
    @Override
    public void collect(int doc) throws IOException {
      if (bits.get(doc)) {
        in.collect(doc);
      }
    }
  };
}
Example #20
Source File: ParentQuery.java From Elasticsearch with Apache License 2.0 | 5 votes |
// Collects parent documents by global ordinal: each newly-seen ordinal gets a
// slot in parentIdxs and its score recorded; a parent ordinal is expected to
// match at most once. Segments without parent ordinals are skipped entirely.
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final SortedDocValues values = globalIfd.load(context).getOrdinalsValues(parentType);
  if (values == null) {
    // nothing can match in this segment — tell Lucene to skip the leaf
    throw new CollectionTerminatedException();
  }
  return new LeafCollector() {
    Scorer scorer;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      this.scorer = scorer;
    }

    @Override
    public void collect(int doc) throws IOException {
      long globalOrdinal = values.getOrd(doc);
      if (globalOrdinal != SortedSetDocValues.NO_MORE_ORDS) {
        long parentIdx = parentIdxs.add(globalOrdinal);
        if (parentIdx >= 0) {
          // first sighting of this parent: record its score
          scores = bigArrays.grow(scores, parentIdx + 1);
          scores.set(parentIdx, scorer.score());
        } else {
          assert false : "parent id should only match once, since there can only be one parent doc";
        }
      }
    }
  };
}
Example #21
Source File: LuceneOrderedDocCollector.java From crate with Apache License 2.0 | 4 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Bail out immediately if the job was killed, then hand out a kill-aware
  // wrapper around the delegate's leaf collector.
  raiseIfKilled.run();
  final LeafCollector inner = delegate.getLeafCollector(context);
  return new KillableLeafCollector(inner, raiseIfKilled);
}
Example #22
Source File: LuceneOrderedDocCollector.java From crate with Apache License 2.0 | 4 votes |
public KillableLeafCollector(LeafCollector delegate, Runnable raiseIfKilled) {
  // Plain field capture; the kill check runs around each delegated call.
  this.raiseIfKilled = raiseIfKilled;
  this.delegate = delegate;
}
Example #23
Source File: ReservoirSampler.java From crate with Apache License 2.0 | 4 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) {
  // Each leaf gets its own collector feeding the shared reservoir.
  final ReservoirLeafCollector leafCollector =
      new ReservoirLeafCollector(reservoir, readerIdx, context);
  return leafCollector;
}
Example #24
Source File: ReRankCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Pure delegation: first-pass collection is handled by the main collector.
  final LeafCollector leaf = mainCollector.getLeafCollector(context);
  return leaf;
}
Example #25
Source File: DrillSidewaysScorer.java From lucene-solr with Apache License 2.0 | 4 votes |
// Routes a document that failed exactly one drill-down dimension to that
// dimension's sideways collector, so it still counts for that facet dimension.
private void collectNearMiss(LeafCollector sidewaysCollector) throws IOException {
  //if (DEBUG) {
  //  System.out.println("      missingDim=" + dim);
  //}
  sidewaysCollector.collect(collectDocID);
}
Example #26
Source File: DrillSidewaysScorer.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Used when base query is highly constraining vs the
 *  drilldowns, or when the docs must be scored at once
 *  (i.e., like BooleanScorer2, not BooleanScorer). In
 *  this case we just .next() on base and .advance() on
 *  the dim filters. */
private void doQueryFirstScoring(Bits acceptDocs, LeafCollector collector, DocsAndCost[] dims) throws IOException {
  //if (DEBUG) {
  //  System.out.println("  doQueryFirstScoring");
  //}
  int docID = baseScorer.docID();

  nextDoc: while (docID != PostingsEnum.NO_MORE_DOCS) {
    if (acceptDocs != null && acceptDocs.get(docID) == false) {
      docID = baseIterator.nextDoc();
      continue;
    }
    // Tracks the single dimension (if any) this doc failed; a second failure
    // disqualifies the doc entirely.
    LeafCollector failedCollector = null;
    for (DocsAndCost dim : dims) {
      // TODO: should we sort this 2nd dimension of
      // docsEnums from most frequent to least?
      if (dim.approximation.docID() < docID) {
        dim.approximation.advance(docID);
      }

      boolean matches = false;
      if (dim.approximation.docID() == docID) {
        if (dim.twoPhase == null) {
          matches = true;
        } else {
          matches = dim.twoPhase.matches();
        }
      }

      if (matches == false) {
        if (failedCollector != null) {
          // More than one dim fails on this document, so
          // it's neither a hit nor a near-miss; move to
          // next doc:
          docID = baseIterator.nextDoc();
          continue nextDoc;
        } else {
          failedCollector = dim.sidewaysLeafCollector;
        }
      }
    }

    collectDocID = docID;

    // TODO: we could score on demand instead since we are
    // daat here:
    collectScore = baseScorer.score();

    if (failedCollector == null) {
      // Hit passed all filters, so it's "real":
      collectHit(collector, dims);
    } else {
      // Hit missed exactly one filter:
      collectNearMiss(failedCollector);
    }

    docID = baseIterator.nextDoc();
  }
}
Example #27
Source File: BlockGroupingCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the grouped results. Returns null if the
 *  number of groups collected is <= groupOffset.
 *
 *  <p><b>NOTE</b>: This collector is unable to compute
 *  the groupValue per group so it will always be null.
 *  This is normally not a problem, as you can obtain the
 *  value just like you obtain other values for each
 *  matching document (eg, via stored fields, via
 *  DocValues, etc.)
 *
 *  @param withinGroupSort The {@link Sort} used to sort
 *    documents within each group.
 *  @param groupOffset Which group to start from
 *  @param withinGroupOffset Which document to start from
 *    within each group
 *  @param maxDocsPerGroup How many top documents to keep
 *    within each group.
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {

  //if (queueFull) {
  //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
  //}
  // flush any partially-accumulated group before reading the queue
  if (subDocUpto != 0) {
    processGroup();
  }
  if (groupOffset >= groupQueue.size()) {
    return null;
  }
  int totalGroupedHitCount = 0;

  final ScoreAndDoc fakeScorer = new ScoreAndDoc();

  float maxScore = Float.MIN_VALUE;

  @SuppressWarnings({"unchecked","rawtypes"})
  final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
  // pop() yields groups from the queue, so the result array is filled backwards
  for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
    final OneGroup og = groupQueue.pop();

    // At this point we hold all docs w/ in each group,
    // unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort.equals(Sort.RELEVANCE)) {
      // Sort by score
      if (!needsScores) {
        throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
      }
      collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
    } else {
      // Sort by fields
      collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
    }

    float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
    LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
    leafCollector.setScorer(fakeScorer);
    // replay the buffered docs (and scores) of this group into the collector
    for(int docIDX=0;docIDX<og.count;docIDX++) {
      final int doc = og.docs[docIDX];
      fakeScorer.doc = doc;
      if (needsScores) {
        fakeScorer.score = og.scores[docIDX];
        groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
      }
      leafCollector.collect(doc);
    }
    totalGroupedHitCount += og.count;

    final Object[] groupSortValues;

    groupSortValues = new Comparable<?>[comparators.length];
    for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
      groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);

    // TODO: we could aggregate scores across children
    // by Sum/Avg instead of passing NaN:
    groups[downTo] = new GroupDocs<>(Float.NaN,
                                     groupMaxScore,
                                     new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
                                     topDocs.scoreDocs,
                                     null,
                                     groupSortValues);
    maxScore = Math.max(maxScore, groupMaxScore);
  }

  /*
  while (groupQueue.size() != 0) {
    final OneGroup og = groupQueue.pop();
    //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
    totalGroupedHitCount += og.count;
  }
  */

  return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
                                         withinGroupSort.getSort(),
                                         totalHitCount, totalGroupedHitCount, groups, maxScore),
                         totalGroupCount);
}
Example #28
Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
// Remembers the new segment's doc-id base and keeps collecting on this object.
// NOTE(review): no @Override here although this looks like it implements
// Collector.getLeafCollector — confirm against the enclosing class.
public LeafCollector getLeafCollector(LeafReaderContext context) {
  this.docBase = context.docBase;
  return this;
}
Example #29
Source File: AlfrescoReRankQParserPlugin.java From SearchServices with GNU Lesser General Public License v3.0 | 4 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Delegate per-leaf collection to the wrapped main collector.
  final LeafCollector mainLeaf = mainCollector.getLeafCollector(context);
  return mainLeaf;
}
Example #30
Source File: InternalProfileCollector.java From Elasticsearch with Apache License 2.0 | 4 votes |
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  // Forward leaf creation to the profiled delegate collector.
  final LeafCollector delegateLeaf = collector.getLeafCollector(context);
  return delegateLeaf;
}