org.apache.lucene.search.DocIdSetIterator Java Examples
The following examples show how to use
org.apache.lucene.search.DocIdSetIterator.
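A DocIdSetIterator visits matching documents in increasing doc-id order. The contract every example below relies on: docID() returns -1 before iteration starts, nextDoc() and advance(target) only move forward, and both return the sentinel DocIdSetIterator.NO_MORE_DOCS (Integer.MAX_VALUE) once the set is exhausted. As a minimal sketch (the iterator could be any concrete subclass), the standard consumption loop looks like this:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

public final class DisiDemo {
  // Sketch: the standard consumption idiom for any DocIdSetIterator.
  public static void consume(DocIdSetIterator it) throws IOException {
    assert it.docID() == -1; // unpositioned before the first call to nextDoc()
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // 'doc' is a segment-local document id; process it here
    }
    assert it.docID() == DocIdSetIterator.NO_MORE_DOCS; // stays exhausted
  }
}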
Example #1
Source File: TestRTGBase.java From lucene-solr with Apache License 2.0
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Terms terms = MultiTerms.getTerms(r, t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator();
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
  docs = BitsFilteredPostingsEnum.wrap(docs, MultiBits.getLiveDocs(r));
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example #2
Source File: TestDocsAndPositions.java From lucene-solr with Apache License 2.0
public void testDocsEnumStart() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newStringField("foo", "bar", Field.Store.NO));
  writer.addDocument(doc);
  DirectoryReader reader = writer.getReader();
  LeafReader r = getOnlyLeafReader(reader);
  PostingsEnum disi = TestUtil.docs(random(), r, "foo", new BytesRef("bar"), null, PostingsEnum.NONE);
  int docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

  // now reuse and check again
  TermsEnum te = r.terms("foo").iterator();
  assertTrue(te.seekExact(new BytesRef("bar")));
  disi = TestUtil.docs(random(), te, disi, PostingsEnum.NONE);
  docid = disi.docID();
  assertEquals(-1, docid);
  assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  writer.close();
  r.close();
  dir.close();
}
Example #3
Source File: LuceneBatchIterator.java From crate with Apache License 2.0
private boolean innerMoveNext() throws IOException {
  while (tryAdvanceDocIdSetIterator()) {
    LeafReader reader = currentLeaf.reader();
    Bits liveDocs = reader.getLiveDocs();
    int doc;
    while ((doc = currentDocIdSetIt.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      if (docDeleted(liveDocs, doc) || belowMinScore(currentScorer)) {
        continue;
      }
      onDoc(doc);
      return true;
    }
    currentDocIdSetIt = null;
  }
  clearState();
  return false;
}
Example #4
Source File: SerializedDVStrategy.java From lucene-solr with Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
      TwoPhaseIterator it = predicateValueSource.iterator(context, approximation);
      return new ConstantScoreScorer(this, score(), scoreMode, it);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return predicateValueSource.isCacheable(ctx);
    }
  };
}
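Example #4 shows the two-phase pattern: a cheap approximation (here every doc id in the segment, via DocIdSetIterator.all) is paired with a matches() check that runs only on documents the approximation accepts. A minimal sketch of a TwoPhaseIterator subclass, assuming a hypothetical per-document predicate acceptsDoc that stands in for the real check:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TwoPhaseIterator;

// Sketch: two-phase matching over all docs of a segment.
// 'acceptsDoc' is a hypothetical placeholder, not a Lucene API.
final class PredicateTwoPhaseIterator extends TwoPhaseIterator {
  PredicateTwoPhaseIterator(int maxDoc) {
    super(DocIdSetIterator.all(maxDoc)); // cheap first phase: [0, maxDoc)
  }

  @Override
  public boolean matches() throws IOException {
    return acceptsDoc(approximation.docID()); // expensive check, run lazily
  }

  @Override
  public float matchCost() {
    return 10f; // rough per-doc cost estimate used when ordering clauses
  }

  private boolean acceptsDoc(int doc) {
    return (doc & 1) == 0; // placeholder: match even doc ids
  }
}

A consumer that cannot exploit the two phases separately can flatten it with TwoPhaseIterator.asDocIdSetIterator(new PredicateTwoPhaseIterator(maxDoc)).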
Example #5
Source File: BlockJoin.java From lucene-solr with Apache License 2.0
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
  FixedBitSet parentBits = parentList.getBits();
  DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc());
  DocIterator iter = childInput.iterator();
  int currentParent = -1;
  while (iter.hasNext()) {
    int childDoc = iter.nextDoc(); // TODO: skipping
    if (childDoc <= currentParent) { // use <= since we also allow parents in the input
      // we already visited this parent
      continue;
    }
    currentParent = parentBits.nextSetBit(childDoc);
    if (currentParent != DocIdSetIterator.NO_MORE_DOCS) {
      // only collect the parent the first time we skip to it
      collector.collect(currentParent);
    }
  }
  return collector.getDocSet();
}
Example #6
Source File: PerThreadIDVersionAndSeqNoLookup.java From crate with Apache License 2.0
/**
 * returns the internal lucene doc id for the given id bytes.
 * {@link DocIdSetIterator#NO_MORE_DOCS} is returned if not found
 */
private int getDocID(BytesRef id, Bits liveDocs) throws IOException {
  // termsEnum can possibly be null here if this leaf contains only no-ops.
  if (termsEnum != null && termsEnum.seekExact(id)) {
    int docID = DocIdSetIterator.NO_MORE_DOCS;
    // there may be more than one matching docID, in the case of nested docs, so we want the last one:
    docsEnum = termsEnum.postings(docsEnum, 0);
    for (int d = docsEnum.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docsEnum.nextDoc()) {
      if (liveDocs != null && liveDocs.get(d) == false) {
        continue;
      }
      docID = d;
    }
    return docID;
  } else {
    return DocIdSetIterator.NO_MORE_DOCS;
  }
}
Example #7
Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0
/** accumulates per-segment single-valued facet counts, mapping to global ordinal space on-the-fly */
static void accumSingleGeneric(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final LongValues ordmap = map == null ? null : map.getGlobalOrds(subIndex);
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    int term;
    if (si.advanceExact(doc)) {
      term = si.ordValue();
    } else {
      term = -1;
    }
    if (map != null && term >= 0) {
      term = (int) ordmap.get(term);
    }
    int arrIdx = term - startTermIndex;
    if (arrIdx >= 0 && arrIdx < counts.length) counts[arrIdx]++;
  }
}
Example #8
Source File: TestDocCount.java From lucene-solr with Apache License 2.0
private void verifyCount(IndexReader ir) throws Exception {
  final Collection<String> fields = FieldInfos.getIndexedFields(ir);
  for (String field : fields) {
    Terms terms = MultiTerms.getTerms(ir, field);
    if (terms == null) {
      continue;
    }
    int docCount = terms.getDocCount();
    FixedBitSet visited = new FixedBitSet(ir.maxDoc());
    TermsEnum te = terms.iterator();
    while (te.next() != null) {
      PostingsEnum de = TestUtil.docs(random(), te, null, PostingsEnum.NONE);
      while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        visited.set(de.docID());
      }
    }
    assertEquals(visited.cardinality(), docCount);
  }
}
Example #9
Source File: FilterCache.java From incubator-retired-blur with Apache License 2.0
private DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader, String segmentName, Directory directory) throws IOException {
  if (docIdSet == null) {
    // this is better than returning null, as the nonnull result can be cached
    return DocIdSet.EMPTY_DOCIDSET;
  } else if (docIdSet.isCacheable()) {
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    // null is allowed to be returned by iterator(),
    // in this case we wrap with the empty set,
    // which is cacheable.
    if (it == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    } else {
      final IndexFileBitSet bits = new IndexFileBitSet(reader.maxDoc(), _id, segmentName, directory);
      if (!bits.exists()) {
        bits.create(it);
      }
      bits.load();
      return bits;
    }
  }
}
Example #10
Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0
/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // First count in seg-ord space:
  final int segCounts[];
  if (map == null) {
    segCounts = counts;
  } else {
    segCounts = new int[1 + si.getValueCount()];
  }
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (si.advanceExact(doc)) {
      segCounts[1 + si.ordValue()]++;
    } else {
      segCounts[0]++;
    }
  }
  // migrate to global ords (if necessary)
  if (map != null) {
    migrateGlobal(counts, segCounts, subIndex, map);
  }
}
Example #11
Source File: ExportWriter.java From lucene-solr with Apache License 2.0
protected void identifyLowestSortingUnexportedDocs(List<LeafReaderContext> leaves, SortDoc sortDoc, SortQueue queue) throws IOException {
  queue.reset();
  SortDoc top = queue.top();
  for (int i = 0; i < leaves.size(); i++) {
    sortDoc.setNextReader(leaves.get(i));
    DocIdSetIterator it = new BitSetIterator(sets[i], 0); // cost is not useful here
    int docId;
    while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      sortDoc.setValues(docId);
      if (top.lessThan(sortDoc)) {
        top.setValues(sortDoc);
        top = queue.updateTop();
      }
    }
  }
}
Example #12
Source File: ValueSourceRangeFilter.java From lucene-solr with Apache License 2.0
@Override
@SuppressWarnings({"rawtypes"})
public DocIdSet getDocIdSet(final Map context, final LeafReaderContext readerContext, Bits acceptDocs) throws IOException {
  // NB the IndexSearcher parameter here can be null because Filter Weights don't
  // actually use it.
  Weight weight = createWeight(null, ScoreMode.COMPLETE, 1);
  return BitsFilteredDocIdSet.wrap(new DocIdSet() {
    @Override
    public DocIdSetIterator iterator() throws IOException {
      @SuppressWarnings({"unchecked"})
      Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(weight, readerContext, lowerVal, upperVal, includeLower, includeUpper);
      return scorer == null ? null : scorer.iterator();
    }

    @Override
    public Bits bits() {
      return null; // don't use random access
    }

    @Override
    public long ramBytesUsed() {
      return 0L;
    }
  }, acceptDocs);
}
Example #13
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException {
  int index = -1;
  while (true) {
    int target = disi2.docID() + step;
    int doc;
    do {
      doc = disi2.nextDoc();
      index++;
    } while (doc < target);
    assertEquals(doc, disi.advance(target));
    if (doc == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    assertEquals("Expected equality using step " + step + " at docID " + doc, index, disi.index());
  }
}
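Example #13 exercises the advance(target) contract: advance returns the first doc id at or beyond target (or NO_MORE_DOCS) and, like nextDoc(), never moves backwards. A small sketch using a FixedBitSet-backed iterator; the doc ids 3, 7 and 9 are made up for illustration:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public final class AdvanceDemo {
  public static void main(String[] args) throws IOException {
    FixedBitSet bits = new FixedBitSet(16);
    bits.set(3);
    bits.set(7);
    bits.set(9);
    DocIdSetIterator it = new BitSetIterator(bits, 3); // cost = number of set bits
    System.out.println(it.advance(4));  // 7: first doc >= 4
    System.out.println(it.advance(8));  // 9: first doc >= 8
    System.out.println(it.advance(10)); // Integer.MAX_VALUE, i.e. NO_MORE_DOCS
  }
}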
Example #14
Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    filter = getDocSet().getTopFilter();
  }
  DocIdSet readerSet = filter.getDocIdSet(context, null);
  if (readerSet == null) {
    return null;
  }
  DocIdSetIterator readerSetIterator = readerSet.iterator();
  if (readerSetIterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
Example #15
Source File: TestSegmentTermDocs.java From lucene-solr with Apache License 2.0
public void testTermDocs() throws IOException {
  // After adding the document, we should be able to read it back in
  SegmentReader reader = new SegmentReader(info, Version.LATEST.major, newIOContext(random()));
  assertTrue(reader != null);

  TermsEnum terms = reader.terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
  terms.seekCeil(new BytesRef("field"));
  PostingsEnum termDocs = TestUtil.docs(random(), terms, null, PostingsEnum.FREQS);
  if (termDocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    int docId = termDocs.docID();
    assertTrue(docId == 0);
    int freq = termDocs.freq();
    assertTrue(freq == 3);
  }
  reader.close();
}
Example #16
Source File: SparseFixedBitSet.java From lucene-solr with Apache License 2.0
@Override
public void or(DocIdSetIterator it) throws IOException {
  { // specialize union with another SparseFixedBitSet
    final SparseFixedBitSet other = BitSetIterator.getSparseFixedBitSetOrNull(it);
    if (other != null) {
      checkUnpositioned(it);
      or(other);
      return;
    }
  }

  // We do not specialize the union with a FixedBitSet since FixedBitSets are
  // supposed to be used for dense data and sparse fixed bit sets for sparse
  // data, so a sparse set would likely get upgraded by DocIdSetBuilder before
  // being or'ed with a FixedBitSet

  if (it.cost() < indices.length) {
    // the default impl is good for sparse iterators
    super.or(it);
  } else {
    orDense(it);
  }
}
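A short usage sketch for the method above. or() consumes the iterator, which must still be unpositioned (docID() == -1); maxDoc and the iterator are assumed to come from the same segment:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.SparseFixedBitSet;

public final class CollectToSparseBits {
  // Collect the docs matched by an unpositioned iterator into a sparse bit set.
  public static SparseFixedBitSet collect(DocIdSetIterator it, int maxDoc) throws IOException {
    SparseFixedBitSet bits = new SparseFixedBitSet(maxDoc);
    bits.or(it); // picks the sparse or dense strategy shown above
    return bits;
  }
}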
Example #17
Source File: LukeRequestHandler.java From lucene-solr with Apache License 2.0
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
  PostingsEnum postingsEnum = null;
  TermsEnum termsEnum = terms.iterator();
  BytesRef text;
  // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
  for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) {
    text = termsEnum.next();
    if (text == null) { // Ran off the end of the terms enum without finding any live docs with that field in them.
      return null;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // skip docs that are NOT live; liveDocs == null means the segment has no deletions
      if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) {
        continue;
      }
      return reader.document(postingsEnum.docID());
    }
  }
  return null;
}
Example #18
Source File: DocValuesFieldUpdates.java From lucene-solr with Apache License 2.0
@Override
public final int nextDoc() {
  if (idx >= size) {
    return doc = DocIdSetIterator.NO_MORE_DOCS;
  }
  long longDoc = docs.get(idx);
  ++idx;
  for (; idx < size; idx++) {
    // scan forward to last update to this doc
    final long nextLongDoc = docs.get(idx);
    if ((longDoc >>> 1) != (nextLongDoc >>> 1)) {
      break;
    }
    longDoc = nextLongDoc;
  }
  hasValue = (longDoc & HAS_VALUE_MASK) > 0;
  if (hasValue) {
    set(idx - 1);
  }
  doc = (int) (longDoc >> SHIFT);
  return doc;
}
Example #19
Source File: TaxonomyFacetSumValueSource.java From lucene-solr with Apache License 2.0
private void sumValues(List<MatchingDocs> matchingDocs, boolean keepScores, DoubleValuesSource valueSource) throws IOException {
  IntsRef scratch = new IntsRef();
  for (MatchingDocs hits : matchingDocs) {
    OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
    DoubleValues scores = keepScores ? scores(hits) : null;
    DoubleValues functionValues = valueSource.getValues(hits.context, scores);
    DocIdSetIterator docs = hits.bits.iterator();
    int doc;
    while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      ords.get(doc, scratch);
      if (functionValues.advanceExact(doc)) {
        float value = (float) functionValues.doubleValue();
        for (int i = 0; i < scratch.length; i++) {
          values[scratch.ints[i]] += value;
        }
      }
    }
  }
  rollup();
}
Example #20
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0
/**
 * checks advancing docs
 */
public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null) {
    assertNull(rightDocs);
    return;
  }
  int docid = -1;
  int averageGap = MAXDOC / (1 + docFreq);
  int skipInterval = 16;
  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    // we don't assert freqs, they are allowed to be different
  }
}
Example #21
Source File: LongValueFacetCounts.java From lucene-solr with Apache License 2.0
/** Counts directly from SortedNumericDocValues. */
private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs hits : matchingDocs) {
    SortedNumericDocValues values = hits.context.reader().getSortedNumericDocValues(field);
    if (values == null) {
      // this field has no doc values for this segment
      continue;
    }
    NumericDocValues singleValues = DocValues.unwrapSingleton(values);
    if (singleValues != null) {
      countOneSegment(singleValues, hits);
    } else {
      DocIdSetIterator it = ConjunctionDISI.intersectIterators(
          Arrays.asList(hits.bits.iterator(), values));
      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        int limit = values.docValueCount();
        totCount += limit;
        for (int i = 0; i < limit; i++) {
          increment(values.nextValue());
        }
      }
    }
  }
}
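ConjunctionDISI.intersectIterators builds one iterator over the documents present in all of its inputs; Example #21 uses it to visit only the hits that also have a value for the field. A toy sketch with two hand-built bit sets (the doc ids are made up for illustration; in Lucene 9+ this utility lives in ConjunctionUtils instead):

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.search.ConjunctionDISI;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public final class IntersectDemo {
  public static void main(String[] args) throws IOException {
    FixedBitSet a = new FixedBitSet(16);
    a.set(2); a.set(5); a.set(9);
    FixedBitSet b = new FixedBitSet(16);
    b.set(5); b.set(9); b.set(12);
    DocIdSetIterator both = ConjunctionDISI.intersectIterators(
        Arrays.asList(new BitSetIterator(a, 3), new BitSetIterator(b, 3)));
    for (int doc = both.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = both.nextDoc()) {
      System.out.println(doc); // prints 5, then 9
    }
  }
}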
Example #22
Source File: IndexFileBitSet.java From incubator-retired-blur with Apache License 2.0
public void create(DocIdSetIterator it) throws IOException {
  String fileName = getFileName();
  if (_directory.fileExists(getFileName())) {
    LOG.warn("Filter [{0}] in directory [{1}] being recreated due to incorrect size.", fileName, _directory);
    _directory.deleteFile(fileName);
  }
  IndexOutput output = _directory.createOutput(fileName, IOContext.READ);
  int index;
  int currentWordNum = 0;
  long wordValue = 0;
  while ((index = it.nextDoc()) < _numBits) {
    int wordNum = index >> 6; // div 64
    if (currentWordNum > wordNum) {
      throw new IOException("We got a problem here!");
    }
    while (currentWordNum < wordNum) {
      output.writeLong(wordValue);
      currentWordNum++;
      wordValue = 0;
    }
    int bit = index & 0x3f; // mod 64
    long bitmask = 1L << bit;
    wordValue |= bitmask;
  }
  if (_numBits > 0) {
    int totalWords = (_numBits / 64) + 1;
    while (currentWordNum < totalWords) {
      output.writeLong(wordValue);
      currentWordNum++;
      wordValue = 0;
    }
  }
  output.close();
}
Example #23
Source File: ArrayLengthQuery.java From crate with Apache License 2.0
NumTermsPerDocTwoPhaseIterator(LeafReader reader,
                               IntUnaryOperator numTermsOfDoc,
                               IntPredicate matches) {
  super(DocIdSetIterator.all(reader.maxDoc()));
  this.numTermsOfDoc = numTermsOfDoc;
  this.matches = matches;
}
Example #24
Source File: FrozenBufferedUpdates.java From lucene-solr with Apache License 2.0
DocIdSetIterator nextTerm(String field, BytesRef term) throws IOException {
  setField(field);
  if (termsEnum != null) {
    if (sortedTerms) {
      assert assertSorted(term);
      // in the sorted case we can take advantage of the "seeking forward" property
      // this allows us depending on the term dict impl to reuse data-structures internally
      // which speed up iteration over terms and docs significantly.
      int cmp = term.compareTo(readerTerm);
      if (cmp < 0) {
        return null; // requested term does not exist in this segment
      } else if (cmp == 0) {
        return getDocs();
      } else {
        TermsEnum.SeekStatus status = termsEnum.seekCeil(term);
        switch (status) {
          case FOUND:
            return getDocs();
          case NOT_FOUND:
            readerTerm = termsEnum.term();
            return null;
          case END:
            // no more terms in this segment
            termsEnum = null;
            return null;
          default:
            throw new AssertionError("unknown status");
        }
      }
    } else if (termsEnum.seekExact(term)) {
      return getDocs();
    }
  }
  return null;
}
Example #25
Source File: TestBackwardsCompatibility.java From lucene-solr with Apache License 2.0
private int countDocs(PostingsEnum docs) throws IOException {
  int count = 0;
  while ((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    count++;
  }
  return count;
}
Example #26
Source File: SolrConstantScoreQuery.java From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet docIdSet = filter instanceof SolrFilter
      ? ((SolrFilter) filter).getDocIdSet(this.context, context, null)
      : filter.getDocIdSet(context, null);
  if (docIdSet == null) {
    return null;
  }
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}
Example #27
Source File: ValueFeature.java From lucene-solr with Apache License 2.0
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
  if (featureValue != null) {
    return new ValueFeatureScorer(this, featureValue,
        DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
  } else {
    return null;
  }
}
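The DocIdSetIterator.all(maxDoc) factory used above returns a dense iterator over every doc id in [0, maxDoc); passing NO_MORE_DOCS (Integer.MAX_VALUE) as the bound, as this feature does, gives an iterator that outlasts any real segment. A tiny sketch of its behavior:

import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;

public final class AllDocsDemo {
  public static void main(String[] args) throws IOException {
    DocIdSetIterator all = DocIdSetIterator.all(5);
    // prints 0 1 2 3 4
    for (int doc = all.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = all.nextDoc()) {
      System.out.print(doc + " ");
    }
  }
}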
Example #28
Source File: TestMultiDocValues.java From lucene-solr with Apache License 2.0
private void testRandomAdvance(DocIdSetIterator iter1, DocIdSetIterator iter2) throws IOException {
  assertEquals(-1, iter1.docID());
  assertEquals(-1, iter2.docID());

  while (iter1.docID() != NO_MORE_DOCS) {
    if (random().nextBoolean()) {
      assertEquals(iter1.nextDoc(), iter2.nextDoc());
    } else {
      int target = iter1.docID() + TestUtil.nextInt(random(), 1, 100);
      assertEquals(iter1.advance(target), iter2.advance(target));
    }
  }
}
Example #29
Source File: TestExitableDirectoryReader.java From lucene-solr with Apache License 2.0
static private void scan(LeafReader leaf, DocValuesIterator iter) throws IOException {
  for (iter.nextDoc(); iter.docID() != DocIdSetIterator.NO_MORE_DOCS && iter.docID() < leaf.maxDoc();) {
    final int nextDocId = iter.docID() + 1;
    if (random().nextBoolean() && nextDocId < leaf.maxDoc()) {
      if (random().nextBoolean()) {
        iter.advance(nextDocId);
      } else {
        iter.advanceExact(nextDocId);
      }
    } else {
      iter.nextDoc();
    }
  }
}
Example #30
Source File: AnalyticsDriver.java From lucene-solr with Apache License 2.0
/**
 * Drive the collection of reduction data. This includes overall data as well as faceted data.
 *
 * @param manager of the request to drive
 * @param searcher the results of the query
 * @param filter that represents the overall query
 * @param queryRequest used for the search request
 * @throws IOException if an error occurs while reading from Solr
 */
public static void drive(AnalyticsRequestManager manager, SolrIndexSearcher searcher, Filter filter, SolrQueryRequest queryRequest) throws IOException {
  StreamingInfo streamingInfo = manager.getStreamingFacetInfo();
  Iterable<StreamingFacet> streamingFacets = streamingInfo.streamingFacets;
  ReductionCollectionManager collectionManager = streamingInfo.streamingCollectionManager;
  Iterable<FacetValueQueryExecuter> facetExecuters = manager.getFacetExecuters(filter, queryRequest);

  // Streaming phase (Overall results & Value/Pivot Facets)
  // Loop through all documents and collect reduction data for streaming facets and overall results
  if (collectionManager.needsCollection()) {
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    for (int leafNum = 0; leafNum < contexts.size(); leafNum++) {
      LeafReaderContext context = contexts.get(leafNum);
      DocIdSet dis = filter.getDocIdSet(context, null); // solr docsets already exclude any deleted docs
      if (dis == null) {
        continue;
      }
      DocIdSetIterator disi = dis.iterator();
      if (disi != null) {
        collectionManager.doSetNextReader(context);
        int doc = disi.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          // Add a document to the statistics being generated
          collectionManager.collect(doc);
          streamingFacets.forEach(facet -> facet.addFacetValueCollectionTargets());
          collectionManager.apply();
          doc = disi.nextDoc();
        }
      }
    }
  }

  // Executing phase (Query/Range Facets)
  // Send additional Solr Queries to compute facet values
  for (FacetValueQueryExecuter executer : facetExecuters) {
    executer.execute(searcher);
  }
}