org.apache.lucene.util.BitDocIdSet Java Examples
The following examples show how to use org.apache.lucene.util.BitDocIdSet. The original project, source file, and license are noted above each example.
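Before the project-specific examples, here is a minimal, self-contained sketch of the core usage pattern: a BitDocIdSet wraps an org.apache.lucene.util.BitSet (usually a FixedBitSet) and exposes it as a DocIdSet whose iterator() walks the set bits in increasing doc ID order. Only the Lucene class and package names below come from lucene-core; the class name, bit positions, and printing are illustrative.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.FixedBitSet;

public class BitDocIdSetBasics {
  public static void main(String[] args) throws IOException {
    // A fixed-size bit set, typically sized to the segment's maxDoc (here an arbitrary 64).
    FixedBitSet bits = new FixedBitSet(64);
    bits.set(3);
    bits.set(17);
    bits.set(42);

    // Wrap the bit set as a DocIdSet and iterate the matching doc IDs in increasing order.
    BitDocIdSet docIdSet = new BitDocIdSet(bits);
    DocIdSetIterator it = docIdSet.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      System.out.println("matching doc: " + doc); // prints 3, 17, 42
    }
  }
}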
Example #1
Source File: TestConjunctionDISI.java From lucene-solr with Apache License 2.0
public void testConjunction() throws IOException {
  final int iters = atLeast(100);
  for (int iter = 0; iter < iters; ++iter) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int numIterators = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] sets = new FixedBitSet[numIterators];
    final Scorer[] iterators = new Scorer[numIterators];
    for (int i = 0; i < iterators.length; ++i) {
      final FixedBitSet set = randomSet(maxDoc);
      switch (random().nextInt(3)) {
        case 0:
          // simple iterator
          sets[i] = set;
          iterators[i] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
              anonymizeIterator(new BitDocIdSet(set).iterator()));
          break;
        case 1:
          // bitSet iterator
          sets[i] = set;
          iterators[i] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
              new BitDocIdSet(set).iterator());
          break;
        default:
          // scorer with approximation
          final FixedBitSet confirmed = clearRandomBits(set);
          sets[i] = confirmed;
          final TwoPhaseIterator approximation =
              approximation(new BitDocIdSet(set).iterator(), confirmed);
          iterators[i] = scorer(approximation);
          break;
      }
    }
    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
    assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
  }
}
Example #2
Source File: TestConjunctionDISI.java From lucene-solr with Apache License 2.0
public void testConjunctionApproximation() throws IOException {
  final int iters = atLeast(100);
  for (int iter = 0; iter < iters; ++iter) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int numIterators = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] sets = new FixedBitSet[numIterators];
    final Scorer[] iterators = new Scorer[numIterators];
    boolean hasApproximation = false;
    for (int i = 0; i < iterators.length; ++i) {
      final FixedBitSet set = randomSet(maxDoc);
      if (random().nextBoolean()) {
        // simple iterator
        sets[i] = set;
        iterators[i] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.COMPLETE_NO_SCORES,
            new BitDocIdSet(set).iterator());
      } else {
        // scorer with approximation
        final FixedBitSet confirmed = clearRandomBits(set);
        sets[i] = confirmed;
        final TwoPhaseIterator approximation =
            approximation(new BitDocIdSet(set).iterator(), confirmed);
        iterators[i] = scorer(approximation);
        hasApproximation = true;
      }
    }
    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
    TwoPhaseIterator twoPhaseIterator = TwoPhaseIterator.unwrap(conjunction);
    assertEquals(hasApproximation, twoPhaseIterator != null);
    if (hasApproximation) {
      assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator)));
    }
  }
}
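Example #2 asserts that a conjunction of two-phase scorers can itself be unwrapped as a TwoPhaseIterator. For context, the generic way to consume any TwoPhaseIterator is to advance its cheap approximation and confirm each candidate with matches(); the sketch below shows that pattern on its own. It is not part of the test, the drain name is made up, and the usual org.apache.lucene.search and java.io imports are assumed.

// Generic consumption pattern for a TwoPhaseIterator such as the one unwrapped above.
// Hypothetical helper; assumes org.apache.lucene.search.* and java.io.IOException imports.
static void drain(TwoPhaseIterator twoPhase) throws IOException {
  DocIdSetIterator approximation = twoPhase.approximation();
  for (int doc = approximation.nextDoc();
       doc != DocIdSetIterator.NO_MORE_DOCS;
       doc = approximation.nextDoc()) {
    if (twoPhase.matches()) {
      // 'doc' is a confirmed match; the approximation alone may contain false positives
    }
  }
}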
Example #3
Source File: TestSort.java From lucene-solr with Apache License 2.0
public DocIdSet randSet(int sz) {
  FixedBitSet obs = new FixedBitSet(sz);
  int n = r.nextInt(sz);
  for (int i = 0; i < n; i++) {
    obs.set(r.nextInt(sz));
  }
  return new BitDocIdSet(obs);
}
Example #4
Source File: FilterableTermsEnum.java From Elasticsearch with Apache License 2.0
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
  if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
    throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
  }
  this.docsEnumFlag = docsEnumFlag;
  if (filter == null) {
    // Important - need to use the doc count that includes deleted docs
    // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
    numDocs = reader.maxDoc();
  }
  List<LeafReaderContext> leaves = reader.leaves();
  List<Holder> enums = new ArrayList<>(leaves.size());
  final Weight weight;
  if (filter == null) {
    weight = null;
  } else {
    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    weight = searcher.createNormalizedWeight(filter, false);
  }
  for (LeafReaderContext context : leaves) {
    Terms terms = context.reader().terms(field);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == null) {
      continue;
    }
    BitSet bits = null;
    if (weight != null) {
      Scorer scorer = weight.scorer(context);
      if (scorer == null) {
        // fully filtered, none matching, no need to iterate on this
        continue;
      }
      DocIdSetIterator docs = scorer.iterator();

      // we want to force apply deleted docs
      final Bits liveDocs = context.reader().getLiveDocs();
      if (liveDocs != null) {
        docs = new FilteredDocIdSetIterator(docs) {
          @Override
          protected boolean match(int doc) {
            return liveDocs.get(doc);
          }
        };
      }

      BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
      builder.or(docs);
      bits = builder.build().bits();

      // Count how many docs are in our filtered set
      // TODO make this lazy-loaded only for those that need it?
      numDocs += bits.cardinality();
    }
    enums.add(new Holder(termsEnum, bits));
  }
  this.enums = enums.toArray(new Holder[enums.size()]);
}
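The key BitDocIdSet step in this constructor is the Builder that turns the filtered DocIdSetIterator into a per-segment BitSet. A condensed restatement of just that step is sketched below; collectMatches, matchingDocs, and maxDoc are made-up names, and BitDocIdSet.Builder is only present in the older Lucene line this Elasticsearch code builds against.

// Hypothetical helper mirroring the builder usage in the constructor above.
// Assumes org.apache.lucene.search.DocIdSetIterator, org.apache.lucene.util.*,
// and java.io.IOException imports.
static BitSet collectMatches(DocIdSetIterator matchingDocs, int maxDoc) throws IOException {
  BitDocIdSet.Builder builder = new BitDocIdSet.Builder(maxDoc);
  builder.or(matchingDocs);        // drain the iterator into the underlying bit set
  return builder.build().bits();   // expose the accumulated matches as a BitSet
}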
Example #5
Source File: BitsFilter.java From SearchServices with GNU Lesser General Public License v3.0
public DocIdSet getDocIdSet(LeafReaderContext context, Bits bits) {
  return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bitSets.get(context.ord)), bits);
}
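Here getDocIdSet returns, for one segment, a precomputed BitDocIdSet narrowed by the supplied acceptDocs via BitsFilteredDocIdSet.wrap. As a rough caller-side sketch (the iterateMatches helper is made up, and it assumes an open IndexReader plus a BitsFilter instance constructed elsewhere), each segment's live docs would be passed in so deleted documents are excluded:

// Hypothetical caller of the per-segment filter above.
// Assumes the usual org.apache.lucene.index and org.apache.lucene.search imports.
static void iterateMatches(BitsFilter filter, IndexReader reader) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    // Live docs may be null when the segment has no deletions.
    DocIdSet docIdSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
    DocIdSetIterator it = docIdSet == null ? null : docIdSet.iterator();
    if (it == null) {
      continue; // no matches in this segment
    }
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // process segment-local doc ID 'doc'
    }
  }
}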
Example #6
Source File: RandomSamplingFacetsCollector.java From lucene-solr with Apache License 2.0
/** Create a sampled copy of the given hits. */
private MatchingDocs createSample(MatchingDocs docs) {
  int maxdoc = docs.context.reader().maxDoc();

  // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
  FixedBitSet sampleDocs = new FixedBitSet(maxdoc);

  int binSize = (int) (1.0 / samplingRate);

  try {
    int counter = 0;
    int limit, randomIndex;
    if (leftoverBin != NOT_CALCULATED) {
      limit = leftoverBin;
      // either NOT_CALCULATED, which means we already sampled from that bin,
      // or the next document to sample
      randomIndex = leftoverIndex;
    } else {
      limit = binSize;
      randomIndex = random.nextInt(binSize);
    }
    final DocIdSetIterator it = docs.bits.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      if (counter == randomIndex) {
        sampleDocs.set(doc);
      }
      counter++;
      if (counter >= limit) {
        counter = 0;
        limit = binSize;
        randomIndex = random.nextInt(binSize);
      }
    }

    if (counter == 0) {
      // we either exhausted the bin and the iterator at the same time, or
      // this segment had no results. in the latter case we might want to
      // carry leftover to the next segment as is, but that complicates the
      // code and doesn't seem so important.
      leftoverBin = leftoverIndex = NOT_CALCULATED;
    } else {
      leftoverBin = limit - counter;
      if (randomIndex > counter) {
        // the document to sample is in the next bin
        leftoverIndex = randomIndex - counter;
      } else if (randomIndex < counter) {
        // we sampled a document from the bin, so just skip over remaining
        // documents in the bin in the next segment.
        leftoverIndex = NOT_CALCULATED;
      }
    }

    return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
Example #7
Source File: TestConjunctionDISI.java From lucene-solr with Apache License 2.0
public void testRecursiveConjunctionApproximation() throws IOException {
  final int iters = atLeast(100);
  for (int iter = 0; iter < iters; ++iter) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int numIterators = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] sets = new FixedBitSet[numIterators];
    Scorer conjunction = null;
    boolean hasApproximation = false;
    for (int i = 0; i < numIterators; ++i) {
      final FixedBitSet set = randomSet(maxDoc);
      final Scorer newIterator;
      switch (random().nextInt(3)) {
        case 0:
          // simple iterator
          sets[i] = set;
          newIterator = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
              anonymizeIterator(new BitDocIdSet(set).iterator()));
          break;
        case 1:
          // bitSet iterator
          sets[i] = set;
          newIterator = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
              new BitDocIdSet(set).iterator());
          break;
        default:
          // scorer with approximation
          final FixedBitSet confirmed = clearRandomBits(set);
          sets[i] = confirmed;
          final TwoPhaseIterator approximation =
              approximation(new BitDocIdSet(set).iterator(), confirmed);
          newIterator = scorer(approximation);
          hasApproximation = true;
          break;
      }
      if (conjunction == null) {
        conjunction = newIterator;
      } else {
        final DocIdSetIterator conj = ConjunctionDISI.intersectScorers(Arrays.asList(conjunction, newIterator));
        conjunction = scorer(conj, TwoPhaseIterator.unwrap(conj));
      }
    }

    TwoPhaseIterator twoPhaseIterator = conjunction.twoPhaseIterator();
    assertEquals(hasApproximation, twoPhaseIterator != null);
    if (hasApproximation) {
      assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator)));
    } else {
      assertEquals(intersect(sets), toBitSet(maxDoc, conjunction.iterator()));
    }
  }
}
Example #8
Source File: TestConjunctionDISI.java From lucene-solr with Apache License 2.0
public void testCollapseSubConjunctions(boolean wrapWithScorer) throws IOException {
  final int iters = atLeast(100);
  for (int iter = 0; iter < iters; ++iter) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int numIterators = TestUtil.nextInt(random(), 5, 10);
    final FixedBitSet[] sets = new FixedBitSet[numIterators];
    final List<Scorer> scorers = new LinkedList<>();
    for (int i = 0; i < numIterators; ++i) {
      final FixedBitSet set = randomSet(maxDoc);
      if (random().nextBoolean()) {
        // simple iterator
        sets[i] = set;
        scorers.add(new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
            new BitDocIdSet(set).iterator()));
      } else {
        // scorer with approximation
        final FixedBitSet confirmed = clearRandomBits(set);
        sets[i] = confirmed;
        final TwoPhaseIterator approximation =
            approximation(new BitDocIdSet(set).iterator(), confirmed);
        scorers.add(scorer(approximation));
      }
    }

    // make some sub sequences into sub conjunctions
    final int subIters = atLeast(3);
    for (int subIter = 0; subIter < subIters && scorers.size() > 3; ++subIter) {
      final int subSeqStart = TestUtil.nextInt(random(), 0, scorers.size() - 2);
      final int subSeqEnd = TestUtil.nextInt(random(), subSeqStart + 2, scorers.size());
      List<Scorer> subIterators = scorers.subList(subSeqStart, subSeqEnd);
      Scorer subConjunction;
      if (wrapWithScorer) {
        subConjunction = new ConjunctionScorer(new FakeWeight(), subIterators, Collections.emptyList());
      } else {
        subConjunction = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES,
            ConjunctionDISI.intersectScorers(subIterators));
      }
      scorers.set(subSeqStart, subConjunction);
      int toRemove = subSeqEnd - subSeqStart - 1;
      while (toRemove-- > 0) {
        scorers.remove(subSeqStart + 1);
      }
    }
    if (scorers.size() == 1) {
      // ConjunctionDISI needs two iterators
      scorers.add(new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, DocIdSetIterator.all(maxDoc)));
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(scorers);
    assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
  }
}
Example #9
Source File: HashQParserPlugin.java From lucene-solr with Apache License 2.0
public DocIdSet getDocIdSet(LeafReaderContext context, Bits bits) {
  return BitsFilteredDocIdSet.wrap(new BitDocIdSet(bitSets[context.ord]), bits);
}