Java Code Examples for org.apache.lucene.util.BitSet#cardinality()
The following examples show how to use org.apache.lucene.util.BitSet#cardinality(). Each example notes its source file, the project it comes from, and the project's license.
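Before the project examples, here is a minimal, self-contained sketch of the method itself: BitSet#cardinality() returns the number of set bits. The snippet below is illustrative only and is not taken from any project listed here; it uses FixedBitSet, a concrete implementation of the abstract org.apache.lucene.util.BitSet class, and the class name CardinalityDemo is made up for this sketch.

import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;

// Illustrative sketch (not from any project below).
public class CardinalityDemo {
  public static void main(String[] args) {
    // FixedBitSet is a concrete org.apache.lucene.util.BitSet implementation.
    FixedBitSet fixed = new FixedBitSet(100); // room for bits 0..99, all initially clear
    fixed.set(3);
    fixed.set(42);
    fixed.set(99);

    BitSet set = fixed;
    System.out.println(set.cardinality()); // 3 - the number of set bits
    System.out.println(set.length());      // 100 - the capacity, not the count
  }
}

Note that for FixedBitSet, cardinality() popcounts the underlying long[] words, so it is linear in the bit set's capacity rather than constant time. The examples below that compare cardinality() against maxDoc() rely on it being cheap relative to the work it avoids, not free.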
Example 1
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
  }

  try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
    BitSetIterator disi2 = new BitSetIterator(set, cardinality);
    int doc = disi2.docID();
    int index = 0;
    while (doc < cardinality) {
      doc = disi2.nextDoc();
      index++;
    }

    IndexedDISI disi = new IndexedDISI(in, 0L, in.length(), jumpTableentryCount, denseRankPower, cardinality);
    // Advance 1 docID beyond end
    assertFalse("There should be no set bit beyond the valid docID range",
        disi.advanceExact(set.length()));
    disi.advance(doc); // Should be the special docID signifying NO_MORE_DOCS from the BitSetIterator
    assertEquals("The index when advancing beyond the last defined docID should be correct",
        index, disi.index() + 1); // disi.index() + 1 as the while-loop also counts the NO_MORE_DOCS
  }
}
Example 2
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
public void testPositionNotZero() throws IOException {
  final int BLOCKS = 10;
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable

  BitSet set = createSetWithRandomBlocks(BLOCKS);
  try (Directory dir = newDirectory()) {
    final int cardinality = set.cardinality();
    int jumpTableEntryCount;
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      jumpTableEntryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    }
    try (IndexInput fullInput = dir.openInput("foo", IOContext.DEFAULT)) {
      IndexInput blockData =
          IndexedDISI.createBlockSlice(fullInput, "blocks", 0, fullInput.length(), jumpTableEntryCount);
      blockData.seek(random().nextInt((int) blockData.length()));

      RandomAccessInput jumpTable =
          IndexedDISI.createJumpTable(fullInput, 0, fullInput.length(), jumpTableEntryCount);
      IndexedDISI disi =
          new IndexedDISI(blockData, jumpTable, jumpTableEntryCount, denseRankPower, cardinality);
      // This failed at some point during LUCENE-8585 development as it did not reset the slice position
      disi.advanceExact(BLOCKS * 65536 - 1);
    }
  }
}
Example 3
Source File: RecoverySourcePruneMergePolicy.java From crate with Apache License 2.0
static CodecReader wrapReader(String recoverySourceField, CodecReader reader,
                              Supplier<Query> retainSourceQuerySupplier) throws IOException {
  NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
  if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
    return reader; // early terminate - nothing to do here since none of the docs has a recovery source anymore
  }
  IndexSearcher s = new IndexSearcher(reader);
  s.setQueryCache(null);
  Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
  Scorer scorer = weight.scorer(reader.getContext());
  if (scorer != null) {
    BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
    // calculating the cardinality is significantly cheaper than skipping all bulk-merging we might do
    // if retentions are high we keep most of it
    if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
      return reader; // keep all source
    }
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
  } else {
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
  }
}
Example 4
Source File: CheckJoinIndex.java From lucene-solr with Apache License 2.0
/**
 * Check that the given index is good to use for block joins.
 * @throws IllegalStateException if the index does not have an appropriate structure
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    if (context.reader().maxDoc() == 0) {
      continue;
    }
    final BitSet parents = parentsFilter.getBitSet(context);
    if (parents == null || parents.cardinality() == 0) {
      throw new IllegalStateException("Every segment should have at least one parent, but "
          + context.reader() + " does not have any");
    }
    if (parents.get(context.reader().maxDoc() - 1) == false) {
      throw new IllegalStateException("The last document of a segment must always be a parent, but "
          + context.reader() + " has a child as a last doc");
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    if (liveDocs != null) {
      int prevParentDoc = -1;
      DocIdSetIterator it = new BitSetIterator(parents, 0L);
      for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
        final boolean parentIsLive = liveDocs.get(parentDoc);
        for (int child = prevParentDoc + 1; child != parentDoc; child++) {
          final boolean childIsLive = liveDocs.get(child);
          if (parentIsLive != childIsLive) {
            if (childIsLive) {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader()
                  + " is live but has a deleted child document " + child);
            } else {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader()
                  + " is deleted but has a live child document " + child);
            }
          }
        }
        prevParentDoc = parentDoc;
      }
    }
  }
}
Example 5
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
  long length;
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    length = out.getFilePointer();
  }

  try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
    for (int i = 0; i < set.length(); i++) {
      IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      assertEquals("The bit at " + i + " should be correct with advanceExact",
          set.get(i), disi.advanceExact(i));

      IndexedDISI disi2 = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      disi2.advance(i);
      // Proper sanity check with jump tables as an error could make them seek backwards
      assertTrue("The docID should at least be " + i + " after advance(" + i + ") but was " + disi2.docID(),
          i <= disi2.docID());
      if (set.get(i)) {
        assertEquals("The docID should be present with advance", i, disi2.docID());
      } else {
        assertNotSame("The docID should not be present with advance", i, disi2.docID());
      }
    }
  }
}
Example 6
Source File: FilterableTermsEnum.java From Elasticsearch with Apache License 2.0
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag,
                           @Nullable Query filter) throws IOException {
  if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
    throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
  }
  this.docsEnumFlag = docsEnumFlag;
  if (filter == null) {
    // Important - need to use the doc count that includes deleted docs
    // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
    numDocs = reader.maxDoc();
  }
  List<LeafReaderContext> leaves = reader.leaves();
  List<Holder> enums = new ArrayList<>(leaves.size());
  final Weight weight;
  if (filter == null) {
    weight = null;
  } else {
    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    weight = searcher.createNormalizedWeight(filter, false);
  }
  for (LeafReaderContext context : leaves) {
    Terms terms = context.reader().terms(field);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == null) {
      continue;
    }
    BitSet bits = null;
    if (weight != null) {
      Scorer scorer = weight.scorer(context);
      if (scorer == null) {
        // fully filtered, none matching, no need to iterate on this
        continue;
      }
      DocIdSetIterator docs = scorer.iterator();

      // we want to force apply deleted docs
      final Bits liveDocs = context.reader().getLiveDocs();
      if (liveDocs != null) {
        docs = new FilteredDocIdSetIterator(docs) {
          @Override
          protected boolean match(int doc) {
            return liveDocs.get(doc);
          }
        };
      }

      BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
      builder.or(docs);
      bits = builder.build().bits();

      // Count how many docs are in our filtered set
      // TODO make this lazy-loaded only for those that need it?
      numDocs += bits.cardinality();
    }
    enums.add(new Holder(termsEnum, bits));
  }
  this.enums = enums.toArray(new Holder[enums.size()]);
}