Java Code Examples for org.apache.lucene.util.BitSet#cardinality()
The following examples show how to use org.apache.lucene.util.BitSet#cardinality(). Each example notes its source file, the project it comes from, and the project's license.
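Before the project examples, here is a minimal, self-contained sketch of the method itself: BitSet#cardinality() returns the number of set bits. The snippet below is illustrative only and is not taken from any project listed here; it uses FixedBitSet, a concrete implementation of the abstract org.apache.lucene.util.BitSet class, and the class name CardinalityDemo is made up for this sketch.

import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.FixedBitSet;

// Illustrative sketch (not from any project below).
public class CardinalityDemo {
  public static void main(String[] args) {
    // FixedBitSet is a concrete org.apache.lucene.util.BitSet implementation.
    FixedBitSet fixed = new FixedBitSet(100); // room for bits 0..99, all initially clear
    fixed.set(3);
    fixed.set(42);
    fixed.set(99);

    BitSet set = fixed;
    System.out.println(set.cardinality()); // 3 - the number of set bits
    System.out.println(set.length());      // 100 - the capacity, not the count
  }
}

Note that for FixedBitSet, cardinality() popcounts the underlying long[] words, so it is linear in the bit set's capacity rather than constant time. The examples below that compare cardinality() against maxDoc() rely on it being cheap relative to the work it avoids, not free.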
Example 1
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
  }

  try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
    BitSetIterator disi2 = new BitSetIterator(set, cardinality);
    int doc = disi2.docID();
    int index = 0;
    while (doc < cardinality) {
      doc = disi2.nextDoc();
      index++;
    }

    IndexedDISI disi = new IndexedDISI(in, 0L, in.length(), jumpTableentryCount, denseRankPower, cardinality);
    // Advance 1 docID beyond end
    assertFalse("There should be no set bit beyond the valid docID range",
        disi.advanceExact(set.length()));
    disi.advance(doc); // Should be the special docID signifying NO_MORE_DOCS from the BitSetIterator
    assertEquals("The index when advancing beyond the last defined docID should be correct",
        index, disi.index() + 1); // disi.index() + 1 as the while-loop also counts the NO_MORE_DOCS
  }
}
Example 2
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
public void testPositionNotZero() throws IOException {
  final int BLOCKS = 10;
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable

  BitSet set = createSetWithRandomBlocks(BLOCKS);
  try (Directory dir = newDirectory()) {
    final int cardinality = set.cardinality();
    int jumpTableEntryCount;
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      jumpTableEntryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    }
    try (IndexInput fullInput = dir.openInput("foo", IOContext.DEFAULT)) {
      IndexInput blockData =
          IndexedDISI.createBlockSlice(fullInput, "blocks", 0, fullInput.length(), jumpTableEntryCount);
      blockData.seek(random().nextInt((int) blockData.length()));

      RandomAccessInput jumpTable =
          IndexedDISI.createJumpTable(fullInput, 0, fullInput.length(), jumpTableEntryCount);
      IndexedDISI disi =
          new IndexedDISI(blockData, jumpTable, jumpTableEntryCount, denseRankPower, cardinality);
      // This failed at some point during LUCENE-8585 development as it did not reset the slice position
      disi.advanceExact(BLOCKS * 65536 - 1);
    }
  }
}
Example 3
Source File: RecoverySourcePruneMergePolicy.java From crate with Apache License 2.0
static CodecReader wrapReader(String recoverySourceField, CodecReader reader,
                              Supplier<Query> retainSourceQuerySupplier) throws IOException {
  NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
  if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
    return reader; // early terminate - nothing to do here since none of the docs has a recovery source anymore
  }
  IndexSearcher s = new IndexSearcher(reader);
  s.setQueryCache(null);
  Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
  Scorer scorer = weight.scorer(reader.getContext());
  if (scorer != null) {
    BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
    // calculating the cardinality is significantly cheaper than skipping all bulk-merging we might do
    // if retentions are high we keep most of it
    if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
      return reader; // keep all source
    }
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
  } else {
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
  }
}
Example 4
Source File: CheckJoinIndex.java From lucene-solr with Apache License 2.0
/**
 * Check that the given index is good to use for block joins.
 * @throws IllegalStateException if the index does not have an appropriate structure
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    if (context.reader().maxDoc() == 0) {
      continue;
    }
    final BitSet parents = parentsFilter.getBitSet(context);
    if (parents == null || parents.cardinality() == 0) {
      throw new IllegalStateException("Every segment should have at least one parent, but "
          + context.reader() + " does not have any");
    }
    if (parents.get(context.reader().maxDoc() - 1) == false) {
      throw new IllegalStateException("The last document of a segment must always be a parent, but "
          + context.reader() + " has a child as a last doc");
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    if (liveDocs != null) {
      int prevParentDoc = -1;
      DocIdSetIterator it = new BitSetIterator(parents, 0L);
      for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
        final boolean parentIsLive = liveDocs.get(parentDoc);
        for (int child = prevParentDoc + 1; child != parentDoc; child++) {
          final boolean childIsLive = liveDocs.get(child);
          if (parentIsLive != childIsLive) {
            if (childIsLive) {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader()
                  + " is live but has a deleted child document " + child);
            } else {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader()
                  + " is deleted but has a live child document " + child);
            }
          }
        }
        prevParentDoc = parentDoc;
      }
    }
  }
}
Example 5
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0
private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7) + 7); // sane + chance of disable
  long length;
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    length = out.getFilePointer();
  }

  try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
    for (int i = 0; i < set.length(); i++) {
      IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      assertEquals("The bit at " + i + " should be correct with advanceExact",
          set.get(i), disi.advanceExact(i));

      IndexedDISI disi2 = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      disi2.advance(i);
      // Proper sanity check with jump tables as an error could make them seek backwards
      assertTrue("The docID should at least be " + i + " after advance(" + i + ") but was " + disi2.docID(),
          i <= disi2.docID());
      if (set.get(i)) {
        assertEquals("The docID should be present with advance", i, disi2.docID());
      } else {
        assertNotSame("The docID should not be present with advance", i, disi2.docID());
      }
    }
  }
}
Example 6
Source File: FilterableTermsEnum.java From Elasticsearch with Apache License 2.0
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag,
                           @Nullable Query filter) throws IOException {
  if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
    throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
  }
  this.docsEnumFlag = docsEnumFlag;
  if (filter == null) {
    // Important - need to use the doc count that includes deleted docs
    // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
    numDocs = reader.maxDoc();
  }
  List<LeafReaderContext> leaves = reader.leaves();
  List<Holder> enums = new ArrayList<>(leaves.size());
  final Weight weight;
  if (filter == null) {
    weight = null;
  } else {
    final IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    weight = searcher.createNormalizedWeight(filter, false);
  }
  for (LeafReaderContext context : leaves) {
    Terms terms = context.reader().terms(field);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum == null) {
      continue;
    }
    BitSet bits = null;
    if (weight != null) {
      Scorer scorer = weight.scorer(context);
      if (scorer == null) {
        // fully filtered, none matching, no need to iterate on this
        continue;
      }
      DocIdSetIterator docs = scorer.iterator();

      // we want to force apply deleted docs
      final Bits liveDocs = context.reader().getLiveDocs();
      if (liveDocs != null) {
        docs = new FilteredDocIdSetIterator(docs) {
          @Override
          protected boolean match(int doc) {
            return liveDocs.get(doc);
          }
        };
      }

      BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
      builder.or(docs);
      bits = builder.build().bits();

      // Count how many docs are in our filtered set
      // TODO make this lazy-loaded only for those that need it?
      numDocs += bits.cardinality();
    }
    enums.add(new Holder(termsEnum, bits));
  }
  this.enums = enums.toArray(new Holder[enums.size()]);
}