Java Code Examples for org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS
The following examples show how to use org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS.
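Every example below relies on the same exhaustion idiom: keep calling nextDoc() (or advance()) until the iterator returns DocIdSetIterator.NO_MORE_DOCS, which is defined as Integer.MAX_VALUE. The following is a minimal sketch of that loop; the countDocs helper is hypothetical and not taken from any of the projects below.

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;

final class NoMoreDocsIdiom {

  /** Counts the documents produced by the iterator by driving it to exhaustion. */
  static int countDocs(DocIdSetIterator iterator) throws IOException {
    int count = 0;
    // nextDoc() returns the next matching doc id, or NO_MORE_DOCS once the set is exhausted.
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
      count++; // process doc here
    }
    return count;
  }
}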
Example 1
Source File: RecoverySourcePruneMergePolicy.java From crate with Apache License 2.0
static CodecReader wrapReader(String recoverySourceField, CodecReader reader,
                              Supplier<Query> retainSourceQuerySupplier) throws IOException {
  NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
  if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
    return reader; // early terminate - nothing to do here since none of the docs has a recovery source anymore.
  }
  IndexSearcher s = new IndexSearcher(reader);
  s.setQueryCache(null);
  Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
  Scorer scorer = weight.scorer(reader.getContext());
  if (scorer != null) {
    BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
    // calculating the cardinality is significantly cheaper than skipping all bulk-merging we might do
    // if retentions are high we keep most of it
    if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
      return reader; // keep all source
    }
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
  } else {
    return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
  }
}
Example 2
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0
@Override
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (termsEnum.seekExact(terms.get(ords[i], spare))) {
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      float score = TermsIncludingScoreQuery.this.scores[ords[i]];
      for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
        // I prefer this:
        /*if (scores[doc] < score) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }*/
        // But this behaves the same as MVInnerScorer and only then the tests will pass:
        if (!matchingDocs.get(doc)) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }
      }
    }
  }
}
Example 3
Source File: TestDocIdSetBuilder.java From lucene-solr with Apache License 2.0
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
  if (d1 == null) {
    if (d2 != null) {
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
    }
  } else if (d2 == null) {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
  } else {
    DocIdSetIterator i1 = d1.iterator();
    DocIdSetIterator i2 = d2.iterator();
    for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
      assertEquals(doc, i2.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
  }
}
Example 4
Source File: Lucene.java From crate with Apache License 2.0
/**
 * Check whether there is one or more documents matching the provided query.
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
  final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
  // the scorer API should be more efficient at stopping after the first
  // match than the bulk scorer API
  for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
    final Scorer scorer = weight.scorer(context);
    if (scorer == null) {
      continue;
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    final DocIdSetIterator iterator = scorer.iterator();
    for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
      if (liveDocs == null || liveDocs.get(doc)) {
        return true;
      }
    }
  }
  return false;
}
Example 5
Source File: TestRTGBase.java From lucene-solr with Apache License 2.0
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Terms terms = MultiTerms.getTerms(r, t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator();
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
  docs = BitsFilteredPostingsEnum.wrap(docs, MultiBits.getLiveDocs(r));
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example 6
Source File: TaxonomyIndexArrays.java From lucene-solr with Apache License 2.0
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
    return;
  }

  // it's ok to use MultiTerms because we only iterate on one posting list.
  // breaking it to loop over the leaves() only complicates code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,
      Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
      PostingsEnum.PAYLOADS);

  // shouldn't really happen, if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  }

  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      }

      parents[i] = positions.nextPosition();

      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category " + (i + 1), reader.toString());
        }
        break;
      }
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
    }
  }
}
Example 7
Source File: RoaringDocIdSet.java From lucene-solr with Apache License 2.0
/** Add the content of the provided {@link DocIdSetIterator}. */
public Builder add(DocIdSetIterator disi) throws IOException {
  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    add(doc);
  }
  return this;
}
Example 8
Source File: TestCodecs.java From lucene-solr with Apache License 2.0
public void testDocsOnlyFreq() throws Exception {
  // tests that when fields are indexed with DOCS_ONLY, the Codec
  // returns 1 in docsEnum.freq()
  Directory dir = newDirectory();
  Random random = random();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random)));
  // we don't need many documents to assert this, but don't use one document either
  int numDocs = atLeast(random, 50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("f", "doc", Store.NO));
    writer.addDocument(doc);
  }
  writer.close();

  Term term = new Term("f", new BytesRef("doc"));
  DirectoryReader reader = DirectoryReader.open(dir);
  for (LeafReaderContext ctx : reader.leaves()) {
    PostingsEnum de = ctx.reader().postings(term);
    while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      assertEquals("wrong freq for doc " + de.docID(), 1, de.freq());
    }
  }

  reader.close();
  dir.close();
}
Example 9
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Compute termvector number basic.
 *
 * @param docSet the doc set
 * @param termDocId the term doc id
 * @param termsEnum the terms enum
 * @param r the r
 * @param lrc the lrc
 * @param postingsEnum the postings enum
 * @return the termvector number basic
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
Example 10
Source File: BitSet.java From lucene-solr with Apache License 2.0
/** Does in-place OR of the bits provided by the iterator. The state of the
 * iterator after this operation terminates is undefined. */
public void or(DocIdSetIterator iter) throws IOException {
  checkUnpositioned(iter);
  for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
    set(doc);
  }
}
Example 11
Source File: TestIndexWriter.java From lucene-solr with Apache License 2.0
private void assertHardLiveDocs(IndexWriter writer, Set<Integer> uniqueDocs) throws IOException {
  try (DirectoryReader reader = DirectoryReader.open(writer)) {
    assertEquals(uniqueDocs.size(), reader.numDocs());
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext ctx : leaves) {
      LeafReader leaf = ctx.reader();
      assertTrue(leaf instanceof SegmentReader);
      SegmentReader sr = (SegmentReader) leaf;
      if (sr.getHardLiveDocs() != null) {
        Terms id = sr.terms("id");
        TermsEnum iterator = id.iterator();
        Bits hardLiveDocs = sr.getHardLiveDocs();
        Bits liveDocs = sr.getLiveDocs();
        for (Integer dId : uniqueDocs) {
          boolean mustBeHardDeleted = dId % 2 == 0;
          if (iterator.seekExact(new BytesRef(dId.toString()))) {
            PostingsEnum postings = iterator.postings(null);
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
              if (liveDocs.get(postings.docID())) {
                assertTrue(hardLiveDocs.get(postings.docID()));
              } else if (mustBeHardDeleted) {
                assertFalse(hardLiveDocs.get(postings.docID()));
              } else {
                assertTrue(hardLiveDocs.get(postings.docID()));
              }
            }
          }
        }
      }
    }
  }
}
Example 12
Source File: BitDocSet.java From lucene-solr with Apache License 2.0
@Override
public DocIterator iterator() {
  return new DocIterator() {
    private final BitSetIterator iter = new BitSetIterator(bits, 0L); // cost is not useful here
    private int pos = iter.nextDoc();

    @Override
    public boolean hasNext() {
      return pos != DocIdSetIterator.NO_MORE_DOCS;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public void remove() {
      bits.clear(pos);
    }

    @Override
    public int nextDoc() {
      int old = pos;
      pos = iter.nextDoc();
      return old;
    }

    @Override
    public float score() {
      return 0.0f;
    }
  };
}
Example 13
Source File: IndexImporter.java From incubator-retired-blur with Apache License 2.0
private void applyDeletes(Directory directory, IndexWriter indexWriter, IndexSearcherCloseable searcher,
    String shard, boolean emitDeletes, Configuration configuration) throws IOException {
  DirectoryReader newReader = DirectoryReader.open(directory);
  try {
    List<AtomicReaderContext> newLeaves = newReader.getContext().leaves();
    BlurPartitioner blurPartitioner = new BlurPartitioner();
    Text key = new Text();
    int numberOfShards = _shardContext.getTableContext().getDescriptor().getShardCount();
    int shardId = ShardUtil.getShardIndex(shard);

    Action action = new Action() {
      @Override
      public void found(AtomicReader reader, Bits liveDocs, TermsEnum termsEnum) throws IOException {
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null);
        if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(termsEnum.term())));
        }
      }
    };

    LOG.info("Applying deletes for table [{0}] shard [{1}] new reader [{2}]", _table, shard, newReader);
    boolean skipCheckRowIds = isInternal(newReader);
    LOG.info("Skip rowid check [{0}] for table [{1}] shard [{2}] new reader [{3}]", skipCheckRowIds, _table, shard, newReader);
    for (AtomicReaderContext context : newLeaves) {
      AtomicReader newAtomicReader = context.reader();
      if (isFastRowIdDeleteSupported(newAtomicReader)) {
        runNewRowIdCheckAndDelete(indexWriter, emitDeletes, blurPartitioner, key, numberOfShards, shardId,
            newAtomicReader, skipCheckRowIds);
      } else {
        runOldMergeSortRowIdCheckAndDelete(emitDeletes, searcher.getIndexReader(), blurPartitioner, key,
            numberOfShards, shardId, action, newAtomicReader);
      }
    }
  } finally {
    newReader.close();
  }
}
Example 14
Source File: CollapsingQParserPlugin.java From lucene-solr with Apache License 2.0
@Override
public void finish() throws IOException {
  if (contexts.length == 0) {
    return;
  }

  if (nullScore > -1) {
    collapsedSet.set(nullDoc);
  }

  // Handle the boosted docs.
  if (this.boostKeys != null) {
    int s = boostKeys.size();
    for (int i = 0; i < s; i++) {
      int key = this.boostKeys.get(i);
      if (key != nullValue) {
        cmap.remove(key);
      }
      // Add the boosted docs to the collapsedSet
      this.collapsedSet.set(boostDocs.get(i));
    }
  }

  Iterator<IntLongCursor> it1 = cmap.iterator();
  while (it1.hasNext()) {
    IntLongCursor cursor = it1.next();
    int doc = (int) cursor.value;
    collapsedSet.set(doc);
  }

  int currentContext = 0;
  int currentDocBase = 0;

  collapseValues = DocValues.getNumeric(contexts[currentContext].reader(), this.field);
  int nextDocBase = currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
  leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
  ScoreAndDoc dummy = new ScoreAndDoc();
  leafDelegate.setScorer(dummy);
  DocIdSetIterator it = new BitSetIterator(collapsedSet, 0L); // cost is not useful here
  int globalDoc = -1;
  int nullScoreIndex = 0;
  while ((globalDoc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {

    while (globalDoc >= nextDocBase) {
      currentContext++;
      currentDocBase = contexts[currentContext].docBase;
      nextDocBase = currentContext + 1 < contexts.length ? contexts[currentContext + 1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
      leafDelegate.setScorer(dummy);
      collapseValues = DocValues.getNumeric(contexts[currentContext].reader(), this.field);
    }

    int contextDoc = globalDoc - currentDocBase;

    int collapseValue;
    if (collapseValues.advanceExact(contextDoc)) {
      collapseValue = (int) collapseValues.longValue();
    } else {
      collapseValue = 0;
    }

    if (collapseValue != nullValue) {
      long scoreDoc = cmap.get(collapseValue);
      dummy.score = Float.intBitsToFloat((int) (scoreDoc >> 32));
    } else if (boosts && mergeBoost.boost(globalDoc)) {
      // Ignore so boosted documents don't mess up the null scoring policies.
    } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
      dummy.score = nullScore;
    } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
      dummy.score = nullScores.get(nullScoreIndex++);
    }

    dummy.docId = contextDoc;
    leafDelegate.collect(contextDoc);
  }

  if (delegate instanceof DelegatingCollector) {
    ((DelegatingCollector) delegate).finish();
  }
}
Example 15
Source File: TestBackwardsCompatibility.java From lucene-solr with Apache License 2.0
public void testDocValuesUpdatesWithNewField() throws Exception {
  Path oldIndexDir = createTempDir("dvupdates");
  TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
  Directory dir = newFSDirectory(oldIndexDir);
  verifyUsesDefaultCodec(dir, dvUpdatesIndex);

  // update fields and verify index
  IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  // introduce a new field that we later update
  writer.addDocument(Arrays.asList(new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO),
      new NumericDocValuesField("new_numeric", 1),
      new BinaryDocValuesField("new_binary", toBytes(1))));
  writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1);
  writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1));

  writer.commit();
  Runnable assertDV = () -> {
    boolean found = false;
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        LeafReader leafReader = ctx.reader();
        TermsEnum id = leafReader.terms("id").iterator();
        if (id.seekExact(new BytesRef("1"))) {
          PostingsEnum postings = id.postings(null, PostingsEnum.NONE);
          NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric");
          BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary");
          int doc;
          while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            found = true;
            assertTrue(binaryDocValues.advanceExact(doc));
            assertTrue(numericDocValues.advanceExact(doc));
            assertEquals(1, numericDocValues.longValue());
            assertEquals(toBytes(1), binaryDocValues.binaryValue());
          }
        }
      }
    } catch (IOException e) {
      throw new AssertionError(e);
    }
    assertTrue(found);
  };
  assertDV.run();
  // merge all segments
  writer.forceMerge(1);
  writer.commit();
  assertDV.run();
  writer.close();
  dir.close();
}
Example 16
Source File: LongRangeFacetCounts.java From lucene-solr with Apache License 2.0
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {

  LongRange[] ranges = (LongRange[]) this.ranges;

  LongRangeCounter counter = new LongRangeCounter(ranges);

  int missingCount = 0;
  for (MatchingDocs hits : matchingDocs) {
    LongValues fv = valueSource.getValues(hits.context, null);

    totCount += hits.totalHits;
    final DocIdSetIterator fastMatchDocs;
    if (fastMatchQuery != null) {
      final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
      final IndexSearcher searcher = new IndexSearcher(topLevelContext);
      searcher.setQueryCache(null);
      final Weight fastMatchWeight = searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
      Scorer s = fastMatchWeight.scorer(hits.context);
      if (s == null) {
        continue;
      }
      fastMatchDocs = s.iterator();
    } else {
      fastMatchDocs = null;
    }

    DocIdSetIterator docs = hits.bits.iterator();
    for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
      if (fastMatchDocs != null) {
        int fastMatchDoc = fastMatchDocs.docID();
        if (fastMatchDoc < doc) {
          fastMatchDoc = fastMatchDocs.advance(doc);
        }

        if (doc != fastMatchDoc) {
          doc = docs.advance(fastMatchDoc);
          continue;
        }
      }
      // Skip missing docs:
      if (fv.advanceExact(doc)) {
        counter.add(fv.longValue());
      } else {
        missingCount++;
      }

      doc = docs.nextDoc();
    }
  }

  int x = counter.fillCounts(counts);

  missingCount += x;

  //System.out.println("totCount " + totCount + " x " + x + " missingCount " + missingCount);
  totCount -= missingCount;
}
Example 17
Source File: BitSetDocumentVisibilityFilterCacheStrategy.java From incubator-retired-blur with Apache License 2.0
public static DocIdSetIterator getFullySetDocIdSetIterator(int maxDoc) {
  return new DocIdSetIterator() {
    private int _docId = -1;

    @Override
    public int advance(int target) throws IOException {
      if (_docId == DocIdSetIterator.NO_MORE_DOCS) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      _docId = target;
      if (_docId >= maxDoc) {
        return _docId = DocIdSetIterator.NO_MORE_DOCS;
      }
      return _docId;
    }

    @Override
    public int nextDoc() throws IOException {
      if (_docId == DocIdSetIterator.NO_MORE_DOCS) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      _docId++;
      if (_docId >= maxDoc) {
        return _docId = DocIdSetIterator.NO_MORE_DOCS;
      }
      return _docId;
    }

    @Override
    public int docID() {
      return _docId;
    }

    @Override
    public long cost() {
      return 0L;
    }
  };
}
Example 18
Source File: DocSetBuilder.java From lucene-solr with Apache License 2.0
public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
  for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
    bitSet.set(doc + base);
  }
}
Example 19
Source File: IndexedDISI.java From lucene-solr with Apache License 2.0
/**
 * Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically
 * increasing gap-less order.
 * The caller must keep track of the number of jump-table entries (returned by this method) as well as the
 * denseRankPower and provide them when constructing an IndexedDISI for reading.
 * @param it  the document IDs.
 * @param out destination for the blocks.
 * @param denseRankPower for {@link Method#DENSE} blocks, a rank will be written every {@code 2^denseRankPower} docIDs.
 *                       Values < 7 (every 128 docIDs) or > 15 (every 32768 docIDs) disables DENSE rank.
 *                       Recommended values are 8-12: Every 256-4096 docIDs or 4-64 longs.
 *                       {@link #DEFAULT_DENSE_RANK_POWER} is 9: Every 512 docIDs.
 *                       This should be stored in meta and used when creating an instance of IndexedDISI.
 * @throws IOException if there was an error writing to out.
 * @return the number of jump-table entries following the blocks, -1 for no entries.
 *         This should be stored in meta and used when creating an instance of IndexedDISI.
 */
static short writeBitSet(DocIdSetIterator it, IndexOutput out, byte denseRankPower) throws IOException {
  final long origo = out.getFilePointer(); // All jumps are relative to the origo
  if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
    throw new IllegalArgumentException("Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). " +
        "The provided power was " + denseRankPower + " (every " + (int) Math.pow(2, denseRankPower) + " docIDs)");
  }
  int totalCardinality = 0;
  int blockCardinality = 0;
  final FixedBitSet buffer = new FixedBitSet(1 << 16);
  int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES * 2)];
  int prevBlock = -1;
  int jumpBlockIndex = 0;

  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    final int block = doc >>> 16;
    if (prevBlock != -1 && block != prevBlock) {
      // Track offset+index from previous block up to current
      jumps = addJumps(jumps, out.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
      jumpBlockIndex = prevBlock + 1;
      // Flush block
      flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
      // Reset for next block
      buffer.clear(0, buffer.length());
      totalCardinality += blockCardinality;
      blockCardinality = 0;
    }
    buffer.set(doc & 0xFFFF);
    blockCardinality++;
    prevBlock = block;
  }
  if (blockCardinality > 0) {
    jumps = addJumps(jumps, out.getFilePointer() - origo, totalCardinality, jumpBlockIndex, prevBlock + 1);
    totalCardinality += blockCardinality;
    flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
    buffer.clear(0, buffer.length());
    prevBlock++;
  }
  final int lastBlock = prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS)
  // Last entry is a SPARSE with blockIndex == 32767 and the single entry 65535, which becomes the docID NO_MORE_DOCS
  // To avoid creating 65K jump-table entries, only a single entry is created pointing to the offset of the
  // NO_MORE_DOCS block, with the jumpBlockIndex set to the logical EMPTY block after all real blocks.
  jumps = addJumps(jumps, out.getFilePointer() - origo, totalCardinality, lastBlock, lastBlock + 1);
  buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
  flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, out);
  // offset+index jump-table stored at the end
  return flushBlockJumps(jumps, lastBlock + 1, out, origo);
}
Example 20
Source File: TestDirectoryReader.java From lucene-solr with Apache License 2.0
public void testMultiTermDocs() throws IOException {
  Directory ramDir1 = newDirectory();
  addDoc(random(), ramDir1, "test foo", true);
  Directory ramDir2 = newDirectory();
  addDoc(random(), ramDir2, "test blah", true);
  Directory ramDir3 = newDirectory();
  addDoc(random(), ramDir3, "test wow", true);

  IndexReader[] readers1 = new IndexReader[]{DirectoryReader.open(ramDir1),
      DirectoryReader.open(ramDir3)};
  IndexReader[] readers2 = new IndexReader[]{DirectoryReader.open(ramDir1),
      DirectoryReader.open(ramDir2), DirectoryReader.open(ramDir3)};
  MultiReader mr2 = new MultiReader(readers1);
  MultiReader mr3 = new MultiReader(readers2);

  // test mixing up TermDocs and TermEnums from different readers.
  TermsEnum te2 = MultiTerms.getTerms(mr2, "body").iterator();
  te2.seekCeil(new BytesRef("wow"));
  PostingsEnum td = TestUtil.docs(random(), mr2, "body", te2.term(), null, 0);

  TermsEnum te3 = MultiTerms.getTerms(mr3, "body").iterator();
  te3.seekCeil(new BytesRef("wow"));
  td = TestUtil.docs(random(), te3, td, 0);

  int ret = 0;

  // This should blow up if we forget to check that the TermEnum is from the same
  // reader as the TermDocs.
  while (td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    ret += td.docID();

  // really a dummy assert to ensure that we got some docs and to ensure that
  // nothing is eliminated by hotspot
  assertTrue(ret > 0);
  readers1[0].close();
  readers1[1].close();
  readers2[0].close();
  readers2[1].close();
  readers2[2].close();
  ramDir1.close();
  ramDir2.close();
  ramDir3.close();
}