Java Code Examples for org.apache.lucene.index.PostingsEnum#advance()

The following examples show how to use org.apache.lucene.index.PostingsEnum#advance(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to the original project or source file. You may also check the related API usage in the sidebar.
Example 1
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 * Only terms that have at least one match in the given document will be included
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term =; term != null; term = {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      else {
        reuse = pe;
  return null;
Example 2
Source File:    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void init() throws IOException {
  List<MatchesIterator> mis = new ArrayList<>();
  PostingsEnum reuse = null;
  for (BytesRef term =; term != null; term = {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        mis.add(new TermMatchesIterator(query, pe));
        reuse = null;
      } else {
        reuse = pe;
  it = fromSubIterators(mis);
Example 3
Source File:    From mtas with Apache License 2.0 6 votes vote down vote up
 * Compute termvector number full.
 * @param docSet
 *          the doc set
 * @param termDocId
 *          the term doc id
 * @param termsEnum
 *          the terms enum
 * @param lrc
 *          the lrc
 * @param postingsEnum
 *          the postings enum
 * @param positionsData
 *          the positions data
 * @return the termvector number full
 * @throws IOException
 *           Signals that an I/O exception has occurred.
private static TermvectorNumberFull computeTermvectorNumberFull(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum,
    LeafReaderContext lrc, PostingsEnum postingsEnum,
    Map<Integer, Integer> positionsData) throws IOException {
  TermvectorNumberFull result = new TermvectorNumberFull(docSet.size());
  Iterator<Integer> docIterator = docSet.iterator();
  int localTermDocId = termDocId;
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  while (docIterator.hasNext()) {
    int docId = - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.args[result.docNumber] = postingsEnum.freq();
      result.positions[result.docNumber] = (positionsData == null) ? 0
          : positionsData.get(docId + lrc.docBase);
  return result;
Example 4
Source File:    From clue with Apache License 2.0 6 votes vote down vote up
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException{
  List<String> textList = new ArrayList<String>();
  BytesRef text;
  PostingsEnum postings = null;
  while ((text = != null) {
    postings = te.postings(postings, PostingsEnum.FREQS);
    int iterDoc = postings.advance(docid);
    if (iterDoc == docid) {
  StringBuilder buf = new StringBuilder();
  for (String s : textList) {
    buf.append(s+" ");
  return buf.toString();
Example 5
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
  TermsEnum termsEnum = termsIndex.iterator();//does not return null
  for (BytesRef term : sourceTerms) {
    if (termsEnum.seekExact(term)) {
      PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
      if (postingsEnum == null) {
        // no offsets or positions available
        throw new IllegalArgumentException("field '" + getField() + "' was indexed without offsets, cannot highlight");
      if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
        results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
Example 6
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
  // it's ok to use MultiTerms because we only iterate on one posting list.
  // breaking it to loop over the leaves() only complicates code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,

  // shouldn't really happen, if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      parents[i] = positions.nextPosition();
      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category "+ (i + 1), reader.toString());
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
Example 7
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public int advance(int target) throws IOException {
  PostingsEnum top =;

  do {
    top = docsQueue.updateTop();
  } while (top.docID() < target);

  return top.docID();
Example 8
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
 * checks advancing docs
public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null) {
  int docid = -1;
  int averageGap = MAXDOC / (1+docFreq);
  int skipInterval = 16;

  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
    // we don't assert freqs, they are allowed to be different
Example 9
Source File:    From lucene-solr with Apache License 2.0 5 votes vote down vote up
 * checks advancing docs + positions
public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null || rightDocs == null) {
  int docid = -1;
  int averageGap = MAXDOC / (1+docFreq);
  int skipInterval = 16;

  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
    int freq = leftDocs.freq();
    assertEquals(freq, rightDocs.freq());
    for (int i = 0; i < freq; i++) {
      assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
      // we don't compare the payloads, it's allowed that one is empty etc
Example 10
Source File:    From mtas with Apache License 2.0 5 votes vote down vote up
 * Compute termvector number basic.
 * @param docSet
 *          the doc set
 * @param termDocId
 *          the term doc id
 * @param termsEnum
 *          the terms enum
 * @param r
 *          the r
 * @param lrc
 *          the lrc
 * @param postingsEnum
 *          the postings enum
 * @return the termvector number basic
 * @throws IOException
 *           Signals that an I/O exception has occurred.
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.valueSum[0] += postingsEnum.freq();
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
  return result;
Example 11
Source File:    From lucene-solr with Apache License 2.0 4 votes vote down vote up
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
  PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
  if (pe.advance(doc) != doc) {
    return null;
  return new IntervalMatchesIterator() {

    public int gaps() {
      return 0;

    public int width() {
      return 1;

    int upto = pe.freq();
    int pos = -1;

    public boolean next() throws IOException {
      do {
        if (upto <= 0) {
          pos = IntervalIterator.NO_MORE_INTERVALS;
          return false;
        pos = pe.nextPosition();
      while (filter.test(pe.getPayload()) == false);
      return true;

    public int startPosition() {
      return pos;

    public int endPosition() {
      return pos;

    public int startOffset() throws IOException {
      return pe.startOffset();

    public int endOffset() throws IOException {
      return pe.endOffset();

    public MatchesIterator getSubMatches() {
      return null;

    public Query getQuery() {
      throw new UnsupportedOperationException();
Example 12
Source File:    From lucene-solr with Apache License 2.0 4 votes vote down vote up
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
  TermQuery query = new TermQuery(new Term(field, te.term()));
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  if (pe.advance(doc) != doc) {
    return null;
  return new IntervalMatchesIterator() {

    public int gaps() {
      return 0;

    public int width() {
      return 1;

    int upto = pe.freq();
    int pos = -1;

    public boolean next() throws IOException {
      if (upto <= 0) {
        pos = IntervalIterator.NO_MORE_INTERVALS;
        return false;
      pos = pe.nextPosition();
      return true;

    public int startPosition() {
      return pos;

    public int endPosition() {
      return pos;

    public int startOffset() throws IOException {
      return pe.startOffset();

    public int endOffset() throws IOException {
      return pe.endOffset();

    public MatchesIterator getSubMatches() {
      return null;

    public Query getQuery() {
      return query;