Java Code Examples for org.apache.lucene.index.PostingsEnum#advance()
The following examples show how to use org.apache.lucene.index.PostingsEnum#advance().
Each example notes the project and source file it was taken from.
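Before the project examples, a minimal sketch of the basic contract may help. advance(target) positions the enum on the first document whose id is greater than or equal to target (or on DocIdSetIterator.NO_MORE_DOCS), so callers must compare the returned doc id with the target to confirm an exact hit. This sketch assumes a Lucene 7+ LeafReader; the class and method names (AdvanceSketch, termMatchesDoc) are placeholders and not part of any project shown below.

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Hypothetical helper (not Lucene API): returns true if 'term' occurs in
// segment-local document 'doc' of the given field.
final class AdvanceSketch {
  static boolean termMatchesDoc(LeafReader reader, String field, BytesRef term, int doc) throws IOException {
    Terms terms = reader.terms(field);
    if (terms == null) {
      return false; // field is not indexed in this segment
    }
    TermsEnum te = terms.iterator();
    if (te.seekExact(term) == false) {
      return false; // term does not occur in this segment
    }
    PostingsEnum pe = te.postings(null, PostingsEnum.NONE);
    // advance(doc) lands on the first document >= doc (or NO_MORE_DOCS),
    // so an equality check is needed to confirm a hit on this document.
    return pe.advance(doc) == doc;
  }
}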
Example 1
Source File: DisjunctionMatchesIterator.java From lucene-solr with Apache License 2.0
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      } else {
        reuse = pe;
      }
    }
  }
  return null;
}
Example 2
Source File: DisjunctionMatchesIterator.java From lucene-solr with Apache License 2.0
private void init() throws IOException {
  List<MatchesIterator> mis = new ArrayList<>();
  mis.add(first);
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        mis.add(new TermMatchesIterator(query, pe));
        reuse = null;
      } else {
        reuse = pe;
      }
    }
  }
  it = fromSubIterators(mis);
}
Example 3
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Compute termvector number full.
 *
 * @param docSet the doc set
 * @param termDocId the term doc id
 * @param termsEnum the terms enum
 * @param lrc the lrc
 * @param postingsEnum the postings enum
 * @param positionsData the positions data
 * @return the termvector number full
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberFull computeTermvectorNumberFull(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum,
    LeafReaderContext lrc, PostingsEnum postingsEnum,
    Map<Integer, Integer> positionsData) throws IOException {
  TermvectorNumberFull result = new TermvectorNumberFull(docSet.size());
  Iterator<Integer> docIterator = docSet.iterator();
  int localTermDocId = termDocId;
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  while (docIterator.hasNext()) {
    int docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId
        && ((docId == localTermDocId)
            || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.args[result.docNumber] = postingsEnum.freq();
      result.positions[result.docNumber] = (positionsData == null) ? 0
          : positionsData.get(docId + lrc.docBase);
      result.docNumber++;
    }
  }
  return result;
}
Example 4
Source File: ReconstructCommand.java From clue with Apache License 2.0
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException {
  List<String> textList = new ArrayList<String>();
  BytesRef text;
  PostingsEnum postings = null;
  while ((text = te.next()) != null) {
    postings = te.postings(postings, PostingsEnum.FREQS);
    int iterDoc = postings.advance(docid);
    if (iterDoc == docid) {
      textList.add(text.utf8ToString());
    }
  }
  StringBuilder buf = new StringBuilder();
  for (String s : textList) {
    buf.append(s + " ");
  }
  return buf.toString();
}
Example 5
Source File: FieldOffsetStrategy.java From lucene-solr with Apache License 2.0
protected void createOffsetsEnumsForTerms(BytesRef[] sourceTerms, Terms termsIndex, int doc, List<OffsetsEnum> results) throws IOException {
  TermsEnum termsEnum = termsIndex.iterator(); // does not return null
  for (BytesRef term : sourceTerms) {
    if (termsEnum.seekExact(term)) {
      PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
      if (postingsEnum == null) {
        // no offsets or positions available
        throw new IllegalArgumentException("field '" + getField() + "' was indexed without offsets, cannot highlight");
      }
      if (doc == postingsEnum.advance(doc)) { // now it's positioned, although may be exhausted
        results.add(new OffsetsEnum.OfPostings(term, postingsEnum));
      }
    }
  }
}
Example 6
Source File: TaxonomyIndexArrays.java From lucene-solr with Apache License 2.0
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
    return;
  }

  // it's ok to use MultiTerms because we only iterate on one posting list.
  // breaking it to loop over the leaves() only complicates code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,
      Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
      PostingsEnum.PAYLOADS);

  // shouldn't really happen, if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  }

  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      }
      parents[i] = positions.nextPosition();
      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category " + (i + 1), reader.toString());
        }
        break;
      }
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
    }
  }
}
Example 7
Source File: MultiPhraseQuery.java From lucene-solr with Apache License 2.0
@Override
public int advance(int target) throws IOException {
  PostingsEnum top = docsQueue.top();
  do {
    top.advance(target);
    top = docsQueue.updateTop();
  } while (top.docID() < target);
  return top.docID();
}
Example 8
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0
/** checks advancing docs */
public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null) {
    assertNull(rightDocs);
    return;
  }
  int docid = -1;
  int averageGap = MAXDOC / (1 + docFreq);
  int skipInterval = 16;
  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    // we don't assert freqs, they are allowed to be different
  }
}
Example 9
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0
/** checks advancing docs + positions */
public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null || rightDocs == null) {
    assertNull(leftDocs);
    assertNull(rightDocs);
    return;
  }
  int docid = -1;
  int averageGap = MAXDOC / (1 + docFreq);
  int skipInterval = 16;
  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    int freq = leftDocs.freq();
    assertEquals(freq, rightDocs.freq());
    for (int i = 0; i < freq; i++) {
      assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
      // we don't compare the payloads, it's allowed that one is empty etc
    }
  }
}
Example 10
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Compute termvector number basic.
 *
 * @param docSet the doc set
 * @param termDocId the term doc id
 * @param termsEnum the terms enum
 * @param r the r
 * @param lrc the lrc
 * @param postingsEnum the postings enum
 * @return the termvector number basic
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e); // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId
        && ((docId == localTermDocId)
            || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
Example 11
Source File: PayloadFilteredTermIntervalsSource.java From lucene-solr with Apache License 2.0
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
  PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      do {
        if (upto <= 0) {
          pos = IntervalIterator.NO_MORE_INTERVALS;
          return false;
        }
        upto--;
        pos = pe.nextPosition();
      } while (filter.test(pe.getPayload()) == false);
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      throw new UnsupportedOperationException();
    }
  };
}
Example 12
Source File: TermIntervalsSource.java From lucene-solr with Apache License 2.0
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
  TermQuery query = new TermQuery(new Term(field, te.term()));
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      if (upto <= 0) {
        pos = IntervalIterator.NO_MORE_INTERVALS;
        return false;
      }
      upto--;
      pos = pe.nextPosition();
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      return query;
    }
  };
}