org.apache.lucene.index.PostingsEnum Java Examples
The following examples show how to use org.apache.lucene.index.PostingsEnum.
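Before the project examples, here is a minimal, self-contained sketch of the typical access pattern: obtain a Terms instance for a field, seek the TermsEnum to a term, and iterate the resulting PostingsEnum per segment. The index path, field name, and term below are placeholders, not taken from any of the examples.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class PostingsEnumDemo {
  public static void main(String[] args) throws IOException {
    // "/tmp/index", "body", and "lucene" are placeholders for this sketch.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"));
         DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext leaf : reader.leaves()) {
        Terms terms = leaf.reader().terms("body");
        if (terms == null) {
          continue; // field not indexed in this segment
        }
        TermsEnum te = terms.iterator();
        if (te.seekExact(new BytesRef("lucene"))) {
          // The flags argument controls which optional per-document data the
          // enum exposes; request only what you need.
          PostingsEnum pe = te.postings(null, PostingsEnum.FREQS);
          int doc;
          while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            System.out.println("doc=" + (leaf.docBase + doc) + " freq=" + pe.freq());
          }
        }
      }
    }
  }
}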
Example #1
Source File: PhraseHelper.java From lucene-solr with Apache License 2.0
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!fieldMatcher.test(term.field())) {
    return;
  }
  SpanCollectedOffsetsEnum offsetsEnum = termToOffsetsEnums.get(term.bytes());
  if (offsetsEnum == null) {
    // If it's pos insensitive we handle it outside of PhraseHelper. term.field() is from the Query.
    if (positionInsensitiveTerms.contains(term.bytes())) {
      return;
    }
    offsetsEnum = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq());
    termToOffsetsEnums.put(term.bytes(), offsetsEnum);
  }
  offsetsEnum.add(postings.startOffset(), postings.endOffset());
}
Example #2
Source File: SignificantTermsAggregatorFactory.java From Elasticsearch with Apache License 2.0
/**
 * Creates the TermsEnum (if not already created) and must be called before any calls to getBackgroundFrequency
 * @param context The aggregation context
 * @return The number of documents in the index (after an optional filter might have been applied)
 */
public long prepareBackground(AggregationContext context) {
  if (termsEnum != null) {
    // already prepared - return
    return termsEnum.getNumDocs();
  }
  SearchContext searchContext = context.searchContext();
  IndexReader reader = searchContext.searcher().getIndexReader();
  try {
    if (numberOfAggregatorsCreated == 1) {
      // Setup a termsEnum for sole use by one aggregator
      termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
    } else {
      // When we have > 1 agg we have possibility of duplicate term frequency lookups
      // and so use a TermsEnum that caches results of all term lookups
      termsEnum = new FreqTermsEnum(reader, indexedFieldName, true, false, filter, searchContext.bigArrays());
    }
  } catch (IOException e) {
    throw new ElasticsearchException("failed to build terms enumeration", e);
  }
  return termsEnum.getNumDocs();
}
Example #3
Source File: DocumentsImpl.java From lucene-solr with Apache License 2.0
@Override
public Optional<Integer> firstTermDoc() {
  if (tenum == null) {
    // terms enum is not set
    log.warn("Terms enum un-positioned.");
    return Optional.empty();
  }
  try {
    setPostingsIterator(tenum.postings(penum, PostingsEnum.ALL));
    if (penum.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
      // no docs available for this term
      resetPostingsIterator();
      log.warn("No docs available for term: {} in field: {}.", BytesRefUtils.decode(tenum.term()), curField);
      return Optional.empty();
    } else {
      return Optional.of(penum.docID());
    }
  } catch (IOException e) {
    resetPostingsIterator();
    throw new LukeException(String.format(Locale.ENGLISH, "Term docs not available for field: %s.", curField), e);
  }
}
Example #4
Source File: IDVersionPostingsReader.java From lucene-solr with Apache License 2.0
@Override
public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, PostingsEnum reuse, int flags) throws IOException {
  SingleDocsEnum docsEnum;

  if (PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS)) {
    SinglePostingsEnum posEnum;

    if (reuse instanceof SinglePostingsEnum) {
      posEnum = (SinglePostingsEnum) reuse;
    } else {
      posEnum = new SinglePostingsEnum();
    }

    IDVersionTermState _termState = (IDVersionTermState) termState;
    posEnum.reset(_termState.docID, _termState.idVersion);
    return posEnum;
  }

  if (reuse instanceof SingleDocsEnum) {
    docsEnum = (SingleDocsEnum) reuse;
  } else {
    docsEnum = new SingleDocsEnum();
  }
  docsEnum.reset(((IDVersionTermState) termState).docID);

  return docsEnum;
}
Example #5
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0
@Override
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (termsEnum.seekExact(terms.get(ords[i], spare))) {
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      float score = TermsIncludingScoreQuery.this.scores[ords[i]];
      for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
        // I prefer this:
        /*if (scores[doc] < score) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }*/
        // But this behaves the same as MVInnerScorer and only then the tests will pass:
        if (!matchingDocs.get(doc)) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }
      }
    }
  }
}
Example #6
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Collect collection.
 *
 * @param reader the reader
 * @param docSet the doc set
 * @param collectionInfo the collection info
 * @throws IOException Signals that an I/O exception has occurred.
 */
public static void collectCollection(IndexReader reader, List<Integer> docSet,
    ComponentCollection collectionInfo) throws IOException {
  if (collectionInfo.action().equals(ComponentCollection.ACTION_CHECK)) {
    // can't do anything in lucene for check
  } else if (collectionInfo.action().equals(ComponentCollection.ACTION_LIST)) {
    // can't do anything in lucene for list
  } else if (collectionInfo.action().equals(ComponentCollection.ACTION_CREATE)) {
    BytesRef term = null;
    PostingsEnum postingsEnum = null;
    Integer docId;
    Integer termDocId = -1;
    Terms terms;
    LeafReaderContext lrc;
    LeafReader r;
    ListIterator<LeafReaderContext> iterator = reader.leaves().listIterator();
    while (iterator.hasNext()) {
      lrc = iterator.next();
      r = lrc.reader();
      for (String field : collectionInfo.fields()) {
        if ((terms = r.terms(field)) != null) {
          TermsEnum termsEnum = terms.iterator();
          while ((term = termsEnum.next()) != null) {
            Iterator<Integer> docIterator = docSet.iterator();
            postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
            termDocId = -1;
            while (docIterator.hasNext()) {
              docId = docIterator.next() - lrc.docBase;
              if ((docId >= termDocId) && ((docId.equals(termDocId))
                  || ((termDocId = postingsEnum.advance(docId)).equals(docId)))) {
                collectionInfo.addValue(term.utf8ToString());
                break;
              }
              if (termDocId.equals(PostingsEnum.NO_MORE_DOCS)) {
                break;
              }
            }
          }
        }
      }
    }
  }
}
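The example above leans on the advance(int) contract that PostingsEnum inherits from DocIdSetIterator: advance(target) positions the enum on the first document whose ID is greater than or equal to target and returns it, or NO_MORE_DOCS if the postings are exhausted, and it may only move forward. A hypothetical helper (name and signature are illustrative, not from the example) showing the same membership test in isolation:

// Hypothetical helper illustrating the advance-based membership test used above;
// returns true if the postings list contains docId.
static boolean postingsContain(PostingsEnum postings, int docId) throws IOException {
  int current = postings.docID();
  if (current == docId) {
    return true;  // already positioned on the target
  }
  if (current > docId) {
    return false; // advance() may only move forward, so the target was already passed
  }
  // advance() returns the first doc >= docId, or NO_MORE_DOCS
  return postings.advance(docId) == docId;
}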
Example #7
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!matches) return;
  if (upto >= payloadToMatch.size()) {
    matches = false;
    return;
  }
  BytesRef payload = postings.getPayload();
  if (payloadToMatch.get(upto) == null) {
    matches = payload == null;
    upto++;
    return;
  }
  if (payload == null) {
    matches = false;
    upto++;
    return;
  }
  matches = payloadToMatch.get(upto).bytesEquals(payload);
  upto++;
}
Example #8
Source File: DocumentsImpl.java From lucene-solr with Apache License 2.0
@Override
public Optional<Integer> nextTermDoc() {
  if (penum == null) {
    // postings enum is not initialized
    log.warn("Postings enum un-positioned for field: {}.", curField);
    return Optional.empty();
  }
  try {
    if (penum.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
      // end of the iterator
      resetPostingsIterator();
      if (log.isInfoEnabled()) {
        log.info("Reached the end of the postings iterator for term: {} in field: {}", BytesRefUtils.decode(tenum.term()), curField);
      }
      return Optional.empty();
    } else {
      return Optional.of(penum.docID());
    }
  } catch (IOException e) {
    resetPostingsIterator();
    throw new LukeException(String.format(Locale.ENGLISH, "Term docs not available for field: %s.", curField), e);
  }
}
Example #9
Source File: TermVectorsResponse.java From Elasticsearch with Apache License 2.0
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
  // start term, optimized writing
  BytesRef term = termIter.next();
  spare.copyUTF8Bytes(term);
  builder.startObject(spare.toString());
  buildTermStatistics(builder, termIter);
  // finally write the term vectors
  PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
  int termFreq = posEnum.freq();
  builder.field(FieldStrings.TERM_FREQ, termFreq);
  initMemory(curTerms, termFreq);
  initValues(curTerms, posEnum, termFreq);
  buildValues(builder, curTerms, termFreq);
  buildScore(builder, boostAtt);
  builder.endObject();
}
Example #10
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (termsEnum.seekExact(terms.get(ords[i], spare))) {
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      float score = TermsIncludingScoreQuery.this.scores[ords[i]];
      for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
        matchingDocs.set(doc);
        // In the case the same doc is also related to another doc, a score might be overwritten.
        // I think this can only happen in a many-to-many relation
        scores[doc] = score;
      }
    }
  }
}
Example #11
Source File: TermVectorsResponse.java From Elasticsearch with Apache License 2.0
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
  for (int j = 0; j < termFreq; j++) {
    int nextPos = posEnum.nextPosition();
    if (curTerms.hasPositions()) {
      currentPositions[j] = nextPos;
    }
    if (curTerms.hasOffsets()) {
      currentStartOffset[j] = posEnum.startOffset();
      currentEndOffset[j] = posEnum.endOffset();
    }
    if (curTerms.hasPayloads()) {
      BytesRef curPayload = posEnum.getPayload();
      if (curPayload != null) {
        currentPayloads[j] = new BytesArray(curPayload.bytes, 0, curPayload.length);
      } else {
        currentPayloads[j] = null;
      }
    }
  }
}
Example #12
Source File: TermPosting.java From lucene-solr with Apache License 2.0
static TermPosting of(int position, PostingsEnum penum) throws IOException {
  TermPosting posting = new TermPosting();

  // set position
  posting.position = position;

  // set offset (if available)
  int sOffset = penum.startOffset();
  int eOffset = penum.endOffset();
  if (sOffset >= 0 && eOffset >= 0) {
    posting.startOffset = sOffset;
    posting.endOffset = eOffset;
  }

  // set payload (if available)
  if (penum.getPayload() != null) {
    posting.payload = BytesRef.deepCopyOf(penum.getPayload());
  }

  return posting;
}
Example #13
Source File: DisjunctionMatchesIterator.java From lucene-solr with Apache License 2.0
private void init() throws IOException {
  List<MatchesIterator> mis = new ArrayList<>();
  mis.add(first);
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        mis.add(new TermMatchesIterator(query, pe));
        reuse = null;
      } else {
        reuse = pe;
      }
    }
  }
  it = fromSubIterators(mis);
}
Example #14
Source File: TestIDVersionPostingsFormat.java From lucene-solr with Apache License 2.0
/** Returns docID if found, else -1. */
public int lookup(BytesRef id, long version) throws IOException {
  for (int seg = 0; seg < numSegs; seg++) {
    if (((IDVersionSegmentTermsEnum) termsEnums[seg]).seekExact(id, version)) {
      if (VERBOSE) {
        System.out.println("  found in seg=" + termsEnums[seg]);
      }
      postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
      int docID = postingsEnums[seg].nextDoc();
      if (docID != PostingsEnum.NO_MORE_DOCS && (liveDocs[seg] == null || liveDocs[seg].get(docID))) {
        lastVersion = ((IDVersionSegmentTermsEnum) termsEnums[seg]).getVersion();
        return docBases[seg] + docID;
      }
      assert hasDeletions;
    }
  }
  return -1;
}
Example #15
Source File: DisjunctionMatchesIterator.java From lucene-solr with Apache License 2.0
/**
 * Create a {@link DisjunctionMatchesIterator} over a list of terms extracted from a {@link BytesRefIterator}
 *
 * Only terms that have at least one match in the given document will be included
 */
static MatchesIterator fromTermsEnum(LeafReaderContext context, int doc, Query query, String field, BytesRefIterator terms) throws IOException {
  Objects.requireNonNull(field);
  Terms t = context.reader().terms(field);
  if (t == null)
    return null;
  TermsEnum te = t.iterator();
  PostingsEnum reuse = null;
  for (BytesRef term = terms.next(); term != null; term = terms.next()) {
    if (te.seekExact(term)) {
      PostingsEnum pe = te.postings(reuse, PostingsEnum.OFFSETS);
      if (pe.advance(doc) == doc) {
        return new TermsEnumDisjunctionMatchesIterator(new TermMatchesIterator(query, pe), terms, te, doc, query);
      } else {
        reuse = pe;
      }
    }
  }
  return null;
}
Example #16
Source File: TermVectorEntry.java From lucene-solr with Apache License 2.0
/**
 * Returns a new term vector entry representing the specified term, and optionally, positions.
 *
 * @param te - positioned terms iterator
 * @return term vector entry
 * @throws IOException - if there is a low level IO error.
 */
static TermVectorEntry of(TermsEnum te) throws IOException {
  Objects.requireNonNull(te);

  String termText = BytesRefUtils.decode(te.term());

  List<TermVectorEntry.TermVectorPosition> tvPositions = new ArrayList<>();
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  pe.nextDoc();
  int freq = pe.freq();
  for (int i = 0; i < freq; i++) {
    int pos = pe.nextPosition();
    if (pos < 0) {
      // no position information available
      continue;
    }
    TermVectorPosition tvPos = TermVectorPosition.of(pos, pe);
    tvPositions.add(tvPos);
  }

  return new TermVectorEntry(termText, te.totalTermFreq(), tvPositions);
}
Example #17
Source File: ShardSplittingQuery.java From crate with Apache License 2.0
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard,
                                  LeafReader leafReader, IntConsumer consumer) throws IOException {
  Terms terms = leafReader.terms(idField);
  TermsEnum iterator = terms.iterator();
  BytesRef idTerm;
  PostingsEnum postingsEnum = null;
  while ((idTerm = iterator.next()) != null) {
    if (includeInShard.test(idTerm) == false) {
      postingsEnum = iterator.postings(postingsEnum);
      int doc;
      while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        consumer.accept(doc);
      }
    }
  }
}
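Several examples (#13, #15, #17, #20) pass the previous PostingsEnum back into TermsEnum.postings(...) as the reuse argument, which lets the codec recycle the object across terms instead of allocating a new enum each time; the one-argument overload used in Example #17 defaults the flags to PostingsEnum.FREQS. A minimal sketch of the idiom, assuming terms is a non-null Terms instance:

// Reuse idiom: recycle one PostingsEnum across all terms of a field.
PostingsEnum reuse = null;
TermsEnum te = terms.iterator();
BytesRef term;
while ((term = te.next()) != null) {
  reuse = te.postings(reuse, PostingsEnum.NONE); // may return the same instance, reset to the new term
  int doc;
  while ((doc = reuse.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // process (term, doc)
  }
}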
Example #18
Source File: TestUtil.java From lucene-solr with Apache License 2.0
public static PostingsEnum docs(Random random, TermsEnum termsEnum, PostingsEnum reuse, int flags) throws IOException {
  // TODO: simplify this method? it would be easier to randomly either use the flags passed, or do the random selection,
  // FREQS should be part of the random selection instead of outside on its own?
  if (random.nextBoolean()) {
    if (random.nextBoolean()) {
      final int posFlags;
      switch (random.nextInt(4)) {
        case 0: posFlags = PostingsEnum.POSITIONS; break;
        case 1: posFlags = PostingsEnum.OFFSETS; break;
        case 2: posFlags = PostingsEnum.PAYLOADS; break;
        default: posFlags = PostingsEnum.ALL; break;
      }
      return termsEnum.postings(null, posFlags);
    }
    flags |= PostingsEnum.FREQS;
  }
  return termsEnum.postings(reuse, flags);
}
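A note on the flags used above: in current Lucene versions the constants are cumulative bitmasks, so POSITIONS implies FREQS, and OFFSETS and PAYLOADS each imply POSITIONS; ALL combines everything, and PostingsEnum.featureRequested(flags, feature) (see Examples #4 and #25) tests whether a feature was requested. A minimal sketch, assuming termsEnum is already positioned on a term of a field indexed with positions and payloads:

// PAYLOADS subsumes POSITIONS, which subsumes FREQS.
PostingsEnum pe = termsEnum.postings(null, PostingsEnum.PAYLOADS);
while (pe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  int freq = pe.freq();               // available: FREQS is implied
  for (int i = 0; i < freq; i++) {
    int position = pe.nextPosition(); // available: POSITIONS is implied
    BytesRef payload = pe.getPayload(); // may be null for occurrences without a payload
  }
}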
Example #19
Source File: SpanPayloadCheckQuery.java From lucene-solr with Apache License 2.0
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!matches) return;
  if (upto >= payloadToMatch.size()) {
    matches = false;
    return;
  }
  BytesRef payload = postings.getPayload();
  if (payloadToMatch.get(upto) == null) {
    matches = payload == null;
    upto++;
    return;
  }
  if (payload == null) {
    matches = false;
    upto++;
    return;
  }
  matches = payloadToMatch.get(upto).bytesEquals(payload);
  upto++;
}
Example #20
Source File: LukeRequestHandler.java From lucene-solr with Apache License 2.0
private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws IOException {
  PostingsEnum postingsEnum = null;
  TermsEnum termsEnum = terms.iterator();
  BytesRef text;
  // Deal with the chance that the first bunch of terms are in deleted documents. Is there a better way?
  for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) {
    text = termsEnum.next();
    if (text == null) {
      // Ran off the end of the terms enum without finding any live docs with that field in them.
      return null;
    }
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    final Bits liveDocs = reader.getLiveDocs();
    if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      if (liveDocs != null && liveDocs.get(postingsEnum.docID())) {
        continue;
      }
      return reader.document(postingsEnum.docID());
    }
  }
  return null;
}
Example #21
Source File: PhraseQuery.java From lucene-solr with Apache License 2.0
public PostingsAndFreq(PostingsEnum postings, ImpactsEnum impacts, int position, Term... terms) {
  this.postings = postings;
  this.impacts = impacts;
  this.position = position;
  nTerms = terms == null ? 0 : terms.length;
  if (nTerms > 0) {
    if (terms.length == 1) {
      this.terms = terms;
    } else {
      Term[] terms2 = new Term[terms.length];
      System.arraycopy(terms, 0, terms2, 0, terms.length);
      Arrays.sort(terms2);
      this.terms = terms2;
    }
  } else {
    this.terms = null;
  }
}
Example #22
Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0
public void testDocValuesDoNotAffectBoostPositionsOrOffset() throws Exception {
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("text", new BytesRef("quick brown fox")));
  doc.add(new TextField("text", "quick brown fox", Field.Store.NO));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer, true, true);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();
  TermsEnum tenum = leafReader.terms("text").iterator();

  assertEquals("brown", tenum.next().utf8ToString());
  PostingsEnum penum = tenum.postings(null, PostingsEnum.OFFSETS);
  assertEquals(0, penum.nextDoc());
  assertEquals(1, penum.freq());
  assertEquals(1, penum.nextPosition());
  assertEquals(6, penum.startOffset());
  assertEquals(11, penum.endOffset());

  assertEquals("fox", tenum.next().utf8ToString());
  penum = tenum.postings(penum, PostingsEnum.OFFSETS);
  assertEquals(0, penum.nextDoc());
  assertEquals(1, penum.freq());
  assertEquals(2, penum.nextPosition());
  assertEquals(12, penum.startOffset());
  assertEquals(15, penum.endOffset());

  assertEquals("quick", tenum.next().utf8ToString());
  penum = tenum.postings(penum, PostingsEnum.OFFSETS);
  assertEquals(0, penum.nextDoc());
  assertEquals(1, penum.freq());
  assertEquals(0, penum.nextPosition());
  assertEquals(0, penum.startOffset());
  assertEquals(5, penum.endOffset());

  BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("text");
  assertEquals(0, binaryDocValues.nextDoc());
  assertEquals("quick brown fox", binaryDocValues.binaryValue().utf8ToString());
}
Example #23
Source File: FeatureSortField.java From lucene-solr with Apache License 2.0
@Override
protected void doSetNextReader(LeafReaderContext context) throws IOException {
  Terms terms = context.reader().terms(field);
  if (terms == null) {
    currentReaderPostingsValues = null;
  } else {
    TermsEnum termsEnum = terms.iterator();
    if (termsEnum.seekExact(featureName) == false) {
      currentReaderPostingsValues = null;
    } else {
      currentReaderPostingsValues = termsEnum.postings(currentReaderPostingsValues, PostingsEnum.FREQS);
    }
  }
}
Example #24
Source File: DirectPostingsFormat.java From lucene-solr with Apache License 2.0
public PostingsEnum reset(int[] postings, byte[] payloadBytes) {
  this.postings = postings;
  upto = 0;
  skipPositions = 0;
  pos = -1;
  startOffset = -1;
  endOffset = -1;
  docID = -1;
  payloadLength = 0;
  this.payloadBytes = payloadBytes;
  return this;
}
Example #25
Source File: Lucene84PostingsReader.java From lucene-solr with Apache License 2.0
public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOException {
  docFreq = termState.docFreq;
  totalTermFreq = indexHasFreq ? termState.totalTermFreq : docFreq;
  docTermStartFP = termState.docStartFP;
  skipOffset = termState.skipOffset;
  singletonDocID = termState.singletonDocID;
  if (docFreq > 1) {
    if (docIn == null) {
      // lazy init
      docIn = startDocIn.clone();
    }
    docIn.seek(docTermStartFP);
  }

  doc = -1;
  this.needsFreq = PostingsEnum.featureRequested(flags, PostingsEnum.FREQS);
  this.isFreqsRead = true;
  if (indexHasFreq == false || needsFreq == false) {
    for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
      freqBuffer[i] = 1;
    }
  }
  accum = 0;
  blockUpto = 0;
  nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
  docBufferUpto = BLOCK_SIZE;
  skipped = false;
  return this;
}
Example #26
Source File: MultiPhraseQuery.java From lucene-solr with Apache License 2.0
@Override
public int nextDoc() throws IOException {
  PostingsEnum top = docsQueue.top();
  int doc = top.docID();
  do {
    top.nextDoc();
    top = docsQueue.updateTop();
  } while (top.docID() == doc);
  return top.docID();
}
Example #27
Source File: OrdsSegmentTermsEnum.java From lucene-solr with Apache License 2.0
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
  assert !eof;
  //if (DEBUG) {
  //  System.out.println("BTTR.docs seg=" + segment);
  //}
  currentFrame.decodeMetaData();
  //if (DEBUG) {
  //  System.out.println("  state=" + currentFrame.state);
  //}
  return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, reuse, flags);
}
Example #28
Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0
@Override
public void collect(Object value) throws IOException {
  fieldType.readableToIndexed((String) value, bytes);
  if (termsEnum.seekExact(bytes.get())) {
    postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
    bitSet.or(postingsEnum);
  }
}
Example #29
Source File: BlockTermsReader.java From lucene-solr with Apache License 2.0
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
  //System.out.println("BTR.docs this=" + this);
  decodeMetaData();
  //System.out.println("BTR.docs:  state.docFreq=" + state.docFreq);
  return postingsReader.postings(fieldInfo, state, reuse, flags);
}
Example #30
Source File: MultiPhraseQuery.java From lucene-solr with Apache License 2.0
@Override
public int advance(int target) throws IOException {
  PostingsEnum top = docsQueue.top();
  do {
    top.advance(target);
    top = docsQueue.updateTop();
  } while (top.docID() < target);
  return top.docID();
}