Java Code Examples for org.apache.lucene.index.PostingsEnum#freq()
The following examples show how to use
org.apache.lucene.index.PostingsEnum#freq().
The project and source file each example comes from are noted above it.
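Before the project examples, here is a minimal, self-contained sketch of the call pattern behind all of them: obtain a TermsEnum for a field, seek to a term, request a PostingsEnum with at least PostingsEnum.FREQS, and call freq() only after positioning the enum on a document. The index path ("/path/to/index"), field name ("body"), and term text ("lucene") are illustrative placeholders, not values taken from the examples below.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class FreqDemo {
  public static void main(String[] args) throws IOException {
    // Open an existing index; the path is a placeholder.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
      Terms terms = MultiTerms.getTerms(reader, "body");
      if (terms == null) {
        return; // field not present or not indexed
      }
      TermsEnum termsEnum = terms.iterator();
      if (termsEnum.seekExact(new BytesRef("lucene"))) {
        // Pass null to allocate a fresh enum; request term frequencies.
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.FREQS);
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          // freq() is only defined once the enum is positioned on a document.
          System.out.println("doc=" + postings.docID() + " freq=" + postings.freq());
        }
      }
    }
  }
}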
Example 1
Source File: TermVectorsResponse.java From Elasticsearch with Apache License 2.0
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
Example 2
Source File: PhraseHelper.java From lucene-solr with Apache License 2.0
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  if (!fieldMatcher.test(term.field())) {
    return;
  }
  SpanCollectedOffsetsEnum offsetsEnum = termToOffsetsEnums.get(term.bytes());
  if (offsetsEnum == null) {
    // If it's pos insensitive we handle it outside of PhraseHelper.  term.field() is from the Query.
    if (positionInsensitiveTerms.contains(term.bytes())) {
      return;
    }
    offsetsEnum = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq());
    termToOffsetsEnums.put(term.bytes(), offsetsEnum);
  }
  offsetsEnum.add(postings.startOffset(), postings.endOffset());
}
Example 3
Source File: LuceneUtils.java From semanticvectors with BSD 3-Clause "New" or "Revised" License
/**
 * Gets the 1 - entropy (i.e. 1 + plogp) of a term,
 * a function that favors terms that are focally distributed.
 * We use the definition of log-entropy weighting provided in
 * Martin and Berry (2007):
 * Entropy = 1 + sum ((Pij log2(Pij)) / log2(n))
 * where Pij = frequency of term i in doc j / global frequency of term i,
 * n = number of documents in collection.
 * @param term whose entropy you want
 * Thanks to Vidya Vasuki for adding the hash table to
 * eliminate redundant calculation
 */
private float getEntropy(Term term) {
  if (termEntropy.containsKey(term.field() + "_" + term.text()))
    return termEntropy.get(term.field() + "_" + term.text());
  int gf = getGlobalTermFreq(term);
  double entropy = 0;
  try {
    PostingsEnum docsEnum = this.getDocsForTerm(term);
    while ((docsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
      double p = docsEnum.freq(); // frequency in this document
      p = p / gf;                 // frequency across all documents
      entropy += p * (Math.log(p) / Math.log(2)); // sum of P log(P)
    }
    int n = this.getNumDocs();
    double log2n = Math.log(n) / Math.log(2);
    entropy = entropy / log2n;
  } catch (IOException e) {
    logger.info("Couldn't get term entropy for term " + term.text());
  }
  termEntropy.put(term.field() + "_" + term.text(), 1 + (float) entropy);
  return (float) (1 + entropy);
}
Example 4
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Compute termvector number full.
 *
 * @param docSet the doc set
 * @param termDocId the term doc id
 * @param termsEnum the terms enum
 * @param lrc the lrc
 * @param postingsEnum the postings enum
 * @param positionsData the positions data
 * @return the termvector number full
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberFull computeTermvectorNumberFull(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum,
    LeafReaderContext lrc, PostingsEnum postingsEnum,
    Map<Integer, Integer> positionsData) throws IOException {
  TermvectorNumberFull result = new TermvectorNumberFull(docSet.size());
  Iterator<Integer> docIterator = docSet.iterator();
  int localTermDocId = termDocId;
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  while (docIterator.hasNext()) {
    int docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId
        && ((docId == localTermDocId)
            || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.args[result.docNumber] = postingsEnum.freq();
      result.positions[result.docNumber] = (positionsData == null) ? 0
          : positionsData.get(docId + lrc.docBase);
      result.docNumber++;
    }
  }
  return result;
}
Example 5
Source File: TermVectorEntry.java From lucene-solr with Apache License 2.0
/**
 * Returns a new term vector entry representing the specified term, and optionally, positions.
 *
 * @param te - positioned terms iterator
 * @return term vector entry
 * @throws IOException - if there is a low level IO error.
 */
static TermVectorEntry of(TermsEnum te) throws IOException {
  Objects.requireNonNull(te);
  String termText = BytesRefUtils.decode(te.term());
  List<TermVectorEntry.TermVectorPosition> tvPositions = new ArrayList<>();
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  pe.nextDoc();
  int freq = pe.freq();
  for (int i = 0; i < freq; i++) {
    int pos = pe.nextPosition();
    if (pos < 0) {
      // no position information available
      continue;
    }
    TermVectorPosition tvPos = TermVectorPosition.of(pos, pe);
    tvPositions.add(tvPos);
  }
  return new TermVectorEntry(termText, te.totalTermFreq(), tvPositions);
}
Example 6
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0
/**
 * checks docs + freqs + positions + payloads, sequentially
 */
public void assertDocsAndPositionsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  assertNotNull(leftDocs);
  assertNotNull(rightDocs);
  assertEquals(-1, leftDocs.docID());
  assertEquals(-1, rightDocs.docID());
  int docid;
  while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    assertEquals(docid, rightDocs.nextDoc());
    int freq = leftDocs.freq();
    assertEquals(freq, rightDocs.freq());
    for (int i = 0; i < freq; i++) {
      assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
      // we don't assert offsets/payloads, they are allowed to be different
    }
  }
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
Example 7
Source File: FrequencyCtxSentenceBasedFBWorker.java From jate with GNU Lesser General Public License v3.0
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    List<MWESentenceContext> result = new ArrayList<>();
    TermsEnum tiRef = termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        String tString = luceneTerm.utf8ToString();
        if (!allCandidates.contains(tString)) {
            luceneTerm = tiRef.next();
            continue;
        }

        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);

        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload = postingsEnum.getPayload();
                int sentenceId = -1;
                if (payload != null) {
                    sentenceId = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
                }
                result.add(new MWESentenceContext(tString, sentenceId, start, end));
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}
Example 8
Source File: ESIndex.java From pyramid with Apache License 2.0
private Map<Integer, String> getTermVectorWithException(String field, String id) throws IOException {
    TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id)
            .setOffsets(false).setPositions(true).setFieldStatistics(false)
            .setTermStatistics(false)
            .setSelectedFields(field)
            .execute().actionGet();
    Map<Integer, String> map = new HashMap<>();
    Terms terms = response.getFields().terms(field);
    if (terms == null) {
        return map;
    }
    TermsEnum iterator = terms.iterator();
    PostingsEnum postings = null;
    for (BytesRef termBytes = null; (termBytes = iterator.next()) != null; ) {
        String term = termBytes.utf8ToString();
        postings = iterator.postings(postings, PostingsEnum.ALL);
        //there can only be one doc since we are getting with id. get the doc and the position
        postings.nextDoc();
        int tf = postings.freq();
        for (int i = 0; i < tf; i++) {
            int pos = postings.nextPosition();
            map.put(pos, term);
        }
    }
    return map;
}
Example 9
Source File: CodecCollector.java From mtas with Apache License 2.0
/**
 * Compute termvector number basic.
 *
 * @param docSet the doc set
 * @param termDocId the term doc id
 * @param termsEnum the terms enum
 * @param r the r
 * @param lrc the lrc
 * @param postingsEnum the postings enum
 * @return the termvector number basic
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId
        && ((docId == localTermDocId)
            || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
Example 10
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0
/**
 * checks advancing docs + positions
 */
public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
  if (leftDocs == null || rightDocs == null) {
    assertNull(leftDocs);
    assertNull(rightDocs);
    return;
  }
  int docid = -1;
  int averageGap = MAXDOC / (1 + docFreq);
  int skipInterval = 16;
  while (true) {
    if (random().nextBoolean()) {
      // nextDoc()
      docid = leftDocs.nextDoc();
      assertEquals(docid, rightDocs.nextDoc());
    } else {
      // advance()
      int skip = docid + (int) Math.ceil(Math.abs(skipInterval + random().nextGaussian() * averageGap));
      docid = leftDocs.advance(skip);
      assertEquals(docid, rightDocs.advance(skip));
    }
    if (docid == DocIdSetIterator.NO_MORE_DOCS) {
      return;
    }
    int freq = leftDocs.freq();
    assertEquals(freq, rightDocs.freq());
    for (int i = 0; i < freq; i++) {
      assertEquals(leftDocs.nextPosition(), rightDocs.nextPosition());
      // we don't compare the payloads, it's allowed that one is empty etc
    }
  }
}
Example 11
Source File: TaxonomyIndexArrays.java From lucene-solr with Apache License 2.0
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
    return;
  }

  // it's ok to use MultiTerms because we only iterate on one posting list.
  // breaking it to loop over the leaves() only complicates code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,
      Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
      PostingsEnum.PAYLOADS);

  // shouldn't really happen, if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  }

  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      }

      parents[i] = positions.nextPosition();

      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category " + (i + 1), reader.toString());
        }
        break;
      }
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
    }
  }
}
Example 12
Source File: TermMatchesIterator.java From lucene-solr with Apache License 2.0
/**
 * Create a new {@link TermMatchesIterator} for the given term and postings list
 */
TermMatchesIterator(Query query, PostingsEnum pe) throws IOException {
  this.pe = pe;
  this.query = query;
  this.upto = pe.freq();
}
Example 13
Source File: TermIntervalsSource.java From lucene-solr with Apache License 2.0
static IntervalMatchesIterator matches(TermsEnum te, int doc, String field) throws IOException {
  TermQuery query = new TermQuery(new Term(field, te.term()));
  PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      if (upto <= 0) {
        pos = IntervalIterator.NO_MORE_INTERVALS;
        return false;
      }
      upto--;
      pos = pe.nextPosition();
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      return query;
    }
  };
}
Example 14
Source File: PayloadFilteredTermIntervalsSource.java From lucene-solr with Apache License 2.0
private IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
  PostingsEnum pe = te.postings(null, PostingsEnum.ALL);
  if (pe.advance(doc) != doc) {
    return null;
  }
  return new IntervalMatchesIterator() {

    @Override
    public int gaps() {
      return 0;
    }

    @Override
    public int width() {
      return 1;
    }

    int upto = pe.freq();
    int pos = -1;

    @Override
    public boolean next() throws IOException {
      do {
        if (upto <= 0) {
          pos = IntervalIterator.NO_MORE_INTERVALS;
          return false;
        }
        upto--;
        pos = pe.nextPosition();
      } while (filter.test(pe.getPayload()) == false);
      return true;
    }

    @Override
    public int startPosition() {
      return pos;
    }

    @Override
    public int endPosition() {
      return pos;
    }

    @Override
    public int startOffset() throws IOException {
      return pe.startOffset();
    }

    @Override
    public int endOffset() throws IOException {
      return pe.endOffset();
    }

    @Override
    public MatchesIterator getSubMatches() {
      return null;
    }

    @Override
    public Query getQuery() {
      throw new UnsupportedOperationException();
    }
  };
}
Example 15
Source File: TestPerfTasksLogic.java From lucene-solr with Apache License 2.0
/**
 * Test ReadTokensTask
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Collection<String> fields = FieldInfos.getIndexedFields(reader);

  for (String fieldName : fields) {
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = MultiTerms.getTerms(reader, fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while (termsEnum.next() != null) {
      docs = TestUtil.docs(random(), termsEnum, docs, PostingsEnum.FREQS);
      while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
Example 16
Source File: OffsetsEnum.java From lucene-solr with Apache License 2.0
public OfPostings(BytesRef term, PostingsEnum postingsEnum) throws IOException {
  // delegate, using freq() as the term frequency for the current document
  this(term, postingsEnum.freq(), postingsEnum);
}
Example 17
Source File: DocVectors.java From semanticvectors with BSD 3-Clause "New" or "Revised" License
/**
 * Creates doc vectors, iterating over terms.
 */
private void trainDocVectors() throws IOException {
  VerbatimLogger.info("Building document vectors ... ");
  Enumeration<ObjectVector> termEnum = termVectors.getAllVectors();
  try {
    int tc = 0;
    while (termEnum.hasMoreElements()) {
      // Output progress counter.
      if ((tc % 10000 == 0) || (tc < 10000 && tc % 1000 == 0)) {
        VerbatimLogger.info("Processed " + tc + " terms ... ");
      }
      tc++;

      ObjectVector termVectorObject = termEnum.nextElement();
      Vector termVector = termVectorObject.getVector();
      String word = (String) termVectorObject.getObject();

      // Go through checking terms for each fieldName.
      for (String fieldName : flagConfig.contentsfields()) {
        Term term = new Term(fieldName, word);
        float globalweight = luceneUtils.getGlobalTermWeight(term);
        float fieldweight = 1;

        // Get any docs for this term.
        PostingsEnum docsEnum = this.luceneUtils.getDocsForTerm(term);

        // This may occur frequently if one term vector store is derived from multiple fields
        if (docsEnum == null) {
          continue;
        }

        while (docsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
          String externalDocID = luceneUtils.getExternalDocId(docsEnum.docID());
          // Add vector from this term, taking freq into account.
          Vector docVector = this.docVectors.getVector(externalDocID);
          float localweight = docsEnum.freq();

          if (flagConfig.fieldweight()) {
            // field weight: 1/sqrt(number of terms in field)
            TermsEnum terms = luceneUtils.getTermVector(docsEnum.docID(), fieldName).iterator();
            int numTerms = 0;
            while (terms.next() != null) {
              numTerms++;
            }
            fieldweight = (float) (1 / Math.sqrt(numTerms));
          }

          docVector.superpose(termVector, localweight * globalweight * fieldweight, null);
        }
      }
    }
  } catch (IOException e) { // catches from indexReader.
    e.printStackTrace();
  }
  VerbatimLogger.info("\nNormalizing doc vectors ...\n");
  Enumeration<ObjectVector> docEnum = docVectors.getAllVectors();
  while (docEnum.hasMoreElements())
    docEnum.nextElement().getVector().normalize();
}
Example 18
Source File: VectorScoreQuery.java From solr-vector-scoring with Apache License 2.0
@Override
protected CustomScoreProvider getCustomScoreProvider(LeafReaderContext context) throws IOException {
  return new CustomScoreProvider(context) {
    @Override
    public float customScore(int docID, float subQueryScore, float valSrcScore) throws IOException {
      float score = 0;
      double docVectorNorm = 0;
      LeafReader reader = context.reader();
      Terms terms = reader.getTermVector(docID, field);
      if (vector.size() != terms.size()) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "indexed and input vector array must have same length");
      }
      TermsEnum iter = terms.iterator();
      BytesRef text;
      while ((text = iter.next()) != null) {
        String term = text.utf8ToString();
        float payloadValue = 0f;
        PostingsEnum postings = iter.postings(null, PostingsEnum.ALL);
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          int freq = postings.freq();
          while (freq-- > 0) postings.nextPosition();

          BytesRef payload = postings.getPayload();
          payloadValue = PayloadHelper.decodeFloat(payload.bytes, payload.offset);

          if (cosine)
            docVectorNorm += Math.pow(payloadValue, 2.0);
        }
        score = (float) (score + payloadValue * (vector.get(Integer.parseInt(term))));
      }
      if (cosine) {
        if ((docVectorNorm == 0) || (queryVectorNorm == 0)) return 0f;
        return (float) (score / (Math.sqrt(docVectorNorm) * Math.sqrt(queryVectorNorm)));
      }
      return score;
    }
  };
}
Example 19
Source File: FrequencyCtxWindowBasedFBWorker.java From jate with GNU Lesser General Public License v3.0
private List<MWEInSentence> collectTermSentenceContext(Terms termVectorLookup,
                                                       Map<Integer, Integer> sentenceBoundaries) throws IOException {
    List<MWEInSentence> result = new ArrayList<>();

    TermsEnum tiRef = termVectorLookup.iterator();
    BytesRef luceneTerm = tiRef.next();
    while (luceneTerm != null) {
        if (luceneTerm.length == 0) {
            luceneTerm = tiRef.next();
            continue;
        }
        String tString = luceneTerm.utf8ToString();
        if (!allCandidates.contains(tString)) {
            luceneTerm = tiRef.next();
            continue;
        }

        PostingsEnum postingsEnum = tiRef.postings(null, PostingsEnum.ALL);
        //PostingsEnum postingsEnum = ti.postings(null, PostingsEnum.OFFSETS);

        int doc = postingsEnum.nextDoc(); //this should be just 1 doc, i.e., the constraint for getting this TV
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            int totalOccurrence = postingsEnum.freq();
            for (int i = 0; i < totalOccurrence; i++) {
                postingsEnum.nextPosition();
                int start = postingsEnum.startOffset();
                int end = postingsEnum.endOffset();
                BytesRef payload = postingsEnum.getPayload();
                SentenceContext sentenceContextInfo = null;
                if (payload != null) {
                    sentenceContextInfo = new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString()));
                }
                if (sentenceContextInfo == null)
                    result.add(new MWEInSentence(tString, start, end, 0, 0, 0));
                else {
                    result.add(new MWEInSentence(tString, start, end,
                            sentenceContextInfo.getFirstTokenIdx(),
                            sentenceContextInfo.getLastTokenIdx(),
                            sentenceContextInfo.getSentenceId()));

                    Integer endBound = sentenceBoundaries.get(sentenceContextInfo.getSentenceId());
                    if (endBound == null || endBound < sentenceContextInfo.getLastTokenIdx())
                        sentenceBoundaries.put(sentenceContextInfo.getSentenceId(),
                                sentenceContextInfo.getLastTokenIdx());
                }
            }
        }
        luceneTerm = tiRef.next();
    }
    Collections.sort(result);
    return result;
}
Example 20
Source File: FilterableTermsEnum.java From Elasticsearch with Apache License 2.0
@Override
public boolean seekExact(BytesRef text) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;
    for (Holder anEnum : enums) {
        if (anEnum.termsEnum.seekExact(text)) {
            if (anEnum.bits == null) {
                docFreq += anEnum.termsEnum.docFreq();
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
                    if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
                        totalTermFreq = -1;
                        continue;
                    }
                    totalTermFreq += leafTotalTermFreq;
                }
            } else {
                final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag);
                // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        docFreq++;
                        // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
                        // is really 1 or unrecorded when filtering like this
                        totalTermFreq += docsEnum.freq();
                    }
                } else {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
                        docFreq++;
                    }
                }
            }
        }
    }
    if (docFreq > 0) {
        currentDocFreq = docFreq;
        currentTotalTermFreq = totalTermFreq;
        current = text;
        return true;
    } else {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
}