Java Code Examples for org.apache.lucene.index.TermsEnum#SeekStatus
The following examples show how to use
org.apache.lucene.index.TermsEnum#SeekStatus.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ContainsPrefixTreeQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
private boolean seek(Cell cell) throws IOException { if (thisTerm == null) return false; final int compare = indexedCell.compareToNoLeaf(cell); if (compare > 0) { return false;//leap-frog effect } else if (compare == 0) { return true; // already there! } else {//compare > 0 //seek! seekTerm = cell.getTokenBytesNoLeaf(seekTerm); final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm); if (seekStatus == TermsEnum.SeekStatus.END) { thisTerm = null;//all done return false; } thisTerm = termsEnum.term(); indexedCell = grid.readCell(thisTerm, indexedCell); if (seekStatus == TermsEnum.SeekStatus.FOUND) { return true; } return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0; } }
Example 2
Source File: DocToDoubleVectorUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * create a sparse <code>Double</code> vector given doc and field term vectors using local frequency of the terms in the doc * * @param docTerms term vectors for a given document * @param fieldTerms field term vectors * @return a sparse vector of <code>Double</code>s as an array * @throws IOException in case accessing the underlying index fails */ public static Double[] toSparseLocalFreqDoubleArray(Terms docTerms, Terms fieldTerms) throws IOException { TermsEnum fieldTermsEnum = fieldTerms.iterator(); Double[] freqVector = null; if (docTerms != null && fieldTerms.size() > -1) { freqVector = new Double[(int) fieldTerms.size()]; int i = 0; TermsEnum docTermsEnum = docTerms.iterator(); BytesRef term; while ((term = fieldTermsEnum.next()) != null) { TermsEnum.SeekStatus seekStatus = docTermsEnum.seekCeil(term); if (seekStatus.equals(TermsEnum.SeekStatus.END)) { docTermsEnum = docTerms.iterator(); } if (seekStatus.equals(TermsEnum.SeekStatus.FOUND)) { long termFreqLocal = docTermsEnum.totalTermFreq(); // the total number of occurrences of this term in the given document freqVector[i] = Long.valueOf(termFreqLocal).doubleValue(); } else { freqVector[i] = 0d; } i++; } } return freqVector; }
Example 3
Source File: SrndTermQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Reports this query's term to {@code mtv} if, and only if, the exact term text exists
 * in {@code fieldName} of {@code reader}.
 *
 * <p>The presence check is done against the index for symmetry with the other
 * SimpleTerm implementations.
 *
 * @param reader    index to look the term up in
 * @param fieldName field whose terms are consulted
 * @param mtv       visitor notified of the matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException {
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return; // field has no terms at all
  }

  final TermsEnum cursor = fieldTerms.iterator();
  final TermsEnum.SeekStatus outcome = cursor.seekCeil(new BytesRef(getTermText()));
  if (outcome == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
Example 4
Source File: SolrRangeQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
public RangeTermsEnum(Terms terms) throws IOException { if (terms == null) { positioned = true; } else { te = terms.iterator(); if (lower != null) { TermsEnum.SeekStatus status = te.seekCeil(lower); if (status == TermsEnum.SeekStatus.END) { positioned = true; curr = null; } else if (status == SeekStatus.FOUND) { positioned = includeLower(); curr = te.term(); } else { // lower bound not found, so includeLower is irrelevant positioned = true; curr = te.term(); } } } }
Example 5
Source File: TestTermBytesComparator.java From lucene-solr with Apache License 2.0 | 5 votes |
private TermsEnum.SeekStatus assertGreaterUntil(int expectedPosition, MockBlockReader blockReader, BytesRef lookedTerm) throws IOException { TermsEnum.SeekStatus seekStatus = blockReader.seekInBlock(lookedTerm); assertEquals("looked Term: " + lookedTerm.utf8ToString(), expectedPosition, blockReader.lineIndexInBlock - 1); //reset the state blockReader.reset(); return seekStatus; }
Example 6
Source File: SrndTruncQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef} and whose
 * remainder (the text after the first {@code prefix.length()} characters) fully matches
 * {@code pattern}.
 *
 * <p>Enumeration starts at the ceiling of {@code prefixRef} and stops at the first term
 * that no longer carries the prefix, or when the terms are exhausted.
 *
 * @param reader    index to enumerate terms from
 * @param fieldName field whose terms are scanned
 * @param mtv       visitor notified of each matching term
 * @throws IOException if reading the terms fails
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException {
  final int prefixLength = prefix.length();
  final Terms fieldTerms = MultiTerms.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = fieldTerms.iterator();
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Pick the first candidate: the prefix itself on an exact hit, the ceiling term
    // when the prefix is absent, nothing when the enum ran off the end.
    BytesRef candidate;
    if (status == TermsEnum.SeekStatus.FOUND) {
      candidate = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      candidate = termsEnum.term();
    } else {
      candidate = null;
    }

    while (candidate != null && StringHelper.startsWith(candidate, prefixRef)) {
      final String termText = candidate.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
      candidate = termsEnum.next();
    }
  } finally {
    matcher.reset();
  }
}
Example 7
Source File: SrndPrefixQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void visitMatchingTerms( IndexReader reader, String fieldName, MatchingTermVisitor mtv) throws IOException { /* inspired by PrefixQuery.rewrite(): */ Terms terms = MultiTerms.getTerms(reader, fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(); boolean skip = false; TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix())); if (status == TermsEnum.SeekStatus.FOUND) { mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName)); } else if (status == TermsEnum.SeekStatus.NOT_FOUND) { if (StringHelper.startsWith(termsEnum.term(), prefixRef)) { mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString())); } else { skip = true; } } else { // EOF skip = true; } if (!skip) { while(true) { BytesRef text = termsEnum.next(); if (text != null && StringHelper.startsWith(text, prefixRef)) { mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString())); } else { break; } } } } }
Example 8
Source File: TermPrefixCursor.java From SolrTextTagger with Apache License 2.0 | 5 votes |
/** Seeks to prefixBuf or the next term that is prefixed by prefixBuf plus the separator char. * Sets docIds. **/ private boolean seekPrefix() throws IOException { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefixBuf); docIds = null;//invalidate switch (seekStatus) { case END: return false; case FOUND: postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); docIds = postingsEnumToIntsRef(postingsEnum, liveDocs); if (docIds.length > 0) { return true; } //Pretend we didn't find it; go to next term docIds = null; if (termsEnum.next() == null) { // case END return false; } //fall through to NOT_FOUND case NOT_FOUND: //termsEnum must start with prefixBuf to continue BytesRef teTerm = termsEnum.term(); if (teTerm.length > prefixBuf.length) { for (int i = 0; i < prefixBuf.length; i++) { if (prefixBuf.bytes[prefixBuf.offset + i] != teTerm.bytes[teTerm.offset + i]) return false; } if (teTerm.bytes[teTerm.offset + prefixBuf.length] != SEPARATOR_CHAR) return false; return true; } return false; } throw new IllegalStateException(seekStatus.toString()); }
Example 9
Source File: MultiPhrasePrefixQuery.java From crate with Apache License 2.0 | 5 votes |
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException { // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually. List<LeafReaderContext> leaves = reader.leaves(); for (LeafReaderContext leaf : leaves) { Terms _terms = leaf.reader().terms(field); if (_terms == null) { continue; } TermsEnum termsEnum = _terms.iterator(); TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes()); if (TermsEnum.SeekStatus.END == seekStatus) { continue; } for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { if (!StringHelper.startsWith(term, prefix.bytes())) { break; } terms.add(new Term(field, BytesRef.deepCopyOf(term))); if (terms.size() >= maxExpansions) { return; } } } }
Example 10
Source File: TestTermBytesComparator.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Asserts that seeking {@code lookedTerm} leaves the block reader at position
 * {@code -1} (as checked by {@code assertGreaterUntil}) and that the seek reports
 * {@link TermsEnum.SeekStatus#END}.
 *
 * @param blockReader reader under test
 * @param lookedTerm  term to seek in the block
 * @throws IOException if the seek fails
 */
private void assertAlwaysGreater(MockBlockReader blockReader, BytesRef lookedTerm) throws IOException {
  assertEquals(TermsEnum.SeekStatus.END, assertGreaterUntil(-1, blockReader, lookedTerm));
}
Example 11
Source File: TermGroupFacetCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override protected void doSetNextReader(LeafReaderContext context) throws IOException { if (segmentFacetCounts != null) { segmentResults.add(createSegmentResult()); } groupFieldTermsIndex = DocValues.getSorted(context.reader(), groupField); facetFieldDocTermOrds = DocValues.getSortedSet(context.reader(), facetField); facetFieldNumTerms = (int) facetFieldDocTermOrds.getValueCount(); if (facetFieldNumTerms == 0) { facetOrdTermsEnum = null; } else { facetOrdTermsEnum = facetFieldDocTermOrds.termsEnum(); } // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field segmentFacetCounts = new int[facetFieldNumTerms + 1]; segmentTotalCount = 0; segmentGroupedFacetHits.clear(); for (GroupedFacetHit groupedFacetHit : groupedFacetHits) { int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.lookupTerm(groupedFacetHit.groupValue); if (groupedFacetHit.groupValue != null && groupOrd < 0) { continue; } int facetOrd; if (groupedFacetHit.facetValue != null) { if (facetOrdTermsEnum == null || !facetOrdTermsEnum.seekExact(groupedFacetHit.facetValue)) { continue; } facetOrd = (int) facetOrdTermsEnum.ord(); } else { facetOrd = facetFieldNumTerms; } // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd; segmentGroupedFacetHits.put(segmentGroupedFacetsIndex); } if (facetPrefix != null) { TermsEnum.SeekStatus seekStatus; if (facetOrdTermsEnum != null) { seekStatus = facetOrdTermsEnum.seekCeil(facetPrefix); } else { seekStatus = TermsEnum.SeekStatus.END; } if (seekStatus != TermsEnum.SeekStatus.END) { startFacetOrd = (int) facetOrdTermsEnum.ord(); } else { startFacetOrd = 0; endFacetOrd = 0; return; } BytesRefBuilder facetEndPrefix = new BytesRefBuilder(); facetEndPrefix.append(facetPrefix); facetEndPrefix.append(UnicodeUtil.BIG_TERM); seekStatus = facetOrdTermsEnum.seekCeil(facetEndPrefix.get()); 
if (seekStatus != TermsEnum.SeekStatus.END) { endFacetOrd = (int) facetOrdTermsEnum.ord(); } else { endFacetOrd = facetFieldNumTerms; // Don't include null... } } else { startFacetOrd = 0; endFacetOrd = facetFieldNumTerms + 1; } }
Example 12
Source File: TermPrefixCursor.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Seeks to prefixBuf or the next term that is prefixed by prefixBuf plus the separator char. * Sets docIds. **/ @SuppressWarnings({"fallthrough"}) private boolean seekPrefix() throws IOException { TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefixBuf); docIds = null;//invalidate switch (seekStatus) { case END: return false; case FOUND: postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); docIds = postingsEnumToIntsRef(postingsEnum, liveDocs); if (docIds.length > 0) { return true; } //Pretend we didn't find it; go to next term docIds = null; if (termsEnum.next() == null) { // case END return false; } //fall through to NOT_FOUND case NOT_FOUND: //termsEnum must start with prefixBuf to continue BytesRef teTerm = termsEnum.term(); if (teTerm.length > prefixBuf.length) { for (int i = 0; i < prefixBuf.length; i++) { if (prefixBuf.bytes[prefixBuf.offset + i] != teTerm.bytes[teTerm.offset + i]) return false; } if (teTerm.bytes[teTerm.offset + prefixBuf.length] != SEPARATOR_CHAR) return false; return true; } return false; } throw new IllegalStateException(seekStatus.toString()); }