org.apache.lucene.index.TermStates Java Examples
The following examples show how to use
org.apache.lucene.index.TermStates.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SpanMultiTermQueryWrapper.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Create a TopTermsSpanBooleanQueryRewrite for
 * at most <code>size</code> terms.
 *
 * @param size forwarded to the delegate {@code TopTermsRewrite} constructor
 */
public TopTermsSpanBooleanQueryRewrite(int size) {
  delegate = new TopTermsRewrite<List<SpanQuery>>(size) {

    /** Reports {@link Integer#MAX_VALUE} as the maximum size. */
    @Override
    protected int getMaxSize() {
      return Integer.MAX_VALUE;
    }

    /** Starts from an empty list that will collect the span clauses. */
    @Override
    protected List<SpanQuery> getTopLevelBuilder() {
      return new ArrayList<>();
    }

    /** Combines every collected clause into one {@code SpanOrQuery}. */
    @Override
    protected Query build(List<SpanQuery> builder) {
      final SpanQuery[] clauses = builder.toArray(new SpanQuery[0]);
      return new SpanOrQuery(clauses);
    }

    /**
     * Appends a {@code SpanTermQuery} for {@code term}, reusing the supplied term states.
     * {@code docFreq} and {@code boost} are not used by this implementation.
     */
    @Override
    protected void addClause(List<SpanQuery> topLevel, Term term, int docFreq, float boost, TermStates states) {
      topLevel.add(new SpanTermQuery(term, states));
    }
  };
}
Example #2
Source File: BlendedTermQuery.java From crate with Apache License 2.0 | 6 votes |
/**
 * Rewrites this query against the given reader.
 *
 * <p>If the superclass rewrite yields a different query, that query is returned unchanged.
 * Otherwise a {@link TermStates} is built for every term, the states are passed to
 * {@code blend}, and the result of {@code topLevelQuery} is returned.
 *
 * @param reader the reader to rewrite against
 * @return the rewritten query
 * @throws IOException if building the term states or blending fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  final Query fromSuper = super.rewrite(reader);
  if (fromSuper != this) {
    // the superclass already produced a replacement query
    return fromSuper;
  }

  final IndexReaderContext topContext = reader.getContext();
  final TermStates[] states = new TermStates[terms.length];
  final int[] docFreqs = new int[states.length];
  for (int idx = 0; idx < terms.length; idx++) {
    final TermStates built = TermStates.build(topContext, terms[idx], true);
    states[idx] = built;
    docFreqs[idx] = built.docFreq();
  }

  final int maxDoc = reader.maxDoc();
  blend(states, maxDoc, reader);
  return topLevelQuery(terms, states, docFreqs, maxDoc);
}
Example #3
Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a new {@link TermStates} for {@code readerContext} that carries the same per-leaf
 * {@link TermState}s as {@code ctx} but reports the supplied artificial statistics.
 *
 * <p>Each leaf for which {@code ctx} holds a non-null state is registered under its index in
 * {@code readerContext.leaves()}; afterwards {@code artificialDf} and {@code artificialTtf}
 * are accumulated on the new instance.
 *
 * @param readerContext the context whose leaves are walked and for which the result is built
 * @param ctx           the source of the per-leaf term states
 * @param artificialDf  document frequency to accumulate on the result
 * @param artificialTtf total term frequency to accumulate on the result
 * @return the newly built term states
 * @throws IOException if reading a per-leaf state fails
 */
private static TermStates adjustFrequencies(IndexReaderContext readerContext, TermStates ctx, int artificialDf, long artificialTtf) throws IOException {
  final List<LeafReaderContext> leaves = readerContext.leaves();
  final TermStates newCtx = new TermStates(readerContext);
  // The previous code treated a null leaf list as "one leaf" and then dereferenced the
  // null list, which could only end in a NullPointerException. With no leaf list there
  // is nothing to register, so the copy loop is skipped.
  if (leaves != null) {
    final int len = leaves.size();
    for (int i = 0; i < len; ++i) {
      final TermState termState = ctx.get(leaves.get(i));
      if (termState != null) {
        newCtx.register(termState, i);
      }
    }
  }
  newCtx.accumulateStatistics(artificialDf, artificialTtf);
  return newCtx;
}
Example #4
Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates the weight and prepares the similarity scorer.
 *
 * <p>For every entry of {@code idToTerm} with a non-null term, the matching {@link TermStates}
 * is looked up by term id; terms with a positive doc freq contribute a {@code TermStatistics}.
 * If no term contributes, {@code stats} is left {@code null}.
 *
 * @param automaton  the automaton stored on this weight
 * @param searcher   supplies the similarity, term statistics and collection statistics
 * @param termStates term states keyed by term id
 * @param boost      boost handed to the similarity scorer
 * @throws IOException if statistics cannot be obtained from the searcher
 */
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermStates> termStates, float boost) throws IOException {
  super(TermAutomatonQuery.this);
  this.automaton = automaton;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final List<TermStatistics> matchedStats = new ArrayList<>();
  for (Map.Entry<Integer,BytesRef> idAndTerm : idToTerm.entrySet()) {
    final BytesRef termBytes = idAndTerm.getValue();
    if (termBytes == null) {
      continue;
    }
    final TermStates ts = termStates.get(idAndTerm.getKey());
    if (ts.docFreq() <= 0) {
      continue;
    }
    matchedStats.add(searcher.termStatistics(new Term(field, termBytes), ts.docFreq(), ts.totalTermFreq()));
  }

  if (!matchedStats.isEmpty()) {
    stats = similarity.scorer(boost,
                              searcher.collectionStatistics(field),
                              matchedStats.toArray(new TermStatistics[0]));
  } else {
    // no terms matched at all, will not use sim
    stats = null;
  }
}
Example #5
Source File: FuzzyLikeThisQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
private Query newTermQuery(IndexReader reader, Term term) throws IOException { if (ignoreTF) { return new ConstantScoreQuery(new TermQuery(term)); } else { // we build an artificial TermStates that will give an overall df and ttf // equal to 1 TermStates context = new TermStates(reader.getContext()); for (LeafReaderContext leafContext : reader.leaves()) { Terms terms = leafContext.reader().terms(term.field()); if (terms != null) { TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term.bytes())) { int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1 context.register(termsEnum.termState(), leafContext.ord, freq, freq); } } } return new TermQuery(term, context); } }
Example #6
Source File: NearestFuzzyQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
private Query newTermQuery(IndexReader reader, Term term) throws IOException { // we build an artificial TermStates that will give an overall df and ttf // equal to 1 TermStates termStates = new TermStates(reader.getContext()); for (LeafReaderContext leafContext : reader.leaves()) { Terms terms = leafContext.reader().terms(term.field()); if (terms != null) { TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(term.bytes())) { int freq = 1 - termStates.docFreq(); // we want the total df and ttf to be 1 termStates.register(termsEnum.termState(), leafContext.ord, freq, freq); } } } return new TermQuery(term, termStates); }
Example #7
Source File: SpanWeight.java From lucene-solr with Apache License 2.0 | 6 votes |
private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException { if (termStates == null || termStates.size() == 0 || query.getField() == null) return null; TermStatistics[] termStats = new TermStatistics[termStates.size()]; int termUpTo = 0; for (Map.Entry<Term, TermStates> entry : termStates.entrySet()) { TermStates ts = entry.getValue(); if (ts.docFreq() > 0) { termStats[termUpTo++] = searcher.termStatistics(entry.getKey(), ts.docFreq(), ts.totalTermFreq()); } } CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField()); if (termUpTo > 0) { return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(termStats, 0, termUpTo)); } else { return null; // no terms at all exist, we won't use similarity } }
Example #8
Source File: ShardSearchingTestBase.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Collects term statistics for the given terms from one node's searcher at a given version.
 *
 * <p>Only terms with a positive doc freq are included. The acquired searcher is always
 * released before returning.
 *
 * @param terms   the terms to look up
 * @param nodeID  index into {@code nodes}
 * @param version the searcher version to acquire
 * @return statistics per term that exists on the node
 * @throws IOException if term states cannot be built
 * @throws SearcherExpiredException if no searcher is available for {@code version}
 */
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
  final NodeState node = nodes[nodeID];
  final Map<Term,TermStatistics> stats = new HashMap<>();
  final IndexSearcher searcher = node.searchers.acquire(version);
  if (searcher == null) {
    throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
  }
  try {
    for (Term term : terms) {
      final TermStates ts = TermStates.build(searcher.getIndexReader().getContext(), term, true);
      if (ts.docFreq() <= 0) {
        continue;
      }
      stats.put(term, searcher.termStatistics(term, ts.docFreq(), ts.totalTermFreq()));
    }
    return stats;
  } finally {
    node.searchers.release(searcher);
  }
}
Example #9
Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Expert: Add a {@link Term} with the provided boost and context.
 * This method is useful if you already have a {@link TermStates}
 * object constructed for the given term.
 *
 * @param term    the term to add
 * @param boost   the boost stored alongside the term
 * @param context the term states stored alongside the term
 * @return this builder
 * @throws IndexSearcher.TooManyClauses if the number of terms already added has reached
 *         {@code IndexSearcher.getMaxClauseCount()}
 */
public Builder add(Term term, float boost, TermStates context) {
  if (numTerms >= IndexSearcher.getMaxClauseCount()) {
    throw new IndexSearcher.TooManyClauses();
  }

  final int slot = numTerms;
  final int required = slot + 1;

  // make room in the three parallel arrays before writing the new entry
  terms = ArrayUtil.grow(terms, required);
  boosts = ArrayUtil.grow(boosts, required);
  contexts = ArrayUtil.grow(contexts, required);

  terms[slot] = term;
  boosts[slot] = boost;
  contexts[slot] = context;

  numTerms = required;
  return this;
}
Example #10
Source File: BlendedTermQuery.java From crate with Apache License 2.0 | 5 votes |
private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException { assert ctx.wasBuiltFor(readerContext); // Use a value of ttf that is consistent with the doc freq (ie. gte) long newTTF; if (ctx.totalTermFreq() < 0) { newTTF = -1; } else { newTTF = Math.max(ctx.totalTermFreq(), newDocFreq); } List<LeafReaderContext> leaves = readerContext.leaves(); final int len; if (leaves == null) { len = 1; } else { len = leaves.size(); } TermStates newCtx = new TermStates(readerContext); if (leaves != null) { for (int i = 0; i < len; ++i) { TermState termState = ctx.get(leaves.get(i)); if (termState == null) { continue; } newCtx.register(termState, i, newDocFreq, newTTF); newDocFreq = 0; newTTF = 0; } } return newCtx; }
Example #11
Source File: SpanQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights
 *
 * @param weights the weights asked, in iteration order, to add their term states
 * @return a sorted map filled by the weights
 * @lucene.internal
 */
public static Map<Term, TermStates> getTermStates(Collection<SpanWeight> weights) {
  final Map<Term, TermStates> collected = new TreeMap<>();
  weights.forEach(weight -> weight.extractTermStates(collected));
  return collected;
}
Example #12
Source File: SpanQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Build a map of terms to {@link TermStates}, for use in constructing SpanWeights
 *
 * @param weights the weights asked, in argument order, to add their term states
 * @return a sorted map filled by the weights
 * @lucene.internal
 */
public static Map<Term, TermStates> getTermStates(SpanWeight... weights) {
  final Map<Term, TermStates> collected = new TreeMap<>();
  for (int idx = 0; idx < weights.length; idx++) {
    weights[idx].extractTermStates(collected);
  }
  return collected;
}
Example #13
Source File: SpanTermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the span weight for this term query.
 *
 * <p>The stored {@code termStates} are reused only when they are non-null and were built for
 * the searcher's top reader context; otherwise fresh states are built. A singleton
 * term-to-states map is passed on only when the score mode needs scores.
 *
 * @param searcher  the searcher the weight is created for
 * @param scoreMode decides whether statistics are needed
 * @param boost     boost handed to the weight
 * @return the new span weight
 * @throws IOException if the term states cannot be built
 */
@Override
public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  final TermStates context =
      (termStates != null && termStates.wasBuiltFor(topContext))
          ? termStates
          : TermStates.build(topContext, term, scoreMode.needsScores());
  return new SpanTermWeight(
      context,
      searcher,
      scoreMode.needsScores() ? Collections.singletonMap(term, context) : null,
      boost);
}
Example #14
Source File: TermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState; if (perReaderTermState == null || perReaderTermState.wasBuiltFor(context) == false) { termState = TermStates.build(context, term, scoreMode.needsScores()); } else { // PRTS was pre-build for this IS termState = this.perReaderTermState; } return new TermWeight(searcher, scoreMode, boost, termState); }
Example #15
Source File: CommonTermsQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Rewrites this query against the given reader.
 *
 * <p>With no terms a {@code MatchNoDocsQuery} is returned; with exactly one term the result
 * of {@code newTermQuery} is returned. Otherwise term states are collected for all terms
 * and the result of {@code buildQuery} is returned.
 *
 * @param reader the reader to rewrite against
 * @return the rewritten query
 * @throws IOException if collecting term states fails
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (this.terms.isEmpty()) {
    return new MatchNoDocsQuery("CommonTermsQuery with no terms");
  }
  if (this.terms.size() == 1) {
    return newTermQuery(this.terms.get(0), null);
  }

  final List<LeafReaderContext> leafContexts = reader.leaves();
  final int maxDoc = reader.maxDoc();
  final TermStates[] statesPerTerm = new TermStates[terms.size()];
  final Term[] termArray = this.terms.toArray(new Term[0]);

  collectTermStates(reader, leafContexts, statesPerTerm, termArray);
  return buildQuery(maxDoc, statesPerTerm, termArray);
}
Example #16
Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the weight for this automaton query.
 *
 * <p>A {@link TermStates} is built for every entry of {@code termToID} that has a non-null
 * term, and stored under that entry's id.
 *
 * @param searcher  the searcher the weight is created for
 * @param scoreMode decides whether statistics are needed when building term states
 * @param boost     boost handed to the weight
 * @return the new weight
 * @throws IOException if the term states cannot be built
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  final Map<Integer,TermStates> statesById = new HashMap<>();
  for (Map.Entry<BytesRef,Integer> termAndId : termToID.entrySet()) {
    final BytesRef termBytes = termAndId.getKey();
    if (termBytes == null) {
      continue;
    }
    final TermStates built = TermStates.build(topContext, new Term(field, termBytes), scoreMode.needsScores());
    statesById.put(termAndId.getValue(), built);
  }
  return new TermAutomatonWeight(det, searcher, statesById, boost);
}
Example #17
Source File: TermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates the weight and prepares the similarity scorer.
 *
 * <p>When scores are needed, real collection and term statistics are fetched from the
 * searcher; a term with a doc freq of zero leaves {@code simScorer} null. When scores are
 * not needed, placeholder statistics are used instead.
 *
 * @param searcher   supplies the similarity and the statistics
 * @param scoreMode  decides whether real statistics are required
 * @param boost      boost handed to the similarity scorer
 * @param termStates the term states; must be non-null when scores are needed
 * @throws IOException if statistics cannot be obtained from the searcher
 * @throws IllegalStateException if scores are needed and {@code termStates} is null
 */
public TermWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost, TermStates termStates) throws IOException {
  super(TermQuery.this);
  if (scoreMode.needsScores() && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.scoreMode = scoreMode;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (!scoreMode.needsScores()) {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
    collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
    termStats = new TermStatistics(term.bytes(), 1, 1);
  } else {
    collectionStats = searcher.collectionStatistics(term.field());
    if (termStates.docFreq() > 0) {
      termStats = searcher.termStatistics(term, termStates.docFreq(), termStates.totalTermFreq());
    } else {
      termStats = null;
    }
  }

  // a null termStats means the term doesn't exist in any segment, we won't use similarity at all
  this.simScorer = (termStats == null)
      ? null
      : similarity.scorer(boost, collectionStats, termStats);
}
Example #18
Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
private BlendedTermQuery(Term[] terms, float[] boosts, TermStates[] contexts, RewriteMethod rewriteMethod) { assert terms.length == boosts.length; assert terms.length == contexts.length; this.terms = terms; this.boosts = boosts; this.contexts = contexts; this.rewriteMethod = rewriteMethod; // we sort terms so that equals/hashcode does not rely on the order new InPlaceMergeSorter() { @Override protected void swap(int i, int j) { Term tmpTerm = terms[i]; terms[i] = terms[j]; terms[j] = tmpTerm; TermStates tmpContext = contexts[i]; contexts[i] = contexts[j]; contexts[j] = tmpContext; float tmpBoost = boosts[i]; boosts[i] = boosts[j]; boosts[j] = tmpBoost; } @Override protected int compare(int i, int j) { return terms[i].compareTo(terms[j]); } }.sort(0, terms.length); }
Example #19
Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public final Query rewrite(IndexReader reader) throws IOException { final TermStates[] contexts = ArrayUtil.copyOfSubArray(this.contexts, 0, this.contexts.length); for (int i = 0; i < contexts.length; ++i) { if (contexts[i] == null || contexts[i].wasBuiltFor(reader.getContext()) == false) { contexts[i] = TermStates.build(reader.getContext(), terms[i], true); } } // Compute aggregated doc freq and total term freq // df will be the max of all doc freqs // ttf will be the sum of all total term freqs int df = 0; long ttf = 0; for (TermStates ctx : contexts) { df = Math.max(df, ctx.docFreq()); ttf += ctx.totalTermFreq(); } for (int i = 0; i < contexts.length; ++i) { contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf); } Query[] termQueries = new Query[terms.length]; for (int i = 0; i < terms.length; ++i) { termQueries[i] = new TermQuery(terms[i], contexts[i]); if (boosts[i] != 1f) { termQueries[i] = new BoostQuery(termQueries[i], boosts[i]); } } return rewriteMethod.rewrite(termQueries); }
Example #20
Source File: FeatureField.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Compute a feature value that may be used as the {@code pivot} parameter of * the {@link #newSaturationQuery(String, String, float, float)} and * {@link #newSigmoidQuery(String, String, float, float, float)} factory * methods. The implementation takes the average of the int bits of the float * representation in practice before converting it back to a float. Given that * floats store the exponent in the higher bits, it means that the result will * be an approximation of the geometric mean of all feature values. * @param reader the {@link IndexReader} to search against * @param featureField the field that stores features * @param featureName the name of the feature */ static float computePivotFeatureValue(IndexReader reader, String featureField, String featureName) throws IOException { Term term = new Term(featureField, featureName); TermStates states = TermStates.build(reader.getContext(), term, true); if (states.docFreq() == 0) { // avoid division by 0 // The return value doesn't matter much here, the term doesn't exist, // it will never be used for scoring. Just Make sure to return a legal // value. return 1; } float avgFreq = (float) ((double) states.totalTermFreq() / states.docFreq()); return decodeFeatureValue(avgFreq); }
Example #21
Source File: TestTermQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks {@code TermQuery} equality: same term is equal, different term text is unequal,
 * and a query built with explicit {@link TermStates} equals one built without.
 */
public void testEquals() throws IOException {
  // same field and text
  final TermQuery plain = new TermQuery(new Term("foo", "bar"));
  final TermQuery plainAgain = new TermQuery(new Term("foo", "bar"));
  QueryUtils.checkEqual(plain, plainAgain);

  // same field, different text
  final TermQuery bar = new TermQuery(new Term("foo", "bar"));
  final TermQuery baz = new TermQuery(new Term("foo", "baz"));
  QueryUtils.checkUnequal(bar, baz);

  // the context is taken from an empty MultiReader that is closed right away
  final CompositeReaderContext context;
  try (MultiReader emptyReader = new MultiReader()) {
    context = emptyReader.getContext();
  }

  // supplying term states must not affect equality
  final TermQuery withoutStates = new TermQuery(new Term("foo", "bar"));
  final TermQuery withStates =
      new TermQuery(new Term("foo", "bar"), TermStates.build(context, new Term("foo", "bar"), true));
  QueryUtils.checkEqual(withoutStates, withStates);
}
Example #22
Source File: GraphTermsQParserPlugin.java From lucene-solr with Apache License 2.0 | 5 votes |
private void collectTermStates(IndexReader reader, List<LeafReaderContext> leaves, TermStates[] contextArray, Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : leaves) { Terms terms = context.reader().terms(this.field); if (terms == null) { // field does not exist continue; } termsEnum = terms.iterator(); if (termsEnum == TermsEnum.EMPTY) continue; for (int i = 0; i < queryTerms.length; i++) { Term term = queryTerms[i]; TermStates termStates = contextArray[i]; if (termsEnum.seekExact(term.bytes())) { if (termStates == null) { contextArray[i] = new TermStates(reader.getContext(), termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } else { termStates.register(termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } } }
Example #23
Source File: TermsComponent.java From lucene-solr with Apache License 2.0 | 5 votes |
private static void collectTermStates(IndexReaderContext topReaderContext, TermStates[] contextArray, Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : topReaderContext.leaves()) { for (int i = 0; i < queryTerms.length; i++) { Term term = queryTerms[i]; final Terms terms = context.reader().terms(term.field()); if (terms == null) { // field does not exist continue; } termsEnum = terms.iterator(); assert termsEnum != null; if (termsEnum == TermsEnum.EMPTY) continue; TermStates termStates = contextArray[i]; if (termsEnum.seekExact(term.bytes())) { if (termStates == null) { termStates = new TermStates(topReaderContext); contextArray[i] = termStates; } termStates.accumulateStatistics(termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } }
Example #24
Source File: FieldBoostTermQueryBuilder.java From querqy with Apache License 2.0 | 5 votes |
@Override public Weight createWeight(final IndexSearcher searcher, final ScoreMode scoreMode, final float boost) throws IOException { final IndexReaderContext context = searcher.getTopReaderContext(); final TermStates termState = TermStates.build(context, term, scoreMode.needsScores()); // TODO: set boosts to 1f if needsScores is false? return new FieldBoostWeight(termState, boost, fieldBoost.getBoost(term.field(), searcher.getIndexReader())); }
Example #25
Source File: FieldBoostTermQueryBuilder.java From querqy with Apache License 2.0 | 5 votes |
/**
 * Creates the weight and precomputes its score as {@code queryBoost * fieldBoost}.
 *
 * @param termStates the term states; asserted to be non-null
 * @param queryBoost the query-level boost
 * @param fieldBoost the field-level boost
 */
public FieldBoostWeight(final TermStates termStates, final float queryBoost, final float fieldBoost) {
  super(FieldBoostTermQuery.this);
  assert termStates != null : "TermContext must not be null";

  this.termStates = termStates;

  // both factors are stored, together with their product
  this.fieldBoost = fieldBoost;
  this.queryBoost = queryBoost;
  this.score = queryBoost * fieldBoost;
}
Example #26
Source File: BlendedTermQuery.java From crate with Apache License 2.0 | 5 votes |
/**
 * Builds a new {@link TermStates} with the per-leaf states of {@code termContext} and a
 * replaced total term freq.
 *
 * <p>If both {@code sumTTF} and the existing total term freq are {@code -1}, the input is
 * returned unchanged. Otherwise the first leaf that has a state is registered with the
 * existing doc freq and {@code sumTTF}; every later leaf is registered with zeros.
 *
 * @param readerContext the context the result is built for
 * @param termContext   the source of the per-leaf term states
 * @param sumTTF        the total term freq to register
 * @return {@code termContext} itself, or the newly built term states
 * @throws IOException if reading a per-leaf state fails
 */
private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termContext, long sumTTF) throws IOException {
  assert termContext.wasBuiltFor(readerContext);
  if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
    return termContext;
  }

  final TermStates adjusted = new TermStates(readerContext);
  final List<LeafReaderContext> leaves = readerContext.leaves();
  int pendingDf = termContext.docFreq();
  long pendingTtf = sumTTF;
  if (leaves == null) {
    return adjusted;
  }

  final int leafCount = leaves.size();
  for (int ord = 0; ord < leafCount; ord++) {
    final TermState termState = termContext.get(leaves.get(ord));
    if (termState != null) {
      adjusted.register(termState, ord, pendingDf, pendingTtf);
      // the statistics are attached once; later leaves contribute nothing
      pendingDf = 0;
      pendingTtf = 0;
    }
  }
  return adjusted;
}
Example #27
Source File: CommonTermsQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
public void collectTermStates(IndexReader reader, List<LeafReaderContext> leaves, TermStates[] contextArray, Term[] queryTerms) throws IOException { TermsEnum termsEnum = null; for (LeafReaderContext context : leaves) { for (int i = 0; i < queryTerms.length; i++) { Term term = queryTerms[i]; TermStates termStates = contextArray[i]; final Terms terms = context.reader().terms(term.field()); if (terms == null) { // field does not exist continue; } termsEnum = terms.iterator(); assert termsEnum != null; if (termsEnum == TermsEnum.EMPTY) continue; if (termsEnum.seekExact(term.bytes())) { if (termStates == null) { contextArray[i] = new TermStates(reader.getContext(), termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } else { termStates.register(termsEnum.termState(), context.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } } }
Example #28
Source File: AssertingSpanWeight.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Forwards the call unchanged to the wrapped {@code in} instance.
 *
 * @param contexts the map handed to the wrapped instance
 */
@Override
public void extractTermStates(Map<Term, TermStates> contexts) {
  // pure delegation; nothing is added or checked here
  in.extractTermStates(contexts);
}
Example #29
Source File: SpanContainingQuery.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates the weight by passing every argument straight to the superclass constructor.
 *
 * @param searcher     the searcher the weight is created for
 * @param terms        term states per term
 * @param bigWeight    the weight passed as the superclass's third argument
 * @param littleWeight the weight passed as the superclass's fourth argument
 * @param boost        boost handed to the superclass
 * @throws IOException if the superclass constructor fails
 */
public SpanContainingWeight(IndexSearcher searcher,
                            Map<Term, TermStates> terms,
                            SpanWeight bigWeight,
                            SpanWeight littleWeight,
                            float boost) throws IOException {
  super(searcher, terms, bigWeight, littleWeight, boost);
}
Example #30
Source File: TopTermsRewrite.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a score term holding the given term states.
 *
 * @param termState the term states stored on this instance
 */
public ScoreTerm(TermStates termState) {
  // kept by reference; no copy is made
  this.termState = termState;
}