org.apache.lucene.index.TermStates Java Examples

Source File: SpanMultiTermQueryWrapper.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Create a TopTermsSpanBooleanQueryRewrite for
 * at most <code>size</code> terms. The collected terms are wrapped in
 * {@link SpanTermQuery} clauses and combined into one {@link SpanOrQuery}.
 */
public TopTermsSpanBooleanQueryRewrite(int size) {
  delegate = new TopTermsRewrite<List<SpanQuery>>(size) {
    @Override
    protected List<SpanQuery> getTopLevelBuilder() {
      // the "builder" is simply a growable list of span clauses
      return new ArrayList<>();
    }

    @Override
    protected int getMaxSize() {
      return Integer.MAX_VALUE;
    }

    @Override
    protected void addClause(List<SpanQuery> topLevel, Term term, int docFreq, float boost, TermStates states) {
      // docFreq and boost are not consulted here; only the term and its states are used
      topLevel.add(new SpanTermQuery(term, states));
    }

    @Override
    protected Query build(List<SpanQuery> builder) {
      final SpanQuery[] clauses = builder.toArray(new SpanQuery[builder.size()]);
      return new SpanOrQuery(clauses);
    }
  };
}

Source File: BlendedTermQuery.java From crate with Apache License 2.0

6 votes

/**
 * Rewrites this query: defers to the superclass first, and otherwise builds
 * {@link TermStates} for every term, blends them and returns the top-level query.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query fromSuper = super.rewrite(reader);
    if (fromSuper != this) {
        // the superclass already produced a different query; nothing more to do here
        return fromSuper;
    }

    final IndexReaderContext topContext = reader.getContext();
    final TermStates[] states = new TermStates[terms.length];
    final int[] docFreqs = new int[states.length];
    for (int t = 0; t < terms.length; t++) {
        final TermStates built = TermStates.build(topContext, terms[t], true);
        states[t] = built;
        docFreqs[t] = built.docFreq();
    }

    final int maxDoc = reader.maxDoc();
    blend(states, maxDoc, reader);
    return topLevelQuery(terms, states, docFreqs, maxDoc);
}

Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Copies the per-leaf {@link TermState}s of {@code ctx} into a new {@link TermStates}
 * created for {@code readerContext}, then accumulates the supplied artificial
 * statistics on the copy.
 *
 * @param readerContext context the new {@link TermStates} is created for; its leaves are
 *                      looked up in {@code ctx}
 * @param ctx           source of the per-leaf term states
 * @param artificialDf  document frequency handed to {@code accumulateStatistics}
 * @param artificialTtf total term frequency handed to {@code accumulateStatistics}
 * @return the newly created {@link TermStates}
 * @throws IOException declared by the signature; propagated from the calls made here
 */
private static TermStates adjustFrequencies(IndexReaderContext readerContext,
                                            TermStates ctx, int artificialDf, long artificialTtf) throws IOException {
  final List<LeafReaderContext> leaves = readerContext.leaves();
  final TermStates newCtx = new TermStates(readerContext);
  // Without a leaf list there is nothing to copy. Skipping the loop avoids
  // dereferencing a null list (which would throw a NullPointerException).
  if (leaves != null) {
    final int len = leaves.size();
    for (int i = 0; i < len; ++i) {
      final TermState termState = ctx.get(leaves.get(i));
      if (termState != null) {
        // register under the leaf's position in the list
        newCtx.register(termState, i);
      }
    }
  }
  newCtx.accumulateStatistics(artificialDf, artificialTtf);
  return newCtx;
}

Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the weight: stores the automaton and term states, then gathers
 * {@link TermStatistics} for every mapped term with a positive doc freq and,
 * if any were found, asks the similarity for a scorer.
 */
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermStates> termStates, float boost) throws IOException {
  super(TermAutomatonQuery.this);
  this.automaton = automaton;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final List<TermStatistics> collected = new ArrayList<>();
  for (Map.Entry<Integer,BytesRef> mapping : idToTerm.entrySet()) {
    final BytesRef termBytes = mapping.getValue();
    if (termBytes == null) {
      // ids without term bytes contribute no statistics
      continue;
    }
    final TermStates states = termStates.get(mapping.getKey());
    if (states.docFreq() <= 0) {
      continue;
    }
    collected.add(searcher.termStatistics(new Term(field, termBytes), states.docFreq(), states.totalTermFreq()));
  }

  if (collected.isEmpty() == false) {
    stats = similarity.scorer(boost, searcher.collectionStatistics(field),
                              collected.toArray(new TermStatistics[collected.size()]));
  } else {
    stats = null; // no terms matched at all, will not use sim
  }
}

Source File: FuzzyLikeThisQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the query for a single term: a constant-score term query when
 * {@code ignoreTF} is set, otherwise a {@link TermQuery} carrying an artificial
 * {@link TermStates}.
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  if (ignoreTF) {
    return new ConstantScoreQuery(new TermQuery(term));
  }

  // we build an artificial TermStates that will give an overall df and ttf
  // equal to 1
  final TermStates artificial = new TermStates(reader.getContext());
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms fieldTerms = leaf.reader().terms(term.field());
    if (fieldTerms == null) {
      continue;
    }
    final TermsEnum cursor = fieldTerms.iterator();
    if (cursor.seekExact(term.bytes()) == false) {
      continue;
    }
    final int delta = 1 - artificial.docFreq(); // we want the total df and ttf to be 1
    artificial.register(cursor.termState(), leaf.ord, delta, delta);
  }
  return new TermQuery(term, artificial);
}

Source File: NearestFuzzyQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates a {@link TermQuery} for {@code term} backed by an artificial
 * {@link TermStates} whose overall df and ttf are meant to equal 1.
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  final TermStates synthetic = new TermStates(reader.getContext());
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms leafTerms = leaf.reader().terms(term.field());
    if (leafTerms == null) {
      continue;
    }
    final TermsEnum leafEnum = leafTerms.iterator();
    if (leafEnum.seekExact(term.bytes())) {
      // register whatever is still missing to reach a total df and ttf of 1
      final int remaining = 1 - synthetic.docFreq();
      synthetic.register(leafEnum.termState(), leaf.ord, remaining, remaining);
    }
  }
  return new TermQuery(term, synthetic);
}

Source File: SpanWeight.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the similarity scorer from the statistics of all terms that have a
 * positive doc freq. Returns {@code null} when there are no term states, the
 * query has no field, or none of the terms has a positive doc freq.
 */
private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
  final boolean nothingToScore = termStates == null || termStates.size() == 0 || query.getField() == null;
  if (nothingToScore) {
    return null;
  }

  final TermStatistics[] gathered = new TermStatistics[termStates.size()];
  int gatheredCount = 0;
  for (Map.Entry<Term, TermStates> e : termStates.entrySet()) {
    final TermStates states = e.getValue();
    if (states.docFreq() <= 0) {
      continue;
    }
    gathered[gatheredCount] = searcher.termStatistics(e.getKey(), states.docFreq(), states.totalTermFreq());
    gatheredCount++;
  }

  final CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  if (gatheredCount == 0) {
    return null; // no terms at all exist, we won't use similarity
  }
  // trim the array down to the slots that were actually filled
  return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(gathered, 0, gatheredCount));
}

Source File: ShardSearchingTestBase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Collects {@link TermStatistics} for the given terms from the searcher that
 * node {@code nodeID} holds for {@code version}. Terms with a doc freq of zero
 * are left out of the result.
 *
 * @throws SearcherExpiredException if no searcher could be acquired for {@code version}
 */
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
  final NodeState nodeState = nodes[nodeID];
  final Map<Term,TermStatistics> result = new HashMap<>();
  final IndexSearcher searcher = nodeState.searchers.acquire(version);
  if (searcher == null) {
    throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
  }
  try {
    for (Term t : terms) {
      final TermStates states = TermStates.build(searcher.getIndexReader().getContext(), t, true);
      final int df = states.docFreq();
      if (df > 0) {
        result.put(t, searcher.termStatistics(t, df, states.totalTermFreq()));
      }
    }
  } finally {
    // always hand the searcher back, even if building the statistics failed
    nodeState.searchers.release(searcher);
  }
  return result;
}

Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Expert: Add a {@link Term} together with its boost and an already
 * constructed {@link TermStates} for that term.
 *
 * @return this builder
 * @throws IndexSearcher.TooManyClauses if the number of terms already added has
 *         reached {@link IndexSearcher#getMaxClauseCount()}
 */
public Builder add(Term term, float boost, TermStates context) {
  if (numTerms >= IndexSearcher.getMaxClauseCount()) {
    throw new IndexSearcher.TooManyClauses();
  }
  final int slot = numTerms;
  final int required = slot + 1;

  // make sure each of the three parallel arrays has room for one more entry
  terms = ArrayUtil.grow(terms, required);
  boosts = ArrayUtil.grow(boosts, required);
  contexts = ArrayUtil.grow(contexts, required);

  terms[slot] = term;
  boosts[slot] = boost;
  contexts[slot] = context;

  numTerms = required;
  return this;
}

Source File: BlendedTermQuery.java From crate with Apache License 2.0

5 votes

/**
 * Returns a new {@link TermStates} holding the per-leaf states of {@code ctx}.
 * The first registered leaf is given {@code newDocFreq} and a matching total
 * term freq; every later leaf is registered with zeros.
 */
private static TermStates adjustDF(IndexReaderContext readerContext,
                                   TermStates ctx,
                                   int newDocFreq) throws IOException {
    assert ctx.wasBuiltFor(readerContext);
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF = ctx.totalTermFreq() < 0 ? -1 : Math.max(ctx.totalTermFreq(), newDocFreq);

    final List<LeafReaderContext> leaves = readerContext.leaves();
    final TermStates adjusted = new TermStates(readerContext);
    if (leaves == null) {
        return adjusted;
    }

    final int leafCount = leaves.size();
    for (int ord = 0; ord < leafCount; ++ord) {
        final TermState leafState = ctx.get(leaves.get(ord));
        if (leafState != null) {
            adjusted.register(leafState, ord, newDocFreq, newTTF);
            // the statistics were handed out once; remaining leaves get zero
            newDocFreq = 0;
            newTTF = 0;
        }
    }
    return adjusted;
}

Source File: SpanQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Collect the {@link TermStates} of every given weight into a single map keyed
 * by term, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermStates> getTermStates(Collection<SpanWeight> weights) {
  final Map<Term, TermStates> collected = new TreeMap<>();
  for (final SpanWeight weight : weights) {
    // each weight adds its own entries to the shared map
    weight.extractTermStates(collected);
  }
  return collected;
}

Source File: SpanQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Collect the {@link TermStates} of every given weight into a single map keyed
 * by term, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermStates> getTermStates(SpanWeight... weights) {
  final Map<Term, TermStates> collected = new TreeMap<>();
  for (int i = 0; i < weights.length; i++) {
    // each weight adds its own entries to the shared map
    weights[i].extractTermStates(collected);
  }
  return collected;
}

Source File: SpanTermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the span weight for this term. The stored {@code termStates} are
 * reused only if they were built for the searcher's top reader context;
 * otherwise fresh ones are built.
 */
@Override
public SpanWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  final boolean reusable = termStates != null && termStates.wasBuiltFor(topContext);
  final TermStates context = reusable
      ? termStates
      : TermStates.build(topContext, term, scoreMode.needsScores());
  // the term -> states map is only handed over when scores are needed
  return new SpanTermWeight(context, searcher, scoreMode.needsScores() ? Collections.singletonMap(term, context) : null, boost);
}

Source File: TermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the weight for this term query. A pre-built {@code perReaderTermState}
 * is reused when it was built for the searcher's top reader context; otherwise
 * the term states are built here.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  // PRTS counts as usable only if it was pre-built for this IS
  final boolean prebuilt = perReaderTermState != null && perReaderTermState.wasBuiltFor(topContext);
  final TermStates states = prebuilt
      ? this.perReaderTermState
      : TermStates.build(topContext, term, scoreMode.needsScores());
  return new TermWeight(searcher, scoreMode, boost, states);
}

Source File: CommonTermsQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Rewrites this query. No terms yields a {@link MatchNoDocsQuery}, a single term
 * yields a plain term query, and otherwise term states are collected for all
 * terms and the final query is built from them.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (this.terms.isEmpty()) {
    return new MatchNoDocsQuery("CommonTermsQuery with no terms");
  }
  if (this.terms.size() == 1) {
    return newTermQuery(this.terms.get(0), null);
  }

  final List<LeafReaderContext> leaves = reader.leaves();
  final int maxDoc = reader.maxDoc();
  final Term[] queryTerms = this.terms.toArray(new Term[0]);
  final TermStates[] contextArray = new TermStates[terms.size()];
  // fills contextArray in place, slot i belonging to queryTerms[i]
  collectTermStates(reader, leaves, contextArray, queryTerms);
  return buildQuery(maxDoc, contextArray, queryTerms);
}

Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the weight: builds a {@link TermStates} for every non-null term in
 * {@code termToID}, keyed by the term's id, and passes them to the
 * {@code TermAutomatonWeight}.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  final Map<Integer,TermStates> statesById = new HashMap<>();

  for (Map.Entry<BytesRef,Integer> entry : termToID.entrySet()) {
    final BytesRef termBytes = entry.getKey();
    if (termBytes == null) {
      // entries with a null key have no term to look up
      continue;
    }
    final Term lookup = new Term(field, termBytes);
    statesById.put(entry.getValue(), TermStates.build(topContext, lookup, scoreMode.needsScores()));
  }

  return new TermAutomatonWeight(det, searcher, statesById, boost);
}

Source File: TermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds the weight for the enclosing term query and prepares its sim scorer.
 *
 * @throws IllegalStateException if scores are needed but {@code termStates} is null
 */
public TermWeight(IndexSearcher searcher, ScoreMode scoreMode,
    float boost, TermStates termStates) throws IOException {
  super(TermQuery.this);
  final boolean needsScores = scoreMode.needsScores();
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.scoreMode = scoreMode;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (needsScores) {
    collectionStats = searcher.collectionStatistics(term.field());
    if (termStates.docFreq() > 0) {
      termStats = searcher.termStatistics(term, termStates.docFreq(), termStates.totalTermFreq());
    } else {
      termStats = null;
    }
  } else {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc=ttf=1
    collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
    termStats = new TermStatistics(term.bytes(), 1, 1);
  }

  // a null termStats means the term doesn't exist in any segment, so similarity is not used at all
  this.simScorer = termStats == null
      ? null
      : similarity.scorer(boost, collectionStats, termStats);
}

Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Stores the given parallel arrays and rewrite method, then sorts the arrays in
 * place by term. The arrays are kept by reference, so the caller's arrays are
 * reordered too.
 */
private BlendedTermQuery(Term[] terms, float[] boosts, TermStates[] contexts,
    RewriteMethod rewriteMethod) {
  assert terms.length == boosts.length;
  assert terms.length == contexts.length;
  this.terms = terms;
  this.boosts = boosts;
  this.contexts = contexts;
  this.rewriteMethod = rewriteMethod;

  // we sort terms so that equals/hashcode does not rely on the order
  final InPlaceMergeSorter byTerm = new InPlaceMergeSorter() {

    @Override
    protected int compare(int i, int j) {
      return terms[i].compareTo(terms[j]);
    }

    @Override
    protected void swap(int i, int j) {
      // exchange slot i and slot j in all three arrays so they stay aligned
      final float boostAtI = boosts[i];
      boosts[i] = boosts[j];
      boosts[j] = boostAtI;

      final TermStates contextAtI = contexts[i];
      contexts[i] = contexts[j];
      contexts[j] = contextAtI;

      final Term termAtI = terms[i];
      terms[i] = terms[j];
      terms[j] = termAtI;
    }
  };
  byTerm.sort(0, terms.length);
}

Source File: BlendedTermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Rewrites this query into per-term {@link TermQuery}s that all share the same
 * aggregated statistics, then hands them to the configured rewrite method.
 */
@Override
public final Query rewrite(IndexReader reader) throws IOException {
  final TermStates[] contexts = ArrayUtil.copyOfSubArray(this.contexts, 0, this.contexts.length);

  // Fill in missing or stale term states and, in the same pass, compute the
  // aggregated statistics:
  // df will be the max of all doc freqs
  // ttf will be the sum of all total term freqs
  int df = 0;
  long ttf = 0;
  for (int i = 0; i < contexts.length; ++i) {
    final boolean usable = contexts[i] != null && contexts[i].wasBuiltFor(reader.getContext());
    if (usable == false) {
      contexts[i] = TermStates.build(reader.getContext(), terms[i], true);
    }
    df = Math.max(df, contexts[i].docFreq());
    ttf += contexts[i].totalTermFreq();
  }

  for (int i = 0; i < contexts.length; ++i) {
    contexts[i] = adjustFrequencies(reader.getContext(), contexts[i], df, ttf);
  }

  final Query[] termQueries = new Query[terms.length];
  for (int i = 0; i < terms.length; ++i) {
    Query perTerm = new TermQuery(terms[i], contexts[i]);
    if (boosts[i] != 1f) {
      // only wrap when the boost actually changes something
      perTerm = new BoostQuery(perTerm, boosts[i]);
    }
    termQueries[i] = perTerm;
  }
  return rewriteMethod.rewrite(termQueries);
}

Source File: FeatureField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Compute a feature value suitable as the {@code pivot} parameter of the
 * {@link #newSaturationQuery(String, String, float, float)} and
 * {@link #newSigmoidQuery(String, String, float, float, float)} factory
 * methods. In practice the implementation averages the int bits of the float
 * representation and converts the average back to a float. Because floats
 * keep the exponent in the higher bits, the result approximates the
 * geometric mean of all feature values.
 * @param reader       the {@link IndexReader} to search against
 * @param featureField the field that stores features
 * @param featureName  the name of the feature
 * @return the decoded average frequency of the feature term, or {@code 1}
 *         when the term has a doc freq of zero
 */
static float computePivotFeatureValue(IndexReader reader, String featureField, String featureName) throws IOException {
  final Term featureTerm = new Term(featureField, featureName);
  final TermStates states = TermStates.build(reader.getContext(), featureTerm, true);
  final int docFreq = states.docFreq();
  if (docFreq == 0) {
    // avoid division by 0
    // The term doesn't exist, so it will never be used for scoring and the
    // exact return value doesn't matter much. Just make sure it is legal.
    return 1;
  }
  final double meanFreq = (double) states.totalTermFreq() / docFreq;
  return decodeFeatureValue((float) meanFreq);
}

Source File: TestTermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks TermQuery equality: same term is equal, different term text is unequal,
 * and a query carrying explicitly built term states is equal to one without.
 */
public void testEquals() throws IOException {
  final TermQuery fooBar = new TermQuery(new Term("foo", "bar"));
  final TermQuery fooBarAgain = new TermQuery(new Term("foo", "bar"));
  QueryUtils.checkEqual(fooBar, fooBarAgain);

  final TermQuery fooBarForUnequal = new TermQuery(new Term("foo", "bar"));
  final TermQuery fooBaz = new TermQuery(new Term("foo", "baz"));
  QueryUtils.checkUnequal(fooBarForUnequal, fooBaz);

  // grab the context of an empty MultiReader; the reader is closed right away
  final CompositeReaderContext context;
  try (MultiReader emptyReader = new MultiReader()) {
    context = emptyReader.getContext();
  }

  final TermQuery withoutStates = new TermQuery(new Term("foo", "bar"));
  final TermQuery withStates =
      new TermQuery(new Term("foo", "bar"), TermStates.build(context, new Term("foo", "bar"), true));
  QueryUtils.checkEqual(withoutStates, withStates);
}

Source File: GraphTermsQParserPlugin.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Walks all leaves and, for every query term found in this query's field,
 * creates or extends the {@link TermStates} in the matching slot of
 * {@code contextArray}. Slots of terms that are never found are left untouched.
 */
private void collectTermStates(IndexReader reader,
                               List<LeafReaderContext> leaves,
                               TermStates[] contextArray,
                               Term[] queryTerms) throws IOException {
  for (LeafReaderContext leaf : leaves) {
    final Terms fieldTerms = leaf.reader().terms(this.field);
    if (fieldTerms == null) {
      // field does not exist
      continue;
    }

    final TermsEnum termsEnum = fieldTerms.iterator();
    if (termsEnum == TermsEnum.EMPTY) {
      continue;
    }

    for (int slot = 0; slot < queryTerms.length; slot++) {
      final TermStates existing = contextArray[slot];
      if (termsEnum.seekExact(queryTerms[slot].bytes()) == false) {
        continue;
      }
      if (existing != null) {
        // the term was already seen in an earlier leaf: add this leaf to it
        existing.register(termsEnum.termState(), leaf.ord,
            termsEnum.docFreq(), termsEnum.totalTermFreq());
      } else {
        contextArray[slot] = new TermStates(reader.getContext(),
            termsEnum.termState(), leaf.ord, termsEnum.docFreq(),
            termsEnum.totalTermFreq());
      }
    }
  }
}

Source File: TermsComponent.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Walks all leaves of {@code topReaderContext} and accumulates doc freq and
 * total term freq of every query term into the matching slot of
 * {@code contextArray}. A slot's {@link TermStates} is created the first time
 * its term is found.
 */
private static void collectTermStates(IndexReaderContext topReaderContext, TermStates[] contextArray,
                                      Term[] queryTerms) throws IOException {
  for (LeafReaderContext leaf : topReaderContext.leaves()) {
    for (int slot = 0; slot < queryTerms.length; slot++) {
      final Term queryTerm = queryTerms[slot];
      final Terms fieldTerms = leaf.reader().terms(queryTerm.field());
      if (fieldTerms == null) {
        // field does not exist
        continue;
      }
      final TermsEnum termsEnum = fieldTerms.iterator();
      assert termsEnum != null;

      if (termsEnum == TermsEnum.EMPTY) {
        continue;
      }

      TermStates accumulated = contextArray[slot];
      if (termsEnum.seekExact(queryTerm.bytes()) == false) {
        continue;
      }
      if (accumulated == null) {
        accumulated = new TermStates(topReaderContext);
        contextArray[slot] = accumulated;
      }
      // only the statistics are accumulated; no per-leaf state is registered
      accumulated.accumulateStatistics(termsEnum.docFreq(), termsEnum.totalTermFreq());
    }
  }
}

Source File: FieldBoostTermQueryBuilder.java From querqy with Apache License 2.0

5 votes

/**
 * Creates a {@code FieldBoostWeight} from freshly built term states, the query
 * boost and the field boost looked up for this term's field.
 */
@Override
public Weight createWeight(final IndexSearcher searcher, final ScoreMode scoreMode, final float boost)
        throws IOException {
    final TermStates states =
            TermStates.build(searcher.getTopReaderContext(), term, scoreMode.needsScores());
    // TODO: set boosts to 1f if needsScores is false?
    final float boostForField = fieldBoost.getBoost(term.field(), searcher.getIndexReader());
    return new FieldBoostWeight(states, boost, boostForField);
}

Source File: FieldBoostTermQueryBuilder.java From querqy with Apache License 2.0

5 votes

/**
 * Stores the term states and both boosts, and precomputes the score as the
 * product of query boost and field boost.
 */
public FieldBoostWeight(final TermStates termStates, final float queryBoost, final float fieldBoost) {
    super(FieldBoostTermQuery.this);
    assert termStates != null : "TermContext must not be null";

    this.score = queryBoost * fieldBoost;
    this.fieldBoost = fieldBoost;
    this.queryBoost = queryBoost;

    this.termStates = termStates;
}

Source File: BlendedTermQuery.java From crate with Apache License 2.0

5 votes

/**
 * Returns a {@link TermStates} whose first registered leaf is given the doc freq
 * of {@code termContext} and {@code sumTTF} as total term freq; every later leaf
 * is registered with zeros. If both {@code sumTTF} and the existing total term
 * freq are -1, {@code termContext} itself is returned.
 */
private TermStates adjustTTF(IndexReaderContext readerContext,
                             TermStates termContext,
                             long sumTTF) throws IOException {
    assert termContext.wasBuiltFor(readerContext);
    final boolean nothingToAdjust = sumTTF == -1 && termContext.totalTermFreq() == -1;
    if (nothingToAdjust) {
        return termContext;
    }

    final TermStates rebuilt = new TermStates(readerContext);
    final List<LeafReaderContext> leaves = readerContext.leaves();
    int pendingDf = termContext.docFreq();
    long pendingTtf = sumTTF;
    if (leaves == null) {
        return rebuilt;
    }

    final int leafCount = leaves.size();
    for (int ord = 0; ord < leafCount; ord++) {
        final TermState leafState = termContext.get(leaves.get(ord));
        if (leafState != null) {
            rebuilt.register(leafState, ord, pendingDf, pendingTtf);
            // the statistics were handed out once; remaining leaves get zero
            pendingDf = 0;
            pendingTtf = 0;
        }
    }
    return rebuilt;
}

Source File: CommonTermsQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Walks all leaves and, for every query term found in its own field, creates
 * or extends the {@link TermStates} in the matching slot of
 * {@code contextArray}. Slots of terms that are never found are left untouched.
 */
public void collectTermStates(IndexReader reader,
                              List<LeafReaderContext> leaves, TermStates[] contextArray,
                              Term[] queryTerms) throws IOException {
  for (LeafReaderContext leaf : leaves) {
    for (int slot = 0; slot < queryTerms.length; slot++) {
      final Term queryTerm = queryTerms[slot];
      final TermStates existing = contextArray[slot];
      final Terms fieldTerms = leaf.reader().terms(queryTerm.field());
      if (fieldTerms == null) {
        // field does not exist
        continue;
      }
      final TermsEnum termsEnum = fieldTerms.iterator();
      assert termsEnum != null;

      if (termsEnum == TermsEnum.EMPTY) {
        continue;
      }
      if (termsEnum.seekExact(queryTerm.bytes()) == false) {
        continue;
      }
      if (existing != null) {
        // the term was already seen in an earlier leaf: add this leaf to it
        existing.register(termsEnum.termState(), leaf.ord,
            termsEnum.docFreq(), termsEnum.totalTermFreq());
      } else {
        contextArray[slot] = new TermStates(reader.getContext(),
            termsEnum.termState(), leaf.ord, termsEnum.docFreq(),
            termsEnum.totalTermFreq());
      }
    }
  }
}

Source File: AssertingSpanWeight.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Forwards the call, with the same map, to {@code in}.
 */
@Override
public void extractTermStates(Map<Term, TermStates> contexts) {
  in.extractTermStates(contexts);
}

Source File: SpanContainingQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Passes all arguments, unchanged and in the same order, to the superclass constructor.
 */
public SpanContainingWeight(IndexSearcher searcher, Map<Term, TermStates> terms,
                            SpanWeight bigWeight, SpanWeight littleWeight, float boost) throws IOException {
  super(searcher, terms, bigWeight, littleWeight, boost);
}

Source File: TopTermsRewrite.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates a ScoreTerm that stores the given {@link TermStates} in its {@code termState} field.
 */
public ScoreTerm(TermStates termState) {
  this.termState = termState;
}

org.apache.lucene.index.TermStates Java Examples