Java Code Examples for org.apache.lucene.index.TermStates#docFreq()

The following examples show how to use org.apache.lucene.index.TermStates#docFreq(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.

Example 1

Source File: TermAutomatonQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the weight for the enclosing query and prepares its similarity scorer.
 * Statistics are gathered only for terms that have a non-null value in
 * {@code idToTerm} and whose {@link TermStates} report a document frequency
 * above zero; when no such term exists, {@code stats} is left {@code null}.
 *
 * @param automaton  the automaton stored on this weight
 * @param searcher   provides the similarity and the term/collection statistics
 * @param termStates term states keyed by term ID; stored on this weight and
 *                   consulted for every ID in {@code idToTerm} with a non-null term
 * @param boost      boost passed through to the similarity scorer
 * @throws IOException declared by the signature; propagated from the statistics lookups
 */
public TermAutomatonWeight(Automaton automaton, IndexSearcher searcher, Map<Integer,TermStates> termStates, float boost) throws IOException {
  super(TermAutomatonQuery.this);
  this.automaton = automaton;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final List<TermStatistics> matchedStats = new ArrayList<>();
  for (Map.Entry<Integer,BytesRef> idAndTerm : idToTerm.entrySet()) {
    final BytesRef termBytes = idAndTerm.getValue();
    if (termBytes == null) {
      continue; // this ID has no concrete term attached
    }
    final TermStates state = termStates.get(idAndTerm.getKey());
    final int docFreq = state.docFreq();
    if (docFreq <= 0) {
      continue; // term occurs in no document, so it contributes no statistics
    }
    matchedStats.add(searcher.termStatistics(new Term(field, termBytes), docFreq, state.totalTermFreq()));
  }

  // no terms matched at all => stats stays null and the similarity is not used
  stats = matchedStats.isEmpty()
      ? null
      : similarity.scorer(boost, searcher.collectionStatistics(field),
                          matchedStats.toArray(new TermStatistics[0]));
}

Example 2

Source File: FuzzyLikeThisQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the query used for a single term.
 * When {@code ignoreTF} is set, the term query is wrapped in a
 * {@link ConstantScoreQuery}. Otherwise the term query is given an artificial
 * {@link TermStates} intended to report an overall df and ttf equal to 1.
 *
 * @param reader the reader whose leaves are probed for the term
 * @param term   the term to build a query for
 * @return the constant-score or artificially-weighted term query
 * @throws IOException declared by the signature; propagated from the index lookups
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  if (ignoreTF) {
    return new ConstantScoreQuery(new TermQuery(term));
  }

  // artificial TermStates: the goal is an overall df and ttf equal to 1
  final TermStates artificialStates = new TermStates(reader.getContext());
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms fieldTerms = leaf.reader().terms(term.field());
    if (fieldTerms == null) {
      continue;
    }
    final TermsEnum cursor = fieldTerms.iterator();
    if (!cursor.seekExact(term.bytes())) {
      continue;
    }
    // register only what is still missing so the total df and ttf end up at 1
    final int remaining = 1 - artificialStates.docFreq();
    artificialStates.register(cursor.termState(), leaf.ord, remaining, remaining);
  }
  return new TermQuery(term, artificialStates);
}

Example 3

Source File: NearestFuzzyQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates a {@link TermQuery} for {@code term} backed by an artificial
 * {@link TermStates} intended to report an overall df and ttf equal to 1.
 * The term's state is registered for each leaf in which the term is found.
 *
 * @param reader the reader whose leaves are searched for the term
 * @param term   the term to query
 * @return a term query carrying the artificial term states
 * @throws IOException declared by the signature; propagated from the index lookups
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  final TermStates syntheticStates = new TermStates(reader.getContext());
  for (LeafReaderContext segment : reader.leaves()) {
    final Terms segmentTerms = segment.reader().terms(term.field());
    if (segmentTerms == null) {
      continue; // nothing to look up in this leaf for that field
    }
    final TermsEnum it = segmentTerms.iterator();
    if (!it.seekExact(term.bytes())) {
      continue; // the term is absent from this leaf
    }
    // we want the total df and ttf to be 1, so add only the difference
    final int delta = 1 - syntheticStates.docFreq();
    syntheticStates.register(it.termState(), segment.ord, delta, delta);
  }
  return new TermQuery(term, syntheticStates);
}

Example 4

Source File: ShardSearchingTestBase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Collects term statistics from one node's searcher at a given version.
 * Only terms whose document frequency is above zero get an entry in the result.
 * The acquired searcher is always released before returning.
 *
 * @param terms   the terms to look up
 * @param nodeID  index into {@code nodes} selecting the node
 * @param version the searcher version to acquire
 * @return a map from each term with a positive document frequency to its statistics
 * @throws IOException declared by the signature; propagated from the index lookups
 * @throws SearcherExpiredException if no searcher can be acquired for {@code version}
 */
Map<Term,TermStatistics> getNodeTermStats(Set<Term> terms, int nodeID, long version) throws IOException {
  final NodeState nodeState = nodes[nodeID];
  final IndexSearcher searcher = nodeState.searchers.acquire(version);
  if (searcher == null) {
    throw new SearcherExpiredException("node=" + nodeID + " version=" + version);
  }

  final Map<Term,TermStatistics> result = new HashMap<>();
  try {
    for (Term term : terms) {
      final TermStates states = TermStates.build(searcher.getIndexReader().getContext(), term, true);
      final int docFreq = states.docFreq();
      if (docFreq > 0) {
        result.put(term, searcher.termStatistics(term, docFreq, states.totalTermFreq()));
      }
    }
  } finally {
    // hand the searcher back even if a lookup failed
    nodeState.searchers.release(searcher);
  }
  return result;
}

Example 5

Source File: SpanWeight.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the similarity scorer for a span query from the statistics of its terms.
 * Only terms with a document frequency above zero are passed to the similarity.
 *
 * @param query      the span query; its field selects the collection statistics
 * @param searcher   provides term and collection statistics
 * @param termStates per-term states; may be {@code null} or empty
 * @param boost      boost passed through to the similarity scorer
 * @return the scorer, or {@code null} when {@code termStates} is {@code null} or empty,
 *         the query has no field, or no term has a positive document frequency
 * @throws IOException declared by the signature; propagated from the statistics lookups
 */
private Similarity.SimScorer buildSimWeight(SpanQuery query, IndexSearcher searcher, Map<Term, TermStates> termStates, float boost) throws IOException {
  if (termStates == null || termStates.isEmpty() || query.getField() == null) {
    return null;
  }

  // sized for the worst case; only the first `collected` slots end up filled
  final TermStatistics[] buffer = new TermStatistics[termStates.size()];
  int collected = 0;
  for (Map.Entry<Term, TermStates> termAndState : termStates.entrySet()) {
    final TermStates state = termAndState.getValue();
    final int docFreq = state.docFreq();
    if (docFreq > 0) {
      buffer[collected] = searcher.termStatistics(termAndState.getKey(), docFreq, state.totalTermFreq());
      collected++;
    }
  }

  final CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  if (collected == 0) {
    return null; // no terms at all exist, we won't use similarity
  }
  return similarity.scorer(boost, collectionStats, ArrayUtil.copyOfSubArray(buffer, 0, collected));
}

Example 6

Source File: TermQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates the weight for the enclosing term query and prepares its similarity scorer.
 * When scores are needed, real collection and term statistics are used.
 * Otherwise fake statistics with docFreq=maxDoc=ttf=1 are substituted.
 * {@code simScorer} is left {@code null} when scores are needed but the term
 * has a document frequency of zero.
 *
 * @param searcher   provides the similarity and the statistics
 * @param scoreMode  tells whether scores are needed
 * @param boost      boost passed through to the similarity scorer
 * @param termStates the term's states; must be non-null when scores are needed
 * @throws IOException declared by the signature; propagated from the statistics lookups
 * @throws IllegalStateException if scores are needed and {@code termStates} is {@code null}
 */
public TermWeight(IndexSearcher searcher, ScoreMode scoreMode,
    float boost, TermStates termStates) throws IOException {
  super(TermQuery.this);
  final boolean needsScores = scoreMode.needsScores();
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.scoreMode = scoreMode;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity();

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (!needsScores) {
    // the actual stats are not needed: use fake stats with docFreq=maxDoc=ttf=1
    collectionStats = new CollectionStatistics(term.field(), 1, 1, 1, 1);
    termStats = new TermStatistics(term.bytes(), 1, 1);
  } else {
    collectionStats = searcher.collectionStatistics(term.field());
    if (termStates.docFreq() > 0) {
      termStats = searcher.termStatistics(term, termStates.docFreq(), termStates.totalTermFreq());
    } else {
      termStats = null;
    }
  }

  // a null termStats means the term doesn't exist in any segment: similarity is not used at all
  this.simScorer = (termStats == null)
      ? null
      : similarity.scorer(boost, collectionStats, termStats);
}

Example 7

Source File: FeatureField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Computes a feature value suitable as the {@code pivot} argument of the
 * {@link #newSaturationQuery(String, String, float, float)} and
 * {@link #newSigmoidQuery(String, String, float, float, float)} factory
 * methods. In practice the implementation averages the int bits of the float
 * representation and then converts the average back to a float. Because
 * floats keep their exponent in the higher bits, the result approximates the
 * geometric mean of all feature values.
 *
 * @param reader       the {@link IndexReader} to search against
 * @param featureField the field that stores features
 * @param featureName  the name of the feature
 * @return the computed pivot value, or {@code 1} when the feature term has a
 *         document frequency of zero
 * @throws IOException declared by the signature; propagated from the index lookup
 */
static float computePivotFeatureValue(IndexReader reader, String featureField, String featureName) throws IOException {
  final Term featureTerm = new Term(featureField, featureName);
  final TermStates featureStates = TermStates.build(reader.getContext(), featureTerm, true);

  final int docCount = featureStates.docFreq();
  if (docCount == 0) {
    // Guard against dividing by zero. The term doesn't exist, so the value
    // will never be used for scoring; it only has to be a legal one.
    return 1;
  }

  // divide in double precision, then narrow to float for decoding
  final double meanFreq = (double) featureStates.totalTermFreq() / docCount;
  return decodeFeatureValue((float) meanFreq);
}