org.apache.lucene.index.Term#text

Source File: PersistentClassIndex.java From netbeans with Apache License 2.0

6 votes

/**
 * Folds the frequency of one indexed term into the per-type and per-package counters.
 * <p>
 * The binary name is the term text with its last {@code postfixLen} characters removed;
 * the package name is everything before the last {@code '.'} of the binary name, or the
 * empty string when there is no dot.
 *
 * @param param the term together with its frequency
 * @return always {@code null}
 * @throws Stop when the term belongs to a field other than {@code fieldName}
 */
@CheckForNull
@Override
@SuppressWarnings("StringEquality")
public Void convert(@NonNull final Index.WithTermFrequencies.TermFreq param) throws Stop {
    final Term current = param.getTerm();
    // Reference comparison is intentional (see the StringEquality suppression);
    // presumably field names are interned - verify against the Term implementation in use.
    if (current.field() != fieldName) {
        throw new Stop();
    }
    final int freq = param.getFreq();
    final String encoded = current.text();
    final String binName = encoded.substring(0, encoded.length() - postfixLen);
    final int lastDot = binName.lastIndexOf('.');  //NOI18N
    final String pkgName;
    if (lastDot == -1) {
        pkgName = "";    //NOI18N
    } else {
        pkgName = binName.substring(0, lastDot);
    }
    // Read both previous counts before writing either one.
    final Integer previousType = typeFreq.get(binName);
    final Integer previousPkg = pkgFreq.get(pkgName);
    int typeTotal = freq;
    if (previousType != null) {
        typeTotal += previousType;
    }
    int pkgTotal = freq;
    if (previousPkg != null) {
        pkgTotal += previousPkg;
    }
    typeFreq.put(binName, typeTotal);
    pkgFreq.put(pkgName, pkgTotal);
    return null;
}

Source File: QueryUtil.java From netbeans with Apache License 2.0

6 votes

/**
 * Maps a term to the package name it contributes, or {@code null} when it does not match.
 * <p>
 * A term matches when {@code all} is set or its text starts with {@code value}. With
 * {@code directOnly} set, the text is cut at the first {@code '.'} found at or after
 * {@code value.length()}, provided that dot is at an index greater than zero.
 *
 * @param currentTerm the term being visited
 * @return the (possibly truncated) term text, or {@code null} if the term does not match
 * @throws Stop the shared {@code STOP} instance, when the term is in a different field
 */
@Override
public String convert(Term currentTerm) throws Stop {
    // Reference comparison of field names, exactly as the enumeration contract here expects.
    if (currentTerm.field() != fieldName) {
        throw STOP;
    }
    final String text = currentTerm.text();
    if (!all && !text.startsWith(value)) {
        return null;
    }
    if (!directOnly) {
        return text;
    }
    final int dot = text.indexOf('.', value.length());    //NOI18N
    return dot > 0 ? text.substring(0, dot) : text;
}

Source File: MtasSpanWildcardQuery.java From mtas with Apache License 2.0

6 votes

/**
 * Creates a span wildcard query for the given term.
 * <p>
 * The term text is split at the first occurrence of {@link MtasToken#DELIMITER}: the part
 * before it becomes {@code prefix}, the part after it becomes {@code value} (or
 * {@code null} when that part is empty). Without a delimiter the whole text is the prefix
 * and {@code value} is {@code null}.
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanWildcardQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;
  final String text = term.text();
  final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}

Source File: MtasSpanPrefixQuery.java From mtas with Apache License 2.0

6 votes

/**
 * Creates a span prefix query for the given term.
 * <p>
 * The term text is split at the first occurrence of {@link MtasToken#DELIMITER}: the part
 * before it becomes {@code prefix}, the part after it becomes {@code value} (or
 * {@code null} when that part is empty). Without a delimiter the whole text is the prefix
 * and {@code value} is {@code null}.
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanPrefixQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new PrefixQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;
  final String text = term.text();
  final int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    final String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}

Source File: TestSolrCoreParser.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Asserts that {@code query} is a single-term query on {@code fieldName} whose term text
 * is one of {@code expectedTermTexts}.
 *
 * @param span if {@code true} the query must be a {@code SpanTermQuery}, otherwise a {@code TermQuery}
 * @param query the query under test
 * @param fieldName the field the term is expected to belong to
 * @param expectedTermTexts the acceptable term texts; any one of them satisfies the check
 */
private static void checkChooseOneWordQuery(boolean span, Query query, String fieldName, String ... expectedTermTexts) {
  final Term term;
  if (span) {
    assertTrue(query instanceof SpanTermQuery);
    final SpanTermQuery stq = (SpanTermQuery)query;
    term = stq.getTerm();
  } else {
    assertTrue(query instanceof TermQuery);
    final TermQuery tq = (TermQuery)query;
    term = tq.getTerm();
  }
  final String text = term.text();
  boolean foundExpected = false;
  for (String expected : expectedTermTexts) {
    foundExpected |= expected.equals(text);
  }
  assertEquals(fieldName, term.field());
  // Render the array contents; concatenating the array itself would only print its
  // type and identity hash, which makes the failure message useless.
  assertTrue("expected term text ("+text+") not found in ("
      +java.util.Arrays.toString(expectedTermTexts)+")", foundExpected);
}

Source File: Queries.java From netbeans with Apache License 2.0

5 votes

/**
 * Decides whether the given term is accepted by this enumeration.
 * <p>
 * A term outside {@code fieldName}, or whose text does not start with {@code startPrefix},
 * sets {@code endEnum} and is rejected. Otherwise the result is whether the whole term
 * text matches {@code pattern}.
 *
 * @param term the term to test
 * @return {@code true} if the term is in range and its text fully matches the pattern
 */
@Override
protected boolean termCompare(Term term) {
    // Field names are compared by reference, not by equals().
    final boolean inRange = fieldName == term.field()
            && term.text().startsWith(startPrefix);
    if (!inRange) {
        endEnum = true;
        return false;
    }
    return pattern.matcher(term.text()).matches();
}

Source File: TermCharacterFilter.java From semanticvectors with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Accepts a term only if every {@code char} of its text is a letter according to
 * {@link Character#isLetter(char)}. An empty text is accepted.
 *
 * @param t the term to test
 * @return {@code false} as soon as a non-letter {@code char} is found, otherwise {@code true}
 */
public boolean filter(Term t)
{
  for (char c : t.text().toCharArray()) {
    if (!Character.isLetter(c)) {
      return false;
    }
  }
  return true;
}

Source File: TermStopListFilter.java From semanticvectors with BSD 3-Clause "New" or "Revised" License

5 votes

/**
 * Accepts a term unless its text is contained in {@code stopwords}.
 *
 * @param t the term to test
 * @return {@code true} if the term text is not a stop word
 */
public boolean filter(Term t)
{
  return !stopwords.contains(t.text());
}

Source File: WeightedSpanTermExtractor.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Collects the terms of the rewritten <code>Query</code> and stores a
 * {@link WeightedSpanTerm} for each one whose field passes {@code fieldNameComparator}.
 * Entries are keyed by term text, so a later term with the same text replaces an earlier one.
 *
 * @param terms
 *          Map that receives the created WeightedSpanTerms
 * @param query
 *          Query to extract Terms from
 * @param boost
 *          value handed to the constructor of every created WeightedSpanTerm
 * @throws IOException If there is a low-level I/O error
 */
protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query, float boost) throws IOException {
  final Set<Term> collected = new HashSet<>();
  final Query rewritten = new IndexSearcher(getLeafContext()).rewrite(query);
  rewritten.visit(QueryVisitor.termCollector(collected));

  for (final Term candidate : collected) {
    if (!fieldNameComparator(candidate.field())) {
      continue;
    }
    final WeightedSpanTerm weighted = new WeightedSpanTerm(boost, candidate.text());
    terms.put(candidate.text(), weighted);
  }
}

Source File: 1139461_WildcardQuery_0_t.java From coming with MIT License

5 votes

/**
 * Creates a wildcard query for {@code term} and classifies its text:
 * {@code termContainsWildcard} is set when the text contains {@code '*'} or {@code '?'};
 * {@code termIsPrefix} is set when the text contains no {@code '?'} and its first
 * {@code '*'} is the last character.
 *
 * @param term the pattern term
 */
public WildcardQuery(Term term) {
  this.term = term;
  final String pattern = term.text();
  final int firstStar = pattern.indexOf('*');
  final int firstQuestionMark = pattern.indexOf('?');
  final boolean containsWildcard = firstStar != -1 || firstQuestionMark != -1;
  this.termContainsWildcard = containsWildcard;
  this.termIsPrefix = containsWildcard
      && firstQuestionMark == -1
      && firstStar == pattern.length() - 1;
}

Source File: 1139461_WildcardQuery_0_s.java From coming with MIT License

5 votes

/**
 * Creates a wildcard query for {@code term} and classifies its text:
 * {@code termContainsWildcard} is set when the text contains {@code '*'} or {@code '?'};
 * {@code termIsPrefix} is set when the text contains no {@code '?'} and its first
 * {@code '*'} is the last character.
 *
 * @param term the pattern term
 */
public WildcardQuery(Term term) {
  this.term = term;
  final String pattern = term.text();
  final int firstStar = pattern.indexOf('*');
  final int firstQuestionMark = pattern.indexOf('?');
  final boolean containsWildcard = firstStar != -1 || firstQuestionMark != -1;
  this.termContainsWildcard = containsWildcard;
  this.termIsPrefix = containsWildcard
      && firstQuestionMark == -1
      && firstStar == pattern.length() - 1;
}

Source File: QueryParserBase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a FuzzyQuery for the given term.
 * <p>
 * The edit count is derived by {@code FuzzyQuery.floatToEdits} from
 * {@code minimumSimilarity} and the number of Unicode code points in the term text.
 *
 * @param term Term
 * @param minimumSimilarity minimum similarity
 * @param prefixLength prefix length
 * @return new FuzzyQuery Instance
 */
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
  // FuzzyQuery doesn't yet allow constant score rewrite
  final String termText = term.text();
  // Count code points rather than chars so surrogate pairs count once.
  final int codePoints = termText.codePointCount(0, termText.length());
  final int maxEdits = FuzzyQuery.floatToEdits(minimumSimilarity, codePoints);
  return new FuzzyQuery(term, maxEdits, prefixLength);
}

Source File: CrateRegexTermsEnum.java From crate with Apache License 2.0

5 votes

/**
 * Wraps {@code tenum}, compiles the text of {@code term} as a regular expression with the
 * given {@code flags}, and sets the initial seek term to the empty {@code BytesRef}.
 *
 * @param tenum the terms enumeration to wrap
 * @param term the term whose text is the regular expression
 * @param flags flags forwarded to {@code CrateRegexCapabilities.compile}
 */
CrateRegexTermsEnum(TermsEnum tenum, Term term, int flags) {
    super(tenum);
    this.regexImpl = CrateRegexCapabilities.compile(term.text(), flags);
    setInitialSeekTerm(new BytesRef(""));
}

Source File: SolrQueryParserBase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a FuzzyQuery for the given term.
 * <p>
 * The edit count is derived by {@code FuzzyQuery.floatToEdits} from
 * {@code minimumSimilarity} and the number of Unicode code points in the term text.
 *
 * @param term Term
 * @param minimumSimilarity minimum similarity
 * @param prefixLength prefix length
 * @return new FuzzyQuery Instance
 */
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
  // FuzzyQuery doesn't yet allow constant score rewrite
  final String termText = term.text();
  // Count code points rather than chars so surrogate pairs count once.
  final int codePoints = termText.codePointCount(0, termText.length());
  final int maxEdits = FuzzyQuery.floatToEdits(minimumSimilarity, codePoints);
  return new FuzzyQuery(term, maxEdits, prefixLength);
}

Source File: QueryUtil.java From netbeans with Apache License 2.0

4 votes

/**
 * Creates a filter from the start term: its field becomes {@code fieldName} and its text
 * becomes {@code value}. {@code all} is set when that text is empty.
 *
 * @param startTerm the term supplying the field name and the text to match against
 * @param directOnly stored as-is in {@code directOnly}
 */
PackageFilter(final @NonNull Term startTerm, final boolean directOnly) {
    this.directOnly = directOnly;
    this.fieldName = startTerm.field();
    this.value = startTerm.text();
    this.all = this.value.isEmpty();
}

Source File: TestRegexpRandom2.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates a query on the field of {@code term} whose automaton is built from the term
 * text parsed as a {@code RegExp} with the given {@code flags}.
 *
 * @param term supplies the field and the regular expression text
 * @param flags syntax flags passed to the {@code RegExp} constructor
 */
DumbRegexpQuery(Term term, int flags) {
  super(term.field());
  automaton = new RegExp(term.text(), flags).toAutomaton();
}

Source File: DirectSpellChecker.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Provide spelling corrections based on several parameters.
 * <p>
 * Candidates are enumerated with a {@code FuzzyTermsEnum} over the term's field and kept in a
 * priority queue that is trimmed to at most {@code numSug} entries. If the field has no
 * terms, an empty list is returned.
 *
 * @param term The term to suggest spelling corrections for
 * @param numSug The maximum number of spelling corrections
 * @param ir The index reader to fetch the candidate spelling corrections from
 * @param docfreq Document frequency threshold; a candidate is kept only if its document frequency is strictly greater than this value
 * @param editDistance The maximum edit distance candidates are allowed to have
 * @param accuracy The minimum accuracy a suggested spelling correction needs to have in order to be included
 * @param spare a chars scratch
 * @return a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order.
 * @throws IOException If I/O related errors occur
 */
protected Collection<ScoreTerm> suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance,
                                               float accuracy, final CharsRefBuilder spare) throws IOException {

  Terms terms = MultiTerms.getTerms(ir, term.field());
  if (terms == null) {
    // nothing indexed for this field: no suggestions possible
    return Collections.emptyList();
  }
  // the required common prefix is the larger of minPrefix and editDistance - 1
  FuzzyTermsEnum e = new FuzzyTermsEnum(terms, term, editDistance, Math.max(minPrefix, editDistance - 1), true);
  final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<>();
  
  BytesRef queryTerm = new BytesRef(term.text());
  BytesRef candidateTerm;
  // scratch entry that is filled in for each accepted candidate; see the recycling below
  ScoreTerm st = new ScoreTerm();
  while ((candidateTerm = e.next()) != null) {
    // For FuzzyQuery, boost is the score:
    float score = e.getBoost();
    // ignore uncompetitive hits
    if (stQueue.size() >= numSug && score <= stQueue.peek().boost) {
      continue;
    }
    
    // ignore exact match of the same term
    if (queryTerm.bytesEquals(candidateTerm)) {
      continue;
    }
    
    int df = e.docFreq();
    
    // check docFreq if required
    // (candidates whose frequency equals docfreq are skipped as well)
    if (df <= docfreq) {
      continue;
    }
    
    final String termAsString;
    if (distance == INTERNAL_LEVENSHTEIN) {
      // delay creating strings until the end
      // the enum's boost is kept as the score in this case
      termAsString = null;
    } else {
      // a custom distance is configured: decode the candidate and rescore it
      spare.copyUTF8Bytes(candidateTerm);
      termAsString = spare.toString();
      score = distance.getDistance(term.text(), termAsString);
    }
    
    if (score < accuracy) {
      continue;
    }
    
    // add new entry in PQ
    // deep copy of the candidate bytes is stored in the entry
    st.term = BytesRef.deepCopyOf(candidateTerm);
    st.boost = score;
    st.docfreq = df;
    st.termAsString = termAsString;
    st.score = score;
    stQueue.offer(st);
    // possibly drop entries from queue
    // when the queue overflows, the polled entry is recycled as the next scratch object
    st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
    // once the queue is full, tell the enum the boost of the queue head; otherwise no bound
    e.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
  }
    
  return stQueue;
}

Source File: WordBreakSpellChecker.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Tries to split the text of {@code term} into a left and a right word at each code point
 * boundary, offering a suggestion for every split where both halves reach the minimum
 * frequency, and recursing on the right half while the number of breaks stays within
 * {@code maxChanges}.
 *
 * @param term the term whose text is split; its field is used for all lookups
 * @param ir the index reader passed to {@code generateSuggestWord}
 * @param numberBreaks number of breaks already made on the way to this call
 * @param maxSuggestions upper bound on the size of {@code suggestions}
 * @param useMinSuggestionFrequency minimum {@code freq} a word must have to be used
 * @param prefix the words already split off to the left of {@code term}
 * @param suggestions queue receiving the suggestions; polled whenever it exceeds {@code maxSuggestions}
 * @param totalEvaluations evaluations counted so far; compared against {@code maxEvaluations}
 * @param sortMethod only forwarded to the recursive call within this method
 * @return the number of split positions evaluated by this invocation itself (evaluations
 *         made by recursive calls are added to the local total only)
 * @throws IOException if the underlying lookups fail
 */
private int generateBreakUpSuggestions(Term term, IndexReader ir,
    int numberBreaks, int maxSuggestions, int useMinSuggestionFrequency,
    SuggestWord[] prefix, Queue<SuggestWordArrayWrapper> suggestions,
    int totalEvaluations, BreakSuggestionSortMethod sortMethod)
    throws IOException {
  String termText = term.text();
  // lengths are measured in code points, not chars
  int termLength = termText.codePointCount(0, termText.length());
  // clamp the configured minimum word length to at least 1
  int useMinBreakWordLength = minBreakWordLength;
  if (useMinBreakWordLength < 1) {
    useMinBreakWordLength = 1;
  }
  // too short to yield two words of the minimum length
  if (termLength < (useMinBreakWordLength * 2)) {
    return 0;
  }    
  
  int thisTimeEvaluations = 0;
  for (int i = useMinBreakWordLength; i <= (termLength - useMinBreakWordLength); i++) {
    // translate the code point index i into a char offset for substring
    int end = termText.offsetByCodePoints(0, i);
    String leftText = termText.substring(0, end);
    String rightText = termText.substring(end);
    SuggestWord leftWord = generateSuggestWord(ir, term.field(), leftText);
    
    // the right half is only looked up when the left half is frequent enough
    if (leftWord.freq >= useMinSuggestionFrequency) {
      SuggestWord rightWord = generateSuggestWord(ir, term.field(), rightText);
      if (rightWord.freq >= useMinSuggestionFrequency) {
        SuggestWordArrayWrapper suggestion = new SuggestWordArrayWrapper(
            newSuggestion(prefix, leftWord, rightWord));
        suggestions.offer(suggestion);
        // keep the queue bounded by dropping its head
        if (suggestions.size() > maxSuggestions) {
          suggestions.poll();
        }
      }        
      // try to break the right half further, regardless of its own frequency
      int newNumberBreaks = numberBreaks + 1;
      if (newNumberBreaks <= maxChanges) {
        int evaluations = generateBreakUpSuggestions(new Term(term.field(),
            rightWord.string), ir, newNumberBreaks, maxSuggestions,
            useMinSuggestionFrequency, newPrefix(prefix, leftWord),
            suggestions, totalEvaluations, sortMethod);
        totalEvaluations += evaluations;
      }
    }
    
    thisTimeEvaluations++;
    totalEvaluations++;
    // stop once the evaluation budget is used up
    if (totalEvaluations >= maxEvaluations) {
      break;
    }
  }
  return thisTimeEvaluations;
}

Source File: FuzzyTermsEnum.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Constructor for enumeration of all terms from the specified <code>terms</code> which share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and which have at most {@code maxEdits} edits.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first
 * valid term if such a term exists.
 * <p>
 * Delegates to the supplier-based constructor, passing a supplier that creates a
 * {@code FuzzyAutomatonBuilder} from the term text and the given edit parameters.
 *
 * @param terms Delivers terms.
 * @param atts An AttributeSource used to share automata between segments
 * @param term Pattern term.
 * @param maxEdits Maximum edit distance.
 * @param prefixLength the length of the required common prefix
 * @param transpositions whether transpositions should count as a single edit
 * @throws IOException if there is a low-level IO error
 */
FuzzyTermsEnum(Terms terms, AttributeSource atts, Term term, int maxEdits, int prefixLength, boolean transpositions) throws IOException {
  this(terms, atts, term, () -> new FuzzyAutomatonBuilder(term.text(), maxEdits, prefixLength, transpositions));
}

Source File: FuzzyTermsEnum.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Constructor for enumeration of all terms from the specified <code>terms</code> which share a prefix of
 * length <code>prefixLength</code> with <code>term</code> and which have at most {@code maxEdits} edits.
 * <p>
 * After calling the constructor the enumeration is already pointing to the first
 * valid term if such a term exists.
 * <p>
 * Delegates to the supplier-based constructor with a fresh {@code AttributeSource} and a
 * supplier that creates a {@code FuzzyAutomatonBuilder} from the term text and the given
 * edit parameters.
 *
 * @param terms Delivers terms.
 * @param term Pattern term.
 * @param maxEdits Maximum edit distance.
 * @param prefixLength the length of the required common prefix
 * @param transpositions whether transpositions should count as a single edit
 * @throws IOException if there is a low-level IO error
 */
public FuzzyTermsEnum(Terms terms, Term term, int maxEdits, int prefixLength, boolean transpositions) throws IOException {
  this(terms, new AttributeSource(), term, () -> new FuzzyAutomatonBuilder(term.text(), maxEdits, prefixLength, transpositions));
}

Java Code Examples for org.apache.lucene.index.Term#text()