org.apache.lucene.search.spell.DirectSpellChecker Java Examples

Source File: TermSuggester.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Builds a {@link TermSuggestion} by asking a {@link DirectSpellChecker} for similar terms
 * for every token produced by {@code queryTerms(suggestion, spare)}.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying the spell-checker settings, result size and shard size
 * @param searcher   searcher whose index reader is consulted for candidate terms
 * @param spare      scratch builder handed through to {@code queryTerms}
 * @return one entry per query token, each carrying the suggested words as options
 * @throws IOException propagated from the spell checker / index access
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    DirectSpellChecker checker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    TermSuggestion result = new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    List<Token> queryTokens = queryTerms(suggestion, spare);
    for (Token current : queryTokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] candidates = checker.suggestSimilar(
                current.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode()
        );

        // The entry is keyed by the original term and records its offset and length in the query text.
        Text originalTerm = new Text(new BytesArray(current.term.bytes()));
        TermSuggestion.Entry entry = new TermSuggestion.Entry(
                originalTerm, current.startOffset, current.endOffset - current.startOffset
        );
        for (SuggestWord candidate : candidates) {
            Text suggested = new Text(candidate.string);
            entry.addOption(new TermSuggestion.Entry.Option(suggested, candidate.freq, candidate.score));
        }
        result.addTerm(entry);
    }
    return result;
}

Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Creates a candidate generator backed by the given spell checker and the terms of {@code field}.
 *
 * <p>The dictionary size is taken from {@code terms.getSumTotalTermFreq()}; when that call
 * returns {@code -1} the constructor falls back to {@code reader.maxDoc()} and records that
 * total term frequencies are not in use.
 *
 * <p>The frequency plateau is derived from the spell checker's threshold frequency: a value
 * {@code >= 1} is used as an absolute count, a smaller value is treated as a fraction of the
 * effective dictionary size (i.e. the same fallback-adjusted size stored in {@code dictSize},
 * never the raw {@code -1} sentinel).
 *
 * @param spellchecker       spell checker used to generate candidates; also supplies the threshold frequency
 * @param field              name of the field the candidates are drawn from
 * @param suggestMode        suggest mode stored for later use
 * @param reader             index reader; its {@code maxDoc()} is the fallback dictionary size
 * @param nonErrorLikelihood value stored as-is for later use
 * @param numCandidates      value stored as-is for later use
 * @param preFilter          analyzer stored as the pre-filter (a sibling constructor passes {@code null})
 * @param postFilter         analyzer stored as the post-filter (a sibling constructor passes {@code null})
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException              propagated from the terms / reader access
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood,  int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;

    // -1 is treated as "statistic not available"; fall back to the document count in that case.
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    final long effectiveDictSize = sumTotalTermFreq == -1 ? reader.maxDoc() : sumTotalTermFreq;
    this.useTotalTermFrequency = sumTotalTermFreq != -1;
    this.dictSize = effectiveDictSize;

    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;

    float thresholdFrequency = spellchecker.getThresholdFrequency();
    // A fractional threshold is relative to the effective dictionary size. Using the raw
    // sum here would multiply by -1 when the statistic is missing and collapse the plateau to 0.
    this.frequencyPlateau = thresholdFrequency >= 1.0f
            ? (int) thresholdFrequency
            : (int) (effectiveDictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}

Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0

5 votes

/**
 * Finalises indexing: configures the direct spell checker, builds the fuzzy suggester from the
 * indexed words, closes the writer and opens a searcher on the directory.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised along the way
 */
@Override
public void indexingDone() {
	try {
		// Spell checker: up to 2 edits, accuracy 0.1, no required common prefix.
		spellChecker = new DirectSpellChecker();
		spellChecker.setMaxEdits(2);
		spellChecker.setAccuracy(0.1f);
		spellChecker.setMinPrefix(0);

		// Reader obtained from the still-open writer.
		reader = DirectoryReader.open(writer);

		fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());

		// Weight source that needs no scores and supplies no per-document values.
		// NOTE(review): getValues returns null - confirm the dictionary tolerates that.
		LongValuesSource noWeights = new LongValuesSource() {

			@Override
			public boolean needsScores() {
				return false;
			}

			@Override
			public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
				return null;
			}
		};
		Dictionary wordDictionary = new DocumentValueSourceDictionary(reader, WORD_FIELD, noWeights);
		fuzzySuggester.build(wordDictionary);

		// Writer is closed before the searcher is opened on a fresh reader over the directory.
		writer.close();
		searcher = new IndexSearcher(DirectoryReader.open(directory));
	} catch (IOException ioFailure) {
		throw new RuntimeException(ioFailure);
	}
}

Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Convenience constructor: delegates to the full constructor with no pre- or post-filter
 * analyzer ({@code null} for both) and with the terms looked up via
 * {@code MultiFields.getTerms(reader, field)}.
 *
 * @param spellchecker       spell checker used to generate candidates
 * @param field              name of the field the candidates are drawn from
 * @param suggestMode        suggest mode passed through unchanged
 * @param reader             index reader the field's terms are resolved from
 * @param nonErrorLikelihood value passed through unchanged
 * @param numCandidates      value passed through unchanged
 * @throws IOException propagated from the terms lookup or the delegated constructor
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
    this(
            spellchecker,
            field,
            suggestMode,
            reader,
            nonErrorLikelihood,
            numCandidates,
            null, // preFilter
            null, // postFilter
            MultiFields.getTerms(reader, field)
    );
}

Source File: DirectSolrSpellChecker.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Initialises this spell checker from its configuration and applies every tunable to the
 * underlying {@code checker}.
 *
 * <p>Each option falls back to its {@code DEFAULT_*} constant when absent from the
 * configuration. All options are read (and parsed) before any of them is applied.
 *
 * @param config named-list configuration of this spell checker
 * @param core   core whose resource loader instantiates custom comparator / distance classes
 * @return the name returned by {@code super.init(config, core)}
 */
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {

  SolrParams params = config.toSolrParams();

  log.info("init: {}", config);
  String name = super.init(config, core);

  // Comparator: score-based unless the frequency comparator or a custom class is requested.
  final Comparator<SuggestWord> comp;
  String compClass = (String) config.get(COMPARATOR_CLASS);
  if (compClass == null || compClass.equalsIgnoreCase(SCORE_COMP)) {
    comp = SuggestWordQueue.DEFAULT_COMPARATOR;
  } else if (compClass.equalsIgnoreCase(FREQ_COMP)) {
    comp = new SuggestWordFrequencyComparator();
  } else {
    // anything else must be a fully-qualified class name
    comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
  }

  // String distance: the internal Levenshtein unless a different class is named.
  final StringDistance sd;
  String distClass = (String) config.get(STRING_DISTANCE);
  if (distClass == null || distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) {
    sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
  } else {
    sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);
  }

  // Numeric tunables, each with its default; read in the same order as they were originally.
  final float minAccuracy = params.getFloat(ACCURACY, DEFAULT_ACCURACY);
  final int maxEdits = params.getInt(MAXEDITS, DEFAULT_MAXEDITS);
  final int minPrefix = params.getInt(MINPREFIX, DEFAULT_MINPREFIX);
  final int maxInspections = params.getInt(MAXINSPECTIONS, DEFAULT_MAXINSPECTIONS);
  final float minThreshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY, DEFAULT_THRESHOLD_TOKEN_FREQUENCY);
  final int minQueryLength = params.getInt(MINQUERYLENGTH, DEFAULT_MINQUERYLENGTH);
  final int maxQueryLength = params.getInt(MAXQUERYLENGTH, DEFAULT_MAXQUERYLENGTH);
  final float maxQueryFrequency = params.getFloat(MAXQUERYFREQUENCY, DEFAULT_MAXQUERYFREQUENCY);

  checker.setComparator(comp);
  checker.setDistance(sd);
  checker.setMaxEdits(maxEdits);
  checker.setMinPrefix(minPrefix);
  checker.setAccuracy(minAccuracy);
  checker.setThresholdFrequency(minThreshold);
  checker.setMaxInspections(maxInspections);
  checker.setMinQueryLength(minQueryLength);
  checker.setMaxQueryLength(maxQueryLength);
  checker.setMaxQueryFrequency(maxQueryFrequency);
  checker.setLowerCaseTerms(false);

  return name;
}

org.apache.lucene.search.spell.DirectSpellChecker Java Examples