org.apache.lucene.search.spell.DirectSpellChecker Java Examples
The following examples show how to use
org.apache.lucene.search.spell.DirectSpellChecker.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TermSuggester.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings()); final IndexReader indexReader = searcher.getIndexReader(); TermSuggestion response = new TermSuggestion( name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort() ); List<Token> tokens = queryTerms(suggestion, spare); for (Token token : tokens) { // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar( token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode() ); Text key = new Text(new BytesArray(token.term.bytes())); TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset); for (SuggestWord suggestWord : suggestedWords) { Text word = new Text(suggestWord.string); resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score)); } response.addTerm(resultEntry); } return response; }
Example #2
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 6 votes |
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException { if (terms == null) { throw new IllegalArgumentException("generator field [" + field + "] doesn't exist"); } this.spellchecker = spellchecker; this.field = field; this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; final long dictSize = terms.getSumTotalTermFreq(); this.useTotalTermFrequency = dictSize != -1; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.preFilter = preFilter; this.postFilter = postFilter; this.nonErrorLikelihood = nonErrorLikelihood; float thresholdFrequency = spellchecker.getThresholdFrequency(); this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency); termsEnum = terms.iterator(); }
Example #3
Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0 | 5 votes |
@Override public void indexingDone() { try { spellChecker = new DirectSpellChecker(); spellChecker.setMaxEdits(2); spellChecker.setAccuracy(0.1f); spellChecker.setMinPrefix(0); reader = DirectoryReader.open(writer); fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer()); Dictionary dict = new DocumentValueSourceDictionary(reader, WORD_FIELD, new LongValuesSource() { @Override public boolean needsScores() { return false; } @Override public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { return null; } }); fuzzySuggester.build(dict); writer.close(); searcher = new IndexSearcher(DirectoryReader.open(directory)); } catch (IOException e) { throw new RuntimeException(e); } }
Example #4
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 4 votes |
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
Example #5
Source File: DirectSolrSpellChecker.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override @SuppressWarnings({"unchecked"}) public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) { SolrParams params = config.toSolrParams(); log.info("init: {}", config); String name = super.init(config, core); Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR; String compClass = (String) config.get(COMPARATOR_CLASS); if (compClass != null) { if (compClass.equalsIgnoreCase(SCORE_COMP)) comp = SuggestWordQueue.DEFAULT_COMPARATOR; else if (compClass.equalsIgnoreCase(FREQ_COMP)) comp = new SuggestWordFrequencyComparator(); else //must be a FQCN comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class); } StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN; String distClass = (String) config.get(STRING_DISTANCE); if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) sd = core.getResourceLoader().newInstance(distClass, StringDistance.class); float minAccuracy = DEFAULT_ACCURACY; Float accuracy = params.getFloat(ACCURACY); if (accuracy != null) minAccuracy = accuracy; int maxEdits = DEFAULT_MAXEDITS; Integer edits = params.getInt(MAXEDITS); if (edits != null) maxEdits = edits; int minPrefix = DEFAULT_MINPREFIX; Integer prefix = params.getInt(MINPREFIX); if (prefix != null) minPrefix = prefix; int maxInspections = DEFAULT_MAXINSPECTIONS; Integer inspections = params.getInt(MAXINSPECTIONS); if (inspections != null) maxInspections = inspections; float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY; Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY); if (threshold != null) minThreshold = threshold; int minQueryLength = DEFAULT_MINQUERYLENGTH; Integer queryLength = params.getInt(MINQUERYLENGTH); if (queryLength != null) minQueryLength = queryLength; int maxQueryLength = DEFAULT_MAXQUERYLENGTH; Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH); if (overriddenMaxQueryLength != null) maxQueryLength = overriddenMaxQueryLength; float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY; Float queryFreq = params.getFloat(MAXQUERYFREQUENCY); if (queryFreq != null) maxQueryFrequency = queryFreq; checker.setComparator(comp); checker.setDistance(sd); checker.setMaxEdits(maxEdits); checker.setMinPrefix(minPrefix); checker.setAccuracy(minAccuracy); checker.setThresholdFrequency(minThreshold); checker.setMaxInspections(maxInspections); checker.setMinQueryLength(minQueryLength); checker.setMaxQueryLength(maxQueryLength); checker.setMaxQueryFrequency(maxQueryFrequency); checker.setLowerCaseTerms(false); return name; }