org.apache.lucene.search.spell.SuggestMode Java Examples
The following examples show how to use
org.apache.lucene.search.spell.SuggestMode.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 6 votes |
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException { if (terms == null) { throw new IllegalArgumentException("generator field [" + field + "] doesn't exist"); } this.spellchecker = spellchecker; this.field = field; this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; final long dictSize = terms.getSumTotalTermFreq(); this.useTotalTermFrequency = dictSize != -1; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.preFilter = preFilter; this.postFilter = postFilter; this.nonErrorLikelihood = nonErrorLikelihood; float thresholdFrequency = spellchecker.getThresholdFrequency(); this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency); termsEnum = terms.iterator(); }
Example #2
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override public CandidateSet drawCandidates(CandidateSet set) throws IOException { Candidate original = set.originalTerm; BytesRef term = preFilter(original.term, spare, byteSpare); final long frequency = original.frequency; spellchecker.setThresholdFrequency(this.suggestMode == SuggestMode.SUGGEST_ALWAYS ? 0 : thresholdFrequency(frequency, dictSize)); SuggestWord[] suggestSimilar = spellchecker.suggestSimilar(new Term(field, term), numCandidates, reader, this.suggestMode); List<Candidate> candidates = new ArrayList<>(suggestSimilar.length); for (int i = 0; i < suggestSimilar.length; i++) { SuggestWord suggestWord = suggestSimilar[i]; BytesRef candidate = new BytesRef(suggestWord.string); postFilter(new Candidate(candidate, internalFrequency(candidate), suggestWord.score, score(suggestWord.freq, suggestWord.score, dictSize), false), spare, byteSpare, candidates); } set.addCandidates(candidates); return set; }
Example #3
Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0 | 6 votes |
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException { final SuggestWord[][] rawSuggestions = wordBreakSpellChecker .suggestWordBreaks(toLuceneTerm(term), decompoundsToQuery, indexReader, SuggestMode.SUGGEST_ALWAYS, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); if (rawSuggestions.length == 0) { return Collections.emptyList(); } if (!verifyDecompoundCollation) { return Arrays.stream(rawSuggestions) .filter(suggestion -> suggestion != null && suggestion.length > 1) .limit(maxDecompoundExpansions).collect(Collectors.toList()); } final IndexSearcher searcher = new IndexSearcher(indexReader); return Arrays.stream(rawSuggestions) .filter(suggestion -> suggestion != null && suggestion.length > 1) .map(suggestion -> new MaxSortable<>(suggestion, countCollatedMatches(suggestion, searcher))) .filter(sortable -> sortable.count > 0) .sorted() .limit(maxDecompoundExpansions) // TODO: use PriorityQueue .map(sortable -> sortable.obj) .collect(Collectors.toList()); }
Example #4
Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0 | 5 votes |
private List<String> getUsingSpellcheck(String searchQuery) throws IOException { SuggestWord[] suggestions = spellChecker.suggestSimilar(new Term(WORD_FIELD, searchQuery), 2, reader, SuggestMode.SUGGEST_ALWAYS); List<String> result = new ArrayList<>(); for(SuggestWord suggestion : suggestions) { result.add(suggestion.string); } return result; }
Example #5
Source File: SpellingOptions.java From lucene-solr with Apache License 2.0 | 5 votes |
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, SuggestMode suggestMode, boolean extendedResults, float accuracy, SolrParams customParams) { this.tokens = tokens; this.reader = reader; this.count = count; this.suggestMode = suggestMode; this.extendedResults = extendedResults; this.accuracy = accuracy; this.customParams = customParams; }
Example #6
Source File: SpellingOptions.java From lucene-solr with Apache License 2.0 | 5 votes |
public SpellingOptions(Collection<Token> tokens, IndexReader reader, int count, int alternativeTermCount, SuggestMode suggestMode, boolean extendedResults, float accuracy, SolrParams customParams) { this.tokens = tokens; this.reader = reader; this.count = count; this.alternativeTermCount = alternativeTermCount; this.suggestMode = suggestMode; this.extendedResults = extendedResults; this.accuracy = accuracy; this.customParams = customParams; }
Example #7
Source File: Suggester.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { log.debug("getSuggestions: {}", options.tokens); if (lookup == null) { log.info("Lookup is null - invoke spellchecker.build first"); return EMPTY_RESULT; } SpellingResult res = new SpellingResult(); CharsRef scratch = new CharsRef(); for (Token t : options.tokens) { scratch.chars = t.buffer(); scratch.offset = 0; scratch.length = t.length(); boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) && !(lookup instanceof WFSTCompletionLookup) && !(lookup instanceof AnalyzingSuggester); List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count); if (suggestions == null) { continue; } if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) { Collections.sort(suggestions); } for (LookupResult lr : suggestions) { res.add(t, lr.key.toString(), (int)lr.value); } } return res; }
Example #8
Source File: DirectSpellcheckerSettings.java From Elasticsearch with Apache License 2.0 | 4 votes |
public SuggestMode suggestMode() { return suggestMode; }
Example #9
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 4 votes |
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
Example #10
Source File: SpellCheckComponent.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override @SuppressWarnings("unchecked") public void process(ResponseBuilder rb) throws IOException { SolrParams params = rb.req.getParams(); if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) { return; } boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD)); String q = params.get(SPELLCHECK_Q); SolrSpellChecker spellChecker = getSpellChecker(params); Collection<Token> tokens = null; if (q != null) { //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker tokens = getTokens(q, spellChecker.getQueryAnalyzer()); } else { q = rb.getQueryString(); if (q == null) { q = params.get(CommonParams.Q); } tokens = queryConverter.convert(q); } if (tokens != null && tokens.isEmpty() == false) { if (spellChecker != null) { int count = params.getInt(SPELLCHECK_COUNT, 1); boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR); boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false); boolean collate = params.getBool(SPELLCHECK_COLLATE, false); float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE); int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0); //If specified, this can be a discrete # of results, or a percentage of fq results. Integer maxResultsForSuggest = maxResultsForSuggest(rb); ModifiableSolrParams customParams = new ModifiableSolrParams(); for (String checkerName : getDictionaryNames(params)) { customParams.add(getCustomParams(checkerName, params)); } Number hitsLong = (Number) rb.rsp.getToLog().get("hits"); long hits = 0; if (hitsLong == null) { hits = rb.getNumberDocumentsFound(); } else { hits = hitsLong.longValue(); } SpellingResult spellingResult = null; if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) { SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX; if (onlyMorePopular) { suggestMode = SuggestMode.SUGGEST_MORE_POPULAR; } else if (alternativeTermCount > 0) { suggestMode = SuggestMode.SUGGEST_ALWAYS; } IndexReader reader = rb.req.getSearcher().getIndexReader(); SpellingOptions options = new SpellingOptions(tokens, reader, count, alternativeTermCount, suggestMode, extendedResults, accuracy, customParams); spellingResult = spellChecker.getSuggestions(options); } else { spellingResult = new SpellingResult(); } boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest); @SuppressWarnings({"rawtypes"}) NamedList response = new SimpleOrderedMap(); @SuppressWarnings({"rawtypes"}) NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults); response.add("suggestions", suggestions); if (extendedResults) { response.add("correctlySpelled", isCorrectlySpelled); } if (collate) { addCollationsToResponse(params, spellingResult, rb, q, response, spellChecker.isSuggestionsMayOverlap()); } if (shardRequest) { addOriginalTermsToResponse(response, tokens); } rb.rsp.add("spellcheck", response); } else { throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params))); } } }
Example #11
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 4 votes |
@Test @SuppressWarnings({"unchecked"}) public void testExtendedResults() throws Exception { IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); @SuppressWarnings({"rawtypes"}) NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File indexDir = createTempDir().toFile(); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); h.getCore().withSearcher(searcher -> { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("documemt"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("documemt is null and it shouldn't be", suggestions != null); assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "document", entry.getKey().equals("document") == true); assertTrue(entry.getValue() + " does not equal: " + 2, entry.getValue() == 2); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("document"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); return null; }); }
Example #12
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 4 votes |
@Test @SuppressWarnings({"unchecked"}) public void testAlternateLocation() throws Exception { String[] ALT_DOCS = new String[]{ "jumpin jack flash", "Sargent Peppers Lonely Hearts Club Band", "Born to Run", "Thunder Road", "Londons Burning", "A Horse with No Name", "Sweet Caroline" }; IndexBasedSpellChecker checker = new IndexBasedSpellChecker(); @SuppressWarnings({"rawtypes"}) NamedList spellchecker = new NamedList(); spellchecker.add("classname", IndexBasedSpellChecker.class.getName()); File tmpDir = createTempDir().toFile(); File indexDir = new File(tmpDir, "spellingIdx"); //create a standalone index File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime()); Directory dir = newFSDirectory(altIndexDir.toPath()); IndexWriter iw = new IndexWriter( dir, new IndexWriterConfig(new WhitespaceAnalyzer()) ); for (int i = 0; i < ALT_DOCS.length; i++) { Document doc = new Document(); doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES)); iw.addDocument(doc); } iw.forceMerge(1); iw.close(); dir.close(); indexDir.mkdirs(); spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath()); spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title"); spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker); SolrCore core = h.getCore(); String dictName = checker.init(spellchecker, core); assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME, dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true); h.getCore().withSearcher(searcher -> { checker.build(core, searcher); IndexReader reader = searcher.getIndexReader(); Collection<Token> tokens = queryConverter.convert("flesh"); SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null); SpellingResult result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); //should be lowercased, b/c we are using a lowercasing analyzer Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("flesh is null and it shouldn't be", suggestions != null); assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1); Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next(); assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true); assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1); //test something not in the spell checker spellOpts.tokens = queryConverter.convert("super"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions size should be 0", suggestions.size()==0); spellOpts.tokens = queryConverter.convert("Caroline"); result = checker.getSuggestions(spellOpts); assertTrue("result is null and it shouldn't be", result != null); suggestions = result.get(spellOpts.tokens.iterator().next()); assertTrue("suggestions is not null and it should be", suggestions == null); return null; }); }
Example #13
Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0 | 3 votes |
protected CombineSuggestion[] suggestCombination(final Iterator<Term> terms) throws IOException { final List<org.apache.lucene.index.Term> luceneTerms = new ArrayList<>(COMPOUND_WINDOW); terms.forEachRemaining(term -> luceneTerms.add(toLuceneTerm(term))); return wordBreakSpellChecker.suggestWordCombinations( luceneTerms.toArray(new org.apache.lucene.index.Term[0]), 10, indexReader, SuggestMode.SUGGEST_ALWAYS); }