org.apache.lucene.search.spell.SuggestWord Java Examples
The following examples show how to use
org.apache.lucene.search.spell.SuggestWord.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DirectCandidateGenerator.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Looks up spelling candidates for the original term of {@code set} and adds them to the set.
 *
 * <p>The original term is run through {@code preFilter}, handed to the spell checker, and every
 * returned {@link SuggestWord} is wrapped in a {@code Candidate} and passed to {@code postFilter},
 * which receives the list that is finally added to {@code set}.
 *
 * @param set the candidate set whose {@code originalTerm} is used for the lookup
 * @return the {@code set} instance that was passed in
 * @throws IOException declared by the signature; presumably raised while reading the index
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate originalCandidate = set.originalTerm;
    final BytesRef filteredTerm = preFilter(originalCandidate.term, spare, byteSpare);
    final long originalFrequency = originalCandidate.frequency;

    // With SUGGEST_ALWAYS no frequency threshold is applied; otherwise it is derived from the original term.
    spellchecker.setThresholdFrequency(
            this.suggestMode != SuggestMode.SUGGEST_ALWAYS ? thresholdFrequency(originalFrequency, dictSize) : 0);

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> collected = new ArrayList<>(similarWords.length);
    for (SuggestWord similar : similarWords) {
        final BytesRef candidateTerm = new BytesRef(similar.string);
        final Candidate candidate = new Candidate(
                candidateTerm,
                internalFrequency(candidateTerm),
                similar.score,
                score(similar.freq, similar.score, dictSize),
                false);
        postFilter(candidate, spare, byteSpare, collected);
    }

    set.addCandidates(collected);
    return set;
}
Example #2
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Verifies that the term handed to {@code suggestWordBreaks} keeps its original case
 * ({@code "W1w2"}) when the rewriter is built with {@code false} as its fourth argument
 * (presumably the lower-case-input flag; confirm against the constructor).
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputFalse() throws IOException {
    // The spell checker offers no word breaks at all.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    underTest.rewrite(new ExpandedQuery(userQuery));

    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "W1w2")), anyInt(), any(), any(), any());
}
Example #3
Source File: TermSuggester.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException { DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings()); final IndexReader indexReader = searcher.getIndexReader(); TermSuggestion response = new TermSuggestion( name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort() ); List<Token> tokens = queryTerms(suggestion, spare); for (Token token : tokens) { // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar( token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode() ); Text key = new Text(new BytesArray(token.term.bytes())); TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset); for (SuggestWord suggestWord : suggestedWords) { Text word = new Text(suggestWord.string); resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score)); } response.addTerm(resultEntry); } return response; }
Example #4
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Verifies that the terms handed to {@code suggestWordCombinations} keep their original case
 * ({@code "W1"}, {@code "W2"}) when the rewriter is built with {@code false} as its fourth
 * argument (presumably the lower-case-input flag; confirm against the constructor).
 */
@Test
public void testThatCompoundRespectsLowerCaseInputFalse() throws IOException {
    // The spell checker offers no word breaks at all.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    underTest.rewrite(new ExpandedQuery(userQuery));

    final Term[] expectedTerms = new Term[] { new Term("field1", "W1"), new Term("field1", "W2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
Example #5
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Verifies that the terms handed to {@code suggestWordCombinations} are lower-cased
 * ({@code "w1"}, {@code "w2"}) when the rewriter is built with {@code true} as its fourth
 * argument (presumably the lower-case-input flag; confirm against the constructor).
 */
@Test
public void testThatCompoundRespectsLowerCaseInputTrue() throws IOException {
    // The spell checker offers no word breaks at all.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    underTest.rewrite(new ExpandedQuery(userQuery));

    final Term[] expectedTerms = new Term[] { new Term("field1", "w1"), new Term("field1", "w2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
Example #6
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks which comparator each configured spell checker uses: the checker registered as
 * {@code "freq"} must expose a {@link SuggestWordFrequencyComparator}, the one registered as
 * {@code "fqcn"} a {@code SampleComparator}.
 */
@Test
public void testComparator() throws Exception {
    final SpellCheckComponent component =
            (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
    assertNotNull(component);

    // Checker configured under the name "freq".
    final AbstractLuceneSpellChecker freqChecker =
            (AbstractLuceneSpellChecker) component.getSpellChecker("freq");
    assertNotNull(freqChecker);
    final Comparator<SuggestWord> freqComparator = freqChecker.getSpellChecker().getComparator();
    assertNotNull(freqComparator);
    assertTrue(freqComparator instanceof SuggestWordFrequencyComparator);

    // Checker configured under the name "fqcn".
    final AbstractLuceneSpellChecker fqcnChecker =
            (AbstractLuceneSpellChecker) component.getSpellChecker("fqcn");
    assertNotNull(fqcnChecker);
    final Comparator<SuggestWord> fqcnComparator = fqcnChecker.getSpellChecker().getComparator();
    assertNotNull(fqcnComparator);
    assertTrue(fqcnComparator instanceof SampleComparator);
}
Example #7
Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0 | 6 votes |
protected void decompound(final Term term) { // determine the nodesToAdd based on the term try { for (final SuggestWord[] decompounded : suggestWordbreaks(term)) { if (decompounded != null && decompounded.length > 0) { final BooleanQuery bq = new BooleanQuery(term.getParent(), Clause.Occur.SHOULD, true); for (final SuggestWord word : decompounded) { final DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(bq, Clause.Occur.MUST, true); bq.addClause(dmq); dmq.addClause(new Term(dmq, term.getField(), word.string, true)); } nodesToAdd.add(bq); } } } catch (final IOException e) { // IO is broken, this looks serious -> throw as RTE throw new RuntimeException("Error decompounding " + term, e); } }
Example #8
Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0 | 6 votes |
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException { final SuggestWord[][] rawSuggestions = wordBreakSpellChecker .suggestWordBreaks(toLuceneTerm(term), decompoundsToQuery, indexReader, SuggestMode.SUGGEST_ALWAYS, WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY); if (rawSuggestions.length == 0) { return Collections.emptyList(); } if (!verifyDecompoundCollation) { return Arrays.stream(rawSuggestions) .filter(suggestion -> suggestion != null && suggestion.length > 1) .limit(maxDecompoundExpansions).collect(Collectors.toList()); } final IndexSearcher searcher = new IndexSearcher(indexReader); return Arrays.stream(rawSuggestions) .filter(suggestion -> suggestion != null && suggestion.length > 1) .map(suggestion -> new MaxSortable<>(suggestion, countCollatedMatches(suggestion, searcher))) .filter(sortable -> sortable.count > 0) .sorted() .limit(maxDecompoundExpansions) // TODO: use PriorityQueue .map(sortable -> sortable.obj) .collect(Collectors.toList()); }
Example #9
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * A single token for which the spell checker returns only an empty suggestion must be left
 * untouched by the rewriter.
 */
@Test
public void testNoDecompoundForSingleToken() throws IOException {
    // One suggestion slot, but it contains no words.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = underTest.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false)
                    )
            )
    );
}
Example #10
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Verifies that the term handed to {@code suggestWordBreaks} is lower-cased ({@code "w1w2"})
 * when the rewriter is built with {@code true} as its fourth argument (presumably the
 * lower-case-input flag; confirm against the constructor).
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputTrue() throws IOException {
    // The spell checker offers no word breaks at all.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    underTest.rewrite(new ExpandedQuery(userQuery));

    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "w1w2")), anyInt(), any(), any(), any());
}
Example #11
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
/**
 * A single token with one two-part word-break suggestion must gain one generated boolean
 * alternative whose two parts are both required.
 */
@Test
public void testDecompoundSingleTokenIntoOneTwoTokenAlternative() throws IOException {
    final SuggestWord[][] wordBreaks = new SuggestWord[][] { decompoundSuggestion("w1", "w2") };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(wordBreaks);

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = underTest.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            )
                    )
            )
    );
}
Example #12
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testSingleReverseCompoundTriggerWord() throws IOException { TrieMap<Boolean> triggerWords = new TrieMap<>(); triggerWords.put("trigger", true); // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); Map<List<String>, CombineSuggestion[]> suggestions = new HashMap<>(); suggestions.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) }); setupWordBreakMockWithCombinations(suggestions); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, triggerWords, 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "trigger", false); addTerm(query, "w3", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false), term("w3w1", true) ), dmq( term("w3", false), term("w3w1", true) ) ) ); }
Example #13
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testAlwaysAddReverseCompoundsForTwoWordInput() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); Map<List<String>, CombineSuggestion[]> suggestions = new HashMap<>(); suggestions.put(Arrays.asList("w1", "w2"), new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) }); suggestions.put(Arrays.asList("w2", "w1"), new CombineSuggestion[] { combineSuggestion("w2w1", 0, 1) }); setupWordBreakMockWithCombinations(suggestions); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, true, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "w2", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false), term("w1w2", true), term("w2w1", true) ), dmq( term("w2", false), term("w1w2", true), term("w2w1", true) ) ) ); }
Example #14
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testNoCompoundForTwoInputTokensOnly() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any())) .thenReturn(new CombineSuggestion[] { }); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "w2", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false) ), dmq( term("w2", false) ) ) ); }
Example #15
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testCompoundTwoInputTokensOnly() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); // compound of terms at idx 0+1 when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any())) .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) }); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "w2", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false), term("w1w2", true) ), dmq( term("w2", false), term("w1w2", true) ) ) ); }
Example #16
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
/**
 * With three word-break suggestions available and {@code 2} as the seventh constructor
 * argument (presumably the maximum number of expansions; confirm against the constructor),
 * only the first two suggestions may appear in the rewritten query.
 */
@Test
public void testThatOnlyMaxExpansionsAreApplied() throws IOException {
    final SuggestWord[][] wordBreaks = new SuggestWord[][] {
            decompoundSuggestion("w3", "w4"),
            decompoundSuggestion("w", "3w4"),
            decompoundSuggestion("w3w", "4")
    };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(wordBreaks);

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 2, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w3w4", false);

    final ExpandedQuery result = underTest.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w3w4", false),
                            bq(
                                    dmq(must(), term("w3", true)),
                                    dmq(must(), term("w4", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("3w4", true))
                            )
                    )
            )
    );
}
Example #17
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
/**
 * A single token with two two-part word-break suggestions must gain two generated boolean
 * alternatives, each requiring both of its parts.
 */
@Test
public void testDecompoundSingleTokenIntoTwoTwoTokenAlternatives() throws IOException {
    final SuggestWord[][] wordBreaks = new SuggestWord[][] {
            decompoundSuggestion("w1", "w2"),
            decompoundSuggestion("w", "1w2")
    };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(wordBreaks);

    final WordBreakCompoundRewriter underTest = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = underTest.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("1w2", true))
                            )
                    )
            )
    );
}
Example #18
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testThatGeneratedFirstTermIsNotCompounded() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); // compound of terms at idx 0+1 // when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any())) // .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) }); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", true); addTerm(query, "w2", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", true) ), dmq( term("w2", false) ) ) ); }
Example #19
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 5 votes |
@Test public void testThatGeneratedSecondTermIsNotCompounded() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); // compound of terms at idx 0+1 // when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any())) // .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) }); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "w2", true); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false) ), dmq( term("w2", true) ) ) ); }
Example #20
Source File: WordBreakCompoundRewriter.java From querqy with Apache License 2.0 | 5 votes |
/**
 * Counts the documents that match every word of {@code suggestion} in {@code dictionaryField}.
 *
 * <p>Each word is added as a {@code FILTER} clause on a term query, so a document is counted
 * only if it matches all of them.
 *
 * @param suggestion the words that must all match
 * @param searcher   searcher used to run the count
 * @return the number of matching documents as reported by {@code searcher.count}
 * @throws RuntimeException wrapping any {@link IOException} raised by the searcher
 */
protected int countCollatedMatches(final SuggestWord[] suggestion, final IndexSearcher searcher) {
    final org.apache.lucene.search.BooleanQuery.Builder allWords =
            new org.apache.lucene.search.BooleanQuery.Builder();

    for (final SuggestWord part : suggestion) {
        final org.apache.lucene.index.Term luceneTerm =
                new org.apache.lucene.index.Term(dictionaryField, part.string);
        final org.apache.lucene.search.BooleanClause clause = new org.apache.lucene.search.BooleanClause(
                new TermQuery(luceneTerm),
                org.apache.lucene.search.BooleanClause.Occur.FILTER);
        allWords.add(clause);
    }

    try {
        return searcher.count(allWords.build());
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
}
Example #21
Source File: LuceneWordSearch.java From preDict with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Asks the spell checker for up to two suggestions for {@code searchQuery} in
 * {@code WORD_FIELD} and returns their strings.
 *
 * @param searchQuery the word to look up
 * @return the suggested strings, in the order the spell checker returned them
 * @throws IOException if the spell checker lookup fails
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
    final Term lookupTerm = new Term(WORD_FIELD, searchQuery);
    final SuggestWord[] similar =
            spellChecker.suggestSimilar(lookupTerm, 2, reader, SuggestMode.SUGGEST_ALWAYS);

    final List<String> words = new ArrayList<>();
    for (int i = 0; i < similar.length; i++) {
        words.add(similar[i].string);
    }
    return words;
}
Example #22
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 4 votes |
/**
 * Converts each of the given strings with {@code suggestWord} and returns the results as an
 * array in the same order.
 *
 * @param parts the strings making up one word-break suggestion
 * @return one {@link SuggestWord} per element of {@code parts}
 */
private static SuggestWord[] decompoundSuggestion(String... parts) {
    final SuggestWord[] words = new SuggestWord[parts.length];
    for (int i = 0; i < parts.length; i++) {
        words[i] = suggestWord(parts[i]);
    }
    return words;
}
Example #23
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 4 votes |
@Test public void testCompoundTriggerAffectsOnlySurroundingCompound() throws IOException { TrieMap<Boolean> triggerWords = new TrieMap<>(); triggerWords.put("trigger", true); // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); Map<List<String>, CombineSuggestion[]> suggestions = new HashMap<>(); suggestions.put(Arrays.asList("w0", "w1"), new CombineSuggestion[] { combineSuggestion("w0w1", 0, 1) }); suggestions.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) }); suggestions.put(Arrays.asList("w3", "w4"), new CombineSuggestion[] { combineSuggestion("w3w4", 0, 1) }); setupWordBreakMockWithCombinations(suggestions); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, triggerWords, 5, false); Query query = new Query(); addTerm(query, "w0", false); addTerm(query, "w1", false); addTerm(query, "trigger", false); addTerm(query, "w3", false); addTerm(query, "w4", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w0", false), term("w0w1", true) ), dmq( term("w1", false), term("w0w1", true), term("w3w1", true) ), dmq( term("w3", false), term("w3w1", true), term("w3w4", true) ), dmq( term("w4", false), term("w3w4", true) ) ) ); }
Example #24
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 4 votes |
@Test public void testCompoundTriggerWordWithLowerCaseInputSetToTrue() throws IOException { TrieMap<Boolean> triggerWords = new TrieMap<>(); triggerWords.put("trigger_lower", true); // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); Map<List<String>, CombineSuggestion[]> suggestions = new HashMap<>(); suggestions.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) }); setupWordBreakMockWithCombinations(suggestions); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", true, false, triggerWords, 5, false); Query query1 = new Query(); addTerm(query1, "w1", false); addTerm(query1, "trigger_lower", false); addTerm(query1, "w3", false); ExpandedQuery expandedQuery1 = new ExpandedQuery(query1); final ExpandedQuery rewritten1 = rewriter.rewrite(expandedQuery1); assertThat((Query) rewritten1.getUserQuery(), bq( dmq( term("w1", false), term("w3w1", true) ), dmq( term("w3", false), term("w3w1", true) ) ) ); Query query2 = new Query(); addTerm(query2, "w1", false); addTerm(query2, "Trigger_Lower", false); addTerm(query2, "w3", false); ExpandedQuery expandedQuery2 = new ExpandedQuery(query2); final ExpandedQuery rewritten2 = rewriter.rewrite(expandedQuery2); assertThat((Query) rewritten2.getUserQuery(), bq( dmq( term("w1", false), term("w3w1", true) ), dmq( term("w3", false), term("w3w1", true) ) ) ); }
Example #25
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 4 votes |
@Test public void testCompoundTriggerWordWithLowerCaseInputSetToFalse() throws IOException { TrieMap<Boolean> triggerWords = new TrieMap<>(); triggerWords.put("Trigger_Upper", true); triggerWords.put("trigger_lower", true); // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); Map<List<String>, CombineSuggestion[]> suggestions = new HashMap<>(); suggestions.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) }); setupWordBreakMockWithCombinations(suggestions); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, triggerWords, 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "Trigger_Upper", false); addTerm(query, "w3", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false), term("w3w1", true) ), dmq( term("w3", false), term("w3w1", true) ) ) ); Query query2 = new Query(); addTerm(query2, "w1", false); addTerm(query2, "trigger_upper", false); addTerm(query2, "w3", false); ExpandedQuery expandedQuery2 = new ExpandedQuery(query2); final ExpandedQuery rewritten2 = rewriter.rewrite(expandedQuery2); assertThat((Query) rewritten2.getUserQuery(), bq( dmq( term("w1", false) ), dmq( term("trigger_upper", false) ), dmq( term("w3", false) ) ) ); Query query3 = new Query(); addTerm(query3, "w1", false); addTerm(query3, "Trigger_Lower", false); addTerm(query3, "w3", false); ExpandedQuery expandedQuery3 = new ExpandedQuery(query3); final ExpandedQuery rewritten3 = rewriter.rewrite(expandedQuery3); assertThat((Query) rewritten3.getUserQuery(), bq( dmq( term("w1", false) ), dmq( term("Trigger_Lower", false) ), dmq( term("w3", false) ) ) ); }
Example #26
Source File: WordBreakCompoundRewriterTest.java From querqy with Apache License 2.0 | 4 votes |
@Test public void testThatCompoundingIfGeneratedIsMixedIn() throws IOException { // don't de-compound when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())) .thenReturn(new SuggestWord[][] {new SuggestWord[] {}}); // compound of terms at idx 0+1 when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any())) .thenReturn(new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) }); WordBreakCompoundRewriter rewriter = new WordBreakCompoundRewriter(wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false); Query query = new Query(); addTerm(query, "w1", false); addTerm(query, "w2g", true); addTerm(query, "w2", false); ExpandedQuery expandedQuery = new ExpandedQuery(query); final ExpandedQuery rewritten = rewriter.rewrite(expandedQuery); assertThat((Query) rewritten.getUserQuery(), bq( dmq( term("w1", false), term("w1w2", true) ), dmq( term("w2g", true) ), dmq( term("w2", false), term("w1w2", true) ) ) ); }
Example #27
Source File: SampleComparator.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Orders two suggestions by the natural {@link String} ordering of their {@code string} fields.
 *
 * @param suggestWord  the left-hand suggestion
 * @param suggestWord1 the right-hand suggestion
 * @return the result of {@code suggestWord.string.compareTo(suggestWord1.string)}
 */
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
    final String left = suggestWord.string;
    final String right = suggestWord1.string;
    return left.compareTo(right);
}
Example #28
Source File: DirectSolrSpellChecker.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override public SpellingResult getSuggestions(SpellingOptions options) throws IOException { log.debug("getSuggestions: {}", options.tokens); SpellingResult result = new SpellingResult(); float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy; for (Token token : options.tokens) { String tokenText = token.toString(); Term term = new Term(field, tokenText); int freq = options.reader.docFreq(term); int count = (options.alternativeTermCount > 0 && freq > 0) ? options.alternativeTermCount: options.count; SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy); result.addFrequency(token, freq); // If considering alternatives to "correctly-spelled" terms, then add the // original as a viable suggestion. if (options.alternativeTermCount > 0 && freq > 0) { boolean foundOriginal = false; SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1]; for (int i = 0; i < suggestions.length; i++) { if (suggestions[i].string.equals(tokenText)) { foundOriginal = true; break; } suggestionsWithOrig[i + 1] = suggestions[i]; } if (!foundOriginal) { SuggestWord orig = new SuggestWord(); orig.freq = freq; orig.string = tokenText; suggestionsWithOrig[0] = orig; suggestions = suggestionsWithOrig; } } if(suggestions.length==0 && freq==0) { List<String> empty = Collections.emptyList(); result.add(token, empty); } else { for (SuggestWord suggestion : suggestions) { result.add(token, suggestion.string, suggestion.freq); } } } return result; }
Example #29
Source File: DirectSolrSpellChecker.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override @SuppressWarnings({"unchecked"}) public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) { SolrParams params = config.toSolrParams(); log.info("init: {}", config); String name = super.init(config, core); Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR; String compClass = (String) config.get(COMPARATOR_CLASS); if (compClass != null) { if (compClass.equalsIgnoreCase(SCORE_COMP)) comp = SuggestWordQueue.DEFAULT_COMPARATOR; else if (compClass.equalsIgnoreCase(FREQ_COMP)) comp = new SuggestWordFrequencyComparator(); else //must be a FQCN comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class); } StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN; String distClass = (String) config.get(STRING_DISTANCE); if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) sd = core.getResourceLoader().newInstance(distClass, StringDistance.class); float minAccuracy = DEFAULT_ACCURACY; Float accuracy = params.getFloat(ACCURACY); if (accuracy != null) minAccuracy = accuracy; int maxEdits = DEFAULT_MAXEDITS; Integer edits = params.getInt(MAXEDITS); if (edits != null) maxEdits = edits; int minPrefix = DEFAULT_MINPREFIX; Integer prefix = params.getInt(MINPREFIX); if (prefix != null) minPrefix = prefix; int maxInspections = DEFAULT_MAXINSPECTIONS; Integer inspections = params.getInt(MAXINSPECTIONS); if (inspections != null) maxInspections = inspections; float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY; Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY); if (threshold != null) minThreshold = threshold; int minQueryLength = DEFAULT_MINQUERYLENGTH; Integer queryLength = params.getInt(MINQUERYLENGTH); if (queryLength != null) minQueryLength = queryLength; int maxQueryLength = DEFAULT_MAXQUERYLENGTH; Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH); if (overriddenMaxQueryLength != null) maxQueryLength = overriddenMaxQueryLength; float 
maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY; Float queryFreq = params.getFloat(MAXQUERYFREQUENCY); if (queryFreq != null) maxQueryFrequency = queryFreq; checker.setComparator(comp); checker.setDistance(sd); checker.setMaxEdits(maxEdits); checker.setMinPrefix(minPrefix); checker.setAccuracy(minAccuracy); checker.setThresholdFrequency(minThreshold); checker.setMaxInspections(maxInspections); checker.setMinQueryLength(minQueryLength); checker.setMaxQueryLength(maxQueryLength); checker.setMaxQueryFrequency(maxQueryFrequency); checker.setLowerCaseTerms(false); return name; }