Java Code Examples for org.apache.lucene.analysis.StopFilter#makeStopSet()
The following examples show how to use
org.apache.lucene.analysis.StopFilter#makeStopSet().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestFreeTextSuggester.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testEndingHole() throws Exception { // Just deletes "of" Analyzer a = new Analyzer() { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(); CharArraySet stopSet = StopFilter.makeStopSet("of"); return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet)); } }; Iterable<Input> keys = AnalyzingSuggesterTest.shuffle( new Input("wizard of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); sug.build(new InputArrayIterator(keys)); assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10))); // Falls back to unigram model, with backoff 0.4 times // prop 0.5: assertEquals("oz/0.20", toString(sug.lookup("wizard o", 10))); a.close(); }
Example 2
Source File: TestFreeTextSuggester.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testTwoEndingHoles() throws Exception { // Just deletes "of" Analyzer a = new Analyzer() { @Override public TokenStreamComponents createComponents(String field) { Tokenizer tokenizer = new MockTokenizer(); CharArraySet stopSet = StopFilter.makeStopSet("of"); return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet)); } }; Iterable<Input> keys = AnalyzingSuggesterTest.shuffle( new Input("wizard of of oz", 50) ); FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20); sug.build(new InputArrayIterator(keys)); assertEquals("", toString(sug.lookup("wizard of of", 10))); a.close(); }
Example 3
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to" (no trailing whitespace) through a {@code SuggestStopFilter} whose only stop
 * word is "to" and expects both tokens, "go" and "to", to be emitted.
 */
public void testEndNotStopWord() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to"));
  TokenStream suggestFilter = new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to"));

  String[] expectedTerms = {"go", "to"};
  int[] expectedStartOffsets = {0, 3};
  int[] expectedEndOffsets = {2, 5};
  int[] expectedPosIncrements = {1, 1};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false, true};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      5,
      expectedFlags,
      true);
}
Example 4
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to " (with trailing whitespace) through a {@code SuggestStopFilter} whose only stop
 * word is "to" and expects only "go" to be emitted.
 */
public void testEndIsStopWord() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to "));
  TokenStream suggestFilter = new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to"));

  String[] expectedTerms = {"go"};
  int[] expectedStartOffsets = {0};
  int[] expectedEndOffsets = {2};
  int[] expectedPosIncrements = {1};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      6,
      expectedFlags,
      true);
}
Example 5
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to school" through a {@code SuggestStopFilter} whose only stop word is "to" and
 * expects "go" and "school" to be emitted, with a position increment of 2 on "school".
 */
public void testMidStopWord() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to school"));
  TokenStream suggestFilter = new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to"));

  String[] expectedTerms = {"go", "school"};
  int[] expectedStartOffsets = {0, 6};
  int[] expectedEndOffsets = {2, 12};
  int[] expectedPosIncrements = {1, 2};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false, false};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      12,
      expectedFlags,
      true);
}
Example 6
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to a the school" through a {@code SuggestStopFilter} with stop words "to", "the"
 * and "a" and expects "go" and "school" to be emitted, with a position increment of 4 on
 * "school".
 */
public void testMultipleStopWords() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to a the school"));
  TokenStream suggestFilter =
      new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to", "the", "a"));

  String[] expectedTerms = {"go", "school"};
  int[] expectedStartOffsets = {0, 12};
  int[] expectedEndOffsets = {2, 18};
  int[] expectedPosIncrements = {1, 4};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false, false};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      18,
      expectedFlags,
      true);
}
Example 7
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to a the" (no trailing whitespace) through a {@code SuggestStopFilter} with stop
 * words "to", "the" and "a" and expects "go" and the final "the" to be emitted, with a
 * position increment of 3 on "the".
 */
public void testMultipleStopWordsEnd() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to a the"));
  TokenStream suggestFilter =
      new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to", "the", "a"));

  String[] expectedTerms = {"go", "the"};
  int[] expectedStartOffsets = {0, 8};
  int[] expectedEndOffsets = {2, 11};
  int[] expectedPosIncrements = {1, 3};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false, true};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      11,
      expectedFlags,
      true);
}
Example 8
Source File: TestSuggestStopFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs "go to a the " (with trailing whitespace) through a {@code SuggestStopFilter} with stop
 * words "to", "the" and "a" and expects only "go" to be emitted.
 */
public void testMultipleStopWordsEnd2() throws Exception {
  Tokenizer tokenizer = new MockTokenizer();
  tokenizer.setReader(new StringReader("go to a the "));
  TokenStream suggestFilter =
      new SuggestStopFilter(tokenizer, StopFilter.makeStopSet("to", "the", "a"));

  String[] expectedTerms = {"go"};
  int[] expectedStartOffsets = {0};
  int[] expectedEndOffsets = {2};
  int[] expectedPosIncrements = {1};
  // NOTE(review): presumably the per-token keyword flags checked by
  // assertTokenStreamContents -- confirm against the overload in use.
  boolean[] expectedFlags = {false};

  assertTokenStreamContents(
      suggestFilter,
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      null,
      expectedPosIncrements,
      null,
      12,
      expectedFlags,
      true);
}
Example 9
Source File: TestConcatenateGraphFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
@Test public void testWithStopword() throws Exception { for (boolean preservePosInc : new boolean[]{true, false}) { Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true); String input = "a mykeyword a keyword"; //LUCENE-8344 add "a" tokenStream.setReader(new StringReader(input)); TokenFilter tokenFilter = new StopFilter(tokenStream, StopFilter.makeStopSet("a")); ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenFilter, SEP_LABEL, preservePosInc, 10); CharsRefBuilder builder = new CharsRefBuilder(); if (preservePosInc) { builder.append(SEP_LABEL); } builder.append("mykeyword"); builder.append(SEP_LABEL); if (preservePosInc) { builder.append(SEP_LABEL); } builder.append("keyword"); // if (preservePosInc) { LUCENE-8344 uncomment // builder.append(SEP_LABEL); // } assertTokenStreamContents(concatStream, new String[]{builder.toCharsRef().toString()}); } }
Example 10
Source File: TestConcatenateGraphFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Removes the stop words "A", "D", "E" and "J" from "A B C D E F J H", concatenates the
 * remaining tokens with '-' as separator and the filter's boolean option set to
 * {@code false}, and expects the single token "B-C-F-H".
 */
@Test
public void testSeparatorWithStopWords() throws IOException {
  Tokenizer whitespaceTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  whitespaceTokenizer.setReader(new StringReader("A B C D E F J H"));

  TokenStream withoutStopWords =
      new StopFilter(whitespaceTokenizer, StopFilter.makeStopSet("A", "D", "E", "J"));
  ConcatenateGraphFilter concatenated =
      new ConcatenateGraphFilter(withoutStopWords, '-', false, 100);

  String[] expectedTerms = {"B-C-F-H"};
  int[] expectedPosIncrements = {1};
  assertTokenStreamContents(concatenated, expectedTerms, null, null, expectedPosIncrements);
}
Example 11
Source File: TestConcatenateGraphFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Removes the stop words "A", "D", "E" and "J" from "A B C D E F J H", concatenates the
 * remaining tokens with '-' as separator and the filter's boolean option set to
 * {@code true}, and expects the single token "-B-C---F--H".
 */
@Test
public void testSeparatorWithStopWordsAndPreservePositionIncrements() throws IOException {
  Tokenizer whitespaceTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  whitespaceTokenizer.setReader(new StringReader("A B C D E F J H"));

  TokenStream withoutStopWords =
      new StopFilter(whitespaceTokenizer, StopFilter.makeStopSet("A", "D", "E", "J"));
  ConcatenateGraphFilter concatenated =
      new ConcatenateGraphFilter(withoutStopWords, '-', true, 100);

  String[] expectedTerms = {"-B-C---F--H"};
  int[] expectedPosIncrements = {1};
  assertTokenStreamContents(concatenated, expectedTerms, null, null, expectedPosIncrements);
}