Java Code Examples for org.apache.lucene.analysis.CharArraySet#EMPTY_SET
The following examples show how to use
org.apache.lucene.analysis.CharArraySet#EMPTY_SET.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Analysis.java From crate with Apache License 2.0 | 6 votes |
/**
 * Resolves the word set configured under the setting {@code name}.
 *
 * <p>Resolution order:
 * <ol>
 *   <li>If the setting has a value and it equals {@code "_none_"}, the empty set is returned.</li>
 *   <li>If the setting has any other value, its list form is resolved via
 *       {@code resolveNamedWords}.</li>
 *   <li>Otherwise the list returned by {@code getWordList} is resolved, when it is non-null.</li>
 *   <li>Otherwise {@code defaultWords} is returned unchanged.</li>
 * </ol>
 *
 * @param env          environment handed to {@code getWordList}
 * @param settings     settings to read {@code name} from
 * @param name         key of the setting to inspect
 * @param defaultWords value returned when nothing is configured
 * @param namedWords   named word sets passed through to {@code resolveNamedWords}
 * @param ignoreCase   case flag passed through to {@code resolveNamedWords}
 * @return the resolved word set
 */
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
                                      Map<String, Set<?>> namedWords, boolean ignoreCase) {
    final String configured = settings.get(name);
    if (configured == null) {
        // Nothing set inline: try the word list helper, then the caller's default.
        final List<String> loaded = getWordList(env, settings, name);
        if (loaded == null) {
            return defaultWords;
        }
        return resolveNamedWords(loaded, namedWords, ignoreCase);
    }
    if ("_none_".equals(configured)) {
        return CharArraySet.EMPTY_SET;
    }
    return resolveNamedWords(settings.getAsList(name), namedWords, ignoreCase);
}
Example 2
Source File: TestDutchAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code DutchAnalyzer} twice with an empty first set and a second set
 * containing "lichamelijk", and checks both times that "lichamelijk" is emitted
 * unchanged while "lichamelijke" is emitted as "licham".
 */
public void testExclusionTableViaCtor() throws IOException {
    final String input = "lichamelijk lichamelijke";

    CharArraySet exclusions = new CharArraySet(1, true);
    exclusions.add("lichamelijk");

    DutchAnalyzer first = new DutchAnalyzer(CharArraySet.EMPTY_SET, exclusions);
    assertAnalyzesTo(first, input, new String[] { "lichamelijk", "licham" });
    first.close();

    // Same check against a freshly constructed analyzer.
    DutchAnalyzer second = new DutchAnalyzer(CharArraySet.EMPTY_SET, exclusions);
    assertAnalyzesTo(second, input, new String[] { "lichamelijk", "licham" });
    second.close();
}
Example 3
Source File: StandardHtmlStripAnalyzerProvider.java From crate with Apache License 2.0 | 5 votes |
/**
 * Creates the provider: parses the stop words from {@code settings} (falling back to
 * the empty set), builds a {@code StandardHtmlStripAnalyzer} with them and applies
 * {@code version} to it.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment used when parsing stop words
 * @param name          forwarded to the superclass constructor
 * @param settings      settings the stop words are parsed from
 */
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    // The empty set is the default when no stop words are configured.
    CharArraySet parsedStopWords = Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(parsedStopWords);
    analyzer.setVersion(version);
}
Example 4
Source File: TestThaiAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Analyzes a Thai sentence with a {@code ThaiAnalyzer} built from the empty set and
 * checks the produced terms together with their start and end offsets.
 */
public void testOffsets() throws Exception {
    final String[] expectedTerms = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" };
    final int[] expectedStarts = { 0, 3, 6, 9, 13, 17, 20, 23 };
    final int[] expectedEnds = { 3, 6, 9, 13, 17, 20, 23, 25 };

    Analyzer thai = new ThaiAnalyzer(CharArraySet.EMPTY_SET);
    assertAnalyzesTo(thai, "การที่ได้ต้องแสดงว่างานดี", expectedTerms, expectedStarts, expectedEnds);
    thai.close();
}
Example 5
Source File: StandardAnalyzerProvider.java From crate with Apache License 2.0 | 5 votes |
/**
 * Creates the provider: parses the stop words from {@code settings} (falling back to
 * the empty set), reads {@code "max_token_length"} (defaulting to
 * {@code StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH}), and configures a new
 * {@code StandardAnalyzer} with the stop words, {@code version} and that length.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment used when parsing stop words
 * @param name          forwarded to the superclass constructor
 * @param settings      settings the stop words and token length are read from
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // Read both settings before the analyzer is created, as the original does.
    CharArraySet parsedStopWords = Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    int tokenLengthLimit = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);

    standardAnalyzer = new StandardAnalyzer(parsedStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}
Example 6
Source File: TestThaiAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the same {@code ThaiAnalyzer} instance over three inputs in a row: an empty
 * string, a pure Thai sentence, and a Thai sentence mixed with Latin text and an
 * e-mail address, checking the emitted terms each time.
 */
public void testReusableTokenStream() throws Exception {
    ThaiAnalyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET);

    assertAnalyzesTo(analyzer, "", new String[] {});

    assertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" });

    // The input must carry the literal address xyz@demo.com: the expected terms
    // "xyz" and "demo.com" below can only come from it.
    assertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });

    analyzer.close();
}
Example 7
Source File: CJKBigramFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Prepares the analyzer under test: an {@code IcuTokenizer} feeding a
 * {@code CJKBigramFilter}, wrapped in a {@code StopFilter} with the empty set.
 */
@Before
public void up() {
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            DefaultIcuTokenizerConfig tokenizerConfig = new DefaultIcuTokenizerConfig(false, true);
            Tokenizer tokenizer = new IcuTokenizer(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, tokenizerConfig);
            TokenStream bigrams = new CJKBigramFilter(tokenizer);
            StopFilter stopFiltered = new StopFilter(bigrams, CharArraySet.EMPTY_SET);
            return new TokenStreamComponents(tokenizer, stopFiltered);
        }
    };
}
Example 8
Source File: BulgarianAnalyzer.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public BulgarianAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 9
Source File: TestDutchAnalyzer.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Verifies that the default stem overrides still apply when the analyzer is
 * created through a non-default constructor: "fiets" must come out as "fiets".
 */
public void testStemOverrides() throws IOException {
    DutchAnalyzer dutch = new DutchAnalyzer(CharArraySet.EMPTY_SET);
    checkOneTerm(dutch, "fiets", "fiets");
    dutch.close();
}
Example 10
Source File: UkrainianMorfologikAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public UkrainianMorfologikAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 11
Source File: IndonesianAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public IndonesianAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 12
Source File: IrishAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public IrishAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 13
Source File: CatalanAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public CatalanAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 14
Source File: GermanAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public GermanAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 15
Source File: PolishAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public PolishAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 16
Source File: SmartChineseAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates a new SmartChineseAnalyzer, optionally with the default stopword list.
 *
 * <p>The bundled default stopword list is simply a list of punctuation. If it is
 * not used, punctuation will not be removed from the text!
 *
 * @param useDefaultStopWords true to use the default stopword list; false to use
 *                            {@link CharArraySet#EMPTY_SET}
 */
public SmartChineseAnalyzer(boolean useDefaultStopWords) {
    if (useDefaultStopWords) {
        stopWords = DefaultSetHolder.DEFAULT_STOP_SET;
    } else {
        stopWords = CharArraySet.EMPTY_SET;
    }
}
Example 17
Source File: SoraniAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public SoraniAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 18
Source File: BlendedInfixSuggesterTest.java From lucene-solr with Apache License 2.0 | 2 votes |
public void testSuggesterCountForAllLookups() throws IOException { Input keys[] = new Input[]{ new Input("lend me your ears", 1), new Input("as you sow so shall you reap", 1), }; Path tempDir = createTempDir("BlendedInfixSuggesterTest"); Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET); // BlenderType.LINEAR is used by default (remove position*10%) BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a); suggester.build(new InputArrayIterator(keys)); String term = "you"; List<Lookup.LookupResult> responses = suggester.lookup(term, false, 1); assertEquals(1, responses.size()); responses = suggester.lookup(term, false, 2); assertEquals(2, responses.size()); responses = suggester.lookup(term, 1, false, false); assertEquals(1, responses.size()); responses = suggester.lookup(term, 2, false, false); assertEquals(2, responses.size()); responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 1, false, false); assertEquals(1, responses.size()); responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 2, false, false); assertEquals(2, responses.size()); responses = suggester.lookup(term, (Set<BytesRef>) null, 1, false, false); assertEquals(1, responses.size()); responses = suggester.lookup(term, (Set<BytesRef>) null, 2, false, false); assertEquals(2, responses.size()); responses = suggester.lookup(term, null, false, 1); assertEquals(1, responses.size()); responses = suggester.lookup(term, null, false, 2); assertEquals(2, responses.size()); responses = suggester.lookup(term, (BooleanQuery) null, 1, false, false); assertEquals(1, responses.size()); responses = suggester.lookup(term, (BooleanQuery) null, 2, false, false); assertEquals(2, responses.size()); suggester.close(); }
Example 19
Source File: GalicianAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public GalicianAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}
Example 20
Source File: BengaliAnalyzer.java From lucene-solr with Apache License 2.0 | 2 votes |
/**
 * Creates an analyzer that uses the supplied stop words.
 * Delegates to the two-argument constructor with {@link CharArraySet#EMPTY_SET}
 * as the second argument.
 *
 * @param stopwords a stopword set
 */
public BengaliAnalyzer(CharArraySet stopwords) {
    this(stopwords, CharArraySet.EMPTY_SET);
}