org.apache.lucene.analysis.core.LowerCaseFilter Java Examples
The following examples show how to use
org.apache.lucene.analysis.core.LowerCaseFilter.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestProtectedTermFilter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Checks that {@code ProtectedTermFilter} applies the wrapped {@code LowerCaseFilter}
 * to every token except the protected one: "Bob" is expected to pass through unchanged
 * while the other three tokens come out lower-cased.
 */
public void testBasic() throws IOException {
    // Protected terms; the entry is "bob" while the token is "Bob"
    // (presumably the `true` flag makes the set case-insensitive -- confirm against CharArraySet).
    CharArraySet untouched = new CharArraySet(5, true);
    untouched.add("bob");

    // Four canned tokens, each with position increment 1 and explicit start/end offsets.
    CannedTokenStream input = new CannedTokenStream(
        new Token("Alice", 1, 0, 5),
        new Token("Bob", 1, 6, 9),
        new Token("Clara", 1, 10, 15),
        new Token("David", 1, 16, 21)
    );

    String[] expected = new String[]{ "alice", "Bob", "clara", "david" };
    TokenStream filtered = new ProtectedTermFilter(untouched, input, LowerCaseFilter::new);
    assertTokenStreamContents(filtered, expected);
}
Example #2
Source File: LowerCaseTokenFilterFactory.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Wraps the given stream in the lower-casing filter selected by {@code lang}.
 *
 * @param tokenStream the stream to wrap
 * @return the generic {@code LowerCaseFilter} when {@code lang} is {@code null}, otherwise the
 *         Greek, Irish or Turkish variant matching {@code lang} (compared case-insensitively)
 * @throws IllegalArgumentException if {@code lang} is set to any other value
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    // No language configured: fall back to the generic filter.
    if (lang == null) {
        return new LowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("greek")) {
        return new GreekLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("irish")) {
        return new IrishLowerCaseFilter(tokenStream);
    }
    if (lang.equalsIgnoreCase("turkish")) {
        return new TurkishLowerCaseFilter(tokenStream);
    }
    throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
Example #3
Source File: TestBrazilianAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a term marked as a keyword is not stemmed: of the two lower-cased inputs,
 * "brasília" (present in the keyword set) is expected unchanged while "brasilia" is
 * expected to be stemmed to "brasil".
 */
public void testWithKeywordAttribute() throws IOException {
    Tokenizer source = new LetterTokenizer();
    source.setReader(new StringReader("Brasília Brasilia"));

    CharArraySet keywords = new CharArraySet(1, true);
    keywords.add("Brasília");

    // Build the chain one stage at a time: lower-case -> keyword marker -> stemmer.
    LowerCaseFilter lowered = new LowerCaseFilter(source);
    SetKeywordMarkerFilter marked = new SetKeywordMarkerFilter(lowered, keywords);
    BrazilianStemFilter stemmed = new BrazilianStemFilter(marked);

    assertTokenStreamContents(stemmed, new String[] { "brasília", "brasil" });
}
Example #4
Source File: PatternAnalyzerImpl.java From database with GNU General Public License v2.0 | 5 votes |
@Override protected TokenStreamComponents createComponents(final String field) { //Use default grouping final Tokenizer tokenizer = new PatternTokenizer(pattern,-1); final TokenStream filter = new LowerCaseFilter(tokenizer); return new TokenStreamComponents(tokenizer, filter); }
Example #5
Source File: SynonymLoader.java From elasticsearch-analysis-synonym with Apache License 2.0 | 5 votes |
/**
 * Creates an analyzer built on a {@code KeywordTokenizer}, optionally followed by a
 * {@code LowerCaseFilter}.
 *
 * @param ignoreCase when {@code true}, the tokenizer output is wrapped in a
 *                   {@code LowerCaseFilter}; otherwise the tokenizer itself is the end of the chain
 * @return a new anonymous {@code Analyzer}
 */
protected static Analyzer getAnalyzer(final boolean ignoreCase) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new KeywordTokenizer();
            final TokenStream chain;
            if (ignoreCase) {
                chain = new LowerCaseFilter(source);
            } else {
                chain = source;
            }
            return new TokenStreamComponents(source, chain);
        }
    };
}
Example #6
Source File: AnnotationAnalyzer.java From elasticsearch-analysis-annotation with Apache License 2.0 | 5 votes |
/**
 * Builds the analysis chain: whitespace tokenizer -> lower-case filter -> inline annotation filter.
 *
 * @param fieldName the field name (not used when building the chain)
 * @param reader    the character source handed to the tokenizer
 * @return components pairing the tokenizer with the end of the filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer tokenizer = new WhitespaceTokenizer(version, reader);
    TokenStream lowered = new LowerCaseFilter(version, tokenizer);
    TokenStream annotated = new InlineAnnotationFilter(lowered);
    return new TokenStreamComponents(tokenizer, annotated);
}
Example #7
Source File: NGramAnalyzer.java From onedev with MIT License | 4 votes |
/**
 * Builds the analysis chain: an {@code NGramTokenizer} configured with this analyzer's
 * {@code minGram}/{@code maxGram}, followed by a {@code LowerCaseFilter}.
 *
 * @param fieldName the field name (not used when building the chain)
 * @return components pairing the n-gram tokenizer with its lower-casing filter
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer grams = new NGramTokenizer(minGram, maxGram);
    TokenStream lowered = new LowerCaseFilter(grams);
    return new TokenStreamComponents(grams, lowered);
}
Example #8
Source File: DynamicSynonymTokenFilterFactory.java From elasticsearch-analysis-dynamic-synonym with Apache License 2.0 | 4 votes |
public DynamicSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, String name, Settings settings) throws IOException { //加载配置 super(indexSettings, name, settings); this.indexName = indexSettings.getIndex().getName(); this.interval = settings.getAsInt("interval", 60); this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.expand = settings.getAsBoolean("expand", true); this.format = settings.get("format", ""); this.location = settings.get("synonyms_path"); logger.info("indexName:{} synonyms_path:{} interval:{} ignore_case:{} expand:{} format:{}", indexName, location, interval, ignoreCase, expand, format); //属性检查 if (this.location == null) { throw new IllegalArgumentException( "dynamic synonym requires `synonyms_path` to be configured"); } String tokenizerName = settings.get("tokenizer", "whitespace"); AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter"); } final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName, AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName)); Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create(); TokenStream stream = ignoreCase ? 
new LowerCaseFilter(tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; //根据location前缀初始化同义词更新策略 SynonymFile synonymFile; if (location.startsWith("http://")) { synonymFile = new RemoteSynonymFile(env, analyzer, expand, format, location); } else { synonymFile = new LocalSynonymFile(env, analyzer, expand, format, location); } synonymMap = synonymFile.reloadSynonymMap(); //加入监控队列,定时load scheduledFutures.putIfAbsent(this.indexName, new CopyOnWriteArrayList<ScheduledFuture>()); scheduledFutures.get(this.indexName) .add(monitorPool.scheduleAtFixedRate(new Monitor(synonymFile), interval, interval, TimeUnit.SECONDS)); }
Example #9
Source File: SynonymTokenFilterFactory.java From elasticsearch-analysis-synonym with Apache License 2.0 | 4 votes |
public SynonymTokenFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings, final AnalysisRegistry analysisRegistry) throws IOException { super(indexSettings, name, settings); this.ignoreCase = settings.getAsBoolean("ignore_case", false); final boolean expand = settings.getAsBoolean("expand", true); final String tokenizerName = settings.get("tokenizer", "whitespace"); AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = null; if (analysisRegistry != null) { tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter"); } } final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory == null ? null : tokenizerFactoryFactory.get(indexSettings, environment, tokenizerName, AnalysisRegistry .getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName)); final Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(final String fieldName) { final Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create(); final TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; synonymLoader = new SynonymLoader(environment, settings, expand, analyzer); if (synonymLoader.getSynonymMap() == null) { if (settings.getAsList("synonyms", null) != null) { logger.warn("synonyms values are empty."); } else if (settings.get("synonyms_path") != null) { logger.warn("synonyms_path[{}] is empty.", settings.get("synonyms_path")); } else { throw new IllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured"); } } }