org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider Java Examples
The following examples show how to use
org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CommonAnalysisPlugin.java From crate with Apache License 2.0 | 6 votes |
/**
 * Builds the name-to-provider table of tokenizers contributed by this plugin.
 *
 * <p>The path hierarchy tokenizer is registered under two names
 * ({@code "path_hierarchy"} and {@code "PathHierarchy"}).
 *
 * @return a new sorted map from tokenizer name to its factory provider
 */
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final Map<String, AnalysisProvider<TokenizerFactory>> registry = new TreeMap<>();

    // Pattern-based tokenizers.
    registry.put("simple_pattern", SimplePatternTokenizerFactory::new);
    registry.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);

    registry.put("thai", ThaiTokenizerFactory::new);

    // N-gram tokenizers.
    registry.put("ngram", NGramTokenizerFactory::new);
    registry.put("edge_ngram", EdgeNGramTokenizerFactory::new);

    registry.put("char_group", CharGroupTokenizerFactory::new);
    registry.put("classic", ClassicTokenizerFactory::new);
    registry.put("letter", LetterTokenizerFactory::new);
    registry.put("lowercase", LowerCaseTokenizerFactory::new);

    // Same factory, two spellings of the name.
    registry.put("path_hierarchy", PathHierarchyTokenizerFactory::new);
    registry.put("PathHierarchy", PathHierarchyTokenizerFactory::new);

    registry.put("pattern", PatternTokenizerFactory::new);
    registry.put("uax_url_email", UAX29URLEmailTokenizerFactory::new);
    registry.put("whitespace", WhitespaceTokenizerFactory::new);
    registry.put("keyword", KeywordTokenizerFactory::new);

    return registry;
}
Example #2
Source File: SynonymPlugin.java From elasticsearch-analysis-synonym with Apache License 2.0 | 6 votes |
/**
 * Registers the {@code "synonym_filter"} token filter.
 *
 * <p>The provider reports that it requires analysis settings and builds a
 * {@link SynonymTokenFilterFactory} using the analysis registry obtained from
 * {@code pluginComponent}.
 *
 * @return a new mutable map holding the single {@code "synonym_filter"} entry
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final AnalysisProvider<TokenFilterFactory> synonymProvider = new AnalysisProvider<TokenFilterFactory>() {

        @Override
        public TokenFilterFactory get(final IndexSettings indexSettings, final Environment environment,
                final String name, final Settings settings) throws IOException {
            return new SynonymTokenFilterFactory(
                    indexSettings, environment, name, settings, pluginComponent.getAnalysisRegistry());
        }

        @Override
        public boolean requiresAnalysisSettings() {
            return true;
        }
    };

    final Map<String, AnalysisProvider<TokenFilterFactory>> filtersByName = new HashMap<>();
    filtersByName.put("synonym_filter", synonymProvider);
    return filtersByName;
}
Example #3
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 6 votes |
/**
 * Resolves the {@link TokenFilterFactory} provider for a token filter name.
 *
 * <p>If the index settings define a filter of that name under
 * {@code index.analysis.filter}, the provider is chosen from the filter's
 * configured {@code type}; otherwise resolution is delegated to the
 * single-argument {@code getTokenFilterProvider(String)} overload.
 *
 * @param tokenFilter   global or index-defined token filter name
 * @param indexSettings the index settings to inspect
 * @return the {@link TokenFilterFactory} provider, or whatever the name-only lookup yields
 *         when the filter is not defined in the index settings
 */
public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenFilter, IndexSettings indexSettings) {
    final Map<String, Settings> definedFilters =
            indexSettings.getSettings().getGroups("index.analysis.filter");

    // Not configured on the index: fall back to the lookup by name only.
    if (!definedFilters.containsKey(tokenFilter)) {
        return getTokenFilterProvider(tokenFilter);
    }

    final String typeName = definedFilters.get(tokenFilter).get("type");

    // synonym and synonym_graph are special-cased because their factories need
    // this registry (for access to the index's tokenizer factories). They are
    // wired here directly instead of widening the general plugin interface.
    if ("synonym".equals(typeName)) {
        return requiresAnalysisSettings(
                (is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
    }
    if ("synonym_graph".equals(typeName)) {
        return requiresAnalysisSettings(
                (is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
    }

    return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
}
Example #4
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 6 votes |
/**
 * Creates a registry from the supplied provider maps and pre-configured components.
 *
 * <p>Each provider map is wrapped with {@code unmodifiableMap} before being stored;
 * the pre-configured components are bundled into a {@code PrebuiltAnalysis}.
 *
 * @param environment               the environment stored for later provider calls
 * @param charFilters               char filter providers by name
 * @param tokenFilters              token filter providers by name
 * @param tokenizers                tokenizer providers by name
 * @param analyzers                 analyzer providers by name
 * @param normalizers               normalizer providers by name
 * @param preConfiguredCharFilters  pre-configured char filters by name
 * @param preConfiguredTokenFilters pre-configured token filters by name
 * @param preConfiguredTokenizers   pre-configured tokenizers by name
 * @param preConfiguredAnalyzers    pre-built analyzer provider factories by name
 */
public AnalysisRegistry(Environment environment,
                        Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
                        Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
                        Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
                        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
                        Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
                        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
                        Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
    this.environment = environment;

    // Read-only views over the caller-supplied provider maps.
    this.charFilters = unmodifiableMap(charFilters);
    this.tokenFilters = unmodifiableMap(tokenFilters);
    this.tokenizers = unmodifiableMap(tokenizers);
    this.analyzers = unmodifiableMap(analyzers);
    this.normalizers = unmodifiableMap(normalizers);

    this.prebuiltAnalysis = new PrebuiltAnalysis(
            preConfiguredCharFilters,
            preConfiguredTokenFilters,
            preConfiguredTokenizers,
            preConfiguredAnalyzers);
}
Example #5
Source File: CommonAnalysisPlugin.java From crate with Apache License 2.0 | 5 votes |
/**
 * Builds the name-to-provider table of char filters contributed by this plugin.
 *
 * <p>{@code "pattern_replace"} and {@code "mapping"} are wrapped with
 * {@code requiresAnalysisSettings}; {@code "html_strip"} is not.
 *
 * @return a new sorted map from char filter name to its factory provider
 */
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
    final Map<String, AnalysisProvider<CharFilterFactory>> charFilterProviders = new TreeMap<>();

    charFilterProviders.put("html_strip", HtmlStripCharFilterFactory::new);
    charFilterProviders.put("pattern_replace",
            requiresAnalysisSettings(PatternReplaceCharFilterFactory::new));
    charFilterProviders.put("mapping",
            requiresAnalysisSettings(MappingCharFilterFactory::new));

    return charFilterProviders;
}
Example #6
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Resolves the {@link CharFilterFactory} provider for a char filter name.
 *
 * <p>If the index settings define a char filter of that name under
 * {@code index.analysis.char_filter}, the provider is chosen from its configured
 * {@code type}; otherwise resolution is delegated to the single-argument
 * {@code getCharFilterProvider(String)} overload.
 *
 * @param charFilter    global or index-defined char filter name
 * @param indexSettings the index settings to inspect
 * @return the {@link CharFilterFactory} provider, or whatever the name-only lookup yields
 *         when the char filter is not defined in the index settings
 */
public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilter, IndexSettings indexSettings) {
    final Map<String, Settings> charFilterSettings =
            indexSettings.getSettings().getGroups("index.analysis.char_filter");

    // Not configured on the index: fall back to the lookup by name only.
    if (!charFilterSettings.containsKey(charFilter)) {
        return getCharFilterProvider(charFilter);
    }

    final String typeName = charFilterSettings.get(charFilter).get("type");
    return getAnalysisProvider(Component.CHAR_FILTER, charFilters, charFilter, typeName);
}
Example #7
Source File: AnalysisPlugin.java From crate with Apache License 2.0 | 5 votes |
/**
 * Wraps an {@link AnalysisProvider} so that it reports it requires analysis settings.
 *
 * @param provider the provider to delegate {@code get} calls to
 * @param <T>      the type produced by the provider
 * @return a provider that forwards {@code get} to {@code provider} and whose
 *         {@code requiresAnalysisSettings()} returns {@code true}
 */
static <T> AnalysisProvider<T> requiresAnalysisSettings(AnalysisProvider<T> provider) {
    final AnalysisProvider<T> settingsRequiringProvider = new AnalysisProvider<T>() {

        @Override
        public boolean requiresAnalysisSettings() {
            return true;
        }

        @Override
        public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings)
                throws IOException {
            // Pure delegation; only the flag above differs from the wrapped provider.
            return provider.get(indexSettings, environment, name, settings);
        }
    };
    return settingsRequiringProvider;
}
Example #8
Source File: ChineseWordPlugin.java From word with Apache License 2.0 | 5 votes |
/**
 * Registers the tokenizers contributed by this plugin.
 *
 * <p>Both {@code "word_tokenizer"} and {@code "word_sentence"} are bound to
 * {@code ChineseWordTokenizerFactory}.
 *
 * @return a new mutable map from tokenizer name to its factory provider
 */
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final Map<String, AnalysisProvider<TokenizerFactory>> tokenizersByName = new HashMap<>();
    tokenizersByName.put("word_tokenizer", ChineseWordTokenizerFactory::new);
    tokenizersByName.put("word_sentence", ChineseWordTokenizerFactory::new);
    return tokenizersByName;
}
Example #9
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Resolves the {@link TokenizerFactory} provider for a tokenizer name.
 *
 * <p>If the index settings define a tokenizer of that name under
 * {@code index.analysis.tokenizer}, the provider is chosen from its configured
 * {@code type}; otherwise resolution is delegated to the single-argument
 * {@code getTokenizerProvider(String)} overload.
 *
 * @param tokenizer     global or index-defined tokenizer name
 * @param indexSettings the index settings to inspect
 * @return the {@link TokenizerFactory} provider, or whatever the name-only lookup yields
 *         when the tokenizer is not defined in the index settings
 */
public AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) {
    final Map<String, Settings> definedTokenizers =
            indexSettings.getSettings().getGroups("index.analysis.tokenizer");

    // Not configured on the index: fall back to the lookup by name only.
    if (!definedTokenizers.containsKey(tokenizer)) {
        return getTokenizerProvider(tokenizer);
    }

    final String typeName = definedTokenizers.get(tokenizer).get("type");
    return getAnalysisProvider(Component.TOKENIZER, tokenizers, tokenizer, typeName);
}
Example #10
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Builds the token filter factories for an index.
 *
 * <p>Starts from a copy of the registered token filter providers, adds providers for
 * {@code "synonym"} and {@code "synonym_graph"}, and hands the result to
 * {@code buildMapping} together with the index's filter settings and the
 * pre-configured token filters.
 *
 * @param indexSettings the index settings whose {@code INDEX_ANALYSIS_FILTER} groups are read
 * @return the mapping produced by {@code buildMapping}
 * @throws IOException propagated from {@code buildMapping}
 */
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
    final Map<String, Settings> filterSettingsByName =
            indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);

    // synonym and synonym_graph are special-cased because their factories need
    // this registry (for access to the index's tokenizer factories). They are
    // wired here directly instead of widening the general plugin interface.
    final AnalysisModule.AnalysisProvider<TokenFilterFactory> synonym = requiresAnalysisSettings(
            (is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
    final AnalysisModule.AnalysisProvider<TokenFilterFactory> synonymGraph = requiresAnalysisSettings(
            (is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));

    // Work on a copy so the registry's own provider map is left untouched.
    final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> effectiveFilters =
            new HashMap<>(this.tokenFilters);
    effectiveFilters.put("synonym", synonym);
    effectiveFilters.put("synonym_graph", synonymGraph);

    return buildMapping(
            Component.FILTER,
            indexSettings,
            filterSettingsByName,
            Collections.unmodifiableMap(effectiveFilters),
            prebuiltAnalysis.preConfiguredTokenFilters);
}
Example #11
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Returns the {@link Analyzer} registered under the given name.
 *
 * <p>The prebuilt analysis is consulted first; if it has a provider for the name, an
 * analyzer is obtained from it directly. Otherwise the registered analyzer providers
 * are consulted, and the resulting analyzer is stored in {@code cachedAnalyzer}
 * via {@code computeIfAbsent}.
 *
 * @param analyzer the analyzer name
 * @return the analyzer, or <code>null</code> if neither lookup has a provider for the name
 * @throws IOException if the prebuilt provider fails to produce the analyzer
 */
public Analyzer getAnalyzer(String analyzer) throws IOException {
    final AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> prebuilt =
            this.prebuiltAnalysis.getAnalyzerProvider(analyzer);
    if (prebuilt != null) {
        return prebuilt.get(environment, analyzer).get();
    }

    final AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> registered = analyzers.get(analyzer);
    if (registered == null) {
        return null;
    }

    return cachedAnalyzer.computeIfAbsent(analyzer, (key) -> {
        try {
            return registered.get(environment, key).get();
        } catch (IOException ex) {
            // The mapping function cannot throw a checked exception, so wrap it.
            throw new ElasticsearchException("failed to load analyzer for name " + key, ex);
        }
    });
}
Example #12
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Looks up the provider registered for a component type name.
 *
 * @param component   the kind of component, used only in error messages
 * @param providerMap providers keyed by type name
 * @param name        the configured component name, used only in error messages
 * @param typeName    the type name to look up
 * @param <T>         the type produced by the provider
 * @return the provider registered under {@code typeName}
 * @throws IllegalArgumentException if {@code typeName} is {@code null} or has no entry
 *                                  in {@code providerMap}
 */
private <T> AnalysisProvider<T> getAnalysisProvider(Component component,
                                                    Map<String, ? extends AnalysisProvider<T>> providerMap,
                                                    String name,
                                                    String typeName) {
    if (typeName == null) {
        throw new IllegalArgumentException(
                component + " [" + name + "] must specify either an analyzer type, or a tokenizer");
    }

    final AnalysisProvider<T> provider = providerMap.get(typeName);
    if (provider == null) {
        throw new IllegalArgumentException(
                "Unknown " + component + " type [" + typeName + "] for [" + name + "]");
    }
    return provider;
}
Example #13
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 5 votes |
/**
 * Wraps a provider so that it reports it requires analysis settings.
 *
 * @param provider the provider to delegate {@code get} calls to
 * @param <T>      the type produced by the provider
 * @return a provider that forwards {@code get} to {@code provider} and whose
 *         {@code requiresAnalysisSettings()} returns {@code true}
 */
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
    final AnalysisModule.AnalysisProvider<T> settingsRequiringProvider = new AnalysisModule.AnalysisProvider<T>() {

        @Override
        public boolean requiresAnalysisSettings() {
            return true;
        }

        @Override
        public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings)
                throws IOException {
            // Pure delegation; only the flag above differs from the wrapped provider.
            return provider.get(indexSettings, environment, name, settings);
        }
    };
    return settingsRequiringProvider;
}
Example #14
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Returns the tokenizer providers held by this registry.
 *
 * @return the {@code tokenizers} map, keyed by name
 */
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    return this.tokenizers;
}
Example #15
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Returns the char filter providers held by this registry.
 *
 * @return the {@code charFilters} map, keyed by name
 */
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
    return this.charFilters;
}
Example #16
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Returns the analyzer providers held by this registry.
 *
 * @return the {@code analyzers} map, keyed by name
 */
public Map<String, AnalysisProvider<AnalyzerProvider<?>>> getAnalyzers() {
    return this.analyzers;
}
Example #17
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Looks up a char filter provider by name in {@code preConfiguredCharFilterFactories}.
 *
 * @param name the char filter name
 * @return the result of the lookup for {@code name}
 */
public AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
    final AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterProvider =
            preConfiguredCharFilterFactories.get(name);
    return charFilterProvider;
}
Example #18
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Looks up a token filter provider by name in {@code preConfiguredTokenFilters}.
 *
 * @param name the token filter name
 * @return the result of the lookup for {@code name}
 */
public AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
    final AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterProvider =
            preConfiguredTokenFilters.get(name);
    return tokenFilterProvider;
}
Example #19
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Looks up a tokenizer provider by name in {@code preConfiguredTokenizers}.
 *
 * @param name the tokenizer name
 * @return the result of the lookup for {@code name}
 */
public AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerFactory(String name) {
    final AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerProvider =
            preConfiguredTokenizers.get(name);
    return tokenizerProvider;
}
Example #20
Source File: AnalysisRegistry.java From crate with Apache License 2.0 | 4 votes |
/**
 * Looks up an analyzer provider by name in {@code analyzerProviderFactories}.
 *
 * @param name the analyzer name
 * @return the result of the lookup for {@code name}
 */
public AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> getAnalyzerProvider(String name) {
    final AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> analyzerProvider =
            analyzerProviderFactories.get(name);
    return analyzerProvider;
}
Example #21
Source File: AnalysisPhoneticPlugin.java From crate with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "phonetic"} token filter.
 *
 * @return the map built by {@code singletonMap} with {@code "phonetic"} bound to
 *         {@code PhoneticTokenFilterFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final AnalysisProvider<TokenFilterFactory> phoneticProvider = PhoneticTokenFilterFactory::new;
    return singletonMap("phonetic", phoneticProvider);
}
Example #22
Source File: CommonAnalysisPlugin.java From crate with Apache License 2.0 | 4 votes |
/**
 * Builds the name-to-provider table of analyzers contributed by this plugin:
 * a handful of general-purpose analyzers followed by the language analyzers.
 *
 * @return a new sorted map from analyzer name to its provider
 */
@Override
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> providers = new TreeMap<>();

    // General-purpose analyzers.
    providers.put("fingerprint", FingerprintAnalyzerProvider::new);
    providers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
    providers.put("pattern", PatternAnalyzerProvider::new);
    providers.put("snowball", SnowballAnalyzerProvider::new);

    // Language analyzers:
    providers.put("arabic", ArabicAnalyzerProvider::new);
    providers.put("armenian", ArmenianAnalyzerProvider::new);
    providers.put("basque", BasqueAnalyzerProvider::new);
    providers.put("bengali", BengaliAnalyzerProvider::new);
    providers.put("brazilian", BrazilianAnalyzerProvider::new);
    providers.put("bulgarian", BulgarianAnalyzerProvider::new);
    providers.put("catalan", CatalanAnalyzerProvider::new);
    providers.put("chinese", ChineseAnalyzerProvider::new);
    providers.put("cjk", CjkAnalyzerProvider::new);
    providers.put("czech", CzechAnalyzerProvider::new);
    providers.put("danish", DanishAnalyzerProvider::new);
    providers.put("dutch", DutchAnalyzerProvider::new);
    providers.put("english", EnglishAnalyzerProvider::new);
    providers.put("finnish", FinnishAnalyzerProvider::new);
    providers.put("french", FrenchAnalyzerProvider::new);
    providers.put("galician", GalicianAnalyzerProvider::new);
    providers.put("german", GermanAnalyzerProvider::new);
    providers.put("greek", GreekAnalyzerProvider::new);
    providers.put("hindi", HindiAnalyzerProvider::new);
    providers.put("hungarian", HungarianAnalyzerProvider::new);
    providers.put("indonesian", IndonesianAnalyzerProvider::new);
    providers.put("irish", IrishAnalyzerProvider::new);
    providers.put("italian", ItalianAnalyzerProvider::new);
    providers.put("latvian", LatvianAnalyzerProvider::new);
    providers.put("lithuanian", LithuanianAnalyzerProvider::new);
    providers.put("norwegian", NorwegianAnalyzerProvider::new);
    providers.put("persian", PersianAnalyzerProvider::new);
    providers.put("portuguese", PortugueseAnalyzerProvider::new);
    providers.put("romanian", RomanianAnalyzerProvider::new);
    providers.put("russian", RussianAnalyzerProvider::new);
    providers.put("sorani", SoraniAnalyzerProvider::new);
    providers.put("spanish", SpanishAnalyzerProvider::new);
    providers.put("swedish", SwedishAnalyzerProvider::new);
    providers.put("turkish", TurkishAnalyzerProvider::new);
    providers.put("thai", ThaiAnalyzerProvider::new);

    return providers;
}
Example #23
Source File: CommonAnalysisPlugin.java From crate with Apache License 2.0 | 4 votes |
/**
 * Builds the name-to-provider table of token filters contributed by this plugin.
 *
 * <p>Entries wrapped with {@code requiresAnalysisSettings} report that they
 * require analysis settings; the remaining entries are plain constructor references.
 *
 * @return a new sorted map from token filter name to its factory provider
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final Map<String, AnalysisProvider<TokenFilterFactory>> registry = new TreeMap<>();

    registry.put("apostrophe", ApostropheFilterFactory::new);
    registry.put("arabic_normalization", ArabicNormalizationFilterFactory::new);
    registry.put("arabic_stem", ArabicStemTokenFilterFactory::new);
    registry.put("asciifolding", ASCIIFoldingTokenFilterFactory::new);
    registry.put("bengali_normalization", BengaliNormalizationFilterFactory::new);
    registry.put("brazilian_stem", BrazilianStemTokenFilterFactory::new);
    registry.put("cjk_bigram", CJKBigramFilterFactory::new);
    registry.put("cjk_width", CJKWidthFilterFactory::new);
    registry.put("classic", ClassicFilterFactory::new);
    registry.put("czech_stem", CzechStemTokenFilterFactory::new);
    registry.put("common_grams",
            requiresAnalysisSettings(CommonGramsTokenFilterFactory::new));
    registry.put("decimal_digit", DecimalDigitFilterFactory::new);
    registry.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
    registry.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
    registry.put("dictionary_decompounder",
            requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
    registry.put("dutch_stem", DutchStemTokenFilterFactory::new);
    registry.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
    registry.put("elision", ElisionTokenFilterFactory::new);
    registry.put("fingerprint", FingerprintTokenFilterFactory::new);
    registry.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
    registry.put("french_stem", FrenchStemTokenFilterFactory::new);
    registry.put("german_normalization", GermanNormalizationFilterFactory::new);
    registry.put("german_stem", GermanStemTokenFilterFactory::new);
    registry.put("hindi_normalization", HindiNormalizationFilterFactory::new);
    registry.put("hyphenation_decompounder",
            requiresAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
    registry.put("indic_normalization", IndicNormalizationFilterFactory::new);
    registry.put("keep",
            requiresAnalysisSettings(KeepWordFilterFactory::new));
    registry.put("keep_types",
            requiresAnalysisSettings(KeepTypesFilterFactory::new));
    registry.put("keyword_marker",
            requiresAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
    registry.put("kstem", KStemTokenFilterFactory::new);
    registry.put("length", LengthTokenFilterFactory::new);
    registry.put("limit", LimitTokenCountFilterFactory::new);
    registry.put("lowercase", LowerCaseTokenFilterFactory::new);
    registry.put("min_hash", MinHashTokenFilterFactory::new);
    registry.put("multiplexer", MultiplexerTokenFilterFactory::new);
    registry.put("ngram", NGramTokenFilterFactory::new);
    registry.put("pattern_capture",
            requiresAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
    registry.put("pattern_replace",
            requiresAnalysisSettings(PatternReplaceTokenFilterFactory::new));
    registry.put("persian_normalization", PersianNormalizationFilterFactory::new);
    registry.put("porter_stem", PorterStemTokenFilterFactory::new);
    registry.put("remove_duplicates", RemoveDuplicatesTokenFilterFactory::new);
    registry.put("reverse", ReverseTokenFilterFactory::new);
    registry.put("russian_stem", RussianStemTokenFilterFactory::new);
    registry.put("scandinavian_folding", ScandinavianFoldingFilterFactory::new);
    registry.put("scandinavian_normalization", ScandinavianNormalizationFilterFactory::new);
    registry.put("serbian_normalization", SerbianNormalizationFilterFactory::new);
    registry.put("snowball", SnowballTokenFilterFactory::new);
    registry.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
    registry.put("stemmer_override",
            requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
    registry.put("stemmer", StemmerTokenFilterFactory::new);
    registry.put("trim", TrimTokenFilterFactory::new);
    registry.put("truncate",
            requiresAnalysisSettings(TruncateTokenFilterFactory::new));
    registry.put("unique", UniqueTokenFilterFactory::new);
    registry.put("uppercase", UpperCaseTokenFilterFactory::new);
    registry.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
    registry.put("word_delimiter", WordDelimiterTokenFilterFactory::new);

    return registry;
}
Example #24
Source File: AnalysisMorfologikPlugin.java From elasticsearch-analysis-morfologik with Apache License 2.0 | 4 votes |
/**
 * Registers the Morfologik token filter under {@code FILTER_NAME}.
 *
 * @return the map built by {@code singletonMap} with {@code FILTER_NAME} bound to
 *         {@code MorfologikTokenFilterFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final AnalysisProvider<TokenFilterFactory> morfologikFilter = MorfologikTokenFilterFactory::new;
    return singletonMap(FILTER_NAME, morfologikFilter);
}
Example #25
Source File: AnalysisMorfologikPlugin.java From elasticsearch-analysis-morfologik with Apache License 2.0 | 4 votes |
/**
 * Registers the Morfologik analyzer under {@code ANALYZER_NAME}.
 *
 * <p>The provider passes only the index settings, name and settings to
 * {@code MorfologikAnalyzerProvider}; the environment argument is not used.
 *
 * @return the map built by {@code singletonMap} with {@code ANALYZER_NAME} bound to
 *         the provider
 */
@Override
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final AnalysisProvider<AnalyzerProvider<? extends Analyzer>> morfologikAnalyzer =
            (indexSettings, environment, name, settings) ->
                    new MorfologikAnalyzerProvider(indexSettings, name, settings);
    return singletonMap(ANALYZER_NAME, morfologikAnalyzer);
}
Example #26
Source File: SynonymPlugin.java From elasticsearch-analysis-synonym with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "ngram_synonym"} tokenizer.
 *
 * @return the map built by {@code singletonMap} with {@code "ngram_synonym"} bound to
 *         {@code NGramSynonymTokenizerFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final AnalysisProvider<TokenizerFactory> ngramSynonym = NGramSynonymTokenizerFactory::new;
    return singletonMap("ngram_synonym", ngramSynonym);
}
Example #27
Source File: AnalysisVietnamesePlugin.java From elasticsearch-analysis-vietnamese with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "vi_tokenizer"} tokenizer.
 *
 * @return the map built by {@code singletonMap} with {@code "vi_tokenizer"} bound to
 *         {@code VietnameseTokenizerFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final AnalysisProvider<TokenizerFactory> vietnameseTokenizer = VietnameseTokenizerFactory::new;
    return singletonMap("vi_tokenizer", vietnameseTokenizer);
}
Example #28
Source File: AnalysisVietnamesePlugin.java From elasticsearch-analysis-vietnamese with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "vi_analyzer"} analyzer.
 *
 * @return the map built by {@code singletonMap} with {@code "vi_analyzer"} bound to
 *         {@code VietnameseAnalyzerProvider}
 */
@Override
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final AnalysisProvider<AnalyzerProvider<? extends Analyzer>> vietnameseAnalyzer =
            VietnameseAnalyzerProvider::new;
    return singletonMap("vi_analyzer", vietnameseAnalyzer);
}
Example #29
Source File: AnalysisVietnamesePlugin.java From elasticsearch-analysis-vietnamese with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "vi_stop"} token filter.
 *
 * @return a new mutable map with {@code "vi_stop"} bound to
 *         {@code VietnameseStopTokenFilterFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final Map<String, AnalysisProvider<TokenFilterFactory>> filtersByName = new HashMap<>();
    filtersByName.put("vi_stop", VietnameseStopTokenFilterFactory::new);
    return filtersByName;
}
Example #30
Source File: MinHashPlugin.java From elasticsearch-minhash with Apache License 2.0 | 4 votes |
/**
 * Registers the {@code "minhash"} token filter.
 *
 * @return a new mutable map with {@code "minhash"} bound to
 *         {@code MinHashTokenFilterFactory}
 */
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final Map<String, AnalysisProvider<TokenFilterFactory>> filtersByName = new HashMap<>();
    filtersByName.put("minhash", MinHashTokenFilterFactory::new);
    return filtersByName;
}