Java Code Examples for org.elasticsearch.indices.analysis.AnalysisModule#AnalysisProvider

The following examples show how to use org.elasticsearch.indices.analysis.AnalysisModule#AnalysisProvider . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BundlePlugin.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 7 votes vote down vote up
/**
 * Registers the analyzers contributed by this bundle plugin. Every analyzer can be
 * disabled individually through a {@code plugins.xbib.*.enabled} setting (all default
 * to enabled). A {@link LinkedHashMap} is used so registration order is preserved.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers =
            new LinkedHashMap<>();
    if (settings.getAsBoolean("plugins.xbib.icu.enabled", true)) {
        analyzers.put("icu_collation", IcuCollationKeyAnalyzerProvider::new);
    }
    if (settings.getAsBoolean("plugins.xbib.hyphen.enabled", true)) {
        analyzers.put("hyphen", HyphenAnalyzerProvider::new);
    }
    if (settings.getAsBoolean("plugins.xbib.naturalsort.enabled", true)) {
        analyzers.put("naturalsort", NaturalSortKeyAnalyzerProvider::new);
    }
    if (settings.getAsBoolean("plugins.xbib.sortform.enabled", true)) {
        analyzers.put("sortform", SortformAnalyzerProvider::new);
    }
    if (settings.getAsBoolean("plugins.xbib.standardnumber.enabled", true)) {
        // Needs the shared standardNumberTypeParser, so a lambda is used instead of a constructor reference.
        analyzers.put("standardnumber", (is, env, analyzerName, analyzerSettings) ->
                new StandardnumberAnalyzerProvider(is, env, analyzerName, analyzerSettings, standardNumberTypeParser));
    }
    return analyzers;
}
 
Example 2
Source File: AnalysisHanLPPlugin.java    From elasticsearch-analysis-hanlp with Apache License 2.0 5 votes vote down vote up
/**
 * Exposes the HanLP tokenizer variants. Each registry name is mapped to the static
 * factory method on {@code HanLPTokenizerFactory} that builds the corresponding
 * {@code TokenizerFactory}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("hanlp", HanLPTokenizerFactory::getHanLPTokenizerFactory);
    tokenizers.put("hanlp_standard", HanLPTokenizerFactory::getHanLPStandardTokenizerFactory);
    tokenizers.put("hanlp_index", HanLPTokenizerFactory::getHanLPIndexTokenizerFactory);
    tokenizers.put("hanlp_nlp", HanLPTokenizerFactory::getHanLPNLPTokenizerFactory);
    tokenizers.put("hanlp_n_short", HanLPTokenizerFactory::getHanLPNShortTokenizerFactory);
    tokenizers.put("hanlp_dijkstra", HanLPTokenizerFactory::getHanLPDijkstraTokenizerFactory);
    tokenizers.put("hanlp_crf", HanLPTokenizerFactory::getHanLPCRFTokenizerFactory);
    tokenizers.put("hanlp_speed", HanLPTokenizerFactory::getHanLPSpeedTokenizerFactory);
    return tokenizers;
}
 
Example 3
Source File: AnalysisLcPinyinPlugin.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 5 votes vote down vote up
/**
 * Registers the LC pinyin tokenizers: {@code lc_index} and {@code lc_search}.
 * Each name maps to the static factory method that builds its tokenizer factory.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Diamond operator instead of the fully spelled-out generic type argument
    // (consistent with the other plugin registration methods in this codebase).
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> providerMap = new HashMap<>();

    providerMap.put("lc_index", LcPinyinTokenizerFactory::getLcIndexTokenizerFactory);
    providerMap.put("lc_search", LcPinyinTokenizerFactory::getLcSmartPinyinTokenizerFactory);

    return providerMap;
}
 
Example 4
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the token filter factories for the given index.
 *
 * <p>The registered providers are copied and then extended with {@code synonym} and
 * {@code synonym_graph}: these two are special-cased because they need access to this
 * registry (for the index's tokenizer factories), which we would rather not expose
 * through the general plugin interface.
 *
 * @param indexSettings the settings of the index being built
 * @return the resolved token filter factories, keyed by filter name
 * @throws IOException if a filter factory fails to load its resources
 */
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
    final Map<String, Settings> filterSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
    final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>(this.tokenFilters);
    filters.put("synonym",
            requiresAnalysisSettings((is, env, filterName, filterSettings2) ->
                    new SynonymTokenFilterFactory(is, env, this, filterName, filterSettings2)));
    filters.put("synonym_graph",
            requiresAnalysisSettings((is, env, filterName, filterSettings2) ->
                    new SynonymGraphTokenFilterFactory(is, env, this, filterName, filterSettings2)));
    return buildMapping(Component.FILTER, indexSettings, filterSettings,
            Collections.unmodifiableMap(filters), prebuiltAnalysis.preConfiguredTokenFilters);
}
 
Example 5
Source File: AnalysisHanLPPlugin.java    From elasticsearch-analysis-hanlp with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the HanLP analyzer providers under the names {@code hanlp} and
 * {@code hanlp-index}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> providers =
            new HashMap<>();
    providers.put("hanlp", HanLPAnalyzerProvider::new);
    providers.put("hanlp-index", HanLPIndexAnalyzerProvider::new);
    return providers;
}
 
Example 6
Source File: AnalysisHanLPPlugin.java    From elasticsearch-analysis-hanlp with Apache License 2.0 5 votes vote down vote up
/**
 * Exposes the HanLP analyzer variants. Each registry name is mapped to the static
 * factory method on {@code HanLPAnalyzerProvider} that builds its analyzer provider.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> providers =
            new HashMap<>();
    providers.put("hanlp", HanLPAnalyzerProvider::getHanLPAnalyzerProvider);
    providers.put("hanlp_standard", HanLPAnalyzerProvider::getHanLPStandardAnalyzerProvider);
    providers.put("hanlp_index", HanLPAnalyzerProvider::getHanLPIndexAnalyzerProvider);
    providers.put("hanlp_nlp", HanLPAnalyzerProvider::getHanLPNLPAnalyzerProvider);
    providers.put("hanlp_n_short", HanLPAnalyzerProvider::getHanLPNShortAnalyzerProvider);
    providers.put("hanlp_dijkstra", HanLPAnalyzerProvider::getHanLPDijkstraAnalyzerProvider);
    providers.put("hanlp_crf", HanLPAnalyzerProvider::getHanLPCRFAnalyzerProvider);
    providers.put("hanlp_speed", HanLPAnalyzerProvider::getHanLPSpeedAnalyzerProvider);
    return providers;
}
 
Example 7
Source File: AnalysiaHLSegPlugin.java    From elasticsearch-analysis-hlseg with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the single HLSeg tokenizer under the name {@code hlseg_search}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("hlseg_search", HLSegTokenizerFactory::getHLSegSearchTokenizerFactory);
    return tokenizers;
}
 
Example 8
Source File: AnalysisIkPlugin.java    From Elasticsearch-Tutorial-zh-CN with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Registers the IK analyzers: {@code ik_smart} and {@code ik_max_word}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers =
            new HashMap<>();
    analyzers.put("ik_smart", IkAnalyzerProvider::getIkSmartAnalyzerProvider);
    analyzers.put("ik_max_word", IkAnalyzerProvider::getIkAnalyzerProvider);
    return analyzers;
}
 
Example 9
Source File: AnalysisJiebaPlugin.java    From elasticsearch-jieba-plugin with MIT License 5 votes vote down vote up
/**
 * Registers the jieba tokenizers: {@code jieba_search} and {@code jieba_index}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
  final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
  tokenizers.put("jieba_search", JiebaTokenizerFactory::getJiebaSearchTokenizerFactory);
  tokenizers.put("jieba_index", JiebaTokenizerFactory::getJiebaIndexTokenizerFactory);
  return tokenizers;
}
 
Example 10
Source File: AnalysisOpenKoreanTextPlugin.java    From elasticsearch-analysis-openkoreantext with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the OpenKoreanText token filters: stemmer, redundant-token filter,
 * and phrase extractor.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> filters = new HashMap<>();
    filters.put("openkoreantext-stemmer", OpenKoreanTextStemmerFactory::new);
    filters.put("openkoreantext-redundant-filter", OpenKoreanTextRedundantFilterFactory::new);
    filters.put("openkoreantext-phrase-extractor", OpenKoreanTextPhraseExtractorFactory::new);
    return filters;
}
 
Example 11
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps an {@link AnalysisModule.AnalysisProvider} so that it reports
 * {@code requiresAnalysisSettings() == true}, i.e. the component may only be
 * instantiated when explicit analysis settings are present.
 *
 * @param provider the provider to delegate to
 * @param <T>      the analysis component type produced by the provider
 * @return a delegating provider that additionally requires analysis settings
 */
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
    return new AnalysisModule.AnalysisProvider<T>() {

        // Creation is delegated untouched; only the requiresAnalysisSettings flag changes.
        @Override
        public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
            return provider.get(indexSettings, environment, name, settings);
        }

        @Override
        public boolean requiresAnalysisSettings() {
            return true;
        }
    };
}
 
Example 12
Source File: DynamicSynonymTokenFilterFactory.java    From elasticsearch-analysis-dynamic-synonym with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a token filter factory whose synonym map is reloaded periodically from
 * {@code synonyms_path}. The location is polled every {@code interval} seconds by a
 * scheduled monitor task registered per index.
 *
 * @param indexSettings    settings of the owning index
 * @param env              node environment used to resolve the synonym file
 * @param analysisRegistry registry used to resolve the tokenizer that parses the synonym rules
 * @param name             the filter's configured name
 * @param settings         the filter's settings ({@code synonyms_path} is mandatory)
 * @throws IOException              if the synonym source cannot be read
 * @throws IllegalArgumentException if {@code synonyms_path} is missing or the tokenizer is unknown
 */
public DynamicSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                        String name, Settings settings) throws IOException {

    // Load configuration from the filter settings.
    super(indexSettings, name, settings);
    this.indexName = indexSettings.getIndex().getName();
    this.interval = settings.getAsInt("interval", 60);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.expand = settings.getAsBoolean("expand", true);
    this.format = settings.get("format", "");
    this.location = settings.get("synonyms_path");

    logger.info("indexName:{} synonyms_path:{} interval:{} ignore_case:{} expand:{} format:{}",
            indexName, location, interval, ignoreCase, expand, format);

    // Validate required properties: a synonym source location is mandatory.
    if (this.location == null) {
        throw new IllegalArgumentException(
                "dynamic synonym requires `synonyms_path` to be configured");
    }

    // Resolve the tokenizer used to parse the synonym rules (defaults to whitespace).
    String tokenizerName = settings.get("tokenizer", "whitespace");
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
            analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
    if (tokenizerFactoryFactory == null) {
        throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
    }
    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
            AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
    // Throwaway analyzer used only while parsing the synonym rules; lowercases when ignore_case is set.
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };


    // Choose the synonym reload strategy based on the location prefix (remote URL vs. local file).
    // NOTE(review): only "http://" is recognized as remote; an "https://" location falls through
    // to the local-file branch — confirm this is intended.
    SynonymFile synonymFile;
    if (location.startsWith("http://")) {
        synonymFile = new RemoteSynonymFile(env, analyzer, expand, format, location);
    } else {
        synonymFile = new LocalSynonymFile(env, analyzer, expand, format, location);
    }
    synonymMap = synonymFile.reloadSynonymMap();

    // Register with the per-index monitor list so the synonym file is reloaded on a fixed schedule.
    scheduledFutures.putIfAbsent(this.indexName, new CopyOnWriteArrayList<ScheduledFuture>());
    scheduledFutures.get(this.indexName)
            .add(monitorPool.scheduleAtFixedRate(new Monitor(synonymFile), interval, interval, TimeUnit.SECONDS));
}
 
Example 13
Source File: BundlePlugin.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Registers the token filters contributed by this bundle plugin. Every filter can be
 * disabled individually through a {@code plugins.xbib.*.enabled} setting (all default
 * to enabled). A LinkedHashMap preserves registration order, so entries must not be
 * reordered.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
    Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extra = new LinkedHashMap<>();
    // The ICU flag gates four related filters at once.
    if (settings.getAsBoolean("plugins.xbib.icu.enabled", true)) {
        extra.put("icu_normalizer", IcuNormalizerTokenFilterFactory::new);
        extra.put("icu_folding", IcuFoldingTokenFilterFactory::new);
        extra.put("icu_transform", IcuTransformTokenFilterFactory::new);
        extra.put("icu_numberformat", IcuNumberFormatTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.autophrase.enabled", true)) {
        extra.put("auto_phrase", AutoPhrasingTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.baseform.enabled", true)) {
        extra.put("baseform", BaseformTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.concat.enabled", true)) {
        extra.put("concat", ConcatTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.pair.enabled", true)) {
        extra.put("pair", PairTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.decompound.enabled", true)) {
        extra.put("decompound", DecompoundTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.german_normalize.enabled", true)) {
        extra.put("german_normalize", GermanNormalizationFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.hyphen.enabled", true)) {
        extra.put("hyphen", HyphenTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.sortform.enabled", true)) {
        extra.put("sortform", SortformTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.standardnumber.enabled", true)) {
        // Needs the shared standardNumberTypeParser, so a lambda is used instead of a constructor reference.
        extra.put("standardnumber", (indexSettings, environment, name, factorySettings) ->
                new StandardnumberTokenFilterFactory(indexSettings, environment, name, factorySettings, standardNumberTypeParser));
    }
    if (settings.getAsBoolean("plugins.xbib.fst_decompound.enabled", true)) {
        extra.put("fst_decompound", FstDecompoundTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.worddelimiter.enabled", true)) {
        extra.put("worddelimiter", WordDelimiterFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.worddelimiter2.enabled", true)) {
        extra.put("worddelimiter2", WordDelimiterFilter2Factory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.symbolname.enabled", true)) {
        extra.put("symbolname", SymbolnameTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.year.enabled", true)) {
        extra.put("year", GregorianYearTokenFilterFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.lemmatize.enabled", true)) {
        extra.put("lemmatize", LemmatizeTokenFilterFactory::new);
    }
    return extra;
}
 
Example 14
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up a registered {@link TokenizerFactory} provider by name, falling back to the
 * pre-built tokenizer of the same name.
 *
 * @param tokenizer the tokenizer name
 * @return the registered or pre-built provider, or {@code null} if neither exists
 */
public AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerProvider(String tokenizer) {
    // The pre-built fallback is computed up front, matching the original eager
    // getOrDefault(...) evaluation.
    final AnalysisModule.AnalysisProvider<TokenizerFactory> prebuilt =
            this.prebuiltAnalysis.getTokenizerFactory(tokenizer);
    return tokenizers.getOrDefault(tokenizer, prebuilt);
}
 
Example 15
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 4 votes vote down vote up
public AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
    return preConfiguredTokenFilters.get(name);
}
 
Example 16
Source File: SynonymTokenFilterFactory.java    From elasticsearch-analysis-synonym with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a synonym token filter factory.
 *
 * <p>A throwaway {@link Analyzer} (tokenizer from settings, default whitespace, plus
 * optional lowercasing) is built to parse the synonym rules, which are then loaded via
 * {@code SynonymLoader}. If loading yields no synonym map, the constructor warns when
 * {@code synonyms}/{@code synonyms_path} was configured but empty, and throws only when
 * neither was configured at all.
 *
 * @param indexSettings    settings of the owning index
 * @param environment      node environment used to resolve synonym resources
 * @param name             the filter's configured name
 * @param settings         the filter's settings
 * @param analysisRegistry registry used to resolve the rule-parsing tokenizer; may be null
 * @throws IOException              if the synonym source cannot be read
 * @throws IllegalArgumentException if the tokenizer is unknown or no synonym source is configured
 */
public SynonymTokenFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings,
        final AnalysisRegistry analysisRegistry) throws IOException {
    super(indexSettings, name, settings);

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    final boolean expand = settings.getAsBoolean("expand", true);

    final String tokenizerName = settings.get("tokenizer", "whitespace");

    // The registry may be absent; in that case the whitespace tokenizer is used below.
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = null;
    if (analysisRegistry != null) {
        tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
        if (tokenizerFactoryFactory == null) {
            throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
        }
    }

    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory == null ? null
            : tokenizerFactoryFactory.get(indexSettings, environment, tokenizerName, AnalysisRegistry
                    .getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));

    // Analyzer used only for parsing the synonym rules; lowercases when ignore_case is set.
    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            final TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    synonymLoader = new SynonymLoader(environment, settings, expand, analyzer);
    if (synonymLoader.getSynonymMap() == null) {
        // Configured-but-empty sources only warn; a completely missing source is an error.
        if (settings.getAsList("synonyms", null) != null) {
            logger.warn("synonyms values are empty.");
        } else if (settings.get("synonyms_path") != null) {
            logger.warn("synonyms_path[{}] is empty.", settings.get("synonyms_path"));
        } else {
            throw new IllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured");
        }
    }
}
 
Example 17
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the pre-configured tokenizer provider registered under the given name,
 * or {@code null} if none exists.
 *
 * @param name the tokenizer name
 * @return the pre-configured provider, or {@code null}
 */
public AnalysisModule.AnalysisProvider<TokenizerFactory> getTokenizerFactory(String name) {
    final AnalysisModule.AnalysisProvider<TokenizerFactory> provider = preConfiguredTokenizers.get(name);
    return provider;
}
 
Example 18
Source File: AnalysisRegistry.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the analyzer provider factory registered under the given name,
 * or {@code null} if none exists.
 *
 * @param name the analyzer name
 * @return the registered provider, or {@code null}
 */
public AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> getAnalyzerProvider(String name) {
    final AnalysisModule.AnalysisProvider<AnalyzerProvider<?>> provider = analyzerProviderFactories.get(name);
    return provider;
}
 
Example 19
Source File: AnalysisOpenKoreanTextPlugin.java    From elasticsearch-analysis-openkoreantext with Apache License 2.0 4 votes vote down vote up
/**
 * Registers the single OpenKoreanText analyzer under the name
 * {@code openkoreantext-analyzer}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
    final String analyzerName = "openkoreantext-analyzer";
    return singletonMap(analyzerName, OpenKoreanTextAnalyzerProvider::new);
}
 
Example 20
Source File: AnalysisIkPlugin.java    From Elasticsearch-Tutorial-zh-CN with GNU General Public License v3.0 3 votes vote down vote up
/**
 * Registers the IK tokenizers: {@code ik_smart} and {@code ik_max_word}.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    final Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("ik_smart", IkTokenizerFactory::getIkSmartTokenizerFactory);
    tokenizers.put("ik_max_word", IkTokenizerFactory::getIkTokenizerFactory);
    return tokenizers;
}