org.elasticsearch.index.analysis.CharFilterFactory Java Examples

The following examples show how to use org.elasticsearch.index.analysis.CharFilterFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IcuAnalysisTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Checks that, with empty settings, the bundle plugin exposes the ICU char filter,
 * tokenizer, token filters and collation analyzer under their expected names.
 *
 * @throws IOException declared by the test; propagated from the analysis setup
 */
public void testDefaultsIcuAnalysis() throws IOException {
    final Index testIndex = new Index("test", "_na_");
    final TestAnalysis icuAnalysis =
            createTestAnalysis(testIndex, Settings.EMPTY, new BundlePlugin(Settings.EMPTY));

    // Char filter registered as "icu_normalizer".
    final CharFilterFactory normalizerCharFilter = icuAnalysis.charFilter.get("icu_normalizer");
    assertThat(normalizerCharFilter, instanceOf(IcuNormalizerCharFilterFactory.class));

    // Tokenizer.
    final TokenizerFactory icuTokenizer = icuAnalysis.tokenizer.get("icu_tokenizer");
    assertThat(icuTokenizer, instanceOf(IcuTokenizerFactory.class));

    // Token filters: each one is looked up and verified before the next lookup happens.
    final TokenFilterFactory normalizerFilter = icuAnalysis.tokenFilter.get("icu_normalizer");
    assertThat(normalizerFilter, instanceOf(IcuNormalizerTokenFilterFactory.class));

    final TokenFilterFactory foldingFilter = icuAnalysis.tokenFilter.get("icu_folding");
    assertThat(foldingFilter, instanceOf(IcuFoldingTokenFilterFactory.class));

    final TokenFilterFactory transformFilter = icuAnalysis.tokenFilter.get("icu_transform");
    assertThat(transformFilter, instanceOf(IcuTransformTokenFilterFactory.class));

    // Analyzer.
    final Analyzer collationAnalyzer = icuAnalysis.indexAnalyzers.get("icu_collation");
    assertThat(collationAnalyzer, instanceOf(NamedAnalyzer.class));
}
 
Example #2
Source File: AnalysisModule.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the analysis components contributed by {@code plugins} and wires them,
 * together with the pre-configured components, into the {@code analysisRegistry}.
 *
 * @param environment environment handed to the hunspell service and to the registry
 * @param plugins     analysis plugins whose components are registered
 * @throws IOException declared by the constructor; propagated from the setup steps
 */
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    // Named registries of providers, one per component kind.
    final NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilterRegistry = setupCharFilters(plugins);
    final NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> dictionaryRegistry =
            setupHunspellDictionaries(plugins);
    // The hunspell service is only needed while setting up the token filters.
    final NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilterRegistry = setupTokenFilters(
            plugins,
            new HunspellService(environment.settings(), environment, dictionaryRegistry.getRegistry()));
    final NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizerRegistry = setupTokenizers(plugins);
    final NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzerRegistry = setupAnalyzers(plugins);
    final NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizerRegistry = setupNormalizers();

    // Pre-configured components.
    final Map<String, PreConfiguredCharFilter> builtInCharFilters = setupPreConfiguredCharFilters(plugins);
    final Map<String, PreConfiguredTokenFilter> builtInTokenFilters = setupPreConfiguredTokenFilters(plugins);
    final Map<String, PreConfiguredTokenizer> builtInTokenizers = setupPreConfiguredTokenizers(plugins);
    final Map<String, PreBuiltAnalyzerProviderFactory> builtInAnalyzers =
            setupPreBuiltAnalyzerProviderFactories(plugins);

    analysisRegistry = new AnalysisRegistry(
            environment,
            charFilterRegistry.getRegistry(),
            tokenFilterRegistry.getRegistry(),
            tokenizerRegistry.getRegistry(),
            analyzerRegistry.getRegistry(),
            normalizerRegistry.getRegistry(),
            builtInCharFilters,
            builtInTokenFilters,
            builtInTokenizers,
            builtInAnalyzers);
}
 
Example #3
Source File: HyphenAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Assembles the custom analyzer for this provider from the char filters and token filters
 * named in the analyzer settings ({@code char_filter} and {@code filter}).
 * The provider's own {@code tokenFilterFactory} is always placed first in the token filter chain.
 *
 * @param tokenizers   available tokenizers; not consulted here, the provider's own
 *                     {@code tokenizerFactory} is used
 * @param charFilters  available char filter factories, looked up by configured name
 * @param tokenFilters available token filter factories, looked up by configured name
 * @throws IllegalArgumentException if a configured char filter or token filter name is unknown
 */
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Resolve every configured char filter, failing on the first unknown name.
    final List<CharFilterFactory> resolvedCharFilters = new ArrayList<>();
    final List<String> requestedCharFilters = analyzerSettings.getAsList("char_filter");
    requestedCharFilters.forEach(requested -> {
        final CharFilterFactory factory = charFilters.get(requested);
        if (factory == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find char_filter under name [" + requested + "]");
        }
        resolvedCharFilters.add(factory);
    });

    // The provider's own token filter leads the chain; configured filters follow in order.
    final List<TokenFilterFactory> resolvedTokenFilters = new ArrayList<>();
    resolvedTokenFilters.add(tokenFilterFactory);
    final List<String> requestedTokenFilters = analyzerSettings.getAsList("filter");
    requestedTokenFilters.forEach(requested -> {
        final TokenFilterFactory factory = tokenFilters.get(requested);
        if (factory == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find filter under name [" + requested + "]");
        }
        resolvedTokenFilters.add(factory);
    });

    final int positionGap = analyzerSettings.getAsInt("position_offset_gap", 0);
    final int characterOffsetGap = analyzerSettings.getAsInt("offset_gap", -1);

    final CharFilterFactory[] charFilterChain = resolvedCharFilters.toArray(new CharFilterFactory[0]);
    final TokenFilterFactory[] tokenFilterChain = resolvedTokenFilters.toArray(new TokenFilterFactory[0]);
    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            charFilterChain, tokenFilterChain, positionGap, characterOffsetGap);
}
 
Example #4
Source File: SortformAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Assembles the custom analyzer for this provider from the char filters and token filters
 * named in the analyzer settings ({@code char_filter} and {@code filter}).
 *
 * @param tokenizers   available tokenizers; not consulted here, the provider's own
 *                     {@code tokenizerFactory} is used
 * @param charFilters  available char filter factories, looked up by configured name
 * @param tokenFilters available token filter factories, looked up by configured name
 * @throws IllegalArgumentException if a configured char filter or token filter name is unknown
 */
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Map each configured char filter name to its factory; the first unknown name aborts.
    final List<String> requestedCharFilters = analyzerSettings.getAsList("char_filter");
    final CharFilterFactory[] charFilterChain = requestedCharFilters.stream()
            .map(requested -> {
                final CharFilterFactory factory = charFilters.get(requested);
                if (factory == null) {
                    throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                            "] failed to find char_filter under name [" + requested + "]");
                }
                return factory;
            })
            .toArray(CharFilterFactory[]::new);

    // Same resolution for the configured token filters.
    final List<String> requestedTokenFilters = analyzerSettings.getAsList("filter");
    final TokenFilterFactory[] tokenFilterChain = requestedTokenFilters.stream()
            .map(requested -> {
                final TokenFilterFactory factory = tokenFilters.get(requested);
                if (factory == null) {
                    throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                            "] failed to find filter under name [" + requested + "]");
                }
                return factory;
            })
            .toArray(TokenFilterFactory[]::new);

    final int positionGap = analyzerSettings.getAsInt("position_offset_gap", 0);
    final int characterOffsetGap = analyzerSettings.getAsInt("offset_gap", -1);

    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            charFilterChain, tokenFilterChain, positionGap, characterOffsetGap);
}
 
Example #5
Source File: BundlePlugin.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Returns the char filter providers contributed by this plugin.
 * The ICU char filters are registered unless the {@code plugins.xbib.icu.enabled}
 * setting is false; the setting defaults to true.
 *
 * @return a map from char filter name to provider; empty when ICU is disabled
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> getCharFilters() {
    final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> providers = new LinkedHashMap<>();
    if (!settings.getAsBoolean("plugins.xbib.icu.enabled", true)) {
        // ICU switched off: nothing to contribute.
        return providers;
    }
    providers.put("icu_normalizer", IcuNormalizerCharFilterFactory::new);
    providers.put("icu_folding", IcuFoldingCharFilterFactory::new);
    return providers;
}
 
Example #6
Source File: ESTestCase.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Bundles the analysis components built for a test so they can be inspected together.
 *
 * @param indexAnalyzers the index analyzers
 * @param tokenFilter    token filter factories keyed by String
 * @param tokenizer      tokenizer factories keyed by String
 * @param charFilter     char filter factories keyed by String
 */
public TestAnalysis(IndexAnalyzers indexAnalyzers,
                    Map<String, TokenFilterFactory> tokenFilter,
                    Map<String, TokenizerFactory> tokenizer,
                    Map<String, CharFilterFactory> charFilter) {
    // Plain field captures; the arguments are stored as given, without copying.
    this.charFilter = charFilter;
    this.tokenizer = tokenizer;
    this.tokenFilter = tokenFilter;
    this.indexAnalyzers = indexAnalyzers;
}
 
Example #7
Source File: CommonAnalysisPlugin.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the char filter providers contributed by this plugin:
 * {@code html_strip}, {@code pattern_replace} and {@code mapping}.
 * The latter two are wrapped with {@code requiresAnalysisSettings}.
 *
 * @return a sorted map from char filter name to provider
 */
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
    final Map<String, AnalysisProvider<CharFilterFactory>> providersByName = new TreeMap<>();

    providersByName.put("html_strip", HtmlStripCharFilterFactory::new);

    final AnalysisProvider<CharFilterFactory> patternReplace =
            requiresAnalysisSettings(PatternReplaceCharFilterFactory::new);
    providersByName.put("pattern_replace", patternReplace);

    final AnalysisProvider<CharFilterFactory> mapping =
            requiresAnalysisSettings(MappingCharFilterFactory::new);
    providersByName.put("mapping", mapping);

    return providersByName;
}
 
Example #8
Source File: AnalysisModule.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the {@code char_filter} registry and fills it with the char filters
 * returned by each plugin's {@link AnalysisPlugin#getCharFilters()}.
 *
 * @param plugins analysis plugins to extract char filter providers from
 * @return the populated registry
 */
private NamedRegistry<AnalysisProvider<CharFilterFactory>> setupCharFilters(List<AnalysisPlugin> plugins) {
    final NamedRegistry<AnalysisProvider<CharFilterFactory>> registry =
            new NamedRegistry<>("char_filter");
    registry.extractAndRegister(plugins, AnalysisPlugin::getCharFilters);
    return registry;
}
 
Example #9
Source File: AnalysisPlugin.java    From crate with Apache License 2.0 2 votes vote down vote up
/**
 * Override to add additional {@link CharFilter}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
 * for how to get the configuration from the index.
 *
 * @return the {@link CharFilterFactory} providers keyed by String; this default
 *         implementation returns an empty map
 */
default Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
    return emptyMap();
}