org.elasticsearch.plugins.AnalysisPlugin Java Examples
The following examples show how to use
org.elasticsearch.plugins.AnalysisPlugin.
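Before the individual call sites, here is a minimal sketch of what a custom AnalysisPlugin can look like. It is an illustration only: the class name MyAnalysisPlugin and the filter name "reverse_example" are assumptions, not taken from the examples below, and the factory simply wraps Lucene's ReverseStringFilter. The overridden getTokenFilters() is the hook that AnalysisModule#setupTokenFilters (Example #4) picks up via extractAndRegister.

import java.util.Collections;
import java.util.Map;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Minimal sketch, not a production plugin: registers one token filter type
// named "reverse_example" (illustrative) that reverses each token.
public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        AnalysisProvider<TokenFilterFactory> provider = (indexSettings, environment, name, settings) ->
            new AbstractTokenFilterFactory(indexSettings, name, settings) {
                @Override
                public TokenStream create(TokenStream tokenStream) {
                    return new ReverseStringFilter(tokenStream);
                }
            };
        return Collections.singletonMap("reverse_example", provider);
    }
}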
Example #1
Source File: AnalysisModule.java From crate with Apache License 2.0
static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredTokenFilter> preConfiguredTokenFilters = new NamedRegistry<>("pre-configured token_filter");

    // Add filters available in lucene-core
    preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
    preConfiguredTokenFilters.register(
        "standard",
        PreConfiguredTokenFilter.singletonWithVersion("standard", false, (reader, version) -> {
            DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
                "The [standard] token filter is deprecated and will be removed in a future version.");
            return reader;
        }));
    /* Note that "stop" is available in lucene-core but its pre-built
     * version uses a set of English stop words that are in
     * lucene-analyzers-common so "stop" is defined in the analysis-common
     * module. */

    for (AnalysisPlugin plugin : plugins) {
        for (PreConfiguredTokenFilter filter : plugin.getPreConfiguredTokenFilters()) {
            preConfiguredTokenFilters.register(filter.getName(), filter);
        }
    }
    return unmodifiableMap(preConfiguredTokenFilters.getRegistry());
}
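The loop above consumes whatever each plugin returns from getPreConfiguredTokenFilters(). A hedged sketch of that plugin-side method follows; it would sit in the MyAnalysisPlugin class shown earlier, the name "reverse" is illustrative, and the extra imports are java.util.List, org.elasticsearch.index.analysis.PreConfiguredTokenFilter and org.apache.lucene.analysis.reverse.ReverseStringFilter.

// Sketch: a pre-configured filter needs no per-index settings, so it can be
// referenced by name directly in analyzer definitions. singleton() takes the
// name, whether the filter may be used for multi-term queries, and a
// TokenStream -> TokenStream function.
@Override
public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
    return Collections.singletonList(
        PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
}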
Example #2
Source File: AnalysisModule.java From crate with Apache License 2.0
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = setupCharFilters(plugins);
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = setupHunspellDictionaries(plugins);
    HunspellService hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry());
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers();
    Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
    Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
    Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
    Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);
    analysisRegistry = new AnalysisRegistry(environment,
        charFilters.getRegistry(),
        tokenFilters.getRegistry(),
        tokenizers.getRegistry(),
        analyzers.getRegistry(),
        normalizers.getRegistry(),
        preConfiguredCharFilters,
        preConfiguredTokenFilters,
        preConfiguredTokenizers,
        preConfiguredAnalyzers);
}
Example #3
Source File: AnalysisTestsHelper.java From crate with Apache License 2.0
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(
        final Settings settings,
        final Path configPath,
        final AnalysisPlugin... plugins) throws IOException {
    final Settings actualSettings;
    if (settings.get(IndexMetaData.SETTING_VERSION_CREATED) == null) {
        actualSettings = Settings.builder().put(settings).put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
    } else {
        actualSettings = settings;
    }
    final IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", actualSettings);
    final AnalysisRegistry analysisRegistry =
        new AnalysisModule(new Environment(actualSettings, configPath), Arrays.asList(plugins)).getAnalysisRegistry();
    return new ESTestCase.TestAnalysis(analysisRegistry.build(indexSettings),
        analysisRegistry.buildTokenFilterFactories(indexSettings),
        analysisRegistry.buildTokenizerFactories(indexSettings),
        analysisRegistry.buildCharFilterFactories(indexSettings));
}
Example #4
Source File: AnalysisModule.java From crate with Apache License 2.0
private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins,
                                                                              HunspellService hunspellService) {
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
    tokenFilters.register("stop", StopTokenFilterFactory::new);
    tokenFilters.register("standard", (indexSettings, environment, name, settings) -> {
        DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
            "The [standard] token filter name is deprecated and will be removed in a future version.");
        return new AbstractTokenFilterFactory(indexSettings, name, settings) {
            @Override
            public TokenStream create(TokenStream tokenStream) {
                return tokenStream;
            }
        };
    });
    tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
    tokenFilters.register(
        "hunspell",
        requiresAnalysisSettings((indexSettings, env, name, settings) ->
            new HunspellTokenFilterFactory(indexSettings, name, settings, hunspellService)));
    tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
    return tokenFilters;
}
Example #5
Source File: AnalysisModule.java From crate with Apache License 2.0
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreBuiltAnalyzerProviderFactory> preConfiguredCharFilters = new NamedRegistry<>("pre-built analyzer");
    for (AnalysisPlugin plugin : plugins) {
        for (PreBuiltAnalyzerProviderFactory factory : plugin.getPreBuiltAnalyzerProviderFactories()) {
            preConfiguredCharFilters.register(factory.getName(), factory);
        }
    }
    return unmodifiableMap(preConfiguredCharFilters.getRegistry());
}
Example #6
Source File: AnalysisModule.java From crate with Apache License 2.0
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredCharFilter> preConfiguredCharFilters = new NamedRegistry<>("pre-configured char_filter");
    // No char filters are available in lucene-core, so none are built in to Elasticsearch core
    for (AnalysisPlugin plugin : plugins) {
        for (PreConfiguredCharFilter filter : plugin.getPreConfiguredCharFilters()) {
            preConfiguredCharFilters.register(filter.getName(), filter);
        }
    }
    return unmodifiableMap(preConfiguredCharFilters.getRegistry());
}
Example #7
Source File: ESTestCase.java From crate with Apache License 2.0
/**
 * Creates a TestAnalysis with all the default analyzers configured.
 */
public static TestAnalysis createTestAnalysis(IndexSettings indexSettings, Settings nodeSettings,
                                              AnalysisPlugin... analysisPlugins) throws IOException {
    Environment env = TestEnvironment.newEnvironment(nodeSettings);
    AnalysisModule analysisModule = new AnalysisModule(env, Arrays.asList(analysisPlugins));
    AnalysisRegistry analysisRegistry = analysisModule.getAnalysisRegistry();
    return new TestAnalysis(analysisRegistry.build(indexSettings),
        analysisRegistry.buildTokenFilterFactories(indexSettings),
        analysisRegistry.buildTokenizerFactories(indexSettings),
        analysisRegistry.buildCharFilterFactories(indexSettings));
}
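For illustration, a test extending ESTestCase could call this helper roughly as follows. Everything specific here is an assumption: the index and analyzer names, the pre-configured "reverse" filter from the sketch after Example #1, and MyAnalysisPlugin itself.

// Sketch of a caller inside a test method (class extends ESTestCase).
Settings indexSettings = Settings.builder()
    .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
    .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
    .putList("index.analysis.analyzer.my_analyzer.filter", "reverse")
    .build();
Settings nodeSettings = Settings.builder()
    .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
    .build();
TestAnalysis analysis = createTestAnalysis(
    IndexSettingsModule.newIndexSettings("test", indexSettings), nodeSettings, new MyAnalysisPlugin());
NamedAnalyzer analyzer = analysis.indexAnalyzers.get("my_analyzer");  // resolves the custom analyzer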
Example #8
Source File: AnalysisModule.java From crate with Apache License 2.0
private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupAnalyzers(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = new NamedRegistry<>("analyzer");
    analyzers.register("default", StandardAnalyzerProvider::new);
    analyzers.register("standard", StandardAnalyzerProvider::new);
    analyzers.register("simple", SimpleAnalyzerProvider::new);
    analyzers.register("stop", StopAnalyzerProvider::new);
    analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
    analyzers.register("keyword", KeywordAnalyzerProvider::new);
    analyzers.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
    return analyzers;
}
Example #9
Source File: AnalysisTestsHelper.java From crate with Apache License 2.0
public static ESTestCase.TestAnalysis createTestAnalysisFromClassPath(final Path baseDir,
                                                                      final String resource,
                                                                      final AnalysisPlugin... plugins) throws IOException {
    final Settings settings = Settings.builder()
        .loadFromStream(resource, AnalysisTestsHelper.class.getResourceAsStream(resource), false)
        .put(Environment.PATH_HOME_SETTING.getKey(), baseDir.toString())
        .build();

    return createTestAnalysisFromSettings(settings, plugins);
}
Example #10
Source File: ESTestCase.java From crate with Apache License 2.0
/**
 * Creates a TestAnalysis with all the default analyzers configured.
 */
public static TestAnalysis createTestAnalysis(Index index, Settings nodeSettings, Settings settings,
                                              AnalysisPlugin... analysisPlugins) throws IOException {
    Settings indexSettings = Settings.builder().put(settings)
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    return createTestAnalysis(IndexSettingsModule.newIndexSettings(index, indexSettings), nodeSettings, analysisPlugins);
}
Example #11
Source File: AnalysisFactoryTestCase.java From crate with Apache License 2.0
public AnalysisFactoryTestCase(AnalysisPlugin plugin) {
    this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core");
}
Example #12
Source File: SQLExecutor.java From crate with Apache License 2.0
public static Builder builder(ClusterService clusterService, int numNodes, Random random, List<AnalysisPlugin> analysisPlugins) {
    return new Builder(clusterService, numNodes, random, analysisPlugins);
}
Example #13
Source File: SQLExecutor.java From crate with Apache License 2.0
private Builder(ClusterService clusterService, int numNodes, Random random, List<AnalysisPlugin> analysisPlugins) {
    if (numNodes < 1) {
        throw new IllegalArgumentException("Must have at least 1 node");
    }
    this.random = random;
    this.clusterService = clusterService;
    addNodesToClusterState(numNodes);
    functions = getFunctions();
    UserDefinedFunctionService udfService = new UserDefinedFunctionService(clusterService, functions);
    Map<String, SchemaInfo> schemaInfoByName = new HashMap<>();
    CrateSettings crateSettings = new CrateSettings(clusterService, clusterService.getSettings());
    schemaInfoByName.put("sys", new SysSchemaInfo(clusterService, crateSettings, new CeLicenseService()));
    schemaInfoByName.put("information_schema", new InformationSchemaInfo());
    schemaInfoByName.put(PgCatalogSchemaInfo.NAME, new PgCatalogSchemaInfo(udfService, tableStats));
    IndexNameExpressionResolver indexNameExpressionResolver = new IndexNameExpressionResolver();
    schemaInfoByName.put(
        BlobSchemaInfo.NAME,
        new BlobSchemaInfo(
            clusterService,
            new TestingBlobTableInfoFactory(Collections.emptyMap(), indexNameExpressionResolver, createTempDir())));
    Map<RelationName, DocTableInfo> docTables = new HashMap<>();
    DocTableInfoFactory tableInfoFactory = new TestingDocTableInfoFactory(
        docTables, functions, indexNameExpressionResolver);
    ViewInfoFactory testingViewInfoFactory = (ident, state) -> null;

    schemas = new Schemas(
        schemaInfoByName,
        clusterService,
        new DocSchemaInfoFactory(tableInfoFactory, testingViewInfoFactory, functions, udfService)
    );
    schemas.start();  // start listening to cluster state changes
    File homeDir = createTempDir();
    Environment environment = new Environment(
        Settings.builder().put(PATH_HOME_SETTING.getKey(), homeDir.getAbsolutePath()).build(),
        homeDir.toPath().resolve("config")
    );
    try {
        analysisRegistry = new AnalysisModule(environment, analysisPlugins).getAnalysisRegistry();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    fulltextAnalyzerResolver = new FulltextAnalyzerResolver(clusterService, analysisRegistry);
    createTableStatementAnalyzer = new CreateTableStatementAnalyzer(functions);
    createBlobTableAnalyzer = new CreateBlobTableAnalyzer(schemas, functions);
    allocationService = new AllocationService(
        new AllocationDeciders(
            Arrays.asList(
                new SameShardAllocationDecider(Settings.EMPTY, clusterService.getClusterSettings()),
                new ReplicaAfterPrimaryActiveAllocationDecider()
            )
        ),
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE
    );
    publishInitialClusterState();
}
Example #14
Source File: ESTestCase.java From crate with Apache License 2.0
/**
 * Creates a TestAnalysis with all the default analyzers configured.
 */
public static TestAnalysis createTestAnalysis(Index index, Settings settings,
                                              AnalysisPlugin... analysisPlugins) throws IOException {
    Settings nodeSettings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir()).build();
    return createTestAnalysis(index, nodeSettings, settings, analysisPlugins);
}
Example #15
Source File: AnalysisTestsHelper.java From crate with Apache License 2.0
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(
        final Settings settings, final AnalysisPlugin... plugins) throws IOException {
    return createTestAnalysisFromSettings(settings, null, plugins);
}
Example #16
Source File: AnalysisModule.java From crate with Apache License 2.0
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer");
    tokenizers.register("standard", StandardTokenizerFactory::new);
    tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);
    return tokenizers;
}
Example #17
Source File: AnalysisModule.java From crate with Apache License 2.0
public NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> setupHunspellDictionaries(List<AnalysisPlugin> plugins) {
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = new NamedRegistry<>("dictionary");
    hunspellDictionaries.extractAndRegister(plugins, AnalysisPlugin::getHunspellDictionaries);
    return hunspellDictionaries;
}
Example #18
Source File: AnalysisModule.java From crate with Apache License 2.0
private NamedRegistry<AnalysisProvider<CharFilterFactory>> setupCharFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = new NamedRegistry<>("char_filter");
    charFilters.extractAndRegister(plugins, AnalysisPlugin::getCharFilters);
    return charFilters;
}