org.elasticsearch.analysis.common.CommonAnalysisPlugin Java Examples
The following examples show how to use
org.elasticsearch.analysis.common.CommonAnalysisPlugin.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: EsEmbeddedServer.java From datashare with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Creates the embedded node from the given cluster name, paths and HTTP port.
 *
 * <p>Both the transport layer and the HTTP layer are configured as {@code netty4}.
 * The node is only constructed here; nothing in this constructor starts it.
 *
 * @param clusterName value stored under {@code cluster.name}
 * @param homePath    value stored under {@code path.home}
 * @param dataPath    value stored under {@code path.data}
 * @param httpPort    value stored under {@code http.port}
 */
public EsEmbeddedServer(String clusterName, String homePath, String dataPath, String httpPort) {
    Settings nodeSettings = Settings.builder()
            .put("transport.type", "netty4")
            .put("http.type", "netty4")
            .put("path.home", homePath)
            .put("path.data", dataPath)
            .put("http.port", httpPort)
            .put("cluster.name", clusterName)
            .build();

    // Plugin classes handed to the node alongside the settings.
    node = new PluginConfigurableNode(
            nodeSettings,
            asList(
                    Netty4Plugin.class,
                    ParentJoinPlugin.class,
                    CommonAnalysisPlugin.class,
                    PainlessPlugin.class,
                    ReindexPlugin.class));
}
Example #2
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs a phrase containing "C++" through the {@code default} index analyzer
 * configured by {@code unstemmed.json} and compares the emitted tokens with
 * the expected list.
 */
public void testSix() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "Programmieren in C++ für Einsteiger";
    String[] expectedTokens = {
        "programmieren",
        "programmi",
        "c++",
        "einsteiger",
        "einsteig"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");
    // The field name is passed as null, exactly as in the original test.
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
Example #3
Source File: DecompoundTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Analyzes a German sentence with the {@code with_subwords_only} analyzer from
 * {@code keywords_analysis.json} and checks the resulting tokens against the
 * expected list, in which the compound words appear only as their parts.
 */
public void testWithSubwordsOnly() throws Exception {
    String settingsFile = "keywords_analysis.json";
    String text = "Das ist ein Schlüsselwort, ein Bindestrichwort";
    String[] expectedTokens = {
        "Da",
        "ist",
        "ein",
        "Schlüssel",
        "wort",
        "ein",
        "Bindestrich",
        "wort"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer subwordAnalyzer = testAnalysis.indexAnalyzers.get("with_subwords_only");
    // Fail early with a clear assertion if the analyzer was not registered.
    assertNotNull(subwordAnalyzer);
    assertTokenStreamContents(subwordAnalyzer.tokenStream("test-field", text), expectedTokens);
}
Example #4
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Feeds a personal name with an umlaut through the {@code unstemmed} analyzer
 * from {@code unstemmed.json}; the expected tokens are lower-cased and have
 * the umlaut folded ("Jörg" is expected as "jorg").
 */
public void testFour() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "Prante, Jörg";
    String[] expectedTokens = {
        "prante",
        "jorg"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");
    assertTokenStreamContents(unstemmedAnalyzer.tokenStream("test", new StringReader(text)), expectedTokens);
}
Example #5
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Analyzes a sentence containing apostrophes and hyphenated words with the
 * {@code default} analyzer from {@code unstemmed.json}. The expected list
 * contains each hyphenated word both with and without its hyphen.
 */
public void testTwo() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
    String[] expectedTokens = {
        "wird's",
        "elasticsearch-buch",
        "elasticsearchbuch",
        "erscheint",
        "o'reilly-verlag",
        "o'reillyverlag"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");
    // The field name is passed as null, exactly as in the original test.
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
Example #6
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Analyzes a hyphenated 13-digit number with the {@code default} analyzer from
 * {@code unstemmed.json}; the expected output is the original form followed by
 * the same digits without hyphens.
 */
public void testThree() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "978-1-4493-5854-9";
    String[] expectedTokens = {
        "978-1-4493-5854-9",
        "9781449358549"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");
    // The field name is passed as null, exactly as in the original test.
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
Example #7
Source File: LocalNode.java From core-ng-project with Apache License 2.0 | 5 votes |
private static List<Class<? extends Plugin>> plugins() { return List.of(ReindexPlugin.class, Netty4Plugin.class, MapperExtrasPlugin.class, // for scaled_float type PainlessPlugin.class, CommonAnalysisPlugin.class); // for stemmer analysis }
Example #8
Source File: CreateAnalyzerAnalyzerTest.java From crate with Apache License 2.0 | 5 votes |
/**
 * Per-test setup: builds the {@code SQLExecutor} with a single
 * {@link CommonAnalysisPlugin} instance and the default tables enabled, then
 * derives the planner context from the current cluster state.
 *
 * @throws IOException declared by the original method and kept unchanged
 */
@Before
public void prepare() throws IOException {
    e = SQLExecutor
            .builder(
                    clusterService,
                    1,
                    Randomness.get(),
                    List.of(new CommonAnalysisPlugin()))
            .enableDefaultTables()
            .build();

    // The planner context is taken from whatever state the cluster service reports now.
    plannerContext = e.getPlannerContext(clusterService.state());
}
Example #9
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Analyzes the surname "Schroeder" with the {@code unstemmed} analyzer from
 * {@code unstemmed.json}; the single expected token is "schroder".
 */
public void testFive() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "Schroeder";
    String[] expectedTokens = {
        "schroder"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer unstemmedAnalyzer = testAnalysis.indexAnalyzers.get("unstemmed");
    assertTokenStreamContents(unstemmedAnalyzer.tokenStream("test", new StringReader(text)), expectedTokens);
}
Example #10
Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Analyzes a German sentence that ends in a hyphenated 10-digit ISBN with the
 * {@code default} analyzer from {@code unstemmed.json}.
 *
 * <p>The expected list holds lower-cased, folded words together with shorter
 * variants, hyphenated words with and without the hyphen, and the ISBN in
 * 10-digit and 13-digit form, each with and without hyphens.
 */
public void testOne() throws Exception {
    String settingsFile = "unstemmed.json";
    String text = "Ein Tag in Köln im Café an der Straßenecke mit einer Standard-Nummer ISBN 1-4493-5854-3";
    String[] expectedTokens = {
        "tag",
        "koln",
        "cafe",
        "caf",
        "strassenecke",
        "strasseneck",
        "standard-nummer",
        "standardnummer",
        "standard-numm",
        "standardnumm",
        "isbn",
        "1-4493-5854-3",
        "1449358543",
        "978-1-4493-5854-9",
        "9781449358549"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");
    // The field name is passed as null, exactly as in the original test.
    assertTokenStreamContents(defaultAnalyzer.tokenStream(null, new StringReader(text)), expectedTokens);
}
Example #11
Source File: FstDecompoundTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Builds a custom analyzer (standard tokenizer, then the {@code fst_decompound}
 * and {@code unique} filters) from inline settings and checks that a German
 * sentence is analyzed to the original words plus the lower-cased compound
 * parts listed in the expectation.
 */
public void testDecompound() throws Exception {
    String text = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    String[] expectedTokens = {
        "Die",
        "Jahresfeier",
        "jahres",
        "feier",
        "der",
        "Rechtsanwaltskanzleien",
        "rechts",
        "anwalts",
        "kanzleien",
        "auf",
        "dem",
        "Donaudampfschiff",
        "donau",
        "dampf",
        "schiff",
        "hat",
        "viel",
        "Ökosteuer",
        "ökos",
        "teuer",
        "gekostet"
    };

    Settings analysisSettings = Settings.builder()
            .put("index.analysis.analyzer.myanalyzer.type", "custom")
            .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myanalyzer.filter.0", "fst_decompound")
            .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");
    assertAnalyzesTo(customAnalyzer, text, expectedTokens);
}
Example #12
Source File: DecompoundTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Applies the {@code decomp} token filter from {@code decompound_analysis.json}
 * directly on top of a standard tokenizer, without going through a named
 * analyzer, and compares the produced tokens with the expected list.
 */
public void test() throws Exception {
    String settingsFile = "decompound_analysis.json";
    String text = "Die Jahresfeier der Rechtsanwaltskanzleien auf dem Donaudampfschiff hat viel Ökosteuer gekostet";
    String[] expectedTokens = {
        "Die",
        "Die",
        "Jahresfeier",
        "Jahr",
        "feier",
        "der",
        "der",
        "Rechtsanwaltskanzleien",
        "Recht",
        "anwalt",
        "kanzlei",
        "auf",
        "auf",
        "dem",
        "dem",
        "Donaudampfschiff",
        "Donau",
        "dampf",
        "schiff",
        "hat",
        "hat",
        "viel",
        "viel",
        "Ökosteuer",
        "Ökosteuer",
        "gekostet",
        "gekosten"
    };

    Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    TokenFilterFactory decompFilter = testAnalysis.tokenFilter.get("decomp");
    Tokenizer standardTokenizer = testAnalysis.tokenizer.get("standard").create();
    // The tokenizer needs its input set before the filter is wrapped around it.
    standardTokenizer.setReader(new StringReader(text));
    assertTokenStreamContents(decompFilter.create(standardTokenizer), expectedTokens);
}
Example #13
Source File: LemmatizeSearchTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Supplies the plugin classes that should be added to the node.
 *
 * @return a fixed-size list holding {@code BundlePlugin} and {@code CommonAnalysisPlugin}
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    return Arrays.asList(
            BundlePlugin.class,
            CommonAnalysisPlugin.class);
}
Example #14
Source File: LemmatizeTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Builds a custom analyzer (standard tokenizer, then the {@code lemmatize} and
 * {@code unique} filters) from inline settings and checks an English sentence
 * against the expected list of lemmas.
 */
public void testLemmatizer() throws Exception {
    String text = "While these texts were previously only available to users of academic libraries "
            + "participating in the partnership, at the end of the first phase of EEBO-TCP the current "
            + "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
        "While",
        "this",
        "text",
        "be",
        "previously",
        "only",
        "available",
        "to",
        "user",
        "of",
        "academic",
        "library",
        "participate",
        "in",
        "the",
        "partnership",
        "at",
        "end",
        "first",
        "phase",
        "EEBO",
        "TCP",
        "current",
        "25,000",
        "have",
        "now",
        "release",
        "into",
        "public",
        "domain"
    };

    Settings analysisSettings = Settings.builder()
            .put("index.analysis.analyzer.myanalyzer.type", "custom")
            .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myanalyzer.filter.0", "lemmatize")
            .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");
    assertAnalyzesTo(customAnalyzer, text, expectedTokens);
}
Example #15
Source File: LemmatizeTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Same sentence as the plain lemmatizer test, but the filter is declared as
 * {@code myfilter} with {@code lemma_only} set to {@code "false"}. The expected
 * list therefore holds surface forms followed by their lemmas (for example
 * "these", "this").
 */
public void testFull() throws Exception {
    String text = "While these texts were previously only available to users of academic libraries "
            + "participating in the partnership, at the end of the first phase of EEBO-TCP the current "
            + "25,000 texts have now been released into the public domain.";
    String[] expectedTokens = {
        "While",
        "these",
        "this",
        "texts",
        "text",
        "were",
        "be",
        "previously",
        "only",
        "available",
        "to",
        "users",
        "user",
        "of",
        "academic",
        "libraries",
        "library",
        "participating",
        "participate",
        "in",
        "the",
        "partnership",
        "at",
        "end",
        "first",
        "phase",
        "EEBO",
        "TCP",
        "current",
        "25,000",
        "have",
        "now",
        "been",
        "released",
        "release",
        "into",
        "public",
        "domain"
    };

    Settings analysisSettings = Settings.builder()
            .put("index.analysis.filter.myfilter.type", "lemmatize")
            .put("index.analysis.filter.myfilter.lemma_only", "false")
            .put("index.analysis.analyzer.myanalyzer.type", "custom")
            .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard")
            .put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter")
            .put("index.analysis.analyzer.myanalyzer.filter.1", "unique")
            .build();
    ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    Analyzer customAnalyzer = testAnalysis.indexAnalyzers.get("myanalyzer");
    assertAnalyzesTo(customAnalyzer, text, expectedTokens);
}
Example #16
Source File: LemmatizeTokenFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
public void testGermanLemmatizer() throws Exception { String source = "Die Würde des Menschen ist unantastbar. " + "Sie zu achten und zu schützen ist Verpflichtung aller staatlichen Gewalt. " + "Das Deutsche Volk bekennt sich darum zu unverletzlichen und unveräußerlichen Menschenrechten " + "als Grundlage jeder menschlichen Gemeinschaft, des Friedens und der Gerechtigkeit in der Welt."; String[] expected = { "Die", "Würde", "der", "Mensch", "mein", // ? "unantastbar", "Sie", "zu", "achten", "und", "zu", "schützen", "mein", // ? "Verpflichtung", "all", "staatlich", "Gewalt", "Das", "deutsch", "Volk", "bekennen", "sich", "darum", "zu", "unverletzlichen", // ? "und", "unveräußerlichen", // ? "Menschenrechten", // ? "als", "Grundlage", "jed", "menschlich", "Gemeinschaft", "der", "Friede", "und", "der", "Gerechtigkeit", "in", "der", "Welt" }; Settings settings = Settings.builder() .put("index.analysis.filter.myfilter.type", "lemmatize") .put("index.analysis.filter.myfilter.language", "de") .put("index.analysis.analyzer.myanalyzer.type", "custom") .put("index.analysis.analyzer.myanalyzer.tokenizer", "standard") .put("index.analysis.analyzer.myanalyzer.filter.0", "myfilter") .build(); ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"), settings, new BundlePlugin(Settings.EMPTY), new CommonAnalysisPlugin()); Analyzer myanalyzer = analysis.indexAnalyzers.get("myanalyzer"); assertAnalyzesTo(myanalyzer, source, expected); }
Example #17
Source File: GNDReferenceMappingTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Lists the plugin classes this test asks for.
 *
 * @return a fixed-size list holding {@code BundlePlugin} and {@code CommonAnalysisPlugin}
 */
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
    return Arrays.asList(
            BundlePlugin.class,
            CommonAnalysisPlugin.class);
}
Example #18
Source File: DecompoundQueryTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Lists the plugin classes this test asks for on its nodes.
 *
 * @return a fixed-size list holding the common analysis, Netty 4 and bundle plugin classes
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    return Arrays.asList(
            CommonAnalysisPlugin.class,
            Netty4Plugin.class,
            BundlePlugin.class);
}
Example #19
Source File: FulltextAnalyzerResolverTest.java From crate with Apache License 2.0 | 4 votes |
/**
 * Extends the plugin set inherited from the superclass with
 * {@link CommonAnalysisPlugin}.
 *
 * @return a new mutable collection: the superclass plugins followed by the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Copy first so the collection returned by the superclass is never modified.
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}
Example #20
Source File: FulltextITest.java From crate with Apache License 2.0 | 4 votes |
/**
 * Extends the plugin set inherited from the superclass with
 * {@link CommonAnalysisPlugin}.
 *
 * @return a new mutable collection: the superclass plugins followed by the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Copy first so the collection returned by the superclass is never modified.
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}
Example #21
Source File: CommonAnalyzerITest.java From crate with Apache License 2.0 | 4 votes |
/**
 * Extends the plugin set inherited from the superclass with
 * {@link CommonAnalysisPlugin}.
 *
 * @return a new mutable collection: the superclass plugins followed by the analysis plugin
 */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    // Copy first so the collection returned by the superclass is never modified.
    Collection<Class<? extends Plugin>> withAnalysis = new ArrayList<>(super.nodePlugins());
    withAnalysis.add(CommonAnalysisPlugin.class);
    return withAnalysis;
}