Java Code Examples for org.elasticsearch.index.analysis.TokenFilterFactory#create()
The following examples show how to use
org.elasticsearch.index.analysis.TokenFilterFactory#create().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_ch" tokenizer and the "my_icu_transformer_ch"
 * token filter, both configured in icu_transform.json, over a fixed input
 * and asserts the exact tokens that come out.
 */
public void testTransformTraditionalSimplified() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "簡化字";
    final String[] expectedTokens = {"简化", "字"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_ch").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_ch").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 2
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_han" tokenizer and the "my_icu_transformer_han"
 * token filter, both configured in icu_transform.json, over a fixed Han
 * input and asserts the single expected output token.
 */
public void testTransformHanLatin() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "中国";
    final String[] expectedTokens = {"zhōng guó"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_han").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_han").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 3
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_katakana" tokenizer and the
 * "my_icu_transformer_katakana" token filter, both configured in
 * icu_transform.json, over a fixed input and asserts the expected token.
 */
public void testTransformKatakanaHiragana() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "ヒラガナ";
    final String[] expectedTokens = {"ひらがな"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_katakana").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_katakana").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 4
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_cyr" tokenizer and the "my_icu_transformer_cyr"
 * token filter, both configured in icu_transform.json, over a fixed Cyrillic
 * input and asserts the two expected Latin-script tokens.
 */
public void testTransformCyrillicLatin() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Российская Федерация";
    final String[] expectedTokens = {"Rossijskaâ", "Federaciâ"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_cyr").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_cyr").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 5
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_cyr" tokenizer and the
 * "my_icu_transformer_cyr_reverse" token filter, both configured in
 * icu_transform.json, over a fixed Latin-script input and asserts the two
 * expected Cyrillic tokens.
 */
public void testTransformCyrillicLatinReverse() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Rossijskaâ Federaciâ";
    final String[] expectedTokens = {"Российская", "Федерация"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_cyr").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_cyr_reverse").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 6
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_any_latin" tokenizer and the
 * "my_icu_transformer_any_latin" token filter, both configured in
 * icu_transform.json, over a fixed Greek input and asserts the two expected
 * Latin-script tokens.
 */
public void testTransformAnyLatin() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Αλφαβητικός Κατάλογος";
    final String[] expectedTokens = {"Alphabētikós", "Katálogos"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_any_latin").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_any_latin").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 7
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_nfd" tokenizer and the "my_icu_transformer_nfd"
 * token filter, both configured in icu_transform.json, over an accented
 * Latin input and asserts the two expected unaccented tokens.
 */
public void testTransformNFD() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Alphabētikós Katálogos";
    final String[] expectedTokens = {"Alphabetikos", "Katalogos"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_nfd").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_nfd").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 8
Source File: IcuTransformFilterTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_icu_tokenizer_rules" tokenizer and the
 * "my_icu_transformer_rules" token filter, both configured in
 * icu_transform.json, over a fixed input and asserts the single rewritten
 * token.
 */
public void testTransformRules() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_transform.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "abacadaba";
    final String[] expectedTokens = {"bcbcbdbcb"};

    final Tokenizer icuTokenizer = testAnalysis.tokenizer.get("my_icu_tokenizer_rules").create();
    icuTokenizer.setReader(new StringReader(input));
    final TokenStream transformed =
        testAnalysis.tokenFilter.get("my_icu_transformer_rules").create(icuTokenizer);

    assertTokenStreamContents(transformed, expectedTokens);
}
Example 9
Source File: HyphenTokenizerTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Runs the "my_hyphen_tokenizer" tokenizer and the "hyphen" token filter,
 * both configured in hyphen_tokenizer.json, over a sentence containing a
 * hyphenated word and asserts the exact token sequence produced.
 */
public void testOne() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "hyphen_tokenizer.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Das ist ein Bindestrich-Wort.";
    final String[] expectedTokens = {
        "Das",
        "ist",
        "ein",
        "Bindestrich-Wort",
        "BindestrichWort",
        "Wort",
        "Bindestrich"
    };

    final Tokenizer hyphenTokenizer = testAnalysis.tokenizer.get("my_hyphen_tokenizer").create();
    hyphenTokenizer.setReader(new StringReader(input));
    final TokenStream filtered = testAnalysis.tokenFilter.get("hyphen").create(hyphenTokenizer);

    assertTokenStreamContents(filtered, expectedTokens);
}
Example 10
Source File: MultiplexerTokenFilterFactory.java From crate with Apache License 2.0 | 6 votes |
/**
 * Builds a composite factory that reports {@code name} as its own name and,
 * when asked to create a stream, applies every factory in {@code filters}
 * in list order, feeding each one the output of the previous.
 *
 * @param name    the name returned by the composite factory
 * @param filters the factories to apply, in order
 * @return a factory wrapping the whole chain
 */
private TokenFilterFactory chainFilters(String name, List<TokenFilterFactory> filters) {
    return new TokenFilterFactory() {
        @Override
        public String name() {
            return name;
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // Thread the stream through each factory; an empty list returns the input unchanged.
            TokenStream chained = tokenStream;
            for (TokenFilterFactory factory : filters) {
                chained = factory.create(chained);
            }
            return chained;
        }
    };
}
Example 11
Source File: StandardnumberAnalyzer.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Creates the analysis chain: a tokenizer from {@code tokenizerFactory}
 * wrapped by the stream produced by {@code stdnumTokenFilterFactory}.
 *
 * @param fieldName the field being analyzed (not used when building the chain)
 * @return components pairing the tokenizer with the filtered stream
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = tokenizerFactory.create();
    // Exactly one filter is applied, so it wraps the tokenizer directly.
    final TokenStream unfiltered = source;
    final TokenStream filtered = stdnumTokenFilterFactory.create(unfiltered);
    return new TokenStreamComponents(source, filtered);
}
Example 12
Source File: IcuNumberFormatTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Runs the "my_tokenizer" tokenizer and the "spellout_de" token filter, both
 * configured in icu_numberformat.json, over a German sentence containing a
 * numeral and asserts the exact token sequence produced.
 */
public void testGermanNumberFormat() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_numberformat.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "Muss Rudi Völler fünftausend oder 10000 EUR Strafe zahlen?";
    final String[] expectedTokens = {
        "Muss",
        "Rudi",
        "Völler",
        "fünftausend",
        "oder",
        "zehntausend",
        "EUR",
        "Strafe",
        "zahlen"
    };

    final Tokenizer wordTokenizer = testAnalysis.tokenizer.get("my_tokenizer").create();
    wordTokenizer.setReader(new StringReader(input));
    final TokenStream spelledOut = testAnalysis.tokenFilter.get("spellout_de").create(wordTokenizer);

    assertTokenStreamContents(spelledOut, expectedTokens);
}
Example 13
Source File: IcuNumberFormatTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Runs the "my_tokenizer" tokenizer and the "spellout_en" token filter, both
 * configured in icu_numberformat.json, over an English sentence containing a
 * numeral and asserts the exact token sequence produced.
 */
public void testAmericanEnglish() throws Exception {
    // Analysis settings are read from a JSON resource resolved through this test class.
    final String settingsFile = "icu_numberformat.json";
    final Settings analysisSettings = Settings.builder()
        .loadFromStream(settingsFile, getClass().getResourceAsStream(settingsFile), true)
        .build();
    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
        new Index("test", "_na_"), analysisSettings, new BundlePlugin(Settings.EMPTY));

    final String input = "You will never get 100,000 US dollars of salary per year.";
    final String[] expectedTokens = {
        "You",
        "will",
        "never",
        "get",
        "onehundredthousand",
        "US",
        "dollars",
        "of",
        "salary",
        "per",
        "year"
    };

    final Tokenizer wordTokenizer = testAnalysis.tokenizer.get("my_tokenizer").create();
    wordTokenizer.setReader(new StringReader(input));
    final TokenStream spelledOut = testAnalysis.tokenFilter.get("spellout_en").create(wordTokenizer);

    assertTokenStreamContents(spelledOut, expectedTokens);
}