org.apache.lucene.analysis.custom.CustomAnalyzer Java Examples
The following examples show how to use org.apache.lucene.analysis.custom.CustomAnalyzer.
Each example is taken from an open-source project; the source file, project, and license are noted above the code.
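Before the project examples, here is a minimal, self-contained sketch of the builder API. This is an illustrative sketch, not taken from any of the projects below; it assumes only that lucene-core and lucene-analyzers-common are on the classpath, where "standard" and "lowercase" are the stock SPI names for StandardTokenizerFactory and LowerCaseFilterFactory.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class CustomAnalyzerSketch {
  public static void main(String[] args) throws IOException {
    // Factories are resolved by SPI name: "standard" -> StandardTokenizerFactory,
    // "lowercase" -> LowerCaseFilterFactory.
    Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer("standard")
        .addTokenFilter("lowercase")
        .build();
    System.out.println(analyzer); // prints the configured analysis chain
  }
}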
Example #1
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0
public void testPOS() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}
Example #2
Source File: DemoTest.java From HongsCORE with MIT License
public static void main(String[] args) throws IOException {
  Analyzer az = CustomAnalyzer.builder()
      //.withTokenizer("Standard")
      .withTokenizer("Name")
      .addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
      //.addTokenFilter("ICUTransform", "id", "Han-Latin;NFD;[[:NonspacingMark:][:Space:]] Remove")
      //.addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
      .build();

  StringReader sr = new StringReader(args[0]);
  TokenStream ts = az.tokenStream("", sr);
  OffsetAttribute oa = ts.addAttribute(OffsetAttribute.class);
  CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);

  try {
    ts.reset(); // Resets this stream to the beginning. (Required)
    while (ts.incrementToken()) {
      System.out.println(ta.toString() + "|" + ta.length()
          + "[" + oa.startOffset() + "," + oa.endOffset() + "]");
    }
    ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
  } finally {
    ts.close(); // Release resources associated with this stream.
  }
}
Example #3
Source File: NestPathField.java From lucene-solr with Apache License 2.0
@Override
public void setArgs(IndexSchema schema, Map<String, String> args) {
  args.putIfAbsent("stored", "false");
  args.putIfAbsent("omitTermFreqAndPositions", "true");
  args.putIfAbsent("omitNorms", "true");
  args.putIfAbsent("maxCharsForDocValues", "-1");
  super.setArgs(schema, args);

  // CustomAnalyzer is easy to use
  CustomAnalyzer customAnalyzer;
  try {
    customAnalyzer = CustomAnalyzer.builder(schema.getResourceLoader())
        .withDefaultMatchVersion(schema.getDefaultLuceneMatchVersion())
        .withTokenizer(KeywordTokenizerFactory.class)
        .addTokenFilter(PatternReplaceFilterFactory.class,
            "pattern", "#\\d*", "replace", "all")
        .build();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); // impossible?
  }
  // Solr HTTP Schema APIs don't know about CustomAnalyzer so use TokenizerChain instead
  setIndexAnalyzer(new TokenizerChain(customAnalyzer));
  // leave queryAnalyzer as literal
}
Example #4
Source File: AnalysisImplTest.java From lucene-solr with Apache License 2.0
@Test
public void testAnalyze_custom() {
  AnalysisImpl analysis = new AnalysisImpl();
  Map<String, String> tkParams = new HashMap<>();
  tkParams.put("maxTokenLen", "128");
  CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder("keyword", tkParams)
      .addTokenFilterConfig("lowercase", Collections.emptyMap());
  CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
  assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory",
      analyzer.getTokenizerFactory().getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory",
      analyzer.getTokenFilterFactories().get(0).getClass().getName());

  String text = "Apache Lucene";
  List<Analysis.Token> tokens = analysis.analyze(text);
  assertNotNull(tokens);
}
Example #5
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test1SentenceDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict",
          "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE_both, SENTENCE_both_punc, null, null,
      SENTENCE_both_posTags, null, null, true);
}
Example #6
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0
public void testNoBreak() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}
Example #7
Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0
@Test
public void testTokenizer() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  assertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
}
Example #8
Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0
@Test
public void testTokenizerNoSentenceDetector() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
}
Example #9
Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0
@Test
public void testTokenizerNoTokenizer() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
}
Example #10
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test1SentenceDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict")
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
Example #11
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test2SentencesDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
Example #12
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test1SentenceMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
Example #13
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test2SentencesMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_maxent_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
Example #14
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0
public void testBasic() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
Example #15
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void test2SentencesDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile,
          "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_punc, null, null,
      SENTENCES_both_posTags, null, null, true);
}
Example #16
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}
Example #17
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}
Example #18
Source File: TestOpenNLPLemmatizerFilterFactory.java From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile,
          "lemmatizerModel", lemmatizerModelFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc, null, null,
      SENTENCES_both_keep_orig_posTags, null, null, true);
}
Example #19
Source File: TokenizerChain.java From lucene-solr with Apache License 2.0
/** Copy from CustomAnalyzer. */
public TokenizerChain(CustomAnalyzer customAnalyzer) {
  this(customAnalyzer.getCharFilterFactories().toArray(new CharFilterFactory[0]),
      customAnalyzer.getTokenizerFactory(),
      customAnalyzer.getTokenFilterFactories().toArray(new TokenFilterFactory[0]));
  setPositionIncrementGap(customAnalyzer.getPositionIncrementGap(null));
  setVersion(customAnalyzer.getVersion());
  assert customAnalyzer.getOffsetGap(null) == 1; // note: we don't support setting the offset gap
}
Example #20
Source File: TokenAnalyzerMaker.java From lucene4ir with Apache License 2.0
public Analyzer createAnalyzer(String tokenFilterFile) {
  Analyzer analyzer = null;
  try {
    lucene4ir.utils.TokenFilters tokenFilters =
        JAXB.unmarshal(new File(tokenFilterFile), lucene4ir.utils.TokenFilters.class);
    CustomAnalyzer.Builder builder;
    if (tokenFilters.getResourceDir() != null) {
      builder = CustomAnalyzer.builder(Paths.get(tokenFilters.getResourceDir()));
    } else {
      builder = CustomAnalyzer.builder();
    }
    builder.withTokenizer(tokenFilters.getTokenizer());
    for (lucene4ir.utils.TokenFilter filter : tokenFilters.getTokenFilters()) {
      System.out.println("Token filter: " + filter.getName());
      List<lucene4ir.utils.Param> params = filter.getParams();
      if (params.size() > 0) {
        Map<String, String> paramMap = new HashMap<>();
        for (lucene4ir.utils.Param param : params) {
          paramMap.put(param.getKey(), param.getValue());
        }
        builder.addTokenFilter(filter.getName(), paramMap);
      } else {
        builder.addTokenFilter(filter.getName());
      }
    }
    analyzer = builder.build();
  } catch (IOException ioe) {
    System.out.println(" caught a " + ioe.getClass() + "\n with message: " + ioe.getMessage());
  }
  return analyzer;
}
Example #21
Source File: LuceneAnalyzerIntegrationTest.java From tutorials with MIT License
@Test
public void whenUseCustomAnalyzerBuilder_thenAnalyzed() throws IOException {
  Analyzer analyzer = CustomAnalyzer.builder()
      .withTokenizer("standard")
      .addTokenFilter("lowercase")
      .addTokenFilter("stop")
      .addTokenFilter("porterstem")
      .addTokenFilter("capitalization")
      .build();
  List<String> result = analyze(SAMPLE_TEXT, analyzer);
  assertThat(result, contains("Baeldung.com", "Lucen", "Analyz", "Test"));
}
Example #22
Source File: TestOpenNLPChunkerFilterFactory.java From lucene-solr with Apache License 2.0
public void testPayloads() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}
Example #23
Source File: AnalyzerFactory.java From airsonic-advanced with GNU General Public License v3.0
private Builder createArtistAnalyzerBuilder() throws IOException {
  Builder builder = CustomAnalyzer.builder()
      .withTokenizer(StandardTokenizerFactory.class)
      .addTokenFilter(CJKWidthFilterFactory.class)
      .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
      .addTokenFilter(LowerCaseFilterFactory.class)
      .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST)
      .addTokenFilter(EnglishPossessiveFilterFactory.class);
  addTokenFilterForUnderscoreRemovalAroundToken(builder);
  return builder;
}
Example #24
Source File: BibleSearchIndex.java From Quelea with GNU General Public License v3.0
/** Create a new empty search index. */
public BibleSearchIndex() {
  chapters = new HashMap<>();
  try {
    analyzer = CustomAnalyzer.builder()
        .withTokenizer(StandardTokenizerFactory.class)
        .addTokenFilter(LowerCaseFilterFactory.class)
        .addTokenFilter(ASCIIFoldingFilterFactory.class)
        .build();
    index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-bible").toAbsolutePath());
  } catch (IOException ex) {
    LOGGER.log(Level.SEVERE, "Couldn't create song search index");
    throw new RuntimeException("Couldn't create song search index", ex);
  }
}
Example #25
Source File: SongSearchIndex.java From Quelea with GNU General Public License v3.0
/** Create a new empty search index. */
public SongSearchIndex() {
  songs = new HashMap<>();
  try {
    analyzer = CustomAnalyzer.builder()
        .withTokenizer(StandardTokenizerFactory.class)
        .addTokenFilter(LowerCaseFilterFactory.class)
        .addTokenFilter(ASCIIFoldingFilterFactory.class)
        .build();
    index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-song").toAbsolutePath());
  } catch (IOException ex) {
    LOGGER.log(Level.SEVERE, "Couldn't create song search index");
    throw new RuntimeException("Couldn't create song search index", ex);
  }
}
Example #26
Source File: AnalyzerFactory.java From airsonic with GNU General Public License v3.0
private Builder createDefaultAnalyzerBuilder() throws IOException {
  Builder builder = CustomAnalyzer.builder()
      .withTokenizer(StandardTokenizerFactory.class)
      .addTokenFilter(CJKWidthFilterFactory.class)
      .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
      .addTokenFilter(LowerCaseFilterFactory.class)
      .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS)
      .addTokenFilter(EnglishPossessiveFilterFactory.class);
  addTokenFilterForUnderscoreRemovalAroundToken(builder);
  return builder;
}
Example #27
Source File: AnalyzerFactory.java From airsonic with GNU General Public License v3.0
private Builder createArtistAnalyzerBuilder() throws IOException {
  Builder builder = CustomAnalyzer.builder()
      .withTokenizer(StandardTokenizerFactory.class)
      .addTokenFilter(CJKWidthFilterFactory.class)
      .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
      .addTokenFilter(LowerCaseFilterFactory.class)
      .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST)
      .addTokenFilter(EnglishPossessiveFilterFactory.class);
  addTokenFilterForUnderscoreRemovalAroundToken(builder);
  return builder;
}
Example #28
Source File: AnalyzerPaneProvider.java From lucene-solr with Apache License 2.0
@Override
public void setAnalyzer(Analyzer analyzer) {
  analyzerNameLbl.setText(analyzer.getClass().getName());

  if (analyzer instanceof CustomAnalyzer) {
    CustomAnalyzer customAnalyzer = (CustomAnalyzer) analyzer;

    DefaultListModel<String> charFilterListModel = new DefaultListModel<>();
    customAnalyzer.getCharFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(charFilterListModel::addElement);
    charFilterList.setModel(charFilterListModel);

    tokenizerTF.setText(customAnalyzer.getTokenizerFactory().getClass().getSimpleName());

    DefaultListModel<String> tokenFilterListModel = new DefaultListModel<>();
    customAnalyzer.getTokenFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(tokenFilterListModel::addElement);
    tokenFilterList.setModel(tokenFilterListModel);

    charFilterList.setBackground(Color.white);
    tokenizerTF.setBackground(Color.white);
    tokenFilterList.setBackground(Color.white);
  } else {
    charFilterList.setModel(new DefaultListModel<>());
    tokenizerTF.setText("");
    tokenFilterList.setModel(new DefaultListModel<>());
    charFilterList.setBackground(Color.lightGray);
    tokenizerTF.setBackground(Color.lightGray);
    tokenFilterList.setBackground(Color.lightGray);
  }
}
Example #29
Source File: AnalysisPanelProvider.java From lucene-solr with Apache License 2.0
void showAnalysisChainDialog() {
  if (getCurrentAnalyzer() instanceof CustomAnalyzer) {
    CustomAnalyzer analyzer = (CustomAnalyzer) getCurrentAnalyzer();
    new DialogOpener<>(analysisChainDialogFactory).open("Analysis chain", 600, 320,
        (factory) -> {
          factory.setAnalyzer(analyzer);
        });
  }
}
Example #30
Source File: AnalysisImpl.java From lucene-solr with Apache License 2.0
@Override
public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
  Objects.requireNonNull(config);
  try {
    // create builder
    CustomAnalyzer.Builder builder = config.getConfigDir()
        .map(path -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(path)))
        .orElse(CustomAnalyzer.builder());

    // set tokenizer
    builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

    // add char filters
    for (CustomAnalyzerConfig.ComponentConfig cfConf : config.getCharFilterConfigs()) {
      builder.addCharFilter(cfConf.getName(), cfConf.getParams());
    }
    // add token filters
    for (CustomAnalyzerConfig.ComponentConfig tfConf : config.getTokenFilterConfigs()) {
      builder.addTokenFilter(tfConf.getName(), tfConf.getParams());
    }

    // build analyzer
    this.analyzer = builder.build();
    return analyzer;
  } catch (Exception e) {
    throw new LukeException("Failed to build custom analyzer.", e);
  }
}