org.apache.lucene.analysis.util.CharFilterFactory Java Examples
The following examples show how to use
org.apache.lucene.analysis.util.CharFilterFactory.
Each example is taken from an open-source project; the source file, project, and license are noted above its code.
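Before the examples, here is a minimal sketch of the SPI-based pattern most of them build on: a CharFilterFactory is resolved by name and then used to wrap a Reader. It assumes the "htmlstrip" char filter is on the classpath and that an empty, modifiable parameter map is acceptable for it; the class name is illustrative only.

import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import org.apache.lucene.analysis.util.CharFilterFactory;

public class CharFilterFactoryLookupSketch {
  public static void main(String[] args) throws Exception {
    // List the SPI names of all char filters visible on the classpath.
    System.out.println(CharFilterFactory.availableCharFilters());

    // Resolve a factory by its SPI name, then wrap a Reader with the resulting CharFilter.
    CharFilterFactory factory = CharFilterFactory.forName("htmlstrip", new HashMap<>());
    Reader wrapped = factory.create(new StringReader("<p>foo bar</p>"));
    // 'wrapped' now yields the input with the HTML markup stripped.
  }
}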
Example #1
Source File: CustomAnalyzerStrField.java From lucene-solr with Apache License 2.0
public CustomAnalyzerStrField() {
  Random r = LuceneTestCase.random();

  // two arg constructor
  Analyzer a2 = new TokenizerChain(
      new KeywordTokenizerFactory(new HashMap<>()),
      r.nextBoolean() ? null : new TokenFilterFactory[0]);

  // three arg constructor
  Analyzer a3 = new TokenizerChain(
      r.nextBoolean() ? null : new CharFilterFactory[0],
      new KeywordTokenizerFactory(new HashMap<>()),
      r.nextBoolean() ? null : new TokenFilterFactory[0]);

  if (r.nextBoolean()) {
    indexAnalyzer = a2;
    queryAnalyzer = a3;
  } else {
    queryAnalyzer = a2;
    indexAnalyzer = a3;
  }
}
Example #2
Source File: AnalysisImpl.java From lucene-solr with Apache License 2.0
@Override
public void addExternalJars(List<String> jarFiles) {
  List<URL> urls = new ArrayList<>();

  for (String jarFile : jarFiles) {
    Path path = FileSystems.getDefault().getPath(jarFile);
    if (!Files.exists(path) || !jarFile.endsWith(".jar")) {
      throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarFile));
    }

    try {
      URL url = path.toUri().toURL();
      urls.add(url);
    } catch (IOException e) {
      throw new LukeException(e.getMessage(), e);
    }
  }

  // reload available tokenizers, charfilters, and tokenfilters
  URLClassLoader classLoader = new URLClassLoader(
      urls.toArray(new URL[0]), this.getClass().getClassLoader());
  CharFilterFactory.reloadCharFilters(classLoader);
  TokenizerFactory.reloadTokenizers(classLoader);
  TokenFilterFactory.reloadTokenFilters(classLoader);
}
Example #3
Source File: AlfrescoFieldType.java From SearchServices with GNU Lesser General Public License v3.0
public void add(Object current) {
  if (!(current instanceof MultiTermAwareComponent)) {
    return;
  }
  AbstractAnalysisFactory newComponent = ((MultiTermAwareComponent) current).getMultiTermComponent();
  if (newComponent instanceof TokenFilterFactory) {
    if (filters == null) {
      filters = new ArrayList<TokenFilterFactory>(2);
    }
    filters.add((TokenFilterFactory) newComponent);
  } else if (newComponent instanceof TokenizerFactory) {
    tokenizer = (TokenizerFactory) newComponent;
  } else if (newComponent instanceof CharFilterFactory) {
    if (charFilters == null) {
      charFilters = new ArrayList<CharFilterFactory>(1);
    }
    charFilters.add((CharFilterFactory) newComponent);
  } else {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
  }
}
Example #4
Source File: PluginsService.java From crate with Apache License 2.0
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
  // do NOT change the order of these method calls!
  // Codecs:
  PostingsFormat.reloadPostingsFormats(loader);
  DocValuesFormat.reloadDocValuesFormats(loader);
  Codec.reloadCodecs(loader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(loader);
  TokenFilterFactory.reloadTokenFilters(loader);
  TokenizerFactory.reloadTokenizers(loader);
}
Example #5
Source File: TokenizerChain.java From lucene-solr with Apache License 2.0
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
  if (charFilters != null && charFilters.length > 0) {
    for (CharFilterFactory charFilter : charFilters) {
      reader = charFilter.normalize(reader);
    }
  }
  return reader;
}
Example #6
Source File: TokenizerChain.java From lucene-solr with Apache License 2.0
@Override
public Reader initReader(String fieldName, Reader reader) {
  if (charFilters != null && charFilters.length > 0) {
    Reader cs = reader;
    for (CharFilterFactory charFilter : charFilters) {
      cs = charFilter.create(cs);
    }
    reader = cs;
  }
  return reader;
}
Example #7
Source File: TokenizerChain.java From lucene-solr with Apache License 2.0
/**
 * Creates a new TokenizerChain.
 *
 * @param charFilters Factories for the CharFilters to use, if any - if null, will be treated as if empty.
 * @param tokenizer Factory for the Tokenizer to use, must not be null.
 * @param filters Factories for the TokenFilters to use, if any - if null, will be treated as if empty.
 */
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
  charFilters = null == charFilters ? EMPTY_CHAR_FITLERS : charFilters;
  filters = null == filters ? EMPTY_TOKEN_FITLERS : filters;
  if (null == tokenizer) {
    throw new NullPointerException("TokenizerFactory must not be null");
  }
  this.charFilters = charFilters;
  this.tokenizer = tokenizer;
  this.filters = filters;
}
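Examples #1 and #7 together suggest how a chain can be assembled by hand. The sketch below illustrates that pattern; it assumes the Solr TokenizerChain class and the stock Lucene factories (HTMLStripCharFilterFactory, KeywordTokenizerFactory, LowerCaseFilterFactory) live in their usual packages, and that an empty, modifiable parameter map is acceptable for each factory. The class name is illustrative only.

import java.util.HashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;

public class TokenizerChainSketch {
  public static void main(String[] args) {
    // Strip HTML before tokenizing, then lowercase the resulting tokens.
    CharFilterFactory[] charFilters = { new HTMLStripCharFilterFactory(new HashMap<>()) };
    KeywordTokenizerFactory tokenizer = new KeywordTokenizerFactory(new HashMap<>());
    TokenFilterFactory[] filters = { new LowerCaseFilterFactory(new HashMap<>()) };

    // Passing null for char filters or token filters would be treated as empty (see Example #7).
    Analyzer analyzer = new TokenizerChain(charFilters, tokenizer, filters);
    analyzer.close();
  }
}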
Example #8
Source File: TestFactories.java From lucene-solr with Apache License 2.0
private void doTestCharFilter(String charfilter) throws IOException {
  Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
  CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
  if (factory != null) {
    // we managed to fully create an instance. check a few more things:
    if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
      // beast it just a little, it shouldnt throw exceptions:
      // (it should have thrown them in initialize)
      Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory);
      checkRandomData(random(), a, 3, 20, false, false);
      a.close();
    }
  }
}
Example #9
Source File: TestFactories.java From lucene-solr with Apache License 2.0
public void test() throws IOException {
  for (String tokenizer : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(tokenizer);
  }

  for (String tokenFilter : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(tokenFilter);
  }

  for (String charFilter : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(charFilter);
  }
}
Example #10
Source File: TestCustomAnalyzer.java From lucene-solr with Apache License 2.0
public void testHtmlStripClassicFolding() throws Exception {
  CustomAnalyzer a = CustomAnalyzer.builder()
      .withDefaultMatchVersion(LUCENE_8_0_0)
      .addCharFilter("htmlstrip")
      .withTokenizer("classic")
      .addTokenFilter("asciifolding", "preserveOriginal", "true")
      .addTokenFilter("lowercase")
      .withPositionIncrementGap(100)
      .withOffsetGap(1000)
      .build();
  assertSame(ClassicTokenizerFactory.class, a.getTokenizerFactory().getClass());
  List<CharFilterFactory> charFilters = a.getCharFilterFactories();
  assertEquals(1, charFilters.size());
  assertEquals(HTMLStripCharFilterFactory.class, charFilters.get(0).getClass());
  List<TokenFilterFactory> tokenFilters = a.getTokenFilterFactories();
  assertEquals(2, tokenFilters.size());
  assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
  assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
  assertEquals(100, a.getPositionIncrementGap("dummy"));
  assertEquals(1000, a.getOffsetGap("dummy"));
  assertSame(LUCENE_8_0_0, a.getVersion());

  assertAnalyzesTo(a, "<p>foo bar</p> FOO BAR",
      new String[] { "foo", "bar", "foo", "bar" },
      new int[] { 1, 1, 1, 1 });
  assertAnalyzesTo(a, "<p><b>föó</b> bär FÖÖ BAR</p>",
      new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
      new int[] { 1, 0, 1, 0, 1, 0, 1 });
  a.close();
}
Example #11
Source File: TestCustomAnalyzer.java From lucene-solr with Apache License 2.0
public void testFactoryHtmlStripClassicFolding() throws Exception {
  CustomAnalyzer a = CustomAnalyzer.builder()
      .withDefaultMatchVersion(LUCENE_8_0_0)
      .addCharFilter(HTMLStripCharFilterFactory.class)
      .withTokenizer(ClassicTokenizerFactory.class)
      .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "true")
      .addTokenFilter(LowerCaseFilterFactory.class)
      .withPositionIncrementGap(100)
      .withOffsetGap(1000)
      .build();
  assertSame(ClassicTokenizerFactory.class, a.getTokenizerFactory().getClass());
  List<CharFilterFactory> charFilters = a.getCharFilterFactories();
  assertEquals(1, charFilters.size());
  assertEquals(HTMLStripCharFilterFactory.class, charFilters.get(0).getClass());
  List<TokenFilterFactory> tokenFilters = a.getTokenFilterFactories();
  assertEquals(2, tokenFilters.size());
  assertSame(ASCIIFoldingFilterFactory.class, tokenFilters.get(0).getClass());
  assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
  assertEquals(100, a.getPositionIncrementGap("dummy"));
  assertEquals(1000, a.getOffsetGap("dummy"));
  assertSame(LUCENE_8_0_0, a.getVersion());

  assertAnalyzesTo(a, "<p>foo bar</p> FOO BAR",
      new String[] { "foo", "bar", "foo", "bar" },
      new int[] { 1, 1, 1, 1 });
  assertAnalyzesTo(a, "<p><b>föó</b> bär FÖÖ BAR</p>",
      new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
      new int[] { 1, 0, 1, 0, 1, 0, 1 });
  a.close();
}
Example #12
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
/** Builds the analyzer. */
public CustomAnalyzer build() {
  if (tokenizer.get() == null) {
    throw new IllegalStateException("You have to set at least a tokenizer.");
  }
  return new CustomAnalyzer(
      defaultMatchVersion.get(),
      charFilters.toArray(new CharFilterFactory[charFilters.size()]),
      tokenizer.get(),
      tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]),
      posIncGap.get(),
      offsetGap.get());
}
Example #13
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
/** Adds the given char filter.
 * @param name is used to look up the factory with {@link CharFilterFactory#forName(String, Map)}.
 *  The list of possible names can be looked up with {@link CharFilterFactory#availableCharFilters()}.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 */
public Builder addCharFilter(String name, Map<String,String> params) throws IOException {
  Objects.requireNonNull(name, "CharFilter name may not be null");
  charFilters.add(applyResourceLoader(CharFilterFactory.forName(name, applyDefaultParams(params))));
  componentsAdded = true;
  return this;
}
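A hedged usage sketch of this builder method. It assumes a "patternReplace" char filter is registered and accepts "pattern" and "replacement" parameters, and that a "whitespace" tokenizer is available; note the parameter map must be modifiable (the factory removes entries as it consumes them), so a HashMap is used. The class name is illustrative only.

import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class AddCharFilterByNameSketch {
  public static void main(String[] args) throws Exception {
    // Parameters are handed to the factory through a modifiable map.
    Map<String, String> params = new HashMap<>();
    params.put("pattern", "\\s+");
    params.put("replacement", " ");

    CustomAnalyzer analyzer = CustomAnalyzer.builder()
        .addCharFilter("patternReplace", params)  // collapse runs of whitespace before tokenizing
        .withTokenizer("whitespace")
        .build();
    analyzer.close();
  }
}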
Example #14
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
/** Adds the given char filter.
 * @param factory class that is used to create the char filter.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 */
public Builder addCharFilter(Class<? extends CharFilterFactory> factory, Map<String,String> params) throws IOException {
  Objects.requireNonNull(factory, "CharFilter name may not be null");
  charFilters.add(applyResourceLoader(newFactoryClassInstance(factory, applyDefaultParams(params))));
  componentsAdded = true;
  return this;
}
Example #15
Source File: PluginsService.java From Elasticsearch with Apache License 2.0
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
  // do NOT change the order of these method calls!
  // Codecs:
  PostingsFormat.reloadPostingsFormats(loader);
  DocValuesFormat.reloadDocValuesFormats(loader);
  Codec.reloadCodecs(loader);
  // Analysis:
  CharFilterFactory.reloadCharFilters(loader);
  TokenFilterFactory.reloadTokenFilters(loader);
  TokenizerFactory.reloadTokenizers(loader);
}
Example #16
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
@Override
protected Reader initReader(String fieldName, Reader reader) {
  for (final CharFilterFactory charFilter : charFilters) {
    reader = charFilter.create(reader);
  }
  return reader;
}
Example #17
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
CustomAnalyzer(Version defaultMatchVersion, CharFilterFactory[] charFilters, TokenizerFactory tokenizer,
    TokenFilterFactory[] tokenFilters, Integer posIncGap, Integer offsetGap) {
  this.charFilters = charFilters;
  this.tokenizer = tokenizer;
  this.tokenFilters = tokenFilters;
  this.posIncGap = posIncGap;
  this.offsetGap = offsetGap;
  if (defaultMatchVersion != null) {
    setVersion(defaultMatchVersion);
  }
}
Example #18
Source File: AnalyzerFactoryTask.java From lucene-solr with Apache License 2.0
/**
 * This method looks up a class with its fully qualified name (FQN), or a short-name
 * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
 * as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period, the class is first looked up as-is, assuming that it
 * is an FQN. If this fails, lookup is retried after prepending the Lucene analysis
 * package prefix to the class name.
 *
 * If className does not contain a period, the analysis SPI *Factory.lookupClass()
 * methods are used to find the class.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      try {
        // Second, retry lookup after prepending the Lucene analysis package prefix
        return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
      } catch (ClassNotFoundException e1) {
        throw new ClassNotFoundException("Can't find class '" + className
            + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
      }
    }
  }
  // No dot - use analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }
  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
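The no-dot branch of lookupAnalysisClass ends in CharFilterFactory.lookupClass, the same SPI call exercised by Example #8. Below is a minimal standalone sketch of that lookup, resolving every registered char filter name back to its factory class; the class name and the main wrapper are illustrative only.

import org.apache.lucene.analysis.util.CharFilterFactory;

public class CharFilterSpiLookupSketch {
  public static void main(String[] args) {
    // Resolve each registered char filter name to its factory class,
    // mirroring the SPI branch of lookupAnalysisClass above.
    for (String name : CharFilterFactory.availableCharFilters()) {
      Class<? extends CharFilterFactory> clazz = CharFilterFactory.lookupClass(name);
      System.out.println(name + " -> " + clazz.getName());
    }
  }
}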
Example #19
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
  for (CharFilterFactory charFilter : charFilters) {
    reader = charFilter.normalize(reader);
  }
  return reader;
}
Example #20
Source File: AnalyzerFactory.java From lucene-solr with Apache License 2.0
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  this.charFilterFactories = charFilterFactories;
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
Example #21
Source File: AnalyzerFactory.java From lucene-solr with Apache License 2.0
@Override
public String toString() {
  StringBuilder sb = new StringBuilder("AnalyzerFactory(");
  if (null != name) {
    sb.append("name:");
    sb.append(name);
    sb.append(", ");
  }
  if (null != positionIncrementGap) {
    sb.append("positionIncrementGap:");
    sb.append(positionIncrementGap);
    sb.append(", ");
  }
  if (null != offsetGap) {
    sb.append("offsetGap:");
    sb.append(offsetGap);
    sb.append(", ");
  }
  for (CharFilterFactory charFilterFactory : charFilterFactories) {
    sb.append(charFilterFactory);
    sb.append(", ");
  }
  sb.append(tokenizerFactory);
  for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
    sb.append(", ");
    sb.append(tokenFilterFactory);
  }
  sb.append(')');
  return sb.toString();
}
Example #22
Source File: AnalysisImpl.java From lucene-solr with Apache License 2.0
@Override
public Collection<String> getAvailableCharFilters() {
  return CharFilterFactory.availableCharFilters().stream().sorted().collect(Collectors.toList());
}
Example #23
Source File: AlfrescoFieldType.java From SearchServices with GNU Lesser General Public License v3.0
public TokenizerChain build() {
  CharFilterFactory[] charFilterArr = charFilters == null
      ? null
      : charFilters.toArray(new CharFilterFactory[charFilters.size()]);
  TokenFilterFactory[] filterArr = filters == null
      ? new TokenFilterFactory[0]
      : filters.toArray(new TokenFilterFactory[filters.size()]);
  return new TokenizerChain(charFilterArr, tokenizer, filterArr);
}
Example #24
Source File: TokenizerChain.java From lucene-solr with Apache License 2.0
/** @return array of CharFilterFactories, may be empty but never null */
public CharFilterFactory[] getCharFilterFactories() {
  return charFilters;
}
Example #25
Source File: TestFactories.java From lucene-solr with Apache License 2.0
FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
  assert tokenizer != null;
  this.tokenizer = tokenizer;
  this.charFilter = charFilter;
  this.tokenfilter = tokenfilter;
}
Example #26
Source File: AnalysisChainDialogFactory.java From lucene-solr with Apache License 2.0
private JPanel analysisChain() {
  JPanel panel = new JPanel(new GridBagLayout());
  panel.setOpaque(false);

  GridBagConstraints c = new GridBagConstraints();
  c.fill = GridBagConstraints.HORIZONTAL;
  c.insets = new Insets(5, 5, 5, 5);

  c.gridx = 0;
  c.gridy = 0;
  c.weightx = 0.1;
  c.weighty = 0.5;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.charfilters")), c);

  String[] charFilters = analyzer.getCharFilterFactories().stream()
      .map(f -> CharFilterFactory.findSPIName(f.getClass()))
      .toArray(String[]::new);
  JList<String> charFilterList = new JList<>(charFilters);
  charFilterList.setVisibleRowCount(charFilters.length == 0 ? 1 : Math.min(charFilters.length, 5));
  c.gridx = 1;
  c.gridy = 0;
  c.weightx = 0.5;
  c.weighty = 0.5;
  panel.add(new JScrollPane(charFilterList), c);

  c.gridx = 0;
  c.gridy = 1;
  c.weightx = 0.1;
  c.weighty = 0.1;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.tokenizer")), c);

  String tokenizer = TokenizerFactory.findSPIName(analyzer.getTokenizerFactory().getClass());
  JTextField tokenizerTF = new JTextField(tokenizer);
  tokenizerTF.setColumns(30);
  tokenizerTF.setEditable(false);
  tokenizerTF.setPreferredSize(new Dimension(300, 25));
  tokenizerTF.setBorder(BorderFactory.createLineBorder(Color.gray));
  c.gridx = 1;
  c.gridy = 1;
  c.weightx = 0.5;
  c.weighty = 0.1;
  panel.add(tokenizerTF, c);

  c.gridx = 0;
  c.gridy = 2;
  c.weightx = 0.1;
  c.weighty = 0.5;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.tokenfilters")), c);

  String[] tokenFilters = analyzer.getTokenFilterFactories().stream()
      .map(f -> TokenFilterFactory.findSPIName(f.getClass()))
      .toArray(String[]::new);
  JList<String> tokenFilterList = new JList<>(tokenFilters);
  tokenFilterList.setVisibleRowCount(tokenFilters.length == 0 ? 1 : Math.min(tokenFilters.length, 5));
  tokenFilterList.setMinimumSize(new Dimension(300, 25));
  c.gridx = 1;
  c.gridy = 2;
  c.weightx = 0.5;
  c.weighty = 0.5;
  panel.add(new JScrollPane(tokenFilterList), c);

  return panel;
}
Example #27
Source File: CustomAnalyzer.java From lucene-solr with Apache License 2.0
/** Returns the list of char filters that are used in this analyzer. */
public List<CharFilterFactory> getCharFilterFactories() {
  return Collections.unmodifiableList(Arrays.asList(charFilters));
}