org.tartarus.snowball.SnowballStemmer Java Examples
The following examples show how to use
org.tartarus.snowball.SnowballStemmer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LuceneCarrot2StemmerFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds an {@link IStemmer} adapter around the {@link SnowballStemmer}
 * class registered for the given language code.
 *
 * <p>If no stemmer class is registered for the language, or the registered
 * class cannot be instantiated, a warning is logged and the identity stemmer
 * is returned instead.
 */
public static IStemmer createStemmer(LanguageCode language) {
  final Class<? extends SnowballStemmer> stemmerType = snowballStemmerClasses.get(language);

  if (stemmerType != null) {
    try {
      final SnowballStemmer delegate = stemmerType.getConstructor().newInstance();
      return new SnowballStemmerAdapter(delegate);
    } catch (Exception e) {
      log.warn("Could not instantiate snowball stemmer for language: {}"
          + ". Quality of clustering may be degraded.", language.name(), e);
      return IdentityStemmer.INSTANCE;
    }
  }

  // Nothing registered for this language: fall back to the no-op stemmer.
  log.warn("No Snowball stemmer class for: {}. "
      + "Quality of clustering may be degraded.", language.name());
  return IdentityStemmer.INSTANCE;
}
Example #2
Source File: StemmerFactory.java From sasi with Apache License 2.0 | 6 votes |
/**
 * Creates a new {@link SnowballStemmer} for the language of the given locale.
 *
 * <p>Only the first two characters of {@link Locale#getLanguage()} are used
 * as the lookup key into {@code SUPPORTED_LANGUAGES}.
 *
 * @param locale the locale whose language selects the stemmer; may be {@code null}
 * @return a new stemmer instance, or {@code null} when the locale is
 *         {@code null}, its language code is shorter than two characters
 *         (for example {@link Locale#ROOT}, whose language is the empty
 *         string), the language is not supported, or instantiation fails
 */
public static SnowballStemmer getStemmer(Locale locale) {
  if (locale == null) {
    return null;
  }

  String language = locale.getLanguage();
  // getLanguage() can return an empty string; substring(0, 2) on anything
  // shorter than two characters would throw StringIndexOutOfBoundsException.
  if (language.length() < 2) {
    return null;
  }

  String rootLang = language.substring(0, 2);
  try {
    // Raw type kept as in the original: the declared types of the lookup map
    // and the constructor cache are not visible from this snippet.
    Class clazz = SUPPORTED_LANGUAGES.get(rootLang);
    if (clazz == null) {
      return null;
    }
    Constructor<?> ctor = STEMMER_CONSTRUCTOR_CACHE.get(clazz);
    return (SnowballStemmer) ctor.newInstance();
  } catch (Exception e) {
    // Best effort: a failure to build the stemmer is logged and reported as null.
    logger.debug("Failed to create new SnowballStemmer instance "
        + "for language [{}]", locale.getLanguage(), e);
  }
  return null;
}
Example #3
Source File: StemmerTest.java From spark-stemming with BSD 2-Clause "Simplified" License | 5 votes |
/** Sanity check: the English stemmer is expected to turn "Jumps" into "Jump". */
@Test
public void englishSanityCheck() {
  SnowballStemmer stemmer = new englishStemmer();

  stemmer.setCurrent("Jumps");
  stemmer.stem();

  Assert.assertEquals("Jump", stemmer.getCurrent());
}
Example #4
Source File: SnowballPorterFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Resolves the stemmer class for the configured language through the given
 * resource loader and, when word files are configured, loads the set of
 * protected words.
 *
 * @param loader the resource loader used to instantiate the stemmer and read word files
 * @throws IOException declared by the overridden method
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";

  // An instance is created only to obtain its runtime class.
  final SnowballStemmer probe = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = probe.getClass();

  if (wordFiles == null) {
    return;
  }
  protectedWords = getWordSet(loader, wordFiles, false);
}
Example #5
Source File: SnowballFilter.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Construct the named stemming filter. * * Available stemmers are listed in {@link org.tartarus.snowball.ext}. * The name of a stemmer is the part of the class name before "Stemmer", * e.g., the stemmer in {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English". * * @param in the input tokens to stem * @param name the name of a stemmer */ public SnowballFilter(TokenStream in, String name) { super(in); //Class.forName is frowned upon in place of the ResourceLoader but in this case, // the factory will use the other constructor so that the program is already loaded. try { Class<? extends SnowballStemmer> stemClass = Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballStemmer.class); stemmer = stemClass.getConstructor().newInstance(); } catch (Exception e) { throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e); } }
Example #6
Source File: StemmingCorrection.java From EDDI with Apache License 2.0 | 5 votes |
/**
 * Reflectively instantiates the Snowball stemmer class named after the
 * configured {@code language}.
 *
 * @return a new stemmer instance
 * @throws RuntimeException wrapping any failure to load, construct or cast the stemmer
 */
private SnowballStemmer createNewStemmer() {
  try {
    final String className = "org.tartarus.snowball.ext." + language + "Stemmer";
    final Object instance = Class.forName(className).getDeclaredConstructor().newInstance();
    return (SnowballStemmer) instance;
  } catch (Exception e) {
    throw new RuntimeException(e.getMessage(), e);
  }
}
Example #7
Source File: RevisedLesk.java From lesk-wsd-dsm with GNU General Public License v3.0 | 5 votes |
/**
 * Selects a stemmer implementation for the given language.
 *
 * @param language the language to stem
 * @return a new stemmer for EN, ES, FR, DE or IT; {@code null} for any other language
 */
private SnowballStemmer getStemmer(Language language) {
    if (language.equals(Language.EN)) {
        return new porterStemmer();
    }
    if (language.equals(Language.ES)) {
        return new spanishStemmer();
    }
    if (language.equals(Language.FR)) {
        return new frenchStemmer();
    }
    if (language.equals(Language.DE)) {
        return new germanStemmer();
    }
    if (language.equals(Language.IT)) {
        return new italianStemmer();
    }
    // No stemmer is available for this language.
    return null;
}
Example #8
Source File: RevisedLesk.java From lesk-wsd-dsm with GNU General Public License v3.0 | 5 votes |
/**
 * Builds a bag of words from the given text.
 *
 * <p>The text is tokenized with a {@link StandardAnalyzer}. When stemming is
 * enabled and a stemmer exists for the configured language, each term is
 * replaced by its stem whenever the stemmer reports success. The analyzer
 * and the token stream are closed before returning, including on failure.
 *
 * @param text the text to tokenize
 * @return a map from each (possibly stemmed) term to the number of times it occurs
 * @throws IOException if reading from or closing the token stream fails
 */
public Map<String, Float> buildBag(String text) throws IOException {
    Map<String, Float> bag = new HashMap<>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    try {
        SnowballStemmer stemmer = null;
        if (stemming) {
            stemmer = getStemmer(language);
            if (stemmer == null) {
                // Proceed without stemming rather than failing.
                Logger.getLogger(RevisedLesk.class.getName()).log(Level.WARNING, "No stemmer for language {0}", language);
            }
        }
        TokenStream tokenStream = analyzer.tokenStream("gloss", new StringReader(text));
        try {
            // The attribute instance is reused for every token, so it is looked up once.
            TermAttribute token = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
            while (tokenStream.incrementToken()) {
                String term = token.term();
                if (stemmer != null) {
                    stemmer.setCurrent(term);
                    if (stemmer.stem()) {
                        term = stemmer.getCurrent();
                    }
                }
                Float c = bag.get(term);
                if (c == null) {
                    bag.put(term, 1f);
                } else {
                    bag.put(term, c + 1f);
                }
            }
        } finally {
            tokenStream.close();
        }
    } finally {
        analyzer.close();
    }
    return bag;
}
Example #9
Source File: SnowballFilter.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a filter over the given token stream that uses the supplied stemmer.
 *
 * @param input the token stream passed to the superclass constructor
 * @param stemmer the stemmer instance stored by this filter
 */
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
  super(input);
  this.stemmer = stemmer;
}
Example #10
Source File: LuceneCarrot2StemmerFactory.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates an adapter that holds the given Snowball stemmer.
 *
 * @param snowballStemmer the stemmer instance stored by this adapter
 */
public SnowballStemmerAdapter(SnowballStemmer snowballStemmer) {
  this.snowballStemmer = snowballStemmer;
}