org.tartarus.snowball.SnowballStemmer Java Examples

The following examples show how to use org.tartarus.snowball.SnowballStemmer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LuceneCarrot2StemmerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link IStemmer} backed by the {@link SnowballStemmer} class
 * registered for the supplied language code. When no class is registered,
 * or the registered class cannot be instantiated, a warning is logged and
 * the identity stemmer is handed back instead.
 */
public static IStemmer createStemmer(LanguageCode language) {
  final Class<? extends SnowballStemmer> registered = snowballStemmerClasses.get(language);

  if (registered != null) {
    try {
      final SnowballStemmer delegate = registered.getConstructor().newInstance();
      return new SnowballStemmerAdapter(delegate);
    } catch (Exception e) {
      // Instantiation problems are not fatal: fall back to the identity stemmer.
      log.warn("Could not instantiate snowball stemmer for language: {}"
          + ". Quality of clustering may be degraded.", language.name(), e);
      return IdentityStemmer.INSTANCE;
    }
  }

  log.warn("No Snowball stemmer class for: {}. "
      + "Quality of clustering may be degraded.", language.name());
  return IdentityStemmer.INSTANCE;
}
 
Example #2
Source File: StemmerFactory.java    From sasi with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new {@link SnowballStemmer} for the language of the given locale.
 *
 * The lookup key is the first two characters of {@link Locale#getLanguage()}.
 *
 * @param locale the locale whose language selects the stemmer; may be {@code null}
 * @return a fresh stemmer instance, or {@code null} when the locale is
 *         {@code null}, carries no usable language code, is not present in
 *         {@code SUPPORTED_LANGUAGES}, or the stemmer could not be instantiated
 */
public static SnowballStemmer getStemmer(Locale locale)
{
    if (locale == null)
        return null;

    String language = locale.getLanguage();
    // Locale.ROOT and other language-less locales report an empty language;
    // taking substring(0, 2) of that would throw StringIndexOutOfBoundsException.
    if (language.length() < 2)
        return null;

    String rootLang = language.substring(0, 2);
    try
    {
        Class<?> clazz = SUPPORTED_LANGUAGES.get(rootLang);
        if (clazz == null)
            return null;
        Constructor<?> ctor = STEMMER_CONSTRUCTOR_CACHE.get(clazz);
        return (SnowballStemmer) ctor.newInstance();
    }
    catch (Exception e)
    {
        // Best effort: a failed instantiation is reported at debug level and
        // signalled to the caller as "no stemmer available".
        logger.debug("Failed to create new SnowballStemmer instance " +
                "for language [{}]", locale.getLanguage(), e);
    }
    return null;
}
 
Example #3
Source File: StemmerTest.java    From spark-stemming with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/** Checks that the English stemmer turns "Jumps" into "Jump". */
@Test
public void englishSanityCheck() {
    final String input = "Jumps";
    final String expected = "Jump";

    SnowballStemmer english = new englishStemmer();
    english.setCurrent(input);
    english.stem();

    Assert.assertEquals(expected, english.getCurrent());
}
 
Example #4
Source File: SnowballPorterFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the stemmer class for the configured language through the given
 * loader and, when word files are configured, loads the protected word set.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
  // Instantiate once through the loader purely to obtain the concrete class.
  final SnowballStemmer probe = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = probe.getClass();

  if (wordFiles == null) {
    return;
  }
  protectedWords = getWordSet(loader, wordFiles, false);
}
 
Example #5
Source File: SnowballFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a stemming filter for the stemmer with the given name.
 *
 * The stemmers that can be named live in {@link org.tartarus.snowball.ext}.
 * A stemmer's name is its class name with the trailing "Stemmer" removed,
 * e.g., {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
 *
 * @param in the input tokens to stem
 * @param name the name of a stemmer
 */
public SnowballFilter(TokenStream in, String name) {
  super(in);
  // Class.forName is normally discouraged in favour of the ResourceLoader, but
  // the factory goes through the other constructor, so by the time this one
  // runs the stemmer class is already loaded.
  try {
    stemmer = Class.forName("org.tartarus.snowball.ext." + name + "Stemmer")
        .asSubclass(SnowballStemmer.class)
        .getConstructor()
        .newInstance();
  } catch (Exception e) {
    throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
  }
}
 
Example #6
Source File: StemmingCorrection.java    From EDDI with Apache License 2.0 5 votes vote down vote up
/**
 * Reflectively instantiates the Snowball stemmer whose class name is
 * derived from the {@code language} field.
 *
 * @return a new stemmer instance
 * @throws RuntimeException wrapping any failure to load or instantiate the class
 */
private SnowballStemmer createNewStemmer() {
    try {
        final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
        final Object instance = Class.forName(stemmerClassName)
                .getDeclaredConstructor()
                .newInstance();
        return (SnowballStemmer) instance;
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
 
Example #7
Source File: RevisedLesk.java    From lesk-wsd-dsm with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Picks the stemmer implementation for the given language.
 *
 * @param language the language to stem
 * @return a new stemmer for EN (Porter), ES, FR, DE or IT; {@code null} for
 *         any other language
 */
private SnowballStemmer getStemmer(Language language) {
    if (language.equals(Language.EN)) {
        return new porterStemmer();
    }
    if (language.equals(Language.ES)) {
        return new spanishStemmer();
    }
    if (language.equals(Language.FR)) {
        return new frenchStemmer();
    }
    if (language.equals(Language.DE)) {
        return new germanStemmer();
    }
    if (language.equals(Language.IT)) {
        return new italianStemmer();
    }
    // No stemmer is wired up for the remaining languages.
    return null;
}
 
Example #8
Source File: RevisedLesk.java    From lesk-wsd-dsm with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a bag of words from the given text: the text is tokenized with a
 * {@link StandardAnalyzer} and every term is counted. When stemming is
 * enabled and a stemmer exists for the configured language, terms that the
 * stemmer successfully stems are replaced by their stem before counting.
 *
 * @param text the text to tokenize
 * @return a map from each (possibly stemmed) term to its number of occurrences
 * @throws IOException if reading from or closing the token stream fails
 */
public Map<String, Float> buildBag(String text) throws IOException {
    Map<String, Float> bag = new HashMap<>();
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    SnowballStemmer stemmer = null;
    if (stemming) {
        stemmer = getStemmer(language);
        if (stemmer == null) {
            // Not fatal: terms are simply counted unstemmed.
            Logger.getLogger(RevisedLesk.class.getName()).log(Level.WARNING, "No stemmer for language {0}", language);
        }
    }
    TokenStream tokenStream = analyzer.tokenStream("gloss", new StringReader(text));
    try {
        // The attribute instance is shared across tokens, so look it up once.
        TermAttribute token = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
        // Consumer workflow: reset() before the first incrementToken(),
        // end() after the last one, close() in all cases.
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = token.term();
            if (stemmer != null) {
                stemmer.setCurrent(term);
                if (stemmer.stem()) {
                    term = stemmer.getCurrent();
                }
            }
            Float c = bag.get(term);
            if (c == null) {
                bag.put(term, 1f);
            } else {
                bag.put(term, c + 1f);
            }
        }
        tokenStream.end();
    } finally {
        // Release the stream's resources even if tokenization fails.
        tokenStream.close();
    }
    return bag;
}
 
Example #9
Source File: SnowballFilter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a filter that uses an already constructed stemmer.
 *
 * @param input the token stream handed to the superclass constructor
 * @param stemmer the stemmer instance stored in this filter's {@code stemmer} field
 */
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
  super(input);
  this.stemmer = stemmer;
}
 
Example #10
Source File: LuceneCarrot2StemmerFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an adapter around the given stemmer.
 *
 * @param snowballStemmer the stemmer instance stored in this adapter's
 *        {@code snowballStemmer} field
 */
public SnowballStemmerAdapter(SnowballStemmer snowballStemmer) {
  this.snowballStemmer = snowballStemmer;
}