org.apache.lucene.analysis.CharArraySet#unmodifiableSet

Source File: DutchAnalyzer.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds an analyzer from the given stop words, stem exclusion set and stem
 * override dictionary.
 * <p>
 * Both sets are copied and wrapped as unmodifiable before being stored. When the
 * override dictionary is empty no override map is kept ({@code stemdict} is
 * {@code null}); otherwise every entry is fed into a
 * {@link StemmerOverrideFilter.Builder} and the built map is stored.
 *
 * @param stopwords a stopword set
 * @param stemExclusionTable a set of terms not to be stemmed
 * @param stemOverrideDict term-to-stem overrides; must not be {@code null}
 * @throws RuntimeException if building the override map fails with an {@link IOException}
 */
public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
  if (!stemOverrideDict.isEmpty()) {
    // case-insensitive matching is unnecessary: this analyzer lowercases anyway
    StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(false);
    // one reusable buffer for all keys instead of a fresh object per entry
    CharsRefBuilder keyBuffer = new CharsRefBuilder();
    for (CharArrayMap<String>.EntryIterator entries = stemOverrideDict.entrySet().iterator(); entries.hasNext(); ) {
      char[] key = entries.nextKey();
      keyBuffer.copyChars(key, 0, key.length);
      overrides.add(keyBuffer.get(), entries.currentValue());
    }
    try {
      this.stemdict = overrides.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  } else {
    this.stemdict = null;
  }
}

Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0

5 votes

/**
 * Parses a comma-separated stop word list into an unmodifiable {@link CharArraySet}.
 * <p>
 * Each term is stripped of surrounding whitespace, so {@code "a, an, the"} yields
 * {@code a}, {@code an} and {@code the} rather than entries with a leading space.
 * Blank terms (from an empty list, doubled commas or a leading comma) are skipped
 * instead of being added as an empty stop word.
 *
 * @param luceneVersion the Lucene version passed to the {@link CharArraySet} constructor
 * @param stopwordList comma-separated stop words; must not be {@code null}
 * @param ignoreCase whether the resulting set should match case-insensitively
 * @return an unmodifiable set holding the non-blank, trimmed terms
 */
public static CharArraySet getStopWordList(Version luceneVersion, String stopwordList, boolean ignoreCase) {
    String[] terms = stopwordList.split(",");
    CharArraySet stopwordSet = new CharArraySet(luceneVersion, terms.length, ignoreCase);
    for (String term : terms) {
        String trimmed = term.trim();
        // split(",") keeps leading/interior empty strings; never register "" as a stop word
        if (!trimmed.isEmpty()) {
            stopwordSet.add(trimmed);
        }
    }
    return CharArraySet.unmodifiableSet(stopwordSet);
}

Source File: SnowballAnalyzer.java From crate with Apache License 2.0

4 votes

/** Builds the named analyzer with the given stop words. */
SnowballAnalyzer(String name, CharArraySet stopWords) {
    this(name);
    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}

Source File: ArabicAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * {@link ArabicStemFilter}.
 * 
 * @param stopwords
 *          a stopword set
 * @param stemExclusionSet
 *          a set of terms not to be stemmed
 */
public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: EnglishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: GalicianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: RomanianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: BasqueAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: TurkishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 *
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public TurkishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: CzechAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words and a set of work to be
 * excluded from the {@link CzechStemFilter}.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionTable a stemming exclusion set
 */
public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) {
  super(stopwords);
  this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
}

Source File: ItalianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: RussianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words
 * 
 * @param stopwords
 *          a stopword set
 * @param stemExclusionSet a set of words not to be stemmed
 */
public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: SpanishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: LatvianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: ArmenianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: BengaliAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a stemming exclusion set
 */
public BengaliAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: NorwegianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: EstonianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 *
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public EstonianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
    super(stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: UkrainianMorfologikAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public UkrainianMorfologikAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Source File: CatalanAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Java Code Examples for org.apache.lucene.analysis.CharArraySet#unmodifiableSet()