org.apache.lucene.analysis.core.StopFilter Java Examples
The following examples show how to use org.apache.lucene.analysis.core.StopFilter.
Each example notes the project it comes from, the source file, and the license.
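Before the project examples, here is a minimal sketch of the typical usage pattern: a StopFilter wraps a Tokenizer inside a custom Analyzer. This sketch is not taken from any of the projects below; the class name StopWordAnalyzer is illustrative, and the imports assume a Lucene 5.x+ style API in which StopFilter takes a TokenStream and a CharArraySet directly.

// Minimal sketch (not from any project on this page). Assumes a Lucene 5.x+ style API;
// the class name StopWordAnalyzer is illustrative.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class StopWordAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new StandardTokenizer();
        // Remove English stop words ("the", "and", "of", ...) from the token stream.
        TokenStream result = new StopFilter(source, EnglishAnalyzer.getDefaultStopSet());
        return new TokenStreamComponents(source, result);
    }
}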
Example #1
Source File: StandardPreProcessorIterator.java From Indra with MIT License
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {
    // Stop words supplied with the metadata take precedence.
    if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
        return new StopFilter(stream, new CharArraySet(metadataStopWords, false));
    } else {
        try {
            // Otherwise fall back to the default stop word set for the language.
            Set<String> sws = getDefaultStopWordSet(lang);
            if (sws != null) {
                CharArraySet stopWords = new CharArraySet(30, true);
                stopWords.addAll(sws);
                return new StopFilter(stream, stopWords);
            }
        } catch (IndraException e) {
            throw new IndraRuntimeException(String.format("Error creating stop filter for lang '%s'", lang), e);
        }
    }
    // No stop words available: return the stream unfiltered.
    return stream;
}
Example #2
Source File: QueryParserImpl.java From AdSearch_Endpoints with Apache License 2.0
@Override
public List<String> parseQuery(String queryStr) {
    // tokenize queryStr, remove stop word, stemming
    List<String> tokens = new ArrayList<String>();
    AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
    Tokenizer tokenizer = new StandardTokenizer(factory);
    tokenizer.setReader(new StringReader(queryStr));
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    TokenStream tokenStream = new StopFilter(tokenizer, stopWords);
    // StringBuilder sb = new StringBuilder();
    CharTermAttribute charTermAttribute = tokenizer.addAttribute(CharTermAttribute.class);
    try {
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            String term = charTermAttribute.toString();
            tokens.add(term);
            // sb.append(term + " ");
        }
        tokenStream.end();
        tokenStream.close();
        tokenizer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
    // System.out.println("QU=" + sb.toString());
    return tokens;
}
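As a quick usage sketch of the parser above (the query string and expected output are illustrative, not taken from the AdSearch_Endpoints project):

// Hypothetical usage of the QueryParserImpl shown above.
QueryParserImpl parser = new QueryParserImpl();
List<String> tokens = parser.parseQuery("the quick brown fox");
// English stop words such as "the" are dropped, leaving [quick, brown, fox].
System.out.println(tokens);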
Example #3
Source File: StopTokenFilterFactory.java From Elasticsearch with Apache License 2.0
@Override
public TokenStream create(TokenStream tokenStream) {
    if (removeTrailing) {
        if (version.onOrAfter(Version.LUCENE_4_4)) {
            // Lucene 4.4+ StopFilter handles position increments itself.
            return new StopFilter(tokenStream, stopWords);
        } else {
            // Older indices use the legacy filter with an explicit enablePositionIncrements flag.
            return new Lucene43StopFilter(enablePositionIncrements, tokenStream, stopWords);
        }
    } else {
        // Keep a trailing stop word that may still be the prefix of a longer term (suggest use case).
        return new SuggestStopFilter(tokenStream, stopWords);
    }
}
Example #4
Source File: ManagedStopFilterFactory.java From lucene-solr with Apache License 2.0
/**
 * Returns a StopFilter based on our managed stop word set.
 */
@Override
public TokenStream create(TokenStream input) {
    if (stopWords == null) {
        throw new IllegalStateException("Managed stopwords not initialized correctly!");
    }
    return new StopFilter(input, stopWords);
}
Example #5
Source File: CASAnalyzer.java From oodt with Apache License 2.0
/** Builds an analyzer with the given stop words. */
public CASAnalyzer(CharArraySet stopWords) {
    // CharArraySet iterates over char[] entries; convert each one back to a String.
    Iterator iter = stopWords.iterator();
    List<String> sw = new ArrayList<>();
    while (iter.hasNext()) {
        char[] stopWord = (char[]) iter.next();
        sw.add(new String(stopWord));
    }
    stopSet = StopFilter.makeStopSet(sw);
}
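The constructor above only builds the stop word set. As a hedged sketch of how such an analyzer typically consumes it (this method body is an assumption for illustration, not taken from the oodt source), the set is handed to a StopFilter when the token stream is assembled:

// Hypothetical continuation (not from CASAnalyzer): using the stopSet built by
// StopFilter.makeStopSet(...) when the analyzer assembles its token stream.
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new StandardTokenizer();
    TokenStream result = new StopFilter(source, stopSet);
    return new TokenStreamComponents(source, result);
}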