org.apache.lucene.analysis.StopAnalyzer Java Examples
The following examples show how to use the org.apache.lucene.analysis.StopAnalyzer class.
You can vote up the examples you like and vote down the ones you don't. To go to the original project or source file, follow the links above each example. Related API usage is listed in the sidebar.
Example #1
Source File: 387581_IndexTaskTest_0_t.java From coming with MIT License | 6 votes |
/**
 * JUnit setup method: configures an {@code IndexTask} with a file set rooted
 * at {@code docsDir}, overwrite enabled, the test's document handler and
 * {@code indexDir} as the index location, executes it, and then creates the
 * searcher and analyzer fields.
 *
 * @throws Exception if any step of the setup fails
 */
public void setUp() throws Exception {
    Project antProject = new Project();
    IndexTask indexTask = new IndexTask();

    // File set rooted at the documents directory.
    FileSet documents = new FileSet();
    File docsRoot = new File(docsDir);
    documents.setDir(docsRoot);
    indexTask.addFileset(documents);

    // Configure the task, then run it to produce the index.
    indexTask.setOverwrite(true);
    indexTask.setDocumentHandler(docHandler);
    File indexLocation = new File(indexDir);
    indexTask.setIndex(indexLocation);
    indexTask.setProject(antProject);
    indexTask.execute();

    // Objects created after the task has run.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
Example #2
Source File: 387581_IndexTaskTest_0_s.java From coming with MIT License | 6 votes |
/**
 * Prepares the test fixture.
 *
 * <p>Runs an {@code IndexTask} (overwrite on, using {@code docHandler}) over a
 * file set whose directory is {@code docsDir}, writing to {@code indexDir};
 * afterwards assigns a new {@code IndexSearcher} on {@code indexDir} and a new
 * {@code StopAnalyzer} to the corresponding fields.
 *
 * @throws Exception if the setup cannot be completed
 */
public void setUp() throws Exception {
    Project owningProject = new Project();
    IndexTask indexer = new IndexTask();
    FileSet sourceFiles = new FileSet();

    File sourceDir = new File(docsDir);
    sourceFiles.setDir(sourceDir);

    indexer.addFileset(sourceFiles);
    indexer.setOverwrite(true);
    indexer.setDocumentHandler(docHandler);

    File targetIndex = new File(indexDir);
    indexer.setIndex(targetIndex);

    indexer.setProject(owningProject);
    indexer.execute();

    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
Example #3
Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0 | 5 votes |
/**
 * Creates the annotator from the supplied configuration.
 *
 * <p>Reads {@code CHECK_LEMMA} (default {@code "false"}). When a
 * {@code STOPWORDS_LIST} value is configured, the stopword set is built from
 * it via {@code getStopWordList}, honouring {@code IGNORE_STOPWORD_CASE}
 * (default {@code "false"}); otherwise the set falls back to
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @param annotatorClass not used by this constructor
 * @param props configuration properties; kept in the {@code props} field
 */
public StopwordAnnotator(String annotatorClass, Properties props) {
    this.props = props;
    this.checkLemma = Boolean.parseBoolean(props.getProperty(CHECK_LEMMA, "false"));

    // Look the value up with getProperty rather than testing containsKey:
    // containsKey is inherited from Hashtable and ignores the Properties
    // defaults chain, whereas getProperty consults it. getProperty also
    // returns null for a missing or non-String value, so a null check here
    // guarantees getStopWordList never receives a null list.
    String stopwordList = props.getProperty(STOPWORDS_LIST);
    if (stopwordList != null) {
        boolean ignoreCase =
                Boolean.parseBoolean(props.getProperty(IGNORE_STOPWORD_CASE, "false"));
        this.stopwords = getStopWordList(Version.LUCENE_36, stopwordList, ignoreCase);
    } else {
        this.stopwords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
}
Example #4
Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0 | 5 votes |
/** * Test to validate that stopwords are properly annotated in the token list * @throws Exception */ @org.junit.Test public void testLuceneStopwordList() throws Exception { Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, stopword"); props.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props); Annotation document = new Annotation(example); pipeline.annotate(document); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); //get the standard lucene stopword set Set<?> stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET; for (CoreLabel token : tokens) { //get the stopword annotation Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class); String word = token.word().toLowerCase(); if (stopWords.contains(word)) { assertTrue(stopword.first()); } else { assertFalse(stopword.first()); } //not checking lemma, so always false assertFalse(stopword.second()); } }