org.apache.lucene.analysis.util.ClasspathResourceLoader Java Examples
The following examples show how to use
org.apache.lucene.analysis.util.ClasspathResourceLoader.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0 | 6 votes |
/**
 * Checks that when the "parser" argument carries only a "class" entry (its
 * "factory" entry is stubbed to null), the plugin produces a
 * {@code SimpleQuerqyQParserFactory} bound to {@code WhiteSpaceQuerqyParser}.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    // Stub the outer argument list so that "parser" resolves to the nested config.
    NamedList<NamedList<String>> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(parserConfig);

    // The nested config names a parser class but no factory.
    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");

    ResourceLoader loader = new ClasspathResourceLoader(getClass().getClassLoader());

    final SolrQuerqyParserFactory created = plugin.loadSolrQuerqyParserFactory(loader, pluginArgs);

    assertNotNull(created);
    assertTrue(created instanceof SimpleQuerqyQParserFactory);
    assertEquals(WhiteSpaceQuerqyParser.class, ((SimpleQuerqyQParserFactory) created).querqyParserClass);
}
Example #2
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs the OpenNLP tokenizer plus POS filter over {@code SENTENCES} twice:
 * first checking the POS tags as token types, then, with
 * {@code TypeAsPayloadTokenFilterFactory} appended, checking them as payloads.
 */
public void testPOS() throws Exception {
  // Pass 1: POS tags are compared via the "types" argument.
  CustomAnalyzer typesAnalyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(typesAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  // Pass 2: same chain with the type copied into the payload.
  CustomAnalyzer payloadAnalyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(payloadAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}
Example #3
Source File: TestKeepFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a KeepWord filter factory from one and then two word files and
 * checks the size of the resulting word set (2 and 4 entries respectively).
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  KeepWordFilterFactory keepFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet keepWords = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  assertTrue("words Size: " + keepWords.size() + " is not: " + 2, keepWords.size() == 2);

  // Comma-separated list of two word files.
  keepFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  keepWords = keepFactory.getWords();
  assertTrue("words is null and it shouldn't be", keepWords != null);
  assertTrue("words Size: " + keepWords.size() + " is not: " + 4, keepWords.size() == 4);
}
Example #4
Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Exercises repeated close/reset/setReader cycles on a tokenizer created by
 * {@code OpenNLPTokenizerFactory} and checks that it still produces
 * {@code SENTENCES_punc} for {@code SENTENCES} after each cycle.
 */
@Test
public void testClose() throws IOException {
  // A plain HashMap instead of double-brace initialization: the latter creates an
  // anonymous subclass that holds a hidden reference to the enclosing test instance.
  Map<String,String> args = new HashMap<>();
  args.put("sentenceModel", "en-test-sent.bin");
  args.put("tokenizerModel", "en-test-tokenizer.bin");

  OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
  factory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer ts = factory.create(newAttributeFactory());

  // First cycle: open and close without consuming any tokens.
  ts.setReader(new StringReader(SENTENCES));
  ts.reset();
  ts.close();

  // Second cycle (call order kept exactly as in the original test).
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
  ts.close();

  // Third cycle.
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
}
Example #5
Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Configures the ICU tokenizer with the "Latin-dont-break-on-hyphens.rbbi"
 * rule file for the Latn script and checks that hyphen-joined words such as
 * "One-two" and "brung-it" come out as single tokens.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");

  ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(factoryArgs);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));

  Reader input = new StringReader(
      "One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
  Tokenizer tokenizer = icuFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  String[] expectedTerms = {
      "One-two", "punch",
      "Brang", "not", "brung-it",
      "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish"
  };
  assertTokenStreamContents(tokenizer, expectedTerms);
}
Example #6
Source File: TestSuggestStopFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code SuggestStopFilterFactory} from alternating key/value strings.
 *
 * @param params key1, value1, key2, value2, ...; must have even length and no repeated key
 * @return the factory after {@code inform} has been called with a classpath loader
 * @throws IllegalArgumentException if an odd number of strings is supplied
 * @throws IOException declared by the signature; presumably raised while loading resources in {@code inform}
 */
private SuggestStopFilterFactory createFactory(String ... params) throws IOException {
  final int count = params.length;
  if (count % 2 == 1) {
    throw new IllegalArgumentException("invalid keysAndValues map");
  }

  Map<String, String> factoryArgs = new HashMap<>(count / 2);
  for (int idx = 0; idx < count; idx += 2) {
    final String key = params[idx];
    // put() returns the earlier value, so non-null means the key was repeated.
    String earlier = factoryArgs.put(key, params[idx + 1]);
    assertNull("duplicate values for key: " + key, earlier);
  }
  factoryArgs.put("luceneMatchVersion", Version.LATEST.toString());

  SuggestStopFilterFactory stopFactory = new SuggestStopFilterFactory(factoryArgs);
  stopFactory.inform(new ClasspathResourceLoader(getClass()));
  return stopFactory;
}
Example #7
Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Supplies two script/rule-file pairs (Cyrl and Thai) that both point at
 * "KeywordTokenizer.rbbi", replacing the default Thai tokenization, and
 * checks that each Cyrillic and Thai run is emitted as one token while the
 * English text is still split into words.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");

  ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(factoryArgs);
  icuFactory.inform(new ClasspathResourceLoader(getClass()));

  Reader input = new StringReader(
      "Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
  Tokenizer tokenizer = icuFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  String[] expectedTerms = {
      "Some", "English",
      "Немного русский. ",
      "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ ",
      "More", "English"
  };
  assertTokenStreamContents(tokenizer, expectedTerms);
}
Example #8
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Case: default */ public void testFactoryDefaults() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Metaphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Metaphone); assertTrue(factory.inject); // default }
Example #9
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that passing "false" for the inject option is reflected in the
 * factory's {@code inject} field.
 */
public void testInjectFalse() throws IOException {
  Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  config.put(PhoneticFilterFactory.INJECT, "false");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  ClasspathResourceLoader loader = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(loader);

  assertFalse(phonetic.inject);
}
Example #10
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that the max-code-length option ("2") is applied to the
 * {@code Metaphone} encoder created by the factory.
 */
public void testMaxCodeLength() throws IOException {
  Map<String,String> config = new HashMap<>();
  config.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  config.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  ClasspathResourceLoader loader = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(loader);

  Metaphone encoder = (Metaphone) phonetic.getEncoder();
  assertEquals(2, encoder.getMaxCodeLen());
}
Example #11
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that an unrecognised encoder name ("XXX") makes factory set-up fail
 * with an {@code IllegalArgumentException} mentioning "Error loading encoder".
 */
public void testUnknownEncoder() throws IOException {
  IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> {
    Map<String,String> config = new HashMap<>();
    config.put("encoder", "XXX");
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
    phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
  });

  String message = thrown.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}
Example #12
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a fully qualified but non-existent encoder class name makes
 * factory set-up fail with an {@code IllegalArgumentException} mentioning
 * "Error loading encoder".
 */
public void testUnknownEncoderReflection() throws IOException {
  IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> {
    Map<String,String> config = new HashMap<>();
    config.put("encoder", "org.apache.commons.codec.language.NonExistence");
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
    phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));
  });

  String message = thrown.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}
Example #13
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Case: Reflection */ public void testFactoryReflection() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Metaphone); assertTrue(factory.inject); // default }
Example #14
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * we use "Caverphone2" as it is registered in the REGISTRY as Caverphone, * so this effectively tests reflection without package name */ public void testFactoryReflectionCaverphone2() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Caverphone2"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Caverphone2); assertTrue(factory.inject); // default }
Example #15
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testFactoryReflectionCaverphone() throws IOException { Map<String,String> args = new HashMap<>(); args.put(PhoneticFilterFactory.ENCODER, "Caverphone"); PhoneticFilterFactory factory = new PhoneticFilterFactory(args); factory.inform(new ClasspathResourceLoader(factory.getClass())); assertTrue(factory.getEncoder() instanceof Caverphone2); assertTrue(factory.inject); // default }
Example #16
Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs {@code input} through a whitespace tokenizer followed by a phonetic
 * filter and compares the emitted terms with {@code expected}.
 *
 * @param algName  value for the factory's "encoder" argument
 * @param inject   value for the factory's "inject" argument
 * @param input    text to tokenize
 * @param expected terms the resulting stream must produce, in order
 */
static void assertAlgorithm(String algName, String inject, String input,
    String[] expected) throws Exception {
  Tokenizer source = whitespaceMockTokenizer(input);

  Map<String,String> config = new HashMap<>();
  config.put("encoder", algName);
  config.put("inject", inject);

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(config);
  phonetic.inform(new ClasspathResourceLoader(phonetic.getClass()));

  TokenStream filtered = phonetic.create(source);
  assertTokenStreamContents(filtered, expected);
}
Example #17
Source File: TestMorfologikFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Points the Morfologik filter at "custom-dictionary.dict" and checks that
 * "inflected1 inflected2" is mapped to "lemma1", "lemma2".
 */
public void testExplicitDictionary() throws Exception {
  final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  Map<String,String> config = new HashMap<>();
  config.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
  MorfologikFilterFactory morfologik = new MorfologikFilterFactory(config);
  morfologik.inform(loader);

  StringReader input = new StringReader("inflected1 inflected2");
  TokenStream tokens = whitespaceMockTokenizer(input);
  TokenStream lemmatized = morfologik.create(tokens);

  assertTokenStreamContents(lemmatized, new String[] {"lemma1", "lemma2"});
}
Example #18
Source File: TestMorfologikFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that naming a dictionary resource that does not exist makes
 * {@code inform} fail with an {@code IOException} whose message contains
 * "Resource not found".
 */
public void testMissingDictionary() throws Exception {
  final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  IOException thrown = expectThrows(IOException.class, () -> {
    Map<String,String> config = new HashMap<>();
    config.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
    MorfologikFilterFactory morfologik = new MorfologikFilterFactory(config);
    morfologik.inform(loader);
  });

  String message = thrown.getMessage();
  assertTrue(message.contains("Resource not found"));
}
Example #19
Source File: TestCommonGramsFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a CommonGrams filter factory from one word file, from two word
 * files, and from a snowball-format file, checking the common-word set and
 * the ignoreCase flag each time.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file: two entries expected.
  CommonGramsFilterFactory grams = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet common = grams.getCommonWords();
  assertTrue("words is null and it shouldn't be", common != null);
  assertTrue("words Size: " + common.size() + " is not: " + 2, common.size() == 2);
  assertTrue(grams.isIgnoreCase() + " does not equal: " + true, grams.isIgnoreCase());

  // Two word files: four entries expected.
  grams = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  common = grams.getCommonWords();
  assertTrue("words is null and it shouldn't be", common != null);
  assertTrue("words Size: " + common.size() + " is not: " + 4, common.size() == 4);
  assertTrue(grams.isIgnoreCase() + " does not equal: " + true, grams.isIgnoreCase());

  // Snowball-format file: eight specific pronouns expected.
  grams = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  common = grams.getCommonWords();
  assertEquals(8, common.size());
  String[] pronouns = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String pronoun : pronouns) {
    assertTrue(common.contains(pronoun));
  }
}
Example #20
Source File: TestCommonGramsQueryFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a CommonGramsQuery filter factory from one word file, from two word
 * files, and from a snowball-format file, checking the common-word set and
 * the ignoreCase flag each time.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file: two entries expected.
  CommonGramsQueryFilterFactory grams = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet common = grams.getCommonWords();
  assertTrue("words is null and it shouldn't be", common != null);
  assertTrue("words Size: " + common.size() + " is not: " + 2, common.size() == 2);
  assertTrue(grams.isIgnoreCase() + " does not equal: " + true, grams.isIgnoreCase());

  // Two word files: four entries expected.
  grams = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  common = grams.getCommonWords();
  assertTrue("words is null and it shouldn't be", common != null);
  assertTrue("words Size: " + common.size() + " is not: " + 4, common.size() == 4);
  assertTrue(grams.isIgnoreCase() + " does not equal: " + true, grams.isIgnoreCase());

  // Snowball-format file: eight specific pronouns expected.
  grams = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  common = grams.getCommonWords();
  assertEquals(8, common.size());
  String[] pronouns = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String pronoun : pronouns) {
    assertTrue(common.contains(pronoun));
  }
}
Example #21
Source File: TestStopFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testInform() throws Exception { ResourceLoader loader = new ClasspathResourceLoader(getClass()); assertTrue("loader is null and it shouldn't be", loader != null); StopFilterFactory factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true"); CharArraySet words = factory.getStopWords(); assertTrue("words is null and it shouldn't be", words != null); assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2); assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true); factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true"); words = factory.getStopWords(); assertTrue("words is null and it shouldn't be", words != null); assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4); assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true); factory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true"); words = factory.getStopWords(); assertEquals(8, words.size()); assertTrue(words.contains("he")); assertTrue(words.contains("him")); assertTrue(words.contains("his")); assertTrue(words.contains("himself")); assertTrue(words.contains("she")); assertTrue(words.contains("her")); assertTrue(words.contains("hers")); assertTrue(words.contains("herself")); // defaults factory = (StopFilterFactory) tokenFilterFactory("Stop"); assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords()); assertEquals(false, factory.isIgnoreCase()); }
Example #22
Source File: ElasticsearchConfig.java From spring-boot-practice with Apache License 2.0 | 5 votes |
/**
 * Imports accounts from the classpath resource "import.json" after bean
 * construction. Each line of the resource is parsed as one {@code Account}
 * and saved through {@code accountRepository}.
 *
 * @throws IOException if the resource cannot be opened or read, or a line cannot be parsed
 */
@PostConstruct
public void init() throws IOException {
    log.info("Start loading accounts");

    ClasspathResourceLoader loader = new ClasspathResourceLoader();
    ObjectMapper mapper = new ObjectMapper();
    ObjectReader objectReader = mapper.readerFor(Account.class);

    // Decode explicitly as UTF-8: without a charset InputStreamReader falls back to the
    // platform default, which can corrupt non-ASCII account data on some hosts.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            loader.openResource("import.json"), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            Account account = objectReader.readValue(line);
            accountRepository.save(account);
        }
    }

    log.info("Finished loading");
}
Example #23
Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0 | 5 votes |
/**
 * Checks that when the "parser" argument is a plain class-name string, the
 * plugin produces a {@code SimpleQuerqyQParserFactory} bound to that class
 * (here {@code DummyQuerqyParser}).
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfTheParserClassIsConfiguredAsAString() throws Exception {

    // "parser" resolves directly to the parser's class name.
    NamedList<String> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(DummyQuerqyParser.class.getName());

    ResourceLoader loader = new ClasspathResourceLoader(getClass().getClassLoader());

    final SolrQuerqyParserFactory created = plugin.loadSolrQuerqyParserFactory(loader, pluginArgs);

    assertNotNull(created);
    assertTrue(created instanceof SimpleQuerqyQParserFactory);
    assertEquals(DummyQuerqyParser.class, ((SimpleQuerqyQParserFactory) created).querqyParserClass);
}
Example #24
Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Tokenizes a string mixing Thai, English and Lao text with a default
 * (argument-less) ICU tokenizer factory and checks the resulting terms.
 */
public void testMixedText() throws Exception {
  ICUTokenizerFactory icuFactory = new ICUTokenizerFactory(new HashMap<String,String>());
  icuFactory.inform(new ClasspathResourceLoader(getClass()));

  Reader input = new StringReader("การที่ได้ต้องแสดงว่างานดี This is a test ກວ່າດອກ");
  Tokenizer tokenizer = icuFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  String[] expectedTerms = {
      "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
      "This", "is", "a", "test",
      "ກວ່າ", "ດອກ"
  };
  assertTokenStreamContents(tokenizer, expectedTerms);
}
Example #25
Source File: TestSuggestStopFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testInform() throws Exception { ResourceLoader loader = new ClasspathResourceLoader(getClass()); assertTrue("loader is null and it shouldn't be", loader != null); SuggestStopFilterFactory factory = createFactory( "words", "stop-1.txt", "ignoreCase", "true"); CharArraySet words = factory.getStopWords(); assertTrue("words is null and it shouldn't be", words != null); assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2); assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true); factory = createFactory("words", "stop-1.txt, stop-2.txt", "ignoreCase", "true"); words = factory.getStopWords(); assertTrue("words is null and it shouldn't be", words != null); assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4); assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true); factory = createFactory("words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true"); words = factory.getStopWords(); assertEquals(8, words.size()); assertTrue(words.contains("he")); assertTrue(words.contains("him")); assertTrue(words.contains("his")); assertTrue(words.contains("himself")); assertTrue(words.contains("she")); assertTrue(words.contains("her")); assertTrue(words.contains("hers")); assertTrue(words.contains("herself")); // defaults factory = createFactory(); assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords()); assertEquals(false, factory.isIgnoreCase()); }
Example #26
Source File: TestOpenNLPChunkerFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the OpenNLP tokenizer, POS filter and chunker filter over
 * {@code SENTENCES} and checks terms, offsets and the chunk labels
 * (compared via the "types" argument).
 */
public void testBasic() throws Exception {
  ClasspathResourceLoader loader = new ClasspathResourceLoader(getClass());

  CustomAnalyzer chunking = CustomAnalyzer.builder(loader)
      .withTokenizer("opennlp",
          "tokenizerModel", tokenizerModelFile,
          "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .build();

  assertAnalyzesTo(chunking, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_chunks, null, null, true);
}
Example #27
Source File: TestOpenNLPChunkerFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Same chain as the basic chunker test with
 * {@code TypeAsPayloadTokenFilterFactory} appended; the chunk labels are
 * checked as payloads instead of types.
 */
public void testPayloads() throws Exception {
  ClasspathResourceLoader loader = new ClasspathResourceLoader(getClass());

  CustomAnalyzer chunking = CustomAnalyzer.builder(loader)
      .withTokenizer("opennlp",
          "tokenizerModel", tokenizerModelFile,
          "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();

  assertAnalyzesTo(chunking, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}
Example #28
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the OpenNLP tokenizer and POS filter over {@code SENTENCES} and
 * checks only the emitted terms and their offsets.
 */
public void testBasic() throws IOException {
  ClasspathResourceLoader loader = new ClasspathResourceLoader(getClass());

  CustomAnalyzer tagging = CustomAnalyzer.builder(loader)
      .withTokenizer("opennlp",
          "tokenizerModel", tokenizerModelFile,
          "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();

  assertAnalyzesTo(tagging, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
Example #29
Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the OpenNLP tokenizer and POS filter over the {@code NO_BREAK} input
 * and checks the emitted terms and offsets against the NO_BREAK fixtures.
 */
public void testNoBreak() throws Exception {
  ClasspathResourceLoader loader = new ClasspathResourceLoader(getClass());

  CustomAnalyzer tagging = CustomAnalyzer.builder(loader)
      .withTokenizer("opennlp",
          "tokenizerModel", tokenizerModelFile,
          "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();

  assertAnalyzesTo(tagging, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}
Example #30
Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds an analyzer consisting only of the OpenNLP tokenizer (test sentence
 * and tokenizer models) and checks its output for {@code SENTENCES}
 * (terms and offsets) and {@code SENTENCE1} (terms only).
 */
@Test
public void testTokenizer() throws IOException {
  ClasspathResourceLoader loader = new ClasspathResourceLoader(getClass());

  CustomAnalyzer tokenizing = CustomAnalyzer.builder(loader)
      .withTokenizer("opennlp",
          "sentenceModel", "en-test-sent.bin",
          "tokenizerModel", "en-test-tokenizer.bin")
      .build();

  assertAnalyzesTo(tokenizing, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  assertAnalyzesTo(tokenizing, SENTENCE1, SENTENCE1_punc);
}