org.apache.lucene.analysis.util.ClasspathResourceLoader Java Examples

Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0

6 votes

/**
 * With a "parser" entry that has no "factory" but a "class" of
 * {@code querqy.parser.WhiteSpaceQuerqyParser}, the plugin must produce a
 * {@link SimpleQuerqyQParserFactory} holding {@code WhiteSpaceQuerqyParser.class}.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    final NamedList<NamedList<String>> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(parserConfig);

    // Nested parser configuration: no explicit factory, only a parser class name.
    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");

    final ClassLoader testClassLoader = getClass().getClassLoader();
    final ResourceLoader classpathLoader = new ClasspathResourceLoader(testClassLoader);

    final SolrQuerqyParserFactory loadedFactory = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    assertNotNull(loadedFactory);
    assertTrue(loadedFactory instanceof SimpleQuerqyQParserFactory);

    final SimpleQuerqyQParserFactory simpleFactory = (SimpleQuerqyQParserFactory) loadedFactory;
    assertEquals(WhiteSpaceQuerqyParser.class, simpleFactory.querqyParserClass);

}

Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Analyzes SENTENCES with the "opennlp" tokenizer followed by the "opennlpPOS" filter,
 * once asserting against SENTENCES_posTags directly and once, with
 * TypeAsPayloadTokenFilterFactory appended, against toPayloads(SENTENCES_posTags).
 */
public void testPOS() throws Exception {
  // Chain without payload conversion.
  final CustomAnalyzer taggingAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(taggingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  // Same chain, plus the type-as-payload filter at the end.
  final CustomAnalyzer payloadAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(payloadAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}

Source File: TestKeepFilterFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds "KeepWord" filter factories from one and from two word files (case-insensitive)
 * and checks the number of loaded words: 2 for keep-1.txt alone, 4 for keep-1.txt plus keep-2.txt.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is only null-checked; it is not handed to tokenFilterFactory below.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  final KeepWordFilterFactory singleFileFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  final CharArraySet singleFileWords = singleFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);

  // Two comma-separated word files.
  final KeepWordFilterFactory twoFileFactory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  final CharArraySet twoFileWords = twoFileFactory.getWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
}

Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Exercises repeated reset/close cycles on a tokenizer created by
 * {@link OpenNLPTokenizerFactory}, asserting that SENTENCES still tokenizes to
 * SENTENCES_punc after the tokenizer has been closed and given a new reader.
 *
 * @throws IOException propagated from the factory or the tokenizer
 */
@Test
public void testClose() throws IOException {
  // A plain HashMap replaces the former double-brace initialization, which created an
  // anonymous HashMap subclass holding a reference to the enclosing test instance.
  Map<String,String> args = new HashMap<>();
  args.put("sentenceModel", "en-test-sent.bin");
  args.put("tokenizerModel", "en-test-tokenizer.bin");
  OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
  factory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer ts = factory.create(newAttributeFactory());
  ts.setReader(new StringReader(SENTENCES));

  // First cycle: reset and close without consuming any tokens.
  ts.reset();
  ts.close();
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
  // Second cycle: close, reset, and tokenize the same input again.
  ts.close();
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
}

Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Configures ICUTokenizerFactory with the rule file "Latin-dont-break-on-hyphens.rbbi" for
 * script "Latn" and checks the tokens produced for a hyphen-heavy English sentence.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  Reader input = new StringReader
      ("One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.");

  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");

  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  final String[] expectedTokens = new String[] { "One-two", "punch",
      "Brang", "not", "brung-it",
      "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" };
  assertTokenStreamContents(tokenizer, expectedTokens);
}

Source File: TestSuggestStopFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a SuggestStopFilterFactory from alternating key/value strings, adds
 * "luceneMatchVersion" set to Version.LATEST, and informs the factory with a
 * classpath loader for this test class.
 *
 * @param params alternating keys and values; the length must be even
 * @return the informed factory
 * @throws IllegalArgumentException if an odd number of strings is passed
 * @throws IOException propagated from {@code inform}
 */
private SuggestStopFilterFactory createFactory(String ... params) throws IOException {
  final int paramCount = params.length;
  if (paramCount % 2 != 0) {
    throw new IllegalArgumentException("invalid keysAndValues map");
  }

  Map<String, String> factoryArgs = new HashMap<>(paramCount / 2);
  for (int keyIndex = 0; keyIndex < paramCount; keyIndex += 2) {
    final String key = params[keyIndex];
    final String value = params[keyIndex + 1];
    // A non-null return from put means the key was already supplied.
    String replaced = factoryArgs.put(key, value);
    assertNull("duplicate values for key: " + key, replaced);
  }
  factoryArgs.put("luceneMatchVersion", Version.LATEST.toString());

  SuggestStopFilterFactory stopFactory = new SuggestStopFilterFactory(factoryArgs);
  stopFactory.inform(new ClasspathResourceLoader(getClass()));
  return stopFactory;
}

Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Supplies two script/rule-file pairs (Cyrl and Thai), both pointing at the same
 * KeywordTokenizer.rbbi rule file, which overrides the default
 * DefaultICUTokenizerConfig tokenization for Thai script.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  Reader input = new StringReader
      ("Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.");

  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");

  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(factoryArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  // The Cyrillic and Thai runs are each expected as one token, trailing spaces included.
  final String[] expectedTokens = new String[] { "Some", "English",
      "Немного русский.  ",
      "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
      "More", "English" };
  assertTokenStreamContents(tokenizer, expectedTokens);
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Case: default. Only the encoder ("Metaphone") is configured; the factory must expose a
 * Metaphone encoder and leave {@code inject} true.
 */
public void testFactoryDefaults() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "Metaphone");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  assertTrue(phoneticFactory.getEncoder() instanceof Metaphone);
  assertTrue(phoneticFactory.inject); // default
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Passing INJECT = "false" alongside the "Metaphone" encoder must leave
 * {@code factory.inject} false.
 */
public void testInjectFalse() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  factoryArgs.put(PhoneticFilterFactory.INJECT, "false");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  assertFalse(phoneticFactory.inject);
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Passing MAX_CODE_LENGTH = "2" must result in a Metaphone encoder whose
 * {@code getMaxCodeLen()} is 2.
 */
public void testMaxCodeLength() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  factoryArgs.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  final Metaphone metaphone = (Metaphone) phoneticFactory.getEncoder();
  assertEquals(2, metaphone.getMaxCodeLen());
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * An unrecognized encoder name ("XXX") must raise an IllegalArgumentException whose
 * message contains "Error loading encoder".
 */
public void testUnknownEncoder() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put("encoder", "XXX");

  // Construction and inform both stay inside the lambda so either may throw.
  IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> {
    PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
    phoneticFactory.inform(new ClasspathResourceLoader(phoneticFactory.getClass()));
  });

  final String message = thrown.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * A fully qualified encoder class name that does not exist must raise an
 * IllegalArgumentException whose message contains "Error loading encoder".
 */
public void testUnknownEncoderReflection() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put("encoder", "org.apache.commons.codec.language.NonExistence");

  // Construction and inform both stay inside the lambda so either may throw.
  IllegalArgumentException thrown = expectThrows(IllegalArgumentException.class, () -> {
    PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
    phoneticFactory.inform(new ClasspathResourceLoader(phoneticFactory.getClass()));
  });

  final String message = thrown.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Case: Reflection. The encoder is given by its fully qualified class name and must
 * resolve to a Metaphone instance, with {@code inject} left true.
 */
public void testFactoryReflection() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  assertTrue(phoneticFactory.getEncoder() instanceof Metaphone);
  assertTrue(phoneticFactory.inject); // default
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * "Caverphone2" is used because the REGISTRY registers this encoder as Caverphone,
 * so this effectively tests reflection without a package name.
 */
public void testFactoryReflectionCaverphone2() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "Caverphone2");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  assertTrue(phoneticFactory.getEncoder() instanceof Caverphone2);
  assertTrue(phoneticFactory.inject); // default
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * The encoder name "Caverphone" must yield a Caverphone2 encoder, with
 * {@code inject} left true.
 */
public void testFactoryReflectionCaverphone() throws IOException {
  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(PhoneticFilterFactory.ENCODER, "Caverphone");

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  assertTrue(phoneticFactory.getEncoder() instanceof Caverphone2);
  assertTrue(phoneticFactory.inject); // default
}

Source File: TestPhoneticFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs {@code input} through a whitespace mock tokenizer and a phonetic filter configured
 * with the given encoder and inject setting, then asserts the resulting tokens.
 *
 * @param algName  value for the "encoder" factory argument
 * @param inject   value for the "inject" factory argument
 * @param input    text to tokenize
 * @param expected expected token terms
 */
static void assertAlgorithm(String algName, String inject, String input,
    String[] expected) throws Exception {
  final Tokenizer source = whitespaceMockTokenizer(input);

  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put("encoder", algName);
  factoryArgs.put("inject", inject);

  final PhoneticFilterFactory phoneticFactory = new PhoneticFilterFactory(factoryArgs);
  final Class<?> factoryClass = phoneticFactory.getClass();
  phoneticFactory.inform(new ClasspathResourceLoader(factoryClass));

  final TokenStream filtered = phoneticFactory.create(source);
  assertTokenStreamContents(filtered, expected);
}

Source File: TestMorfologikFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Points MorfologikFilterFactory at "custom-dictionary.dict" and expects the input
 * "inflected1 inflected2" to come out as "lemma1", "lemma2".
 */
public void testExplicitDictionary() throws Exception {
  final ResourceLoader classpathLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  final StringReader input = new StringReader("inflected1 inflected2");

  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
  final MorfologikFilterFactory morfologikFactory = new MorfologikFilterFactory(factoryArgs);
  morfologikFactory.inform(classpathLoader);

  final TokenStream tokenized = whitespaceMockTokenizer(input);
  final TokenStream lemmatized = morfologikFactory.create(tokenized);
  assertTokenStreamContents(lemmatized, new String[] {"lemma1", "lemma2"});
}

Source File: TestMorfologikFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Pointing the factory at a dictionary resource that does not exist must raise an
 * IOException whose message contains "Resource not found".
 */
public void testMissingDictionary() throws Exception {
  final ResourceLoader classpathLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  final Map<String,String> factoryArgs = new HashMap<>();
  factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");

  // Construction and inform both stay inside the lambda so either may throw.
  IOException thrown = expectThrows(IOException.class, () -> {
    MorfologikFilterFactory morfologikFactory = new MorfologikFilterFactory(factoryArgs);
    morfologikFactory.inform(classpathLoader);
  });

  final String message = thrown.getMessage();
  assertTrue(message.contains("Resource not found"));
}

Source File: TestCommonGramsFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds "CommonGrams" factories from one word file, two word files, and a snowball-format
 * file, checking the loaded word counts (2, 4, 8), the ignoreCase flag, and the individual
 * snowball entries.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file.
  final CommonGramsFilterFactory singleFileFactory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt", 
      "ignoreCase", "true");
  final CharArraySet singleFileWords = singleFileFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2,
      singleFileWords.size() == 2);
  assertTrue(singleFileFactory.isIgnoreCase() + " does not equal: " + true,
      singleFileFactory.isIgnoreCase());

  // Two comma-separated word files.
  final CommonGramsFilterFactory twoFileFactory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt", 
      "ignoreCase", "true");
  final CharArraySet twoFileWords = twoFileFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4,
      twoFileWords.size() == 4);
  assertTrue(twoFileFactory.isIgnoreCase() + " does not equal: " + true,
      twoFileFactory.isIgnoreCase());

  // Snowball-format word file.
  final CommonGramsFilterFactory snowballFactory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt", 
      "format", "snowball", 
      "ignoreCase", "true");
  final CharArraySet snowballWords = snowballFactory.getCommonWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }
}

Source File: TestCommonGramsQueryFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds "CommonGramsQuery" factories from one word file, two word files, and a
 * snowball-format file, checking the loaded word counts (2, 4, 8), the ignoreCase flag,
 * and the individual snowball entries.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file.
  final CommonGramsQueryFilterFactory singleFileFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt", 
      "ignoreCase", "true");
  final CharArraySet singleFileWords = singleFileFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2,
      singleFileWords.size() == 2);
  assertTrue(singleFileFactory.isIgnoreCase() + " does not equal: " + true,
      singleFileFactory.isIgnoreCase());

  // Two comma-separated word files.
  final CommonGramsQueryFilterFactory twoFileFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt", 
      "ignoreCase", "true");
  final CharArraySet twoFileWords = twoFileFactory.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4,
      twoFileWords.size() == 4);
  assertTrue(twoFileFactory.isIgnoreCase() + " does not equal: " + true,
      twoFileFactory.isIgnoreCase());

  // Snowball-format word file.
  final CommonGramsQueryFilterFactory snowballFactory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt", 
      "format", "snowball", 
      "ignoreCase", "true");
  final CharArraySet snowballWords = snowballFactory.getCommonWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }
}

Source File: TestStopFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds "Stop" filter factories from one word file, two word files, a snowball-format
 * file, and with no arguments, checking word counts (2, 4, 8), the ignoreCase flag, the
 * individual snowball entries, and the defaults.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is only null-checked; it is not handed to tokenFilterFactory below.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file.
  final StopFilterFactory singleFileFactory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-1.txt",
      "ignoreCase", "true");
  final CharArraySet singleFileWords = singleFileFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
  assertTrue(singleFileFactory.isIgnoreCase() + " does not equal: " + true, singleFileFactory.isIgnoreCase());

  // Two comma-separated word files.
  final StopFilterFactory twoFileFactory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  final CharArraySet twoFileWords = twoFileFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
  assertTrue(twoFileFactory.isIgnoreCase() + " does not equal: " + true, twoFileFactory.isIgnoreCase());

  // Snowball-format word file.
  final StopFilterFactory snowballFactory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  final CharArraySet snowballWords = snowballFactory.getStopWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }

  // defaults
  final StopFilterFactory defaultFactory = (StopFilterFactory) tokenFilterFactory("Stop");
  assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, defaultFactory.getStopWords());
  assertEquals(false, defaultFactory.isIgnoreCase());
}

Source File: ElasticsearchConfig.java From spring-boot-practice with Apache License 2.0

5 votes

/**
 * Reads the classpath resource "import.json" line by line, deserializes each line into an
 * {@code Account}, and saves it through {@code accountRepository}.
 *
 * @throws IOException if opening or reading the resource, or parsing a line, fails
 */
@PostConstruct
public void init() throws IOException {
    log.info("Start loading accounts");
    ClasspathResourceLoader loader = new ClasspathResourceLoader();
    ObjectMapper mapper = new ObjectMapper();
    ObjectReader objectReader = mapper.readerFor(Account.class);
    // Decode explicitly as UTF-8: without a charset, InputStreamReader falls back to the
    // platform default, which can corrupt non-ASCII account data on some hosts.
    // The fully-qualified name avoids requiring a new import in this file.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            loader.openResource("import.json"), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        // Each line is parsed on its own as a single Account document.
        while ((line = reader.readLine()) != null) {
            Account account = objectReader.readValue(line);
            accountRepository.save(account);
        }
    }
    log.info("Finished loading");
}

Source File: AbstractQuerqyDismaxQParserPluginTest.java From querqy with Apache License 2.0

5 votes

/**
 * When "parser" is configured directly as a class-name string, the plugin must produce a
 * {@link SimpleQuerqyQParserFactory} holding {@code DummyQuerqyParser.class}.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfTheParserClassIsConfiguredAsAString() throws Exception {

    final NamedList<String> pluginArgs = mock(NamedList.class);
    final String parserClassName = DummyQuerqyParser.class.getName();
    when(pluginArgs.get("parser")).thenReturn(parserClassName);

    final ClassLoader testClassLoader = getClass().getClassLoader();
    final ResourceLoader classpathLoader = new ClasspathResourceLoader(testClassLoader);

    final SolrQuerqyParserFactory loadedFactory = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    assertNotNull(loadedFactory);
    assertTrue(loadedFactory instanceof SimpleQuerqyQParserFactory);

    final SimpleQuerqyQParserFactory simpleFactory = (SimpleQuerqyQParserFactory) loadedFactory;
    assertEquals(DummyQuerqyParser.class, simpleFactory.querqyParserClass);

}

Source File: TestICUTokenizerFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Tokenizes a string mixing Thai, English and Lao text with an ICUTokenizerFactory built
 * from an empty argument map, and checks the resulting tokens.
 */
public void testMixedText() throws Exception {
  Reader input = new StringReader("การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ");

  final Map<String,String> noArgs = new HashMap<String,String>();
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(noArgs);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  final String[] expectedTokens =
      new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
      "This", "is", "a", "test", "ກວ່າ", "ດອກ"};
  assertTokenStreamContents(tokenizer, expectedTokens);
}

Source File: TestSuggestStopFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Uses createFactory to build SuggestStopFilterFactory instances from one word file, two
 * word files, a snowball-format file, and no arguments, checking word counts (2, 4, 8),
 * the ignoreCase flag, the individual snowball entries, and the defaults.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is only null-checked; it is not passed to createFactory below.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // One word file.
  final SuggestStopFilterFactory singleFileFactory = createFactory(
      "words", "stop-1.txt",
      "ignoreCase", "true");
  final CharArraySet singleFileWords = singleFileFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
  assertTrue(singleFileFactory.isIgnoreCase() + " does not equal: " + true, singleFileFactory.isIgnoreCase());

  // Two comma-separated word files.
  final SuggestStopFilterFactory twoFileFactory = createFactory("words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  final CharArraySet twoFileWords = twoFileFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
  assertTrue(twoFileFactory.isIgnoreCase() + " does not equal: " + true, twoFileFactory.isIgnoreCase());

  // Snowball-format word file.
  final SuggestStopFilterFactory snowballFactory = createFactory("words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  final CharArraySet snowballWords = snowballFactory.getStopWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }

  // defaults
  final SuggestStopFilterFactory defaultFactory = createFactory();
  assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, defaultFactory.getStopWords());
  assertEquals(false, defaultFactory.isIgnoreCase());
}

Source File: TestOpenNLPChunkerFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Analyzes SENTENCES with the "opennlp" tokenizer followed by the "opennlpPOS" and
 * "opennlpChunker" filters and asserts the result against SENTENCES_chunks.
 */
public void testBasic() throws Exception {
  final CustomAnalyzer chunkingAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .build();

  assertAnalyzesTo(chunkingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_chunks, null, null, true);
}

Source File: TestOpenNLPChunkerFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Same chain as the chunker test with TypeAsPayloadTokenFilterFactory appended; the
 * assertion is made against toPayloads(SENTENCES_chunks).
 */
public void testPayloads() throws Exception {
  final CustomAnalyzer payloadAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();

  assertAnalyzesTo(payloadAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}

Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Analyzes SENTENCES with the "opennlp" tokenizer and "opennlpPOS" filter, asserting only
 * the terms (SENTENCES_punc) and their start/end offsets.
 */
public void testBasic() throws IOException {
  final CustomAnalyzer taggingAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();

  assertAnalyzesTo(taggingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}

Source File: TestOpenNLPPOSFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Analyzes NO_BREAK with the "opennlp" tokenizer and "opennlpPOS" filter and asserts the
 * terms (NO_BREAK_terms) and their start/end offsets.
 */
public void testNoBreak() throws Exception {
  final CustomAnalyzer taggingAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();

  assertAnalyzesTo(taggingAnalyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}

Source File: TestOpenNLPTokenizerFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds an analyzer containing only the "opennlp" tokenizer (test sentence and tokenizer
 * models) and checks its output for SENTENCES, with offsets, and for SENTENCE1.
 */
@Test
public void testTokenizer() throws IOException {
  final CustomAnalyzer tokenizingAnalyzer = CustomAnalyzer
      .builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
      .build();

  // Multi-sentence input: terms plus start/end offsets.
  assertAnalyzesTo(tokenizingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  // Single-sentence input: terms only.
  assertAnalyzesTo(tokenizingAnalyzer, SENTENCE1, SENTENCE1_punc);
}

org.apache.lucene.analysis.util.ClasspathResourceLoader Java Examples