org.apache.lucene.analysis.synonym.SynonymGraphFilter Java Examples

The following examples show how to use org.apache.lucene.analysis.synonym.SynonymGraphFilter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TestConcatenateGraphFilter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks that a {@code ConcatenateGraphFilter} placed after a {@code SynonymGraphFilter}
 * emits one '-'-joined token per alternative: the original keyword and each of the two
 * synonyms registered for it.
 */
@Test
public void testSeparatorWithSynonyms() throws IOException {
  // Two alternatives for "mykeyword"; the second one is a single term that contains spaces.
  SynonymMap.Builder synonyms = new SynonymMap.Builder(true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  synonyms.add(new CharsRef("mykeyword"), new CharsRef("three words synonym"), true);

  // Leading and trailing whitespace in the input is intentional.
  Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  source.setReader(new StringReader(" mykeyword another keyword   "));

  ConcatenateGraphFilter concatenated = new ConcatenateGraphFilter(
      new SynonymGraphFilter(source, synonyms.build(), true), '-', false, 100);

  String[] expectedTerms = {
      "mykeyword-another-keyword",
      "mysynonym-another-keyword",
      "three words synonym-another-keyword"
  };
  // All three concatenations are expected at the same position.
  int[] expectedPosIncrements = {1, 0, 0};

  assertTokenStreamContents(concatenated, expectedTerms, null, null, expectedPosIncrements);
}

Example #2

Source File: TestConditionalTokenFilter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Wraps a {@code SynonymGraphFilter} in a {@code SkipMatchingFilter} and checks the
 * resulting terms, position increments and position lengths for the input "a b c d e".
 */
public void testWrapGraphs() throws Exception {
    TokenStream source = whitespaceMockTokenizer("a b c d e");

    // Parse two Solr-format rules; the analyzer is only needed while parsing.
    SynonymMap synonyms;
    try (Analyzer ruleAnalyzer = new MockAnalyzer(random())) {
        SolrSynonymParser ruleParser = new SolrSynonymParser(true, true, ruleAnalyzer);
        ruleParser.parse(new StringReader("a b, f\nc d, g"));
        synonyms = ruleParser.build();
    }

    // NOTE(review): SkipMatchingFilter is declared elsewhere; presumably it bypasses the
    // wrapped filter for tokens matching "c" - confirm against its definition.
    TokenStream filtered =
        new SkipMatchingFilter(source, input -> new SynonymGraphFilter(input, synonyms, true), "c");

    // "f" (from the "a b" rule) is expected, while "g" (from the "c d" rule) is not.
    String[] expectedTerms = {"f", "a", "b", "c", "d", "e"};
    int[] expectedPosIncrements = {1, 0, 1, 1, 1, 1};
    int[] expectedPosLengths = {2, 1, 1, 1, 1, 1};

    assertTokenStreamContents(filtered, expectedTerms,
        null, null, null,
        expectedPosIncrements,
        expectedPosLengths);
}

Example #3

Source File: TestRemoveDuplicatesTokenFilter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Blasts random strings through an analyzer that feeds a randomly populated
 * {@code SynonymGraphFilter} into a {@code RemoveDuplicatesTokenFilter}.
 *
 * <p>Each iteration builds a fresh synonym map and a fresh analyzer. The analyzer is
 * managed by try-with-resources so it is closed even when {@code checkRandomData} fails.
 */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    // Previously analyzer.close() ran only after a successful checkRandomData call,
    // leaking the analyzer whenever the check threw.
    try (Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    }) {
      checkRandomData(random(), analyzer, 200);
    }
  }
}

Example #4

Source File: SynonymGraphTokenFilterFactory.java From crate with Apache License 2.0

6 votes

/**
 * Builds the synonym map from the preceding analysis chain and returns a factory that
 * applies it as a {@code SynonymGraphFilter}.
 *
 * <p>The returned factory reports this factory's name and passes the token stream through
 * untouched when the synonym map has no FST.
 *
 * @param tokenizer            tokenizer factory of the enclosing chain
 * @param charFilters          char filter factories of the enclosing chain
 * @param previousTokenFilters token filter factories that precede this one in the chain
 * @param allFilters           lookup of token filter factories by name (not used here)
 * @return a token filter factory bound to the synonyms built for this chain
 */
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                                          List<TokenFilterFactory> previousTokenFilters,
                                                          Function<String, TokenFilterFactory> allFilters) {
    final Analyzer synonymAnalyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
    final SynonymMap synonymMap = buildSynonyms(synonymAnalyzer, getRulesFromSettings(environment));
    final String factoryName = name();

    return new TokenFilterFactory() {
        @Override
        public String name() {
            return factoryName;
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            if (synonymMap.fst == null) {
                // A null FST means there are no synonyms, so there is nothing to wrap.
                return tokenStream;
            }
            return new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase);
        }
    };
}