org.apache.lucene.analysis.synonym.SynonymFilter Java Examples

The following examples show how to use org.apache.lucene.analysis.synonym.SynonymFilter. Each example is drawn from an open source project; the source file, project, and license are noted above the code.
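Before the individual examples, here is a minimal sketch of the pattern they all share: register synonym rules with SynonymMap.Builder, then wrap the analysis chain's tokenizer in a SynonymFilter. The helper name, the WhitespaceTokenizer, and the "fast" -> "quick" rule below are illustrative assumptions, not taken from any of the projects listed.

// Relevant imports (assumed): org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.core.WhitespaceTokenizer,
// org.apache.lucene.analysis.synonym.SynonymFilter, org.apache.lucene.analysis.synonym.SynonymMap,
// org.apache.lucene.util.CharsRef, java.io.IOException, java.io.StringReader
static TokenStream synonymStream(String text) throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);      // true = deduplicate identical rules
  builder.add(new CharsRef("fast"), new CharsRef("quick"), true); // true = keep the original token alongside the synonym
  SynonymMap map = builder.build();

  WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
  tokenizer.setReader(new StringReader(text));
  @SuppressWarnings("deprecation")
  TokenStream stream = new SynonymFilter(tokenizer, map, true);   // true = match synonyms case-insensitively
  return stream;
}

The same two booleans recur throughout the examples below: the third argument to SynonymMap.Builder.add controls whether the original token is kept, and the third argument to the SynonymFilter constructor controls case-insensitive matching.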
Example #1
Source File: TestLimitTokenPositionFilter.java    From lucene-solr with Apache License 2.0
public void testMaxPosition3WithSynonyms() throws IOException {
  for (final boolean consumeAll : new boolean[]{true, false}) {
    MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
    // if we are consuming all tokens, we can use the checks, otherwise we can't
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(new CharsRef("one"), new CharsRef("first"), true);
    builder.add(new CharsRef("one"), new CharsRef("alpha"), true);
    builder.add(new CharsRef("one"), new CharsRef("beguine"), true);
    CharsRefBuilder multiWordCharsRef = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[]{"and", "indubitably", "single", "only"}, multiWordCharsRef);
    builder.add(new CharsRef("one"), multiWordCharsRef.get(), true);
    SynonymMap.Builder.join(new String[]{"dopple", "ganger"}, multiWordCharsRef);
    builder.add(new CharsRef("two"), multiWordCharsRef.get(), true);
    SynonymMap synonymMap = builder.build();
    @SuppressWarnings("deprecation")
    TokenStream stream = new SynonymFilter(tokenizer, synonymMap, true);
    stream = new LimitTokenPositionFilter(stream, 3, consumeAll);

    // "only", the 4th word of multi-word synonym "and indubitably single only" is not emitted, since its position is greater than 3.
    // expected tokens and their position increments (0 = stacked at the same position as the previous token)
    assertTokenStreamContents(stream,
        new String[]{"one", "first", "alpha", "beguine", "and", "two", "indubitably", "dopple", "three", "single", "ganger"},
        new int[]{1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0});
  }
}
 
Example #2
Source File: SynonymTokenFilterFactory.java    From crate with Apache License 2.0
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                                          List<TokenFilterFactory> previousTokenFilters,
                                                          Function<String, TokenFilterFactory> allFilters) {
    final Analyzer analyzer = buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters);
    final SynonymMap synonyms = buildSynonyms(analyzer, getRulesFromSettings(environment));
    final String name = name();
    return new TokenFilterFactory() {
        @Override
        public String name() {
            return name;
        }

        @Override
        public TokenStream create(TokenStream tokenStream) {
            // a null FST means no synonym rules were configured, so return the stream unchanged
            return synonyms.fst == null ? tokenStream : new SynonymFilter(tokenStream, synonyms, false);
        }
    };
}
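The factory above only captures the prebuilt SynonymMap; the filter is applied when a caller invokes create() on the returned TokenFilterFactory. A hypothetical caller (all names here are illustrative) might look like:

// 'synonymFactory' stands for the SynonymTokenFilterFactory shown above; the other arguments come from the analysis chain.
TokenFilterFactory chainAware = synonymFactory.getChainAwareTokenFilterFactory(tokenizer, charFilters, previousTokenFilters, allFilters);
TokenStream withSynonyms = chainAware.create(tokenStream);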
 
Example #3
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0
@Test
public void testWithSynonym() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader("mykeyword"));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter);
  assertTokenStreamContents(stream, new String[] {"mykeyword", "mysynonym"}, null, null, new int[] { 1, 0 });
}
 
Example #4
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0
@Test
public void testWithSynonyms() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  String input = "mykeyword another keyword";
  tokenStream.setReader(new StringReader(input));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenStream, builder.build(), true);
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter, SEP_LABEL, false, 100);
  String[] expectedOutputs = new String[2];
  CharsRefBuilder expectedOutput = new CharsRefBuilder();
  expectedOutput.append("mykeyword");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[0] = expectedOutput.toCharsRef().toString();
  expectedOutput.clear();
  expectedOutput.append("mysynonym");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[1] = expectedOutput.toCharsRef().toString();
  assertTokenStreamContents(stream, expectedOutputs, null, null, new int[]{1, 0});
}
 
Example #5
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0
@Test
public void testValidNumberOfExpansions() throws IOException {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
  // map each single-token key "1".."256" to a synonym "1001".."1256"
  for (int i = 0; i < 256; i++) {
    builder.add(new CharsRef("" + (i+1)), new CharsRef("" + (1000 + (i+1))), true);
  }
  // build an input of 8 tokens ("1" .. "8"); each has exactly one synonym, so the token graph has 2^8 = 256 paths
  StringBuilder valueBuilder = new StringBuilder();
  for (int i = 0; i < 8; i++) {
    valueBuilder.append(i+1);
    valueBuilder.append(" ");
  }
  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  tokenizer.setReader(new StringReader(valueBuilder.toString()));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenizer, builder.build(), true);

  int count;
  try (ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter)) {
    stream.reset();
    ConcatenateGraphFilter.BytesRefBuilderTermAttribute attr = stream.addAttribute(ConcatenateGraphFilter.BytesRefBuilderTermAttribute.class);
    count = 0;
    while (stream.incrementToken()) {
      count++;
      assertNotNull(attr.getBytesRef());
      assertTrue(attr.getBytesRef().length > 0);
    }
  }
  assertEquals(count, 256);
}
 
Example #6
Source File: SynonymTokenFilterFactory.java    From Elasticsearch with Apache License 2.0
@Override
public TokenStream create(TokenStream tokenStream) {
    // a null FST means no synonym rules were configured, so return the stream unchanged
    return synonymMap.fst == null ? tokenStream : new SynonymFilter(tokenStream, synonymMap, ignoreCase);
}
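 
A note on the @SuppressWarnings("deprecation") annotations that appear throughout: SynonymFilter is deprecated in recent Lucene releases in favor of SynonymGraphFilter, which emits a correct token graph for multi-word synonyms. A minimal, illustrative swap, assuming a tokenizer and synonymMap like those built in the examples above:

// Same (input, synonyms, ignoreCase) constructor shape as the deprecated SynonymFilter.
TokenStream stream = new SynonymGraphFilter(tokenizer, synonymMap, true);

At index time the graph output is typically flattened again, for example by following the filter with a FlattenGraphFilter.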