org.apache.lucene.util.automaton.ByteRunAutomaton Java Examples

The following examples show how to use org.apache.lucene.util.automaton.ByteRunAutomaton. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TestFuzzyQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Checks that visiting a {@link FuzzyQuery} hands the visitor a term-matching automaton,
 * and that the automaton accepts/rejects the expected sample terms.
 */
public void testVisitor() {
  final AtomicBoolean callbackFired = new AtomicBoolean(false);
  final FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "blob"), 2);
  final QueryVisitor probe = new QueryVisitor() {
    @Override
    public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
      // Record that the callback ran so the test fails if the query never calls it.
      callbackFired.set(true);
      final ByteRunAutomaton matcher = automaton.get();
      // Terms expected to be accepted by the automaton.
      assertMatches(matcher, "blob");
      assertMatches(matcher, "bolb");
      assertMatches(matcher, "blobby");
      // Term expected to be rejected.
      assertNoMatches(matcher, "bolbby");
    }
  };
  fuzzy.visit(probe);
  assertTrue(callbackFired.get());
}

Example #2

Source File: LabelledCharArrayMatcher.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Adapts a byte-based {@link ByteRunAutomaton} so that it can be run over a char[] slice:
 * each char is converted to its UTF-8 byte(s), which are fed to the automaton one at a time.
 *
 * @param label        label passed through to the underlying {@code wrap} overload
 * @param runAutomaton the byte automaton to drive
 * @return a matcher that returns true when the automaton ends in an accept state
 */
static LabelledCharArrayMatcher wrap(String label, ByteRunAutomaton runAutomaton) {
    return wrap(label, (chars, offset, length) -> {
      final int end = offset + length;
      int state = 0;
      int pos = offset;
      while (pos < end) {
        final int ch = chars[pos];
        if (ch >= 0x800) {
          // Not a 1- or 2-byte sequence: hand the whole remaining slice to UnicodeUtil,
          // step through every produced byte, and stop scanning chars afterwards.
          final int remaining = end - pos;
          final byte[] encoded = new byte[4 * remaining];
          final int encodedLen = UnicodeUtil.UTF16toUTF8(chars, pos, remaining, encoded);
          for (int k = 0; k < encodedLen; k++) {
            state = runAutomaton.step(state, encoded[k] & 0xFF);
            if (state == -1) {
              return false;
            }
          }
          break;
        }
        if (ch >= 0x80) {
          // Two-byte UTF-8 sequence: lead byte first, continuation byte below.
          state = runAutomaton.step(state, 0xC0 | (ch >> 6));
          if (state == -1) {
            return false;
          }
          state = runAutomaton.step(state, 0x80 | (ch & 0x3F));
        } else {
          // Single-byte (ASCII) case: the char value is the byte value.
          state = runAutomaton.step(state, ch);
        }
        // A state of -1 means the automaton has no transition for that byte.
        if (state == -1) {
          return false;
        }
        pos++;
      }
      return runAutomaton.isAccept(state);
    });
}

Example #3

Source File: TermInSetQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds one binary automaton per term in {@code termData}, unions them, and returns the
 * run automaton of the compiled result.
 */
private ByteRunAutomaton asByteRunAutomaton() {
  final List<Automaton> perTerm = new ArrayList<>();
  final TermIterator terms = termData.iterator();
  BytesRef current;
  // The iterator signals exhaustion by returning null.
  while ((current = terms.next()) != null) {
    perTerm.add(Automata.makeBinary(current));
  }
  final Automaton anyTerm = Operations.union(perTerm);
  final CompiledAutomaton compiled = new CompiledAutomaton(anyTerm);
  return compiled.runAutomaton;
}

Example #4

Source File: IncludeExclude.java From Elasticsearch with Apache License 2.0

4 votes

private AutomatonBackedStringFilter(Automaton automaton) {
    this.runAutomaton = new ByteRunAutomaton(automaton);
}

Example #5

Source File: MultiTermHighlighting.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Wraps the supplied automaton as a char[] matcher labelled with the query's string form
 * and appends it to {@code runAutomata}.
 */
@Override
public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
  final String label = query.toString();
  final ByteRunAutomaton byteMatcher = automaton.get();
  runAutomata.add(LabelledCharArrayMatcher.wrap(label, byteMatcher));
}

Example #6

Source File: TestFuzzyQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Asserts that the automaton accepts the bytes of {@code text} as held by a {@link BytesRef}.
 */
private static void assertMatches(ByteRunAutomaton automaton, String text) {
  final BytesRef encoded = new BytesRef(text);
  final boolean accepted = automaton.run(encoded.bytes, encoded.offset, encoded.length);
  assertTrue(accepted);
}

Example #7

Source File: TestFuzzyQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Asserts that the automaton rejects the bytes of {@code text} as held by a {@link BytesRef}.
 */
private static void assertNoMatches(ByteRunAutomaton automaton, String text) {
  final BytesRef encoded = new BytesRef(text);
  final boolean accepted = automaton.run(encoded.bytes, encoded.offset, encoded.length);
  assertFalse(accepted);
}

Example #8

Source File: QueryVisitor.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Called by leaf queries that match on a class of terms.
 *
 * <p>This default implementation does not use {@code field} or {@code automaton}; it simply
 * forwards the query to {@code visitLeaf(query)}. Override it to consume the automaton.
 *
 * @param query     the leaf query
 * @param field     the field queried against
 * @param automaton a supplier for an automaton defining which terms match
 *
 * @lucene.experimental
 */
public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
  visitLeaf(query); // default impl for backward compatibility: treat the query as a plain leaf
}