org.apache.lucene.util.automaton.RegExp Java Examples
The following examples show how to use
org.apache.lucene.util.automaton.RegExp.
You can vote up the examples you find useful or vote down those you don't, and follow the links above each example to go to the original project or source file. You may also check out the related API usage on the sidebar.
Example #1
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0 | 6 votes |
// Verifies that stopword removal at query-parse time produces the expected query shapes.
public void testStopwords() throws Exception {
  // Stopword set matching exactly "the" or "foo", built from a regex automaton.
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
  // Both terms are stopwords: the result must match nothing (empty BooleanQuery or MatchNoDocsQuery).
  Query result = getQuery("field:the OR field:foo",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    assertEquals(0, ((BooleanQuery) result).clauses().size());
  }
  // One real term plus one stopword collapses to a single TermQuery.
  result = getQuery("field:woo OR field:the",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);
  // Boosted clause: the boost wrapper is preserved and the inner query keeps 2 clauses.
  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
}
Example #2
Source File: MtasToken.java From mtas with Apache License 2.0 | 6 votes |
/**
 * Creates a map from each value to an automaton matching
 * {@code prefix + DELIMITER + value} followed by any number of NUL padding characters.
 *
 * @param prefix the token prefix
 * @param valueList the values to build automata for; may be {@code null}
 * @param filter when {@code true}, regex metacharacters in each value are escaped so
 *               the value matches literally; {@code null} is treated as {@code false}
 * @return the value-to-automaton map (empty when {@code valueList} is {@code null})
 */
public static Map<String, Automaton> createAutomatonMap(String prefix,
    List<String> valueList, Boolean filter) {
  HashMap<String, Automaton> automatonMap = new HashMap<>();
  if (valueList != null) {
    for (String item : valueList) {
      // Boolean.TRUE.equals avoids the NPE that unboxing a null Boolean would throw.
      if (Boolean.TRUE.equals(filter)) {
        // Escape regex metacharacters so the value is matched literally.
        item = item.replaceAll("([\\\"\\)\\(\\<\\>\\.\\@\\#\\]\\[\\{\\}])", "\\\\$1");
      }
      automatonMap.put(item,
          new RegExp(prefix + MtasToken.DELIMITER + item + "\u0000*").toAutomaton());
    }
  }
  return automatonMap;
}
Example #3
Source File: MockTokenFilterFactory.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Creates a new MockTokenizerFactory */
public MockTokenFilterFactory(Map<String, String> args) {
  super(args);
  // "stopset" is restricted to the named presets; "stopregex" is a free-form pattern.
  String stopset = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
  String stopregex = get(args, "stopregex");
  if (null != stopset) {
    if (null != stopregex) {
      // The two configuration styles are mutually exclusive.
      throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }
    if ("english".equalsIgnoreCase(stopset)) {
      filter = MockTokenFilter.ENGLISH_STOPSET;
    } else { // must be "empty" — get() restricted the allowed values above
      filter = MockTokenFilter.EMPTY_STOPSET;
    }
  } else if (null != stopregex) {
    // Build the stop filter from the user-supplied regular expression.
    RegExp regex = new RegExp(stopregex);
    filter = new CharacterRunAutomaton(regex.toAutomaton());
  } else {
    throw new IllegalArgumentException
        ("Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
  }
  if (!args.isEmpty()) {
    // Any keys not consumed above are unsupported.
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Example #4
Source File: TestMockAnalyzer.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test a configuration where word starts with one uppercase */
public void testUppercase() throws Exception {
  // Token = one uppercase letter followed by any number of lowercase letters.
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  // Trailing consecutive capitals each become their own single-letter token.
  assertAnalyzesTo(a, "FooBarBAZ",
      new String[] { "Foo", "Bar", "B", "A", "Z"},
      new int[] { 0, 3, 6, 7, 8 },
      new int[] { 3, 6, 7, 8, 9 }
  );
  // The leading lowercase "a" is not part of any token.
  assertAnalyzesTo(a, "aFooBar",
      new String[] { "Foo", "Bar" },
      new int[] { 1, 4 },
      new int[] { 4, 7 }
  );
  // Fuzz the analyzer with random text for robustness.
  checkRandomData(random(), a, 100);
}
Example #5
Source File: HighlighterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
// Verifies that capping analyzed chars still keeps the tail of the text in the fragment.
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // Stop words "in" and "it", expressed as the regex i[nt].
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));
      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      hg.setTextFragmenter(new NullFragmenter());
      // Cap analysis just beyond the match so the fragment must still include the tail.
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
Example #6
Source File: TestIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
// Verifies multi-term interval sources built from a regexp automaton, including the
// behavior when the term-expansion limit is exceeded.
public void testMultiTerm() throws IOException {
  // Regex matching terms that start with 'p' and end with 'e'.
  RegExp re = new RegExp("p.*e");
  IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());
  // Expected intervals per document: flattened (start, end) position pairs.
  checkIntervals(source, "field1", 5, new int[][]{
      {},
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0 }
  });
  // With a term-expansion limit of 1, evaluating the source must fail.
  IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
    IntervalsSource s = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
    for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
      s.intervals("field1", ctx);
    }
  });
  assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", e.getMessage());
  checkVisits(source, 1);
}
Example #7
Source File: TestMockAnalyzer.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test a configuration where three characters makes a term */
public void testThreeChars() throws Exception {
  // Token = any three characters.
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "foo", "bar"},
      new int[] { 0, 3 },
      new int[] { 3, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      5
  );
  checkRandomData(random(), a, 100);
}
Example #8
Source File: TestMockAnalyzer.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test a configuration where two characters makes a term */
public void testTwoChars() throws Exception {
  // Token = any two characters.
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar"},
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      5
  );
  checkRandomData(random(), a, 100);
}
Example #9
Source File: TestRegexpQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testCustomProvider() throws IOException { AutomatonProvider myProvider = new AutomatonProvider() { // automaton that matches quick or brown private Automaton quickBrownAutomaton = Operations.union(Arrays .asList(Automata.makeString("quick"), Automata.makeString("brown"), Automata.makeString("bob"))); @Override public Automaton getAutomaton(String name) { if (name.equals("quickBrown")) return quickBrownAutomaton; else return null; } }; RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL, myProvider, DEFAULT_MAX_DETERMINIZED_STATES); assertEquals(1, searcher.search(query, 5).totalHits.value); }
Example #10
Source File: TestFieldCacheRewriteMethod.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Test fieldcache rewrite against filter rewrite */
@Override
protected void assertSame(String regexp) throws IOException {
  // Same pattern executed via the doc-values rewrite and via constant-score rewrite.
  RegexpQuery viaDocValues = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  viaDocValues.setRewriteMethod(new DocValuesRewriteMethod());
  RegexpQuery viaConstantScore = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  viaConstantScore.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);

  TopDocs docValueHits = searcher1.search(viaDocValues, 25);
  TopDocs constantScoreHits = searcher2.search(viaConstantScore, 25);
  CheckHits.checkEqual(viaDocValues, docValueHits.scoreDocs, constantScoreHits.scoreDocs);
}
Example #11
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies that a removed stop word leaves a position gap inside a parsed phrase query.
public void testPhraseQueryPositionIncrements() throws Exception {
  // Case-insensitive stop word "stop".
  CharacterRunAutomaton stopStopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
  qp.setEnablePositionIncrements(true);
  PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
  phraseQuery.add(new Term("field", "1"));
  // Position 2: the removed stop word leaves a gap between "1" and "2".
  phraseQuery.add(new Term("field", "2"), 2);
  assertEquals(phraseQuery.build(), getQuery("\"1 stop 2\"",qp));
}
Example #12
Source File: TestDocValuesRewriteMethod.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that the doc-values rewrite of a regexp returns exactly the same hits as
 * running the same regexp against the inverted index.
 */
protected void assertSame(String regexp) throws IOException {
  RegexpQuery viaDocValues = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  viaDocValues.setRewriteMethod(new DocValuesRewriteMethod());
  RegexpQuery viaIndex = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);

  TopDocs indexHits = searcher1.search(viaIndex, 25);
  TopDocs docValueHits = searcher2.search(viaDocValues, 25);
  CheckHits.checkEqual(viaIndex, indexHits.scoreDocs, docValueHits.scoreDocs);
}
Example #13
Source File: TestDocValuesRewriteMethod.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies RegexpQuery equals()/hashCode() semantics, before and after switching
// all instances to the doc-values rewrite method.
public void testEquals() throws Exception {
  RegexpQuery first = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery second = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery other = new RegexpQuery(new Term(fieldName, "[bB]"), RegExp.NONE);
  assertEquals(first, second);
  assertFalse(first.equals(other));

  // Equality must be preserved after the rewrite method changes on all three.
  first.setRewriteMethod(new DocValuesRewriteMethod());
  second.setRewriteMethod(new DocValuesRewriteMethod());
  other.setRewriteMethod(new DocValuesRewriteMethod());
  assertEquals(first, second);
  assertFalse(first.equals(other));
  QueryUtils.check(first);
}
Example #14
Source File: TestSpanFirstQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies SpanFirstQuery position handling when a stop filter shifts token positions.
public void testStartPositions() throws Exception {
  Directory dir = newDirectory();

  // mimic StopAnalyzer
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);
  Document doc = new Document();
  doc.add(newTextField("field", "the quick brown fox", Field.Store.NO));
  writer.addDocument(doc);
  Document doc2 = new Document();
  doc2.add(newTextField("field", "quick brown fox", Field.Store.NO));
  writer.addDocument(doc2);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);

  // user queries on "starts-with quick"
  SpanQuery sfq = spanFirstQuery(spanTermQuery("field", "quick"), 1);
  assertEquals(1, searcher.search(sfq, 10).totalHits.value);

  // user queries on "starts-with the quick"
  SpanQuery include = spanFirstQuery(spanTermQuery("field", "quick"), 2);
  sfq = spanNotQuery(include, sfq);
  assertEquals(1, searcher.search(sfq, 10).totalHits.value);

  writer.close();
  reader.close();
  dir.close();
}
Example #15
Source File: TestRegexpRandom2.java From lucene-solr with Apache License 2.0 | 5 votes |
/** check that the # of hits is the same as from a very * simple regexpquery implementation. */ protected void assertSame(String regexp) throws IOException { RegexpQuery smart = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE); DumbRegexpQuery dumb = new DumbRegexpQuery(new Term(fieldName, regexp), RegExp.NONE); TopDocs smartDocs = searcher1.search(smart, 25); TopDocs dumbDocs = searcher2.search(dumb, 25); CheckHits.checkEqual(smart, smartDocs.scoreDocs, dumbDocs.scoreDocs); }
Example #16
Source File: MapperQueryParser.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Builds a regexp query for a single field, preferring the mapped field type's own
 * regexp query when available and falling back to the default parser behavior.
 * Temporarily swaps in the field's search analyzer (unless one was forced) and
 * always restores the previous analyzer.
 */
private Query getRegexpQuerySingle(String field, String termStr) throws ParseException {
  currentFieldType = null;
  Analyzer oldAnalyzer = getAnalyzer();
  try {
    currentFieldType = parseContext.fieldMapper(field);
    if (currentFieldType != null) {
      if (!forcedAnalyzer) {
        setAnalyzer(parseContext.getSearchAnalyzer(currentFieldType));
      }
      Query query = null;
      if (currentFieldType.useTermQueryWithQueryString()) {
        query = currentFieldType.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates, multiTermRewriteMethod, parseContext);
      }
      if (query == null) {
        query = super.getRegexpQuery(field, termStr);
      }
      return query;
    }
    return super.getRegexpQuery(field, termStr);
  } catch (RuntimeException e) {
    // In lenient mode parse-time failures are swallowed and the clause is dropped.
    if (settings.lenient()) {
      return null;
    }
    throw e;
  } finally {
    // Restore the analyzer even when returning early or propagating an exception.
    setAnalyzer(oldAnalyzer);
  }
}
Example #17
Source File: TestFieldCacheRewriteMethod.java From lucene-solr with Apache License 2.0 | 5 votes |
// Checks RegexpQuery equality: same-pattern instances are equal, different-pattern
// instances are not, and installing a doc-values rewrite does not break either fact.
public void testEquals() throws Exception {
  RegexpQuery lowerUpperA = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery lowerUpperA2 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery lowerUpperB = new RegexpQuery(new Term(fieldName, "[bB]"), RegExp.NONE);
  assertEquals(lowerUpperA, lowerUpperA2);
  assertFalse(lowerUpperA.equals(lowerUpperB));

  lowerUpperA.setRewriteMethod(new DocValuesRewriteMethod());
  lowerUpperA2.setRewriteMethod(new DocValuesRewriteMethod());
  lowerUpperB.setRewriteMethod(new DocValuesRewriteMethod());
  assertEquals(lowerUpperA, lowerUpperA2);
  assertFalse(lowerUpperA.equals(lowerUpperB));
  QueryUtils.check(lowerUpperA);
}
Example #18
Source File: TestTermsEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies that Terms.intersect rejects automata it does not support.
public void testIntersectRegexp() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  doc.add(newStringField("field", "foobar", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  // NOTE(review): this fixed-string regex presumably compiles to a non-NORMAL
  // automaton type, which intersect() rejects — confirmed by the expected message.
  CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
  String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
  assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
  r.close();
  w.close();
  d.close();
}
Example #19
Source File: TestQueryBuilder.java From lucene-solr with Apache License 2.0 | 5 votes |
// Verifies that QueryBuilder preserves the position gap left by a removed stop word.
public void testPhraseQueryPositionIncrements() throws Exception {
  // Expected phrase: "1" at position 0, "2" at position 2 (gap left by the stop word).
  PhraseQuery.Builder pqBuilder = new PhraseQuery.Builder();
  pqBuilder.add(new Term("field", "1"), 0);
  pqBuilder.add(new Term("field", "2"), 2);
  PhraseQuery expected = pqBuilder.build();
  // Case-insensitive stop word "stop".
  CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopList);
  QueryBuilder builder = new QueryBuilder(analyzer);
  assertEquals(expected, builder.createPhraseQuery("field", "1 stop 2"));
}
Example #20
Source File: TestBlockPostingsFormat3.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Asserts that two Terms instances are equivalent: statistics, the full terms
 * enumeration, and seeking behavior. When {@code deep} is set, also compares
 * several random regexp intersections of the two term dictionaries.
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  if (leftTerms == null || rightTerms == null) {
    // Either both are null or the assertion fails.
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }
  assertTermsStatistics(leftTerms, rightTerms);

  // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
  boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
  boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
  TermsEnum leftTermsEnum = leftTerms.iterator();
  TermsEnum rightTermsEnum = rightTerms.iterator();
  assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);

  assertTermsSeeking(leftTerms, rightTerms);

  if (deep) {
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      // Only NORMAL automata support intersect(); other types are skipped.
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
      }
    }
  }
}
Example #21
Source File: RegexCriteriaValidator.java From nexus-public with Eclipse Public License 1.0 | 5 votes |
/** * Ensures that a regular expression entered is a valid pattern. * * @param expression * @throws InvalidExpressionException when the expression is deemed invalid */ public static String validate(final String expression) { try { new RegExp(expression); } catch (IllegalArgumentException e) { // NOSONAR throw new InvalidExpressionException( format("Invalid regular expression pattern: %s", e.getMessage())); } return expression; }
Example #22
Source File: TestMockAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Test a configuration that behaves a lot like LengthFilter */
public void testLength() throws Exception {
  // Treat tokens of 5 or more characters as stop words.
  CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, length5);
  // Position increments (1, 2) reflect the removed long tokens.
  assertAnalyzesTo(a, "ok toolong fine notfine",
      new String[] { "ok", "fine" },
      new int[] { 1, 2 });
}
Example #23
Source File: IncludeExclude.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * @param include The regular expression pattern for the terms to be included
 * @param exclude The regular expression pattern for the terms to be excluded
 * @throws IllegalArgumentException when both patterns are {@code null}
 */
public IncludeExclude(RegExp include, RegExp exclude) {
  if (include == null && exclude == null) {
    // Same exception type as before, but with a message so the failure is diagnosable.
    throw new IllegalArgumentException("The 'include' and 'exclude' patterns cannot both be null");
  }
  this.include = include;
  this.exclude = exclude;
  // This instance is regex-based, so the exact-value sets are unused.
  this.includeValues = null;
  this.excludeValues = null;
}
Example #24
Source File: IncludeExclude.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Builds an IncludeExclude from the collected parameters: either a pair of
 * regular-expression patterns or a pair of exact-value sets, but never both.
 *
 * @return the constructed filter, or {@code null} when nothing was configured
 * @throws IllegalArgumentException when both regex patterns and value sets are given
 */
public IncludeExclude includeExclude() {
  RegExp includePattern = include != null ? new RegExp(include) : null;
  RegExp excludePattern = exclude != null ? new RegExp(exclude) : null;
  if (includePattern != null || excludePattern != null) {
    if (includeValues != null || excludeValues != null) {
      throw new IllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
    }
    return new IncludeExclude(includePattern, excludePattern);
  } else if (includeValues != null || excludeValues != null) {
    return new IncludeExclude(includeValues, excludeValues);
  } else {
    return null;
  }
}
Example #25
Source File: RegexCompletionQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Compiles this query's regex term into an automaton and wraps it in a
 * CompletionWeight for suggestion matching.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  // If an empty regex is provided, we return an automaton that matches nothing. This ensures
  // consistency with PrefixCompletionQuery, which returns no results for an empty term.
  Automaton automaton = getTerm().text().isEmpty()
      ? Automata.makeEmpty()
      : new RegExp(getTerm().text(), flags).toAutomaton(maxDeterminizedStates);
  return new CompletionWeight(this, automaton);
}
Example #26
Source File: SimplePatternSplitTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Creates a new SimpleSplitPatternTokenizerFactory */
public SimplePatternSplitTokenizerFactory(Map<String,String> args) {
  super(args);
  // Bound on determinization work; guards against pathological patterns.
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // Pre-compile the split pattern into a DFA once, at factory-construction time.
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  if (args.isEmpty() == false) {
    // Any keys not consumed above are unsupported.
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Example #27
Source File: SimplePatternTokenizerFactory.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Creates a new SimplePatternTokenizerFactory */
public SimplePatternTokenizerFactory(Map<String,String> args) {
  super(args);
  // Bound on determinization work; guards against pathological patterns.
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // Pre-compile the token pattern into a DFA once, at factory-construction time.
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  if (args.isEmpty() == false) {
    // Any keys not consumed above are unsupported.
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Example #28
Source File: MinHashFilterTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a MockTokenizer whose token pattern is a whitespace-delimited shingle of
 * {@code shingleSize} words; optionally primes it with a reader over {@code shingles}.
 */
private static Tokenizer createMockShingleTokenizer(int shingleSize, String shingles) {
  // Token = one word plus (shingleSize - 1) further whitespace-separated words.
  MockTokenizer tokenizer = new MockTokenizer(
      new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){" + (shingleSize - 1) + "}").toAutomaton()),
      true);
  tokenizer.setEnableChecks(true);
  if (shingles != null) {
    tokenizer.setReader(new StringReader(shingles));
  }
  return tokenizer;
}
Example #29
Source File: LuceneTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Terms api equivalency: asserts two Terms instances agree on statistics, index
 * options flags, full enumeration, and seeking; when {@code deep} is set, also
 * compares several random regexp intersections.
 */
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
  if (leftTerms == null || rightTerms == null) {
    // Either both are null or the assertion fails.
    assertNull(info, leftTerms);
    assertNull(info, rightTerms);
    return;
  }
  assertTermsStatisticsEquals(info, leftTerms, rightTerms);
  assertEquals("hasOffsets", leftTerms.hasOffsets(), rightTerms.hasOffsets());
  assertEquals("hasPositions", leftTerms.hasPositions(), rightTerms.hasPositions());
  assertEquals("hasPayloads", leftTerms.hasPayloads(), rightTerms.hasPayloads());

  TermsEnum leftTermsEnum = leftTerms.iterator();
  TermsEnum rightTermsEnum = rightTerms.iterator();
  assertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

  assertTermsSeekingEquals(info, leftTerms, rightTerms);

  if (deep) {
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      // Only NORMAL automata support intersect(); other types are skipped.
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
      }
    }
  }
}
Example #30
Source File: TestMockAnalyzer.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Test a configuration where each character is a term */
public void testSingleChar() throws Exception {
  // Token = any single character.
  CharacterRunAutomaton single = new CharacterRunAutomaton(new RegExp(".").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  assertAnalyzesTo(a, "foobar",
      new String[] { "f", "o", "o", "b", "a", "r" },
      new int[] { 0, 1, 2, 3, 4, 5 },
      new int[] { 1, 2, 3, 4, 5, 6 }
  );
  checkRandomData(random(), a, 100);
}