Java Code Examples for org.apache.lucene.util.automaton.Automata#makeString()
The following examples show how to use org.apache.lucene.util.automaton.Automata#makeString().
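For orientation, Automata.makeString builds a minimal deterministic automaton that accepts exactly one string; an overload accepts a slice of an int[] of Unicode code points (used in Example 2). Below is a minimal sketch, not taken from any of the projects above, that exercises both overloads and checks membership via CharacterRunAutomaton; the class name and the sample term "foo" are illustrative only.

import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;

public class MakeStringSketch {
  public static void main(String[] args) {
    // Automaton accepting exactly the single string "foo"
    Automaton exact = Automata.makeString("foo");

    // Overload taking a slice of Unicode code points (cf. Example 2)
    int[] codePoints = {'f', 'o', 'o'};
    Automaton fromInts = Automata.makeString(codePoints, 0, codePoints.length);

    // CharacterRunAutomaton wraps the automaton for fast per-character matching
    CharacterRunAutomaton run = new CharacterRunAutomaton(exact);
    System.out.println(run.run("foo"));    // true: exact match
    System.out.println(run.run("foobar")); // false: only the exact string is accepted
  }
}

Most of the examples below follow this pattern: a single-string automaton is either wrapped in a CharacterRunAutomaton (typically as a stopword set for MockAnalyzer) or combined with other automata via Operations.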
Example 1
Source File: TestQPHelper.java From lucene-solr with Apache License 2.0
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}
Example 2
Source File: ContextQuery.java From lucene-solr with Apache License 2.0
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(matchAllAutomaton, sep);
  } else {
    Automaton contextsAutomaton = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
      final ContextMetaData contextMetaData = entry.getValue();
      final IntsRef ref = entry.getKey();
      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
      if (contextMetaData.exact == false) {
        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
      }
      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
      if (contextsAutomaton == null) {
        contextsAutomaton = contextAutomaton;
      } else {
        contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
      }
    }
    return contextsAutomaton;
  }
}
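Note that this example is the only one on this page using the int[] overload: makeString(ref.ints, ref.offset, ref.length) builds the automaton from a slice of code points held in the IntsRef rather than from a java.lang.String.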
Example 3
Source File: TestPrecedenceQueryParser.java From lucene-solr with Apache License 2.0
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);

  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3", "field");
  assertNotNull(q);
}
Example 4
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0
public void testBoost() throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);

  Query q = getQuery("on^1.0", qp);
  assertNotNull(q);

  q = getQuery("\"hello\"^2.0", qp);
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = getQuery("hello^2.0", qp);
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);

  q = getQuery("\"on\"^1.0", qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}
Example 5
Source File: GeolocationContextMapping.java From Elasticsearch with Apache License 2.0
@Override
public Automaton toAutomaton() {
  Automaton automaton;
  if (precisions == null || precisions.length == 0) {
    automaton = Automata.makeString(location);
  } else {
    automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
    for (int i = 1; i < precisions.length; i++) {
      final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
      automaton = Operations.union(automaton, Automata.makeString(cell));
    }
  }
  return automaton;
}
Example 6
Source File: TokenStreamOffsetStrategy.java From lucene-solr with Apache License 2.0
private static CharArrayMatcher[] convertTermsToMatchers(BytesRef[] terms, CharArrayMatcher[] matchers) {
  CharArrayMatcher[] newAutomata = new CharArrayMatcher[terms.length + matchers.length];
  for (int i = 0; i < terms.length; i++) {
    String termString = terms[i].utf8ToString();
    CharacterRunAutomaton a = new CharacterRunAutomaton(Automata.makeString(termString));
    newAutomata[i] = LabelledCharArrayMatcher.wrap(termString, a::run);
  }
  // Append existing automata (that which is used for MTQs)
  System.arraycopy(matchers, 0, newAutomata, terms.length, matchers.length);
  return newAutomata;
}
Example 7
Source File: SearchEquivalenceTestBase.java From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);

  // index some docs
  int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }

  // delete some docs
  int numDeletes = numDocs / 20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }

  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}
Example 8
Source File: TestAutomatonQuery.java From lucene-solr with Apache License 2.0
public void testEquals() {
  AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata.makeString("foobar"));
  // reference to a1
  AutomatonQuery a2 = a1;
  // same as a1 (accepts the same language, same term)
  AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
      Operations.concatenate(Automata.makeString("foo"), Automata.makeString("bar")));
  // different than a1 (same term, but different language)
  AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"), Automata.makeString("different"));
  // different than a1 (different term, same language)
  AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"), Automata.makeString("foobar"));

  assertEquals(a1.hashCode(), a2.hashCode());
  assertEquals(a1, a2);

  assertEquals(a1.hashCode(), a3.hashCode());
  assertEquals(a1, a3);

  // different class
  AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
  // different class
  AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));

  assertFalse(a1.equals(w1));
  assertFalse(a1.equals(w2));
  assertFalse(w1.equals(w2));
  assertFalse(a1.equals(a4));
  assertFalse(a1.equals(a5));
  assertFalse(a1.equals(null));
}
Example 9
Source File: TestAutomatonQuery.java From lucene-solr with Apache License 2.0
/**
 * Test that rewriting to a single term works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewriteSingleTerm() throws IOException {
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeString("piece"));
  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
  assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(aq));
}
Example 10
Source File: TestAutomatonQuery.java From lucene-solr with Apache License 2.0
/**
 * Test that rewriting to a prefix query works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = Automata.makeString("do");
  Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  assertEquals(3, automatonQueryNrHits(aq));
}
Example 11
Source File: TestIndexWriter.java From lucene-solr with Apache License 2.0
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  Directory dir = newDirectory();
  final Automaton secondSet = Automata.makeString("foobar");
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a foobar", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("body", "just"), 0);
  builder.add(new Term("body", "test"), 3);
  PhraseQuery pq = builder.build(); // body:"just ? ? test"
  assertEquals(1, is.search(pq, 5).totalHits.value);
  ir.close();
  dir.close();
}
Example 12
Source File: HighlighterTest.java From lucene-solr with Apache License 2.0
public void testMaxSizeHighlightTruncates() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      String goodWord = "goodtoken";
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("stoppedtoken"));
      // we disable MockTokenizer checks because we will forcefully limit the
      // tokenstream and call end() before incrementToken() returns false.
      final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
      analyzer.setEnableChecks(false);
      TermQuery query = new TermQuery(new Term("data", goodWord));

      String match;
      StringBuilder sb = new StringBuilder();
      sb.append(goodWord);
      for (int i = 0; i < 10000; i++) {
        sb.append(" ");
        // only one stopword
        sb.append("stoppedtoken");
      }
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "data", fm); // new Highlighter(fm, new QueryTermScorer(query));
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(100);
      match = hg.getBestFragment(analyzer, "data", sb.toString());
      assertTrue("Matched text should be no more than 100 chars in length ",
          match.length() < hg.getMaxDocCharsToAnalyze());

      // add another tokenized word to the overall length - but set way beyond
      // the length of text under consideration (after a large slug of stop words
      // + whitespace)
      sb.append(" ");
      sb.append(goodWord);
      match = hg.getBestFragment(analyzer, "data", sb.toString());
      assertTrue("Matched text should be no more than 100 chars in length ",
          match.length() < hg.getMaxDocCharsToAnalyze());
    }
  };

  helper.start();
}
Example 13
Source File: TestGraphTokenizers.java From lucene-solr with Apache License 2.0
private Automaton s2a(String s) {
  return Automata.makeString(s);
}