org.apache.lucene.search.PhraseQuery Java Examples
The following examples show how to use
org.apache.lucene.search.PhraseQuery.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestDocument.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes the document built by {@code makeDocumentWithFields()} and verifies that the
 * phrase {@code "test1 test2"} on the {@code indexed_not_tokenized} field yields exactly
 * one hit, which is then checked with {@code doAssert}.
 */
public void testPositionIncrementMultiFields() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  indexWriter.addDocument(makeDocumentWithFields());

  IndexReader indexReader = indexWriter.getReader();
  IndexSearcher indexSearcher = newSearcher(indexReader);

  PhraseQuery phrase = new PhraseQuery("indexed_not_tokenized", "test1", "test2");
  ScoreDoc[] matches = indexSearcher.search(phrase, 1000).scoreDocs;
  assertEquals(1, matches.length);

  doAssert(indexSearcher.doc(matches[0].doc), true);

  // Release in the order writer, reader, directory.
  indexWriter.close();
  indexReader.close();
  directory.close();
}
Example #2
Source File: FieldQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Flattens {@code query}, records its terms, expands the flattened queries and registers
 * each expanded query in the root map selected for it.
 *
 * @param query the query to flatten
 * @param reader the reader passed on to flattening, term saving and map registration
 * @param phraseHighlight when {@code false}, the individual terms of multi-term phrase
 *     queries are additionally registered with the accumulated boost
 * @param fieldMatch stored in the {@code fieldMatch} field
 * @throws IOException declared for the helper calls made here
 */
public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) throws IOException {
  this.fieldMatch = fieldMatch;

  Set<Query> flattened = new LinkedHashSet<>();
  flatten(query, reader, flattened, 1f);
  saveTerms(flattened, reader);

  Collection<Query> expanded = expand(flattened);
  for (Query expandedQuery : expanded) {
    // The map is chosen and populated with the query as-is, before boosts are stripped.
    QueryPhraseMap rootMap = getRootMap(expandedQuery);
    rootMap.add(expandedQuery, reader);

    // Peel off nested boost wrappers, multiplying their boosts together.
    Query unwrapped = expandedQuery;
    float accumulatedBoost = 1f;
    while (unwrapped instanceof BoostQuery) {
      BoostQuery boosted = (BoostQuery) unwrapped;
      unwrapped = boosted.getQuery();
      accumulatedBoost *= boosted.getBoost();
    }

    if (phraseHighlight || !(unwrapped instanceof PhraseQuery)) {
      continue;
    }
    Term[] phraseTerms = ((PhraseQuery) unwrapped).getTerms();
    if (phraseTerms.length > 1) {
      for (Term phraseTerm : phraseTerms) {
        rootMap.addTerm(phraseTerm, accumulatedBoost);
      }
    }
  }
}
Example #3
Source File: FieldQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns the field name of {@code query} after stripping any {@code BoostQuery} wrappers,
 * or {@code null} when {@code fieldMatch} is off.
 *
 * @param query a term, phrase or multi-term query, possibly wrapped in boosts
 * @return the field of the query (for a phrase query, the field of its first term),
 *     or {@code null} if {@code fieldMatch} is {@code false}
 * @throws RuntimeException if the unwrapped query is of any other type
 */
private String getKey(Query query) {
  if (!fieldMatch) {
    return null;
  }

  Query unwrapped = query;
  while (unwrapped instanceof BoostQuery) {
    unwrapped = ((BoostQuery) unwrapped).getQuery();
  }

  if (unwrapped instanceof TermQuery) {
    return ((TermQuery) unwrapped).getTerm().field();
  }
  if (unwrapped instanceof PhraseQuery) {
    Term[] phraseTerms = ((PhraseQuery) unwrapped).getTerms();
    return phraseTerms[0].field();
  }
  if (unwrapped instanceof MultiTermQuery) {
    return ((MultiTermQuery) unwrapped).getField();
  }
  throw new RuntimeException("query \"" + unwrapped.toString() + "\" must be flatten first.");
}
Example #4
Source File: HighlighterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testGetBestFragmentsFilteredPhraseQuery() throws Exception { TestHighlightRunner helper = new TestHighlightRunner() { @Override public void run() throws Exception { numHighlights = 0; PhraseQuery pq = new PhraseQuery("contents", "john", "kennedy"); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(pq, Occur.MUST); bq.add(TermRangeQuery.newStringRange("contents", "john", "john", true, true), Occur.FILTER); doSearching(bq.build()); doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this); // Currently highlights "John" and "Kennedy" separately assertTrue("Failed to find correct number of highlights " + numHighlights + " found", numHighlights == 2); } }; helper.start(); }
Example #5
Source File: FieldQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Adds the term texts of every flattened query to the term set returned by
 * {@code getTermSet} for that query. Boost wrappers are stripped first.
 *
 * @param flatQueries queries expected to be term, phrase or multi-term queries
 * @param reader used to rewrite multi-term queries; when {@code null}, a multi-term
 *     query falls through to the error case
 * @throws IOException declared for the multi-term rewrite
 * @throws RuntimeException if a query matches none of the handled cases
 */
void saveTerms(Collection<Query> flatQueries, IndexReader reader) throws IOException {
  for (Query flatQuery : flatQueries) {
    Query unwrapped = flatQuery;
    while (unwrapped instanceof BoostQuery) {
      unwrapped = ((BoostQuery) unwrapped).getQuery();
    }

    Set<String> termSet = getTermSet(unwrapped);

    if (unwrapped instanceof TermQuery) {
      termSet.add(((TermQuery) unwrapped).getTerm().text());
      continue;
    }
    if (unwrapped instanceof PhraseQuery) {
      for (Term phraseTerm : ((PhraseQuery) unwrapped).getTerms()) {
        termSet.add(phraseTerm.text());
      }
      continue;
    }
    if (unwrapped instanceof MultiTermQuery && reader != null) {
      // The rewrite result is cast to BooleanQuery and each clause to TermQuery.
      BooleanQuery rewritten = (BooleanQuery) unwrapped.rewrite(reader);
      for (BooleanClause clause : rewritten) {
        TermQuery clauseQuery = (TermQuery) clause.getQuery();
        termSet.add(clauseQuery.getTerm().text());
      }
      continue;
    }
    throw new RuntimeException("query \"" + unwrapped.toString() + "\" must be flatten first.");
  }
}
Example #6
Source File: NGramQueryParser.java From spacewalk with GNU General Public License v2.0 | 6 votes |
/**
 * Parses the field query with the superclass and, if the result is a
 * {@code PhraseQuery}, converts it into an {@code NGramQuery}.
 *
 * @param defaultField passed through to the superclass parser
 * @param queryText passed through to the superclass parser
 * @return the superclass result unchanged unless it is a phrase query, in which case
 *     an {@code NGramQuery} built from it and {@code useMust}
 * @throws ParseException declared for the superclass call
 */
protected Query getFieldQuery(String defaultField, String queryText) throws ParseException {
  Query parsed = super.getFieldQuery(defaultField, queryText);
  if (parsed instanceof PhraseQuery) {
    // A ngram when parsed will become a series of smaller search terms,
    // these terms are grouped together into a PhraseQuery. We are taking
    // that PhraseQuery and breaking out each ngram term then combining all
    // ngrams together to form a BooleanQuery.
    return new NGramQuery((PhraseQuery) parsed, useMust);
  }
  log.debug("Returning default query. No phrase query translation.");
  return parsed;
}
Example #7
Source File: StringFieldType.java From crate with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code PhraseQuery} on {@code field} from the tokens of {@code stream}.
 *
 * @param field field name used for every term
 * @param stream token source; it is reset here and consumed until exhausted
 * @param slop slop set on the resulting phrase query
 * @param enablePosIncrements when {@code true}, each term position advances by the
 *     token's position increment; otherwise by exactly one
 * @return the built phrase query
 * @throws IOException declared for the token stream calls
 */
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
  PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.setSlop(slop);

  TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute incrementAttribute = stream.getAttribute(PositionIncrementAttribute.class);

  stream.reset();
  // Starts at -1 so that a first step of one lands on position 0.
  for (int position = -1; stream.incrementToken(); ) {
    position += enablePosIncrements ? incrementAttribute.getPositionIncrement() : 1;
    phrase.add(new Term(field, termAttribute.getBytesRef()), position);
  }
  return phrase.build();
}
Example #8
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Highlights a sloppy phrase query (slop 2) on the {@code title} field and checks the
 * single returned snippet; the expected markup depends on whether the highlighter's
 * flags for {@code title} include {@code WEIGHT_MATCHES}.
 */
public void testMatchesSlopBug() throws IOException {
  IndexReader indexReader = indexSomeFields();
  IndexSearcher indexSearcher = newSearcher(indexReader);
  UnifiedHighlighter unifiedHighlighter = new UnifiedHighlighter(indexSearcher, indexAnalyzer);

  Query sloppyPhrase = new PhraseQuery(2, "title", "this", "is", "the", "field");
  TopDocs results = indexSearcher.search(sloppyPhrase, 10, Sort.INDEXORDER);
  assertEquals(1, results.totalHits.value);

  String[] snippets = unifiedHighlighter.highlight("title", sloppyPhrase, results, 10);
  assertEquals(1, snippets.length);

  boolean weightMatches = unifiedHighlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES);
  String expectedSnippet = weightMatches
      ? "<b>This is the title field</b>."
      : "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.";
  assertEquals(expectedSnippet, snippets[0]);

  indexReader.close();
}
Example #9
Source File: TestMemoryIndex.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code MemoryIndex} from a document with two values for {@code field1} and an
 * untokenized {@code field2}, then checks which term and phrase queries score non-zero.
 */
@Test
public void testBuildFromDocument() {
  Document document = new Document();
  document.add(new TextField("field1", "some text", Field.Store.NO));
  document.add(new TextField("field1", "some more text", Field.Store.NO));
  document.add(new StringField("field2", "untokenized text", Field.Store.NO));

  analyzer.setPositionIncrementGap(100);

  MemoryIndex index = MemoryIndex.fromDocument(document, analyzer);

  // Term queries.
  float field1Text = index.search(new TermQuery(new Term("field1", "text")));
  assertThat(field1Text, not(0.0f));
  float field2Text = index.search(new TermQuery(new Term("field2", "text")));
  assertThat(field2Text, is(0.0f));
  float field2Whole = index.search(new TermQuery(new Term("field2", "untokenized text")));
  assertThat(field2Whole, not(0.0f));

  // Phrase queries on field1.
  float someMoreText = index.search(new PhraseQuery("field1", "some", "more", "text"));
  assertThat(someMoreText, not(0.0f));
  float someText = index.search(new PhraseQuery("field1", "some", "text"));
  assertThat(someText, not(0.0f));
  float textSome = index.search(new PhraseQuery("field1", "text", "some"));
  assertThat(textSome, is(0.0f));
}
Example #10
Source File: LumongoMultiFieldQueryParser.java From lumongo with Apache License 2.0 | 6 votes |
/**
 * Returns a version of {@code q} that carries the given slop.
 *
 * @param q the query to adjust
 * @param slop the slop to apply
 * @return a rebuilt phrase query with the same terms and positions and the new slop;
 *     a rebuilt multi-phrase query when its slop differs; otherwise {@code q} itself
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof PhraseQuery) {
    PhraseQuery original = (PhraseQuery) q;
    org.apache.lucene.index.Term[] originalTerms = original.getTerms();
    int[] originalPositions = original.getPositions();

    PhraseQuery.Builder rebuilt = new PhraseQuery.Builder();
    rebuilt.setSlop(slop);
    for (int idx = 0; idx < originalTerms.length; idx++) {
      rebuilt.add(originalTerms[idx], originalPositions[idx]);
    }
    return rebuilt.build();
  }

  if (q instanceof MultiPhraseQuery) {
    MultiPhraseQuery multiPhrase = (MultiPhraseQuery) q;
    if (multiPhrase.getSlop() != slop) {
      return new MultiPhraseQuery.Builder(multiPhrase).setSlop(slop).build();
    }
  }

  return q;
}
Example #11
Source File: NGramQueryParser.java From uyuni with GNU General Public License v2.0 | 6 votes |
/**
 * Parses the field query with the superclass and, if the result is a
 * {@code PhraseQuery}, converts it into an {@code NGramQuery}.
 *
 * @param defaultField passed through to the superclass parser
 * @param queryText passed through to the superclass parser
 * @return the superclass result unchanged unless it is a phrase query, in which case
 *     an {@code NGramQuery} built from it and {@code useMust}
 * @throws ParseException declared for the superclass call
 */
protected Query getFieldQuery(String defaultField, String queryText) throws ParseException {
  Query parsed = super.getFieldQuery(defaultField, queryText);
  if (parsed instanceof PhraseQuery) {
    // A ngram when parsed will become a series of smaller search terms,
    // these terms are grouped together into a PhraseQuery. We are taking
    // that PhraseQuery and breaking out each ngram term then combining all
    // ngrams together to form a BooleanQuery.
    return new NGramQuery((PhraseQuery) parsed, useMust);
  }
  log.debug("Returning default query. No phrase query translation.");
  return parsed;
}
Example #12
Source File: TestQPHelper.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testPositionIncrement() throws Exception { StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); qp.setEnablePositionIncrements(true); String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; // 0 2 5 7 8 int expectedPositions[] = { 1, 3, 4, 6, 9 }; PhraseQuery pq = (PhraseQuery) qp.parse(qtxt, "a"); // System.out.println("Query text: "+qtxt); // System.out.println("Result: "+pq); Term t[] = pq.getTerms(); int pos[] = pq.getPositions(); for (int i = 0; i < t.length; i++) { // System.out.println(i+". "+t[i]+" pos: "+pos[i]); assertEquals("term " + i + " = " + t[i] + " has wrong term-position!", expectedPositions[i], pos[i]); } }
Example #13
Source File: MultiFieldQueryParser.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns a version of {@code q} that carries the given slop.
 *
 * @param q the query to adjust
 * @param slop the slop to apply
 * @return a rebuilt phrase query with the same terms and positions and the new slop;
 *     a rebuilt multi-phrase query when its slop differs; otherwise {@code q} itself
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof PhraseQuery) {
    PhraseQuery original = (PhraseQuery) q;
    org.apache.lucene.index.Term[] originalTerms = original.getTerms();
    int[] originalPositions = original.getPositions();

    PhraseQuery.Builder rebuilt = new PhraseQuery.Builder();
    rebuilt.setSlop(slop);
    for (int idx = 0; idx < originalTerms.length; idx++) {
      rebuilt.add(originalTerms[idx], originalPositions[idx]);
    }
    return rebuilt.build();
  }

  if (q instanceof MultiPhraseQuery) {
    MultiPhraseQuery multiPhrase = (MultiPhraseQuery) q;
    if (multiPhrase.getSlop() != slop) {
      return new MultiPhraseQuery.Builder(multiPhrase).setSlop(slop).build();
    }
  }

  return q;
}
Example #14
Source File: TestReadOnlyIndex.java From lucene-solr with Apache License 2.0 | 6 votes |
private Void doTestReadOnlyIndex() throws Exception { Directory dir = FSDirectory.open(indexPath); IndexReader ireader = DirectoryReader.open(dir); IndexSearcher isearcher = newSearcher(ireader); // borrows from TestDemo, but not important to keep in sync with demo assertEquals(1, isearcher.count(new TermQuery(new Term("fieldname", longTerm)))); Query query = new TermQuery(new Term("fieldname", "text")); TopDocs hits = isearcher.search(query, 1); assertEquals(1, hits.totalHits.value); // Iterate through the results: for (int i = 0; i < hits.scoreDocs.length; i++) { Document hitDoc = isearcher.doc(hits.scoreDocs[i].doc); assertEquals(text, hitDoc.get("fieldname")); } // Test simple phrase query PhraseQuery phraseQuery = new PhraseQuery("fieldname", "to", "be"); assertEquals(1, isearcher.count(phraseQuery)); ireader.close(); return null; // void }
Example #15
Source File: TestBooleanSimilarity.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * With {@code BooleanSimilarity} on both writer and searcher, checks that a matching
 * sloppy phrase query scores exactly 1, and exactly 7 when wrapped in a boost of 7.
 */
public void testPhraseScoreIsEqualToBoost() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));

  Document document = new Document();
  document.add(new TextField("foo", "bar baz quux", Store.NO));
  indexWriter.addDocument(document);

  DirectoryReader indexReader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexSearcher.setSimilarity(new BooleanSimilarity());

  PhraseQuery phrase = new PhraseQuery(2, "foo", "bar", "quux");

  // Unboosted phrase.
  TopDocs plainResults = indexSearcher.search(phrase, 2);
  assertEquals(1, plainResults.totalHits.value);
  assertEquals(1f, plainResults.scoreDocs[0].score, 0f);

  // Same phrase with a boost of 7.
  TopDocs boostedResults = indexSearcher.search(new BoostQuery(phrase, 7), 2);
  assertEquals(1, boostedResults.totalHits.value);
  assertEquals(7f, boostedResults.scoreDocs[0].score, 0f);

  indexReader.close();
  directory.close();
}
Example #16
Source File: TestSpanSearchEquivalence.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * SpanNearQuery([A, B], 0, true) = "A B".
 *
 * <p>When both random terms are equal only the matching sets are compared; otherwise
 * the scores are compared as well.
 */
public void testSpanNearVersusPhrase() throws Exception {
  Term first = randomTerm();
  Term second = randomTerm();

  SpanQuery[] clauses = {
      spanQuery(new SpanTermQuery(first)),
      spanQuery(new SpanTermQuery(second))
  };
  SpanQuery orderedNear = spanQuery(new SpanNearQuery(clauses, 0, true));
  PhraseQuery exactPhrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  if (!first.equals(second)) {
    assertSameScores(orderedNear, exactPhrase);
  } else {
    assertSameSet(orderedNear, exactPhrase);
  }
}
Example #17
Source File: QueryBuilder.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates simple phrase query from the cached tokenstream contents.
 *
 * @param field field name used for every term
 * @param stream token source; it is reset here and consumed until exhausted
 * @param slop slop set on the phrase query
 * @return the phrase query, wrapped in a {@code BoostQuery} when the product of the
 *     per-token boosts differs from {@code DEFAULT_BOOST}
 * @throws IOException declared for the token stream calls
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.setSlop(slop);

  TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAttribute = stream.addAttribute(BoostAttribute.class);
  PositionIncrementAttribute incrementAttribute = stream.getAttribute(PositionIncrementAttribute.class);

  int currentPosition = -1;
  float combinedBoost = DEFAULT_BOOST;

  stream.reset();
  while (stream.incrementToken()) {
    // Advance by the token's own increment only when position increments are enabled.
    currentPosition += enablePositionIncrements ? incrementAttribute.getPositionIncrement() : 1;
    phrase.add(new Term(field, termAttribute.getBytesRef()), currentPosition);
    combinedBoost *= boostAttribute.getBoost();
  }

  PhraseQuery built = phrase.build();
  return combinedBoost == DEFAULT_BOOST ? built : new BoostQuery(built, combinedBoost);
}
Example #18
Source File: LuceneSearcher.java From uncc2014watsonsim with GNU General Public License v2.0 | 6 votes |
/**
 * Create a Lucene query using the bigrams in the given text.
 *
 * <p>Every word becomes an optional {@code TermQuery} on the {@code text} field, and every
 * pair of consecutive words becomes an optional {@code PhraseQuery} with a slop of 1.
 *
 * <p>Empty tokens are skipped: {@code String.split} returns an empty first element when
 * the text is empty or starts with a non-word character, which previously produced
 * clauses on an empty term.
 *
 * @param text the text to split on runs of non-word characters; must not be null
 * @return a boolean query whose clauses are all {@code SHOULD}; it has no clauses when
 *     {@code text} contains no word characters
 */
public BooleanQuery queryFromSkipBigrams(String text) {
  BooleanQuery q = new BooleanQuery();
  String previousWord = null;
  for (String word : text.split("\\W+")) {
    if (word.isEmpty()) {
      // Leading delimiter (or empty input): nothing to index for this token.
      continue;
    }
    if (previousWord != null) {
      PhraseQuery pq = new PhraseQuery();
      pq.setSlop(1);
      pq.add(new Term("text", previousWord));
      pq.add(new Term("text", word));
      q.add(pq, BooleanClause.Occur.SHOULD);
    }
    q.add(new TermQuery(new Term("text", word)), BooleanClause.Occur.SHOULD);
    previousWord = word;
  }
  return q;
}
Example #19
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testPositionIncrement() throws Exception { CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); qp.setEnablePositionIncrements(true); String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\""; // 0 2 5 7 8 int expectedPositions[] = {1,3,4,6,9}; PhraseQuery pq = (PhraseQuery) getQuery(qtxt,qp); //System.out.println("Query text: "+qtxt); //System.out.println("Result: "+pq); Term t[] = pq.getTerms(); int pos[] = pq.getPositions(); for (int i = 0; i < t.length; i++) { //System.out.println(i+". "+t[i]+" pos: "+pos[i]); assertEquals("term "+i+" = "+t[i]+" has wrong term-position!",expectedPositions[i],pos[i]); } }
Example #20
Source File: QueryTransformer.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Converts a {@code PhraseQuery} into an ordered {@code SpanNearQuery} with the same slop.
 * Each term becomes a {@code CustomSpanPayloadCheckQuery} clause, and a gap is inserted
 * wherever consecutive term positions differ by more than one.
 *
 * @param query the query to convert; cast to {@code PhraseQuery} without a type check
 * @param queryTransformer not used by this handler
 * @return the equivalent span-near query on the field of the first phrase term
 */
@Override
public Query handle(Query query, QueryTransformer queryTransformer) {
  PhraseQuery phrase = (PhraseQuery) query;
  Term[] phraseTerms = phrase.getTerms();
  int[] phrasePositions = phrase.getPositions();

  SpanNearQuery.Builder spanBuilder = new SpanNearQuery.Builder(phraseTerms[0].field(), true);
  for (int idx = 0; idx < phraseTerms.length; idx++) {
    if (idx > 0) {
      // Number of positions skipped between the previous term and this one.
      int skipped = (phrasePositions[idx] - phrasePositions[idx - 1]) - 1;
      if (skipped > 0) {
        spanBuilder.addGap(skipped);
      }
    }
    SpanTermQuery spanTerm = new SpanTermQuery(phraseTerms[idx]);
    spanBuilder.addClause(new CustomSpanPayloadCheckQuery(spanTerm, Collections.singletonList(null)));
  }
  return spanBuilder.setSlop(phrase.getSlop()).build();
}
Example #21
Source File: LuceneQueryTranslator.java From imhotep with Apache License 2.0 | 6 votes |
/**
 * Dispatches a Lucene query to the {@code rewrite} overload matching its concrete type.
 *
 * @param q the Lucene query to translate
 * @param intFields passed through unchanged to the type-specific overload
 * @return the result of the type-specific overload
 * @throws IllegalArgumentException if {@code q} is none of the supported query types
 */
public static Query rewrite(org.apache.lucene.search.Query q, Set<String> intFields) {
  // Checks are kept in the original order.
  if (q instanceof TermQuery) {
    return rewrite((TermQuery) q, intFields);
  }
  if (q instanceof BooleanQuery) {
    return rewrite((BooleanQuery) q, intFields);
  }
  if (q instanceof RangeQuery) {
    return rewrite((RangeQuery) q, intFields);
  }
  if (q instanceof ConstantScoreRangeQuery) {
    return rewrite((ConstantScoreRangeQuery) q, intFields);
  }
  if (q instanceof PrefixQuery) {
    return rewrite((PrefixQuery) q, intFields);
  }
  if (q instanceof PhraseQuery) {
    return rewrite((PhraseQuery) q, intFields);
  }
  throw new IllegalArgumentException("unsupported lucene query type: " + q.getClass().getSimpleName());
}
Example #22
Source File: NGramQuery.java From spacewalk with GNU General Public License v2.0 | 5 votes |
/**
 * Forms a BooleanQuery with each term in the original PhraseQuery added as its own
 * {@code TermQuery} clause.
 * Note: Assumes that each term has already been tokenized into a ngram,
 * this constructor will not re-tokenize terms.
 *
 * @param pq PhraseQuery to break up and convert to NGramQuery
 * @param useMust controls if BooleanClause.Occur SHOULD or MUST is used.
 */
public NGramQuery(PhraseQuery pq, boolean useMust) {
  // The same occurrence applies to every clause, so it is decided once.
  BooleanClause.Occur clauseOccur = useMust ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
  for (Term ngramTerm : pq.getTerms()) {
    add(new TermQuery(ngramTerm), clauseOccur);
  }
}
Example #23
Source File: Searcher.java From act with GNU General Public License v3.0 | 5 votes |
private BooleanQuery makeQuery(String synonym, String field) { BooleanQuery bq = new BooleanQuery(); // Set the synonym as a required phrase query. Phrase queries handle multi-word synonyms, but require construction. String queryString = synonym.trim().toLowerCase(); String[] parts = queryString.split("\\s+"); PhraseQuery query = new PhraseQuery(); Arrays.stream(parts).forEach(p -> query.add(new Term(field, p))); bq.add(query, BooleanClause.Occur.MUST); // Append all keywords as optional clauses. The more of these we find, the higher the score will be. KEYWORDS.forEach(term -> bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD)); return bq; }
Example #24
Source File: TestQueryBuilder.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testCJKPhrase() throws Exception { // individual CJK chars as terms SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); PhraseQuery expected = new PhraseQuery("field", "中", "国"); QueryBuilder builder = new QueryBuilder(analyzer); assertEquals(expected, builder.createPhraseQuery("field", "中国")); }
Example #25
Source File: FieldQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks two phrase queries for overlap in both directions by delegating to the
 * term-array overload. Does nothing when the slops differ, or when {@code fieldMatch} is
 * on and the first terms are on different fields.
 *
 * @param expandQueries collection handed to the term-array overload
 * @param a first phrase query
 * @param aBoost boost used for the a-versus-b check
 * @param b second phrase query
 * @param bBoost boost used for the b-versus-a check
 */
private void checkOverlap(Collection<Query> expandQueries, PhraseQuery a, float aBoost, PhraseQuery b, float bBoost) {
  boolean sameSlop = a.getSlop() == b.getSlop();
  if (!sameSlop) {
    return;
  }

  Term[] termsOfA = a.getTerms();
  Term[] termsOfB = b.getTerms();
  if (fieldMatch) {
    String fieldOfA = termsOfA[0].field();
    String fieldOfB = termsOfB[0].field();
    if (!fieldOfA.equals(fieldOfB)) {
      return;
    }
  }

  checkOverlap(expandQueries, termsOfA, termsOfB, a.getSlop(), aBoost);
  checkOverlap(expandQueries, termsOfB, termsOfA, b.getSlop(), bBoost);
}
Example #26
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testAutoGeneratePhraseQueriesOn() throws Exception { // individual CJK chars as terms SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); PhraseQuery expected = new PhraseQuery("field", "中", "国"); CommonQueryParserConfiguration qp = getParserConfig(analyzer); if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method ((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533 } setAutoGeneratePhraseQueries(qp, true); assertEquals(expected, getQuery("中国",qp)); }
Example #27
Source File: QueryParserTestBase.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testCJKSloppyPhrase() throws Exception { // individual CJK chars as terms SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); PhraseQuery expected = new PhraseQuery(3, "field", "中", "国"); assertEquals(expected, getQuery("\"中国\"~3", analyzer)); }
Example #28
Source File: TestSimpleQueryParser.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Two quoted phrases joined by '|' should parse into two SHOULD clauses. */
public void testORPhrase() throws Exception {
  PhraseQuery fooBar = new PhraseQuery("field", "foo", "bar");
  PhraseQuery starWars = new PhraseQuery("field", "star", "wars");

  BooleanQuery expectedQuery = new BooleanQuery.Builder()
      .add(fooBar, Occur.SHOULD)
      .add(starWars, Occur.SHOULD)
      .build();

  assertEquals(expectedQuery, parse("\"foo bar\"|\"star wars\""));
}
Example #29
Source File: TestSimpleQueryParser.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Two quoted phrases joined by '+' should parse into two MUST clauses. */
public void testANDPhrase() throws Exception {
  PhraseQuery fooBar = new PhraseQuery("field", "foo", "bar");
  PhraseQuery starWars = new PhraseQuery("field", "star", "wars");

  BooleanQuery expectedQuery = new BooleanQuery.Builder()
      .add(fooBar, Occur.MUST)
      .add(starWars, Occur.MUST)
      .build();

  assertEquals(expectedQuery, parse("\"foo bar\"+\"star wars\""));
}
Example #30
Source File: PhraseQueryExtractor.java From solr-redis with Apache License 2.0 | 5 votes |
/**
 * Adds the field name of every term in the phrase query to {@code extractedFields}.
 *
 * @param q the phrase query whose term fields are collected
 * @param extractors not used by this extractor
 * @param extractedFields set that receives the field names
 */
@Override
public void extractSubQueriesFields(final PhraseQuery q,
    final Iterable<QueryExtractor<? extends Query>> extractors,
    final Set<String> extractedFields) throws UnsupportedOperationException {
  final Term[] phraseTerms = q.getTerms();
  for (int idx = 0; idx < phraseTerms.length; idx++) {
    extractedFields.add(phraseTerms[idx].field());
  }
}