Java Code Examples for org.apache.lucene.document.Field#setStringValue()
The following examples show how to use
org.apache.lucene.document.Field#setStringValue().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestIndexWriterReader.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes the same {@link Document} twice with a reused {@code id} field (values "0" and "1"),
 * deletes the document with id "0", calls {@code forceMergeDeletes()}, and asserts that a reader
 * opened on the directory afterwards reports one document and no deletions.
 */
public void testForceMergeDeletes() throws Throwable {
  Directory directory = newDirectory();
  final IndexWriter writer =
      new IndexWriter(
          directory,
          newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

  Document document = new Document();
  document.add(newTextField("field", "a b c", Field.Store.NO));
  Field idField = newStringField("id", "", Field.Store.NO);
  document.add(idField);

  // One Document instance is indexed twice; only the id value changes in between.
  idField.setStringValue("0");
  writer.addDocument(document);
  idField.setStringValue("1");
  writer.addDocument(document);

  writer.deleteDocuments(new Term("id", "0"));

  // This reader is obtained before the merge and closed right after the writer.
  IndexReader indexReader = writer.getReader();
  writer.forceMergeDeletes();
  writer.close();
  indexReader.close();

  indexReader = DirectoryReader.open(directory);
  assertEquals(1, indexReader.numDocs());
  assertFalse(indexReader.hasDeletions());
  indexReader.close();
  directory.close();
}
Example 2
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes a single document with {@code title}, {@code text} and {@code category} fields and
 * returns a reader over it.
 *
 * <p>All three fields are created with the enclosing class's {@code fieldType}. The original
 * version also built and froze a local {@code FieldType} that was never passed to any field; that
 * unused local has been removed.
 *
 * @return a reader obtained from the writer before the writer is closed; the caller closes it
 * @throws IOException if indexing or opening the reader fails
 */
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);

  // Values are assigned after the fields are attached to the document.
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
Example 3
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes two documents through one reused {@code body} field, searches with an interval term
 * query for "highlighting", and checks the highlighted snippet returned for each hit.
 */
public void testBasics() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue(
      "This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);
  bodyField.setStringValue("Highlighting the first term. Hope it works.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", highlights[0]);
  assertEquals("<b>Highlighting</b> the first term. ", highlights[1]);

  indexReader.close();
}
Example 4
Source File: TestMaxTermFrequency.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds the fixture index: 100 documents, each carrying the value returned by
 * {@code addValue()} in a reused, unstored "foo" text field, then stores a reader over them in
 * {@code reader}.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();

  IndexWriterConfig iwc =
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
          .setMergePolicy(newLogMergePolicy());
  iwc.setSimilarity(new TestSimilarity());
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir, iwc);

  Document document = new Document();
  Field fooField = newTextField("foo", "", Field.Store.NO);
  document.add(fooField);

  int remaining = 100;
  while (remaining > 0) {
    fooField.setStringValue(addValue());
    indexWriter.addDocument(document);
    remaining--;
  }

  reader = indexWriter.getReader();
  indexWriter.close();
}
Example 5
Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds the fixture: two documents, each with identical text in the "foo" and "bar" fields,
 * indexed with an {@code ExampleSimilarityProvider}. The same similarity is then installed on the
 * searcher created over the resulting reader.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();

  PerFieldSimilarityWrapper perFieldSim = new ExampleSimilarityProvider();
  IndexWriterConfig writerConfig =
      newIndexWriterConfig(new MockAnalyzer(random())).setSimilarity(perFieldSim);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, writerConfig);

  Document document = new Document();
  Field fooField = newTextField("foo", "", Field.Store.NO);
  document.add(fooField);
  Field barField = newTextField("bar", "", Field.Store.NO);
  document.add(barField);

  // First document.
  fooField.setStringValue("quick brown fox");
  barField.setStringValue("quick brown fox");
  writer.addDocument(document);

  // Second document, reusing the same Document and Field instances.
  fooField.setStringValue("jumps over lazy brown dog");
  barField.setStringValue("jumps over lazy brown dog");
  writer.addDocument(document);

  reader = writer.getReader();
  writer.close();

  searcher = newSearcher(reader);
  searcher.setSimilarity(perFieldSim);
}
Example 6
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Indexes two single-sentence documents and checks that an interval term query for "test"
 * highlights the matching word in each of them.
 */
public void testOneSentence() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("test"));

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("This is a <b>test</b>.", highlights[0]);
  assertEquals("<b>Test</b> a one sentence document.", highlights[1]);

  indexReader.close();
}
Example 7
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Indexes two single-sentence documents and checks that a {@code TermQuery} for "test" on the
 * body field highlights the matching word in each of them.
 */
public void testOneSentence() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  Term testTerm = new Term("body", "test");
  Query query = new TermQuery(testTerm);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("This is a <b>test</b>.", highlights[0]);
  assertEquals("<b>Test</b> a one sentence document.", highlights[1]);

  indexReader.close();
}
Example 8
Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks highlighting when a wildcard query ({@code te*}) is wrapped in a
 * {@code DisjunctionMaxQuery}: both indexed sentences must come back with the matching word
 * highlighted.
 */
public void testWildcardInDisjunctionMax() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue("This is a test.");
  writer.addDocument(document);
  bodyField.setStringValue("Test a one sentence document.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  WildcardQuery wildcard = new WildcardQuery(new Term("body", "te*"));
  DisjunctionMaxQuery query = new DisjunctionMaxQuery(Collections.singleton(wildcard), 0);

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("This is a <b>test</b>.", highlights[0]);
  assertEquals("<b>Test</b> a one sentence document.", highlights[1]);

  indexReader.close();
}
Example 9
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks {@code highlightFields} when it is given explicit document ids (taken from the search
 * hits) instead of a {@code TopDocs}: the "body" snippets must match the expected highlights.
 */
public void testSpecificDocIDs() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue(
      "This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);
  bodyField.setStringValue("Highlighting the first term. Hope it works.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  // Collect the ids of the two hits, in index order.
  ScoreDoc[] scoreDocs = topDocs.scoreDocs;
  int[] docIDs = {scoreDocs[0].doc, scoreDocs[1].doc};

  String[] fields = new String[] {"body"};
  int[] maxPassages = new int[] {1};
  String[] highlights = highlighter.highlightFields(fields, query, docIDs, maxPassages).get("body");

  assertEquals(2, highlights.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", highlights[0]);
  assertEquals("<b>Highlighting</b> the first term. ", highlights[1]);

  indexReader.close();
}
Example 10
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks highlighting for a boolean query with three optional term clauses ("highlighting",
 * "just", "first"): every matching term in each returned snippet must be wrapped in bold tags.
 */
public void testMultipleTerms() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue(
      "This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);
  bodyField.setStringValue("Highlighting the first term. Hope it works.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(new TermQuery(new Term("body", "highlighting")), BooleanClause.Occur.SHOULD);
  builder.add(new TermQuery(new Term("body", "just")), BooleanClause.Occur.SHOULD);
  builder.add(new TermQuery(new Term("body", "first")), BooleanClause.Occur.SHOULD);
  BooleanQuery query = builder.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", highlights[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", highlights[1]);

  indexReader.close();
}
Example 11
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a passage formatter created with escaping enabled encodes the content of the
 * snippet while leaving the highlight tags themselves intact.
 *
 * <p>The expected snippet is written in its escaped form. The previous expectation contained the
 * raw {@code <i>postings</i>} markup and plain spaces, which cannot be the output of a formatter
 * constructed with {@code escape = true}.
 */
public void testEncode() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  // The indexed text deliberately contains markup that the formatter has to escape.
  body.setStringValue(
      "This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter =
      new UnifiedHighlighter(searcher, indexAnalyzer) {
        @Override
        protected PassageFormatter getFormatter(String field) {
          // Last argument turns escaping on.
          return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
      };

  Query query = new TermQuery(new Term("body", "highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);
  assertEquals(
      "Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;"
          + "&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;",
      snippets[0]);

  ir.close();
}
Example 12
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks highlighting with a {@code WholeBreakIterator} and a max length of 10000: the single
 * returned snippet must be the entire field value with every occurrence of "test" highlighted.
 */
public void testHighlightAllText() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue(
      "This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter =
      new UnifiedHighlighter(searcher, indexAnalyzer) {
        @Override
        protected BreakIterator getBreakIterator(String field) {
          return new WholeBreakIterator();
        }
      };
  highlighter.setMaxLength(10000);

  Query query = new TermQuery(new Term("body", "test"));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits, 2);
  assertEquals(1, highlights.length);
  assertEquals(
      "This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.",
      highlights[0]);

  indexReader.close();
}
Example 13
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks highlighting for an interval query that is a disjunction of three terms
 * ("highlighting", "just", "first"): every matching term in each snippet must be highlighted.
 */
public void testMultipleTerms() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field bodyField = new Field("body", "", fieldType);
  Document document = new Document();
  document.add(bodyField);

  bodyField.setStringValue(
      "This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);
  bodyField.setStringValue("Highlighting the first term. Hope it works.");
  writer.addDocument(document);

  IndexReader indexReader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(indexReader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  Query query =
      new IntervalQuery(
          "body",
          Intervals.or(
              Intervals.term("highlighting"), Intervals.term("just"), Intervals.term("first")));

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] highlights = highlighter.highlight("body", query, hits);
  assertEquals(2, highlights.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", highlights[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", highlights[1]);

  indexReader.close();
}
Example 14
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testObjectFormatter() throws Exception { RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); Field body = new Field("body", "", fieldType); Document doc = new Document(); doc.add(body); body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) { @Override protected PassageFormatter getFormatter(String field) { return new PassageFormatter() { PassageFormatter defaultFormatter = new DefaultPassageFormatter(); @Override public String[] format(Passage passages[], String content) { // Just turns the String snippet into a length 2 // array of String return new String[]{"blah blah", defaultFormatter.format(passages, content).toString()}; } }; } }; Query query = new IntervalQuery("body", Intervals.term("highlighting")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); int[] docIDs = new int[1]; docIDs[0] = topDocs.scoreDocs[0].doc; Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[] {"body"}, query, docIDs, new int[] {1}); Object[] bodySnippets = snippets.get("body"); assertEquals(1, bodySnippets.length); assertTrue(Arrays.equals(new String[] {"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0])); ir.close(); }
Example 15
Source File: TestSumDocFreq.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Indexes at least 500 documents with random two-letter values in the "foo" and "bar" fields,
 * runs {@code assertSumDocFreq} on a reader, then deletes random ids, force-merges to one
 * segment, and runs {@code assertSumDocFreq} again on a freshly opened reader.
 */
public void testSumDocFreq() throws Exception {
  final int numDocs = atLeast(500);

  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  Document document = new Document();
  Field idField = newStringField("id", "", Field.Store.NO);
  Field fooField = newTextField("foo", "", Field.Store.NO);
  Field barField = newTextField("bar", "", Field.Store.NO);
  document.add(idField);
  document.add(fooField);
  document.add(barField);

  for (int docNum = 0; docNum < numDocs; docNum++) {
    idField.setStringValue("" + docNum);

    // Two random lowercase letters per field; the draw order is foo first, then bar.
    char first = (char) TestUtil.nextInt(random(), 'a', 'z');
    char second = (char) TestUtil.nextInt(random(), 'a', 'z');
    fooField.setStringValue("" + first + " " + second);

    first = (char) TestUtil.nextInt(random(), 'a', 'z');
    second = (char) TestUtil.nextInt(random(), 'a', 'z');
    barField.setStringValue("" + first + " " + second);

    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();
  assertSumDocFreq(indexReader);
  indexReader.close();

  int numDeletions = atLeast(20);
  for (int n = 0; n < numDeletions; n++) {
    Term victim = new Term("id", "" + random().nextInt(numDocs));
    indexWriter.deleteDocuments(victim);
  }
  indexWriter.forceMerge(1);
  indexWriter.close();

  indexReader = DirectoryReader.open(directory);
  assertSumDocFreq(indexReader);
  indexReader.close();
  directory.close();
}
Example 16
Source File: TestBooleanQuery.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testFilterClauseDoesNotImpactScore() throws IOException { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), dir); Document doc = new Document(); Field f = newTextField("field", "a b c d", Field.Store.NO); doc.add(f); w.addDocument(doc); f.setStringValue("b d"); w.addDocument(doc); f.setStringValue("a d"); w.addDocument(doc); w.commit(); DirectoryReader reader = w.getReader(); final IndexSearcher searcher = new IndexSearcher(reader); BooleanQuery.Builder qBuilder = new BooleanQuery.Builder(); BooleanQuery q = qBuilder.build(); qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER); // With a single clause, we will rewrite to the underlying // query. Make sure that it returns null scores assertSameScoresWithoutFilters(searcher, qBuilder.build()); // Now with two clauses, we will get a conjunction scorer // Make sure it returns null scores qBuilder.add(new TermQuery(new Term("field", "b")), Occur.FILTER); q = qBuilder.build(); assertSameScoresWithoutFilters(searcher, q); // Now with a scoring clause, we need to make sure that // the boolean scores are the same as those from the term // query qBuilder.add(new TermQuery(new Term("field", "c")), Occur.SHOULD); q = qBuilder.build(); assertSameScoresWithoutFilters(searcher, q); // FILTER and empty SHOULD qBuilder = new BooleanQuery.Builder(); qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER); qBuilder.add(new TermQuery(new Term("field", "e")), Occur.SHOULD); q = qBuilder.build(); assertSameScoresWithoutFilters(searcher, q); // mix of FILTER and MUST qBuilder = new BooleanQuery.Builder(); qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER); qBuilder.add(new TermQuery(new Term("field", "d")), Occur.MUST); q = qBuilder.build(); assertSameScoresWithoutFilters(searcher, q); // FILTER + minShouldMatch qBuilder = new BooleanQuery.Builder(); qBuilder.add(new TermQuery(new Term("field", "b")), Occur.FILTER); qBuilder.add(new TermQuery(new 
Term("field", "a")), Occur.SHOULD); qBuilder.add(new TermQuery(new Term("field", "d")), Occur.SHOULD); qBuilder.setMinimumNumberShouldMatch(1); q = qBuilder.build(); assertSameScoresWithoutFilters(searcher, q); reader.close(); w.close(); dir.close(); }
Example 17
Source File: TestConcurrentMergeScheduler.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testDeleteMerging() throws IOException { Directory directory = newDirectory(); LogDocMergePolicy mp = new LogDocMergePolicy(); // Force degenerate merging so we can get a mix of // merging of segments with and without deletes at the // start: mp.setMinMergeDocs(1000); IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())) .setMergePolicy(mp)); Document doc = new Document(); Field idField = newStringField("id", "", Field.Store.YES); doc.add(idField); for(int i=0;i<10;i++) { if (VERBOSE) { System.out.println("\nTEST: cycle"); } for(int j=0;j<100;j++) { idField.setStringValue(Integer.toString(i*100+j)); writer.addDocument(doc); } int delID = i; while(delID < 100*(1+i)) { if (VERBOSE) { System.out.println("TEST: del " + delID); } writer.deleteDocuments(new Term("id", ""+delID)); delID += 10; } writer.commit(); } writer.close(); IndexReader reader = DirectoryReader.open(directory); // Verify that we did not lose any deletes... assertEquals(450, reader.numDocs()); reader.close(); directory.close(); }
Example 18
Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testRanges() throws Exception { RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); Field body = new Field("body", "", fieldType); Document doc = new Document(); doc.add(body); body.setStringValue("This is a test."); iw.addDocument(doc); body.setStringValue("Test a one sentence document."); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer); Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); String snippets[] = highlighter.highlight("body", query, topDocs); assertEquals(2, snippets.length); assertEquals("This is a <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> a one sentence document.", snippets[1]); // null start query = TermRangeQuery.newStringRange("body", null, "tf", true, true); topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", query, topDocs); assertEquals(2, snippets.length); assertEquals("This <b>is</b> <b>a</b> <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.", snippets[1]); // null end query = TermRangeQuery.newStringRange("body", "ta", null, true, true); topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", query, topDocs); assertEquals(2, snippets.length); assertEquals("<b>This</b> is a <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> a one sentence document.", snippets[1]); // exact start inclusive query = TermRangeQuery.newStringRange("body", "test", "tf", true, true); topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", 
query, topDocs); assertEquals(2, snippets.length); assertEquals("This is a <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> a one sentence document.", snippets[1]); // exact end inclusive query = TermRangeQuery.newStringRange("body", "ta", "test", true, true); topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", query, topDocs); assertEquals(2, snippets.length); assertEquals("This is a <b>test</b>.", snippets[0]); assertEquals("<b>Test</b> a one sentence document.", snippets[1]); // exact start exclusive BooleanQuery bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD) .build(); topDocs = searcher.search(bq, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", bq, topDocs); assertEquals(2, snippets.length); assertEquals("This is a test.", snippets[0]); assertEquals("Test a one sentence document.", snippets[1]); // exact end exclusive bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD) .build(); topDocs = searcher.search(bq, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = highlighter.highlight("body", bq, topDocs); assertEquals(2, snippets.length); assertEquals("This is a test.", snippets[0]); assertEquals("Test a one sentence document.", snippets[1]); // wrong field highlighter.setFieldMatcher(null);//default bq = new BooleanQuery.Builder() .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD) .add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD) .build(); topDocs = searcher.search(bq, 10, Sort.INDEXORDER); assertEquals(2, topDocs.totalHits.value); snippets = 
highlighter.highlight("body", bq, topDocs); assertEquals(2, snippets.length); assertEquals("This is a test.", snippets[0]); assertEquals("Test a one sentence document.", snippets[1]); ir.close(); }
Example 19
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testObjectFormatter() throws Exception { RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer); Field body = new Field("body", "", fieldType); Document doc = new Document(); doc.add(body); body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore."); iw.addDocument(doc); IndexReader ir = iw.getReader(); iw.close(); IndexSearcher searcher = newSearcher(ir); UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) { @Override protected PassageFormatter getFormatter(String field) { return new PassageFormatter() { PassageFormatter defaultFormatter = new DefaultPassageFormatter(); @Override public String[] format(Passage passages[], String content) { // Just turns the String snippet into a length 2 // array of String return new String[]{"blah blah", defaultFormatter.format(passages, content).toString()}; } }; } }; Query query = new TermQuery(new Term("body", "highlighting")); TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER); assertEquals(1, topDocs.totalHits.value); int[] docIDs = new int[1]; docIDs[0] = topDocs.scoreDocs[0].doc; Map<String, Object[]> snippets = highlighter.highlightFieldsAsObjects(new String[]{"body"}, query, docIDs, new int[]{1}); Object[] bodySnippets = snippets.get("body"); assertEquals(1, bodySnippets.length); assertTrue(Arrays.equals(new String[]{"blah blah", "Just a test <b>highlighting</b> from postings. "}, (String[]) bodySnippets[0])); ir.close(); }
Example 20
Source File: BaseStoredFieldsFormatTestCase.java From lucene-solr with Apache License 2.0 | 4 votes |
@Nightly public void testBigDocuments() throws IOException { assumeWorkingMMapOnWindows(); // "big" as "much bigger than the chunk size" // for this test we force a FS dir // we can't just use newFSDirectory, because this test doesn't really index anything. // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484) Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments"))); IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random())); iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30)); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf); if (dir instanceof MockDirectoryWrapper) { ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER); } final Document emptyDoc = new Document(); // emptyDoc final Document bigDoc1 = new Document(); // lot of small fields final Document bigDoc2 = new Document(); // 1 very big field final Field idField = new StringField("id", "", Store.NO); emptyDoc.add(idField); bigDoc1.add(idField); bigDoc2.add(idField); final FieldType onlyStored = new FieldType(StringField.TYPE_STORED); onlyStored.setIndexOptions(IndexOptions.NONE); final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored); final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000); for (int i = 0; i < numFields; ++i) { bigDoc1.add(smallField); } final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored); bigDoc2.add(bigField); final int numDocs = atLeast(5); final Document[] docs = new Document[numDocs]; for (int i = 0; i < numDocs; ++i) { docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2)); } for (int i = 0; i < numDocs; ++i) { idField.setStringValue("" + i); iw.addDocument(docs[i]); if (random().nextInt(numDocs) == 0) { iw.commit(); } } iw.commit(); iw.forceMerge(1); // look 
at what happens when big docs are merged final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir)); final IndexSearcher searcher = new IndexSearcher(rd); for (int i = 0; i < numDocs; ++i) { final Query query = new TermQuery(new Term("id", "" + i)); final TopDocs topDocs = searcher.search(query, 1); assertEquals("" + i, 1, topDocs.totalHits.value); final Document doc = rd.document(topDocs.scoreDocs[0].doc); assertNotNull(doc); final IndexableField[] fieldValues = doc.getFields("fld"); assertEquals(docs[i].getFields("fld").length, fieldValues.length); if (fieldValues.length > 0) { assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue()); } } rd.close(); iw.close(); dir.close(); }