org.apache.lucene.document.Field#setStringValue

Source File: TestIndexWriterReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Adds two documents, deletes the one whose id is "0", and calls
 * {@code forceMergeDeletes()} while a reader obtained from the writer is still open.
 * A reader opened on the directory afterwards must report a single document and
 * no deletions.
 */
public void testForceMergeDeletes() throws Throwable {
  final Directory directory = newDirectory();
  final IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

  final Document document = new Document();
  document.add(newTextField("field", "a b c", Field.Store.NO));
  final Field idField = newStringField("id", "", Field.Store.NO);
  document.add(idField);

  // One Document instance is indexed twice; only the id value changes between adds.
  for (String idValue : new String[] {"0", "1"}) {
    idField.setStringValue(idValue);
    writer.addDocument(document);
  }
  writer.deleteDocuments(new Term("id", "0"));

  // This reader stays open across forceMergeDeletes() and the writer close.
  final IndexReader readerFromWriter = writer.getReader();
  writer.forceMergeDeletes();
  writer.close();
  readerFromWriter.close();

  final IndexReader reopened = DirectoryReader.open(directory);
  assertEquals(1, reopened.numDocs());
  assertFalse(reopened.hasDeletions());
  reopened.close();
  directory.close();
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes one document with "title", "text" and "category" fields, all created with
 * the shared {@code fieldType}, and returns a reader over it.
 *
 * <p>The writer is closed before returning; the returned reader is not closed here,
 * so the caller is responsible for it.
 *
 * @return the reader obtained from the writer after the document was added
 * @throws IOException propagated from the writer calls
 */
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field title = new Field("title", "", fieldType);
  Field text = new Field("text", "", fieldType);
  Field category = new Field("category", "", fieldType);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);

  // Fields are created empty and filled in right before the document is added.
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes two documents, runs an interval term query for "highlighting" against the
 * "body" field and checks the snippet returned for each hit.
 */
public void testBasics() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  // The same Document/Field pair is reused; only the field value changes per add.
  String[] bodies = {
      "This is a test. Just a test highlighting from postings. Feel free to ignore.",
      "Highlighting the first term. Hope it works."
  };
  for (String text : bodies) {
    bodyField.setStringValue(text);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
  reader.close();
}

Source File: TestMaxTermFrequency.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the directory and indexes 100 documents whose single "foo" field is filled
 * from {@code addValue()}, with {@code TestSimilarity} set on the writer config.
 * The reader obtained from the writer is stored in {@code reader}.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();

  final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  final IndexWriterConfig config =
      newIndexWriterConfig(analyzer).setMergePolicy(newLogMergePolicy());
  config.setSimilarity(new TestSimilarity());
  final RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  final Document document = new Document();
  final Field fooField = newTextField("foo", "", Field.Store.NO);
  document.add(fooField);

  // Reuse one Document; each iteration swaps in a fresh value before adding it.
  for (int remaining = 100; remaining > 0; remaining--) {
    fooField.setStringValue(addValue());
    writer.addDocument(document);
  }

  reader = writer.getReader();
  writer.close();
}

Source File: TestSimilarityProvider.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes two documents that carry identical text in the "foo" and "bar" fields, using
 * an {@code ExampleSimilarityProvider} for both the writer config and the searcher.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();

  PerFieldSimilarityWrapper similarity = new ExampleSimilarityProvider();
  IndexWriterConfig config =
      newIndexWriterConfig(new MockAnalyzer(random())).setSimilarity(similarity);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, config);

  Document document = new Document();
  Field fooField = newTextField("foo", "", Field.Store.NO);
  document.add(fooField);
  Field barField = newTextField("bar", "", Field.Store.NO);
  document.add(barField);

  // Both fields always receive the same text for a given document.
  for (String text : new String[] {"quick brown fox", "jumps over lazy brown dog"}) {
    fooField.setStringValue(text);
    barField.setStringValue(text);
    writer.addDocument(document);
  }

  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(similarity);
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes two one-sentence documents and checks the snippets produced for an interval
 * term query on "test".
 */
public void testOneSentence() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  for (String sentence : new String[] {"This is a test.", "Test a one sentence document."}) {
    bodyField.setStringValue(sentence);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("test"));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
  reader.close();
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes two one-sentence documents and checks the snippets produced for a
 * {@code TermQuery} on body:test.
 */
public void testOneSentence() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  for (String sentence : new String[] {"This is a test.", "Test a one sentence document."}) {
    bodyField.setStringValue(sentence);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Term term = new Term("body", "test");
  Query query = new TermQuery(term);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  reader.close();
}

Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Wraps a {@code WildcardQuery} for body:te* in a {@code DisjunctionMaxQuery} and checks
 * the snippets highlighted for the two indexed documents.
 */
public void testWildcardInDisjunctionMax() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  for (String sentence : new String[] {"This is a test.", "Test a one sentence document."}) {
    bodyField.setStringValue(sentence);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  // The wildcard is the only disjunct; the tie-breaker argument is 0.
  WildcardQuery wildcard = new WildcardQuery(new Term("body", "te*"));
  DisjunctionMaxQuery query = new DisjunctionMaxQuery(Collections.singleton(wildcard), 0);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  reader.close();
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Highlights by explicit doc ids: the ids of the two hits are passed to
 * {@code highlightFields} and the snippets returned for "body" are checked.
 */
public void testSpecificDocIDs() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  String[] bodies = {
      "This is a test. Just a test highlighting from postings. Feel free to ignore.",
      "Highlighting the first term. Hope it works."
  };
  for (String text : bodies) {
    bodyField.setStringValue(text);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  // Collect the doc ids of both hits, in hit order.
  ScoreDoc[] scoreDocs = hits.scoreDocs;
  int[] docIDs = {scoreDocs[0].doc, scoreDocs[1].doc};

  String[] fields = {"body"};
  int[] maxPassages = {1};
  String[] snippets = highlighter.highlightFields(fields, query, docIDs, maxPassages).get("body");
  assertEquals(2, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the first term. ", snippets[1]);
  reader.close();
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Highlights a boolean query with three SHOULD term clauses ("highlighting", "just",
 * "first") and checks that every matching term is marked in each snippet.
 */
public void testMultipleTerms() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  String[] bodies = {
      "This is a test. Just a test highlighting from postings. Feel free to ignore.",
      "Highlighting the first term. Hope it works."
  };
  for (String text : bodies) {
    bodyField.setStringValue(text);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  // One optional clause per term, added in the listed order.
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  for (String termText : new String[] {"highlighting", "just", "first"}) {
    builder.add(new TermQuery(new Term("body", termText)), BooleanClause.Occur.SHOULD);
  }
  BooleanQuery query = builder.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);
  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);

  reader.close();
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Uses a {@code DefaultPassageFormatter} constructed with its last argument set to
 * {@code true} and checks that the markup already present in the stored text shows up
 * entity-encoded in the snippet, while the highlight tags stay literal.
 */
public void testEncode() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  bodyField.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      // Same formatter for every field.
      return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
    }
  };

  Term term = new Term("body", "highlighting");
  Query query = new TermQuery(term);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(1, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);

  reader.close();
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Overrides the break iterator with a {@code WholeBreakIterator} and checks that the
 * returned snippet is the complete field text with every "test" occurrence marked.
 */
public void testHighlightAllText() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  bodyField.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected BreakIterator getBreakIterator(String field) {
      // Same break iterator type for every field.
      return new WholeBreakIterator();
    }
  };
  highlighter.setMaxLength(10000);

  Term term = new Term("body", "test");
  Query query = new TermQuery(term);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits, 2);
  assertEquals(1, snippets.length);
  assertEquals("This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.", snippets[0]);

  reader.close();
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Highlights an interval query that ORs three terms ("highlighting", "just", "first")
 * and checks that every matching term is marked in each snippet.
 */
public void testMultipleTerms() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  String[] bodies = {
      "This is a test. Just a test highlighting from postings. Feel free to ignore.",
      "Highlighting the first term. Hope it works."
  };
  for (String text : bodies) {
    bodyField.setStringValue(text);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery(
      "body",
      Intervals.or(Intervals.term("highlighting"), Intervals.term("just"), Intervals.term("first")));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
  reader.close();
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Installs a {@code PassageFormatter} whose {@code format} returns a two-element
 * {@code String[]} and checks that {@code highlightFieldsAsObjects} hands that array
 * back unchanged for the single hit of an interval term query.
 */
public void testObjectFormatter() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      return new PassageFormatter() {
        PassageFormatter delegate = new DefaultPassageFormatter();

        @Override
        public String[] format(Passage[] passages, String content) {
          // Pair a fixed marker string with the default formatter's output.
          String formatted = delegate.format(passages, content).toString();
          return new String[] {"blah blah", formatted};
        }
      };
    }
  };

  Query query = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  int[] docIDs = {hits.scoreDocs[0].doc};
  String[] fields = {"body"};
  int[] maxPassages = {1};
  Map<String,Object[]> snippets = highlighter.highlightFieldsAsObjects(fields, query, docIDs, maxPassages);
  Object[] bodySnippets = snippets.get("body");
  assertEquals(1, bodySnippets.length);

  String[] expected = {"blah blah", "Just a test <b>highlighting</b> from postings. "};
  assertTrue(Arrays.equals(expected, (String[]) bodySnippets[0]));
  reader.close();
}

Source File: TestSumDocFreq.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes at least 500 documents with an id and two short random text fields, checks
 * {@code assertSumDocFreq} on a reader taken from the writer, then deletes random ids,
 * force-merges to one segment and runs the same check on a freshly opened reader.
 */
public void testSumDocFreq() throws Exception {
  final int numDocs = atLeast(500);

  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  Field idField = newStringField("id", "", Field.Store.NO);
  Field fooField = newTextField("foo", "", Field.Store.NO);
  Field barField = newTextField("bar", "", Field.Store.NO);
  Document document = new Document();
  document.add(idField);
  document.add(fooField);
  document.add(barField);

  for (int docIndex = 0; docIndex < numDocs; docIndex++) {
    idField.setStringValue("" + docIndex);
    // "foo" first, then "bar": each gets two characters drawn from the 'a'..'z'
    // bounds, separated by a space.
    for (Field textField : new Field[] {fooField, barField}) {
      char first = (char) TestUtil.nextInt(random(), 'a', 'z');
      char second = (char) TestUtil.nextInt(random(), 'a', 'z');
      textField.setStringValue("" + first + " " + second);
    }
    indexWriter.addDocument(document);
  }

  // First check: reader taken from the still-open writer.
  IndexReader beforeDeletes = indexWriter.getReader();
  assertSumDocFreq(beforeDeletes);
  beforeDeletes.close();

  int numDeletions = atLeast(20);
  for (int remaining = numDeletions; remaining > 0; remaining--) {
    indexWriter.deleteDocuments(new Term("id", "" + random().nextInt(numDocs)));
  }
  indexWriter.forceMerge(1);
  indexWriter.close();

  // Second check: reader opened on the directory after deletes and the merge.
  IndexReader afterMerge = DirectoryReader.open(directory);
  assertSumDocFreq(afterMerge);
  afterMerge.close();
  directory.close();
}

Source File: TestBooleanQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes three documents ("a b c d", "b d", "a d") and passes a series of boolean
 * queries to {@code assertSameScoresWithoutFilters}. The queries combine FILTER
 * clauses with SHOULD and MUST clauses and with a minimum-should-match setting.
 *
 * @throws IOException propagated from the index writer, reader or search calls
 */
public void testFilterClauseDoesNotImpactScore() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  Field f = newTextField("field", "a b c d", Field.Store.NO);
  doc.add(f);
  // The same Document is added three times; only the field value changes.
  w.addDocument(doc);
  f.setStringValue("b d");
  w.addDocument(doc);
  f.setStringValue("a d");
  w.addDocument(doc);
  w.commit();

  DirectoryReader reader = w.getReader();
  final IndexSearcher searcher = new IndexSearcher(reader);

  BooleanQuery.Builder qBuilder = new BooleanQuery.Builder();
  qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER);

  // With a single clause, we will rewrite to the underlying
  // query. Make sure that it returns null scores
  BooleanQuery q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  // Now with two clauses, we will get a conjunction scorer
  // Make sure it returns null scores
  qBuilder.add(new TermQuery(new Term("field", "b")), Occur.FILTER);
  q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  // Now with a scoring clause, we need to make sure that
  // the boolean scores are the same as those from the term
  // query
  qBuilder.add(new TermQuery(new Term("field", "c")), Occur.SHOULD);
  q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  // FILTER and empty SHOULD ("e" does not occur in any indexed value)
  qBuilder = new BooleanQuery.Builder();
  qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER);
  qBuilder.add(new TermQuery(new Term("field", "e")), Occur.SHOULD);
  q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  // mix of FILTER and MUST
  qBuilder = new BooleanQuery.Builder();
  qBuilder.add(new TermQuery(new Term("field", "a")), Occur.FILTER);
  qBuilder.add(new TermQuery(new Term("field", "d")), Occur.MUST);
  q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  // FILTER + minShouldMatch
  qBuilder = new BooleanQuery.Builder();
  qBuilder.add(new TermQuery(new Term("field", "b")), Occur.FILTER);
  qBuilder.add(new TermQuery(new Term("field", "a")), Occur.SHOULD);
  qBuilder.add(new TermQuery(new Term("field", "d")), Occur.SHOULD);
  qBuilder.setMinimumNumberShouldMatch(1);
  q = qBuilder.build();
  assertSameScoresWithoutFilters(searcher, q);

  reader.close();
  w.close();
  dir.close();
}

Source File: TestConcurrentMergeScheduler.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Runs ten add/delete/commit cycles. Cycle {@code i} adds 100 documents with ids
 * {@code i*100 .. i*100+99}, then deletes ids {@code i, i+10, i+20, ...} below
 * {@code 100*(1+i)} and commits. A reader opened after the writer is closed must
 * report 450 documents.
 */
public void testDeleteMerging() throws IOException {
  final Directory directory = newDirectory();

  // Force degenerate merging so we can get a mix of
  // merging of segments with and without deletes at the
  // start:
  final LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
  mergePolicy.setMinMergeDocs(1000);

  final IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mergePolicy));

  final Document document = new Document();
  final Field idField = newStringField("id", "", Field.Store.YES);
  document.add(idField);

  for (int cycle = 0; cycle < 10; cycle++) {
    if (VERBOSE) {
      System.out.println("\nTEST: cycle");
    }

    final int firstId = cycle * 100;
    for (int offset = 0; offset < 100; offset++) {
      idField.setStringValue(Integer.toString(firstId + offset));
      writer.addDocument(document);
    }

    final int deleteLimit = 100 * (1 + cycle);
    for (int delID = cycle; delID < deleteLimit; delID += 10) {
      if (VERBOSE) {
        System.out.println("TEST: del " + delID);
      }
      writer.deleteDocuments(new Term("id", "" + delID));
    }

    writer.commit();
  }

  writer.close();

  // Verify that we did not lose any deletes...
  final IndexReader reader = DirectoryReader.open(directory);
  assertEquals(450, reader.numDocs());
  reader.close();
  directory.close();
}

Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Highlights a series of {@code TermRangeQuery} variants (open and closed bounds,
 * inclusive and exclusive endpoints, and a range on a different field) over two
 * one-sentence documents and checks the snippets returned for each variant.
 */
public void testRanges() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  for (String sentence : new String[] {"This is a test.", "Test a one sentence document."}) {
    bodyField.setStringValue(sentence);
    writer.addDocument(document);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  assertRangeSnippets(searcher, highlighter,
      TermRangeQuery.newStringRange("body", "ta", "tf", true, true),
      "This is a <b>test</b>.",
      "<b>Test</b> a one sentence document.");

  // null start
  assertRangeSnippets(searcher, highlighter,
      TermRangeQuery.newStringRange("body", null, "tf", true, true),
      "This <b>is</b> <b>a</b> <b>test</b>.",
      "<b>Test</b> <b>a</b> <b>one</b> <b>sentence</b> <b>document</b>.");

  // null end
  assertRangeSnippets(searcher, highlighter,
      TermRangeQuery.newStringRange("body", "ta", null, true, true),
      "<b>This</b> is a <b>test</b>.",
      "<b>Test</b> a one sentence document.");

  // exact start inclusive
  assertRangeSnippets(searcher, highlighter,
      TermRangeQuery.newStringRange("body", "test", "tf", true, true),
      "This is a <b>test</b>.",
      "<b>Test</b> a one sentence document.");

  // exact end inclusive
  assertRangeSnippets(searcher, highlighter,
      TermRangeQuery.newStringRange("body", "ta", "test", true, true),
      "This is a <b>test</b>.",
      "<b>Test</b> a one sentence document.");

  // exact start exclusive
  BooleanQuery startExclusive = new BooleanQuery.Builder()
      .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
      .add(TermRangeQuery.newStringRange("body", "test", "tf", false, true), BooleanClause.Occur.SHOULD)
      .build();
  assertRangeSnippets(searcher, highlighter, startExclusive,
      "This is a test.",
      "Test a one sentence document.");

  // exact end exclusive
  BooleanQuery endExclusive = new BooleanQuery.Builder()
      .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
      .add(TermRangeQuery.newStringRange("body", "ta", "test", true, false), BooleanClause.Occur.SHOULD)
      .build();
  assertRangeSnippets(searcher, highlighter, endExclusive,
      "This is a test.",
      "Test a one sentence document.");

  // wrong field
  highlighter.setFieldMatcher(null);//default
  BooleanQuery wrongField = new BooleanQuery.Builder()
      .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
      .add(TermRangeQuery.newStringRange("bogus", "ta", "tf", true, true), BooleanClause.Occur.SHOULD)
      .build();
  assertRangeSnippets(searcher, highlighter, wrongField,
      "This is a test.",
      "Test a one sentence document.");

  reader.close();
}

/**
 * Searches with {@code query} in index order, highlights the "body" field with the
 * same query, and asserts that exactly two hits and two snippets come back with the
 * given expected text.
 */
private void assertRangeSnippets(IndexSearcher searcher, UnifiedHighlighter highlighter, Query query,
                                 String expectedFirst, String expectedSecond) throws Exception {
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, hits.totalHits.value);
  String[] snippets = highlighter.highlight("body", query, hits);
  assertEquals(2, snippets.length);
  assertEquals(expectedFirst, snippets[0]);
  assertEquals(expectedSecond, snippets[1]);
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Installs a {@code PassageFormatter} whose {@code format} returns a two-element
 * {@code String[]} and checks that {@code highlightFieldsAsObjects} hands that array
 * back unchanged for the single hit of a {@code TermQuery}.
 */
public void testObjectFormatter() throws Exception {
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document document = new Document();
  Field bodyField = new Field("body", "", fieldType);
  document.add(bodyField);

  bodyField.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      return new PassageFormatter() {
        PassageFormatter delegate = new DefaultPassageFormatter();

        @Override
        public String[] format(Passage[] passages, String content) {
          // Pair a fixed marker string with the default formatter's output.
          String formatted = delegate.format(passages, content).toString();
          return new String[] {"blah blah", formatted};
        }
      };
    }
  };

  Term term = new Term("body", "highlighting");
  Query query = new TermQuery(term);
  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  int[] docIDs = {hits.scoreDocs[0].doc};
  String[] fields = {"body"};
  int[] maxPassages = {1};
  Map<String, Object[]> snippets = highlighter.highlightFieldsAsObjects(fields, query, docIDs, maxPassages);
  Object[] bodySnippets = snippets.get("body");
  assertEquals(1, bodySnippets.length);

  String[] expected = {"blah blah", "Just a test <b>highlighting</b> from postings. "};
  assertTrue(Arrays.equals(expected, (String[]) bodySnippets[0]));

  reader.close();
}

Source File: BaseStoredFieldsFormatTestCase.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes a random mix of three document shapes (id only, many small stored fields,
 * one very large stored field), force-merges to a single segment, and then looks up
 * each document by id to compare its stored "fld" values with the source document.
 *
 * <p>All three documents share the same {@code idField} instance, so the id is set
 * immediately before each {@code addDocument} call.
 */
@Nightly
public void testBigDocuments() throws IOException {
  assumeWorkingMMapOnWindows();
  
  // "big" as "much bigger than the chunk size"
  // for this test we force a FS dir
  // we can't just use newFSDirectory, because this test doesn't really index anything.
  // so if we get NRTCachingDir+SimpleText, we make massive stored fields and OOM (LUCENE-4484)
  Directory dir = new MockDirectoryWrapper(random(), new MMapDirectory(createTempDir("testBigDocuments")));
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConf);

  // dir is always a MockDirectoryWrapper as constructed above; throttling is switched off.
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(Throttling.NEVER);
  }

  final Document emptyDoc = new Document(); // emptyDoc
  final Document bigDoc1 = new Document(); // lot of small fields
  final Document bigDoc2 = new Document(); // 1 very big field

  // One id field instance is shared by all three documents.
  final Field idField = new StringField("id", "", Store.NO);
  emptyDoc.add(idField);
  bigDoc1.add(idField);
  bigDoc2.add(idField);

  // Start from StringField.TYPE_STORED and turn indexing off.
  final FieldType onlyStored = new FieldType(StringField.TYPE_STORED);
  onlyStored.setIndexOptions(IndexOptions.NONE);

  // The same small field instance is added numFields times to bigDoc1.
  final Field smallField = new Field("fld", randomByteArray(random().nextInt(10), 256), onlyStored);
  final int numFields = RandomNumbers.randomIntBetween(random(), 500000, 1000000);
  for (int i = 0; i < numFields; ++i) {
    bigDoc1.add(smallField);
  }

  final Field bigField = new Field("fld", randomByteArray(RandomNumbers.randomIntBetween(random(), 1000000, 5000000), 2), onlyStored);
  bigDoc2.add(bigField);

  // Pick, for every slot, one of the three document shapes at random.
  final int numDocs = atLeast(5);
  final Document[] docs = new Document[numDocs];
  for (int i = 0; i < numDocs; ++i) {
    docs[i] = RandomPicks.randomFrom(random(), Arrays.asList(emptyDoc, bigDoc1, bigDoc2));
  }
  // Index the chosen documents, committing at random points along the way.
  for (int i = 0; i < numDocs; ++i) {
    idField.setStringValue("" + i);
    iw.addDocument(docs[i]);
    if (random().nextInt(numDocs) == 0) {
      iw.commit();
    }
  }
  iw.commit();
  iw.forceMerge(1); // look at what happens when big docs are merged
  final DirectoryReader rd = maybeWrapWithMergingReader(DirectoryReader.open(dir));
  final IndexSearcher searcher = new IndexSearcher(rd);
  // Each id must match exactly one document whose stored "fld" values agree with the source.
  for (int i = 0; i < numDocs; ++i) {
    final Query query = new TermQuery(new Term("id", "" + i));
    final TopDocs topDocs = searcher.search(query, 1);
    assertEquals("" + i, 1, topDocs.totalHits.value);
    final Document doc = rd.document(topDocs.scoreDocs[0].doc);
    assertNotNull(doc);
    final IndexableField[] fieldValues = doc.getFields("fld");
    assertEquals(docs[i].getFields("fld").length, fieldValues.length);
    // Only the first value is compared byte-wise; emptyDoc has no "fld" values at all.
    if (fieldValues.length > 0) {
      assertEquals(docs[i].getFields("fld")[0].binaryValue(), fieldValues[0].binaryValue());
    }
  }
  rd.close();
  iw.close();
  dir.close();
}

Java Code Examples for org.apache.lucene.document.Field#setStringValue()