org.apache.lucene.index.IndexReader#close

Source File: GeoNameResolver.java From lucene-geo-gazetteer with Apache License 2.0

6 votes

/**
 * Searches the corresponding GeoName entries for each location entity.
 *
 * <p>An index reader is created for {@code indexerPath}, the lookup is
 * delegated to {@code resolveEntities}, and the reader is closed before this
 * method returns, whether resolution succeeds or throws.
 *
 * @param indexerPath
 *            path passed to {@code createIndexReader} to open the index
 * @param locationNameEntities
 *            the location names to resolve (the NER output)
 * @param count
 *            number of results for one location
 *
 * @return map in which each name has a list of resolved entities; an empty
 *         map when the list is empty or its first entry is the empty string
 * @throws IOException
 *             if the underlying index access fails
 */

public HashMap<String, List<Location>> searchGeoName(String indexerPath,
												   List<String> locationNameEntities,
												   int count) throws IOException {

	if (locationNameEntities.isEmpty()
			|| locationNameEntities.get(0).isEmpty()) {
		return new HashMap<String, List<Location>>();
	}
	IndexReader reader = createIndexReader(indexerPath);
	try {
		return resolveEntities(locationNameEntities, count, reader);
	} finally {
		// Release the reader even when resolution throws.
		reader.close();
	}

}

Source File: TestTermRangeQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Verifies that term ranges with open bounds over "content" match all four
 * indexed documents, and that a range starting at "B" matches three.
 */
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  // try-with-resources closes the reader even when a search or assertion fails
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);

    TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, true);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    query = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertEquals(4, searcher.search(query, 1000).scoreDocs.length);

    // and now another one
    query = TermRangeQuery.newStringRange("content", "B", null, true, true);
    assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
  }
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Make sure we can customize how an empty highlight is returned: with the
 * maximum number of no-highlight passages set to 0, a query term that does
 * not occur in the indexed body yields a {@code null} snippet.
 */
public void testCustomEmptyHighlights() throws Exception {
  indexAnalyzer.setPositionIncrementGap(10);
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Document doc = new Document();

  Field body = new Field("body", "test this is.  another sentence this test has.  far away is that planet.", fieldType);
  doc.add(body);
  iw.addDocument(doc);

  // try-with-resources closes the reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxNoHighlightPassages(0);// don't want any default summary
    Query query = new TermQuery(new Term("body", "highlighting"));
    int[] docIDs = new int[]{0};
    String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIDs, new int[]{2}).get("body");
    assertEquals(1, snippets.length);
    assertNull(snippets[0]);
  }
}

Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that a term which only appears in a MUST_NOT clause ("both") is not
 * highlighted in the returned snippet.
 */
public void testBooleanMustNot() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "This sentence has both terms.  This sentence has only terms.", fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);

  // try-with-resources closes the reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);

    BooleanQuery query2 = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("body", "both")), BooleanClause.Occur.MUST_NOT)
        .build();

    BooleanQuery query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("body", "terms")), BooleanClause.Occur.SHOULD)
        .add(query2, BooleanClause.Occur.SHOULD)
        .build();

    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits.value);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String snippets[] = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertFalse(snippets[0].contains("<b>both</b>"));
  }
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes the first line of the "CambridgeMA.utf8" resource and checks that
 * an unordered interval query over "porter", "square" and "massachusetts"
 * highlights "Square" and "Porter" in the single resulting snippet.
 */
public void testCambridgeMA() throws Exception {
  final String text;
  // Close the resource reader even if readLine() throws.
  try (BufferedReader r = new BufferedReader(new InputStreamReader(
      this.getClass().getResourceAsStream("CambridgeMA.utf8"), StandardCharsets.UTF_8))) {
    text = r.readLine();
  }

  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Field body = new Field("body", text, fieldType);
  Document document = new Document();
  document.add(body);
  iw.addDocument(document);

  // try-with-resources closes the index reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();
    IndexSearcher searcher = newSearcher(ir);
    Query query = new IntervalQuery("body",
        Intervals.unordered(Intervals.term("porter"),
            Intervals.term("square"),
            Intervals.term("massachusetts")));
    TopDocs topDocs = searcher.search(query, 10);
    assertEquals(1, topDocs.totalHits.value);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(Integer.MAX_VALUE - 1);
    String snippets[] = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertTrue(snippets[0].contains("<b>Square</b>"));
    assertTrue(snippets[0].contains("<b>Porter</b>"));
  }
}

Source File: TestTermRangeQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs an inclusive range query with an open lower bound and upper bound "C"
 * against three successive index states and checks the total hit count of each.
 */
public void testInclusiveLowerNull() throws Exception {
  //http://issues.apache.org/jira/browse/LUCENE-38
  // The expected counts below are the ones the original comments prescribe
  // once LUCENE-38 is fixed.
  Analyzer analyzer = new SingleCharAnalyzer();
  Query query = TermRangeQuery.newStringRange("content", null, "C", true, true);

  initializeIndex(new String[]{"A", "B", "","C", "D"}, analyzer);
  // Each reader lives in its own try-with-resources so it is closed even
  // when the search or the assertion fails.
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,C,D => A,B,<empty string>,C in range", 4, numHits);
  }

  initializeIndex(new String[]{"A", "B", "", "D"}, analyzer);
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("A,B,<empty string>,D - A, B and <empty string> in range", 3, numHits);
  }

  addDoc("C");
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    long numHits = searcher.search(query, 1000).totalHits.value;
    assertEquals("C added => A,B,<empty string>,C in range", 4, numHits);
  }
}

Source File: TestElevationComparator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes six small documents ("ipod" and "boosted" titles) and invokes
 * {@code runTest} twice on a BM25 searcher, once with {@code true} and once
 * with {@code false}.
 */
public void testSorting() throws Throwable {
  // try-with-resources closes the directory and reader even when runTest fails
  try (Directory directory = newDirectory()) {
    IndexWriter writer = new IndexWriter(
        directory,
        newIndexWriterConfig(new MockAnalyzer(random())).
            setMaxBufferedDocs(2).
            setMergePolicy(newLogMergePolicy(1000)).
            setSimilarity(new ClassicSimilarity())
    );
    writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
    writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
    writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s","c"}));
    writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
    writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s","y"}));
    writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted","str_s", "z"}));

    try (IndexReader r = DirectoryReader.open(writer)) {
      writer.close();

      IndexSearcher searcher = newSearcher(r);
      searcher.setSimilarity(new BM25Similarity());

      runTest(searcher, true);
      runTest(searcher, false);
    }
  }
}

Source File: TestMultiPhraseQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes a single document built from {@code INCR_0_DOC_TOKENS} and asserts
 * that {@code q} reports the expected total hit count.
 *
 * @param q query to run against the one-document index
 * @param nExpected expected value of {@code totalHits}
 * @throws IOException if indexing, searching or closing fails
 */
private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
  // try-with-resources closes the directory and reader even when the assertion fails
  try (Directory dir = newDirectory()) { // random dir
    IndexWriterConfig cfg = newIndexWriterConfig(null);
    IndexWriter writer = new IndexWriter(dir, cfg);
    Document doc = new Document();
    doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
    writer.addDocument(doc);

    try (IndexReader r = DirectoryReader.open(writer)) {
      writer.close();
      IndexSearcher s = newSearcher(r);

      if (VERBOSE) {
        System.out.println("QUERY=" + q);
      }

      TopDocs hits = s.search(q, 1);
      assertEquals("wrong number of results", nExpected, hits.totalHits.value);

      if (VERBOSE) {
        // Only one hit was requested, so iterate the returned scoreDocs
        // directly instead of indexing up to totalHits.
        for (ScoreDoc sd : hits.scoreDocs) {
          System.out.println("  hit doc=" + sd.doc + " score=" + sd.score);
        }
      }
    }
  }
}

Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Highlights the term "foo" in a single stored field with term vectors
 * (offsets and positions) and checks the best fragment returned for
 * fragment sizes 54, 52 and 30.
 */
public void testSimpleHighlightTest() throws IOException {
  // Resources close in reverse order (reader, writer, dir), matching the
  // original explicit close sequence, and also when an assertion fails.
  try (Directory dir = newDirectory();
       IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
    Document doc = new Document();
    FieldType type = new FieldType(TextField.TYPE_STORED);
    type.setStoreTermVectorOffsets(true);
    type.setStoreTermVectorPositions(true);
    type.setStoreTermVectors(true);
    type.freeze();
    Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);

    doc.add(field);
    writer.addDocument(doc);
    FastVectorHighlighter highlighter = new FastVectorHighlighter();

    try (IndexReader reader = DirectoryReader.open(writer)) {
      int docId = 0;
      FieldQuery fieldQuery  = highlighter.getFieldQuery( new TermQuery(new Term("field", "foo")), reader );
      String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
      // highlighted results are centered
      assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
      assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
      bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
      assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
    }
  }
}

Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Highlights a regexp query ("te.*") over two documents, then checks that a
 * regexp clause on a different field ("bogus") produces no highlighting in
 * the "body" snippets.
 */
public void testOneRegexp() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test.");
  iw.addDocument(doc);
  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  // try-with-resources closes the reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new RegexpQuery(new Term("body", "te.*"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits.value);
    String snippets[] = highlighter.highlight("body", query, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a <b>test</b>.", snippets[0]);
    assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

    // wrong field
    highlighter.setFieldMatcher(null);//default
    BooleanQuery bq = new BooleanQuery.Builder()
        .add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        .add(new RegexpQuery(new Term("bogus", "te.*")), BooleanClause.Occur.SHOULD)
        .build();
    topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
    assertEquals(2, topDocs.totalHits.value);
    snippets = highlighter.highlight("body", bq, topDocs);
    assertEquals(2, snippets.length);
    assertEquals("This is a test.", snippets[0]);
    assertEquals("Test a one sentence document.", snippets[1]);
  }
}

Source File: TestBlockJoin.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that, for every hit of a {@code ToChildBlockJoinQuery}, the score
 * reported by the search equals the value returned by {@code explain}.
 */
public void testToChildBlockJoinQueryExplain() throws Exception {
  // try-with-resources closes the directory and reader even when an assertion fails
  try (Directory dir = newDirectory()) {
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    final List<Document> docs = new ArrayList<>();
    docs.add(makeJob("java", 2007));
    docs.add(makeJob("python", 2010));
    docs.add(makeResume("Lisa", "United Kingdom"));
    w.addDocuments(docs);

    docs.clear();
    docs.add(makeJob("java", 2006));
    docs.add(makeJob("ruby", 2005));
    docs.add(makeResume("Frank", "United States"));
    w.addDocuments(docs);
    w.deleteDocuments(new Term("skill", "java")); // delete the first child of every parent

    try (IndexReader r = w.getReader()) {
      w.close();
      IndexSearcher s = newSearcher(r, false);

      // Create a filter that defines "parent" documents in the index - in this case resumes
      BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
      Query parentQuery = new PrefixQuery(new Term("country", "United"));

      ToChildBlockJoinQuery toChildQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter);

      TopDocs hits = s.search(toChildQuery, 10);
      // expected value goes first so a failure message reads correctly
      assertEquals(2, hits.scoreDocs.length);
      for (ScoreDoc hit : hits.scoreDocs) {
        assertEquals(hit.score, s.explain(toChildQuery, hit.doc).getValue().doubleValue(), 0f);
      }
    }
  }
}

Source File: IndexInfo.java From alfresco-repository with GNU Lesser General Public License v3.0

5 votes

/**
 * Returns how many field names the main index reader reports for
 * {@code IndexReader.FieldOption.INDEXED}.
 *
 * @return the size of the indexed field-name collection
 * @throws IOException declared by the method; raised by the reader calls it makes
 */
public int getNumberOfIndexedFields() throws IOException
{
    final IndexReader mainReader = getMainIndexReferenceCountingReadOnlyIndexReader();
    try
    {
        final int indexedFieldCount = mainReader.getFieldNames(IndexReader.FieldOption.INDEXED).size();
        return indexedFieldCount;
    }
    finally
    {
        // Always hand the reader back, whether or not the lookup succeeded.
        mainReader.close();
    }
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Uses a highlighter whose break iterator is a {@code WholeBreakIterator}
 * and checks that every occurrence of "test" in the body is highlighted
 * within a single snippet.
 */
public void testHighlightAllText() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test.  Just highlighting from postings. This is also a much sillier test.  Feel free to test test test test test test test.");
  iw.addDocument(doc);

  // try-with-resources closes the reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
      @Override
      protected BreakIterator getBreakIterator(String field) {
        return new WholeBreakIterator();
      }
    };
    highlighter.setMaxLength(10000);
    Query query = new IntervalQuery("body", Intervals.term("test"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);
    String snippets[] = highlighter.highlight("body", query, topDocs, 2);
    assertEquals(1, snippets.length);
    assertEquals(
        "This is a <b>test</b>.  Just highlighting from postings. This is also a much sillier <b>test</b>.  Feel free to <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b> <b>test</b>.",
        snippets[0]);
  }
}

Source File: TestMatchAllDocsQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes 500 documents and checks the hit count and hit-count relation
 * reported by a {@code TopScoreDocCollector} for a threshold of 200 and for
 * a threshold equal to the number of documents.
 */
public void testEarlyTermination() throws IOException {
  // try-with-resources releases reader, writer and directory (in that order)
  // even when an assertion fails. The original closed the writer before the
  // reader; the assertions do not depend on that order.
  try (Directory dir = newDirectory();
       IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(analyzer).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()))) {
    final int numDocs = 500;
    for (int i = 0; i < numDocs; i++) {
      addDoc("doc" + i, iw);
    }

    try (IndexReader ir = DirectoryReader.open(iw)) {
      IndexSearcher is = newSearcher(ir);

      final int totalHitsThreshold = 200;
      TopScoreDocCollector c = TopScoreDocCollector.create(10, null, totalHitsThreshold);

      is.search(new MatchAllDocsQuery(), c);
      assertEquals(totalHitsThreshold+1, c.totalHits);
      assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, c.totalHitsRelation);

      TopScoreDocCollector c1 = TopScoreDocCollector.create(10, null, numDocs);

      is.search(new MatchAllDocsQuery(), c1);
      assertEquals(numDocs, c1.totalHits);
      assertEquals(TotalHits.Relation.EQUAL_TO, c1.totalHitsRelation);
    }
  }
}

Source File: TokenSourcesTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes an {@code OverlappingTokenStream} with term vectors that store
 * offsets but no positions, then checks that an exact span phrase
 * ("the" followed by "fox") is highlighted as one fragment.
 */
public void testOverlapWithOffsetExactPhrase()
    throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  // The directory is closed even if indexing fails or closing the reader throws.
  try (Directory directory = newDirectory()) {
    try (IndexWriter indexWriter = new IndexWriter(directory,
        newIndexWriterConfig(null))) {
      final Document document = new Document();
      FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
      customType.setStoreTermVectors(true);
      // no positions!
      customType.setStoreTermVectorOffsets(true);
      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
      indexWriter.addDocument(document);
    }

    try (IndexReader indexReader = DirectoryReader.open(directory)) {
      assertEquals(1, indexReader.numDocs());
      final IndexSearcher indexSearcher = newSearcher(indexReader);
      final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
          new SpanTermQuery(new Term(FIELD, "the")),
          new SpanTermQuery(new Term(FIELD, "fox"))}, 0, true);

      TopDocs hits = indexSearcher.search(phraseQuery, 1);
      assertEquals(1, hits.totalHits.value);
      final Highlighter highlighter = new Highlighter(
          new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
          new QueryScorer(phraseQuery));
      final TokenStream tokenStream =
          TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
      assertEquals("<B>the fox</B> did not jump",
          highlighter.getBestFragment(tokenStream, TEXT));
    }
  }
}

Source File: TestCachingTokenFilter.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Exercises a {@code CachingTokenFilter} wrapped around a hand-written token
 * stream: the filter is consumed twice before indexing, handed to the index
 * writer, and consumed once more afterwards. The final assertion checks that
 * the wrapped stream's {@code reset()} ran exactly once.
 */
public void testCaching() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  // Counts reset() calls that reach the wrapped (inner) stream defined below.
  AtomicInteger resetCount = new AtomicInteger(0);
  // Inner stream: emits the entries of `tokens` (defined outside this method)
  // one by one, each with offsets (0,0).
  TokenStream stream = new TokenStream() {
    private int index = 0;
    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    @Override
    public void reset() throws IOException {
      super.reset();
      resetCount.incrementAndGet();
    }

    @Override
    public boolean incrementToken() {
      if (index == tokens.length) {
        return false;
      } else {
        clearAttributes();
        termAtt.append(tokens[index++]);
        offsetAtt.setOffset(0,0);
        return true;
      }        
    }
    
  };

  // From here on `stream` refers to the caching filter, not the inner stream.
  stream = new CachingTokenFilter(stream);

  doc.add(new TextField("preanalyzed", stream));

  // 1) we consume all tokens twice before we add the doc to the index
  assertFalse(((CachingTokenFilter)stream).isCached());
  stream.reset();
  assertFalse(((CachingTokenFilter) stream).isCached());
  checkTokens(stream);
  stream.reset();  
  checkTokens(stream);
  assertTrue(((CachingTokenFilter)stream).isCached());

  // 2) now add the document to the index and verify if all tokens are indexed
  //    don't reset the stream here, the DocumentWriter should do that implicitly
  writer.addDocument(doc);
  
  IndexReader reader = writer.getReader();
  // "term1": one occurrence, at position 0
  PostingsEnum termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                                        "preanalyzed",
                                                                        new BytesRef("term1"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(0, termPositions.nextPosition());

  // "term2": two occurrences, at positions 1 and 3
  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term2"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, termPositions.freq());
  assertEquals(1, termPositions.nextPosition());
  assertEquals(3, termPositions.nextPosition());
  
  // "term3": one occurrence, at position 2
  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term3"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(2, termPositions.nextPosition());
  reader.close();
  writer.close();
  // 3) reset stream and consume tokens again
  stream.reset();
  checkTokens(stream);

  // The inner stream was reset only once even though the caching filter
  // itself was reset several times above.
  assertEquals(1, resetCount.get());

  dir.close();
}

Source File: TestDocTermOrds.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Indexes three documents ("a b c", "d e f", "a f") into "field" and checks
 * the ordinals that {@code DocTermOrds} reports for each document in turn.
 */
public void testSimple() throws Exception {
  // try-with-resources closes the directory and reader even when an assertion fails
  try (Directory dir = newDirectory()) {
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    Field field = newTextField("field", "", Field.Store.NO);
    doc.add(field);
    field.setStringValue("a b c");
    w.addDocument(doc);

    field.setStringValue("d e f");
    w.addDocument(doc);

    field.setStringValue("a f");
    w.addDocument(doc);

    try (IndexReader r = w.getReader()) {
      w.close();

      final LeafReader ar = SlowCompositeReaderWrapper.wrap(r);
      TestUtil.checkReader(ar);
      final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field");
      SortedSetDocValues iter = dto.iterator(ar);

      // doc 0: "a b c"
      assertEquals(0, iter.nextDoc());
      assertEquals(0, iter.nextOrd());
      assertEquals(1, iter.nextOrd());
      assertEquals(2, iter.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

      // doc 1: "d e f"
      assertEquals(1, iter.nextDoc());
      assertEquals(3, iter.nextOrd());
      assertEquals(4, iter.nextOrd());
      assertEquals(5, iter.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());

      // doc 2: "a f"
      assertEquals(2, iter.nextDoc());
      assertEquals(0, iter.nextOrd());
      assertEquals(5, iter.nextOrd());
      assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd());
    }
  }
}

Source File: DependentTermQueryBuilderTest.java From querqy with Apache License 2.0

4 votes

/**
 * Builds dependent term queries for f1:v1 and f2:v1 sharing one
 * {@code DocumentFrequencyCorrection}, then checks the boost, docFreq and
 * freq values that appear in the explanation of the f2 query's top hit.
 */
@Test
public void testCreateWeight() throws Exception {

    // Resources close in reverse order (reader, directory, analyzer), matching
    // the original explicit close sequence, and also when an assertion fails.
    try (Analyzer analyzer = new StandardAnalyzer();
         Directory directory = new ByteBuffersDirectory()) {

        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setSimilarity(new ClassicSimilarity());
        try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
            TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 4);
            TestUtil.addNumDocsWithTextField("f2", "v1 v1", indexWriter, 1);
        }

        try (IndexReader indexReader = DirectoryReader.open(directory)) {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            indexSearcher.setSimilarity(new ClassicSimilarity());

            DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

            Term qTerm1 = new Term("f1", "v1");
            Term qTerm2 = new Term("f2", "v1");
            dfc.newClause();
            dfc.prepareTerm(qTerm1);
            dfc.prepareTerm(qTerm2);
            dfc.finishedUserQuery();

            // query1 is never searched; the call is kept as in the original.
            // NOTE(review): confirm whether createTermQuery has side effects on dfc
            // before removing it.
            DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
                    .createTermQuery(qTerm1, fieldBoost1);
            DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
                    .createTermQuery(qTerm2, fieldBoost2);

            TopDocs topDocs = indexSearcher.search(query2, 10);

            final Weight weight2 = query2.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
            final Explanation explain = weight2.explain(indexReader.leaves().get(0), topDocs.scoreDocs[0].doc);

            String explainText = explain.toString();
            assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
            assertTrue(explainText.contains("4 = docFreq")); // 4 * df of f1:v1
            assertTrue(explainText.contains("2.0 = freq")); // don't use tf
        }
    }

}

Source File: TestUnifiedHighlighterMTQ.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Runs a query with several MTQs (prefix, wildcard, fuzzy) and confirms the
 * formatter can tell which query matched which hit.
 */
public void testWhichMTQMatched() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("Test a one sentence document.");
  iw.addDocument(doc);

  // try-with-resources closes the reader even when an assertion fails
  try (IndexReader ir = iw.getReader()) {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    // use a variety of common MTQ types
    BooleanQuery query = new BooleanQuery.Builder()
        .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
        .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
        .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
        .build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);
    String snippets[] = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);

    // Default formatter just bolds each hit:
    assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);

    // Now use our own formatter, that also stuffs the
    // matching term's text into the result:
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {

      @Override
      protected PassageFormatter getFormatter(String field) {
        return new PassageFormatter() {

          @Override
          public Object format(Passage passages[], String content) {
            // Copied from DefaultPassageFormatter, but
            // tweaked to include the matched term:
            StringBuilder sb = new StringBuilder();
            int pos = 0;
            for (Passage passage : passages) {
              // don't add ellipsis if it's the first one, or if it's connected.
              if (passage.getStartOffset() > pos && pos > 0) {
                sb.append("... ");
              }
              pos = passage.getStartOffset();
              for (int i = 0; i < passage.getNumMatches(); i++) {
                int start = passage.getMatchStarts()[i];
                int end = passage.getMatchEnds()[i];
                // it's possible to have overlapping terms
                if (start > pos) {
                  sb.append(content, pos, start);
                }
                if (end > pos) {
                  sb.append("<b>");
                  sb.append(content, Math.max(pos, start), end);
                  sb.append('(');
                  sb.append(passage.getMatchTerms()[i].utf8ToString());
                  sb.append(')');
                  sb.append("</b>");
                  pos = end;
                }
              }
              // it's possible a "term" from the analyzer could span a sentence boundary.
              sb.append(content, pos, Math.max(pos, passage.getEndOffset()));
              pos = passage.getEndOffset();
            }
            return sb.toString();
          }
        };
      }
    };

    assertEquals(1, topDocs.totalHits.value);
    snippets = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);

    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
  }
}

Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Indexes {@code bodyText} as a single document, searches it for the term
 * "test", and highlights the "body" field with the highlighter's maximum
 * length set to 17.
 *
 * @param bodyText text to index in the "body" field
 * @return the snippets returned by the highlighter for the search result
 * @throws IOException if indexing, searching or highlighting fails
 */
private String[] formatWithMatchExceedingContentLength(String bodyText) throws IOException {

  int maxLength = 17;

  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  final Field body = new Field("body", bodyText, fieldType);

  Document doc = new Document();
  doc.add(body);

  iw.addDocument(doc);

  // try-with-resources closes the reader on every exit path, including a
  // failed assertion or a highlighter exception.
  try (IndexReader ir = iw.getReader()) {
    iw.close();

    IndexSearcher searcher = newSearcher(ir);

    Query query = new IntervalQuery("body", Intervals.term("test"));

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);

    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setMaxLength(maxLength);
    return highlighter.highlight("body", query, topDocs);
  }
}

Java Code Examples for org.apache.lucene.index.IndexReader#close()