org.apache.lucene.search.TopDocs Java Examples
The following examples show how to use org.apache.lucene.search.TopDocs. Each example is drawn from an open-source project; the source file, project, and license are listed above the code.
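Before the project-specific examples, here is a minimal sketch of the pattern most of them share: run a query through an IndexSearcher, inspect the hit count on the returned TopDocs, and resolve each ScoreDoc back to a stored Document. This is only an illustrative sketch: the Directory, the "title" field, and the query term are placeholders, not taken from any project below. Note also that the type of totalHits varies by Lucene version (a plain long in older releases, a TotalHits object with a .value field from Lucene 8 onward), which is why some examples read totalHits and others totalHits.value.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

public class TopDocsExample {
    /** Prints score and title of the best matches; a usage sketch, not code from any example below. */
    public static void printTopHits(Directory directory) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // Ask for the 10 best-scoring matches; TopDocs carries the hit count and the ScoreDoc array.
            TopDocs topDocs = searcher.search(new TermQuery(new Term("title", "lucene")), 10);
            System.out.println("hits: " + topDocs.totalHits);
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Each ScoreDoc holds a docId and a score; load the stored fields for that docId.
                Document doc = searcher.doc(scoreDoc.doc);
                System.out.println(scoreDoc.score + "\t" + doc.get("title"));
            }
        }
    }
}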
Example #1
Source File: TestPersistentProvenanceRepository.java From localization_nifi with Apache License 2.0 | 7 votes |
private List<Document> runQuery(final File indexDirectory, final List<File> storageDirs, final String query) throws IOException, ParseException {
    try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) {
        final IndexSearcher searcher = new IndexSearcher(directoryReader);

        final Analyzer analyzer = new SimpleAnalyzer();
        final org.apache.lucene.search.Query luceneQuery = new QueryParser("uuid", analyzer).parse(query);

        final Query q = new Query("");
        q.setMaxResults(1000);

        final TopDocs topDocs = searcher.search(luceneQuery, 1000);

        final List<Document> docs = new ArrayList<>();
        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
            final int docId = scoreDoc.doc;
            final Document d = directoryReader.document(docId);
            docs.add(d);
        }

        return docs;
    }
}
Example #2
Source File: LuceneTranslator.java From Indra with MIT License | 6 votes |
private Map<String, List<String>> doTranslate(Set<String> terms) {
    Map<String, List<String>> res = new HashMap<>();

    try {
        TopDocs topDocs = LuceneUtils.getTopDocs(searcher, terms, TERM_FIELD);

        if (topDocs != null) {
            for (ScoreDoc sd : topDocs.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                Map<String, Double> content = convert(doc.getBinaryValue(TRANSLATION_FIELD).bytes);
                res.put(doc.get(TERM_FIELD), getRelevantTranslations(content));
            }
        }
    } catch (IOException e) {
        logger.error(e.getMessage());
        //TODO throw new exception here.
        e.printStackTrace();
    }

    return res;
}
Example #3
Source File: TestNumericRangeQuery32.java From lucene-solr with Apache License 2.0 | 6 votes |
private void testLeftOpenRange(int precisionStep) throws Exception {
    String field = "field" + precisionStep;
    int count = 3000;
    int upper = (count - 1) * distance + (distance / 3) + startOffset;
    LegacyNumericRangeQuery<Integer> q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, true, true);
    TopDocs topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length);
    Document doc = searcher.doc(sd[0].doc);
    assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
    doc = searcher.doc(sd[sd.length - 1].doc);
    assertEquals("Last doc", (count - 1) * distance + startOffset, doc.getField(field).numericValue().intValue());

    q = LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, false, true);
    topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
    sd = topDocs.scoreDocs;
    assertNotNull(sd);
    assertEquals("Score doc count", count, sd.length);
    doc = searcher.doc(sd[0].doc);
    assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
    doc = searcher.doc(sd[sd.length - 1].doc);
    assertEquals("Last doc", (count - 1) * distance + startOffset, doc.getField(field).numericValue().intValue());
}
Example #4
Source File: TestUnifiedHighlighterStrictPhrases.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testMaxLen() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
    initReaderSearcherHighlighter();

    highlighter.setMaxLength(21);

    BooleanQuery query = new BooleanQuery.Builder()
        .add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD)
        .add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD)
        .add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD)
        .build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
    if (fieldType == UHTestHelper.reanalysisType || weightMatches) {
        if (weightMatches) {
            assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
        } else {
            assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
        }
    } else {
        assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
    }
}
Example #5
Source File: TestSelectiveWeightCreation.java From lucene-solr with Apache License 2.0 | 6 votes |
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits, IndexSearcher searcher, int docid,
    LTRScoringQuery model) throws IOException, ModelException {
    final List<LeafReaderContext> leafContexts = searcher.getTopReaderContext().leaves();
    final int n = ReaderUtil.subIndex(hits.scoreDocs[0].doc, leafContexts);
    final LeafReaderContext context = leafContexts.get(n);
    final int deBasedDoc = hits.scoreDocs[0].doc - context.docBase;

    final Weight weight = searcher.createWeight(searcher.rewrite(model), ScoreMode.COMPLETE, 1);
    final Scorer scorer = weight.scorer(context);

    // rerank using the field final-score
    scorer.iterator().advance(deBasedDoc);
    scorer.score();

    assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
    final LTRScoringQuery.ModelWeight modelWeight = (LTRScoringQuery.ModelWeight) weight;
    return modelWeight;
}
Example #6
Source File: TestUnifiedHighlighterStrictPhrases.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testWithSameTermQuery() throws IOException {
    indexWriter.addDocument(newDoc("Yin yang, yin gap yang"));
    initReaderSearcherHighlighter();

    BooleanQuery query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("body", "yin")), BooleanClause.Occur.MUST)
        .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
        // add queries for other fields; we shouldn't highlight these because of that.
        .add(new TermQuery(new Term("title", "yang")), BooleanClause.Occur.SHOULD)
        .build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    dupMatchAllowed.set(false); // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
    String[] snippets = highlighter.highlight("body", query, topDocs);
    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
        assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
    } else {
        assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
    }
}
Example #7
Source File: DocsReader.java From localization_nifi with Apache License 2.0 | 6 votes |
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader,
    final Collection<Path> allProvenanceLogFiles, final AtomicInteger retrievalCount,
    final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    final int numDocs = Math.min(scoreDocs.length, maxResults);
    final List<Document> docs = new ArrayList<>(numDocs);

    for (int i = numDocs - 1; i >= 0; i--) {
        final int docId = scoreDocs[i].doc;
        final Document d = indexReader.document(docId);
        docs.add(d);
    }

    final long readDocuments = System.nanoTime() - start;
    logger.debug("Reading {} Lucene Documents took {} millis", docs.size(), TimeUnit.NANOSECONDS.toMillis(readDocuments));

    return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
Example #8
Source File: TestUnifiedHighlighterTermIntervals.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testMatchesSlopBug() throws IOException {
    IndexReader ir = indexSomeFields();
    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    Query query = new IntervalQuery("title", Intervals.maxgaps(random().nextBoolean() ? 1 : 2,
        Intervals.ordered(Intervals.term("this"), Intervals.term("is"),
            Intervals.term("the"), Intervals.term("field"))));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);
    String[] snippets = highlighter.highlight("title", query, topDocs, 10);
    assertEquals(1, snippets.length);
    if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
        assertEquals("" + highlighter.getFlags("title"),
            "<b>This is the title field</b>.", snippets[0]);
    } else {
        assertEquals("" + highlighter.getFlags("title"),
            "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.", snippets[0]);
    }
    ir.close();
}
Example #9
Source File: TestLucene.java From RedisDirectory with Apache License 2.0 | 6 votes |
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
        .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);

    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
Example #10
Source File: TestFieldScoreQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
private void doTestExactScore(ValueSource valueSource) throws Exception {
    Query functionQuery = getFunctionQuery(valueSource);
    IndexReader r = DirectoryReader.open(dir);
    IndexSearcher s = newSearcher(r);
    TopDocs td = s.search(functionQuery, 1000);
    assertEquals("All docs should be matched!", N_DOCS, td.totalHits.value);
    ScoreDoc sd[] = td.scoreDocs;
    for (ScoreDoc aSd : sd) {
        float score = aSd.score;
        log(s.explain(functionQuery, aSd.doc));
        String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
        float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
        assertEquals("score of " + id + " should be " + expectedScore + " != " + score,
            expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
    }
    r.close();
}
Example #11
Source File: TestPayloadScoreQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
private static void checkQuery(SpanQuery query, PayloadFunction function, boolean includeSpanScore,
    int[] expectedDocs, float[] expectedScores) throws IOException {

    assertTrue("Expected docs and scores arrays must be the same length!",
        expectedDocs.length == expectedScores.length);

    PayloadScoreQuery psq = new PayloadScoreQuery(query, function, PayloadDecoder.FLOAT_DECODER, includeSpanScore);
    TopDocs hits = searcher.search(psq, expectedDocs.length);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
        if (i > expectedDocs.length - 1)
            fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
        if (hits.scoreDocs[i].doc != expectedDocs[i])
            fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
        assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001);
    }

    if (hits.scoreDocs.length > expectedDocs.length)
        fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length]);

    QueryUtils.check(random(), psq, searcher);
}
Example #12
Source File: KNearestNeighborClassifier.java From lucene-solr with Apache License 2.0 | 6 votes |
private TopDocs knnSearch(String text) throws IOException {
    BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();

    for (String fieldName : textFieldNames) {
        String boost = null;
        mlt.setBoost(true); // terms boost actually helps in MLT queries
        if (fieldName.contains("^")) {
            String[] field2boost = fieldName.split("\\^");
            fieldName = field2boost[0];
            boost = field2boost[1];
        }
        if (boost != null) {
            mlt.setBoostFactor(Float.parseFloat(boost)); // if we have a field boost, we add it
        }
        mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
        mlt.setBoostFactor(1); // restore neutral boost for next field
    }
    Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
    mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
    if (query != null) {
        mltQuery.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(mltQuery.build(), k);
}
Example #13
Source File: TestHierarchicalDocBuilder.java From lucene-solr with Apache License 2.0 | 6 votes |
private void assertSearch(Query query, String field, String... values) throws IOException {
    /* The limit of search queue is doubled to catch the error
       in case when for some reason there are more docs than expected */
    SolrIndexSearcher searcher = req.getSearcher();
    TopDocs result = searcher.search(query, values.length * 2);
    assertEquals(values.length, result.totalHits.value);
    List<String> actualValues = new ArrayList<String>();
    for (int index = 0; index < values.length; ++index) {
        Document doc = searcher.doc(result.scoreDocs[index].doc);
        actualValues.add(doc.get(field));
    }

    for (String expectedValue : values) {
        boolean removed = actualValues.remove(expectedValue);
        if (!removed) {
            fail("Search result does not contain expected values");
        }
    }
}
Example #14
Source File: ExplorerQueryTests.java From elasticsearch-learning-to-rank with Apache License 2.0 | 6 votes |
public void testBooleanQuery() throws Exception {
    TermQuery tq1 = new TermQuery(new Term("text", "cow"));
    TermQuery tq2 = new TermQuery(new Term("text", "brown"));
    TermQuery tq3 = new TermQuery(new Term("text", "how"));

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(tq1, BooleanClause.Occur.SHOULD);
    builder.add(tq2, BooleanClause.Occur.SHOULD);
    builder.add(tq3, BooleanClause.Occur.SHOULD);

    Query q = builder.build();
    String statsType = "sum_raw_tf";

    ExplorerQuery eq = new ExplorerQuery(q, statsType);

    // Verify tf score
    TopDocs docs = searcher.search(eq, 4);
    assertThat(docs.scoreDocs[0].score, equalTo(3.0f));
}
Example #15
Source File: HighlighterTest.java From lucene-solr with Apache License 2.0 | 6 votes |
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
    Query query = new TermQuery(new Term("t_text1", "random"));
    IndexReader reader = DirectoryReader.open(dir1);
    IndexSearcher searcher = newSearcher(reader);
    // This scorer can return negative idf -> null fragment
    Scorer scorer = new QueryTermScorer(query, searcher.getIndexReader(), "t_text1");
    // This scorer doesn't use idf (patch version)
    //Scorer scorer = new QueryTermScorer( query, "t_text1" );
    Highlighter h = new Highlighter(scorer);
    TopDocs hits = searcher.search(query, 10);
    for (int i = 0; i < hits.totalHits.value; i++) {
        Document doc = searcher.doc(hits.scoreDocs[i].doc);
        String result = h.getBestFragment(a, "t_text1", doc.get("t_text1"));
        if (VERBOSE) System.out.println("result:" + result);
        assertEquals("more <B>random</B> words for second field", result);
    }
    reader.close();
}
Example #16
Source File: SORecommender.java From scava with Eclipse Public License 2.0 | 6 votes |
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
    Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
    try {
        IndexReader reader = DirectoryReader.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(reader);
        if (isBm25 == false) {
            ClassicSimilarity CS = new ClassicSimilarity();
            searcher.setSimilarity(CS);
        }
        TopDocs docs = searcher.search(query, hitsPerPage);
        return docs;
    } catch (Exception e) {
        logger.error(e.getMessage());
        return null;
    }
}
Example #17
Source File: TestNumericTerms64.java From lucene-solr with Apache License 2.0 | 6 votes |
private void testSorting(int precisionStep) throws Exception {
    String field = "field" + precisionStep;
    // 10 random tests, the index order is ascending,
    // so using a reverse sort field should return descending documents
    int num = TestUtil.nextInt(random(), 10, 20);
    for (int i = 0; i < num; i++) {
        long lower = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        long upper = (long) (random().nextDouble() * noDocs * distance) + startOffset;
        if (lower > upper) {
            long a = lower;
            lower = upper;
            upper = a;
        }
        Query tq = LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
        TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
        if (topDocs.totalHits.value == 0) continue;
        ScoreDoc[] sd = topDocs.scoreDocs;
        assertNotNull(sd);
        long last = searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
        for (int j = 1; j < sd.length; j++) {
            long act = searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
            assertTrue("Docs should be sorted backwards", last > act);
            last = act;
        }
    }
}
Example #18
Source File: DistanceFacetsExample.java From lucene-solr with Apache License 2.0 | 6 votes |
/** User drills down on the specified range. */
public TopDocs drillDown(DoubleRange range) throws IOException {
    // Passing no baseQuery means we drill down on all
    // documents ("browse only"):
    DrillDownQuery q = new DrillDownQuery(null);
    final DoubleValuesSource vs = getDistanceValueSource();
    q.add("field", range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
    DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
        @Override
        protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways,
                                           String[] drillSidewaysDims) throws IOException {
            assert drillSideways.length == 1;
            return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
        }
    };
    return ds.search(q, 10).hits;
}
Example #19
Source File: TestUnifiedHighlighterStrictPhrases.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testBasics() throws IOException {
    indexWriter.addDocument(newDoc("Yin yang, filter")); // filter out. test getTermToSpanLists reader 1-doc filter
    indexWriter.addDocument(newDoc("yin alone, Yin yang, yin gap yang"));
    initReaderSearcherHighlighter();

    // query: -filter +"yin yang"
    BooleanQuery query = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("body", "filter")), BooleanClause.Occur.MUST_NOT)
        .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
        .build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);
    if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
        assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
    } else {
        assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
    }
}
Example #20
Source File: TestBooleanSimilarity.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testPhraseScoreIsEqualToBoost() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
    Document doc = new Document();
    doc.add(new TextField("foo", "bar baz quux", Store.NO));
    w.addDocument(doc);
    DirectoryReader reader = w.getReader();
    w.close();
    IndexSearcher searcher = newSearcher(reader);
    searcher.setSimilarity(new BooleanSimilarity());

    PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");

    TopDocs topDocs = searcher.search(query, 2);
    assertEquals(1, topDocs.totalHits.value);
    assertEquals(1f, topDocs.scoreDocs[0].score, 0f);

    topDocs = searcher.search(new BoostQuery(query, 7), 2);
    assertEquals(1, topDocs.totalHits.value);
    assertEquals(7f, topDocs.scoreDocs[0].score, 0f);

    reader.close();
    dir.close();
}
Example #21
Source File: LuceneQueryTestCase.java From jstarcraft-core with Apache License 2.0 | 6 votes |
@Test
public void testPointExactQuery() throws Exception {
    // exact query
    Query exactQuery = IntPoint.newExactQuery("id", 1);
    TopDocs search = searcher.search(exactQuery, 1000);
    Assert.assertEquals(1, search.totalHits.value);
}
Example #22
Source File: AddBlockUpdateTest.java From lucene-solr with Apache License 2.0 | 5 votes |
@Test
public void testOverwrite() throws IOException {
    assertU(add(
        nest(doc("id", "X", parent, "X"),
            doc(child, "a", "id", "66"),
            doc(child, "b", "id", "66"))));
    assertU(add(
        nest(doc("id", "Y", parent, "Y"),
            doc(child, "a", "id", "66"),
            doc(child, "b", "id", "66"))));
    String overwritten = random().nextBoolean() ? "X" : "Y";
    String dubbed = overwritten.equals("X") ? "Y" : "X";

    assertU(add(
        nest(doc("id", overwritten, parent, overwritten),
            doc(child, "c", "id", "66"),
            doc(child, "d", "id", "66")), "overwrite", "true"));
    assertU(add(
        nest(doc("id", dubbed, parent, dubbed),
            doc(child, "c", "id", "66"),
            doc(child, "d", "id", "66")), "overwrite", "false"));

    assertU(commit());

    assertQ(req(parent + ":" + overwritten, "//*[@numFound='1']"));
    assertQ(req(parent + ":" + dubbed, "//*[@numFound='2']"));

    final SolrIndexSearcher searcher = getSearcher();
    assertSingleParentOf(searcher, one("ab"), dubbed);

    final TopDocs docs = searcher.search(join(one("cd")), 10);
    assertEquals(2, docs.totalHits.value);
    final String pAct = searcher.doc(docs.scoreDocs[0].doc).get(parent)
        + searcher.doc(docs.scoreDocs[1].doc).get(parent);
    assertTrue(pAct.contains(dubbed) && pAct.contains(overwritten) && pAct.length() == 2);

    assertQ(req("id:66", "//*[@numFound='6']"));
    assertQ(req(child + ":(a b)", "//*[@numFound='2']"));
    assertQ(req(child + ":(c d)", "//*[@numFound='4']"));
}
Example #23
Source File: ExplorerQueryTests.java From elasticsearch-learning-to-rank with Apache License 2.0 | 5 votes |
public void testQueryWithTermPositionAvgWithNoTerm() throws Exception {
    Query q = new TermQuery(new Term("text", "xxxxxxxxxxxxxxxxxx"));
    String statsType = "avg_raw_tp";

    ExplorerQuery eq = new ExplorerQuery(q, statsType);

    // Basic query check, should match 0 docs
    assertThat(searcher.count(eq), equalTo(0));

    // Verify explain
    TopDocs docs = searcher.search(eq, 6);
    assertThat(docs.scoreDocs.length, equalTo(0));
}
Example #24
Source File: TestUnifiedHighlighter.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testEncode() throws Exception {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    Field body = new Field("body", "", fieldType);
    Document doc = new Document();
    doc.add(body);

    body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
    iw.addDocument(doc);

    IndexReader ir = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
        @Override
        protected PassageFormatter getFormatter(String field) {
            return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
        }
    };
    Query query = new TermQuery(new Term("body", "highlighting"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(1, topDocs.totalHits.value);
    String snippets[] = highlighter.highlight("body", query, topDocs);
    assertEquals(1, snippets.length);
    assertEquals("Just a test <b>highlighting</b> from <i>postings</i>. ", snippets[0]);
    ir.close();
}
Example #25
Source File: AuthQueryIT.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Queries the index and asserts if the count matches documents returned.
 * @param queryString
 * @param count
 * @throws IOException
 * @throws org.apache.lucene.queryparser.classic.ParseException
 */
private void assertFTSQuery(String queryString, int count, String... name) throws IOException, ParseException {
    SolrServletRequest solrQueryRequest = null;
    RefCounted<SolrIndexSearcher> refCounted = null;
    try {
        solrQueryRequest = new SolrServletRequest(getCore(), null);
        refCounted = getCore().getSearcher(false, true, null);
        SolrIndexSearcher solrIndexSearcher = refCounted.get();

        SearchParameters searchParameters = new SearchParameters();
        searchParameters.setQuery(queryString);
        Query query = dataModel.getFTSQuery(new Pair<SearchParameters, Boolean>(searchParameters, Boolean.FALSE),
            solrQueryRequest, FTSQueryParser.RerankPhase.SINGLE_PASS);
        TopDocs docs = solrIndexSearcher.search(query, count * 2 + 10);
        Assert.assertEquals(count, docs.totalHits);
    } finally {
        refCounted.decref();
        solrQueryRequest.close();
    }
}
Example #26
Source File: AlfrescoReRankQParserPlugin.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
private void scaleScores(TopDocs topDocs, Map<Integer, Float> scoreMap) {
    float maxScore = topDocs.getMaxScore();
    float newMax = -Float.MAX_VALUE;
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        float score = scoreDoc.score;
        Float oldScore = scoreMap.get(scoreDoc.doc);

        // check if the score has been changed after rescoring
        boolean rescored = oldScore != null && score != oldScore;

        // If maxScore is different from 0, the score is divided by maxScore
        scoreDoc.score = score / (maxScore != 0 ? maxScore : 1);

        // If the document has been rescored, the score is increased by 1.
        // This results in having all the rescored element scores in (1,2] range.
        if (rescored) {
            scoreDoc.score += 1;
        }

        if (scoreDoc.score > newMax) {
            newMax = scoreDoc.score;
        }
    }
    assert (newMax <= 2);
    topDocs.setMaxScore(newMax);
}
Example #27
Source File: BM25NBClassifier.java From lucene-solr with Apache License 2.0 | 5 votes |
private double getTermProbForClass(Term classTerm, String... words) throws IOException {
    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new BooleanClause(new TermQuery(classTerm), BooleanClause.Occur.MUST));
    for (String textFieldName : textFieldNames) {
        for (String word : words) {
            builder.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
        }
    }
    if (query != null) {
        builder.add(query, BooleanClause.Occur.MUST);
    }
    TopDocs search = indexSearcher.search(builder.build(), 1);
    return search.totalHits.value > 0 ? search.scoreDocs[0].score : 1;
}
Example #28
Source File: TopicSearcher.java From tagme with Apache License 2.0 | 5 votes |
public int getIdByTitle(String title) throws IOException {
    TermQuery q = new TermQuery(new Term(TopicIndexer.FIELD_TITLE, title));
    TopDocs td = index.search(q, 1);
    if (td.totalHits == 0) {
        return -1;
    } else {
        return Integer.parseInt(index.doc(td.scoreDocs[0].doc).get(TopicIndexer.FIELD_WID));
    }
}
Example #29
Source File: TestOrdValues.java From lucene-solr with Apache License 2.0 | 5 votes |
private void doTestExactScore(String field, boolean inOrder) throws Exception {
    IndexReader r = DirectoryReader.open(dir);
    IndexSearcher s = newSearcher(r);
    ValueSource vs;
    if (inOrder) {
        vs = new OrdFieldSource(field);
    } else {
        vs = new ReverseOrdFieldSource(field);
    }

    Query q = new FunctionQuery(vs);
    TopDocs td = s.search(q, 1000);
    assertEquals("All docs should be matched!", N_DOCS, td.totalHits.value);
    ScoreDoc sd[] = td.scoreDocs;
    for (int i = 0; i < sd.length; i++) {
        float score = sd[i].score;
        String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD);
        log("-------- " + i + ". Explain doc " + id);
        log(s.explain(q, sd[i].doc));
        float expectedScore = N_DOCS - i - 1;
        assertEquals("score of result " + i + " should be " + expectedScore + " != " + score,
            expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
        String expectedId = inOrder
            ? id2String(N_DOCS - i) // in-order ==> larger values first
            : id2String(i + 1);     // reverse ==> smaller values first
        assertTrue("id of result " + i + " should be " + expectedId + " != " + score, expectedId.equals(id));
    }
    r.close();
}
Example #30
Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
private boolean isInIndex(long id, LRU cache, String fieldName, boolean populateCache, SolrCore core) throws IOException {
    if (cache.containsKey(id)) {
        return true;
    } else {
        RefCounted<SolrIndexSearcher> refCounted = null;
        try {
            if (populateCache) {
                cache.put(id, null); // Safe to add this here because we reset this on rollback.
            }
            refCounted = core.getSearcher();
            SolrIndexSearcher searcher = refCounted.get();
            FieldType fieldType = searcher.getSchema().getField(fieldName).getType();
            TermQuery q = new TermQuery(new Term(fieldName, fieldType.readableToIndexed(Long.toString(id))));
            TopDocs topDocs = searcher.search(q, 1);
            return topDocs.totalHits > 0;
        } finally {
            ofNullable(refCounted).ifPresent(RefCounted::decref);
        }
    }
}