org.apache.lucene.search.TotalHitCountCollector Java Exaples

Source File: SimpleNaiveBayesClassifier.java From lucene-solr with Apache License 2.0

6 votes

/**
 * count the number of documents in the index having at least a value for the 'class' field
 *
 * @return the no. of documents having a value for the 'class' field
 * @throws IOException if accessing to term vectors or search fails
 */
protected int countDocsWithClass() throws IOException {
  Terms terms = MultiTerms.getTerms(this.indexReader, this.classFieldName);
  int docCount;
  if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount
    TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
    BooleanQuery.Builder q = new BooleanQuery.Builder();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
    if (query != null) {
      q.add(query, BooleanClause.Occur.MUST);
    }
    indexSearcher.search(q.build(),
        classQueryCountCollector);
    docCount = classQueryCountCollector.getTotalHits();
  } else {
    docCount = terms.getDocCount();
  }
  return docCount;
}

Source File: SimpleNaiveBayesClassifier.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns the number of documents of the input class ( from the whole index or from a subset)
 * that contains the word ( in a specific field or in all the fields if no one selected)
 * @param word the token produced by the analyzer
 * @param term the term representing the class
 * @return the number of documents of the input class
 * @throws IOException if a low level I/O problem happens
 */
private int getWordFreqForClass(String word, Term term) throws IOException {
  BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
  BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
  for (String textFieldName : textFieldNames) {
    subQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
  }
  booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
  booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
  if (query != null) {
    booleanQuery.add(query, BooleanClause.Occur.MUST);
  }
  TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
  indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
  return totalHitCountCollector.getTotalHits();
}

Source File: ChronixRetentionHandler.java From chronix.server with Apache License 2.0

6 votes

/**
 * Searches the index, if older documents exists. Updates the solr query response.
 *
 * @param req - the solr query request information
 * @param rsp - the solr query response information
 * @return true if the hit count is greater zero, otherwise false
 * @throws SyntaxError, IOException if bad things happen
 */
private boolean olderDocumentsExists(String queryString, SolrQueryRequest req, SolrQueryResponse rsp) throws SyntaxError, IOException {
    String defType = req.getParams().get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);

    QParser queryParser = QParser.getParser(queryString, defType, req);
    Query query = queryParser.getQuery();

    TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
    req.getSearcher().search(query, totalHitCountCollector);

    rsp.add("query", String.format("%s:[* TO NOW-%s]", queryField, timeSeriesAge));
    rsp.add("queryTechnical", queryString);
    rsp.add("removedDocuments", totalHitCountCollector.getTotalHits());

    return totalHitCountCollector.getTotalHits() != 0;
}

Source File: HeatmapFacetCounterTest.java From lucene-solr with Apache License 2.0

5 votes

private int countMatchingDocsAtLevel(Point pt, int facetLevel) throws IOException {
  // we use IntersectsPrefixTreeFilter directly so that we can specify the level to go to exactly.
  RecursivePrefixTreeStrategy strategy = (RecursivePrefixTreeStrategy) this.strategy;
  Query filter = new IntersectsPrefixTreeQuery(
      pt, strategy.getFieldName(), grid, facetLevel, grid.getMaxLevels());
  final TotalHitCountCollector collector = new TotalHitCountCollector();
  indexSearcher.search(filter, collector);
  cellsValidated++;
  if (collector.getTotalHits() > 0) {
    cellValidatedNonZero++;
  }
  return collector.getTotalHits();
}

Source File: SimpleNaiveBayesDocumentClassifier.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the number of documents of the input class ( from the whole index or from a subset)
 * that contains the word ( in a specific field or in all the fields if no one selected)
 *
 * @param word      the token produced by the analyzer
 * @param fieldName the field the word is coming from
 * @param term      the class term
 * @return number of documents of the input class
 * @throws java.io.IOException If there is a low-level I/O error
 */
private int getWordFreqForClass(String word, String fieldName, Term term) throws IOException {
  BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
  BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
  subQuery.add(new BooleanClause(new TermQuery(new Term(fieldName, word)), BooleanClause.Occur.SHOULD));
  booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
  booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
  if (query != null) {
    booleanQuery.add(query, BooleanClause.Occur.MUST);
  }
  TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
  indexSearcher.search(booleanQuery.build(), totalHitCountCollector);
  return totalHitCountCollector.getTotalHits();
}

Source File: Grouping.java From lucene-solr with Apache License 2.0

5 votes

@Override
protected Collector createFirstPassCollector() throws IOException {
  // Ok we don't want groups, but do want a total count
  if (actualGroupsToFind <= 0) {
    fallBackCollector = new TotalHitCountCollector();
    return fallBackCollector;
  }

  groupSort = groupSort == null ? Sort.RELEVANCE : groupSort;
  firstPass = new FirstPassGroupingCollector<>(new TermGroupSelector(groupBy), groupSort, actualGroupsToFind);
  return firstPass;
}

Source File: Grouping.java From lucene-solr with Apache License 2.0

5 votes

@Override
protected Collector createFirstPassCollector() throws IOException {
  // Ok we don't want groups, but do want a total count
  if (actualGroupsToFind <= 0) {
    fallBackCollector = new TotalHitCountCollector();
    return fallBackCollector;
  }

  groupSort = groupSort == null ? Sort.RELEVANCE : groupSort;
  firstPass = new FirstPassGroupingCollector<>(newSelector(), searcher.weightSort(groupSort), actualGroupsToFind);
  return firstPass;
}

Source File: CommandHandler.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Invokes search with the specified filter and collector.  
 * If a time limit has been specified then wrap the collector in the TimeLimitingCollector
 */
private void searchWithTimeLimiter(Query query, 
                                   ProcessedFilter filter, 
                                   Collector collector) throws IOException {
  if (queryCommand.getTimeAllowed() > 0 ) {
    collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), queryCommand.getTimeAllowed());
  }

  TotalHitCountCollector hitCountCollector = new TotalHitCountCollector();
  if (includeHitCount) {
    collector = MultiCollector.wrap(collector, hitCountCollector);
  }

  query = QueryUtils.combineQueryAndFilter(query, filter.filter);

  if (filter.postFilter != null) {
    filter.postFilter.setLastDelegate(collector);
    collector = filter.postFilter;
  }

  try {
    searcher.search(query, collector);
  } catch (TimeLimitingCollector.TimeExceededException | ExitableDirectoryReader.ExitingReaderException x) {
    partialResults = true;
    log.warn("Query: {}; {}", query, x.getMessage());
  }

  if (includeHitCount) {
    totalHitCount = hitCountCollector.getTotalHits();
  }
}

Source File: LuceneIndexer.java From MtgDesktopCompanion with GNU General Public License v3.0

5 votes

@Override
public List<MagicCard> search(String q)
{
	if(dir==null)
		open();
	
	List<MagicCard> ret = new ArrayList<>();
	
	try (IndexReader indexReader = DirectoryReader.open(dir))
	{
		 IndexSearcher searcher = new IndexSearcher(indexReader);
		 Query query = new QueryParser("name", analyzer).parse(q);
		 logger.trace(query);
		 
		 TotalHitCountCollector collector = new TotalHitCountCollector();
		 searcher.search(query,collector);
		 
		 TopDocs top= searcher.search(query, Math.max(1, collector.getTotalHits()));
		 
		 for(int i =0;i<top.totalHits.value;i++)
			 ret.add(serializer.fromJson(searcher.doc(top.scoreDocs[i].doc).get("data"),MagicCard.class));
		 
		 
	} catch (Exception e) {
		logger.error(e);
	}
	
	return ret;
}

Source File: FacetStorageTest.java From lumongo with Apache License 2.0

5 votes

/** User runs a query and counts facets. */
private List<FacetResult> search() throws IOException {
	DirectoryReader indexReader = DirectoryReader.open(directory);
	IndexSearcher searcher = new IndexSearcher(indexReader);
	SortedSetDocValuesReaderState state = new DefaultSortedSetDocValuesReaderState(indexReader);
	
	// Aggregates the facet counts
	FacetsCollector fc = new FacetsCollector();
	
	// MatchAllDocsQuery is for "browsing" (counts facets
	// for all non-deleted docs in the index); normally
	// you'd use a "normal" query:
	//FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);

	TotalHitCountCollector collector = new TotalHitCountCollector();
	searcher.search(new MatchAllDocsQuery(), MultiCollector.wrap(collector, fc));
	
	// Retrieve results
	Facets facets = new SortedSetDocValuesFacetCounts(state, fc);
	
	List<FacetResult> results = new ArrayList<>();
	results.add(facets.getTopChildren(10, "Author"));
	results.add(facets.getTopChildren(10, "Publish Year"));
	indexReader.close();
	
	return results;
}

Source File: EngineTestCase.java From crate with Apache License 2.0

5 votes

protected static void assertVisibleCount(InternalEngine engine, int numDocs, boolean refresh) throws IOException {
    if (refresh) {
        engine.refresh("test");
    }
    try (Engine.Searcher searcher = engine.acquireSearcher("test")) {
        final TotalHitCountCollector collector = new TotalHitCountCollector();
        searcher.searcher().search(new MatchAllDocsQuery(), collector);
        assertThat(collector.getTotalHits(), equalTo(numDocs));
    }
}

Source File: CachingNaiveBayesClassifier.java From lucene-solr with Apache License 2.0

4 votes

private Map<BytesRef, Integer> getWordFreqForClassess(String word) throws IOException {

    Map<BytesRef, Integer> insertPoint;
    insertPoint = termCClassHitCache.get(word);

    // if we get the answer from the cache
    if (insertPoint != null) {
      if (!insertPoint.isEmpty()) {
        return insertPoint;
      }
    }

    Map<BytesRef, Integer> searched = new ConcurrentHashMap<>();

    // if we dont get the answer, but it's relevant we must search it and insert to the cache
    if (insertPoint != null || !justCachedTerms) {
      for (BytesRef cclass : cclasses) {
        BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
        BooleanQuery.Builder subQuery = new BooleanQuery.Builder();
        for (String textFieldName : textFieldNames) {
          subQuery.add(new BooleanClause(new TermQuery(new Term(textFieldName, word)), BooleanClause.Occur.SHOULD));
        }
        booleanQuery.add(new BooleanClause(subQuery.build(), BooleanClause.Occur.MUST));
        booleanQuery.add(new BooleanClause(new TermQuery(new Term(classFieldName, cclass)), BooleanClause.Occur.MUST));
        if (query != null) {
          booleanQuery.add(query, BooleanClause.Occur.MUST);
        }
        TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
        indexSearcher.search(booleanQuery.build(), totalHitCountCollector);

        int ret = totalHitCountCollector.getTotalHits();
        if (ret != 0) {
          searched.put(cclass, ret);
        }
      }
      if (insertPoint != null) {
        // threadsafe and concurrent write
        termCClassHitCache.put(word, searched);
      }
    }

    return searched;
  }

Source File: ScoreNormalizer.java From semantic-knowledge-graph with Apache License 2.0

4 votes

private static int getTotalDocs(NodeContext context) throws IOException {
    TotalHitCountCollector collector = new TotalHitCountCollector();
    context.req.getSearcher().search(new MatchAllDocsQuery(), collector);
    return collector.getTotalHits();
}

org.apache.lucene.search.TotalHitCountCollector Java Examples