Java Code Examples for org.apache.lucene.index.IndexReader#numDocs()
The following examples show how to use org.apache.lucene.index.IndexReader#numDocs(). Each example is drawn from an open-source project; the source file, project, and license are noted above it.
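numDocs() returns the number of live (non-deleted) documents in the index, in contrast to maxDoc(), which also counts deleted documents that have not yet been merged away. A minimal self-contained sketch of the difference (the index location "indexPath" is a placeholder, not taken from any example below):

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class NumDocsDemo {
    public static void main(String[] args) throws Exception {
        // "indexPath" is a placeholder for an existing Lucene index directory.
        try (Directory dir = FSDirectory.open(Paths.get("indexPath"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            System.out.println("live docs:    " + reader.numDocs());        // excludes deletions
            System.out.println("max doc:      " + reader.maxDoc());         // includes deletions
            System.out.println("deleted docs: " + reader.numDeletedDocs()); // maxDoc - numDocs
        }
    }
}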
Example 1
Source File: HasChildQueryParser.java From Elasticsearch with Apache License 2.0
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (getBoost() != 1.0F) {
        return super.rewrite(reader);
    }
    if (reader instanceof DirectoryReader) {
        String joinField = ParentFieldMapper.joinField(parentType);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        indexSearcher.setQueryCache(null);
        indexSearcher.setSimilarity(similarity);
        IndexParentChildFieldData indexParentChildFieldData = parentChildIndexFieldData.loadGlobal((DirectoryReader) reader);
        MultiDocValues.OrdinalMap ordinalMap = ParentChildIndexFieldData.getOrdinalMap(indexParentChildFieldData, parentType);
        return JoinUtil.createJoinQuery(joinField, innerQuery, toQuery, indexSearcher, scoreMode,
                ordinalMap, minChildren, maxChildren);
    } else {
        if (reader.leaves().isEmpty() && reader.numDocs() == 0) {
            // asserting reader passes down a MultiReader during rewrite which makes this
            // blow up since for this query to work we have to have a DirectoryReader otherwise
            // we can't load global ordinals - for this to work we simply check if the reader has no leaves
            // and rewrite to match nothing
            return new MatchNoDocsQuery();
        }
        throw new IllegalStateException("can't load global ordinals for reader of type: "
                + reader.getClass() + " must be a DirectoryReader");
    }
}
Example 2
Source File: LuceneTranslationMemory.java From modernmt with Apache License 2.0
@Override
public void dump(long memory, Consumer<Entry> consumer) throws IOException {
    IndexSearcher searcher = getIndexSearcher();
    IndexReader reader = getIndexReader();

    int size = reader.numDocs();
    if (size == 0)
        return;

    Query memoryQuery = new TermQuery(documentBuilder.makeMemoryTerm(memory));
    TopDocs docs = searcher.search(memoryQuery, size);

    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        Document document = reader.document(scoreDoc.doc);
        if (documentBuilder.getMemory(document) > 0) {
            TranslationMemory.Entry entry = documentBuilder.asEntry(document);
            consumer.accept(entry);
        }
    }
}
Example 3
Source File: LuceneTranslationMemory.java From modernmt with Apache License 2.0
@Override
public void dumpAll(Consumer<Entry> consumer) throws IOException {
    IndexSearcher searcher = getIndexSearcher();
    IndexReader reader = getIndexReader();

    int size = reader.numDocs();
    if (size == 0)
        return;

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), size);

    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        Document document = reader.document(scoreDoc.doc);
        if (documentBuilder.getMemory(document) > 0) {
            TranslationMemory.Entry entry = documentBuilder.asEntry(document);
            consumer.accept(entry);
        }
    }
}
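Both dump methods above rely on the same pattern: numDocs() bounds the number of live documents, so passing it as the result limit to search() guarantees no match is truncated. The early return on size == 0 also matters, since search() requires a positive limit. A stripped-down sketch of the pattern, assuming an already-open reader:

int size = reader.numDocs();
if (size > 0) {
    IndexSearcher searcher = new IndexSearcher(reader);
    // With the limit set to numDocs(), every matching document is returned.
    TopDocs all = searcher.search(new MatchAllDocsQuery(), size);
    for (ScoreDoc sd : all.scoreDocs) {
        Document document = searcher.doc(sd.doc);
        // process document ...
    }
}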
Example 4
Source File: IndexInfo.java From alfresco-repository with GNU Lesser General Public License v3.0
public int getNumberOfDocuments() throws IOException {
    IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader();
    try {
        return reader.numDocs();
    } finally {
        reader.close();
    }
}
Example 5
Source File: SignificantStringTermsAggregator.java From Elasticsearch with Apache License 2.0
@Override
public SignificantStringTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
Example 6
Source File: GlobalOrdinalsSignificantTermsAggregator.java From Elasticsearch with Apache License 2.0
@Override
public SignificantStringTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
Example 7
Source File: SignificantLongTermsAggregator.java From Elasticsearch with Apache License 2.0
@Override
public SignificantLongTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantLongTerms(0, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
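Examples 5 through 7 all use numDocs() the same way: it supplies supersetSize, the size of the background corpus against which each bucket's term frequency is judged for significance. A toy illustration of why the corpus size is needed (a plain rate comparison, not the actual significance heuristic used by Elasticsearch):

// A term in 10 of 20 foreground docs but only 100 of 1,000,000 corpus
// docs is heavily over-represented in the foreground.
double foregroundRate = 10.0 / 20.0;          // subsetDf / subsetSize
double backgroundRate = 100.0 / 1_000_000.0;  // supersetDf / supersetSize
System.out.println(foregroundRate / backgroundRate); // 5000.0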
Example 8
Source File: TestSpellChecker.java From lucene-solr with Apache License 2.0
private int numdoc() throws IOException {
    IndexReader rs = DirectoryReader.open(spellindex);
    int num = rs.numDocs();
    assertTrue(num != 0);
    // System.out.println("num docs: " + num);
    rs.close();
    return num;
}
Example 9
Source File: TestFieldCacheTermsFilter.java From lucene-solr with Apache License 2.0
public void testMissingTerms() throws Exception {
    String fieldName = "field1";
    Directory rd = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), rd);
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        int term = i * 10; // terms are units of 10;
        doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
        doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
        w.addDocument(doc);
    }
    IndexReader reader = w.getReader();
    w.close();

    IndexSearcher searcher = newSearcher(reader);
    int numDocs = reader.numDocs();
    ScoreDoc[] results;

    List<String> terms = new ArrayList<>();
    terms.add("5");
    results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
    assertEquals("Must match nothing", 0, results.length);

    terms = new ArrayList<>();
    terms.add("10");
    results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
    assertEquals("Must match 1", 1, results.length);

    terms = new ArrayList<>();
    terms.add("10");
    terms.add("20");
    results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
    assertEquals("Must match 2", 2, results.length);

    reader.close();
    rd.close();
}
Example 10
Source File: OLuceneIndexManagerAbstract.java From orientdb-lucene with Apache License 2.0
public long size(final ValuesTransformer<V> transformer) {
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        reader = getSearcher().getIndexReader();
    } catch (IOException e) {
        OLogManager.instance().error(this, "Error on getting size of Lucene index", e);
    } finally {
        if (searcher != null) {
            release(searcher);
        }
    }
    return reader.numDocs();
}
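Note that the method above never assigns the searcher it releases in the finally block, and reader.numDocs() would throw a NullPointerException if getSearcher() failed. A sketch of a tidier variant, assuming the same getSearcher() and release() helpers from the original class:

public long size(final ValuesTransformer<V> transformer) {
    IndexSearcher searcher = null;
    try {
        searcher = getSearcher();
        return searcher.getIndexReader().numDocs();
    } catch (IOException e) {
        OLogManager.instance().error(this, "Error on getting size of Lucene index", e);
        return 0; // report an empty index rather than throwing an NPE
    } finally {
        if (searcher != null) {
            release(searcher);
        }
    }
}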
Example 11
Source File: DocFreq.java From lumongo with Apache License 2.0
public DocFreq(IndexReader indexReader, String field) {
    this.indexReader = indexReader;
    this.field = field;
    this.docFreqMap = new HashMap<>();
    this.similarity = new ClassicSimilarity();
    this.numDocs = indexReader.numDocs();
}
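Caching numDocs() next to a ClassicSimilarity is the usual setup for inverse document frequency calculations. A hypothetical helper built on the fields above (idfFor is not part of the original class; TFIDFSimilarity.idf(docFreq, docCount), which ClassicSimilarity inherits, is a real Lucene method):

// Hypothetical addition: look up a term's document frequency once,
// cache it, and compute the classic IDF against the cached numDocs.
public double idfFor(String text) throws IOException {
    Integer df = docFreqMap.get(text);
    if (df == null) {
        df = indexReader.docFreq(new Term(field, text));
        docFreqMap.put(text, df);
    }
    return similarity.idf(df, numDocs);
}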
Example 12
Source File: TermQuery.java From alfresco-repository with GNU Lesser General Public License v3.0
public Explanation explain(IndexReader reader, int doc) throws IOException {
    ComplexExplanation result = new ComplexExplanation();
    result.setDescription("weight(" + getQuery() + " in " + doc + "), product of:");

    Explanation idfExpl = new Explanation(idf,
            "idf(docFreq=" + reader.docFreq(term) + ", numDocs=" + reader.numDocs() + ")");

    // explain query weight
    Explanation queryExpl = new Explanation();
    queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
    Explanation boostExpl = new Explanation(getBoost(), "boost");
    if (getBoost() != 1.0f)
        queryExpl.addDetail(boostExpl);
    queryExpl.addDetail(idfExpl);
    Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
    queryExpl.addDetail(queryNormExpl);
    queryExpl.setValue(boostExpl.getValue() * idfExpl.getValue() * queryNormExpl.getValue());
    result.addDetail(queryExpl);

    // explain field weight
    String field = term.field();
    ComplexExplanation fieldExpl = new ComplexExplanation();
    fieldExpl.setDescription("fieldWeight(" + term + " in " + doc + "), product of:");
    Explanation tfExpl = scorer(reader).explain(doc);
    fieldExpl.addDetail(tfExpl);
    fieldExpl.addDetail(idfExpl);
    Explanation fieldNormExpl = new Explanation();
    byte[] fieldNorms = reader.norms(field);
    float fieldNorm = fieldNorms != null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
    fieldNormExpl.setValue(fieldNorm);
    fieldNormExpl.setDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");
    fieldExpl.addDetail(fieldNormExpl);
    fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
    fieldExpl.setValue(tfExpl.getValue() * idfExpl.getValue() * fieldNormExpl.getValue());
    result.addDetail(fieldExpl);
    result.setMatch(fieldExpl.getMatch());

    // combine them
    result.setValue(queryExpl.getValue() * fieldExpl.getValue());
    if (queryExpl.getValue() == 1.0f)
        return fieldExpl;
    return result;
}
Example 13
Source File: TermInSetQueryTest.java From lucene-solr with Apache License 2.0
public void testDuel() throws IOException {
    final int iters = atLeast(2);
    final String field = "f";
    for (int iter = 0; iter < iters; ++iter) {
        final List<BytesRef> allTerms = new ArrayList<>();
        final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
        for (int i = 0; i < numTerms; ++i) {
            final String value = TestUtil.randomAnalysisString(random(), 10, true);
            allTerms.add(new BytesRef(value));
        }
        Directory dir = newDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
        final int numDocs = atLeast(100);
        for (int i = 0; i < numDocs; ++i) {
            Document doc = new Document();
            final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
            doc.add(new StringField(field, term, Store.NO));
            iw.addDocument(doc);
        }
        if (numTerms > 1 && random().nextBoolean()) {
            iw.deleteDocuments(new TermQuery(new Term(field, allTerms.get(0))));
        }
        iw.commit();
        final IndexReader reader = iw.getReader();
        final IndexSearcher searcher = newSearcher(reader);
        iw.close();

        if (reader.numDocs() == 0) {
            // may occasionally happen if all documents got the same term
            IOUtils.close(reader, dir);
            continue;
        }

        for (int i = 0; i < 100; ++i) {
            final float boost = random().nextFloat() * 10;
            final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
            List<BytesRef> queryTerms = new ArrayList<>();
            for (int j = 0; j < numQueryTerms; ++j) {
                queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
            }
            final BooleanQuery.Builder bq = new BooleanQuery.Builder();
            for (BytesRef t : queryTerms) {
                bq.add(new TermQuery(new Term(field, t)), Occur.SHOULD);
            }
            final Query q1 = new ConstantScoreQuery(bq.build());
            final Query q2 = new TermInSetQuery(field, queryTerms);
            assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
        }

        reader.close();
        dir.close();
    }
}
Example 14
Source File: TestMultiTermConstantScore.java From lucene-solr with Apache License 2.0
@Test
public void testRangeQueryRand() throws IOException {
    // NOTE: uses index build in *super* setUp
    IndexReader reader = signedIndexReader;
    IndexSearcher search = newSearcher(reader);

    String minRP = pad(signedIndexDir.minR);
    String maxRP = pad(signedIndexDir.maxR);

    int numDocs = reader.numDocs();
    assertEquals("num of docs", numDocs, 1 + maxId - minId);

    ScoreDoc[] result;

    // test extremes, bounded on both ends
    result = search.search(csrq("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
    assertEquals("find all", numDocs, result.length);

    result = search.search(csrq("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
    assertEquals("all but biggest", numDocs - 1, result.length);

    result = search.search(csrq("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
    assertEquals("all but smallest", numDocs - 1, result.length);

    result = search.search(csrq("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
    assertEquals("all but extremes", numDocs - 2, result.length);

    // unbounded
    result = search.search(csrq("rand", minRP, null, T, F), numDocs).scoreDocs;
    assertEquals("smallest and up", numDocs, result.length);

    result = search.search(csrq("rand", null, maxRP, F, T), numDocs).scoreDocs;
    assertEquals("biggest and down", numDocs, result.length);

    result = search.search(csrq("rand", minRP, null, F, F), numDocs).scoreDocs;
    assertEquals("not smallest, but up", numDocs - 1, result.length);

    result = search.search(csrq("rand", null, maxRP, F, F), numDocs).scoreDocs;
    assertEquals("not biggest, but down", numDocs - 1, result.length);

    // very small sets
    result = search.search(csrq("rand", minRP, minRP, F, F), numDocs).scoreDocs;
    assertEquals("min,min,F,F", 0, result.length);

    result = search.search(csrq("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
    assertEquals("max,max,F,F", 0, result.length);

    result = search.search(csrq("rand", minRP, minRP, T, T), numDocs).scoreDocs;
    assertEquals("min,min,T,T", 1, result.length);

    result = search.search(csrq("rand", null, minRP, F, T), numDocs).scoreDocs;
    assertEquals("nul,min,F,T", 1, result.length);

    result = search.search(csrq("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
    assertEquals("max,max,T,T", 1, result.length);

    result = search.search(csrq("rand", maxRP, null, T, F), numDocs).scoreDocs;
    assertEquals("max,nul,T,T", 1, result.length);
}
Example 15
Source File: QueryAutoStopWordAnalyzer.java From lucene-solr with Apache License 2.0
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency percentage
 * greater than the given maxPercentDocs
 *
 * @param delegate       Analyzer whose TokenStream will be filtered
 * @param indexReader    IndexReader to identify the stopwords from
 * @param fields         Selection of fields to calculate stopwords for
 * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
 *                       contain a term, after which the word is considered to be a stop word
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        float maxPercentDocs) throws IOException {
    this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
}
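Here numDocs() converts the percentage into an absolute document-frequency threshold: with 10,000 indexed documents and maxPercentDocs = 0.4, any term appearing in more than 4,000 documents is treated as a stop word. A usage sketch with placeholder names ("dir" and the "body" field are assumptions, not part of the Lucene API):

// "dir" is a placeholder for a Directory holding an existing index
// with a "body" field.
IndexReader reader = DirectoryReader.open(dir);
Analyzer stopWordAware = new QueryAutoStopWordAnalyzer(
        new StandardAnalyzer(),
        reader,
        Arrays.asList("body"),
        0.4f); // terms in more than 40% of documents become stop words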
Example 16
Source File: LuceneHelper.java From dexter with Apache License 2.0
/**
 * @return the number of documents indexed
 */
public int numDocs() {
    IndexReader reader = getReader();
    return reader.numDocs();
}