Java Code Examples for org.apache.lucene.index.SegmentReader#maxDoc()

The following examples show how to use org.apache.lucene.index.SegmentReader#maxDoc(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.

Example 1

Source File: MtasSearchTestConsistency.java From mtas with Apache License 2.0

6 votes

/**
 * Collects the ids of all documents that are not marked as deleted.
 *
 * <p>Each leaf reader is cast to {@link SegmentReader}. For every segment-local
 * id below {@code maxDoc()}, the id is kept when the segment reports
 * {@code numDocs() == maxDoc()} or when its live-docs bits are set for that id.
 * Kept ids are offset by the leaf's {@code docBase} before being stored.
 *
 * @param indexReader the index reader whose leaves are scanned
 * @return the collected document ids, in leaf order and ascending within a leaf
 */
private static ArrayList<Integer> getLiveDocs(IndexReader indexReader) {
  ArrayList<Integer> liveDocIds = new ArrayList<>();
  for (LeafReaderContext leaf : indexReader.leaves()) {
    SegmentReader segment = (SegmentReader) leaf.reader();
    for (int localId = 0; localId < segment.maxDoc(); localId++) {
      // The live-docs bits are only consulted when numDocs() differs from maxDoc().
      boolean deleted = segment.numDocs() != segment.maxDoc()
          && !segment.getLiveDocs().get(localId);
      if (deleted) {
        continue;
      }
      liveDocIds.add(leaf.docBase + localId);
    }
  }
  return liveDocIds;
}

Example 2

Source File: MtasDocumentIndex.java From inception with Apache License 2.0

4 votes

/**
 * Counts the span matches of {@code q} across all leaves of the searcher's index reader.
 *
 * <p>A document that passes the filters below contributes one result per start position
 * reported by its {@link Spans}. A matching document is skipped when it
 * <ul>
 * <li>is not set in the live-docs bits of a segment whose {@code numDocs()} differs from
 * {@code maxDoc()},</li>
 * <li>has no stored source document id or no stored annotation document id,</li>
 * <li>does not belong to the document the request is limited to (if one is given),</li>
 * <li>is a source document (annotation document id {@code -1}) whose source document id is
 * a key of the annotatable-documents map obtained for the request's project and user, or</li>
 * <li>is an annotation document (annotation document id other than {@code -1}) stored for a
 * username different from the requesting user's.</li>
 * </ul>
 *
 * @param searcher
 *            the searcher providing the index reader and used to create the span weight
 * @param aRequest
 *            the request supplying project, user and the optional document limitation
 * @param q
 *            the span query whose matches are counted
 * @return the number of counted span positions, or {@code -1} as soon as processing of any
 *         leaf fails
 * @throws IOException
 *             propagated from the set-up calls made before the per-leaf loop; failures
 *             inside the loop are logged and reported through the {@code -1} return value
 */
private long doCountResults(IndexSearcher searcher,
    SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException
{
    Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(),
        aRequest.getUser());

    final float boost = 0;
    SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false,
            boost);

    long numResults = 0;

    for (LeafReaderContext leafReaderContext : searcher.getIndexReader().leaves()) {
        try {
            Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
            SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
            if (spans == null) {
                // Nothing in this leaf matches the query
                continue;
            }

            while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                // The live-docs bits are only consulted when numDocs() differs from maxDoc()
                if (segmentReader.numDocs() != segmentReader.maxDoc()
                        && !segmentReader.getLiveDocs().get(spans.docID())) {
                    continue;
                }

                Document document = segmentReader.document(spans.docID());

                // Retrieve user
                String user = document.get(FIELD_USER);

                // Retrieve source and annotation document ids
                String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
                String rawAnnotationDocumentId = document.get(FIELD_ANNOTATION_DOCUMENT_ID);
                if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
                    log.trace("Indexed document lacks source/annotation document IDs"
                            + " - source: {}, annotation: {}", rawSourceDocumentId,
                        rawAnnotationDocumentId);
                    continue;
                }
                // parseLong yields the primitive directly instead of boxing and unboxing
                long sourceDocumentId = Long.parseLong(rawSourceDocumentId);
                long annotationDocumentId = Long.parseLong(rawAnnotationDocumentId);

                // If the query is limited to a given document, skip any results
                // which are not in the given document
                Optional<SourceDocument> limitedToDocument = aRequest.getLimitedToDocument();
                if (limitedToDocument.isPresent() && !Objects
                    .equals(limitedToDocument.get().getId(), sourceDocumentId)) {
                    log.trace("Query limited to document {}, skipping results for "
                            + "document {}", limitedToDocument.get().getId(),
                        sourceDocumentId);
                    continue;
                }

                if (annotatableDocuments.containsKey(sourceDocumentId)
                    && annotationDocumentId == -1) {
                    // Exclude result if the retrieved document is a source document
                    // (that is, has annotationDocument = -1) AND it has a
                    // corresponding annotation document for this user
                    log.trace("Skipping results from indexed source document {} in "
                        + "favor of results from the corresponding annotation "
                        + "document", sourceDocumentId);
                    continue;
                }
                else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername()
                    .equals(user)) {
                    // Exclude result if the retrieved document is an annotation
                    // document (that is, annotationDocument != -1) and its username
                    // is different from the querying user
                    log.trace("Skipping results from annotation document for user {} "
                            + "which does not match the requested user {}", user,
                        aRequest.getUser().getUsername());
                    continue;
                }

                while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                    numResults++;
                }
            }
        }
        catch (Exception e) {
            log.error("Unable to process query results", e);
            // Return the error marker right away: if the loop kept running, matches from
            // later leaves would be added on top of -1 and the failure would be hidden
            // behind a plausible-looking count.
            return -1;
        }
    }
    return numResults;
}

Example 3

Source File: MtasUimaParserLuceneTest.java From inception with Apache License 2.0

4 votes

/**
 * Runs the span query against every leaf of the index reader and prints each match to
 * standard output.
 *
 * <p>For every span position of every document that passes the live-docs check, two listings
 * are printed: the tokens returned by
 * {@code getPrefixFilteredObjectsByPositions} and the hits returned by
 * {@code getPositionedTermsByPrefixesAndPositionRange}, both queried for the span's start
 * position up to its end position minus one.
 *
 * @param indexReader
 *            the reader whose leaves are searched
 * @param field
 *            the field whose terms provide the codec info and against which tokens are
 *            looked up
 * @param q
 *            the span query to execute
 * @param prefixes
 *            the prefixes passed on to both codec-info look-ups
 * @throws IOException
 *             declared by the method; propagated from the index access calls
 */
private static void doQuery(IndexReader indexReader, String field, MtasSpanQuery q,
        List<String> prefixes)
    throws IOException
{
    IndexSearcher searcher = new IndexSearcher(indexReader);
    final float boost = 0;
    SpanWeight spanweight = q.rewrite(indexReader).createWeight(searcher, false, boost);

    for (LeafReaderContext leaf : indexReader.leaves()) {
        System.out.println("#### new iteration ####");
        Spans spans = spanweight.getSpans(leaf, SpanWeight.Postings.POSITIONS);
        SegmentReader segmentReader = (SegmentReader) leaf.reader();
        CodecInfo mtasCodecInfo = CodecInfo.getCodecInfoFromTerms(segmentReader.terms(field));
        if (spans == null) {
            // No matches in this leaf
            continue;
        }

        while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
            int docId = spans.docID();
            // The live-docs bits are only consulted when numDocs() differs from maxDoc()
            if (segmentReader.numDocs() != segmentReader.maxDoc()
                    && !segmentReader.getLiveDocs().get(docId)) {
                continue;
            }

            String idValue = segmentReader.document(docId).getField(FIELD_ID).stringValue();
            System.out.println("********  New doc " + docId + "-" + idValue);

            // Document id offset by the leaf's docBase, as printed for every token and hit
            int globalDocId = leaf.docBase + docId;

            while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                int firstPosition = spans.startPosition();
                int lastPosition = spans.endPosition() - 1;

                System.out.println("------");
                List<MtasTokenString> tokens = mtasCodecInfo
                        .getPrefixFilteredObjectsByPositions(field, docId, prefixes,
                                firstPosition, lastPosition);
                for (MtasTokenString token : tokens) {
                    System.out.print("docId: " + globalDocId + ", ");

                    // The end position is only shown when it differs from the start
                    String endSuffix = Objects.equals(token.getPositionEnd(),
                            token.getPositionStart()) ? "" : "-" + token.getPositionEnd();
                    System.out.print(" position: " + token.getPositionStart() + endSuffix);

                    System.out.print(
                            " offset: " + token.getOffsetStart() + "-" + token.getOffsetEnd());
                    System.out.print(" mtasId: " + token.getId());

                    String postfixSuffix = token.getPostfix() == null ? ""
                            : ":" + token.getPostfix();
                    System.out.println(" " + token.getPrefix() + postfixSuffix + ", ");
                }

                System.out.println("------");
                List<MtasTreeHit<String>> hits = mtasCodecInfo
                        .getPositionedTermsByPrefixesAndPositionRange(field, docId, prefixes,
                                firstPosition, lastPosition);
                for (MtasTreeHit<String> hit : hits) {
                    System.out.print("docId: " + globalDocId + ", ");

                    String endSuffix = hit.endPosition == hit.startPosition ? ""
                            : "-" + hit.endPosition;
                    System.out.print("position: " + hit.startPosition + endSuffix);

                    System.out.println(" " + CodecUtil.termPrefix(hit.data)
                            + (CodecUtil.termValue(hit.data) == null ? ""
                                    : ":" + CodecUtil.termValue(hit.data))
                            + ", ");
                }
            }
        }
    }
}