org.apache.lucene.document.DocumentStoredFieldVisitor Java Exaples

Source File: Catalog.java From cxf with Apache License 2.0

6 votes

@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final JsonArrayBuilder builder = Json.createArrayBuilder();

    try {
        final Query query = new MatchAllDocsQuery();

        for (final ScoreDoc scoreDoc: searcher.search(query, 1000).scoreDocs) {
            final DocumentStoredFieldVisitor fieldVisitor =
                new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD);

            reader.document(scoreDoc.doc, fieldVisitor);
            builder.add(fieldVisitor
                    .getDocument()
                    .getField(LuceneDocumentMetadata.SOURCE_FIELD)
                    .stringValue());
        }

        return builder.build();
    } finally {
        reader.close();
    }
}

Source File: SearchTravRetLoadFieldSelectorTask.java From lucene-solr with Apache License 2.0

5 votes

@Override
protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
  if (fieldsToLoad == null) {
    return ir.document(id);
  } else {
    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad);
    ir.document(id, visitor);
    return visitor.getDocument();
  }
}

Source File: IndexReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Like {@link #document(int)} but only loads the specified
 * fields.  Note that this is simply sugar for {@link
 * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}.
 */
public final Document document(int docID, Set<String> fieldsToLoad)
    throws IOException {
  final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(
      fieldsToLoad);
  document(docID, visitor);
  return visitor.getDocument();
}

Source File: GenericRecordReader.java From incubator-retired-blur with Apache License 2.0

5 votes

private void fetchBlurRecord() throws IOException {
  DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
  _fieldsReader.visitDocument(_docId, visitor);
  BlurRecord blurRecord = new BlurRecord();
  String rowId = RowDocumentUtil.readRecord(visitor.getDocument(), blurRecord);
  blurRecord.setRowId(rowId);
  _rowId = new Text(rowId);
  _tableBlurRecord = new TableBlurRecord(_table, blurRecord);
}

Source File: CheckIndex.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Test stored fields.
 * @lucene.experimental
 */
public static Status.StoredFieldStatus testStoredFields(CodecReader reader, PrintStream infoStream, boolean failFast) throws IOException {
  long startNS = System.nanoTime();
  final Status.StoredFieldStatus status = new Status.StoredFieldStatus();

  try {
    if (infoStream != null) {
      infoStream.print("    test: stored fields.......");
    }

    // Scan stored fields for all documents
    final Bits liveDocs = reader.getLiveDocs();
    StoredFieldsReader storedFields = reader.getFieldsReader().getMergeInstance();
    for (int j = 0; j < reader.maxDoc(); ++j) {
      // Intentionally pull even deleted documents to
      // make sure they too are not corrupt:
      DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      storedFields.visitDocument(j, visitor);
      Document doc = visitor.getDocument();
      if (liveDocs == null || liveDocs.get(j)) {
        status.docCount++;
        status.totFields += doc.getFields().size();
      }
    }      

    // Validate docCount
    if (status.docCount != reader.numDocs()) {
      throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
    }

    msg(infoStream, String.format(Locale.ROOT, "OK [%d total field count; avg %.1f fields per doc] [took %.3f sec]",
                                  status.totFields,
                                  (((float) status.totFields)/status.docCount),
                                  nsToSec(System.nanoTime() - startNS)));
  } catch (Throwable e) {
    if (failFast) {
      throw IOUtils.rethrowAlways(e);
    }
    msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]");
    status.error = e;
    if (infoStream != null) {
      e.printStackTrace(infoStream);
    }
  }

  return status;
}

Source File: IndexSizeEstimatorTest.java From lucene-solr with Apache License 2.0

4 votes

@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    searcherRef.decref();
    core.close();
  }
}

Source File: IndexReader.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Returns the stored fields of the <code>n</code><sup>th</sup>
 * <code>Document</code> in this index.  This is just
 * sugar for using {@link DocumentStoredFieldVisitor}.
 * <p>
 * <b>NOTE:</b> for performance reasons, this method does not check if the
 * requested document is deleted, and therefore asking for a deleted document
 * may yield unspecified results. Usually this is not required, however you
 * can test if the doc is deleted by checking the {@link
 * Bits} returned from {@link MultiBits#getLiveDocs}.
 *
 * <b>NOTE:</b> only the content of a field is returned,
 * if that field was stored during indexing.  Metadata
 * like boost, omitNorm, IndexOptions, tokenized, etc.,
 * are not preserved.
 * 
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if there is a low-level IO error
 */
// TODO: we need a separate StoredField, so that the
// Document returned here contains that class not
// IndexableField
public final Document document(int docID) throws IOException {
  final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
  document(docID, visitor);
  return visitor.getDocument();
}

org.apache.lucene.document.DocumentStoredFieldVisitor Java Examples