Java Code Examples for org.apache.lucene.index.IndexReader#document()
The following examples show how to use
org.apache.lucene.index.IndexReader#document().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LuceneTranslationMemory.java From modernmt with Apache License 2.0 | 6 votes |
@Override public void dump(long memory, Consumer<Entry> consumer) throws IOException { IndexSearcher searcher = getIndexSearcher(); IndexReader reader = getIndexReader(); int size = reader.numDocs(); if (size == 0) return; Query memoryQuery = new TermQuery(documentBuilder.makeMemoryTerm(memory)); TopDocs docs = searcher.search(memoryQuery, size); for (ScoreDoc scoreDoc : docs.scoreDocs) { Document document = reader.document(scoreDoc.doc); if (documentBuilder.getMemory(document) > 0) { TranslationMemory.Entry entry = documentBuilder.asEntry(document); consumer.accept(entry); } } }
Example 2
Source File: LuceneTranslationMemory.java From modernmt with Apache License 2.0 | 6 votes |
@Override public void dumpAll(Consumer<Entry> consumer) throws IOException { IndexSearcher searcher = getIndexSearcher(); IndexReader reader = getIndexReader(); int size = reader.numDocs(); if (size == 0) return; TopDocs docs = searcher.search(new MatchAllDocsQuery(), size); for (ScoreDoc scoreDoc : docs.scoreDocs) { Document document = reader.document(scoreDoc.doc); if (documentBuilder.getMemory(document) > 0) { TranslationMemory.Entry entry = documentBuilder.asEntry(document); consumer.accept(entry); } } }
Example 3
Source File: AbstractLuceneIndexerImpl.java From alfresco-repository with GNU Lesser General Public License v3.0 | 6 votes |
protected boolean locateContainer(String nodeRef, IndexReader reader) { boolean found = false; try { TermDocs td = reader.termDocs(new Term("ID", nodeRef)); while (td.next()) { int doc = td.doc(); Document document = reader.document(doc); if (document.getField("ISCONTAINER") != null) { found = true; break; } } td.close(); } catch (IOException e) { throw new LuceneIndexException("Failed to delete container and below for " + nodeRef, e); } return found; }
Example 4
Source File: Catalog.java From cxf with Apache License 2.0 | 6 votes |
@GET @Produces(MediaType.APPLICATION_JSON) public JsonArray getBooks() throws IOException { final IndexReader reader = DirectoryReader.open(directory); final IndexSearcher searcher = new IndexSearcher(reader); final JsonArrayBuilder builder = Json.createArrayBuilder(); try { final Query query = new MatchAllDocsQuery(); for (final ScoreDoc scoreDoc: searcher.search(query, 1000).scoreDocs) { final DocumentStoredFieldVisitor fieldVisitor = new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD); reader.document(scoreDoc.doc, fieldVisitor); builder.add(fieldVisitor .getDocument() .getField(LuceneDocumentMetadata.SOURCE_FIELD) .stringValue()); } return builder.build(); } finally { reader.close(); } }
Example 5
Source File: TestBlockJoin.java From lucene-solr with Apache License 2.0 | 6 votes |
private void compareHits(IndexReader r, IndexReader joinR, TopDocs controlHits, Map<Integer, TopDocs> joinResults) throws Exception { int currentParentID = -1; int childHitSlot = 0; TopDocs childHits = new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]); for (ScoreDoc controlHit : controlHits.scoreDocs) { Document controlDoc = r.document(controlHit.doc); int parentID = Integer.parseInt(controlDoc.get("parentID")); if (parentID != currentParentID) { assertEquals(childHitSlot, childHits.scoreDocs.length); currentParentID = parentID; childHitSlot = 0; childHits = joinResults.get(parentID); } String controlChildID = controlDoc.get("childID"); Document childDoc = joinR.document(childHits.scoreDocs[childHitSlot++].doc); String childID = childDoc.get("childID"); assertEquals(controlChildID, childID); } }
Example 6
Source File: LuceneHelper.java From dexter with Apache License 2.0 | 6 votes |
private Document getDoc(int wikiId) { IndexReader reader = getReader(); // System.out.println("get docId "+pos); if (wikiId <= 0) return null; int docId = getLuceneId(wikiId); if (docId < 0) { logger.warn("no id for wikiId {}", wikiId); return null; } logger.debug("get wikiId {} -> docId {}", wikiId, docId); Document doc = null; try { doc = reader.document(docId); } catch (Exception e) { logger.error("retrieving doc in position {} {}", docId, e.toString()); System.exit(-1); } return doc; }
Example 7
Source File: TestBlockJoin.java From lucene-solr with Apache License 2.0 | 6 votes |
private void compareChildHits(IndexReader r, IndexReader joinR, TopDocs results, TopDocs joinResults) throws Exception { assertEquals(results.totalHits.value, joinResults.totalHits.value); assertEquals(results.scoreDocs.length, joinResults.scoreDocs.length); for(int hitCount=0;hitCount<results.scoreDocs.length;hitCount++) { ScoreDoc hit = results.scoreDocs[hitCount]; ScoreDoc joinHit = joinResults.scoreDocs[hitCount]; Document doc1 = r.document(hit.doc); Document doc2 = joinR.document(joinHit.doc); assertEquals("hit " + hitCount + " differs", doc1.get("childID"), doc2.get("childID")); // don't compare scores -- they are expected to differ assertTrue(hit instanceof FieldDoc); assertTrue(joinHit instanceof FieldDoc); FieldDoc hit0 = (FieldDoc) hit; FieldDoc joinHit0 = (FieldDoc) joinHit; assertArrayEquals(hit0.fields, joinHit0.fields); } }
Example 8
Source File: SearchTravRetHighlightTask.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception { IndexReader reader = searcher.getIndexReader(); highlighter.setFragmentScorer(new QueryScorer(q)); // highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) { Document document = reader.document(scoreDoc.doc, hlFields); Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null; for (IndexableField indexableField : document) { TokenStream tokenStream; if (termVecs) { tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields, indexableField.stringValue(), analyzer, maxDocCharsToAnalyze); } else { tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue()); } // will close TokenStream: String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags); preventOptimizeAway = fragments.length; } } }
Example 9
Source File: AbstractLuceneIndexerImpl.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
protected static Set<String> deleteReference(Collection<String> nodeRefs, IndexReader reader, boolean delete) throws LuceneIndexException { Set<String> refs = new LinkedHashSet<String>(); for (String nodeRef : nodeRefs) { try { TermDocs td = reader.termDocs(new Term("PARENT", nodeRef)); while (td.next()) { int doc = td.doc(); Document document = reader.document(doc); String[] ids = document.getValues("ID"); refs.add(ids[ids.length - 1]); if (delete) { reader.deleteDocument(doc); } } td.close(); } catch (IOException e) { throw new LuceneIndexException("Failed to delete node by parent for " + nodeRef, e); } } return refs; }
Example 10
Source File: AbstractLuceneIndexerImpl.java From alfresco-repository with GNU Lesser General Public License v3.0 | 5 votes |
protected static Set<String> deleteContainerAndBelow(String nodeRef, IndexReader reader, boolean delete, boolean cascade) throws LuceneIndexException { Set<String> refs = new LinkedHashSet<String>(); try { if (delete) { reader.deleteDocuments(new Term("ID", nodeRef)); } refs.add(nodeRef); if (cascade) { TermDocs td = reader.termDocs(new Term("ANCESTOR", nodeRef)); while (td.next()) { int doc = td.doc(); Document document = reader.document(doc); String[] ids = document.getValues("ID"); refs.add(ids[ids.length - 1]); if (delete) { reader.deleteDocument(doc); } } td.close(); } } catch (IOException e) { throw new LuceneIndexException("Failed to delete container and below for " + nodeRef, e); } return refs; }
Example 11
Source File: SearchTravRetLoadFieldSelectorTask.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override protected Document retrieveDoc(IndexReader ir, int id) throws IOException { if (fieldsToLoad == null) { return ir.document(id); } else { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); ir.document(id, visitor); return visitor.getDocument(); } }
Example 12
Source File: LuceneIndex.java From rdf4j with BSD 3-Clause "New" or "Revised" License | 4 votes |
private static Document readDocument(IndexReader reader, int docId, Set<String> fieldsToLoad) throws IOException { DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); reader.document(docId, visitor); return visitor.getDocument(); }
Example 13
Source File: TestBinaryDocument.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testBinaryFieldInIndex() throws Exception { FieldType ft = new FieldType(); ft.setStored(true); StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes(StandardCharsets.UTF_8)); Field stringFldStored = new Field("stringStored", binaryValStored, ft); Document doc = new Document(); doc.add(binaryFldStored); doc.add(stringFldStored); /** test for field count */ assertEquals(2, doc.getFields().size()); /** add the doc to a ram index */ Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); writer.addDocument(doc); /** open a reader and fetch the document */ IndexReader reader = writer.getReader(); Document docFromReader = reader.document(0); assertTrue(docFromReader != null); /** fetch the binary stored field and compare its content with the original one */ BytesRef bytes = docFromReader.getBinaryValue("binaryStored"); assertNotNull(bytes); String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8); assertTrue(binaryFldStoredTest.equals(binaryValStored)); /** fetch the string field and compare its content with the original one */ String stringFldStoredTest = docFromReader.get("stringStored"); assertTrue(stringFldStoredTest.equals(binaryValStored)); writer.close(); reader.close(); dir.close(); }
Example 14
Source File: Catalog.java From cxf with Apache License 2.0 | 4 votes |
@GET @Produces(MediaType.APPLICATION_JSON) @CrossOriginResourceSharing(allowAllOrigins = true) @Path("/search") public Response findBook(@Context SearchContext searchContext, @Context final UriInfo uri) throws IOException { final IndexReader reader = DirectoryReader.open(directory); final IndexSearcher searcher = new IndexSearcher(reader); final JsonArrayBuilder builder = Json.createArrayBuilder(); try { visitor.reset(); visitor.visit(searchContext.getCondition(SearchBean.class)); final Query query = visitor.getQuery(); if (query != null) { final TopDocs topDocs = searcher.search(query, 1000); for (final ScoreDoc scoreDoc: topDocs.scoreDocs) { final Document document = reader.document(scoreDoc.doc); final String source = document .getField(LuceneDocumentMetadata.SOURCE_FIELD) .stringValue(); builder.add( Json.createObjectBuilder() .add("source", source) .add("score", scoreDoc.score) .add("url", uri.getBaseUriBuilder() .path(Catalog.class) .path(source) .build().toString()) ); } } return Response.ok(builder.build()).build(); } finally { reader.close(); } }
Example 15
Source File: ReadTask.java From lucene-solr with Apache License 2.0 | 4 votes |
protected Document retrieveDoc(IndexReader ir, int id) throws IOException { return ir.document(id); }
Example 16
Source File: TokenSources.java From lucene-solr with Apache License 2.0 | 4 votes |
@Deprecated // maintenance reasons LUCENE-6445 public static TokenStream getTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer) throws IOException { Document doc = reader.document(docId); return getTokenStream(doc, field, analyzer); }
Example 17
Source File: TopicIndexer.java From tagme with Apache License 2.0 | 4 votes |
/**
 * Builds the topic index from the Wikipedia article index: for every TOPIC
 * page, copies id/title/abstract, attaches the best anchor text (if any) and
 * category fields, and writes the document to {@code workingDir}.
 *
 * Note: a single Document and its Field instances are reused across
 * iterations (setValue per loop) to avoid per-document allocation; only the
 * variable-count category fields are added and removed each round.
 *
 * @param lang       language code used to locate source indexes and datasets
 * @param workingDir target directory for the new index
 * @throws IOException if any index cannot be read or written
 */
@Override
public void makeIndex(String lang, File workingDir) throws IOException {

    IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
    Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();

    IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir),
        new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));

    // Reusable document and fields (see note above)
    Document doc = new Document();
    Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
    Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
    Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
    Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
    doc.add(fWID);
    doc.add(fTitle);
    doc.add(fAbstract);
    doc.add(fBestAnchor);

    int max = articles.maxDoc();
    PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
    plog.setEnd(max);
    plog.start("Start indexing...");

    for(int i=0; i<max; i++)
    {
        plog.update(0);
        Document oldDoc = articles.document(i);
        PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
        if (type == PageType.TOPIC)
        {
            int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
            fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
            fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));

            // Missing best anchor is tolerated: counted ("noBest") and stored as ""
            String bestAnchor = bestAnchorMap.get(wid);
            if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
            fBestAnchor.setValue(bestAnchor==null?"":bestAnchor);

            String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
            if (cats != null) {
                for (int j=0; j<cats.length; j++)
                    doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
            }

            index.addDocument(doc);
            plog.update(1);

            // Remove per-page category fields before the next iteration
            doc.removeFields(FIELD_CAT);
        }
    }
    plog.stop();

    log.info("Now optimizing...");
    index.optimize();

    index.close();

    //we cannot call this because the index is still in the temporary dir
    //so TopicDocs will be created using old index
    log.info("Index Done, now creating WID->DOC_ID map");
    //
    //		TopicDocs td = new TopicDocs(lang);
    //		td.forceParsing();

    log.info("Done.");
}
Example 18
Source File: AnchorTrieDump.java From tagme with Apache License 2.0 | 4 votes |
@Override protected AnchorTrie parseSet() throws IOException { IndexReader anchors = Indexes.getReader(RepositoryDirs.ANCHORS.getPath(lang)); AnchorTrie trie = new AnchorTrie(); int maxdoc = anchors.maxDoc(); PLogger plog = new PLogger(log, Step.TEN_MINUTES, "anchors", "skipped", "duplicates"); plog.setEnd(0, maxdoc); plog.start("Inserting in to trie..."); for(int i=0; i<maxdoc; i++) { plog.update(0); Document doc = anchors.document(i); if (doc == null){ plog.update(1); continue; } String anchorText = doc.get(AnchorIndexer.FIELD_TEXT); String serial = doc.get(AnchorIndexer.FIELD_OBJECT); Anchor anchorObj = Anchor.deserialize(serial); if (anchorObj == null){ plog.update(1); continue; } boolean done = trie.add(anchorText, anchorObj); if (!done) plog.update(2); } plog.stop(); log.info("Now trimming..."); trie.trim(); log.info("Done."); return trie; }
Example 19
Source File: MemoryIndex.java From netbeans with Apache License 2.0 | 4 votes |
@Override public <S, T> void queryDocTerms( @NonNull Map<? super T, Set<S>> result, @NonNull Convertor<? super Document, T> convertor, @NonNull Convertor<? super Term, S> termConvertor, @NullAllowed FieldSelector selector, @NullAllowed AtomicBoolean cancel, @NonNull Query... queries) throws IOException, InterruptedException { Parameters.notNull("result", result); //NOI18N Parameters.notNull("convertor", convertor); //NOI18N Parameters.notNull("termConvertor", termConvertor); //NOI18N Parameters.notNull("queries", queries); //NOI18N if (selector == null) { selector = AllFieldsSelector.INSTANCE; } lock.readLock().lock(); try { final IndexReader in = getReader(); if (in == null) { return; } final BitSet bs = new BitSet(in.maxDoc()); final Collector c = new BitSetCollector(bs); final Searcher searcher = new IndexSearcher(in); final TermCollector termCollector = new TermCollector(c); try { for (Query q : queries) { if (cancel != null && cancel.get()) { throw new InterruptedException (); } if (q instanceof TermCollector.TermCollecting) { ((TermCollector.TermCollecting)q).attach(termCollector); } else { throw new IllegalArgumentException ( String.format("Query: %s does not implement TermCollecting", //NOI18N q.getClass().getName())); } searcher.search(q, termCollector); } } finally { searcher.close(); } for (int docNum = bs.nextSetBit(0); docNum >= 0; docNum = bs.nextSetBit(docNum+1)) { if (cancel != null && cancel.get()) { throw new InterruptedException (); } final Document doc = in.document(docNum, selector); final T value = convertor.convert(doc); if (value != null) { final Set<Term> terms = termCollector.get(docNum); if (terms != null) { result.put (value, convertTerms(termConvertor, terms)); } } } } finally { lock.readLock().unlock(); } }
Example 20
Source File: MemoryIndex.java From netbeans with Apache License 2.0 | 4 votes |
@Override public <T> void query( @NonNull Collection<? super T> result, @NonNull Convertor<? super Document, T> convertor, @NullAllowed FieldSelector selector, @NullAllowed AtomicBoolean cancel, @NonNull Query... queries) throws IOException, InterruptedException { Parameters.notNull("queries", queries); //NOI18N Parameters.notNull("convertor", convertor); //NOI18N Parameters.notNull("result", result); //NOI18N if (selector == null) { selector = AllFieldsSelector.INSTANCE; } lock.readLock().lock(); try { final IndexReader in = getReader(); if (in == null) { return; } final BitSet bs = new BitSet(in.maxDoc()); final Collector c = new BitSetCollector(bs); final Searcher searcher = new IndexSearcher(in); try { for (Query q : queries) { if (cancel != null && cancel.get()) { throw new InterruptedException (); } searcher.search(q, c); } } finally { searcher.close(); } for (int docNum = bs.nextSetBit(0); docNum >= 0; docNum = bs.nextSetBit(docNum+1)) { if (cancel != null && cancel.get()) { throw new InterruptedException (); } final Document doc = in.document(docNum, selector); final T value = convertor.convert(doc); if (value != null) { result.add (value); } } } finally { lock.readLock().unlock(); } }