Java Code Examples for org.apache.lucene.document.Document#get()
The following examples show how to use
org.apache.lucene.document.Document#get() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LuceneResultSet.java From orientdb-lucene with Apache License 2.0 | 6 votes |
/**
 * Returns the next matching record identifier from the Lucene result set,
 * fetching a further page of ScoreDocs once the current page is exhausted.
 */
@Override public OIdentifiable next() {
    // Current page fully consumed: rewind the page cursor and pull more hits.
    if (localIndex == array.length) {
        localIndex = 0;
        fetchMoreResult();
    }
    final ScoreDoc score = array[localIndex++];
    Document ret = null;
    OContextualRecordId res = null;
    try {
        // Load the stored Lucene document and read the record id (RID) field it carries.
        ret = queryContext.searcher.doc(score.doc);
        String rId = ret.get(OLuceneIndexManagerAbstract.RID);
        res = new OContextualRecordId(rId);
        // Callback lets the manager attach score/context metadata to the result.
        manager.onRecordAddedToResultSet(queryContext, res, ret, score);
    } catch (IOException e) {
        // NOTE(review): the IOException is swallowed and next() then returns null —
        // callers receive a null element instead of an error; consider rethrowing.
        e.printStackTrace();
    }
    index++;
    return res;
}
Example 2
Source File: LtrQueryTests.java From elasticsearch-learning-to-rank with Apache License 2.0 | 6 votes |
/**
 * Asserts that the score the ranker query produced for one hit matches the
 * expected model score, and (for non-TF/IDF similarities) that explain()
 * agrees with score() within a few ULPs.
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores, RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    Document d = searcherUnderTest.doc(scoreDoc.doc);
    // The stored "id" field doubles as the index into the expected scores array.
    String idVal = d.get("id");
    int docId = Integer.decode(idVal);
    float modelScore = scores[docId];
    float queryScore = scoreDoc.score;
    // Tolerance is a small multiple of the ULP at the expected value.
    assertEquals("Scores match with similarity " + similarity.getClass(), modelScore, queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore));
    if (!(similarity instanceof TFIDFSimilarity)) {
        // There are precision issues with these similarities when using explain
        // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain
        Explanation expl = searcherUnderTest.explain(ltrQuery, docId);
        assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(), queryScore, 5 * Math.ulp(modelScore));
        checkFeatureNames(expl, features);
    }
}
Example 3
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0 | 6 votes |
public void iterateThroughDocList() throws IOException { int n = reader.maxDoc(); if (n>100) { n = 100; } for (int i = 0; i < n; i++) { Document doc = reader.document(i); // the doc.get pulls out the values stored - ONLY if you store the fields String docnum = doc.get("docnum"); String title = doc.get("title"); System.out.println("ID: " + i); System.out.println("docnum and title: " + docnum + " " + title); //System.out.println(doc.get("content")); iterateThroughDocTermVector(i); } }
Example 4
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0 | 6 votes |
public void countFieldData() throws IOException { int n = reader.maxDoc(); int nt = 0; int nc = 0; for (int i = 0; i < n; i++) { Document doc = reader.document(i); // the doc.get pulls out the values stored - ONLY if you store the fields String title = doc.get(Lucene4IRConstants.FIELD_TITLE); String content = doc.get(Lucene4IRConstants.FIELD_CONTENT); if (title.length()>0){ nt++; } if (content.length()>0){ nc++; } } System.out.println("Num Docs: " +n + " Docs with Title text: " + nt + " Docs with Contents text: "+ nc); }
Example 5
Source File: SearchEngineIndexer.java From gravitee-management-rest-api with Apache License 2.0 | 6 votes |
/**
 * Removes a document from the Lucene index, matching on both its id and type
 * stored fields.
 *
 * @param document the indexed document whose id/type identify what to delete
 * @throws TechnicalException if the underlying index delete fails
 */
public void remove(Document document) throws TechnicalException {
    String type = document.get(TYPE_FIELD);
    String id = document.get(ID_FIELD);
    logger.debug("Removing document type[{}] ID[{}]", type, id);
    // Both clauses are MUST so only the exact (id, type) pair is deleted.
    BooleanQuery.Builder bq = new BooleanQuery.Builder();
    bq.add(new TermQuery(new Term(ID_FIELD, id)), BooleanClause.Occur.MUST);
    bq.add(new TermQuery(new Term(TYPE_FIELD, type)), BooleanClause.Occur.MUST);
    try {
        writer.deleteDocuments(bq.build());
    } catch (IOException ioe) {
        // BUG FIX: the original messages said "Fail to index" — copy/paste from
        // the index() method. This is a delete operation.
        logger.error("Fail to delete document with ID: {}", id, ioe);
        throw new TechnicalException("Fail to delete document with ID: " + id, ioe);
    }
}
Example 6
Source File: TestFuzzyQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testSingleQueryExactMatchScoresHighest() throws Exception { //See issue LUCENE-329 - IDF shouldn't wreck similarity ranking Directory directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), directory); addDoc("smith", writer); addDoc("smith", writer); addDoc("smith", writer); addDoc("smith", writer); addDoc("smith", writer); addDoc("smith", writer); addDoc("smythe", writer); addDoc("smdssasd", writer); IndexReader reader = writer.getReader(); IndexSearcher searcher = newSearcher(reader); searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework writer.close(); String searchTerms[] = { "smith", "smythe", "smdssasd" }; for (String searchTerm : searchTerms) { FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; Document bestDoc = searcher.doc(hits[0].doc); assertTrue(hits.length > 0); String topMatch = bestDoc.get("field"); assertEquals(searchTerm, topMatch); if (hits.length > 1) { Document worstDoc = searcher.doc(hits[hits.length - 1].doc); String worstMatch = worstDoc.get("field"); assertNotSame(searchTerm, worstMatch); } } reader.close(); directory.close(); }
Example 7
Source File: OlatFullIndexer.java From olat with Apache License 2.0 | 5 votes |
/**
 * Increments the per-document-type counter for the given indexed document.
 *
 * @param document the indexed document whose type field keys the counter map
 */
private void incrementDocumentTypeCounter(final Document document) {
    final String documentType = document.get(AbstractOlatDocument.DOCUMENTTYPE_FIELD_NAME);
    int intValue = 0;
    // Single lookup instead of containsKey()+get(); a null value means "not counted yet".
    final Integer docCounter = documentCounters.get(documentType);
    if (docCounter != null) {
        intValue = docCounter.intValue();
    }
    intValue++;
    // Integer.valueOf uses the cache; the deprecated new Integer(...) always allocated.
    documentCounters.put(documentType, Integer.valueOf(intValue));
}
Example 8
Source File: SearchResultsImpl.java From olat with Apache License 2.0 | 5 votes |
/**
 * Creates a result document for one search hit. Returns null if the identity
 * does not have sufficient privileges to see the document.
 *
 * @param doc the matching Lucene document
 * @param pos the hit's position in the result list
 * @param query the executed query (used for highlighting)
 * @param analyzer analyzer used for highlighting
 * @param doHighlight whether to compute highlighted excerpts
 * @param identity the searching user
 * @param roles the searching user's roles
 * @return the result document, or null when access is denied
 * @throws IOException if highlighting fails reading the index
 */
private ResultDocument createResultDocument(final Document doc, final int pos, final Query query, final Analyzer analyzer, final boolean doHighlight, final Identity identity, final Roles roles) throws IOException {
    boolean hasAccess = false;
    if (roles.isOLATAdmin()) {
        // Admins see everything — skip the per-resource access check.
        hasAccess = true;
    } else {
        String resourceUrl = doc.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        if (resourceUrl == null) {
            resourceUrl = "";
        }
        // Rebuild the business path stored with the document and ask the main
        // indexer whether this identity/role combination may access it.
        final BusinessControl businessControl = BusinessControlFactory.getInstance().createFromString(resourceUrl);
        hasAccess = mainIndexer.checkAccess(null, businessControl, identity, roles);
    }
    ResultDocument resultDoc;
    if (hasAccess) {
        resultDoc = new ResultDocument(doc, pos);
        if (doHighlight) {
            doHighlight(query, analyzer, doc, resultDoc);
        }
    } else {
        resultDoc = null;
    }
    return resultDoc;
}
Example 9
Source File: TripleIndexContext.java From AGDISTIS with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Runs the given boolean query against the context index and returns at most
 * the top 500 triples (context, URI, URI count), sorted.
 *
 * @param maxNumberOfResults upper bound passed to the searcher
 * @param bq the query to execute
 * @return up to 500 sorted triples; empty list when nothing matched
 * @throws IOException if the index cannot be read
 */
private List<Triple> getFromIndex(int maxNumberOfResults, BooleanQuery bq) throws IOException {
    log.debug("\t start asking index by context...");
    ScoreDoc[] hits = isearcher.search(bq, null, maxNumberOfResults).scoreDocs;
    if (hits.length == 0) {
        return new ArrayList<Triple>();
    }
    List<Triple> triples = new ArrayList<Triple>();
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = isearcher.doc(hits[i].doc);
        // Stored fields map onto the triple as: s = context, p = URI, o = URI count.
        String s = hitDoc.get(FIELD_NAME_CONTEXT);
        String p = hitDoc.get(FIELD_NAME_URI);
        String o = hitDoc.get(FIELD_NAME_URI_COUNT);
        triples.add(new Triple(s, p, o));
    }
    log.debug("\t finished asking index...");
    Collections.sort(triples);
    // Cap at the 500 best triples. The original's two branches were equivalent
    // to this single expression (subList(0, size()) is the whole list).
    return triples.subList(0, Math.min(triples.size(), 500));
}
Example 10
Source File: CourseServiceImpl.java From TinyMooc with Apache License 2.0 | 5 votes |
public List<Course> getCourses(String query) { try { List<Course> qlist = new ArrayList<Course>(); IndexSearcher indexSearcher = new IndexSearcher(INDEXPATH); long begin = new Date().getTime(); //下面的是进行title,content 两个范围内进行收索. SHOULD 表示OR BooleanClause.Occur[] clauses = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}; Query queryOBJ = MultiFieldQueryParser.parse(query, new String[]{"courseIntro", "courseTitle"}, clauses, new StandardAnalyzer());//parser.parse(query); Filter filter = null; //################# 搜索相似度最高的记录 ################### TopDocs topDocs = indexSearcher.search(queryOBJ, filter, 1000); Course course = null; //输出结果 for (ScoreDoc scoreDoc : topDocs.scoreDocs) { Document targetDoc = indexSearcher.doc(scoreDoc.doc); course = new Course(); String courseIntro = targetDoc.get("courseIntro"); String courseTitle = targetDoc.get("courseTitle"); String courseId = targetDoc.get("courseId"); TokenStream contentTokenStream = analyzer.tokenStream("courseIntro", new StringReader(courseIntro)); TokenStream titleTokenStream = analyzer.tokenStream("courseTitle", new StringReader(courseTitle)); course.setCourseIntro(courseIntro); course.setCourseTitle(courseTitle); course.setCourseId(courseId); course.setType(targetDoc.get("type")); course.setCourseState(targetDoc.get("courseState")); qlist.add(course); } indexSearcher.close(); return qlist; } catch (Exception e) { logger.error("getCourses error."); return null; } }
Example 11
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0 | 5 votes |
public void iterateThroughDocListAll() throws IOException { int n = reader.maxDoc(); for (int i = 0; i < n; i++) { Document doc = reader.document(i); // the doc.get pulls out the values stored - ONLY if you store the fields String docnum = doc.get("docnum"); String all = doc.get(Lucene4IRConstants.FIELD_ALL).trim(); if (all.length() == 0) { System.out.println("docnum: " + docnum); } } }
Example 12
Source File: OperatorGlobalSearchGUIProvider.java From rapidminer-studio with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Builds drag-and-drop support for the operator referenced by the given search
 * document, or null when the document has no unique id or the operator cannot
 * be instantiated.
 */
@Override
public DragGestureListener getDragAndDropSupport(final Document document) {
    DragGestureListener gesture = null;
    final String operatorKey = document.get(GlobalSearchUtilities.FIELD_UNIQUE_ID);
    if (operatorKey != null) {
        try {
            gesture = new OperatorDragGesture(OperatorService.getOperatorDescription(operatorKey).createOperatorInstance());
        } catch (OperatorCreationException creationFailure) {
            // Operator could not be instantiated — no drag support for this entry.
            gesture = null;
        }
    } else {
        LogService.getRoot().log(Level.WARNING, "com.rapidminer.gui.processeditor.global_search.OperatorSearchManager.error.no_key");
    }
    return gesture;
}
Example 13
Source File: SearchEngineIndexer.java From gravitee-management-rest-api with Apache License 2.0 | 5 votes |
/**
 * Indexes (creates or replaces) the given document in the Lucene index and
 * commits the change.
 *
 * @param document the document to write; its id field identifies the entry
 * @return the writer sequence number of the update
 * @throws TechnicalException if the underlying index write fails
 */
public long index(Document document) throws TechnicalException {
    logger.debug("Updating a document into the Lucene index");
    final String id = document.get(ID_FIELD);
    try {
        // updateDocument atomically deletes any existing document matching the
        // id term and adds the new version.
        final long sequenceNumber = writer.updateDocument(new Term(ID_FIELD, id), document);
        writer.commit();
        return sequenceNumber;
    } catch (IOException ioe) {
        logger.error("Fail to index document with ID: {}", id, ioe);
        throw new TechnicalException("Fail to index document with ID: " + id, ioe);
    }
}
Example 14
Source File: NGramTestSetup.java From uyuni with GNU General Public License v2.0 | 5 votes |
/**
 * Logs each hit's rank, score, and stored name/description fields.
 *
 * @param hits the search hits to display
 * @throws IOException if reading a hit's document fails
 */
protected void displayHits(Hits hits) throws IOException {
    final int hitCount = hits.length();
    for (int rank = 0; rank < hitCount; rank++) {
        final Document doc = hits.doc(rank);
        final String name = doc.get("name");
        final String description = doc.get("description");
        log.info("Hit<" + rank + "> Score< " + hits.score(rank) + "> name = <" + name + "> description = <" + description + ">");
    }
}
Example 15
Source File: DocumentBuilder.java From modernmt with Apache License 2.0 | 4 votes |
/**
 * Reads the stored document-id field from a Lucene document.
 *
 * @param self the document to read from
 * @return the stored id value, or null if the field was not stored
 */
public static String getId(Document self) {
    final String documentId = self.get(DOC_ID_FIELD);
    return documentId;
}
Example 16
Source File: DumpTermsApp.java From lucene4ir with Apache License 2.0 | 4 votes |
public void extractBigramsFromStoredText() throws IOException { HashMap<String, Integer> hmap = new HashMap<String, Integer>(); int n = reader.maxDoc(); for (int i = 0; i < n; i++) { Document doc = reader.document(i); String all = doc.get(lucene4ir.Lucene4IRConstants.FIELD_ALL); Analyzer a = new StandardAnalyzer(); TokenStream ts = a.tokenStream(null, all); ts.reset(); String w1 = ""; String w2 = ""; while (ts.incrementToken()) { w1 = w2; w2 = ts.getAttribute(CharTermAttribute.class).toString(); if (w1 != "") { //System.out.println(w1 + " " + w2); String key = w1 + " " + w2; if (hmap.containsKey(key)==true) { int v = hmap.get(key); hmap.put(key,v+1); } else { hmap.put(key, 1); } } } } Set set = hmap.entrySet(); Iterator iterator = set.iterator(); while(iterator.hasNext()) { Map.Entry me = (Map.Entry)iterator.next(); if ((int)me.getValue() > 2) { System.out.print(me.getKey() + ": "); System.out.println(me.getValue()); } } }
Example 17
Source File: LumongoSegment.java From lumongo with Apache License 2.0 | 4 votes |
/**
 * Builds a ScoredResult for the i-th search hit: loads the appropriate stored
 * fields for the requested fetch type, optionally attaches the stored
 * document (with highlighting/analysis), and fills in score, id, timestamp,
 * and segment metadata.
 *
 * @throws Exception when highlighting or analysis is requested without a FULL fetch,
 *         or when reading/processing the document fails
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i, FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList, List<AnalysisHandler> analysisHandlerList) throws Exception {
    int docId = results[i].doc;
    // Choose which stored fields to load: full document, metadata, or the default set.
    Set<String> fieldsToFetch = fetchSet;
    if (indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
        if (FetchType.FULL.equals(resultFetchType)) {
            fieldsToFetch = fetchSetWithDocument;
        } else if (FetchType.META.equals(resultFetchType)) {
            fieldsToFetch = fetchSetWithMeta;
        }
    }
    Document d = is.doc(docId, fieldsToFetch);
    IndexableField f = d.getField(LumongoConstants.TIMESTAMP_FIELD);
    long timestamp = f.numericValue().longValue();
    ScoredResult.Builder srBuilder = ScoredResult.newBuilder();
    String uniqueId = d.get(LumongoConstants.ID_FIELD);
    // Highlighting and analysis both need the complete stored document.
    if (!highlighterList.isEmpty() && !FetchType.FULL.equals(resultFetchType)) {
        throw new Exception("Highlighting requires a full fetch of the document");
    }
    if (!analysisHandlerList.isEmpty() && !FetchType.FULL.equals(resultFetchType)) {
        throw new Exception("Analysis requires a full fetch of the document");
    }
    if (!FetchType.NONE.equals(resultFetchType)) {
        handleStoredDoc(srBuilder, uniqueId, d, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
    }
    srBuilder.setScore(results[i].score);
    srBuilder.setUniqueId(uniqueId);
    srBuilder.setTimestamp(timestamp);
    srBuilder.setDocId(docId);
    srBuilder.setSegment(segmentNumber);
    srBuilder.setIndexName(indexName);
    srBuilder.setResultIndex(i);
    if (sorting) {
        handleSortValues(sortRequest, results[i], srBuilder);
    }
    return srBuilder;
}
Example 18
Source File: MtasDocumentIndex.java From inception with Apache License 2.0 | 4 votes |
/**
 * Counts, per index segment, the span-query matches that are visible to the
 * requesting user: live documents only, optionally limited to one source
 * document, preferring annotation documents over their source documents, and
 * excluding other users' annotation documents.
 *
 * @return the number of matching span positions, or -1 when processing a
 *         segment failed
 */
private long doCountResults(IndexSearcher searcher, SearchQueryRequest aRequest, MtasSpanQuery q) throws IOException {
    ListIterator<LeafReaderContext> leafReaderContextIterator = searcher.getIndexReader().leaves().listIterator();
    Map<Long, Long> annotatableDocuments = listAnnotatableDocuments(aRequest.getProject(), aRequest.getUser());
    final float boost = 0;
    SpanWeight spanweight = q.rewrite(searcher.getIndexReader()).createWeight(searcher, false, boost);
    long numResults = 0;
    while (leafReaderContextIterator.hasNext()) {
        LeafReaderContext leafReaderContext = leafReaderContextIterator.next();
        try {
            Spans spans = spanweight.getSpans(leafReaderContext, SpanWeight.Postings.POSITIONS);
            SegmentReader segmentReader = (SegmentReader) leafReaderContext.reader();
            if (spans != null) {
                while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
                    // Skip deleted documents: either the segment has no deletions
                    // at all, or this particular doc must still be live.
                    if (segmentReader.numDocs() == segmentReader.maxDoc() || segmentReader.getLiveDocs().get(spans.docID())) {
                        Document document = segmentReader.document(spans.docID());
                        // Retrieve the owning user and document ids stored with the hit.
                        String user = document.get(FIELD_USER);
                        String rawSourceDocumentId = document.get(FIELD_SOURCE_DOCUMENT_ID);
                        String rawAnnotationDocumentId = document.get(FIELD_ANNOTATION_DOCUMENT_ID);
                        if (rawSourceDocumentId == null || rawAnnotationDocumentId == null) {
                            log.trace("Indexed document lacks source/annotation document IDs" + " - source: {}, annotation: {}", rawSourceDocumentId, rawAnnotationDocumentId);
                            continue;
                        }
                        long sourceDocumentId = Long.valueOf(rawSourceDocumentId);
                        long annotationDocumentId = Long.valueOf(rawAnnotationDocumentId);
                        // If the query is limited to a given document, skip any
                        // results which are not in the given document.
                        Optional<SourceDocument> limitedToDocument = aRequest.getLimitedToDocument();
                        if (limitedToDocument.isPresent() && !Objects.equals(limitedToDocument.get().getId(), sourceDocumentId)) {
                            log.trace("Query limited to document {}, skipping results for " + "document {}", limitedToDocument.get().getId(), sourceDocumentId);
                            continue;
                        }
                        if (annotatableDocuments.containsKey(sourceDocumentId) && annotationDocumentId == -1) {
                            // Exclude result if the retrieved document is a source
                            // document (annotationDocument == -1) AND it has a
                            // corresponding annotation document for this user.
                            log.trace("Skipping results from indexed source document {} in" + "favor of results from the corresponding annotation " + "document", sourceDocumentId);
                            continue;
                        } else if (annotationDocumentId != -1 && !aRequest.getUser().getUsername().equals(user)) {
                            // Exclude annotation documents belonging to other users.
                            log.trace("Skipping results from annotation document for user {} " + "which does not match the requested user {}", user, aRequest.getUser().getUsername());
                            continue;
                        }
                        // Each start position within the doc is one result.
                        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
                            numResults++;
                        }
                    }
                }
            }
        } catch (Exception e) {
            // NOTE(review): the loop continues after setting -1, so a later
            // segment can increment from -1 and mask the failure — confirm intent.
            log.error("Unable to process query results", e);
            numResults = -1;
        }
    }
    return numResults;
}
Example 19
Source File: ExampleStatsApp.java From lucene4ir with Apache License 2.0 | 4 votes |
public void extractBigramsFromStoredText() throws IOException { HashMap<String, Integer> hmap = new HashMap<String, Integer>(); int n = reader.maxDoc(); for (int i = 0; i < n; i++) { Document doc = reader.document(i); String all = doc.get(Lucene4IRConstants.FIELD_ALL); //String[] words = all.split(" "); //for(String w: words ){ // System.out.println(w); //} // int n = words.length; // for (int i=1; i<n; i++){ // System.out.println(words[i-1].toLowerCase().trim() + " " + words[i].toLowerCase().trim()); // } Analyzer a = new StandardAnalyzer(); TokenStream ts = a.tokenStream(null, all); ts.reset(); String w1 = ""; String w2 = ""; while (ts.incrementToken()) { w1 = w2; w2 = ts.getAttribute(CharTermAttribute.class).toString(); if (w1 != "") { //System.out.println(w1 + " " + w2); String key = w1 + " " + w2; if (hmap.containsKey(key)==true) { int v = hmap.get(key); hmap.put(key,v+1); } else { hmap.put(key, 1); } } } } Set set = hmap.entrySet(); Iterator iterator = set.iterator(); while(iterator.hasNext()) { Map.Entry me = (Map.Entry)iterator.next(); if ((int)me.getValue() > 2) { System.out.print(me.getKey() + ": "); System.out.println(me.getValue()); } } }
Example 20
Source File: RetrievalAppQueryExpansion.java From lucene4ir with Apache License 2.0 | 4 votes |
/**
 * Reads the query file (one "qno term term ..." per line), runs each query,
 * and writes the top results in TREC run format to the result file.
 */
public void processQueryFile() {
    /*
      Assumes the query file contains a qno followed by the query terms.
      One query per line, e.g.:
        Q1 hello world
        Q2 hello hello
        Q3 hello
    */
    // try-with-resources closes both streams on every path; the original
    // leaked the reader if the FileWriter constructor threw, because the
    // close() calls sat in a finally attached only to the inner try.
    try (BufferedReader br = new BufferedReader(new FileReader(p.queryFile));
         FileWriter fw = new FileWriter(new File(p.resultFile))) {
        String line = br.readLine();
        while (line != null) {
            String[] parts = line.split(" ");
            String qno = parts[0];
            // StringBuilder avoids repeated string reallocation in the loop.
            StringBuilder queryTerms = new StringBuilder();
            for (int i = 1; i < parts.length; i++) {
                queryTerms.append(" ").append(parts[i]);
            }
            ScoreDoc[] scored = runQuery(qno, queryTerms.toString());
            int n = Math.min(p.maxResults, scored.length);
            for (int i = 0; i < n; i++) {
                Document doc = searcher.doc(scored[i].doc);
                String docno = doc.get("docnum");
                // NOTE(review): standard TREC run format uses "Q0", not "QO" —
                // output kept as-is for compatibility; confirm before changing.
                fw.write(qno + " QO " + docno + " " + (i + 1) + " " + scored[i].score + " " + p.runTag);
                fw.write(System.lineSeparator());
            }
            line = br.readLine();
        }
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}