Java Code Examples for org.apache.lucene.document.Document#getField()
The following examples show how to use org.apache.lucene.document.Document#getField().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LuceneSearchIndex.java From dremio-oss with Apache License 2.0 | 6 votes |
private List<Doc> toDocs(ScoreDoc[] hits, Searcher searcher) throws IOException{ List<Doc> documentList = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { ScoreDoc scoreDoc = hits[i]; Document doc = searcher.doc(scoreDoc.doc); IndexableField idField = doc.getField("_id"); if(idField == null){ // deleted between index hit and retrieval. continue; } final BytesRef ref = idField.binaryValue(); final byte[] bytes = new byte[ref.length]; System.arraycopy(ref.bytes, ref.offset, bytes, 0, ref.length); Doc outputDoc = new Doc(scoreDoc, bytes, 0 /*version*/); documentList.add(outputDoc); } return documentList; }
Example 2
Source File: SnapshotTagResult.java From spacewalk with GNU General Public License v2.0 | 6 votes |
/**
 * Builds a snapshot-tag result from a Lucene hit.
 * Each of the "id", "name", "serverId" and "snapshotId" fields is copied only when the
 * document contains it; rank and score are always set.
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public SnapshotTagResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    if (hasField(doc, "name")) {
        setName(fieldText(doc, "name"));
    }
    if (hasField(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (hasField(doc, "snapshotId")) {
        setSnapshotId(fieldText(doc, "snapshotId"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 3
Source File: DocResult.java From uyuni with GNU General Public License v2.0 | 6 votes |
/**
 * Builds a documentation result from a Lucene hit.
 * The "url" field, when present, supplies both the url and the id. The "title" field, when
 * present, supplies both the title and the name; otherwise both are set to "EMPTY".
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "url")) {
        setUrl(fieldText(doc, "url"));
        setId(fieldText(doc, "url"));
    }
    if (!hasField(doc, "title")) {
        setTitle("EMPTY");
        setName("EMPTY");
    }
    else {
        setTitle(fieldText(doc, "title"));
        setName(fieldText(doc, "title"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 4
Source File: DocResult.java From spacewalk with GNU General Public License v2.0 | 6 votes |
/**
 * Builds a documentation result from a Lucene hit.
 * The "url" field, when present, supplies both the url and the id. The "title" field, when
 * present, supplies both the title and the name; otherwise both are set to "EMPTY".
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "url")) {
        setUrl(fieldText(doc, "url"));
        setId(fieldText(doc, "url"));
    }
    if (!hasField(doc, "title")) {
        setTitle("EMPTY");
        setName("EMPTY");
    }
    else {
        setTitle(fieldText(doc, "title"));
        setName(fieldText(doc, "title"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 5
Source File: DocumentDictionary.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns the value of the <code>weightField</code> for the current document. * Retrieves the value for the <code>weightField</code> if it's stored (using <code>doc</code>) * or if it's indexed as {@link NumericDocValues} (using <code>docId</code>) for the document. * If no value is found, then the weight is 0. */ protected long getWeight(Document doc, int docId) throws IOException { IndexableField weight = doc.getField(weightField); if (weight != null) { // found weight as stored return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0; } else if (weightValues != null) { // found weight as NumericDocValue if (weightValues.docID() < docId) { weightValues.advance(docId); } if (weightValues.docID() == docId) { return weightValues.longValue(); } else { // missing return 0; } } else { // fall back return 0; } }
Example 6
Source File: HardwareDeviceResult.java From spacewalk with GNU General Public License v2.0 | 5 votes |
/**
 * Builds a hardware-device result from a Lucene hit.
 * The "description" field supplies the name, "serverId" the server id and "id" the id;
 * each is copied only when the document contains it. Rank and score are always set.
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "description")) {
        setName(fieldText(doc, "description"));
    }
    if (hasField(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (hasField(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 7
Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0 | 5 votes |
@Test public void testWithOptionalPayload() throws IOException { Directory dir = newDirectory(); Analyzer analyzer = new MockAnalyzer(random()); IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setMergePolicy(newLogMergePolicy()); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); // Create a document that is missing the payload field Document doc = new Document(); Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES); doc.add(field); // do not store the payload or the contexts Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100); doc.add(weight); writer.addDocument(doc); writer.commit(); writer.close(); IndexReader ir = DirectoryReader.open(dir); // Even though the payload field is missing, the dictionary iterator should not skip the document // because the payload field is optional. Dictionary dictionaryOptionalPayload = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME); InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator(); BytesRef f = inputIterator.next(); assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME); assertEquals(inputIterator.weight(), weightField.numericValue().longValue()); IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME); assertNull(payloadField); assertTrue(inputIterator.payload().length == 0); IOUtils.close(ir, analyzer, dir); }
Example 8
Source File: ServerCustomInfoResult.java From spacewalk with GNU General Public License v2.0 | 5 votes |
/**
 * Builds a server custom-info result from a Lucene hit.
 * Each of the "value", "serverId" and "id" fields is copied only when the document
 * contains it; rank and score are always set.
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public ServerCustomInfoResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "value")) {
        setValue(fieldText(doc, "value"));
    }
    if (hasField(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (hasField(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 9
Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Collects the numeric ids of all documents whose doc-type field equals
 * {@code DOC_TYPE_ERROR_NODE}.
 * The id is read from {@code FIELD_SOLR4_ID}; a leading {@code PREFIX_ERROR} is removed
 * before the remainder is parsed as a {@code Long}.
 *
 * @return the parsed ids
 * @throws IOException declared for failures while searching or loading documents
 */
@Override
public Set<Long> getErrorDocIds() throws IOException {
  final Set<Long> ids = new HashSet<>();
  RefCounted<SolrIndexSearcher> searcherRef = null;
  try {
    searcherRef = this.core.getSearcher();
    final SolrIndexSearcher searcher = searcherRef.get();

    final TermQuery errorNodes = new TermQuery(new Term(FIELD_DOC_TYPE, DOC_TYPE_ERROR_NODE));
    final DocListCollector collector = new DocListCollector();
    searcher.search(errorNodes, collector);

    final IntArrayList matches = collector.getDocs();
    final int matchCount = matches.size();
    for (int pos = 0; pos < matchCount; pos++) {
      final Document stored = searcher.doc(matches.get(pos), REQUEST_ONLY_ID_FIELD);
      final String rawId = stored.getField(FIELD_SOLR4_ID).stringValue();
      final String numericPart =
          rawId.startsWith(PREFIX_ERROR) ? rawId.substring(PREFIX_ERROR.length()) : rawId;
      ids.add(Long.valueOf(numericPart));
    }
  } finally {
    // searcherRef stays null when getSearcher() itself failed; only release what was acquired.
    ofNullable(searcherRef).ifPresent(RefCounted::decref);
  }
  return ids;
}
Example 10
Source File: DocsReader.java From nifi with Apache License 2.0 | 5 votes |
/**
 * Determines the byte offset for the record described by {@code d}.
 * When the document has a {@code FieldNames.BLOCK_INDEX} field, the offset of that block is
 * looked up in the reader's TOC; otherwise the numeric value of
 * {@code FieldNames.STORAGE_FILE_OFFSET} is returned.
 *
 * @param d document describing the record
 * @param reader reader whose TOC is consulted for block offsets
 * @return the byte offset
 */
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockIndexField = d.getField(FieldNames.BLOCK_INDEX);
    if (blockIndexField == null) {
        // No block index on the document: use the explicitly stored file offset.
        final IndexableField offsetField = d.getField(FieldNames.STORAGE_FILE_OFFSET);
        return offsetField.numericValue().longValue();
    }

    final int blockIndex = blockIndexField.numericValue().intValue();
    return reader.getTocReader().getBlockOffset(blockIndex);
}
Example 11
Source File: FieldValueFeature.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public float score() throws IOException { try { final Document document = context.reader().document(itr.docID(), fieldAsSet); final IndexableField indexableField = document.getField(field); if (indexableField == null) { return getDefaultValue(); } final Number number = indexableField.numericValue(); if (number != null) { return number.floatValue(); } else { final String string = indexableField.stringValue(); if (string.length() == 1) { // boolean values in the index are encoded with the // a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { return 1; } if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { return 0; } } } } catch (final IOException e) { throw new FeatureException( e.toString() + ": " + "Unable to extract feature for " + name, e); } return getDefaultValue(); }
Example 12
Source File: SpatialClusteringComponent.java From solr-spatial-clustering with Apache License 2.0 | 5 votes |
/**
 * Returns the string value of the named field, or {@code null} when the document has no
 * such field.
 */
private static String getFieldString(Document document, String name) {
    final IndexableField match = document.getField(name);
    return match != null ? match.stringValue() : null;
}
Example 13
Source File: IndexManager.java From dependency-track with Apache License 2.0 | 5 votes |
/**
 * Sets the string value of an existing field of a document.
 * A blank {@code value} (as decided by {@code StringUtils.isBlank}) is stored as the empty string.
 * @param doc the Lucene Document to update the field in
 * @param name the name of the field
 * @param value the value of the field
 * @since 3.0.0
 */
protected void updateField(final Document doc, final String name, String value) {
    final String normalized = StringUtils.isBlank(value) ? "" : value;
    final Field target = (Field) doc.getField(name);
    target.setStringValue(normalized);
}
Example 14
Source File: LargeFieldTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Asserts that the named field of {@code d} is a lazy field whose value has been loaded.
 * The field named {@code BIG_FIELD} is expected to be a
 * {@code SolrDocumentFetcher.LargeLazyField}; any other field a {@code LazyDocument.LazyField}.
 *
 * @param d document holding the field
 * @param fieldName name of the field to inspect
 */
private void assertLazyLoaded(Document d, String fieldName) {
  IndexableField field = d.getField(fieldName);
  // Compare by content: reference equality only works while both strings are the same instance.
  if (BIG_FIELD.equals(fieldName)) {
    assertTrue(field instanceof SolrDocumentFetcher.LargeLazyField);
    assertTrue(((SolrDocumentFetcher.LargeLazyField)field).hasBeenLoaded());
  } else {
    assertTrue(field instanceof LazyDocument.LazyField);
    assertTrue(((LazyDocument.LazyField)field).hasBeenLoaded());
  }
}
Example 15
Source File: HardwareDeviceResult.java From uyuni with GNU General Public License v2.0 | 5 votes |
/**
 * Builds a hardware-device result from a Lucene hit.
 * The "description" field supplies the name, "serverId" the server id and "id" the id;
 * each is copied only when the document contains it. Rank and score are always set.
 *
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "description")) {
        setName(fieldText(doc, "description"));
    }
    if (hasField(doc, "serverId")) {
        setServerId(fieldText(doc, "serverId"));
    }
    if (hasField(doc, "id")) {
        setId(fieldText(doc, "id"));
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/** Tells whether {@code doc} has a field called {@code fieldName}. */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

/** Returns the string value of the field {@code fieldName}; the field must be present. */
private static String fieldText(Document doc, String fieldName) {
    return doc.getField(fieldName).stringValue();
}
Example 16
Source File: LukeRequestHandler.java From lucene-solr with Apache License 2.0 | 4 votes |
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req) throws Exception { SolrIndexSearcher searcher = req.getSearcher(); SolrParams params = req.getParams(); Set<String> fields = null; String fl = params.get(CommonParams.FL); if (fl != null) { fields = new TreeSet<>(Arrays.asList(fl.split( "[,\\s]+" ))); } LeafReader reader = searcher.getSlowAtomicReader(); IndexSchema schema = searcher.getSchema(); // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields! Set<String> fieldNames = new TreeSet<>(); for(FieldInfo fieldInfo : reader.getFieldInfos()) { fieldNames.add(fieldInfo.name); } // Walk the term enum and keep a priority queue for each map in our set SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>(); for (String fieldName : fieldNames) { if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) { continue; //we're not interested in this field Still an issue here } SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>(); SchemaField sfield = schema.getFieldOrNull( fieldName ); FieldType ftype = (sfield==null)?null:sfield.getType(); fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() ); fieldMap.add("schema", getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); } Terms terms = reader.terms(fieldName); if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified) finfo.add( fieldName, fieldMap ); continue; } if(sfield != null && sfield.indexed() ) { if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS,true)) { Document doc = getFirstLiveDoc(terms, reader); if (doc != null) { // Found a document with this field try { IndexableField fld = doc.getField(fieldName); if (fld != null) { fieldMap.add("index", getFieldFlags(fld)); } else { // it is a 
non-stored field... fieldMap.add("index", "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: {}", fieldName); } } } fieldMap.add("docs", terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); } // Add the field finfo.add( fieldName, fieldMap ); } return finfo; }
Example 17
Source File: RoutingFieldMapper.java From Elasticsearch with Apache License 2.0 | 4 votes |
/**
 * Reads this mapper's value from the given document.
 *
 * @param document document to read from
 * @return the field value converted by {@code fieldType().value(...)}, or {@code null} when
 *         the document has no field under this mapper's index name
 */
public String value(Document document) {
    final String indexName = fieldType().names().indexName();
    final Field stored = (Field) document.getField(indexName);
    if (stored == null) {
        return null;
    }
    return (String) fieldType().value(stored);
}
Example 18
Source File: DocumentUtil.java From netbeans with Apache License 2.0 | 4 votes |
/**
 * Extracts the string value of the {@code FIELD_SOURCE} field.
 *
 * @param doc document to read from
 * @return the field's string value, or {@code null} when the document has no such field
 */
@Override
public String convert(Document doc) {
    final Field source = doc.getField(FIELD_SOURCE);
    if (source != null) {
        return source.stringValue();
    }
    return null;
}
Example 19
Source File: LumongoSegment.java From lumongo with Apache License 2.0 | 4 votes |
/**
 * Builds the {@code ScoredResult} for hit {@code i} of {@code results}.
 * The stored document is loaded with a field set chosen from the fetch type (when the index
 * stores documents), its timestamp and unique id are read, and the builder is populated with
 * score, ids, segment, index name and result position. Sort values are added when
 * {@code sorting} is set.
 *
 * @throws Exception when highlighters or analysis handlers are supplied without a FULL fetch,
 *         or propagated from loading/processing the document
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i,
		FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList,
		List<AnalysisHandler> analysisHandlerList) throws Exception {
	final int docId = results[i].doc;
	final boolean fullFetch = FetchType.FULL.equals(resultFetchType);

	// Start from the minimal field set and widen it only when documents are stored in the index.
	Set<String> storedFields = fetchSet;
	if (indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
		if (fullFetch) {
			storedFields = fetchSetWithDocument;
		}
		else if (FetchType.META.equals(resultFetchType)) {
			storedFields = fetchSetWithMeta;
		}
	}

	final Document stored = is.doc(docId, storedFields);
	final IndexableField timestampField = stored.getField(LumongoConstants.TIMESTAMP_FIELD);
	final long timestamp = timestampField.numericValue().longValue();

	final ScoredResult.Builder builder = ScoredResult.newBuilder();
	final String uniqueId = stored.get(LumongoConstants.ID_FIELD);

	if (!highlighterList.isEmpty() && !fullFetch) {
		throw new Exception("Highlighting requires a full fetch of the document");
	}
	if (!analysisHandlerList.isEmpty() && !fullFetch) {
		throw new Exception("Analysis requires a full fetch of the document");
	}

	if (!FetchType.NONE.equals(resultFetchType)) {
		handleStoredDoc(builder, uniqueId, stored, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
	}

	builder.setScore(results[i].score);
	builder.setUniqueId(uniqueId);
	builder.setTimestamp(timestamp);
	builder.setDocId(docId);
	builder.setSegment(segmentNumber);
	builder.setIndexName(indexName);
	builder.setResultIndex(i);

	if (sorting) {
		handleSortValues(sortRequest, results[i], builder);
	}
	return builder;
}
Example 20
Source File: BooleanPerceptronClassifier.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Creates a {@link BooleanPerceptronClassifier} and trains it in the constructor: initial
 * weights are seeded from the total term frequency of every term of the text field, then
 * each matching document that has both the text and class fields is classified and the
 * weights are updated whenever the assigned class differs from the stored class.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param batchSize      the size of the batch of docs to use for updating the perceptron weights
 * @param bias           the bias used for class separation; when {@code null} or 0 it is derived
 *                       from the index as sum of total term frequencies divided by doc count
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldName  the name of the field used as input for the classifier
 * @throws IOException if the building of the underlying {@link FST} fails and / or {@link TermsEnum} for the text field
 *                     cannot be found
 */
public BooleanPerceptronClassifier(IndexReader indexReader, Analyzer analyzer, Query query, Integer batchSize,
                                   Double bias, String classFieldName, String textFieldName) throws IOException {
  this.textTerms = MultiTerms.getTerms(indexReader, textFieldName);

  if (textTerms == null) {
    throw new IOException("term vectors need to be available for field " + textFieldName);
  }

  this.analyzer = analyzer;
  this.textFieldName = textFieldName;

  if (bias == null || bias == 0d) {
    // automatic assign the bias to be the average total term freq
    double t = (double) indexReader.getSumTotalTermFreq(textFieldName) / (double) indexReader.getDocCount(textFieldName);
    // NOTE(review): this compares a quotient with -1; it only matches when the two statistics
    // divide to exactly -1 — confirm this is the intended "statistics unavailable" check.
    if (t != -1) {
      this.bias = t;
    } else {
      throw new IOException(
              "bias cannot be assigned since term vectors for field "
                      + textFieldName + " do not exist");
    }
  } else {
    this.bias = bias;
  }

  // TODO : remove this map as soon as we have a writable FST
  SortedMap<String, Double> weights = new ConcurrentSkipListMap<>();

  // Seed every term's weight with its total term frequency.
  TermsEnum termsEnum = textTerms.iterator();
  BytesRef textTerm;
  while ((textTerm = termsEnum.next()) != null) {
    weights.put(textTerm.utf8ToString(), (double) termsEnum.totalTermFreq());
  }
  updateFST(weights);

  IndexSearcher indexSearcher = new IndexSearcher(indexReader);

  // Counts the documents that had both fields; drives the batch flag passed to updateWeights.
  int batchCount = 0;

  // Training set: documents with any value in the class field, narrowed by the optional query.
  BooleanQuery.Builder q = new BooleanQuery.Builder();
  q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
  if (query != null) {
    q.add(new BooleanClause(query, BooleanClause.Occur.MUST));
  }
  // run the search and use stored field values
  for (ScoreDoc scoreDoc : indexSearcher.search(q.build(), Integer.MAX_VALUE).scoreDocs) {
    Document doc = indexSearcher.doc(scoreDoc.doc);

    IndexableField textField = doc.getField(textFieldName);

    // get the expected result
    IndexableField classField = doc.getField(classFieldName);

    // Documents missing either field are skipped and do not advance batchCount.
    if (textField != null && classField != null) {
      // assign class to the doc
      ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
      Boolean assignedClass = classificationResult.getAssignedClass();

      Boolean correctClass = Boolean.valueOf(classField.stringValue());
      // Non-zero only when the prediction differs from the stored class.
      long modifier = correctClass.compareTo(assignedClass);
      if (modifier != 0) {
        // NOTE(review): batchSize is a boxed Integer and is unboxed by the % below; a null
        // argument would throw NullPointerException here, and 0 an ArithmeticException.
        updateWeights(indexReader, scoreDoc.doc, assignedClass,
                weights, modifier, batchCount % batchSize == 0);
      }
      batchCount++;
    }
  }
  weights.clear(); // free memory while waiting for GC
}