org.apache.lucene.document.Document#getField

Source File: LuceneSearchIndex.java From dremio-oss with Apache License 2.0

6 votes

/**
 * Turns raw search hits into {@code Doc} results.
 *
 * <p>For every hit the stored document is loaded through {@code searcher} and the binary
 * value of its {@code _id} field is copied into a fresh byte array. Hits whose document has
 * no {@code _id} field are left out of the result.
 *
 * @param hits the hits to convert
 * @param searcher the searcher used to load each hit's document
 * @return one {@code Doc} per hit that still carries an {@code _id} field, in hit order
 * @throws IOException if loading a document fails
 */
private List<Doc> toDocs(ScoreDoc[] hits, Searcher searcher) throws IOException{
  final List<Doc> converted = new ArrayList<>();
  for (final ScoreDoc hit : hits) {
    final Document stored = searcher.doc(hit.doc);
    final IndexableField idField = stored.getField("_id");
    if (idField != null) {
      // Copy only the slice [offset, offset + length) that the BytesRef points at.
      final BytesRef idRef = idField.binaryValue();
      final byte[] idBytes = new byte[idRef.length];
      System.arraycopy(idRef.bytes, idRef.offset, idBytes, 0, idRef.length);
      converted.add(new Doc(hit, idBytes, 0 /*version*/));
    }
    // otherwise: deleted between index hit and retrieval, nothing to report.
  }
  return converted;
}

Source File: SnapshotTagResult.java From spacewalk with GNU General Public License v2.0

6 votes

/**
 * Builds a snapshot-tag result from a lucene hit. The id, name, serverId and
 * snapshotId properties are each set only when the document contains the
 * field of the same name; rank and score are always set.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public SnapshotTagResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "id")) {
        setId(doc.getField("id").stringValue());
    }
    if (hasField(doc, "name")) {
        setName(doc.getField("name").stringValue());
    }
    if (hasField(doc, "serverId")) {
        setServerId(doc.getField("serverId").stringValue());
    }
    if (hasField(doc, "snapshotId")) {
        setSnapshotId(doc.getField("snapshotId").stringValue());
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether a document contains a field with the given name.
 * @param doc lucene document to inspect
 * @param fieldName name of the field to look for
 * @return true if the lookup of fieldName on doc is not null
 */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

Source File: DocResult.java From uyuni with GNU General Public License v2.0

6 votes

/**
 * Builds a documentation search result from a lucene hit. The "url" field,
 * when present, supplies both the url and the id. The "title" field supplies
 * both the title and the name; when it is absent both are set to "EMPTY".
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (doc.getField("url") != null) {
        String url = doc.getField("url").stringValue();
        setUrl(url);
        setId(url);
    }

    // Start from the placeholder and replace it only if a title is present.
    String title = "EMPTY";
    if (doc.getField("title") != null) {
        title = doc.getField("title").stringValue();
    }
    setTitle(title);
    setName(title);

    setRank(rankIn);
    setScore(scoreIn);
}

Source File: DocResult.java From spacewalk with GNU General Public License v2.0

6 votes

/**
 * Builds a documentation search result from a lucene hit. The "url" field,
 * when present, supplies both the url and the id. The "title" field supplies
 * both the title and the name; when it is absent both are set to "EMPTY".
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public DocResult(int rankIn, float scoreIn, Document doc) {
    if (doc.getField("url") != null) {
        String location = doc.getField("url").stringValue();
        setUrl(location);
        setId(location);
    }

    // The placeholder is kept unless the document carries a title field.
    String heading = "EMPTY";
    if (doc.getField("title") != null) {
        heading = doc.getField("title").stringValue();
    }
    setTitle(heading);
    setName(heading);

    setRank(rankIn);
    setScore(scoreIn);
}

Source File: DocumentDictionary.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Looks up the weight of the current document.
 * <p>
 * A stored <code>weightField</code> on <code>doc</code> takes precedence: its numeric value is
 * returned, or 0 when the stored field has no numeric value. Otherwise, if
 * {@link NumericDocValues} are available, they are advanced to <code>docId</code> when they are
 * positioned before it, and their value is returned if they land exactly on <code>docId</code>.
 * In every other case the weight is 0.
 *
 * @param doc the stored fields of the current document
 * @param docId the id of the current document, used for the doc-values lookup
 * @return the weight of the document, or 0 if none is found
 * @throws IOException if reading the doc values fails
 */
protected long getWeight(Document doc, int docId) throws IOException {
  final IndexableField storedWeight = doc.getField(weightField);
  if (storedWeight != null) {
    // found weight as stored
    final Number stored = storedWeight.numericValue();
    return stored == null ? 0 : stored.longValue();
  }

  if (weightValues == null) {
    // fall back
    return 0;
  }

  // found weight as NumericDocValue
  if (weightValues.docID() < docId) {
    weightValues.advance(docId);
  }
  // anything other than an exact landing on docId means the value is missing
  return weightValues.docID() == docId ? weightValues.longValue() : 0;
}

Source File: HardwareDeviceResult.java From spacewalk with GNU General Public License v2.0

5 votes

/**
 * Builds a hardware-device result from a lucene hit. The name is taken from
 * the "description" field, the server id from "serverId" and the id from
 * "id"; each is set only when the document contains that field. Rank and
 * score are always set.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "description")) {
        setName(doc.getField("description").stringValue());
    }
    if (hasField(doc, "serverId")) {
        setServerId(doc.getField("serverId").stringValue());
    }
    if (hasField(doc, "id")) {
        setId(doc.getField("id").stringValue());
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether a document contains a field with the given name.
 * @param doc lucene document to inspect
 * @param fieldName name of the field to look for
 * @return true if the lookup of fieldName on doc is not null
 */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes a single document that has a text field and a doc-values weight but no payload
 * field, then checks that a {@link DocumentDictionary} configured with a payload field still
 * returns the document, with the expected weight and an empty payload.
 */
@Test
public void testWithOptionalPayload() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

  // Create a document that is missing the payload field
  Document doc = new Document();
  Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
  doc.add(field);
  // do not store the payload or the contexts
  Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
  doc.add(weight);
  writer.addDocument(doc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);

  // Even though the payload field is missing, the dictionary iterator should not skip the document
  // because the payload field is optional.
  Dictionary dictionaryOptionalPayload =
      new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
  BytesRef f = inputIterator.next();
  // assertEquals(expected, actual) reports both values on failure and fails cleanly
  // (instead of throwing a NullPointerException) if the iterator returned nothing.
  assertEquals(new BytesRef(doc.get(FIELD_NAME)), f);
  IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
  assertEquals(weightField.numericValue().longValue(), inputIterator.weight());
  IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
  assertNull(payloadField);
  assertEquals(0, inputIterator.payload().length);
  IOUtils.close(ir, analyzer, dir);
}

Source File: ServerCustomInfoResult.java From spacewalk with GNU General Public License v2.0

5 votes

/**
 * Builds a server-custom-info result from a lucene hit. The value, serverId
 * and id properties are each set only when the document contains the field
 * of the same name; rank and score are always set.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public ServerCustomInfoResult(int rankIn, float scoreIn, Document doc) {
    if (hasField(doc, "value")) {
        setValue(doc.getField("value").stringValue());
    }
    if (hasField(doc, "serverId")) {
        setServerId(doc.getField("serverId").stringValue());
    }
    if (hasField(doc, "id")) {
        setId(doc.getField("id").stringValue());
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether a document contains a field with the given name.
 * @param doc lucene document to inspect
 * @param fieldName name of the field to look for
 * @return true if the lookup of fieldName on doc is not null
 */
private static boolean hasField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

Source File: SolrInformationServer.java From SearchServices with GNU Lesser General Public License v3.0

5 votes

/**
 * Collects the numeric ids of all documents whose doc-type field marks them as error nodes.
 *
 * <p>Each matching document is loaded with only its id field; a leading {@code PREFIX_ERROR}
 * is stripped from the id string before it is parsed as a {@code Long}.
 *
 * @return the parsed ids of all matching documents
 * @throws IOException if searching or loading a document fails
 */
@Override
public Set<Long> getErrorDocIds() throws IOException
{
    final Set<Long> collectedIds = new HashSet<>();
    RefCounted<SolrIndexSearcher> searcherRef = null;
    try
    {
        searcherRef = this.core.getSearcher();
        final SolrIndexSearcher searcher = searcherRef.get();

        final TermQuery errorQuery = new TermQuery(new Term(FIELD_DOC_TYPE, DOC_TYPE_ERROR_NODE));
        final DocListCollector collector = new DocListCollector();
        searcher.search(errorQuery, collector);

        final IntArrayList matches = collector.getDocs();
        final int matchCount = matches.size();
        for (int position = 0; position < matchCount; ++position)
        {
            final Document stored = searcher.doc(matches.get(position), REQUEST_ONLY_ID_FIELD);
            final String rawId = stored.getField(FIELD_SOLR4_ID).stringValue();

            // Drop the error prefix, if any, so that only the numeric part is parsed.
            final String numericId = rawId.startsWith(PREFIX_ERROR)
                    ? rawId.substring(PREFIX_ERROR.length())
                    : rawId;

            collectedIds.add(Long.valueOf(numericId));
        }
    }
    finally
    {
        // The searcher reference is only released if it was actually acquired.
        if (searcherRef != null)
        {
            searcherRef.decref();
        }
    }
    return collectedIds;
}

Source File: DocsReader.java From nifi with Apache License 2.0

5 votes

/**
 * Resolves the byte offset for a document. If the document carries a block-index field, the
 * offset of that block is looked up through the reader's TOC reader; otherwise the numeric
 * value of the storage-file-offset field is returned.
 *
 * @param d the document to resolve the offset for
 * @param reader the record reader whose TOC reader maps block indices to offsets
 * @return the byte offset for the document
 */
private long getByteOffset(final Document d, final RecordReader reader) {
    final IndexableField blockIndexField = d.getField(FieldNames.BLOCK_INDEX);
    if ( blockIndexField == null ) {
        // No block index recorded: use the offset stored directly on the document.
        return d.getField(FieldNames.STORAGE_FILE_OFFSET).numericValue().longValue();
    }

    final int index = blockIndexField.numericValue().intValue();
    return reader.getTocReader().getBlockOffset(index);
}

Source File: FieldValueFeature.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Scores the current document from the value of the configured stored field.
 *
 * <p>A numeric value is returned as a float. A one-character string equal to the first char
 * of {@code BoolField.TRUE_TOKEN} or {@code BoolField.FALSE_TOKEN} yields 1 or 0
 * respectively. A missing field, or any other string, yields the default value.
 *
 * @throws FeatureException if reading the stored document fails with an {@link IOException}
 */
@Override
public float score() throws IOException {

  try {
    final IndexableField stored =
        context.reader().document(itr.docID(), fieldAsSet).getField(field);
    if (stored == null) {
      return getDefaultValue();
    }

    final Number numeric = stored.numericValue();
    if (numeric != null) {
      return numeric.floatValue();
    }

    final String text = stored.stringValue();
    if (text.length() == 1) {
      // boolean values in the index are encoded with the
      // a single char contained in TRUE_TOKEN or FALSE_TOKEN
      // (see BoolField)
      final char token = text.charAt(0);
      if (token == BoolField.TRUE_TOKEN[0]) {
        return 1;
      }
      if (token == BoolField.FALSE_TOKEN[0]) {
        return 0;
      }
    }
  } catch (final IOException e) {
    throw new FeatureException(
        e.toString() + ": " +
            "Unable to extract feature for "
            + name, e);
  }
  // neither numeric nor a recognised boolean token
  return getDefaultValue();
}

Source File: SpatialClusteringComponent.java From solr-spatial-clustering with Apache License 2.0

5 votes

/**
 * Returns the string value of the named field, or {@code null} when the document has no
 * field with that name.
 *
 * @param document the document to read from
 * @param name the name of the field
 * @return the field's string value, or {@code null} if the field is absent
 */
private static String getFieldString(Document document, String name) {
    final IndexableField match = document.getField(name);
    return match == null ? null : match.stringValue();
}

Source File: IndexManager.java From dependency-track with Apache License 2.0

5 votes

/**
 * Sets the string value of an existing Field in a Document. A blank value is
 * stored as the empty string. The looked-up field is used without a null check,
 * so the document must already contain a field with the given name.
 * @param doc the Lucene Document to update the field in
 * @param name the name of the field
 * @param value the value of the field
 * @since 3.0.0
 */
protected void updateField(final Document doc, final String name, String value) {
    final String normalized = StringUtils.isBlank(value) ? "" : value;
    ((Field) doc.getField(name)).setStringValue(normalized);
}

Source File: LargeFieldTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that the named field of {@code d} is a lazy field whose value has been loaded.
 * The field named {@code BIG_FIELD} is expected to be a
 * {@code SolrDocumentFetcher.LargeLazyField}; every other field is expected to be a
 * {@code LazyDocument.LazyField}.
 *
 * @param d the document holding the field
 * @param fieldName the name of the field to check
 */
private void assertLazyLoaded(Document d, String fieldName) {
  IndexableField field = d.getField(fieldName);
  // Compare by value: reference equality on strings only holds when the caller happens to
  // pass the very same String instance as the constant.
  if (BIG_FIELD.equals(fieldName)) {
    assertTrue(field instanceof SolrDocumentFetcher.LargeLazyField);
    assertTrue(((SolrDocumentFetcher.LargeLazyField)field).hasBeenLoaded());
  } else {
    assertTrue(field instanceof LazyDocument.LazyField);
    assertTrue(((LazyDocument.LazyField)field).hasBeenLoaded());
  }
}

Source File: HardwareDeviceResult.java From uyuni with GNU General Public License v2.0

5 votes

/**
 * Builds a hardware-device result from a lucene hit. The name is taken from
 * the "description" field, the server id from "serverId" and the id from
 * "id"; each is set only when the document contains that field. Rank and
 * score are always set.
 * @param rankIn order of results returned from lucene
 * @param scoreIn score of this hit as defined by lucene query
 * @param doc lucene document containing data fields
 */
public HardwareDeviceResult(int rankIn, float scoreIn, Document doc) {
    if (containsField(doc, "description")) {
        setName(doc.getField("description").stringValue());
    }
    if (containsField(doc, "serverId")) {
        setServerId(doc.getField("serverId").stringValue());
    }
    if (containsField(doc, "id")) {
        setId(doc.getField("id").stringValue());
    }
    setRank(rankIn);
    setScore(scoreIn);
}

/**
 * Tells whether a document contains a field with the given name.
 * @param doc lucene document to inspect
 * @param fieldName name of the field to look for
 * @return true if the lookup of fieldName on doc is not null
 */
private static boolean containsField(Document doc, String fieldName) {
    return doc.getField(fieldName) != null;
}

Source File: LukeRequestHandler.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds a per-field description of the index, keyed by field name in alphabetical order.
 *
 * <p>If the request has an {@code fl} parameter, only the fields it lists (split on commas
 * and whitespace) are reported, unless it contains {@code *}. For each reported field the
 * entry holds the schema type and flags and, where applicable, the dynamic-field pattern.
 * Fields that have terms and are indexed according to the schema also get a document count
 * and, unless {@code INCLUDE_INDEX_FIELD_FLAGS} is false, the flags of the field in the first
 * live document found. Detailed info is added when the field was requested through
 * {@code fl}.
 *
 * @param req the request supplying the searcher, schema and parameters
 * @return a map from field name to that field's info map
 * @throws Exception if reading the index fails
 */
private static SimpleOrderedMap<Object> getIndexedFieldsInfo(SolrQueryRequest req)
    throws Exception {

  SolrIndexSearcher searcher = req.getSearcher();
  SolrParams params = req.getParams();

  Set<String> fields = null;
  String fl = params.get(CommonParams.FL);
  if (fl != null) {
    fields = new TreeSet<>(Arrays.asList(fl.split( "[,\\s]+" )));
  }

  LeafReader reader = searcher.getSlowAtomicReader();
  IndexSchema schema = searcher.getSchema();

  // Don't be tempted to put this in the loop below, the whole point here is to alphabetize the fields!
  Set<String> fieldNames = new TreeSet<>();
  for(FieldInfo fieldInfo : reader.getFieldInfos()) {
    fieldNames.add(fieldInfo.name);
  }

  // Walk the term enum and keep a priority queue for each map in our set
  SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();

  for (String fieldName : fieldNames) {
    if (fields != null && ! fields.contains(fieldName) && ! fields.contains("*")) {
      continue; //we're not interested in this field Still an issue here
    }

    SimpleOrderedMap<Object> fieldMap = new SimpleOrderedMap<>();

    SchemaField sfield = schema.getFieldOrNull( fieldName );
    FieldType ftype = (sfield==null)?null:sfield.getType();

    fieldMap.add( "type", (ftype==null)?null:ftype.getTypeName() );
    fieldMap.add("schema", getFieldFlags(sfield));
    if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) {
      fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName()));
    }
    Terms terms = reader.terms(fieldName);
    if (terms == null) { // Not indexed, so we need to report what we can (it made it through the fl param if specified)
      finfo.add( fieldName, fieldMap );
      continue;
    }

    if(sfield != null && sfield.indexed() ) {
      if (params.getBool(INCLUDE_INDEX_FIELD_FLAGS,true)) {
        Document doc = getFirstLiveDoc(terms, reader);

        if (doc != null) {
          // Found a document with this field
          try {
            IndexableField fld = doc.getField(fieldName);
            if (fld != null) {
              fieldMap.add("index", getFieldFlags(fld));
            } else {
              // it is a non-stored field...
              fieldMap.add("index", "(unstored field)");
            }
          } catch (Exception ex) {
            // Best effort: the field is still reported without its "index" flags, but the
            // cause is passed to the logger so the failure can be diagnosed.
            log.warn("error reading field: {}", fieldName, ex);
          }
        }
      }
      fieldMap.add("docs", terms.getDocCount());
    }
    if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) {
      getDetailedFieldInfo(req, fieldName, fieldMap);
    }
    // Add the field
    finfo.add( fieldName, fieldMap );
  }
  return finfo;
}

Source File: RoutingFieldMapper.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Reads this mapper's value from a document.
 *
 * @param document the document to read from
 * @return the field type's value for the field stored under this mapper's index name, cast
 *         to a String, or {@code null} if the document has no such field
 */
public String value(Document document) {
    final String indexName = fieldType().names().indexName();
    final Field stored = (Field) document.getField(indexName);
    if (stored == null) {
        return null;
    }
    return (String) fieldType().value(stored);
}

Source File: DocumentUtil.java From netbeans with Apache License 2.0

4 votes

/**
 * Extracts the string value of the {@code FIELD_SOURCE} field.
 *
 * @param doc the document to read from
 * @return the field's string value, or {@code null} if the document has no such field
 */
@Override
public String convert(Document doc) {
    final Field source = doc.getField(FIELD_SOURCE);
    if (source == null) {
        return null;
    }
    return source.stringValue();
}

Source File: LumongoSegment.java From lumongo with Apache License 2.0

4 votes

/**
 * Builds the scored result for the hit at position {@code i} of {@code results}.
 *
 * <p>The stored document is loaded with a field set chosen from the fetch type (only when the
 * index settings say the document is stored in the index; otherwise the base fetch set is
 * used). Its timestamp and unique id are read, the stored document is processed unless the
 * fetch type is NONE, and the score, ids, segment, index name and result position are copied
 * onto the builder. Sort values are added when {@code sorting} is true.
 *
 * @throws Exception if highlighters or analysis handlers are supplied without a FULL fetch,
 *                   or if loading or processing the document fails
 */
private ScoredResult.Builder handleDocResult(IndexSearcher is, SortRequest sortRequest, boolean sorting, ScoreDoc[] results, int i,
		FetchType resultFetchType, List<String> fieldsToReturn, List<String> fieldsToMask, List<LumongoHighlighter> highlighterList,
		List<AnalysisHandler> analysisHandlerList) throws Exception {
	final ScoreDoc hit = results[i];
	final int docId = hit.doc;

	// Pick the stored-field set matching the requested fetch type.
	final Set<String> fieldsToFetch;
	if (!indexConfig.getIndexSettings().getStoreDocumentInIndex()) {
		fieldsToFetch = fetchSet;
	}
	else if (FetchType.FULL.equals(resultFetchType)) {
		fieldsToFetch = fetchSetWithDocument;
	}
	else if (FetchType.META.equals(resultFetchType)) {
		fieldsToFetch = fetchSetWithMeta;
	}
	else {
		fieldsToFetch = fetchSet;
	}

	final Document stored = is.doc(docId, fieldsToFetch);

	final long timestamp = stored.getField(LumongoConstants.TIMESTAMP_FIELD).numericValue().longValue();

	final ScoredResult.Builder srBuilder = ScoredResult.newBuilder();
	final String uniqueId = stored.get(LumongoConstants.ID_FIELD);

	final boolean notFullFetch = !FetchType.FULL.equals(resultFetchType);

	if (!highlighterList.isEmpty() && notFullFetch) {
		throw new Exception("Highlighting requires a full fetch of the document");
	}

	if (!analysisHandlerList.isEmpty() && notFullFetch) {
		throw new Exception("Analysis requires a full fetch of the document");
	}

	if (!FetchType.NONE.equals(resultFetchType)) {
		handleStoredDoc(srBuilder, uniqueId, stored, resultFetchType, fieldsToReturn, fieldsToMask, highlighterList, analysisHandlerList);
	}

	srBuilder.setScore(hit.score);
	srBuilder.setUniqueId(uniqueId);
	srBuilder.setTimestamp(timestamp);
	srBuilder.setDocId(docId);
	srBuilder.setSegment(segmentNumber);
	srBuilder.setIndexName(indexName);
	srBuilder.setResultIndex(i);

	if (sorting) {
		handleSortValues(sortRequest, hit, srBuilder);
	}
	return srBuilder;
}

Source File: BooleanPerceptronClassifier.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates a {@link BooleanPerceptronClassifier} and trains it on the given index.
 * <p>
 * Construction first seeds a weight for every term of the text field with that term's total
 * term frequency and builds the FST from those weights. It then runs a search for all docs
 * that have any value in the class field (optionally restricted by {@code query}),
 * classifies the stored text of each hit and, whenever the assigned class differs from the
 * stored one, updates the weights.
 *
 * @param indexReader     the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param batchSize      the size of the batch of docs to use for updating the perceptron weights; it is unboxed
 *                       when weights are updated, so it is expected to be non-null
 * @param bias      the bias used for class separation; when {@code null} or {@code 0} it is derived from the
 *                  index statistics of the text field
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldName  the name of the field used as input for the classifier
 * @throws IOException if the building of the underlying {@link FST} fails and / or {@link TermsEnum} for the text field
 *                     cannot be found
 */
public BooleanPerceptronClassifier(IndexReader indexReader, Analyzer analyzer, Query query, Integer batchSize,
                                   Double bias, String classFieldName, String textFieldName) throws IOException {
  this.textTerms = MultiTerms.getTerms(indexReader, textFieldName);

  // Without terms for the text field there is nothing to seed the weights from.
  if (textTerms == null) {
    throw new IOException("term vectors need to be available for field " + textFieldName);
  }

  this.analyzer = analyzer;
  this.textFieldName = textFieldName;

  if (bias == null || bias == 0d) {
    // automatic assign the bias to be the average total term freq
    double t = (double) indexReader.getSumTotalTermFreq(textFieldName) / (double) indexReader.getDocCount(textFieldName);
    // NOTE(review): this compares the quotient of the two statistics with -1, not the
    // individual statistics; confirm this is the intended "statistic unavailable" check.
    if (t != -1) {
      this.bias = t;
    } else {
      throw new IOException(
              "bias cannot be assigned since term vectors for field "
                      + textFieldName + " do not exist");
    }
  } else {
    this.bias = bias;
  }

  // TODO : remove this map as soon as we have a writable FST
  SortedMap<String, Double> weights = new ConcurrentSkipListMap<>();

  // Seed each term's weight with its total term frequency.
  TermsEnum termsEnum = textTerms.iterator();
  BytesRef textTerm;
  while ((textTerm = termsEnum.next()) != null) {
    weights.put(textTerm.utf8ToString(), (double) termsEnum.totalTermFreq());
  }
  // The FST is built here, before assignClass is called in the training loop below.
  updateFST(weights);

  IndexSearcher indexSearcher = new IndexSearcher(indexReader);

  // Counts the docs that had both a text and a class field.
  int batchCount = 0;

  // Training set: docs with any value in the class field, optionally narrowed by the caller's query.
  BooleanQuery.Builder q = new BooleanQuery.Builder();
  q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, "*")), BooleanClause.Occur.MUST));
  if (query != null) {
    q.add(new BooleanClause(query, BooleanClause.Occur.MUST));
  }
  // run the search and use stored field values
  for (ScoreDoc scoreDoc : indexSearcher.search(q.build(),
          Integer.MAX_VALUE).scoreDocs) {
    Document doc = indexSearcher.doc(scoreDoc.doc);

    IndexableField textField = doc.getField(textFieldName);

    // get the expected result
    IndexableField classField = doc.getField(classFieldName);

    // Docs missing either stored field are skipped and do not advance the batch counter.
    if (textField != null && classField != null) {
      // assign class to the doc
      ClassificationResult<Boolean> classificationResult = assignClass(textField.stringValue());
      Boolean assignedClass = classificationResult.getAssignedClass();

      Boolean correctClass = Boolean.valueOf(classField.stringValue());
      // Boolean.compareTo gives 0 when the classes agree and a signed non-zero value otherwise.
      long modifier = correctClass.compareTo(assignedClass);
      if (modifier != 0) {
        // The last argument is true whenever batchCount is a multiple of batchSize.
        updateWeights(indexReader, scoreDoc.doc, assignedClass,
                weights, modifier, batchCount % batchSize == 0);
      }
      batchCount++;
    }
  }
  weights.clear(); // free memory while waiting for GC
}

Java Code Examples for org.apache.lucene.document.Document#getField()