org.apache.lucene.index.FieldInfo#getIndexOptions

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

@Override
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException {
  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // field does not exist or has no value
    return new Bits.MatchNoBits(reader.maxDoc());
  } 
  
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // doc values case
  } else if (parser instanceof PointParser) {
    // points case
  } else {
    // postings case
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
      return new Bits.MatchNoBits(reader.maxDoc());
    }
  }
  BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser));
  return bitsEntry.bits;
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  SortedDocValues valuesIn = reader.getSortedDocValues(field);
  if (valuesIn != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return valuesIn;
  } else {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
      return DocValues.emptySorted();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
      // we don't try to build a sorted instance from numeric/binary doc
      // values because dedup can be very costly
      throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    } else if (info.getIndexOptions() == IndexOptions.NONE) {
      return DocValues.emptySorted();
    }
    SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
    return impl.iterator();
  }
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  BinaryDocValues valuesIn = reader.getBinaryDocValues(field);
  if (valuesIn == null) {
    valuesIn = reader.getSortedDocValues(field);
  }

  if (valuesIn != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return valuesIn;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptyBinary();
  } else if (info.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  } else if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }

  BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return impl.iterator();
}

Source File: DocumentField.java From lucene-solr with Apache License 2.0

5 votes

static DocumentField of(FieldInfo finfo, IndexableField field, IndexReader reader, int docId)
    throws IOException {

  Objects.requireNonNull(finfo);
  Objects.requireNonNull(reader);

  DocumentField dfield = new DocumentField();

  dfield.name = finfo.name;
  dfield.idxOptions = finfo.getIndexOptions();
  dfield.hasTermVectors = finfo.hasVectors();
  dfield.hasPayloads = finfo.hasPayloads();
  dfield.hasNorms = finfo.hasNorms();

  if (finfo.hasNorms()) {
    NumericDocValues norms = MultiDocValues.getNormValues(reader, finfo.name);
    if (norms.advanceExact(docId)) {
      dfield.norm = norms.longValue();
    }
  }

  dfield.dvType = finfo.getDocValuesType();

  dfield.pointDimensionCount = finfo.getPointDimensionCount();
  dfield.pointNumBytes = finfo.getPointNumBytes();

  if (field != null) {
    dfield.isStored = field.fieldType().stored();
    dfield.stringValue = field.stringValue();
    if (field.binaryValue() != null) {
      dfield.binaryValue = BytesRef.deepCopyOf(field.binaryValue());
    }
    dfield.numericValue = field.numericValue();
  }

  return dfield;
}

Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0

5 votes

public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
                                     FSTTermsWriter.TERMS_VERSION_START,
                                     FSTTermsWriter.TERMS_VERSION_CURRENT,
                                     state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);
    this.postingsReader.init(in, state);
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = in.readVInt();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      long numTerms = in.readVLong();
      long sumTotalTermFreq = in.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
      long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? sumTotalTermFreq : in.readVLong();
      int docCount = in.readVInt();
      TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }
}

Source File: IDVersionPostingsWriter.java From lucene-solr with Apache License 2.0

5 votes

@Override
public void setField(FieldInfo fieldInfo) {
  super.setField(fieldInfo);
  if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
    throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
  }
  // LUCENE-5693: because CheckIndex cross-checks term vectors with postings even for deleted docs, and because our PF only indexes the
  // non-deleted documents on flush, CheckIndex will see this as corruption:
  if (fieldInfo.hasVectors()) {
    throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
  }
  lastState = emptyState;
}

Source File: PushPostingsWriterBase.java From lucene-solr with Apache License 2.0

5 votes

/** 
 * Sets the current field for writing, and returns the
 * fixed length of long[] metadata (which is fixed per
 * field), called when the writing switches to another field. */
@Override
public void setField(FieldInfo fieldInfo) {
  this.fieldInfo = fieldInfo;
  indexOptions = fieldInfo.getIndexOptions();

  writeFreqs = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
  writePositions = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  writeOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;        
  writePayloads = fieldInfo.hasPayloads();

  if (writeFreqs == false) {
    enumFlags = 0;
  } else if (writePositions == false) {
    enumFlags = PostingsEnum.FREQS;
  } else if (writeOffsets == false) {
    if (writePayloads) {
      enumFlags = PostingsEnum.PAYLOADS;
    } else {
      enumFlags = PostingsEnum.POSITIONS;
    }
  } else {
    if (writePayloads) {
      enumFlags = PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS;
    } else {
      enumFlags = PostingsEnum.OFFSETS;
    }
  }
}

Source File: PerFieldPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

public FieldsReader(final SegmentReadState readState) throws IOException {

      // Read _X.per and init each format:
      boolean success = false;
      try {
        // Read field name -> format name
        for (FieldInfo fi : readState.fieldInfos) {
          if (fi.getIndexOptions() != IndexOptions.NONE) {
            final String fieldName = fi.name;
            final String formatName = fi.getAttribute(PER_FIELD_FORMAT_KEY);
            if (formatName != null) {
              // null formatName means the field is in fieldInfos, but has no postings!
              final String suffix = fi.getAttribute(PER_FIELD_SUFFIX_KEY);
              if (suffix == null) {
                throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
              }
              PostingsFormat format = PostingsFormat.forName(formatName);
              String segmentSuffix = getSuffix(formatName, suffix);
              if (!formats.containsKey(segmentSuffix)) {
                formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
              }
              fields.put(fieldName, formats.get(segmentSuffix));
            }
          }
        }
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeWhileHandlingException(formats.values());
        }
      }

      this.segment = readState.segmentInfo.name;
    }

Source File: PerFieldMergeState.java From lucene-solr with Apache License 2.0

5 votes

FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
  // Copy all the input FieldInfo objects since the field numbering must be kept consistent
  super(toArray(src));

  boolean hasVectors = false;
  boolean hasProx = false;
  boolean hasPayloads = false;
  boolean hasOffsets = false;
  boolean hasFreq = false;
  boolean hasNorms = false;
  boolean hasDocValues = false;
  boolean hasPointValues = false;

  this.filteredNames = new HashSet<>(filterFields);
  this.filtered = new ArrayList<>(filterFields.size());
  for (FieldInfo fi : src) {
    if (this.filteredNames.contains(fi.name)) {
      this.filtered.add(fi);
      hasVectors |= fi.hasVectors();
      hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
      hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      hasNorms |= fi.hasNorms();
      hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
      hasPayloads |= fi.hasPayloads();
      hasPointValues |= (fi.getPointDimensionCount() != 0);
    }
  }

  this.filteredHasVectors = hasVectors;
  this.filteredHasProx = hasProx;
  this.filteredHasPayloads = hasPayloads;
  this.filteredHasOffsets = hasOffsets;
  this.filteredHasFreq = hasFreq;
  this.filteredHasNorms = hasNorms;
  this.filteredHasDocValues = hasDocValues;
  this.filteredHasPointValues = hasPointValues;
}

Source File: CollapsingQParserPlugin.java From lucene-solr with Apache License 2.0

5 votes

ReaderWrapper(LeafReader leafReader, String field) {
  super(leafReader);

  // TODO can we just do "field" and not bother with the other fields?
  List<FieldInfo> newInfos = new ArrayList<>(in.getFieldInfos().size());
  for (FieldInfo fieldInfo : in.getFieldInfos()) {
    if (fieldInfo.name.equals(field)) {
      FieldInfo f = new FieldInfo(fieldInfo.name,
          fieldInfo.number,
          fieldInfo.hasVectors(),
          fieldInfo.hasNorms(),
          fieldInfo.hasPayloads(),
          fieldInfo.getIndexOptions(),
          DocValuesType.NONE,
          fieldInfo.getDocValuesGen(),
          fieldInfo.attributes(),
          fieldInfo.getPointDimensionCount(),
          fieldInfo.getPointIndexDimensionCount(),
          fieldInfo.getPointNumBytes(),
          fieldInfo.isSoftDeletesField());
      newInfos.add(f);
    } else {
      newInfos.add(fieldInfo);
    }
  }
  FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
  this.fieldInfos = infos;
}

Source File: UninvertDocValuesMergePolicyFactory.java From lucene-solr with Apache License 2.0

5 votes

private UninvertingReader.Type getUninversionType(FieldInfo fi) {
  SchemaField sf = schema.getFieldOrNull(fi.name);
  
  if (null != sf &&
      sf.hasDocValues() &&
      fi.getDocValuesType() == DocValuesType.NONE &&
      fi.getIndexOptions() != IndexOptions.NONE) {
    return sf.getType().getUninversionType(sf);
  } else {
    return null;
  }
}

Source File: FSTTermOutputs.java From lucene-solr with Apache License 2.0

4 votes

protected FSTTermOutputs(FieldInfo fieldInfo) {
  this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

4 votes

@Override
public NumericDocValues getNumerics(LeafReader reader, String field, Parser parser) throws IOException {
  if (parser == null) {
    throw new NullPointerException();
  }
  final NumericDocValues valuesIn = reader.getNumericDocValues(field);
  if (valuesIn != null) {
    return valuesIn;
  } else {
    final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
    if (info == null) {
      return DocValues.emptyNumeric();
    } else if (info.getDocValuesType() != DocValuesType.NONE) {
      throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
    }
    
    if (parser instanceof PointParser) {
      // points case
      // no points in this segment
      if (info.getPointDimensionCount() == 0) {
        return DocValues.emptyNumeric();
      }
      if (info.getPointDimensionCount() != 1) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed with dimensions=" + info.getPointDimensionCount());
      }
      PointValues values = reader.getPointValues(field);
      // no actual points for this field (e.g. all points deleted)
      if (values == null || values.size() == 0) {
        return DocValues.emptyNumeric();
      }
      // not single-valued
      if (values.size() != values.getDocCount()) {
        throw new IllegalStateException("Type mismatch: " + field + " was indexed with multiple values, numValues=" + values.size() + ",numDocs=" + values.getDocCount());
      }
    } else {
      // postings case 
      // not indexed
      if (info.getIndexOptions() == IndexOptions.NONE) {
        return DocValues.emptyNumeric();
      }
    }

    return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator();
  }
}

Source File: UnifiedHighlighter.java From lucene-solr with Apache License 2.0

3 votes

/**
 * Determine the offset source for the specified field.  The default algorithm is as follows:
 * <ol>
 * <li>This calls {@link #getFieldInfo(String)}. Note this returns null if there is no searcher or if the
 * field isn't found there.</li>
 * <li> If there's a field info it has
 * {@link IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS} then {@link OffsetSource#POSTINGS} is
 * returned.</li>
 * <li>If there's a field info and {@link FieldInfo#hasVectors()} then {@link OffsetSource#TERM_VECTORS} is
 * returned (note we can't check here if the TV has offsets; if there isn't then an exception will get thrown
 * down the line).</li>
 * <li>Fall-back: {@link OffsetSource#ANALYSIS} is returned.</li>
 * </ol>
 * <p>
 * Note that the highlighter sometimes switches to something else based on the query, such as if you have
 * {@link OffsetSource#POSTINGS_WITH_TERM_VECTORS} but in fact don't need term vectors.
 */
protected OffsetSource getOffsetSource(String field) {
  FieldInfo fieldInfo = getFieldInfo(field);
  if (fieldInfo != null) {
    if (fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
      return fieldInfo.hasVectors() ? OffsetSource.POSTINGS_WITH_TERM_VECTORS : OffsetSource.POSTINGS;
    }
    if (fieldInfo.hasVectors()) { // unfortunately we can't also check if the TV has offsets
      return OffsetSource.TERM_VECTORS;
    }
  }
  return OffsetSource.ANALYSIS;
}

Java Code Examples for org.apache.lucene.index.FieldInfo#getIndexOptions()