org.apache.lucene.index.FieldInfo Java Examples

Source File: PointsWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Default merge implementation: checks the integrity of every incoming points reader, then
 * merges the points of each field that has them into this writer, and finally calls
 * {@link #finish()}.
 *
 * @param mergeState merge state supplying the incoming readers and the merged field infos
 * @throws IOException propagated from the integrity checks, the per-field merges or finish
 */
public void merge(MergeState mergeState) throws IOException {
  // Verify all incoming readers up front; null entries are skipped.
  for (PointsReader pointsReader : mergeState.pointsReaders) {
    if (pointsReader == null) {
      continue;
    }
    pointsReader.checkIntegrity();
  }
  // Merge one field at a time; fields with a zero point dimension count are skipped.
  for (FieldInfo info : mergeState.mergeFieldInfos) {
    if (info.getPointDimensionCount() == 0) {
      continue;
    }
    mergeOneField(mergeState, info);
  }
  finish();
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns binary doc values for {@code field}. The reader's own binary doc values are
 * preferred, then its sorted doc values; only if neither exists is the cache consulted.
 *
 * @param reader leaf reader to read the field from
 * @param field name of the field
 * @param acceptableOverheadRatio becomes part of the cache key
 * @return the doc values, or an empty instance if the field is unknown or not indexed
 * @throws IllegalStateException if the field has doc values of some other type
 * @throws IOException propagated from the reader or the cache
 */
public BinaryDocValues getTerms(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  BinaryDocValues docValues = reader.getBinaryDocValues(field);
  if (docValues == null) {
    docValues = reader.getSortedDocValues(field);
  }
  if (docValues != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader).
    return docValues;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptyBinary();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptyBinary();
  }

  final Object cached = caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return ((BinaryDocValuesImpl) cached).iterator();
}

Source File: DiskDocValuesProducer.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Returns the sorted-set doc values for {@code field}, reusing a previously stored instance
 * from {@code _sortedSetDocValuesCache} (keyed by field number) when one exists.
 *
 * @param field the field whose values are requested
 * @return the cached or newly created instance; whatever {@code newSortedSetDocValues}
 *         returned when nothing was cached
 * @throws IOException propagated from {@code newSortedSetDocValues}
 */
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
  // First lookup happens without holding the lock.
  final SortedSetDocValues alreadyCached = _sortedSetDocValuesCache.get(field.number);
  if (alreadyCached != null) {
    return alreadyCached;
  }
  synchronized (_sortedSetDocValuesCache) {
    // Look again now that the lock is held before creating a new instance.
    SortedSetDocValues values = _sortedSetDocValuesCache.get(field.number);
    if (values == null) {
      values = newSortedSetDocValues(field);
      // Only store the instance when caching is enabled and creation produced something.
      if (_cache && values != null) {
        _sortedSetDocValuesCache.put(field.number, values);
      }
    }
    return values;
  }
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns sorted doc values for {@code field}, preferring the reader's own sorted doc values
 * and falling back to the cache only when the reader has none.
 *
 * @param reader leaf reader to read the field from
 * @param field name of the field
 * @param acceptableOverheadRatio becomes part of the cache key
 * @return the doc values, or an empty instance if the field is unknown or not indexed
 * @throws IllegalStateException if the field has doc values of some other type
 * @throws IOException propagated from the reader or the cache
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues docValues = reader.getSortedDocValues(field);
  if (docValues != null) {
    // Not cached here by FieldCacheImpl (cached instead per-thread by SegmentReader).
    return docValues;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    return DocValues.emptySorted();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // A sorted instance is deliberately not built from numeric/binary doc values
    // because dedup can be very costly.
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySorted();
  }

  final Object cached = caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return ((SortedDocValuesImpl) cached).iterator();
}

Source File: RecoverySourcePruneMergePolicy.java From crate with Apache License 2.0

6 votes

/**
 * Wraps the parent's stored-fields reader so that the recovery source field is hidden from
 * visitors for every document that is not flagged in {@code recoverySourceToKeep}.
 *
 * @return a filtering view over the parent's stored-fields reader
 */
@Override
public StoredFieldsReader getFieldsReader() {
    final StoredFieldsReader delegate = super.getFieldsReader();
    return new FilterStoredFieldsReader(delegate) {
        @Override
        public void visitDocument(int docID, StoredFieldVisitor visitor) throws IOException {
            final boolean keepRecoverySource =
                recoverySourceToKeep != null && recoverySourceToKeep.get(docID);
            if (keepRecoverySource) {
                // Document is flagged as kept: expose it unchanged.
                super.visitDocument(docID, visitor);
                return;
            }
            // Otherwise answer NO for the recovery source field and defer everything else.
            final StoredFieldVisitor withoutRecoverySource = new FilterStoredFieldVisitor(visitor) {
                @Override
                public Status needsField(FieldInfo fieldInfo) throws IOException {
                    return recoverySourceField.equals(fieldInfo.name)
                        ? Status.NO
                        : super.needsField(fieldInfo);
                }
            };
            super.visitDocument(docID, withoutRecoverySource);
        }
    };
}

Source File: Lucene84PostingsReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Chooses the impacts enum implementation for a term based on its doc frequency, on what the
 * field indexed (positions, offsets, payloads) and on what the caller requested in
 * {@code flags}.
 *
 * @param fieldInfo field the term belongs to
 * @param state term state; cast to {@code IntBlockTermState} for the block-based enums
 * @param flags requested {@link PostingsEnum} features
 * @return the impacts enum implementation selected for this term
 * @throws IOException propagated from creating the enum
 */
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) throws IOException {
  if (state.docFreq <= BLOCK_SIZE) {
    // no skip data
    return new SlowImpactsEnum(postings(fieldInfo, state, null, flags));
  }

  final IndexOptions options = fieldInfo.getIndexOptions();
  final boolean hasPositions = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
  final boolean wantsPositions = PostingsEnum.featureRequested(flags, PostingsEnum.POSITIONS);
  if (!hasPositions || !wantsPositions) {
    return new BlockImpactsDocsEnum(fieldInfo, (IntBlockTermState) state);
  }

  // From here on positions are both indexed and requested.
  final boolean hasOffsets = options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
  final boolean needsOffsets = hasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
  final boolean needsPayloads = fieldInfo.hasPayloads() && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
  if (!needsOffsets && !needsPayloads) {
    return new BlockImpactsPostingsEnum(fieldInfo, (IntBlockTermState) state);
  }

  return new BlockImpactsEverythingEnum(fieldInfo, (IntBlockTermState) state, flags);
}

Source File: DiskDocValuesConsumer.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Writes a sorted-set field as three pieces: a SORTED_SET marker entry, the values as a
 * binary field, the ords as a numeric field, and finally a NUMERIC entry holding the running
 * per-document ord counts.
 *
 * @param field the field being written; its number prefixes each metadata entry
 * @param values the values, written through {@code addBinaryField}
 * @param docToOrdCount per-document ord counts, accumulated into running totals below
 * @param ords the stream of ords, written through {@code addNumericField}
 * @throws IOException propagated from the metadata/data outputs
 */
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  // metadata entry header: field number followed by the type marker
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);
  // write the ord -> byte[] as a binary field
  addBinaryField(field, values);
  // write the stream of ords as a numeric field
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);
  
  // write the doc -> ord count as an absolute index to the stream
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  // record where in the data output the packed running totals start
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter writer = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  // addr is the running sum of the counts, so each written value is a cumulative total
  long addr = 0;
  for (Number v : docToOrdCount) {
    addr += v.longValue();
    writer.add(addr);
  }
  writer.finish();
}

Source File: FacetFieldProcessorByHashDV.java From lucene-solr with Apache License 2.0

6 votes

FacetFieldProcessorByHashDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
  super(fcontext, freq, sf);
  if (freq.mincount == 0) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" doesn't support mincount=0");
  }
  if (freq.prefix != null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" doesn't support prefix"); // yet, but it could
  }
  FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
  if (fieldInfo != null &&
      fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
      fieldInfo.getDocValuesType() != DocValuesType.SORTED &&
      fieldInfo.getDocValuesType() != DocValuesType.SORTED_NUMERIC) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        getClass()+" only support single valued number/string with docValues");
  }
}

Source File: OrdsBlockTreeTermsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Captures the per-field metadata passed in; all arguments are stored as given.
 * Asserts that the field has at least one term and a non-null root code.
 */
public FieldMetaData(FieldInfo fieldInfo, Output rootCode, long numTerms, long indexStartFP,
                     long sumTotalTermFreq, long sumDocFreq, int docCount,
                     BytesRef minTerm, BytesRef maxTerm) {
  // Preconditions (only checked when assertions are enabled).
  assert numTerms > 0;
  assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;

  this.fieldInfo = fieldInfo;
  this.rootCode = rootCode;
  this.numTerms = numTerms;
  this.indexStartFP = indexStartFP;

  // Statistics.
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;

  // Term bounds.
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;
}

Source File: FSTTermsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes every field that has terms: each term is handed to the postings writer, and terms
 * for which a state comes back are recorded in a per-field {@code TermsWriter} together with
 * the accumulated statistics.
 *
 * @param fields the fields to write; fields whose {@code Terms} is null are skipped
 * @param norms passed through to the postings writer
 * @throws IOException propagated from the terms enum or the writers
 */
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
  for (String fieldName : fields) {
    final Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
    final boolean hasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    final TermsEnum termsEnum = terms.iterator();
    final TermsWriter termsWriter = new TermsWriter(fieldInfo);

    long totalTermFreqSum = 0;
    long docFreqSum = 0;
    final FixedBitSet docsSeen = new FixedBitSet(maxDoc);

    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      final BlockTermState termState = postingsWriter.writeTerm(term, termsEnum, docsSeen, norms);
      if (termState == null) {
        // The postings writer returned no state for this term, so nothing is recorded.
        continue;
      }
      termsWriter.finishTerm(term, termState);
      totalTermFreqSum += termState.totalTermFreq;
      docFreqSum += termState.docFreq;
    }

    // -1 is passed as the total term frequency when the field does not index frequencies.
    final long reportedTotalTermFreq = hasFreq ? totalTermFreqSum : -1;
    termsWriter.finish(reportedTotalTermFreq, docFreqSum, docsSeen.cardinality());
  }
}

Source File: SolrDocumentFetcher.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Replays the fields of an already-loaded (cached) document against a stored field visitor.
 * For each field the visitor is asked whether it wants it; STOP ends the replay, NO skips the
 * field, otherwise the value is dispatched by kind: binary, numeric, then string.
 */
private void visitFromCached(Document document, StoredFieldVisitor visitor) throws IOException {
  for (IndexableField f : document) {
    final FieldInfo info = searcher.getFieldInfos().fieldInfo(f.name());
    final StoredFieldVisitor.Status status = visitor.needsField(info);
    if (status == StoredFieldVisitor.Status.STOP) {
      return;
    }
    if (status == StoredFieldVisitor.Status.NO) {
      continue;
    }

    final BytesRef binary = f.binaryValue();
    if (binary != null) {
      visitor.binaryField(info, toByteArrayUnwrapIfPossible(binary));
    } else {
      final Number number = f.numericValue();
      if (number == null) {
        // Neither binary nor numeric: must be String.
        if (f instanceof LargeLazyField) {
          // optimization to avoid premature string conversion
          visitor.stringField(info, toStringUnwrapIfPossible(((LargeLazyField) f).readBytes()));
        } else {
          visitor.stringField(info, f.stringValue());
        }
      } else if (number instanceof Double) {
        visitor.doubleField(info, number.doubleValue());
      } else if (number instanceof Integer) {
        visitor.intField(info, number.intValue());
      } else if (number instanceof Float) {
        visitor.floatField(info, number.floatValue());
      } else if (number instanceof Long) {
        visitor.longField(info, number.longValue());
      } else {
        throw new AssertionError();
      }
    }
  }
}

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Looks up {@code fieldInfo.name} in {@code _readMaskFieldsAndMessages}. When an entry exists,
 * a non-empty message is forwarded to the visitor as the field's string value.
 *
 * @return {@code true} if an entry exists for the field, {@code false} otherwise
 * @throws IOException propagated from the visitor
 */
private boolean checkReadMask(FieldInfo fieldInfo) throws IOException {
  final String maskMessage = _readMaskFieldsAndMessages.get(fieldInfo.name);
  if (maskMessage == null) {
    return false;
  }
  if (!maskMessage.isEmpty()) {
    _visitor.stringField(fieldInfo, maskMessage);
  }
  return true;
}

Source File: BlockTreeTermsReader.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Stores the per-field statistics, decodes the root block file pointer from {@code rootCode}
 * and, when an index input is supplied, loads the field's FST index from it.
 *
 * @param indexIn index input to load the FST from, or {@code null} to leave {@code index} null
 * @throws IOException propagated from reading the index input
 */
FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;
  this.indexStartFP = indexStartFP;
  this.rootCode = rootCode;

  // The leading vLong of the root code holds the file pointer above the flag bits.
  final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length);
  rootBlockFP = rootCodeIn.readVLong() >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;

  if (indexIn == null) {
    index = null;
  } else {
    // Read from a clone so the caller's input position is left untouched.
    final IndexInput indexClone = indexIn.clone();
    indexClone.seek(indexStartFP);
    index = new FST<BytesRef>(indexClone, ByteSequenceOutputs.getSingleton());
  }
}

Source File: STUniformSplitTermsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Merges segments with the custom shared-terms path when possible. Falls back to the default
 * merge when the index is sorted, or when a segment's terms are not directly an
 * {@code STUniformSplitTerms} instance.
 *
 * @param mergeState state of the merge
 * @param normsProducer norms, passed through to whichever merge path is taken
 * @throws IOException propagated from reading or writing segments
 */
@Override
public void merge(MergeState mergeState, NormsProducer normsProducer) throws IOException {
  if (mergeState.needsIndexSort) {
    // This custom merging does not support sorted index.
    // Fall back to the default merge, which is inefficient for this postings format.
    super.merge(mergeState, normsProducer);
    return;
  }

  final FieldsProducer[] producers = mergeState.fieldsProducers;
  final List<TermIterator<SegmentTerms>> termsPerSegment = new ArrayList<>(producers.length);
  for (int seg = 0; seg < producers.length; seg++) {
    final FieldsProducer producer = producers[seg];
    // Iterate the FieldInfo provided by mergeState.fieldInfos because they may be
    // filtered by PerFieldMergeState. Only the first non-null Terms of the segment is needed.
    for (FieldInfo fieldInfo : mergeState.fieldInfos[seg]) {
      final Terms terms = producer.terms(fieldInfo.name);
      if (terms == null) {
        continue;
      }
      if (!(terms instanceof STUniformSplitTerms)) {
        // Terms is wrapped/filtered rather than a direct STUniformSplitTerms instance.
        // Fall back to the default merge, which is inefficient for this postings format.
        super.merge(mergeState, normsProducer);
        return;
      }
      final STUniformSplitTerms sharedTerms = (STUniformSplitTerms) terms;
      termsPerSegment.add(new SegmentTerms(
          seg, sharedTerms.createMergingBlockReader(), mergeState.docMaps[seg]));
      // The segment's shared terms were found; move on to the next segment.
      break;
    }
  }
  writeSegment((blockWriter, dictionaryBuilder) -> mergeSegments(mergeState, normsProducer, termsPerSegment, blockWriter, dictionaryBuilder));
}

Source File: Lucene60PointsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Finishes writing: appends the footer to the data output, then writes the index file, which
 * lists one (field number, file pointer) pair per entry of {@code indexFPs}.
 *
 * @throws IllegalStateException if already finished, or if a written field is missing from
 *         the segment's field infos
 * @throws IOException propagated from the outputs
 */
@Override
public void finish() throws IOException {
  if (finished) {
    throw new IllegalStateException("already finished");
  }
  finished = true;
  CodecUtil.writeFooter(dataOut);

  final String indexName = IndexFileNames.segmentFileName(
      writeState.segmentInfo.name, writeState.segmentSuffix, Lucene60PointsFormat.INDEX_EXTENSION);

  // Write index file
  try (IndexOutput out = writeState.directory.createOutput(indexName, writeState.context)) {
    CodecUtil.writeIndexHeader(
        out,
        Lucene60PointsFormat.META_CODEC_NAME,
        Lucene60PointsFormat.INDEX_VERSION_CURRENT,
        writeState.segmentInfo.getId(),
        writeState.segmentSuffix);

    out.writeVInt(indexFPs.size());
    for (Map.Entry<String,Long> entry : indexFPs.entrySet()) {
      final String fieldName = entry.getKey();
      final FieldInfo info = writeState.fieldInfos.fieldInfo(fieldName);
      if (info == null) {
        throw new IllegalStateException("wrote field=\"" + fieldName + "\" but that field doesn't exist in FieldInfos");
      }
      out.writeVInt(info.number);
      out.writeVLong(entry.getValue());
    }
    CodecUtil.writeFooter(out);
  }
}

Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes a sorted-numeric field: the values themselves and, only when some document has more
 * than one value, a table of cumulative value counts per document.
 *
 * @param field the field being written
 * @param valuesProducer source of the field's values
 * @throws IOException propagated from the metadata/data outputs or the producer
 */
@Override
public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  meta.writeInt(field.number);
  meta.writeByte(Lucene80DocValuesFormat.SORTED_NUMERIC);

  final long[] stats = writeValues(field, valuesProducer);
  final int docsWithField = Math.toIntExact(stats[0]);
  final long valueCount = stats[1];
  assert valueCount >= docsWithField;

  meta.writeInt(docsWithField);
  if (valueCount <= docsWithField) {
    // No document has more than one value, so no address table is written.
    return;
  }

  final long addressesStart = data.getFilePointer();
  meta.writeLong(addressesStart);
  meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);

  final DirectMonotonicWriter addresses =
      DirectMonotonicWriter.getInstance(meta, data, docsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT);
  // The table starts at 0 and then holds the running total after each document.
  long runningCount = 0;
  addresses.add(runningCount);
  final SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
  while (values.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    runningCount += values.docValueCount();
    addresses.add(runningCount);
  }
  addresses.finish();
  // Length in bytes of the address data just written.
  meta.writeLong(data.getFilePointer() - addressesStart);
}

Source File: DiskDocValuesProducer.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Returns doc values for fixed-length binary entries: the value for {@code id} is the
 * {@code bytes.maxLength} bytes found at {@code bytes.offset + id * bytes.maxLength}.
 *
 * @param field not used by this method
 * @param bytes entry describing the start offset and the fixed value length
 * @return doc values reading through clones of this producer's data input
 */
private BinaryDocValues getFixedBinary(FieldInfo field, final BinaryEntry bytes) {
  final IndexInput dataClone = this.data.clone();

  return new LongBinaryDocValues() {

    // Each value handed out by the ThreadValue is a further clone of dataClone.
    private final ThreadValue<IndexInput> input = new ThreadValue<IndexInput>() {
      @Override
      protected IndexInput initialValue() {
        return dataClone.clone();
      }
    };

    @Override
    public void get(long id, BytesRef result) {
      final long position = bytes.offset + id * bytes.maxLength;
      try {
        final IndexInput source = input.get();
        source.seek(position);
        // NOTE: we could have one buffer, but various consumers (e.g.
        // FieldComparatorSource) assume "they" own the bytes after calling this!
        final byte[] value = new byte[bytes.maxLength];
        source.readBytes(value, 0, value.length);
        result.bytes = value;
        result.offset = 0;
        result.length = value.length;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  };
}

Source File: AssertingNormsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Iterates the norms once, asserting that doc IDs are within {@code [0, maxDoc)} and strictly
 * increasing and reading each value, then delegates the actual write to the wrapped consumer.
 *
 * @param field the field whose norms are written
 * @param valuesProducer source of the norms
 * @throws IOException propagated from the producer or the wrapped consumer
 */
@Override
public void addNormsField(FieldInfo field, NormsProducer valuesProducer) throws IOException {
  final NumericDocValues values = valuesProducer.getNorms(field);

  int previousDoc = -1;
  for (int doc = values.nextDoc(); doc != NO_MORE_DOCS; doc = values.nextDoc()) {
    assert doc >= 0 && doc < maxDoc;
    assert doc > previousDoc;
    previousDoc = doc;
    // The value is read but deliberately not used.
    values.longValue();
  }

  in.addNormsField(field, valuesProducer);
}

Source File: PerFieldDocValuesFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Groups the merged fields that have doc values by the consumer returned for them, then lets
 * each consumer merge a view of the merge state restricted to its own fields.
 *
 * @param mergeState state of the merge
 * @throws IOException propagated from the consumers
 */
@Override
public void merge(MergeState mergeState) throws IOException {
  // Consumers are keyed by identity.
  final Map<DocValuesConsumer, Collection<String>> fieldsByConsumer = new IdentityHashMap<>();

  for (FieldInfo fi : mergeState.mergeFieldInfos) {
    if (fi.getDocValuesType() == DocValuesType.NONE) {
      continue;
    }
    // merge should ignore current format for the fields being merged
    final DocValuesConsumer consumer = getInstance(fi, true);
    fieldsByConsumer.computeIfAbsent(consumer, unused -> new ArrayList<>()).add(fi.name);
  }

  // Delegate the merge to the appropriate consumer; the per-field state is always reset.
  final PerFieldMergeState pfMergeState = new PerFieldMergeState(mergeState);
  try {
    for (Map.Entry<DocValuesConsumer, Collection<String>> entry : fieldsByConsumer.entrySet()) {
      final DocValuesConsumer consumer = entry.getKey();
      consumer.merge(pfMergeState.apply(entry.getValue()));
    }
  } finally {
    pfMergeState.reset();
  }
}

Source File: FieldReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Stores the per-field statistics, decodes the root block file pointer from {@code rootCode}
 * and loads the field's FST index with an {@code OffHeapFSTStore}.
 *
 * @param metaIn input the FST metadata is read from, unless it is the same object as
 *        {@code indexIn}, in which case the positioned clone of {@code indexIn} is used
 * @param indexIn input holding the FST; it is cloned and the clone is seeked to
 *        {@code indexStartFP}
 * @throws IOException propagated from reading the inputs
 */
FieldReader(BlockTreeTermsReader parent, FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
            long indexStartFP, IndexInput metaIn, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
  assert numTerms > 0;
  this.fieldInfo = fieldInfo;
  this.parent = parent;
  this.numTerms = numTerms;
  this.sumTotalTermFreq = sumTotalTermFreq;
  this.sumDocFreq = sumDocFreq;
  this.docCount = docCount;
  this.rootCode = rootCode;
  this.minTerm = minTerm;
  this.maxTerm = maxTerm;

  // The leading vLong of the root code holds the file pointer above the flag bits.
  final ByteArrayDataInput rootCodeIn = new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length);
  rootBlockFP = rootCodeIn.readVLong() >>> BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;

  // Initialize FST always off-heap.
  final IndexInput indexClone = indexIn.clone();
  indexClone.seek(indexStartFP);
  // metaIn == indexIn is only true before Lucene 8.6.
  final IndexInput fstMetaIn = (metaIn == indexIn) ? indexClone : metaIn;
  index = new FST<>(fstMetaIn, indexClone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
}

Source File: IDVersionPostingsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Switches to a new field after validating it: the field must use exactly
 * {@code IndexOptions.DOCS_AND_FREQS_AND_POSITIONS} and must not store term vectors.
 *
 * @param fieldInfo the field about to be written
 * @throws IllegalArgumentException if the field violates either requirement
 */
@Override
public void setField(FieldInfo fieldInfo) {
  super.setField(fieldInfo);

  final boolean indexedWithPositions =
      fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  if (!indexedWithPositions) {
    throw new IllegalArgumentException("field must be index using IndexOptions.DOCS_AND_FREQS_AND_POSITIONS");
  }

  // LUCENE-5693: CheckIndex cross-checks term vectors with postings even for deleted docs,
  // and this postings format only indexes the non-deleted documents on flush, so CheckIndex
  // would see term vectors as corruption.
  if (fieldInfo.hasVectors()) {
    throw new IllegalArgumentException("field cannot index term vectors: CheckIndex will report this as index corruption");
  }

  lastState = emptyState;
}

Source File: FieldsVisitor.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Receives a stored string field as UTF-8 bytes. The uid field is parsed into {@code uid};
 * any other field is recorded under its name via {@code addValue}.
 */
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
    final String decoded = new String(bytes, StandardCharsets.UTF_8);
    final String fieldName = fieldInfo.name;
    if (!UidFieldMapper.NAME.equals(fieldName)) {
        addValue(fieldName, decoded);
        return;
    }
    uid = Uid.createUid(decoded);
}

Source File: StoredFieldsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Prepares for the next field: clears any previously held value and sets the current field,
 * translated through {@code remapper} by name when a remapper is present.
 */
void reset(FieldInfo field) {
  binaryValue = null;
  stringValue = null;
  numericValue = null;
  // With a remapper, field numbers are not aligned, so look up the new field info by name.
  currentField = (remapper == null) ? field : remapper.fieldInfo(field.name);
}

Source File: Lucene60PointsReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the underlying {@link BKDReader} registered for the given field.
 *
 * @param fieldName name of the field to look up
 * @return the entry of {@code readers} stored under the field's number
 * @throws IllegalArgumentException if the field is unknown or has a point dimension count of 0
 *
 * @lucene.internal
 */
@Override
public PointValues getValues(String fieldName) {
  final FieldInfo info = readState.fieldInfos.fieldInfo(fieldName);
  if (info == null) {
    throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized");
  }

  final boolean indexedPoints = info.getPointDimensionCount() != 0;
  if (!indexedPoints) {
    throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index point values");
  }

  return readers.get(info.number);
}

Source File: IndexManager.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Returns the stored value of the {@code ROW_ID} field for the given document.
 *
 * <p>Only the {@code ROW_ID} field is loaded. The previous version first ran a visitor that
 * never accepted any field (so it captured nothing) and then loaded the complete document,
 * reading the stored fields twice for no benefit.
 *
 * @param reader reader to load the document from
 * @param docId id of the document
 * @return the row id as returned by {@code Document.get(ROW_ID)}; {@code null} if the
 *         document has no such stored field
 * @throws CorruptIndexException propagated from the reader
 * @throws IOException propagated from the reader
 */
private static String getRowId(IndexReader reader, int docId) throws CorruptIndexException, IOException {
  // Fully-qualified so that no additional import is required.
  return reader.document(docId, java.util.Collections.singleton(ROW_ID)).get(ROW_ID);
}

Source File: FieldReadCallback.java From deprecated-security-advanced-modules with Apache License 2.0

5 votes

/**
 * Records that a string field was read, decoding its value as UTF-8.
 *
 * <p>Nothing happens when {@code recordField} rejects the field. Failures are logged rather
 * than propagated, so a problem here never interrupts the read itself.
 *
 * @param fieldInfo the field that was read
 * @param fieldValue the raw field value, interpreted as UTF-8
 */
public void stringFieldRead(final FieldInfo fieldInfo, final byte[] fieldValue) {
    try {
        if (!recordField(fieldInfo.name, true)) {
            return;
        }
        fieldRead0(fieldInfo.name, new String(fieldValue, StandardCharsets.UTF_8));
    } catch (Exception e) {
        // Deliberately best-effort, but pass the exception as the trailing argument so the
        // cause and stack trace reach the log instead of being silently dropped.
        log.error("Unexpected error reading string field '{}' in index '{}'", fieldInfo.name, index.getName(), e);
    }
}

Source File: Lucene84PostingsWriter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes the metadata of one term to {@code out}, encoding file pointers as deltas against
 * the previously encoded term state ({@code lastState}).
 *
 * @param out destination for the encoded term metadata
 * @param fieldInfo not read by this method
 * @param _state the term state to encode; cast to {@code IntBlockTermState}
 * @param absolute when true, {@code lastState} is first reset to {@code emptyState}, so the
 *        deltas are taken against the empty state
 * @throws IOException propagated from {@code out}
 */
@Override
public void encodeTerm(DataOutput out, FieldInfo fieldInfo, BlockTermState _state, boolean absolute) throws IOException {
  IntBlockTermState state = (IntBlockTermState)_state;
  if (absolute) {
    lastState = emptyState;
    assert lastState.docStartFP == 0;
  }

  if (lastState.singletonDocID != -1 && state.singletonDocID != -1 && state.docStartFP == lastState.docStartFP) {
    // With runs of rare values such as ID fields, the increment of pointers in the docs file is often 0.
    // Furthermore some ID schemes like auto-increment IDs or Flake IDs are monotonic, so we encode the delta
    // between consecutive doc IDs to save space.
    final long delta = (long) state.singletonDocID - lastState.singletonDocID;
    // low bit set to 1: the remaining bits carry the zig-zag encoded doc ID delta
    out.writeVLong((BitUtil.zigZagEncode(delta) << 1) | 0x01);
  } else {
    // low bit left at 0: the remaining bits carry the docs file pointer delta
    out.writeVLong((state.docStartFP - lastState.docStartFP) << 1);
    if (state.singletonDocID != -1) {
      out.writeVInt(state.singletonDocID);
    }
  }

  if (writePositions) {
    out.writeVLong(state.posStartFP - lastState.posStartFP);
    if (writePayloads || writeOffsets) {
      out.writeVLong(state.payStartFP - lastState.payStartFP);
    }
  }
  if (writePositions) {
    // -1 means no last-position-block offset is recorded; nothing is written in that case
    if (state.lastPosBlockOffset != -1) {
      out.writeVLong(state.lastPosBlockOffset);
    }
  }
  // -1 means no skip offset is recorded; nothing is written in that case
  if (state.skipOffset != -1) {
    out.writeVLong(state.skipOffset);
  }
  // the next term is delta-encoded against this one
  lastState = state;
}

Source File: FieldMetadata.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Constructs field metadata for reading or writing. The three file-pointer fields start out
 * as -1.
 *
 * @param fieldInfo The field this metadata belongs to.
 * @param maxDoc The total number of documents in the segment being written. Asserted to be 0
 *               when {@code isMutable} is false.
 * @param isMutable Set true if this FieldMetadata is created for writing the index. Set false
 *                  if it is used for reading the index.
 */
protected FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
  assert isMutable || maxDoc == 0;
  this.fieldInfo = fieldInfo;
  this.isMutable = isMutable;
  if (isMutable) {
    this.docsSeen = new FixedBitSet(maxDoc);
  } else {
    // docsSeen must not be set when this metadata is immutable, i.e. used for reading.
    this.docsSeen = null;
  }
  this.dictionaryStartFP = -1;
  this.firstBlockStartFP = -1;
  this.lastBlockStartFP = -1;
}

Source File: BloomFilteringPostingsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Serializes the bloom filter for a field, using the downsized version offered by the factory
 * when there is one and the original filter otherwise.
 *
 * @param bloomOutput output the filter is serialized to
 * @param bloomFilter the filter as built
 * @param fieldInfo the field the filter belongs to, passed to the factory
 * @throws IOException propagated from serialization
 */
private void saveAppropriatelySizedBloomFilter(IndexOutput bloomOutput,
    FuzzySet bloomFilter, FieldInfo fieldInfo) throws IOException {
  final FuzzySet downsized = bloomFilterFactory.downsize(fieldInfo, bloomFilter);
  final FuzzySet toSave = (downsized != null) ? downsized : bloomFilter;
  toSave.serialize(bloomOutput);
}

Source File: Lucene50FieldInfosFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes the field infos of a segment to a new file: an index header, the number of fields,
 * one record per field, and a footer.
 *
 * @param directory directory the file is created in
 * @param segmentInfo supplies the segment name and id used for the file name and header
 * @param segmentSuffix suffix used for the file name and header
 * @param infos the field infos to write
 * @param context IO context passed to {@code createOutput}
 * @throws IOException propagated from the output
 */
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) throws IOException {
  final String fileName = IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, EXTENSION);
  try (IndexOutput output = directory.createOutput(fileName, context)) {
    CodecUtil.writeIndexHeader(output, Lucene50FieldInfosFormat.CODEC_NAME, Lucene50FieldInfosFormat.FORMAT_CURRENT, segmentInfo.getId(), segmentSuffix);
    output.writeVInt(infos.size());
    for (FieldInfo fi : infos) {
      // validate the field info before anything of it is written
      fi.checkConsistency();

      output.writeString(fi.name);
      output.writeVInt(fi.number);

      // one byte of boolean flags: term vectors, omitted norms, payloads
      byte bits = 0x0;
      if (fi.hasVectors()) bits |= STORE_TERMVECTOR;
      if (fi.omitsNorms()) bits |= OMIT_NORMS;
      if (fi.hasPayloads()) bits |= STORE_PAYLOADS;
      output.writeByte(bits);

      output.writeByte(indexOptionsByte(fi.getIndexOptions()));

      // one byte derived from the doc values type
      // NOTE(review): the previous comment said "pack the DV type and hasNorms in one byte",
      // but only the doc values type is passed to docValuesByte here - confirm and update.
      output.writeByte(docValuesByte(fi.getDocValuesType()));
      output.writeLong(fi.getDocValuesGen());
      output.writeMapOfStrings(fi.attributes());
    }
    CodecUtil.writeFooter(output);
  }
}

org.apache.lucene.index.FieldInfo Java Examples