Java Code Examples for org.apache.lucene.index.IndexOptions#DOCS
The following examples show how to use org.apache.lucene.index.IndexOptions#DOCS.
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
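Before the examples, a minimal, self-contained sketch of what IndexOptions.DOCS means in practice: the postings record only which documents contain each term, so term frequency collapses to 1 per document. The class name DocsOnlyDemo and the field name tags are illustrative, not taken from any of the projects below; the Lucene calls are the standard public API of the Lucene 8.x era these snippets come from.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class DocsOnlyDemo {
  public static void main(String[] args) throws Exception {
    // Docs-only postings: the index records which documents contain a term,
    // but not how often or where the term occurs.
    FieldType docsOnly = new FieldType();
    docsOnly.setTokenized(true);
    docsOnly.setIndexOptions(IndexOptions.DOCS);
    docsOnly.freeze();

    try (Directory dir = new ByteBuffersDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new Field("tags", "red red blue", docsOnly));
      writer.addDocument(doc);
      writer.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        Term red = new Term("tags", "red");
        System.out.println(reader.docFreq(red));        // 1: one matching document
        System.out.println(reader.totalTermFreq(red));  // also 1: tf is treated as 1 under DOCS
      }
    }
  }
}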
Example 1
Source File: PreAnalyzedField.java From lucene-solr with Apache License 2.0
/**
 * Utility method to create a {@link org.apache.lucene.document.FieldType}
 * based on the {@link SchemaField}
 */
public static org.apache.lucene.document.FieldType createFieldType(SchemaField field) {
  if (!field.indexed() && !field.stored()) {
    log.trace("Ignoring unindexed/unstored field: {}", field);
    return null;
  }
  org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType();
  newType.setTokenized(field.isTokenized());
  newType.setStored(field.stored());
  newType.setOmitNorms(field.omitNorms());
  IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  if (field.omitTermFreqAndPositions()) {
    options = IndexOptions.DOCS;
  } else if (field.omitPositions()) {
    options = IndexOptions.DOCS_AND_FREQS;
  } else if (field.storeOffsetsWithPositions()) {
    options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  newType.setIndexOptions(options);
  newType.setStoreTermVectors(field.storeTermVector());
  newType.setStoreTermVectorOffsets(field.storeTermOffsets());
  newType.setStoreTermVectorPositions(field.storeTermPositions());
  newType.setStoreTermVectorPayloads(field.storeTermPayloads());
  return newType;
}
Example 2
Source File: SchemaField.java From lucene-solr with Apache License 2.0
@Override
public IndexOptions indexOptions() {
  if (!indexed()) {
    return IndexOptions.NONE;
  }
  IndexOptions options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  if (omitTermFreqAndPositions()) {
    options = IndexOptions.DOCS;
  } else if (omitPositions()) {
    options = IndexOptions.DOCS_AND_FREQS;
  } else if (storeOffsetsWithPositions()) {
    options = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  return options;
}
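Note the precedence shared by Examples 1 and 2: omitTermFreqAndPositions() wins over the other flags, so a field that omits term frequencies ends up with plain IndexOptions.DOCS even if storeOffsetsWithPositions() is also set, since offsets cannot exist without positions.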
Example 3
Source File: BlockTermsWriter.java From lucene-solr with Apache License 2.0
@Override
public void close() throws IOException {
  if (out != null) {
    try {
      final long dirStart = out.getFilePointer();
      out.writeVInt(fields.size());
      for (FieldMetaData field : fields) {
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.numTerms);
        out.writeVLong(field.termsStartPointer);
        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
          out.writeVLong(field.sumTotalTermFreq);
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
      }
      writeTrailer(dirStart);
      CodecUtil.writeFooter(out);
    } finally {
      IOUtils.close(out, postingsWriter, termsIndexWriter);
      out = null;
    }
  }
}
Example 4
Source File: Lucene50FieldInfosFormat.java From lucene-solr with Apache License 2.0
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
  switch (b) {
    case 0:
      return IndexOptions.NONE;
    case 1:
      return IndexOptions.DOCS;
    case 2:
      return IndexOptions.DOCS_AND_FREQS;
    case 3:
      return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    case 4:
      return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    default:
      // BUG
      throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
  }
}
Example 5
Source File: Lucene60FieldInfosFormat.java From lucene-solr with Apache License 2.0
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
  switch (b) {
    case 0:
      return IndexOptions.NONE;
    case 1:
      return IndexOptions.DOCS;
    case 2:
      return IndexOptions.DOCS_AND_FREQS;
    case 3:
      return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    case 4:
      return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    default:
      // BUG
      throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
  }
}
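Examples 4 and 5 are the read side of the field-infos codec. The write side must map each enum constant back to the same byte; the following is a hedged sketch of that inverse (the real formats contain an equivalent private helper, though its exact name is not shown here):

// Sketch of the inverse mapping. An explicit switch is used rather than
// IndexOptions.values()[b] so the on-disk bytes stay stable even if the
// enum's declaration order ever changed.
private static byte indexOptionsByte(IndexOptions indexOptions) {
  switch (indexOptions) {
    case NONE:                                     return 0;
    case DOCS:                                     return 1;
    case DOCS_AND_FREQS:                           return 2;
    case DOCS_AND_FREQS_AND_POSITIONS:             return 3;
    case DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS: return 4;
    default:
      throw new AssertionError("unhandled IndexOptions: " + indexOptions);
  }
}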
Example 6
Source File: TypeParsers.java From crate with Apache License 2.0
private static IndexOptions nodeIndexOptionValue(final Object propNode) {
  final String value = propNode.toString();
  if (INDEX_OPTIONS_OFFSETS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  } else if (INDEX_OPTIONS_POSITIONS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else if (INDEX_OPTIONS_FREQS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS;
  } else if (INDEX_OPTIONS_DOCS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS;
  } else {
    throw new ElasticsearchParseException("failed to parse index option [{}]", value);
  }
}
Example 7
Source File: TypeParsers.java From Elasticsearch with Apache License 2.0
private static IndexOptions nodeIndexOptionValue(final Object propNode) {
  final String value = propNode.toString();
  if (INDEX_OPTIONS_OFFSETS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  } else if (INDEX_OPTIONS_POSITIONS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else if (INDEX_OPTIONS_FREQS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS_AND_FREQS;
  } else if (INDEX_OPTIONS_DOCS.equalsIgnoreCase(value)) {
    return IndexOptions.DOCS;
  } else {
    throw new ElasticsearchParseException("failed to parse index option [{}]", value);
  }
}
Example 8
Source File: TermVectorLeafReader.java From lucene-solr with Apache License 2.0
public TermVectorLeafReader(String field, Terms terms) {
  fields = new Fields() {
    @Override
    public Iterator<String> iterator() {
      return Collections.singletonList(field).iterator();
    }

    @Override
    public Terms terms(String fld) throws IOException {
      if (!field.equals(fld)) {
        return null;
      }
      return terms;
    }

    @Override
    public int size() {
      return 1;
    }
  };

  IndexOptions indexOptions;
  if (!terms.hasFreqs()) {
    indexOptions = IndexOptions.DOCS;
  } else if (!terms.hasPositions()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS;
  } else if (!terms.hasOffsets()) {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  } else {
    indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  }
  FieldInfo fieldInfo = new FieldInfo(field, 0, true, true, terms.hasPayloads(),
      indexOptions, DocValuesType.NONE, -1, Collections.emptyMap(), 0, 0, 0, false);
  fieldInfos = new FieldInfos(new FieldInfo[]{fieldInfo});
}
Example 9
Source File: FSTTermsWriter.java From lucene-solr with Apache License 2.0
@Override
public void close() throws IOException {
  if (out != null) {
    boolean success = false;
    try {
      // write field summary
      final long dirStart = out.getFilePointer();
      out.writeVInt(fields.size());
      for (FieldMetaData field : fields) {
        out.writeVInt(field.fieldInfo.number);
        out.writeVLong(field.numTerms);
        if (field.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
          out.writeVLong(field.sumTotalTermFreq);
        }
        out.writeVLong(field.sumDocFreq);
        out.writeVInt(field.docCount);
        field.dict.save(out, out);
      }
      writeTrailer(out, dirStart);
      CodecUtil.writeFooter(out);
      success = true;
    } finally {
      if (success) {
        IOUtils.close(out, postingsWriter);
      } else {
        IOUtils.closeWhileHandlingException(out, postingsWriter);
      }
      out = null;
    }
  }
}
Example 10
Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0
public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader) throws IOException {
  final String termsFileName = IndexFileNames.segmentFileName(
      state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

  this.postingsReader = postingsReader;
  final IndexInput in = state.directory.openInput(termsFileName, state.context);

  boolean success = false;
  try {
    CodecUtil.checkIndexHeader(in, FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START,
        FSTTermsWriter.TERMS_VERSION_CURRENT,
        state.segmentInfo.getId(), state.segmentSuffix);
    CodecUtil.checksumEntireFile(in);
    this.postingsReader.init(in, state);
    seekDir(in);

    final FieldInfos fieldInfos = state.fieldInfos;
    final int numFields = in.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = in.readVInt();
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
      long numTerms = in.readVLong();
      long sumTotalTermFreq = in.readVLong();
      // if frequencies are omitted, sumTotalTermFreq=sumDocFreq and we only write one value
      long sumDocFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS
          ? sumTotalTermFreq : in.readVLong();
      int docCount = in.readVInt();
      TermsReader current = new TermsReader(fieldInfo, in, numTerms, sumTotalTermFreq, sumDocFreq, docCount);
      TermsReader previous = fields.put(fieldInfo.name, current);
      checkFieldSummary(state.segmentInfo, in, current, previous);
    }
    success = true;
  } finally {
    if (success) {
      IOUtils.close(in);
    } else {
      IOUtils.closeWhileHandlingException(in);
    }
  }
}
Example 11
Source File: TFIDFSimilarity.java From lucene-solr with Apache License 2.0
@Override
public final long computeNorm(FieldInvertState state) {
  final int numTerms;
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    numTerms = state.getUniqueTermCount();
  } else if (discountOverlaps) {
    numTerms = state.getLength() - state.getNumOverlap();
  } else {
    numTerms = state.getLength();
  }
  return SmallFloat.intToByte4(numTerms);
}
Example 12
Source File: PerFieldMergeState.java From lucene-solr with Apache License 2.0
FilterFieldInfos(FieldInfos src, Collection<String> filterFields) {
  // Copy all the input FieldInfo objects since the field numbering must be kept consistent
  super(toArray(src));

  boolean hasVectors = false;
  boolean hasProx = false;
  boolean hasPayloads = false;
  boolean hasOffsets = false;
  boolean hasFreq = false;
  boolean hasNorms = false;
  boolean hasDocValues = false;
  boolean hasPointValues = false;

  this.filteredNames = new HashSet<>(filterFields);
  this.filtered = new ArrayList<>(filterFields.size());
  for (FieldInfo fi : src) {
    if (this.filteredNames.contains(fi.name)) {
      this.filtered.add(fi);
      hasVectors |= fi.hasVectors();
      hasProx |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
      hasFreq |= fi.getIndexOptions() != IndexOptions.DOCS;
      hasOffsets |= fi.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
      hasNorms |= fi.hasNorms();
      hasDocValues |= fi.getDocValuesType() != DocValuesType.NONE;
      hasPayloads |= fi.hasPayloads();
      hasPointValues |= (fi.getPointDimensionCount() != 0);
    }
  }

  this.filteredHasVectors = hasVectors;
  this.filteredHasProx = hasProx;
  this.filteredHasPayloads = hasPayloads;
  this.filteredHasOffsets = hasOffsets;
  this.filteredHasFreq = hasFreq;
  this.filteredHasNorms = hasNorms;
  this.filteredHasDocValues = hasDocValues;
  this.filteredHasPointValues = hasPointValues;
}
Example 13
Source File: SimilarityBase.java From lucene-solr with Apache License 2.0
/** Encodes the document length in the same way as {@link BM25Similarity}. */
@Override
public final long computeNorm(FieldInvertState state) {
  final int numTerms;
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    numTerms = state.getUniqueTermCount();
  } else if (discountOverlaps) {
    numTerms = state.getLength() - state.getNumOverlap();
  } else {
    numTerms = state.getLength();
  }
  return SmallFloat.intToByte4(numTerms);
}
Example 14
Source File: BM25Similarity.java From lucene-solr with Apache License 2.0
@Override
public final long computeNorm(FieldInvertState state) {
  final int numTerms;
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    numTerms = state.getUniqueTermCount();
  } else if (discountOverlaps) {
    numTerms = state.getLength() - state.getNumOverlap();
  } else {
    numTerms = state.getLength();
  }
  return SmallFloat.intToByte4(numTerms);
}
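Examples 11, 13, and 14 share the same norm computation: for docs-only fields in indexes created with Lucene 8 or later, the unique term count stands in for the field length, since term frequencies are not recorded. Either way the count is squeezed into one byte by SmallFloat.intToByte4, a lossy float-like encoding. A small standalone sketch (the class name NormEncodingDemo is illustrative; SmallFloat is Lucene's org.apache.lucene.util.SmallFloat):

import org.apache.lucene.util.SmallFloat;

public class NormEncodingDemo {
  public static void main(String[] args) {
    // intToByte4 stores small counts exactly and buckets larger ones,
    // so a single byte per document suffices for length normalization.
    for (int len : new int[] {1, 10, 100, 1000, 10_000}) {
      byte norm = SmallFloat.intToByte4(len);
      System.out.println(len + " terms -> byte " + norm
          + " -> decoded " + SmallFloat.byte4ToInt(norm));
    }
  }
}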
Example 15
Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0
@Override
public long totalTermFreq() {
  return indexOptions == IndexOptions.DOCS ? docFreq : totalTermFreq;
}
Example 16
Source File: IdFieldMapper.java From Elasticsearch with Apache License 2.0
@Override
protected IndexOptions getDefaultIndexOption() {
  return IndexOptions.DOCS;
}
Example 17
Source File: BlockTermsReader.java From lucene-solr with Apache License 2.0
private void decodeMetaData() throws IOException {
  //System.out.println("BTR.decodeMetadata mdUpto=" + metaDataUpto + " vs termCount=" + state.termBlockOrd + " state=" + state);
  if (!seekPending) {
    // TODO: cutover to random-access API
    // here.... really stupid that we have to decode N
    // wasted term metadata just to get to the N+1th
    // that we really need...

    // lazily catch up on metadata decode:
    final int limit = state.termBlockOrd;
    boolean absolute = metaDataUpto == 0;
    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {
      //System.out.println("  decode mdUpto=" + metaDataUpto);

      // TODO: we could make "tiers" of metadata, ie,
      // decode docFreq/totalTF but don't decode postings
      // metadata; this way caller could get
      // docFreq/totalTF w/o paying decode cost for
      // postings

      // TODO: if docFreq were bulk decoded we could
      // just skipN here:

      // docFreq, totalTermFreq
      state.docFreq = freqReader.readVInt();
      //System.out.println("    dF=" + state.docFreq);
      if (fieldInfo.getIndexOptions() == IndexOptions.DOCS) {
        state.totalTermFreq = state.docFreq; // all postings have tf=1
      } else {
        state.totalTermFreq = state.docFreq + freqReader.readVLong();
        //System.out.println("    totTF=" + state.totalTermFreq);
      }
      // metadata
      postingsReader.decodeTerm(bytesReader, fieldInfo, state, absolute);
      metaDataUpto++;
      absolute = false;
    }
  } else {
    //System.out.println("  skip! seekPending");
  }
}
Example 18
Source File: FSTTermOutputs.java From lucene-solr with Apache License 2.0
protected FSTTermOutputs(FieldInfo fieldInfo) {
  this.hasPos = fieldInfo.getIndexOptions() != IndexOptions.DOCS;
}
Example 19
Source File: TypeParsers.java From Elasticsearch with Apache License 2.0
/**
 * Parse common field attributes such as {@code doc_values} or {@code store}.
 */
public static void parseField(FieldMapper.Builder builder, String name, Map<String, Object> fieldNode,
                              Mapper.TypeParser.ParserContext parserContext) {
  Version indexVersionCreated = parserContext.indexVersionCreated();
  for (Iterator<Map.Entry<String, Object>> iterator = fieldNode.entrySet().iterator(); iterator.hasNext();) {
    Map.Entry<String, Object> entry = iterator.next();
    final String propName = Strings.toUnderscoreCase(entry.getKey());
    final Object propNode = entry.getValue();
    if (propName.equals("index_name") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
      builder.indexName(propNode.toString());
      iterator.remove();
    } else if (propName.equals("store")) {
      builder.store(parseStore(name, propNode.toString()));
      iterator.remove();
    } else if (propName.equals("index")) {
      parseIndex(name, propNode.toString(), builder);
      iterator.remove();
    } else if (propName.equals(DOC_VALUES)) {
      builder.docValues(nodeBooleanValue(propNode));
      iterator.remove();
    } else if (propName.equals("boost")) {
      builder.boost(nodeFloatValue(propNode));
      iterator.remove();
    } else if (propName.equals("omit_norms")) {
      builder.omitNorms(nodeBooleanValue(propNode));
      iterator.remove();
    } else if (propName.equals("norms")) {
      final Map<String, Object> properties = nodeMapValue(propNode, "norms");
      for (Iterator<Entry<String, Object>> propsIterator = properties.entrySet().iterator(); propsIterator.hasNext();) {
        Entry<String, Object> entry2 = propsIterator.next();
        final String propName2 = Strings.toUnderscoreCase(entry2.getKey());
        final Object propNode2 = entry2.getValue();
        if (propName2.equals("enabled")) {
          builder.omitNorms(!nodeBooleanValue(propNode2));
          propsIterator.remove();
        } else if (propName2.equals(Loading.KEY)) {
          builder.normsLoading(Loading.parse(nodeStringValue(propNode2, null), null));
          propsIterator.remove();
        }
      }
      DocumentMapperParser.checkNoRemainingFields(propName, properties, parserContext.indexVersionCreated());
      iterator.remove();
    } else if (propName.equals("omit_term_freq_and_positions")) {
      final IndexOptions op = nodeBooleanValue(propNode)
          ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
      if (indexVersionCreated.onOrAfter(Version.V_1_0_0_RC2)) {
        throw new ElasticsearchParseException("'omit_term_freq_and_positions' is not supported anymore - use ['index_options' : 'docs'] instead");
      }
      // deprecated option for BW compat
      builder.indexOptions(op);
      iterator.remove();
    } else if (propName.equals("index_options")) {
      builder.indexOptions(nodeIndexOptionValue(propNode));
      iterator.remove();
    } else if (propName.equals("include_in_all")) {
      builder.includeInAll(nodeBooleanValue(propNode));
      iterator.remove();
    } else if (propName.equals("postings_format") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
      // ignore for old indexes
      iterator.remove();
    } else if (propName.equals("doc_values_format") && indexVersionCreated.before(Version.V_2_0_0_beta1)) {
      // ignore for old indexes
      iterator.remove();
    } else if (propName.equals("similarity")) {
      builder.similarity(parserContext.similarityLookupService().similarity(propNode.toString()));
      iterator.remove();
    } else if (propName.equals("fielddata")) {
      final Settings settings = Settings.builder()
          .put(SettingsLoader.Helper.loadNestedFromMap(nodeMapValue(propNode, "fielddata"))).build();
      builder.fieldDataSettings(settings);
      iterator.remove();
    } else if (propName.equals("copy_to")) {
      if (parserContext.isWithinMultiField()) {
        if (indexVersionCreated.after(Version.V_2_1_0)
            || (indexVersionCreated.after(Version.V_2_0_1) && indexVersionCreated.before(Version.V_2_1_0))) {
          throw new MapperParsingException("copy_to in multi fields is not allowed. Found the copy_to in field ["
              + name + "] which is within a multi field.");
        } else {
          ESLoggerFactory.getLogger("mapping [" + parserContext.type() + "]")
              .warn("Found a copy_to in field [" + name + "] which is within a multi field. This feature has been removed and the copy_to will be ignored.");
          // we still parse this, otherwise the message will only appear once and the copy_to removed.
          // After that it will appear again. Better to have it always.
        }
      }
      parseCopyFields(propNode, builder);
      iterator.remove();
    }
  }
  if (indexVersionCreated.before(Version.V_2_2_0)) {
    // analyzer, search_analyzer, term_vectors were accepted on all fields
    // before 2.2, even though it made little sense
    parseAnalyzersAndTermVectors(builder, name, fieldNode, parserContext);
  }
}