org.apache.lucene.index.DocValues#emptySorted

Source File: ToParentBlockJoinSortField.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds the comparator used for string sorting of parent documents.
 *
 * <p>The returned {@code TermOrdValComparator} reads the field's sorted-set doc values and
 * wraps them with a {@code BlockJoinSelector}; the selector type is {@code MAX} when
 * {@code order} is true and {@code MIN} otherwise.
 *
 * @param numHits passed through to the comparator constructor
 * @return a comparator whose per-segment values come from the child documents
 */
private FieldComparator<?> getStringComparator(int numHits) {
  return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {

    @Override
    protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
      final SortedSetDocValues childValues = DocValues.getSortedSet(context.reader(), field);

      final BlockJoinSelector.Type selection;
      if (order) {
        selection = BlockJoinSelector.Type.MAX;
      } else {
        selection = BlockJoinSelector.Type.MIN;
      }

      final BitSet parentBits = parentFilter.getBitSet(context);
      final BitSet childBits = childFilter.getBitSet(context);
      if (childBits != null) {
        return BlockJoinSelector.wrap(childValues, selection, parentBits, toIter(childBits));
      }
      // The child filter produced no bit set for this segment: nothing to select from.
      return DocValues.emptySorted();
    }

  };
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns sorted doc values for {@code field} on the given reader.
 *
 * <p>Doc values supplied directly by the reader are returned as-is. Otherwise the field's
 * {@code FieldInfo} decides the outcome: an unknown field or a field with
 * {@code IndexOptions.NONE} yields an empty instance, and a field that has doc values of
 * some other type is rejected. Only when none of those apply is the cache consulted.
 *
 * @param reader the leaf reader to read from
 * @param field the field name
 * @param acceptableOverheadRatio forwarded into the cache key
 * @return the sorted doc values, possibly empty
 * @throws IllegalStateException if the field has doc values of a type other than NONE
 * @throws IOException declared by the underlying reader access
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues fromReader = reader.getSortedDocValues(field);
  if (fromReader != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return fromReader;
  }

  final FieldInfo info = reader.getFieldInfos().fieldInfo(field);
  if (info == null) {
    return DocValues.emptySorted();
  }
  if (info.getDocValuesType() != DocValuesType.NONE) {
    // we don't try to build a sorted instance from numeric/binary doc
    // values because dedup can be very costly
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType());
  }
  if (info.getIndexOptions() == IndexOptions.NONE) {
    return DocValues.emptySorted();
  }

  final SortedDocValuesImpl cached =
      (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}

Source File: AbstractAtomicParentChildFieldData.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates a field-data instance that holds nothing: it reports no types, no child
 * resources, zero RAM usage, and empty ordinals for every type.
 *
 * @return a new empty {@code AtomicParentChildFieldData}
 */
public static AtomicParentChildFieldData empty() {
    return new AbstractAtomicParentChildFieldData() {

        /** No types are known to the empty instance. */
        @Override
        public Set<String> types() {
            return ImmutableSet.of();
        }

        /** Every type maps to empty sorted doc values. */
        @Override
        public SortedDocValues getOrdinalsValues(String type) {
            return DocValues.emptySorted();
        }

        /** Reports zero bytes. */
        @Override
        public long ramBytesUsed() {
            return 0;
        }

        /** There are no nested accountable resources. */
        @Override
        public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
        }

        /** Nothing to release. */
        @Override
        public void close() {
        }
    };
}

Source File: ParentChildAtomicFieldData.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Looks up the ordinals for {@code type} in {@code typeToIds}.
 *
 * @param type the key to look up
 * @return the {@code MultiValueMode.MIN} selection over the type's ordinals, or empty
 *         sorted doc values when the type has no entry
 */
@Override
public SortedDocValues getOrdinalsValues(String type) {
    final AtomicOrdinalsFieldData perType = typeToIds.get(type);
    if (perType == null) {
        return DocValues.emptySorted();
    }
    return MultiValueMode.MIN.select(perType.getOrdinalsValues());
}

Source File: FieldUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Fetches the sorted doc values of {@code field} from the searcher's slow atomic reader,
 * substituting an empty instance when the reader has none.
 *
 * @param context supplies the searcher
 * @param field the schema field whose name is looked up
 * @param qparser not read by this method
 * @return the field's sorted doc values, never {@code null}
 * @throws IOException declared by the underlying reader access
 */
public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException {
  final SortedDocValues values = context.searcher().getSlowAtomicReader().getSortedDocValues(field.getName());
  if (values != null) {
    return values;
  }
  return DocValues.emptySorted();
}

Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Walks every leaf of the index reader and feeds the documents matched by the base
 * domain's filter into one of three collection routines ({@code collectPerSeg},
 * {@code collectCounts} or the per-document {@code collectDocs} overload).
 *
 * <p>Returns immediately, without touching {@code reuse}, when {@code nTerms <= 0} or the
 * domain is smaller than {@code effectiveMincount}.
 *
 * @throws IOException declared by the doc-values and doc-id-set access
 */
@Override
protected void collectDocs() throws IOException {
  final int domainSize = fcontext.base.size();

  // TODO: what about allBuckets? missing bucket?
  if (nTerms <= 0 || domainSize < effectiveMincount) {
    return;
  }

  // TODO: refactor some of this logic into a base class
  final boolean countOnly = collectAcc == null && allBucketsAcc == null;
  final boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
  // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
  // this was for heap docvalues produced by UninvertingReader
  // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
  final long domainMultiplier;
  if (multiValuedField) {
    domainMultiplier = 4L;
  } else {
    domainMultiplier = 2L;
  }
  // +3 to increase test coverage with small tests
  final boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  final boolean canDoPerSeg = countOnly && fullRange;

  final boolean accumSeg;
  if (freq.perSeg != null) {
    // internal - override perSeg heuristic
    accumSeg = canDoPerSeg && freq.perSeg;
  } else {
    accumSeg = manyHitsPerBucket && canDoPerSeg;
  }

  final List<LeafReaderContext> segments = fcontext.searcher.getIndexReader().leaves();
  final Filter baseFilter = fcontext.base.getTopFilter();

  for (int segIdx = 0; segIdx < segments.size(); segIdx++) {
    final LeafReaderContext segCtx = segments.get(segIdx);

    setNextReaderFirstPhase(segCtx);

    // solr docsets already exclude any deleted docs
    final DocIdSet segDocs = baseFilter.getDocIdSet(segCtx, null);
    final DocIdSetIterator docIt = segDocs.iterator();

    // Exactly one of these drives collection below: the single-valued view when it is
    // available, otherwise the multi-valued one.
    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = segCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // some codecs may optimize SortedSet storage for single-valued fields
      // this will be null if this is not a wrapped single valued docvalues.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = segCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = null;
    if (ordinalMap != null) {
      toGlobal = ordinalMap.getGlobalOrds(segIdx);
    }

    // Counting through the segment->global mapping is only possible when one exists.
    final boolean countsViaMapping = canDoPerSeg && toGlobal != null;

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, docIt, toGlobal);
      } else if (countsViaMapping) {
        collectCounts(singleDv, docIt, toGlobal);
      } else {
        collectDocs(singleDv, docIt, toGlobal);
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, docIt, toGlobal);
      } else if (countsViaMapping) {
        collectCounts(multiDv, docIt, toGlobal);
      } else {
        collectDocs(multiDv, docIt, toGlobal);
      }
    }
  }

  reuse = null;  // better GC
}

Java Code Examples for org.apache.lucene.index.DocValues#emptySorted()