org.apache.lucene.index.DocValues#unwrapSingleton

Source File: LongValueFacetCounts.java From lucene-solr with Apache License 2.0

6 votes

private void countAllMultiValued(IndexReader reader, String field) throws IOException {

    for (LeafReaderContext context : reader.leaves()) {

      SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field);
      if (values == null) {
        // this field has no doc values for this segment
        continue;
      }
      NumericDocValues singleValues = DocValues.unwrapSingleton(values);
      if (singleValues != null) {
        countAllOneSegment(singleValues);
      } else {
        int doc;
        while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          int limit = values.docValueCount();
          totCount += limit;
          for (int i = 0; i < limit; i++) {
            increment(values.nextValue());
          }
        }
      }
    }
  }

Source File: SortedSetSelector.java From lucene-solr with Apache License 2.0

6 votes

/** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }
  
  SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
  if (singleton != null) {
    // it's actually single-valued in practice, but indexed as multi-valued,
    // so just sort on the underlying single-valued dv directly.
    // regardless of selector type, this optimization is safe!
    return singleton;
  } else {
    switch(selector) {
      case MIN: return new MinValue(sortedSet);
      case MAX: return new MaxValue(sortedSet);
      case MIDDLE_MIN: return new MiddleMinValue(sortedSet);
      case MIDDLE_MAX: return new MiddleMaxValue(sortedSet);
      default: 
        throw new AssertionError();
    }
  }
}

Source File: SortedNumericDVIndexFieldData.java From Elasticsearch with Apache License 2.0

5 votes

@Override
public SortedNumericDoubleValues getDoubleValues() {
    try {
        SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);

        NumericDocValues single = DocValues.unwrapSingleton(raw);
        if (single != null) {
            return FieldData.singleton(new SingleFloatValues(single), DocValues.unwrapSingletonBits(raw));
        } else {
            return new MultiFloatValues(raw);
        }
    } catch (IOException e) {
        throw new IllegalStateException("Cannot load doc values", e);
    }
}

Source File: GeoPointArrayAtomicFieldData.java From Elasticsearch with Apache License 2.0

5 votes

@Override
public MultiGeoPointValues getGeoPointValues() {
    final RandomAccessOrds ords = ordinals.ordinals();
    final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
    final GeoPoint point = new GeoPoint(Double.NaN, Double.NaN);
    if (singleOrds != null) {
        final GeoPointValues values = new GeoPointValues() {
            @Override
            public GeoPoint get(int docID) {
                final int ord = singleOrds.getOrd(docID);
                if (ord >= 0) {
                    return point.resetFromIndexHash(indexedPoints.get(ord));
                }
                return point.reset(Double.NaN, Double.NaN);
            }
        };
        return FieldData.singleton(values, DocValues.docsWithValue(singleOrds, maxDoc));
    }
    return new MultiGeoPointValues() {
        @Override
        public GeoPoint valueAt(int index) {
            return point.resetFromIndexHash(indexedPoints.get(ords.ordAt(index)));
        }

        @Override
        public void setDocument(int docId) {
            ords.setDocument(docId);
        }

        @Override
        public int count() {
            return ords.cardinality();
        }
    };
}

Source File: LongValueFacetCounts.java From lucene-solr with Apache License 2.0

5 votes

/** Counts directly from SortedNumericDocValues. */
private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException {

  for (MatchingDocs hits : matchingDocs) {
    SortedNumericDocValues values = hits.context.reader().getSortedNumericDocValues(field);
    if (values == null) {
      // this field has no doc values for this segment
      continue;
    }

    NumericDocValues singleValues = DocValues.unwrapSingleton(values);

    if (singleValues != null) {
      countOneSegment(singleValues, hits);
    } else {

      DocIdSetIterator it = ConjunctionDISI.intersectIterators(
                               Arrays.asList(hits.bits.iterator(), values));
    
      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        int limit = values.docValueCount();
        totCount += limit;
        for (int i = 0; i < limit; i++) {
          increment(values.nextValue());
        }
      }
    }
  }
}

Source File: IndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0

5 votes

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost);

  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(context.reader(), field);
      NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues);

      if (numericValues != null) {
        Sort indexSort = context.reader().getMetaData().getSort();
        if (indexSort != null
            && indexSort.getSort().length > 0
            && indexSort.getSort()[0].getField().equals(field)) {

          SortField sortField = indexSort.getSort()[0];
          DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues);
          return new ConstantScoreScorer(this, score(), scoreMode, disi);
        }
      }
      return fallbackWeight.scorer(context);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      // Both queries should always return the same values, so we can just check
      // if the fallback query is cacheable.
      return fallbackWeight.isCacheable(ctx);
    }
  };
}

Source File: SortedNumericSelector.java From lucene-solr with Apache License 2.0

4 votes

/** 
 * Wraps a multi-valued SortedNumericDocValues as a single-valued view, using the specified selector 
 * and numericType.
 */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
  if (numericType != SortField.Type.INT &&
      numericType != SortField.Type.LONG && 
      numericType != SortField.Type.FLOAT &&
      numericType != SortField.Type.DOUBLE) {
    throw new IllegalArgumentException("numericType must be a numeric type");
  }
  final NumericDocValues view;
  NumericDocValues singleton = DocValues.unwrapSingleton(sortedNumeric);
  if (singleton != null) {
    // it's actually single-valued in practice, but indexed as multi-valued,
    // so just sort on the underlying single-valued dv directly.
    // regardless of selector type, this optimization is safe!
    view = singleton;
  } else { 
    switch(selector) {
      case MIN: 
        view = new MinValue(sortedNumeric);
        break;
      case MAX:
        view = new MaxValue(sortedNumeric);
        break;
      default: 
        throw new AssertionError();
    }
  }
  // undo the numericutils sortability
  switch(numericType) {
    case FLOAT:
      return new FilterNumericDocValues(view) {
        @Override
        public long longValue() throws IOException {
          return NumericUtils.sortableFloatBits((int) in.longValue());
        }
      };
    case DOUBLE:
      return new FilterNumericDocValues(view) {
        @Override
        public long longValue() throws IOException {
          return NumericUtils.sortableDoubleBits(in.longValue());
        }
      };
    default:
      return view;
  }
}

Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0

4 votes

@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();

  if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc==null && allBucketsAcc==null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower
  // than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets had 6-7 matches each.
  // this was for heap docvalues produced by UninvertingReader
  // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);  // +3 to increase test coverage with small tests

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg;  // internal - override perSeg heuristic

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);

    setNextReaderFirstPhase(subCtx);

    DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
    DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // some codecs may optimize SortedSet storage for single-valued fields
      // this will be null if this is not a wrapped single valued docvalues.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(singleDv, disi, toGlobal);
        } else {
          collectDocs(singleDv, disi, toGlobal);
        }
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(multiDv, disi, toGlobal);
        } else {
          collectDocs(multiDv, disi, toGlobal);
        }
      }
    }
  }

  reuse = null;  // better GC
}

Java Code Examples for org.apache.lucene.index.DocValues#unwrapSingleton()