Java Code Examples for org.apache.lucene.index.DocValues#unwrapSingleton()
The following examples show how to use
org.apache.lucene.index.DocValues#unwrapSingleton().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LongValueFacetCounts.java From lucene-solr with Apache License 2.0 | 6 votes |
private void countAllMultiValued(IndexReader reader, String field) throws IOException { for (LeafReaderContext context : reader.leaves()) { SortedNumericDocValues values = context.reader().getSortedNumericDocValues(field); if (values == null) { // this field has no doc values for this segment continue; } NumericDocValues singleValues = DocValues.unwrapSingleton(values); if (singleValues != null) { countAllOneSegment(singleValues); } else { int doc; while ((doc = values.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { int limit = values.docValueCount(); totCount += limit; for (int i = 0; i < limit; i++) { increment(values.nextValue()); } } } } }
Example 2
Source File: SortedSetSelector.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */ public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) { if (sortedSet.getValueCount() >= Integer.MAX_VALUE) { throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported"); } SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet); if (singleton != null) { // it's actually single-valued in practice, but indexed as multi-valued, // so just sort on the underlying single-valued dv directly. // regardless of selector type, this optimization is safe! return singleton; } else { switch(selector) { case MIN: return new MinValue(sortedSet); case MAX: return new MaxValue(sortedSet); case MIDDLE_MIN: return new MiddleMinValue(sortedSet); case MIDDLE_MAX: return new MiddleMaxValue(sortedSet); default: throw new AssertionError(); } } }
Example 3
Source File: SortedNumericDVIndexFieldData.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Loads the sorted-numeric doc values for {@code field} from {@code reader} and exposes them as
 * {@code SortedNumericDoubleValues}.
 *
 * <p>If the doc values unwrap to a single-valued view, the result is built with
 * {@code FieldData.singleton} from a {@code SingleFloatValues} and the singleton's bits;
 * otherwise a {@code MultiFloatValues} over the raw values is returned.
 *
 * @throws IllegalStateException wrapping any {@code IOException} raised while loading
 */
@Override
public SortedNumericDoubleValues getDoubleValues() {
  try {
    final SortedNumericDocValues multiValued = DocValues.getSortedNumeric(reader, field);
    final NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
    if (singleValued == null) {
      return new MultiFloatValues(multiValued);
    }
    return FieldData.singleton(
        new SingleFloatValues(singleValued),
        DocValues.unwrapSingletonBits(multiValued));
  } catch (IOException ioe) {
    throw new IllegalStateException("Cannot load doc values", ioe);
  }
}
Example 4
Source File: GeoPointArrayAtomicFieldData.java From Elasticsearch with Apache License 2.0 | 5 votes |
/**
 * Builds a {@code MultiGeoPointValues} view over this field data's ordinals.
 *
 * <p>One {@code GeoPoint} instance is created per call and is reset and returned by every
 * lookup on the returned view, so callers receive the same object each time.
 *
 * <p>When the ordinals unwrap to a single-valued view, a per-document lookup is used: a
 * non-negative ordinal resolves through {@code indexedPoints}, and a negative ordinal yields
 * a point reset to (NaN, NaN). Otherwise the view delegates document positioning and
 * cardinality to the multi-valued ordinals.
 */
@Override
public MultiGeoPointValues getGeoPointValues() {
  final RandomAccessOrds multiOrds = ordinals.ordinals();
  final SortedDocValues singleton = DocValues.unwrapSingleton(multiOrds);
  final GeoPoint scratch = new GeoPoint(Double.NaN, Double.NaN);

  if (singleton == null) {
    // genuinely multi-valued: resolve each ordinal of the current document on demand
    return new MultiGeoPointValues() {
      @Override
      public void setDocument(int docId) {
        multiOrds.setDocument(docId);
      }

      @Override
      public int count() {
        return multiOrds.cardinality();
      }

      @Override
      public GeoPoint valueAt(int index) {
        return scratch.resetFromIndexHash(indexedPoints.get(multiOrds.ordAt(index)));
      }
    };
  }

  final GeoPointValues singleValues = new GeoPointValues() {
    @Override
    public GeoPoint get(int docID) {
      final int ord = singleton.getOrd(docID);
      return ord < 0
          ? scratch.reset(Double.NaN, Double.NaN)
          : scratch.resetFromIndexHash(indexedPoints.get(ord));
    }
  };
  return FieldData.singleton(singleValues, DocValues.docsWithValue(singleton, maxDoc));
}
Example 5
Source File: LongValueFacetCounts.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Counts directly from SortedNumericDocValues. */ private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException { for (MatchingDocs hits : matchingDocs) { SortedNumericDocValues values = hits.context.reader().getSortedNumericDocValues(field); if (values == null) { // this field has no doc values for this segment continue; } NumericDocValues singleValues = DocValues.unwrapSingleton(values); if (singleValues != null) { countOneSegment(singleValues, hits); } else { DocIdSetIterator it = ConjunctionDISI.intersectIterators( Arrays.asList(hits.bits.iterator(), values)); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { int limit = values.docValueCount(); totCount += limit; for (int i = 0; i < limit; i++) { increment(values.nextValue()); } } } } }
Example 6
Source File: IndexSortSortedNumericDocValuesRangeQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost); return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(context.reader(), field); NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); if (numericValues != null) { Sort indexSort = context.reader().getMetaData().getSort(); if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getField().equals(field)) { SortField sortField = indexSort.getSort()[0]; DocIdSetIterator disi = getDocIdSetIterator(sortField, context, numericValues); return new ConstantScoreScorer(this, score(), scoreMode, disi); } } return fallbackWeight.scorer(context); } @Override public boolean isCacheable(LeafReaderContext ctx) { // Both queries should always return the same values, so we can just check // if the fallback query is cacheable. return fallbackWeight.isCacheable(ctx); } }; }
Example 7
Source File: SortedNumericSelector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * Wraps a multi-valued SortedNumericDocValues as a single-valued view, using the specified selector * and numericType. */ public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) { if (numericType != SortField.Type.INT && numericType != SortField.Type.LONG && numericType != SortField.Type.FLOAT && numericType != SortField.Type.DOUBLE) { throw new IllegalArgumentException("numericType must be a numeric type"); } final NumericDocValues view; NumericDocValues singleton = DocValues.unwrapSingleton(sortedNumeric); if (singleton != null) { // it's actually single-valued in practice, but indexed as multi-valued, // so just sort on the underlying single-valued dv directly. // regardless of selector type, this optimization is safe! view = singleton; } else { switch(selector) { case MIN: view = new MinValue(sortedNumeric); break; case MAX: view = new MaxValue(sortedNumeric); break; default: throw new AssertionError(); } } // undo the numericutils sortability switch(numericType) { case FLOAT: return new FilterNumericDocValues(view) { @Override public long longValue() throws IOException { return NumericUtils.sortableFloatBits((int) in.longValue()); } }; case DOUBLE: return new FilterNumericDocValues(view) { @Override public long longValue() throws IOException { return NumericUtils.sortableDoubleBits(in.longValue()); } }; default: return view; } }
Example 8
Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override protected void collectDocs() throws IOException { int domainSize = fcontext.base.size(); if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket? return; } // TODO: refactor some of this logic into a base class boolean countOnly = collectAcc==null && allBucketsAcc==null; boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount(); // Are we expecting many hits per bucket? // FUTURE: pro-rate for nTerms? // FUTURE: better take into account number of values in multi-valued fields. This info is available for indexed fields. // FUTURE: take into account that bigger ord maps are more expensive than smaller ones // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where global counting was slower // than per-segment counting was a domain of 658k docs. At that point, top 10 buckets had 6-7 matches each. // this was for heap docvalues produced by UninvertingReader // Since these values were randomly distributed, lets round our domain multiplier up to account for less random real world data. long domainMultiplier = multiValuedField ? 4L : 2L; boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3); // +3 to increase test coverage with small tests // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value, // then collect per-segment before mapping to global ords at the end. This will save redundant seg->global ord mappings. // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. 
those where // the docid is not used) boolean canDoPerSeg = countOnly && fullRange; boolean accumSeg = manyHitsPerBucket && canDoPerSeg; if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg; // internal - override perSeg heuristic final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves(); Filter filter = fcontext.base.getTopFilter(); for (int subIdx = 0; subIdx < leaves.size(); subIdx++) { LeafReaderContext subCtx = leaves.get(subIdx); setNextReaderFirstPhase(subCtx); DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs DocIdSetIterator disi = dis.iterator(); SortedDocValues singleDv = null; SortedSetDocValues multiDv = null; if (multiValuedField) { // TODO: get sub from multi? multiDv = subCtx.reader().getSortedSetDocValues(sf.getName()); if (multiDv == null) { multiDv = DocValues.emptySortedSet(); } // some codecs may optimize SortedSet storage for single-valued fields // this will be null if this is not a wrapped single valued docvalues. if (unwrap_singleValued_multiDv) { singleDv = DocValues.unwrapSingleton(multiDv); } } else { singleDv = subCtx.reader().getSortedDocValues(sf.getName()); if (singleDv == null) { singleDv = DocValues.emptySorted(); } } LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx); if (singleDv != null) { if (accumSeg) { collectPerSeg(singleDv, disi, toGlobal); } else { if (canDoPerSeg && toGlobal != null) { collectCounts(singleDv, disi, toGlobal); } else { collectDocs(singleDv, disi, toGlobal); } } } else { if (accumSeg) { collectPerSeg(multiDv, disi, toGlobal); } else { if (canDoPerSeg && toGlobal != null) { collectCounts(multiDv, disi, toGlobal); } else { collectDocs(multiDv, disi, toGlobal); } } } } reuse = null; // better GC }