Java Code Examples for org.apache.lucene.index.SortedDocValues#getValueCount()
The following examples show how to use
org.apache.lucene.index.SortedDocValues#getValueCount().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LegacyDocValuesIterables.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Converts {@link SortedDocValues} into an {@code Iterable<BytesRef>} over all the values,
 * visiting ordinals {@code 0} up to {@code values.getValueCount() - 1} in increasing order.
 *
 * <p>Every call to {@code iterator()} starts again at ordinal 0. An {@link IOException} raised
 * while looking up a value is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param values the doc values whose per-ordinal values are exposed
 * @return an iterable whose iterators yield {@code values.lookupOrd(ord)} for each ordinal
 * @deprecated Consume {@link SortedDocValues} instead.
 */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        /** Ordinal that the next call to {@link #next()} will look up. */
        private int nextOrd;

        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          // Honor the Iterator contract instead of handing an out-of-range ordinal to lookupOrd.
          if (!hasNext()) {
            throw new java.util.NoSuchElementException("no more values; next ordinal would be " + nextOrd);
          }
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}
Example 2
Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0 | 6 votes |
/** "typical" single-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */ static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { // First count in seg-ord space: final int segCounts[]; if (map == null) { segCounts = counts; } else { segCounts = new int[1+si.getValueCount()]; } int doc; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (si.advanceExact(doc)) { segCounts[1+si.ordValue()]++; } else { segCounts[0]++; } } // migrate to global ords (if necessary) if (map != null) { migrateGlobal(counts, segCounts, subIndex, map); } }
Example 3
Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0 | 5 votes |
/** accumulates per-segment single-valued facet counts */ static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException { if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) { // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): // collect separately per-segment, then map to global ords accumSingleSeg(counts, si, disi, subIndex, map); } else { // otherwise: do collect+map on the fly accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map); } }
Example 4
Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0 | 5 votes |
private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException { int segMax = singleDv.getValueCount() + 1; final int[] counts = getCountArr( segMax ); /** alternate trial implementations // ord // FieldUtil.visitOrds(singleDv, disi, (doc,ord)->{counts[ord+1]++;} ); FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi); while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { counts[ ordValues.getOrd() + 1]++; } **/ // calculate segment-local counts int doc; if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) { FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv; while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { counts[fc.getOrd(doc) + 1]++; } } else { while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { if (singleDv.advanceExact(doc)) { counts[singleDv.ordValue() + 1]++; } } } // convert segment-local counts to global counts for (int i=1; i<segMax; i++) { int segCount = counts[i]; if (segCount > 0) { int slot = toGlobal == null ? (i - 1) : (int) toGlobal.get(i - 1); countAcc.incrementCount(slot, segCount); } } }
Example 5
Source File: TestFieldCacheVsDocValues.java From lucene-solr with Apache License 2.0 | 5 votes |
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception { // can be null for the segment if no docs actually had any SortedDocValues // in this case FC.getDocTermsOrds returns EMPTY if (actual == null) { assertEquals(expected.getValueCount(), 0); return; } assertEquals(expected.getValueCount(), actual.getValueCount()); // compare ord lists while (true) { int docID = expected.nextDoc(); if (docID == NO_MORE_DOCS) { assertEquals(NO_MORE_DOCS, actual.nextDoc()); break; } assertEquals(docID, actual.nextDoc()); assertEquals(expected.ordValue(), actual.ordValue()); assertEquals(expected.binaryValue(), actual.binaryValue()); } // compare ord dictionary for (long i = 0; i < expected.getValueCount(); i++) { final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd((int) i)); final BytesRef actualBytes = actual.lookupOrd((int) i); assertEquals(expectedBytes, actualBytes); } // compare termsenum assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum()); }
Example 6
Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0 | 5 votes |
@Override public SortedDocValues getSortedDocValues(String field) throws IOException { final SortedDocValues sortedDocValues = in.getSortedDocValues(field); if (sortedDocValues == null) { return null; } return new SortedDocValues() { @Override public void lookupOrd(int ord, BytesRef result) { sortedDocValues.lookupOrd(ord, result); } @Override public int getValueCount() { return sortedDocValues.getValueCount(); } @Override public int getOrd(int docID) { try { if (_accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)) { return sortedDocValues.getOrd(docID); } return -1; // Default missing value. } catch (IOException e) { throw new RuntimeException(e); } } }; }
Example 7
Source File: JoinUtil.java From lucene-solr with Apache License 2.0 | 4 votes |
/** * A query time join using global ordinals over a dedicated join field. * * This join has certain restrictions and requirements: * 1) A document can only refer to one other document. (but can be referred by one or more documents) * 2) Documents on each side of the join must be distinguishable. Typically this can be done by adding an extra field * that identifies the "from" and "to" side and then the fromQuery and toQuery must take the this into account. * 3) There must be a single sorted doc values join field used by both the "from" and "to" documents. This join field * should store the join values as UTF-8 strings. * 4) An ordinal map must be provided that is created on top of the join field. * * Note: min and max filtering and the avg score mode will require this join to keep track of the number of times * a document matches per join value. This will increase the per join cost in terms of execution time and memory. * * @param joinField The {@link SortedDocValues} field containing the join values * @param fromQuery The query containing the actual user query. Also the fromQuery can only match "from" documents. * @param toQuery The query identifying all documents on the "to" side. * @param searcher The index searcher used to execute the from query * @param scoreMode Instructs how scores from the fromQuery are mapped to the returned query * @param ordinalMap The ordinal map constructed over the joinField. In case of a single segment index, no ordinal map * needs to be provided. * @param min Optionally the minimum number of "from" documents that are required to match for a "to" document * to be a match. The min is inclusive. Setting min to 0 and max to <code>Interger.MAX_VALUE</code> * disables the min and max "from" documents filtering * @param max Optionally the maximum number of "from" documents that are allowed to match for a "to" document * to be a match. The max is inclusive. 
Setting min to 0 and max to <code>Interger.MAX_VALUE</code> * disables the min and max "from" documents filtering * @return a {@link Query} instance that can be used to join documents based on the join field * @throws IOException If I/O related errors occur */ public static Query createJoinQuery(String joinField, Query fromQuery, Query toQuery, IndexSearcher searcher, ScoreMode scoreMode, OrdinalMap ordinalMap, int min, int max) throws IOException { int numSegments = searcher.getIndexReader().leaves().size(); final long valueCount; if (numSegments == 0) { return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments"); } else if (numSegments == 1) { // No need to use the ordinal map, because there is just one segment. ordinalMap = null; LeafReader leafReader = searcher.getIndexReader().leaves().get(0).reader(); SortedDocValues joinSortedDocValues = leafReader.getSortedDocValues(joinField); if (joinSortedDocValues != null) { valueCount = joinSortedDocValues.getValueCount(); } else { return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values"); } } else { if (ordinalMap == null) { throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment"); } valueCount = ordinalMap.getValueCount(); } final Query rewrittenFromQuery = searcher.rewrite(fromQuery); final Query rewrittenToQuery = searcher.rewrite(toQuery); GlobalOrdinalsWithScoreCollector globalOrdinalsWithScoreCollector; switch (scoreMode) { case Total: globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max); break; case Min: globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max); break; case Max: globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max); break; case Avg: globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, 
valueCount, min, max); break; case None: if (min <= 0 && max == Integer.MAX_VALUE) { GlobalOrdinalsCollector globalOrdinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount); searcher.search(rewrittenFromQuery, globalOrdinalsCollector); return new GlobalOrdinalsQuery(globalOrdinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, searcher.getTopReaderContext().id()); } else { globalOrdinalsWithScoreCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max); break; } default: throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode)); } searcher.search(rewrittenFromQuery, globalOrdinalsWithScoreCollector); return new GlobalOrdinalsWithScoreQuery(globalOrdinalsWithScoreCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery, rewrittenFromQuery, min, max, searcher.getTopReaderContext().id()); }
Example 8
Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0 | 4 votes |
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { SortedDocValues values = valuesProducer.getSorted(field); int numDocsWithField = 0; for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { numDocsWithField++; } if (numDocsWithField == 0) { meta.writeLong(-2); // docsWithFieldOffset meta.writeLong(0L); // docsWithFieldLength meta.writeShort((short) -1); // jumpTableEntryCount meta.writeByte((byte) -1); // denseRankPower } else if (numDocsWithField == maxDoc) { meta.writeLong(-1); // docsWithFieldOffset meta.writeLong(0L); // docsWithFieldLength meta.writeShort((short) -1); // jumpTableEntryCount meta.writeByte((byte) -1); // denseRankPower } else { long offset = data.getFilePointer(); meta.writeLong(offset); // docsWithFieldOffset values = valuesProducer.getSorted(field); final short jumpTableentryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength meta.writeShort(jumpTableentryCount); meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); } meta.writeInt(numDocsWithField); if (values.getValueCount() <= 1) { meta.writeByte((byte) 0); // bitsPerValue meta.writeLong(0L); // ordsOffset meta.writeLong(0L); // ordsLength } else { int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1); meta.writeByte((byte) numberOfBitsPerOrd); // bitsPerValue long start = data.getFilePointer(); meta.writeLong(start); // ordsOffset DirectWriter writer = DirectWriter.getInstance(data, numDocsWithField, numberOfBitsPerOrd); values = valuesProducer.getSorted(field); for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { writer.add(values.ordValue()); } writer.finish(); meta.writeLong(data.getFilePointer() - start); // ordsLength } addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); }
Example 9
Source File: ReverseOrdFieldSource.java From lucene-solr with Apache License 2.0 | 4 votes |
@Override @SuppressWarnings({"rawtypes"}) public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { final int off = readerContext.docBase; final LeafReader r; Object o = context.get("searcher"); if (o instanceof SolrIndexSearcher) { @SuppressWarnings("resource") final SolrIndexSearcher is = (SolrIndexSearcher) o; SchemaField sf = is.getSchema().getFieldOrNull(field); if (sf != null && sf.getType().isPointField()) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "rord() is not supported over Points based field " + field); } if (sf != null && sf.hasDocValues() == false && sf.multiValued() == false && sf.getType().getNumberType() != null) { // it's a single-valued numeric field: we must currently create insanity :( List<LeafReaderContext> leaves = is.getIndexReader().leaves(); LeafReader insaneLeaves[] = new LeafReader[leaves.size()]; int upto = 0; for (LeafReaderContext raw : leaves) { insaneLeaves[upto++] = Insanity.wrapInsanity(raw.reader(), field); } r = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves)); } else { // reuse ordinalmap r = ((SolrIndexSearcher)o).getSlowAtomicReader(); } } else { IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader(); r = SlowCompositeReaderWrapper.wrap(topReader); } // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(r, field), SortedSetSelector.Type.MIN); final int end = sindex.getValueCount(); return new IntDocValues(this) { @Override public int intVal(int doc) throws IOException { if (doc+off > sindex.docID()) { sindex.advance(doc+off); } if (doc+off == sindex.docID()) { return (end - sindex.ordValue() - 1); } else { return end; } } }; }