org.apache.lucene.index.SortedDocValues#getValueCount

Source File: LegacyDocValuesIterables.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Converts {@link SortedDocValues} into an {@code Iterable<BytesRef>} over all the values in its
 * dictionary, visited in increasing ordinal order from {@code 0} up to
 * {@code values.getValueCount() - 1}.
 *
 * <p>Each call to {@code iterator()} starts again at ordinal 0. An {@link IOException} raised by
 * {@code lookupOrd} is rethrown wrapped in a {@link RuntimeException}.
 *
 * @param values the doc values whose dictionary is exposed
 * @return an iterable over every value of {@code values}
 * @deprecated Consume {@link SortedDocValues} instead.
 */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        // Next ordinal to hand out; starts at 0 for every new iterator.
        private int nextOrd;

        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          // Honour the Iterator contract instead of looking up an out-of-range ordinal.
          if (!hasNext()) {
            throw new java.util.NoSuchElementException();
          }
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

6 votes

/**
 * "Typical" single-valued faceting (not too many unique values, no prefixing): counts are first
 * gathered in segment-ordinal space and then, when an ordinal map is supplied, migrated to
 * global ordinals as a separate step.
 *
 * <p>Slot 0 counts documents without a value; slot {@code 1 + ord} counts documents whose
 * ordinal is {@code ord}. When {@code map} is {@code null} the counts go straight into
 * {@code counts}.
 */
static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final boolean mapToGlobal = map != null;

  // Count in seg-ord space: a scratch array when mapping is needed, the caller's array otherwise.
  final int[] segCounts = mapToGlobal ? new int[1 + si.getValueCount()] : counts;

  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    final int slot = si.advanceExact(doc) ? 1 + si.ordValue() : 0;
    segCounts[slot]++;
  }

  // Migrate to global ords (if necessary).
  if (mapToGlobal) {
    migrateGlobal(counts, segCounts, subIndex, map);
  }
}

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Accumulates per-segment single-valued facet counts, picking one of two strategies.
 *
 * <p>With no prefixing ({@code startTermIndex == -1}) and either no ordinal map or not too many
 * unique values relative to the matching docs (lucene/facets heuristic), counts are collected
 * per segment and then mapped to global ords. Otherwise collection and mapping happen on the fly.
 */
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  boolean collectPerSegment = false;
  if (startTermIndex == -1) {
    collectPerSegment = (map == null) || si.getValueCount() < 10 * disi.cost();
  }

  if (collectPerSegment) {
    // collect separately per-segment, then map to global ords
    accumSingleSeg(counts, si, disi, subIndex, map);
    return;
  }

  // otherwise: do collect+map on the fly
  accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
}

Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Counts matching documents per segment-local ordinal, then adds every non-zero count to
 * {@code countAcc} under the slot for that ordinal (translated through {@code toGlobal} when it
 * is non-null, otherwise the segment ordinal itself).
 *
 * <p>Counts are held at index {@code ord + 1}; index 0 is never forwarded to {@code countAcc}.
 */
private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
  final int segMax = singleDv.getValueCount() + 1;
  final int[] counts = getCountArr(segMax);

  // Alternate trial implementations that were considered:
  //   FieldUtil.visitOrds(singleDv, disi, (doc,ord)->{counts[ord+1]++;} );
  // and
  //   FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
  //   while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
  //     counts[ ordValues.getOrd() + 1]++;
  //   }

  // Step 1: segment-local counts.
  if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
    final FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      counts[fc.getOrd(doc) + 1]++;
    }
  } else {
    for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
      if (!singleDv.advanceExact(doc)) {
        continue;
      }
      counts[singleDv.ordValue() + 1]++;
    }
  }

  // Step 2: fold segment-local counts into the global accumulator.
  for (int segOrd = 0; segOrd < segMax - 1; segOrd++) {
    final int segCount = counts[segOrd + 1];
    if (segCount <= 0) {
      continue;
    }
    final int slot = (toGlobal != null) ? (int) toGlobal.get(segOrd) : segOrd;
    countAcc.incrementCount(slot, segCount);
  }
}

Source File: TestFieldCacheVsDocValues.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that two {@link SortedDocValues} agree on value count, on the per-document ordinals
 * and binary values (iterating both in lock step), on the ordinal dictionary, and on their
 * terms enums.
 *
 * @param maxDoc   not read by this method
 * @param expected the reference doc values
 * @param actual   the doc values under test; may be {@code null}, in which case
 *                 {@code expected} must have no values
 */
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
  // can be null for the segment if no docs actually had any SortedDocValues
  // in this case FC.getDocTermsOrds returns EMPTY
  if (actual == null) {
    // Expected value goes first so a failure message reads correctly.
    assertEquals(0, expected.getValueCount());
    return;
  }
  assertEquals(expected.getValueCount(), actual.getValueCount());

  // compare ord lists
  while (true) {
    int docID = expected.nextDoc();
    if (docID == NO_MORE_DOCS) {
      assertEquals(NO_MORE_DOCS, actual.nextDoc());
      break;
    }
    assertEquals(docID, actual.nextDoc());
    assertEquals(expected.ordValue(), actual.ordValue());
    assertEquals(expected.binaryValue(), actual.binaryValue());
  }

  // compare ord dictionary
  for (int ord = 0; ord < expected.getValueCount(); ord++) {
    // Copy the expected bytes before the second lookup in case the returned BytesRef is reused.
    final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(ord));
    final BytesRef actualBytes = actual.lookupOrd(ord);
    assertEquals(expectedBytes, actualBytes);
  }

  // compare termsenum
  assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Returns the wrapped reader's sorted doc values for {@code field}, filtered through the access
 * control: ordinals are only exposed for documents that pass the
 * {@code ReadType.SORTED_DOC_VALUE} check, and every other document reports {@code -1}.
 * Returns {@code null} when the wrapped reader has no sorted doc values for the field.
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
  final SortedDocValues delegate = in.getSortedDocValues(field);
  if (delegate != null) {
    return new SortedDocValues() {

      @Override
      public int getOrd(int docID) {
        try {
          // -1 is the default "missing value" ordinal, returned when access is denied.
          return _accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)
              ? delegate.getOrd(docID)
              : -1;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }

      @Override
      public int getValueCount() {
        return delegate.getValueCount();
      }

      @Override
      public void lookupOrd(int ord, BytesRef result) {
        delegate.lookupOrd(ord, result);
      }
    };
  }
  return null;
}

Source File: JoinUtil.java From lucene-solr with Apache License 2.0

4 votes

/**
 * A query time join using global ordinals over a dedicated join field.
 *
 * <p>This join has certain restrictions and requirements:
 * <ol>
 *   <li>A document can only refer to one other document (but can be referred to by one or more
 *       documents).</li>
 *   <li>Documents on each side of the join must be distinguishable. Typically this is done by
 *       adding an extra field that identifies the "from" and "to" side; the fromQuery and
 *       toQuery must then take this into account.</li>
 *   <li>There must be a single sorted doc values join field used by both the "from" and "to"
 *       documents. This join field should store the join values as UTF-8 strings.</li>
 *   <li>An ordinal map must be provided that is created on top of the join field.</li>
 * </ol>
 *
 * <p>Note: min and max filtering and the avg score mode will require this join to keep track of
 * the number of times a document matches per join value. This will increase the per join cost
 * in terms of execution time and memory.
 *
 * @param joinField   The {@link SortedDocValues} field containing the join values
 * @param fromQuery   The query containing the actual user query. Also the fromQuery can only
 *                    match "from" documents.
 * @param toQuery     The query identifying all documents on the "to" side.
 * @param searcher    The index searcher used to execute the from query
 * @param scoreMode   Instructs how scores from the fromQuery are mapped to the returned query
 * @param ordinalMap  The ordinal map constructed over the joinField. In case of a single segment
 *                    index, no ordinal map needs to be provided.
 * @param min         Optionally the minimum number of "from" documents that are required to
 *                    match for a "to" document to be a match. The min is inclusive. Setting min
 *                    to 0 and max to <code>Integer.MAX_VALUE</code> disables the min and max
 *                    "from" documents filtering
 * @param max         Optionally the maximum number of "from" documents that are allowed to match
 *                    for a "to" document to be a match. The max is inclusive. Setting min to 0
 *                    and max to <code>Integer.MAX_VALUE</code> disables the min and max "from"
 *                    documents filtering
 * @return a {@link Query} instance that can be used to join documents based on the join field
 * @throws IOException If I/O related errors occur
 */
public static Query createJoinQuery(String joinField,
                                    Query fromQuery,
                                    Query toQuery,
                                    IndexSearcher searcher,
                                    ScoreMode scoreMode,
                                    OrdinalMap ordinalMap,
                                    int min,
                                    int max) throws IOException {
  final int segmentCount = searcher.getIndexReader().leaves().size();
  if (segmentCount == 0) {
    return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
  }

  final long valueCount;
  if (segmentCount > 1) {
    if (ordinalMap == null) {
      throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
    }
    valueCount = ordinalMap.getValueCount();
  } else {
    // Exactly one segment: segment ordinals are already global, so drop the ordinal map.
    ordinalMap = null;
    LeafReader onlyLeaf = searcher.getIndexReader().leaves().get(0).reader();
    SortedDocValues joinValues = onlyLeaf.getSortedDocValues(joinField);
    if (joinValues == null) {
      return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
    }
    valueCount = joinValues.getValueCount();
  }

  final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
  final Query rewrittenToQuery = searcher.rewrite(toQuery);

  final GlobalOrdinalsWithScoreCollector scoringCollector;
  switch (scoreMode) {
    case None:
      if (min <= 0 && max == Integer.MAX_VALUE) {
        // No scores and no min/max filtering: the plain ordinals collector is enough.
        GlobalOrdinalsCollector ordinalsCollector = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
        searcher.search(rewrittenFromQuery, ordinalsCollector);
        return new GlobalOrdinalsQuery(ordinalsCollector.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
            rewrittenFromQuery, searcher.getTopReaderContext().id());
      }
      scoringCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
      break;
    case Total:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
      break;
    case Min:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
      break;
    case Max:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
      break;
    case Avg:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
      break;
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
  }

  searcher.search(rewrittenFromQuery, scoringCollector);
  return new GlobalOrdinalsWithScoreQuery(scoringCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery,
      rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}

Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Writes one sorted doc-values field in three parts: a "documents with a value" section, the
 * per-document ordinals, and finally the terms dictionary (through {@code addTermsDict}).
 *
 * <p>{@code meta}, {@code data} and {@code maxDoc} are members declared outside this method.
 * A fresh {@link SortedDocValues} is requested from {@code valuesProducer} before each pass
 * over the documents, because the preceding pass iterates to {@code NO_MORE_DOCS}.
 *
 * @param field          the field being written
 * @param valuesProducer source of the field's {@link SortedDocValues}
 * @throws IOException if reading the values or writing to the outputs fails
 */
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  SortedDocValues values = valuesProducer.getSorted(field);
  // Pass 1: count how many documents have a value for this field.
  int numDocsWithField = 0;
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    numDocsWithField++;
  }

  if (numDocsWithField == 0) {
    // No document has a value: offset -2 is written and no bit set goes to the data output.
    meta.writeLong(-2); // docsWithFieldOffset
    meta.writeLong(0L); // docsWithFieldLength
    meta.writeShort((short) -1); // jumpTableEntryCount
    meta.writeByte((byte) -1);   // denseRankPower
  } else if (numDocsWithField == maxDoc) {
    // Every document has a value: offset -1 is written and no bit set goes to the data output.
    meta.writeLong(-1); // docsWithFieldOffset
    meta.writeLong(0L); // docsWithFieldLength
    meta.writeShort((short) -1); // jumpTableEntryCount
    meta.writeByte((byte) -1);   // denseRankPower
  } else {
    // Only some documents have a value: write the doc-id set to the data output and record
    // where it starts and how many bytes it occupies.
    long offset = data.getFilePointer();
    meta.writeLong(offset); // docsWithFieldOffset
    values = valuesProducer.getSorted(field);
    final short jumpTableentryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
    meta.writeShort(jumpTableentryCount);
    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
  }

  meta.writeInt(numDocsWithField);
  if (values.getValueCount() <= 1) {
    // At most one distinct value: zero bits per ordinal and no ordinal data is written.
    meta.writeByte((byte) 0); // bitsPerValue
    meta.writeLong(0L); // ordsOffset
    meta.writeLong(0L); // ordsLength
  } else {
    // Size each ordinal by the bits needed for the largest one (valueCount - 1).
    int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd); // bitsPerValue
    long start = data.getFilePointer();
    meta.writeLong(start); // ordsOffset
    DirectWriter writer = DirectWriter.getInstance(data, numDocsWithField, numberOfBitsPerOrd);
    // Pass 2: write one ordinal per document that has a value, in iteration order.
    values = valuesProducer.getSorted(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      writer.add(values.ordValue());
    }
    writer.finish();
    meta.writeLong(data.getFilePointer() - start); // ordsLength
  }

  // The terms dictionary is written from another fresh instance, wrapped by DocValues.singleton.
  addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}

Source File: ReverseOrdFieldSource.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Produces per-document reverse ordinals for {@code field}: a document with a value gets
 * {@code valueCount - ord - 1}, and a document without one gets {@code valueCount}.
 *
 * <p>Ordinals are resolved against a top-level (composite-wrapped) reader, so the segment's
 * {@code docBase} is added to every incoming doc id. If the context holds a
 * {@code SolrIndexSearcher} under the key {@code "searcher"}, its schema is consulted: Points
 * based fields are rejected, and single-valued numeric fields without doc values are read
 * through {@code Insanity}-wrapped leaves.
 */
@Override
@SuppressWarnings({"rawtypes"})
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final int off = readerContext.docBase;
  final Object searcherObj = context.get("searcher");

  final LeafReader topLevel;
  if (!(searcherObj instanceof SolrIndexSearcher)) {
    IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    topLevel = SlowCompositeReaderWrapper.wrap(topReader);
  } else {
    @SuppressWarnings("resource")  final SolrIndexSearcher is = (SolrIndexSearcher) searcherObj;
    final SchemaField sf = is.getSchema().getFieldOrNull(field);
    if (sf != null && sf.getType().isPointField()) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "rord() is not supported over Points based field " + field);
    }

    final boolean singleValuedNumericWithoutDv = sf != null
        && sf.hasDocValues() == false
        && sf.multiValued() == false
        && sf.getType().getNumberType() != null;
    if (singleValuedNumericWithoutDv) {
      // it's a single-valued numeric field: we must currently create insanity :(
      final List<LeafReaderContext> leaves = is.getIndexReader().leaves();
      final LeafReader[] insaneLeaves = new LeafReader[leaves.size()];
      for (int i = 0; i < insaneLeaves.length; i++) {
        insaneLeaves[i] = Insanity.wrapInsanity(leaves.get(i).reader(), field);
      }
      topLevel = SlowCompositeReaderWrapper.wrap(new MultiReader(insaneLeaves));
    } else {
      // reuse ordinalmap
      topLevel = is.getSlowAtomicReader();
    }
  }

  // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
  final SortedDocValues sindex = SortedSetSelector.wrap(DocValues.getSortedSet(topLevel, field), SortedSetSelector.Type.MIN);
  final int end = sindex.getValueCount();

  return new IntDocValues(this) {
    @Override
    public int intVal(int doc) throws IOException {
      final int target = doc + off;
      // Only move forward; a target at or behind the current position is not re-advanced.
      if (target > sindex.docID()) {
        sindex.advance(target);
      }
      return target == sindex.docID() ? (end - sindex.ordValue() - 1) : end;
    }
  };
}

Java Code Examples for org.apache.lucene.index.SortedDocValues#getValueCount()