org.apache.lucene.index.SortedSetDocValues#getValueCount

Source File: LegacyDocValuesIterables.java From lucene-solr with Apache License 2.0

6 votes

/** Converts {@link SortedSetDocValues} into an {@code Iterable&lt;BytesRef&gt;} for all the values.
 *
 * @deprecated Consume {@link SortedSetDocValues} instead. */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedSetDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        private long nextOrd;
  
        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}

Source File: SortedSetSelector.java From lucene-solr with Apache License 2.0

6 votes

/** Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }
  
  SortedDocValues singleton = DocValues.unwrapSingleton(sortedSet);
  if (singleton != null) {
    // it's actually single-valued in practice, but indexed as multi-valued,
    // so just sort on the underlying single-valued dv directly.
    // regardless of selector type, this optimization is safe!
    return singleton;
  } else {
    switch(selector) {
      case MIN: return new MinValue(sortedSet);
      case MAX: return new MaxValue(sortedSet);
      case MIDDLE_MIN: return new MiddleMinValue(sortedSet);
      case MIDDLE_MAX: return new MiddleMaxValue(sortedSet);
      default: 
        throw new AssertionError();
    }
  }
}

Source File: TermsQParserPlugin.java From lucene-solr with Apache License 2.0

6 votes

private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long low = startOrd;
  long high = docValues.getValueCount()-1;

  while (low <= high) {
    long mid = (low + high) >>> 1;
    final BytesRef term = docValues.lookupOrd(mid);
    int cmp = term.compareTo(key);

    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid; // key found
    }
  }

  return -(low + 1);  // key not found.
}

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

6 votes

private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long low = startOrd;
  long high = docValues.getValueCount()-1;

  while (low <= high) {
    long mid = (low + high) >>> 1;
    final BytesRef term = docValues.lookupOrd(mid);
    int cmp = term.compareTo(key);

    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid; // key found
    }
  }

  return -(low + 1);  // key not found.
}

Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0

6 votes

private void collectPerSeg(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
  int segMax = (int)multiDv.getValueCount();
  final int[] counts = getCountArr( segMax );

  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (multiDv.advanceExact(doc)) {
      for(;;) {
        int segOrd = (int)multiDv.nextOrd();
        if (segOrd < 0) break;
        counts[segOrd]++;
      }
    }
  }

  for (int i=0; i<segMax; i++) {
    int segCount = counts[i];
    if (segCount > 0) {
      int slot = toGlobal == null ? (i) : (int) toGlobal.get(i);
      countAcc.incrementCount(slot, segCount);
    }
  }
}

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

5 votes

/** accumulates per-segment multi-valued facet counts */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  if (startTermIndex == -1 && (map == null || si.getValueCount() < disi.cost()*10)) {
    // no prefixing, not too many unique values wrt matching docs (lucene/facets heuristic): 
    //   collect separately per-segment, then map to global ords
    accumMultiSeg(counts, si, disi, subIndex, map);
  } else {
    // otherwise: do collect+map on the fly
    accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
  }
}

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

5 votes

/** "typical" multi-valued faceting: not too many unique values, no prefixing. maps to global ordinals as a separate step */
static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // First count in seg-ord space:
  final int segCounts[];
  if (map == null) {
    segCounts = counts;
  } else {
    segCounts = new int[1+(int)si.getValueCount()];
  }
  
  int doc;
  while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    if (si.advanceExact(doc)) {
      int term = (int) si.nextOrd();
      do {
        segCounts[1+term]++;
      } while ((term = (int)si.nextOrd()) >= 0);
    } else {
      counts[0]++; // missing
    }
  }
  
  // migrate to global ords (if necessary)
  if (map != null) {
    migrateGlobal(counts, segCounts, subIndex, map);
  }
}

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

5 votes

private static LongBitSet findFieldOrdinalsMatchingQuery(Query q, String field, SolrIndexSearcher searcher, SortedSetDocValues docValues) throws IOException {
  final LongBitSet fromOrdBitSet = new LongBitSet(docValues.getValueCount());
  final Collector fromCollector = new MultiValueTermOrdinalCollector(field, docValues, fromOrdBitSet);

  searcher.search(q, fromCollector);

  return fromOrdBitSet;
}

Source File: TestFieldCacheVsDocValues.java From lucene-solr with Apache License 2.0

5 votes

private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
  // can be null for the segment if no docs actually had any SortedDocValues
  // in this case FC.getDocTermsOrds returns EMPTY
  if (actual == null) {
    assertEquals(expected.getValueCount(), 0);
    return;
  }
  assertEquals(expected.getValueCount(), actual.getValueCount());
  while (true) {
    int docID = expected.nextDoc();
    assertEquals(docID, actual.nextDoc());
    if (docID == NO_MORE_DOCS) {
      break;
    }
    long expectedOrd;
    while ((expectedOrd = expected.nextOrd()) != NO_MORE_ORDS) {
      assertEquals(expectedOrd, actual.nextOrd());
    }
    assertEquals(NO_MORE_ORDS, actual.nextOrd());
  }
  
  // compare ord dictionary
  for (long i = 0; i < expected.getValueCount(); i++) {
    final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(i));
    final BytesRef actualBytes = actual.lookupOrd(i);
    assertEquals(expectedBytes, actualBytes);
  }
  
  // compare termsenum
  assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}

Source File: DefaultSortedSetDocValuesReaderState.java From lucene-solr with Apache License 2.0

4 votes

/** Creates this, pulling doc values from the specified
 *  field. */
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
  this.field = field;
  this.reader = reader;

  // We need this to create thread-safe MultiSortedSetDV
  // per collector:
  SortedSetDocValues dv = getDocValues();
  if (dv == null) {
    throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
  }
  if (dv.getValueCount() > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
  }
  valueCount = (int) dv.getValueCount();

  // TODO: we can make this more efficient if eg we can be
  // "involved" when OrdinalMap is being created?  Ie see
  // each term/ord it's assigning as it goes...
  String lastDim = null;
  int startOrd = -1;

  // TODO: this approach can work for full hierarchy?;
  // TaxoReader can't do this since ords are not in
  // "sorted order" ... but we should generalize this to
  // support arbitrary hierarchy:
  for(int ord=0;ord<valueCount;ord++) {
    final BytesRef term = dv.lookupOrd(ord);
    String[] components = FacetsConfig.stringToPath(term.utf8ToString());
    if (components.length != 2) {
      throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + term.utf8ToString());
    }
    if (!components[0].equals(lastDim)) {
      if (lastDim != null) {
        prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1));
      }
      startOrd = ord;
      lastDim = components[0];
    }
  }

  if (lastDim != null) {
    prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1));
  }
}

Source File: DocValuesTermsQuery.java From lucene-solr with Apache License 2.0

4 votes

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
      final LongBitSet bits = new LongBitSet(values.getValueCount());
      boolean matchesAtLeastOneTerm = false;
      TermIterator iterator = termData.iterator();
      for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
        final long ord = values.lookupTerm(term);
        if (ord >= 0) {
          matchesAtLeastOneTerm = true;
          bits.set(ord);
        }
      }
      if (matchesAtLeastOneTerm == false) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), scoreMode, new TwoPhaseIterator(values) {

        @Override
        public boolean matches() throws IOException {
          for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
            if (bits.get(ord)) {
              return true;
            }
          }
          return false;
        }

        @Override
        public float matchCost() {
          return 3; // lookup in a bitset
        }

      });
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return DocValues.isCacheable(ctx, field);
    }

  };
}

Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0

4 votes

private void writeTermsIndex(SortedSetDocValues values) throws IOException {
  final long size = values.getValueCount();
  meta.writeInt(Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT);
  long start = data.getFilePointer();

  long numBlocks = 1L + ((size + Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) >>> Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT);
  ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput();
  DirectMonotonicWriter writer;
  try (ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) {
    writer = DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT);
    TermsEnum iterator = values.termsEnum();
    BytesRefBuilder previous = new BytesRefBuilder();
    long offset = 0;
    long ord = 0;
    for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
      if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == 0) {
        writer.add(offset);
        final int sortKeyLength;
        if (ord == 0) {
          // no previous term: no bytes to write
          sortKeyLength = 0;
        } else {
          sortKeyLength = StringHelper.sortKeyLength(previous.get(), term);
        }
        offset += sortKeyLength;
        data.writeBytes(term.bytes, term.offset, sortKeyLength);
      } else if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) {
        previous.copyBytes(term);
      }
      ++ord;
    }
    writer.add(offset);
    writer.finish();
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
    start = data.getFilePointer();
    addressBuffer.copyTo(data);
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
  }
}

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

4 votes

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  if (! (searcher instanceof SolrIndexSearcher)) {
    log.debug("Falling back to JoinQueryWeight because searcher [{}] is not the required SolrIndexSearcher", searcher);
    return super.createWeight(searcher, scoreMode, boost);
  }

  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
  final JoinQueryWeight weight = new JoinQueryWeight(solrSearcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
  final SolrIndexSearcher fromSearcher = weight.fromSearcher;
  final SolrIndexSearcher toSearcher = weight.toSearcher;

  try {
    final SortedSetDocValues topLevelFromDocValues = validateAndFetchDocValues(fromSearcher, fromField, "from");
    final SortedSetDocValues topLevelToDocValues = validateAndFetchDocValues(toSearcher, toField, "to");
    if (topLevelFromDocValues.getValueCount() == 0 || topLevelToDocValues.getValueCount() == 0) {
      return createNoMatchesWeight(boost);
    }

    final LongBitSet fromOrdBitSet = findFieldOrdinalsMatchingQuery(q, fromField, fromSearcher, topLevelFromDocValues);
    final LongBitSet toOrdBitSet = new LongBitSet(topLevelToDocValues.getValueCount());
    final BitsetBounds toBitsetBounds = convertFromOrdinalsIntoToField(fromOrdBitSet, topLevelFromDocValues, toOrdBitSet, topLevelToDocValues);

    final boolean toMultivalued = toSearcher.getSchema().getFieldOrNull(toField).multiValued();
    return new ConstantScoreWeight(this, boost) {
      public Scorer scorer(LeafReaderContext context) throws IOException {
        if (toBitsetBounds.lower == BitsetBounds.NO_MATCHES) {
          return null;
        }

        final DocIdSetIterator toApproximation = (toMultivalued) ? context.reader().getSortedSetDocValues(toField) :
            context.reader().getSortedDocValues(toField);
        if (toApproximation == null) {
          return null;
        }

        final int docBase = context.docBase;
        return new ConstantScoreScorer(this, this.score(), scoreMode, new TwoPhaseIterator(toApproximation) {
          public boolean matches() throws IOException {
            final boolean hasDoc = topLevelToDocValues.advanceExact(docBase + approximation.docID());
            if (hasDoc) {
              for (long ord = topLevelToDocValues.nextOrd(); ord != -1L; ord = topLevelToDocValues.nextOrd()) {
                if (toOrdBitSet.get(ord)) {
                  return true;
                }
              }
            }
            return false;
          }

          public float matchCost() {
            return 10.0F;
          }
        });

      }

      public boolean isCacheable(LeafReaderContext ctx) {
        return false;
      }
    };
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

4 votes

@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
  final SortedSetDocValues sortedSetDocValues = in.getSortedSetDocValues(field);
  if (sortedSetDocValues == null) {
    return null;
  }
  return new SortedSetDocValues() {

    private boolean _access;

    @Override
    public void setDocument(int docID) {
      try {
        if (_access = _accessControl.hasAccess(ReadType.SORTED_SET_DOC_VALUE, docID)) {
          sortedSetDocValues.setDocument(docID);
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public long nextOrd() {
      if (_access) {
        return sortedSetDocValues.nextOrd();
      }
      return NO_MORE_ORDS;
    }

    @Override
    public void lookupOrd(long ord, BytesRef result) {
      if (_access) {
        sortedSetDocValues.lookupOrd(ord, result);
      } else {
        result.bytes = BinaryDocValues.MISSING;
        result.length = 0;
        result.offset = 0;
      }
    }

    @Override
    public long getValueCount() {
      return sortedSetDocValues.getValueCount();
    }
  };
}

Java Code Examples for org.apache.lucene.index.SortedSetDocValues#getValueCount()