org.apache.lucene.util.FixedBitSet#cardinality

Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0

6 votes

public void testOneDocMissingFixed() throws IOException {
  int maxDoc = 9699;
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7)+7); // sane + chance of disable
  FixedBitSet set = new FixedBitSet(maxDoc);
  set.set(0, maxDoc);
  set.clear(1345);
  try (Directory dir = newDirectory()) {

    final int cardinality = set.cardinality();
    long length;
    int jumpTableentryCount;
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
      length = out.getFilePointer();
    }

    int step = 16000;
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      BitSetIterator disi2 = new BitSetIterator(set, cardinality);
      assertAdvanceEquality(disi, disi2, step);
    }
  }
}

Source File: TaggerRequestHandler.java From lucene-solr with Apache License 2.0

6 votes

private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
  //Now we must supply a Solr DocList and add it to the response.
  //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
  //  know exactly what documents to return, the order doesn't matter nor does
  //  scoring.
  //  Ideally an implementation of DocList could be directly implemented off
  //  of a BitSet, but there are way too many methods to implement for a minor
  //  payoff.
  int matchDocs = matchDocIdsBS.cardinality();
  int[] docIds = new int[ Math.min(rows, matchDocs) ];
  DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
  for (int i = 0; i < docIds.length; i++) {
    docIds[i] = docIdIter.nextDoc();
  }
  return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f, TotalHits.Relation.EQUAL_TO);
}

Source File: TaggerRequestHandler.java From SolrTextTagger with Apache License 2.0

6 votes

private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
  //Now we must supply a Solr DocList and add it to the response.
  //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
  //  know exactly what documents to return, the order doesn't matter nor does
  //  scoring.
  //  Ideally an implementation of DocList could be directly implemented off
  //  of a BitSet, but there are way too many methods to implement for a minor
  //  payoff.
  int matchDocs = matchDocIdsBS.cardinality();
  int[] docIds = new int[ Math.min(rows, matchDocs) ];
  DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
  for (int i = 0; i < docIds.length; i++) {
    docIds[i] = docIdIter.nextDoc();
  }
  return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f);
}

Source File: Lucene50LiveDocsFormat.java From lucene-solr with Apache License 2.0

5 votes

@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  long gen = info.getDelGen();
  String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
  final int length = info.info.maxDoc();
  try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
    Throwable priorE = null;
    try {
      CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, 
                                   info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
      long data[] = new long[FixedBitSet.bits2words(length)];
      for (int i = 0; i < data.length; i++) {
        data[i] = input.readLong();
      }
      FixedBitSet fbs = new FixedBitSet(data, length);
      if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
        throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) + 
                                        " info.delcount=" + info.getDelCount(), input);
      }
      return fbs.asReadOnlyBits();
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
  }
  throw new AssertionError();
}

Source File: SloppyPhraseMatcher.java From lucene-solr with Apache License 2.0

5 votes

/** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
 * of the two colliding pps. Note that there can only be one collision, as by the initialization
 * there were no collisions before pp was advanced.  */
private boolean advanceRpts(PhrasePositions pp) throws IOException {
  if (pp.rptGroup < 0) {
    return true; // not a repeater
  }
  PhrasePositions[] rg = rptGroups[pp.rptGroup];
  FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved
  int k0 = pp.rptInd;
  int k;
  while((k=collide(pp)) >= 0) {
    pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
    if (!advancePP(pp)) {
      return false; // exhausted
    }
    if (k != k0) { // careful: mark only those currently in the queue
      bits = FixedBitSet.ensureCapacity(bits, k);
      bits.set(k); // mark that pp2 need to be re-queued
    }
  }
  // collisions resolved, now re-queue
  // empty (partially) the queue until seeing all pps advanced for resolving collisions
  int n = 0;
  // TODO would be good if we can avoid calling cardinality() in each iteration!
  int numBits = bits.length(); // larges bit we set
  while (bits.cardinality() > 0) {
    PhrasePositions pp2 = pq.pop();
    rptStack[n++] = pp2;
    if (pp2.rptGroup >= 0 
        && pp2.rptInd < numBits  // this bit may not have been set
        && bits.get(pp2.rptInd)) {
      bits.clear(pp2.rptInd);
    }
  }
  // add back to queue
  for (int i=n-1; i>=0; i--) {
    pq.add(rptStack[i]);
  }
  return true;
}

Source File: SolrIndexSplitter.java From lucene-solr with Apache License 2.0

5 votes

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      RTimerTree t = timings.sub("findDocsToDelete");
      t.resume();
      FixedBitSet set = findDocsToDelete(context);
      t.pause();
      if (log.isInfoEnabled()) {
        log.info("### partition={}, leaf={}, maxDoc={}, numDels={}, setLen={}, setCard={}"
        , partition, context, context.reader().maxDoc()
        ,context.reader().numDeletedDocs(), set.length(), set.cardinality());
      }
      Bits liveDocs = context.reader().getLiveDocs();
      if (liveDocs != null) {
        // check that we don't delete already deleted docs
        FixedBitSet dels = FixedBitSet.copyOf(liveDocs);
        dels.flip(0, dels.length());
        dels.and(set);
        if (dels.cardinality() > 0) {
          log.error("### INVALID DELS {}", dels.cardinality());
        }
      }
      return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(set, set.length()));
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }

    @Override
    public String toString() {
      return "weight(shardSplittingQuery,part" + partition + ")";
    }
  };
}

Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the current slot value as long
 * This is used to get non-sharded value
 */
public long getNonShardValue(int slot) {
  long res;
  if (counts != null) {  // will only be pre-populated if this was used for sorting.
    res = counts[slot];
  } else {
    FixedBitSet bs = arr[slot];
    res = bs == null ? 0 : bs.cardinality();
  }
  return res;
}

Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0

5 votes

@SuppressWarnings({"unchecked", "rawtypes"})
private Object getShardValue(int slot) throws IOException {
  if (factory != null) return getShardHLL(slot);
  FixedBitSet ords = arr[slot];
  int unique;
  if (counts != null) {
    unique = counts[slot];
  } else {
    unique = ords==null ? 0 : ords.cardinality();
  }

  SimpleOrderedMap map = new SimpleOrderedMap();
  map.add("unique", unique);
  map.add("nTerms", nTerms);

  int maxExplicit=100;
  // TODO: make configurable
  // TODO: share values across buckets
  if (unique > 0) {

    List lst = new ArrayList( Math.min(unique, maxExplicit) );

    int maxOrd = ords.length();
    if (maxOrd > 0) {
      for (int ord=0; lst.size() < maxExplicit;) {
        ord = ords.nextSetBit(ord);
        if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
        BytesRef val = lookupOrd(ord);
        Object o = field.getType().toObject(field, val);
        lst.add(o);
        if (++ord >= maxOrd) break;
      }
    }

    map.add("vals", lst);
  }

  return map;
}

Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0

5 votes

public void calcCounts() {
  counts = new int[arr.length];
  for (int i=0; i<arr.length; i++) {
    FixedBitSet bs = arr[i];
    counts[i] = bs == null ? 0 : bs.cardinality();
  }
}

Source File: TestDocSet.java From lucene-solr with Apache License 2.0

5 votes

public DocSet getIntDocSet(FixedBitSet bs) {
  int[] docs = new int[bs.cardinality()];
  BitSetIterator iter = new BitSetIterator(bs, 0);
  for (int i=0; i<docs.length; i++) {
    docs[i] = iter.nextDoc();
  }
  return new SortedIntDocSet(docs);
}

Source File: TestDocSet.java From lucene-solr with Apache License 2.0

5 votes

public DocSlice getDocSlice(FixedBitSet bs) {
  int len = bs.cardinality();
  int[] arr = new int[len+5];
  arr[0]=10; arr[1]=20; arr[2]=30; arr[arr.length-1]=1; arr[arr.length-2]=2;
  int offset = 3;
  int end = offset + len;

  BitSetIterator iter = new BitSetIterator(bs, 0);
  // put in opposite order... DocLists are not ordered.
  for (int i=end-1; i>=offset; i--) {
    arr[i] = iter.nextDoc();
  }

  return new DocSlice(offset, len, arr, null, len*2, 100.0f, TotalHits.Relation.EQUAL_TO);
}

Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0

4 votes

private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstCompiler;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
      outputsInner);
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
            outputs.newPair(lastDocsStart,
                outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      docFreq++;
      sumDocFreq++;
      totalTermFreq++;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
            outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = fstCompiler.compile();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}

Source File: CheckIndex.java From lucene-solr with Apache License 2.0

4 votes

private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv, SortedDocValues dv2) throws IOException {
  if (dv.docID() != -1) {
    throw new RuntimeException("sorted dv iterator for field: " + fieldName + " should start at docID=-1, but got " + dv.docID());
  }
  final int maxOrd = dv.getValueCount()-1;
  FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
  int maxOrd2 = -1;
  for (int doc = dv.nextDoc(); doc != NO_MORE_DOCS; doc = dv.nextDoc()) {
    int ord = dv.ordValue();
    if (ord == -1) {
      throw new RuntimeException("dv for field: " + fieldName + " has -1 ord");
    } else if (ord < -1 || ord > maxOrd) {
      throw new RuntimeException("ord out of bounds: " + ord);
    } else {
      maxOrd2 = Math.max(maxOrd2, ord);
      seenOrds.set(ord);
    }

    if (dv2.advanceExact(doc) == false) {
      throw new RuntimeException("advanceExact did not find matching doc ID: " + doc);
    }
    int ord2 = dv2.ordValue();
    if (ord != ord2) {
      throw new RuntimeException("nextDoc and advanceExact report different ords: " + ord + " != " + ord2);
    }
  }
  if (maxOrd != maxOrd2) {
    throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
  }
  if (seenOrds.cardinality() != dv.getValueCount()) {
    throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality());
  }
  BytesRef lastValue = null;
  for (int i = 0; i <= maxOrd; i++) {
    final BytesRef term = dv.lookupOrd(i);
    term.isValid();
    if (lastValue != null) {
      if (term.compareTo(lastValue) <= 0) {
        throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + term);
      }
    }
    lastValue = BytesRef.deepCopyOf(term);
  }
}

Source File: SolrIndexSplitter.java From lucene-solr with Apache License 2.0

4 votes

public LiveDocsReader(CodecReader in, FixedBitSet liveDocs) {
  super(in);
  this.liveDocs = liveDocs;
  this.numDocs = liveDocs.cardinality();
}

Java Code Examples for org.apache.lucene.util.FixedBitSet#cardinality()