Java Code Examples for org.apache.lucene.util.FixedBitSet#cardinality()
The following examples show how to use
org.apache.lucene.util.FixedBitSet#cardinality().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestIndexedDISI.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testOneDocMissingFixed() throws IOException { int maxDoc = 9699; final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7)+7); // sane + chance of disable FixedBitSet set = new FixedBitSet(maxDoc); set.set(0, maxDoc); set.clear(1345); try (Directory dir = newDirectory()) { final int cardinality = set.cardinality(); long length; int jumpTableentryCount; try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) { jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower); length = out.getFilePointer(); } int step = 16000; try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) { IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality); BitSetIterator disi2 = new BitSetIterator(set, cardinality); assertAdvanceEquality(disi, disi2, step); } } }
Example 2
Source File: TaggerRequestHandler.java From lucene-solr with Apache License 2.0 | 6 votes |
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException { //Now we must supply a Solr DocList and add it to the response. // Typically this is gotten via a SolrIndexSearcher.search(), but in this case we // know exactly what documents to return, the order doesn't matter nor does // scoring. // Ideally an implementation of DocList could be directly implemented off // of a BitSet, but there are way too many methods to implement for a minor // payoff. int matchDocs = matchDocIdsBS.cardinality(); int[] docIds = new int[ Math.min(rows, matchDocs) ]; DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1); for (int i = 0; i < docIds.length; i++) { docIds[i] = docIdIter.nextDoc(); } return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f, TotalHits.Relation.EQUAL_TO); }
Example 3
Source File: TaggerRequestHandler.java From SolrTextTagger with Apache License 2.0 | 6 votes |
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException { //Now we must supply a Solr DocList and add it to the response. // Typically this is gotten via a SolrIndexSearcher.search(), but in this case we // know exactly what documents to return, the order doesn't matter nor does // scoring. // Ideally an implementation of DocList could be directly implemented off // of a BitSet, but there are way too many methods to implement for a minor // payoff. int matchDocs = matchDocIdsBS.cardinality(); int[] docIds = new int[ Math.min(rows, matchDocs) ]; DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1); for (int i = 0; i < docIds.length; i++) { docIds[i] = docIdIter.nextDoc(); } return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f); }
Example 4
Source File: Lucene50LiveDocsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Reads the live-docs bit set for a segment from the file named after the
 * segment and its deletion generation.
 *
 * <p>The header is validated first, then one long per bit-set word is read. The
 * number of cleared bits must equal the delete count recorded in {@code info}.
 *
 * @param dir directory containing the live-docs file
 * @param info segment commit info supplying name, id, max doc, deletion
 *     generation and expected delete count
 * @param context IO context used to open the file
 * @return a read-only view of the loaded bits
 * @throws CorruptIndexException if the cleared-bit count disagrees with
 *     {@code info.getDelCount()}
 */
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  final long delGen = info.getDelGen();
  final String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, delGen);
  final int maxDoc = info.info.maxDoc();

  try (ChecksumIndexInput input = dir.openChecksumInput(fileName, context)) {
    Throwable failure = null;
    try {
      CodecUtil.checkIndexHeader(
          input,
          CODEC_NAME,
          VERSION_START,
          VERSION_CURRENT,
          info.info.getId(),
          Long.toString(delGen, Character.MAX_RADIX));

      final long[] words = new long[FixedBitSet.bits2words(maxDoc)];
      int w = 0;
      while (w < words.length) {
        words[w] = input.readLong();
        w++;
      }

      final FixedBitSet liveBits = new FixedBitSet(words, maxDoc);
      final int deleted = liveBits.length() - liveBits.cardinality();
      if (deleted != info.getDelCount()) {
        throw new CorruptIndexException(
            "bits.deleted=" + deleted + " info.delcount=" + info.getDelCount(), input);
      }
      return liveBits.asReadOnlyBits();
    } catch (Throwable t) {
      // Remember the failure so the footer check in the finally block can see it.
      failure = t;
    } finally {
      CodecUtil.checkFooter(input, failure);
    }
  }
  // Presumably unreachable: checkFooter is expected to rethrow a recorded failure.
  throw new AssertionError();
}
Example 5
Source File: SloppyPhraseMatcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser * of the two colliding pps. Note that there can only be one collision, as by the initialization * there were no collisions before pp was advanced. */ private boolean advanceRpts(PhrasePositions pp) throws IOException { if (pp.rptGroup < 0) { return true; // not a repeater } PhrasePositions[] rg = rptGroups[pp.rptGroup]; FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved int k0 = pp.rptInd; int k; while((k=collide(pp)) >= 0) { pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps if (!advancePP(pp)) { return false; // exhausted } if (k != k0) { // careful: mark only those currently in the queue bits = FixedBitSet.ensureCapacity(bits, k); bits.set(k); // mark that pp2 need to be re-queued } } // collisions resolved, now re-queue // empty (partially) the queue until seeing all pps advanced for resolving collisions int n = 0; // TODO would be good if we can avoid calling cardinality() in each iteration! int numBits = bits.length(); // larges bit we set while (bits.cardinality() > 0) { PhrasePositions pp2 = pq.pop(); rptStack[n++] = pp2; if (pp2.rptGroup >= 0 && pp2.rptInd < numBits // this bit may not have been set && bits.get(pp2.rptInd)) { bits.clear(pp2.rptInd); } } // add back to queue for (int i=n-1; i>=0; i--) { pq.add(rptStack[i]); } return true; }
Example 6
Source File: SolrIndexSplitter.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { RTimerTree t = timings.sub("findDocsToDelete"); t.resume(); FixedBitSet set = findDocsToDelete(context); t.pause(); if (log.isInfoEnabled()) { log.info("### partition={}, leaf={}, maxDoc={}, numDels={}, setLen={}, setCard={}" , partition, context, context.reader().maxDoc() ,context.reader().numDeletedDocs(), set.length(), set.cardinality()); } Bits liveDocs = context.reader().getLiveDocs(); if (liveDocs != null) { // check that we don't delete already deleted docs FixedBitSet dels = FixedBitSet.copyOf(liveDocs); dels.flip(0, dels.length()); dels.and(set); if (dels.cardinality() > 0) { log.error("### INVALID DELS {}", dels.cardinality()); } } return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(set, set.length())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return false; } @Override public String toString() { return "weight(shardSplittingQuery,part" + partition + ")"; } }; }
Example 7
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Returns the current slot value as long * This is used to get non-sharded value */ public long getNonShardValue(int slot) { long res; if (counts != null) { // will only be pre-populated if this was used for sorting. res = counts[slot]; } else { FixedBitSet bs = arr[slot]; res = bs == null ? 0 : bs.cardinality(); } return res; }
Example 8
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
@SuppressWarnings({"unchecked", "rawtypes"}) private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique; if (counts != null) { unique = counts[slot]; } else { unique = ords==null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit=100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList( Math.min(unique, maxExplicit) ); int maxOrd = ords.length(); if (maxOrd > 0) { for (int ord=0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; }
Example 9
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Populates {@code counts} with one entry per slot: the cardinality of the
 * slot's bit set, or 0 for slots that have no bit set.
 */
public void calcCounts() {
  counts = new int[arr.length];
  int slot = 0;
  for (FixedBitSet slotBits : arr) {
    if (slotBits == null) {
      counts[slot] = 0;
    } else {
      counts[slot] = slotBits.cardinality();
    }
    slot++;
  }
}
Example 10
Source File: TestDocSet.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Converts a bit set into a {@code SortedIntDocSet} containing the index of
 * every set bit, in the order the iterator yields them.
 *
 * @param bs bit set to convert
 * @return a doc set backed by an array with one entry per set bit
 */
public DocSet getIntDocSet(FixedBitSet bs) {
  final int count = bs.cardinality();
  final int[] ids = new int[count];

  final BitSetIterator cursor = new BitSetIterator(bs, 0);
  int pos = 0;
  while (pos < count) {
    ids[pos] = cursor.nextDoc();
    pos++;
  }

  return new SortedIntDocSet(ids);
}
Example 11
Source File: TestDocSet.java From lucene-solr with Apache License 2.0 | 5 votes |
public DocSlice getDocSlice(FixedBitSet bs) { int len = bs.cardinality(); int[] arr = new int[len+5]; arr[0]=10; arr[1]=20; arr[2]=30; arr[arr.length-1]=1; arr[arr.length-2]=2; int offset = 3; int end = offset + len; BitSetIterator iter = new BitSetIterator(bs, 0); // put in opposite order... DocLists are not ordered. for (int i=end-1; i>=offset; i--) { arr[i] = iter.nextDoc(); } return new DocSlice(offset, len, arr, null, len*2, 100.0f, TotalHits.Relation.EQUAL_TO); }
Example 12
Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Scans this field's section of the input line by line, starting at
 * {@code termsStart}, and builds the terms FST.
 *
 * <p>Each term is added to the FST with an output made of the file pointer
 * captured right after its TERM line plus its (docFreq, totalTermFreq) pair.
 * While scanning, the method also updates {@code sumTotalTermFreq},
 * {@code sumDocFreq}, {@code termCount} and, at the end, {@code docCount}
 * (the number of distinct doc ids seen) and {@code fst}.
 */
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstCompiler;
  // Inner pair holds (docFreq, totalTermFreq); outer pair prepends the docs start pointer.
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs, outputsInner);
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  // Work on a clone so seeking here does not move the shared input.
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  // -1 means no TERM line has been seen yet, so there is nothing to flush.
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      // End of this field's terms: flush the pending term, if any, and stop.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      docFreq++;
      sumDocFreq++;
      // Count one occurrence for the doc now; a following FREQ line adds (freq - 1).
      totalTermFreq++;
      // Parse the doc id that follows the DOC prefix.
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      // Minus one because the DOC line already contributed one occurrence.
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      // A new term starts: flush the previous one before resetting the per-term state.
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart, outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      // Copy the term bytes that follow the TERM prefix into lastTerm.
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      // Adds the previous term's total (0 when this is the first term).
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = fstCompiler.compile();
  /* PrintStream ps = new PrintStream("out.dot"); fst.toDot(ps); ps.close(); System.out.println("SAVED out.dot"); */
  //System.out.println("FST " + fst.sizeInBytes());
}
Example 13
Source File: CheckIndex.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Validates a sorted doc-values field by iterating {@code dv} with
 * {@code nextDoc} and cross-checking every document against {@code dv2} via
 * {@code advanceExact}.
 *
 * <p>Checks performed: the iterator starts at docID -1; every ordinal lies in
 * {@code [0, valueCount - 1]}; both instances report the same ordinal per
 * document; the largest ordinal seen equals {@code valueCount - 1}; every
 * ordinal is used by at least one document; and the looked-up terms are
 * strictly increasing.
 *
 * @param fieldName field name, used in error messages
 * @param maxDoc not used by this method
 * @param dv instance that is iterated sequentially
 * @param dv2 second instance over the same field, positioned with advanceExact
 * @throws RuntimeException on the first failed check
 */
private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv, SortedDocValues dv2) throws IOException {
  if (dv.docID() != -1) {
    throw new RuntimeException("sorted dv iterator for field: " + fieldName + " should start at docID=-1, but got " + dv.docID());
  }

  final int valueCount = dv.getValueCount();
  final int maxOrd = valueCount - 1;
  final FixedBitSet seenOrds = new FixedBitSet(valueCount);
  int highestSeen = -1;

  int doc;
  while ((doc = dv.nextDoc()) != NO_MORE_DOCS) {
    final int ord = dv.ordValue();
    if (ord == -1) {
      throw new RuntimeException("dv for field: " + fieldName + " has -1 ord");
    }
    if (ord < -1 || ord > maxOrd) {
      throw new RuntimeException("ord out of bounds: " + ord);
    }
    highestSeen = Math.max(highestSeen, ord);
    seenOrds.set(ord);

    if (!dv2.advanceExact(doc)) {
      throw new RuntimeException("advanceExact did not find matching doc ID: " + doc);
    }
    final int otherOrd = dv2.ordValue();
    if (ord != otherOrd) {
      throw new RuntimeException("nextDoc and advanceExact report different ords: " + ord + " != " + otherOrd);
    }
  }

  if (maxOrd != highestSeen) {
    throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + highestSeen);
  }
  if (seenOrds.cardinality() != valueCount) {
    throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + valueCount + " but only used: " + seenOrds.cardinality());
  }

  // Terms must be valid and strictly increasing in ordinal order.
  BytesRef previous = null;
  for (int ord = 0; ord < valueCount; ord++) {
    final BytesRef term = dv.lookupOrd(ord);
    term.isValid();
    if (previous != null && term.compareTo(previous) <= 0) {
      throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + previous + " >=" + term);
    }
    // Copy the term before the next lookupOrd call (presumably the returned instance may be reused).
    previous = BytesRef.deepCopyOf(term);
  }
}
Example 14
Source File: SolrIndexSplitter.java From lucene-solr with Apache License 2.0 | 4 votes |
public LiveDocsReader(CodecReader in, FixedBitSet liveDocs) { super(in); this.liveDocs = liveDocs; this.numDocs = liveDocs.cardinality(); }