Java Code Examples for org.apache.lucene.util.FixedBitSet#nextSetBit()
The following examples show how to use
org.apache.lucene.util.FixedBitSet#nextSetBit().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BlockJoin.java From lucene-solr with Apache License 2.0 | 6 votes |
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */ public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException { FixedBitSet parentBits = parentList.getBits(); DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()); DocIterator iter = childInput.iterator(); int currentParent = -1; while (iter.hasNext()) { int childDoc = iter.nextDoc(); // TODO: skipping if (childDoc <= currentParent) { // use <= since we also allow parents in the input // we already visited this parent continue; } currentParent = parentBits.nextSetBit(childDoc); if (currentParent != DocIdSetIterator.NO_MORE_DOCS) { // only collect the parent the first time we skip to it collector.collect( currentParent ); } } return collector.getDocSet(); }
Example 2
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 6 votes |
@SuppressWarnings({"unchecked", "rawtypes"}) private Object getShardHLL(int slot) throws IOException { FixedBitSet ords = arr[slot]; if (ords == null) return HLLAgg.NO_VALUES; HLL hll = factory.getHLL(); long maxOrd = ords.length(); Hash.LongPair hashResult = new Hash.LongPair(); for(int ord=-1; ++ord < maxOrd;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); // way to avoid recomputing hash across slots? Prob not worth space Hash.murmurhash3_x64_128(val.bytes, val.offset, val.length, 0, hashResult); // idea: if the set is small enough, just send the hashes? We can add at the top // level or even just do a hash table at the top level. hll.addRaw(hashResult.val1); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("hll", hll.toBytes()); return map; }
Example 3
Source File: BitSetHitStream.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
@Override public void next() { LimitedBitSetHitCollector collector = (LimitedBitSetHitCollector) this.getCollector(); int atomicDocId = this.currentAtomicDocId; if (currentAtomicReaderId < collector.getFixedSets().size()) { do { FixedBitSet bitSet = collector.getFixedSets().get(currentAtomicReaderId); if (atomicDocId == DocIdSetIterator.NO_MORE_DOCS) { // we start a new reader, reset the doc id atomicDocId = -1; } atomicDocId = atomicDocId + 1 < bitSet.length() ? bitSet.nextSetBit(atomicDocId + 1) : DocIdSetIterator.NO_MORE_DOCS; } while (atomicDocId == DocIdSetIterator.NO_MORE_DOCS && ++currentAtomicReaderId < collector.getFixedSets().size()); } this.currentAtomicDocId = atomicDocId; this.count++; }
Example 4
Source File: BaseCompoundFormatTestCase.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writes one sub-file, packs it with the codec's compound format, and verifies that
 * {@code checkIntegrity()} on the compound reader reads every byte of every file the
 * compound format created.
 */
public void testCheckIntegrity() throws IOException {
  final Directory dir = newDirectory();
  final String subFile = "_123.xyz";
  final SegmentInfo si = newSegmentInfo(dir, "_123");

  // Sub-file layout: index header, 1024 payload bytes, then a footer (magic, 0, checksum).
  try (IndexOutput out = dir.createOutput(subFile, newIOContext(random()))) {
    CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
    for (int b = 0; b < 1024; b++) {
      out.writeByte((byte) b);
    }
    out.writeInt(CodecUtil.FOOTER_MAGIC);
    out.writeInt(0);
    out.writeLong(out.getChecksum());
  }
  si.setFiles(Collections.singletonList(subFile));

  // Record which files the compound format writes ...
  final FileTrackingDirectoryWrapper writeTrackingDir = new FileTrackingDirectoryWrapper(dir);
  si.getCodec().compoundFormat().write(writeTrackingDir, si, IOContext.DEFAULT);
  final Set<String> createdFiles = writeTrackingDir.getFiles();

  // ... and which bytes are read back while checking integrity.
  final ReadBytesDirectoryWrapper readTrackingDir = new ReadBytesDirectoryWrapper(dir);
  final CompoundDirectory compoundDir =
      si.getCodec().compoundFormat().getCompoundReader(readTrackingDir, si, IOContext.READ);
  compoundDir.checkIntegrity();

  final Map<String,FixedBitSet> readBytes = readTrackingDir.getReadBytes();
  assertEquals(createdFiles, readBytes.keySet());

  for (Map.Entry<String, FixedBitSet> perFile : readBytes.entrySet()) {
    final String file = perFile.getKey();
    // Invert a copy of the "read" bits so that any set bit marks an unread offset.
    final FixedBitSet unread = perFile.getValue().clone();
    unread.flip(0, unread.length());
    final int next = unread.nextSetBit(0);
    assertEquals("Byte at offset " + next + " of " + file + " was not read", DocIdSetIterator.NO_MORE_DOCS, next);
  }

  compoundDir.close();
  dir.close();
}
Example 5
Source File: SloppyPhraseMatcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/** map each term to the single group that contains it */ private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<FixedBitSet> bb) throws IOException { HashMap<Term,Integer> tg = new HashMap<>(); Term[] t = tord.keySet().toArray(new Term[0]); for (int i=0; i<bb.size(); i++) { // i is the group no. FixedBitSet bits = bb.get(i); for (int ord = bits.nextSetBit(0); ord != DocIdSetIterator.NO_MORE_DOCS; ord = ord + 1 >= bits.length() ? DocIdSetIterator.NO_MORE_DOCS : bits.nextSetBit(ord + 1)) { tg.put(t[ord],i); } } return tg; }
Example 6
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
@SuppressWarnings({"unchecked", "rawtypes"}) private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique; if (counts != null) { unique = counts[slot]; } else { unique = ords==null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit=100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList( Math.min(unique, maxExplicit) ); int maxOrd = ords.length(); if (maxOrd > 0) { for (int ord=0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; }
Example 7
Source File: SlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns a new bit set of {@code getNewSize()} bits in which, for every bit set in
 * {@code old}, the bit at {@code getNewSlot(oldSlot)} is set.
 *
 * <p>NOTE(review): every value returned by {@code getNewSlot} is passed straight to
 * {@code set}; if {@code getNewSlot} can signal a dropped slot with a negative value, a guard
 * is needed here - confirm against its contract.
 *
 * @param old the bit set indexed by old slot numbers
 * @return the remapped bit set, indexed by new slot numbers
 */
public FixedBitSet resize(FixedBitSet old) {
  FixedBitSet values = new FixedBitSet(getNewSize());
  int oldSize = old.length();
  if (oldSize == 0) {
    // Nothing to copy, and nextSetBit(0) would be out of range on a zero-length set.
    return values;
  }

  for (int oldSlot = 0; ; ) {
    // Scan the OLD set: 'values' was just allocated and has no bits set, so scanning it
    // (as this method previously did) always produced an empty result.
    oldSlot = old.nextSetBit(oldSlot);
    if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
    int newSlot = getNewSlot(oldSlot);
    values.set(newSlot);
    // Stop before asking nextSetBit for an index past the end of 'old'.
    if (++oldSlot >= oldSize) break;
  }

  return values;
}
Example 8
Source File: DocSetUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Converts a bit-set backed doc set into a {@code SortedIntDocSet} by walking its set bits in
 * increasing order.
 *
 * @param bitSet the doc set to convert; {@code size()} determines how many bits are read
 * @return a {@code SortedIntDocSet} over the collected doc ids
 */
public static DocSet toSmallSet(BitDocSet bitSet) {
  final int count = bitSet.size();
  final int[] ids = new int[count];
  final FixedBitSet bits = bitSet.getBits();

  int from = 0; // index at which the next search starts
  int filled = 0;
  while (filled < count) {
    final int doc = bits.nextSetBit(from);
    ids[filled++] = doc;
    from = doc + 1;
  }
  return new SortedIntDocSet(ids);
}
Example 9
Source File: BaseIndexFileFormatTestCase.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests
 * that the combination of opening a reader and calling checkIntegrity on it reads all bytes of
 * all files.
 */
public void testCheckIntegrityReadsAllBytes() throws Exception {
  assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec);
  FileTrackingDirectoryWrapper dir = new FileTrackingDirectoryWrapper(newDirectory());
  applyCreatedVersionMajor(dir);

  // Index some random documents and merge them down to a single segment.
  IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, cfg);
  final int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; ++i) {
    Document d = new Document();
    addRandomFields(d);
    w.addDocument(d);
  }
  w.forceMerge(1);
  w.commit();
  w.close();

  // Re-open through a wrapper that records which bytes of which files get read.
  ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir);
  IndexReader reader = DirectoryReader.open(readBytesWrapperDir);
  LeafReader leafReader = getOnlyLeafReader(reader);
  leafReader.checkIntegrity();

  // Every file that was written (except the write lock) must have been opened for reading.
  Map<String, FixedBitSet> readBytesMap = readBytesWrapperDir.getReadBytes();
  Set<String> unreadFiles = new HashSet<>(dir.getFiles());
  unreadFiles.removeAll(readBytesMap.keySet());
  unreadFiles.remove(IndexWriter.WRITE_LOCK_NAME);
  assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty());

  // Within each opened file, every byte must have been read.
  List<String> messages = new ArrayList<>();
  for (Map.Entry<String, FixedBitSet> entry : readBytesMap.entrySet()) {
    String name = entry.getKey();
    // Invert a copy of the "read" bits so that any set bit marks an unread offset.
    FixedBitSet unreadBytes = entry.getValue().clone();
    unreadBytes.flip(0, unreadBytes.length());
    int unread = unreadBytes.nextSetBit(0);
    // Integer.MAX_VALUE is the value of DocIdSetIterator.NO_MORE_DOCS, i.e. "no bit set".
    if (unread != Integer.MAX_VALUE) {
      messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read.");
    }
  }
  assertTrue(String.join("\n", messages), messages.isEmpty());
  reader.close();
  dir.close();
}