org.apache.lucene.util.BytesRefHash Java Examples
The following examples show how to use
org.apache.lucene.util.BytesRefHash.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
private void readFromBytes(BytesRef bytes) { // Read pruned flag this.setIsPruned(bytes.bytes[bytes.offset++] == 1 ? true : false); // Read size fo the set int size = Bytes.readInt(bytes); // Read terms bytesUsed = Counter.newCounter(); pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); set = new BytesRefHash(pool); BytesRef reusable = new BytesRef(); for (int i = 0; i < size; i++) { Bytes.readBytesRef(bytes, reusable); set.add(reusable); } }
Example #2
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Assembles the presearcher query: one MUST clause per pass, where each
 * pass is a disjunction (SHOULD) of per-field term-set queries.
 */
@Override
public Query build() {
  // Materialise each field's BytesRefHash into a sorted-friendly array once,
  // so the per-pass loop below can reuse it.
  Map<String, BytesRef[]> collectedTerms = new HashMap<>();
  for (Map.Entry<String, BytesRefHash> entry : terms.entrySet()) {
    collectedTerms.put(entry.getKey(), convertHash(entry.getValue()));
  }

  BooleanQuery.Builder parent = new BooleanQuery.Builder();
  for (int pass = 0; pass < passes; pass++) {
    BooleanQuery.Builder perPass = new BooleanQuery.Builder();
    for (String fieldName : terms.keySet()) {
      perPass.add(
          new TermInSetQuery(field(fieldName, pass), collectedTerms.get(fieldName)),
          BooleanClause.Occur.SHOULD);
    }
    parent.add(perPass.build(), BooleanClause.Occur.MUST);
  }
  return parent.build();
}
Example #3
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public Document buildQueryDocument(QueryTree querytree) { Document doc = new Document(); for (int i = 0; i < passes; i++) { Map<String, BytesRefHash> fieldTerms = collectTerms(querytree); for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) { // we add the index terms once under a suffixed field for the multipass query, and // once under the plan field name for the TermsEnumTokenFilter doc.add(new Field(field(entry.getKey(), i), new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE)); doc.add(new Field(entry.getKey(), new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE)); } querytree.advancePhase(minWeight); } return doc; }
Example #4
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a query matching documents whose {@code toField} contains one of
 * the collected terms, scoring them with the pre-computed per-term scores.
 * Sorts the terms hash up front and caches the resulting ord map.
 */
TermsIncludingScoreQuery(ScoreMode scoreMode, String toField,
                         boolean multipleValuesPerDocument, BytesRefHash terms,
                         float[] scores, String fromField, Query fromQuery,
                         Object indexReaderContextId) {
  this.scoreMode = scoreMode;
  this.toField = toField;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  this.scores = scores;
  this.ords = terms.sort(); // sorted ord map, aligned with `scores` lookups
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.topReaderContextId = indexReaderContextId;
  // Pre-compute the RAM estimate once; the query is immutable after this.
  this.ramBytesUsed = BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(fromField)
      + RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED)
      + RamUsageEstimator.sizeOfObject(ords)
      + RamUsageEstimator.sizeOfObject(scores)
      + RamUsageEstimator.sizeOfObject(terms)
      + RamUsageEstimator.sizeOfObject(toField);
}
Example #5
Source File: TermsQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * @param toField              The field that should contain terms that are
 *                             specified in the next parameter.
 * @param terms                The terms that matching documents should have.
 *                             The terms must be sorted by natural order.
 * @param indexReaderContextId Refers to the top level index reader used to
 *                             create the set of terms in the previous parameter.
 */
TermsQuery(String toField, BytesRefHash terms, String fromField, Query fromQuery,
           Object indexReaderContextId) {
  super(toField);
  this.terms = terms;
  ords = terms.sort(); // ord map into the sorted hash
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.indexReaderContextId = indexReaderContextId;
  // Immutable after construction, so the RAM estimate is computed once here.
  this.ramBytesUsed = BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(field)
      + RamUsageEstimator.sizeOfObject(fromField)
      + RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED)
      + RamUsageEstimator.sizeOfObject(ords)
      + RamUsageEstimator.sizeOfObject(terms);
}
Example #6
Source File: GenericTermsCollector.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Adapts a plain {@link TermsCollector} to the {@link GenericTermsCollector}
 * interface. Scores are not tracked by the wrapped collector, so
 * {@code getScoresPerTerm()} always throws.
 */
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      // Straight delegation.
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      throw new UnsupportedOperationException("scores are not available for " + collector);
    }
  };
}
Example #7
Source File: MemoryIndex.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Binary-searches the ord array for term {@code b}, dereferencing ords
 * through {@code hash} into the scratch ref {@code bytesRef}.
 *
 * @return the matching index, or {@code -(insertionPoint + 1)} if absent
 *         (same convention as {@link java.util.Arrays#binarySearch})
 */
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low, int high,
                               BytesRefHash hash, int[] ords) {
  int mid = 0;
  while (low <= high) {
    mid = (low + high) >>> 1; // unsigned shift avoids int overflow on (low + high)
    hash.get(ords[mid], bytesRef); // load the candidate term into the scratch ref
    final int cmp = bytesRef.compareTo(b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  assert bytesRef.compareTo(b) != 0; // not found: last probe must differ
  return -(low + 1);
}
Example #8
Source File: GraphQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Build an automaton to represent the frontier query */ private Automaton buildAutomaton(BytesRefHash termBytesHash) { // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?) final TreeSet<BytesRef> terms = new TreeSet<BytesRef>(); for (int i = 0 ; i < termBytesHash.size(); i++) { BytesRef ref = new BytesRef(); termBytesHash.get(i, ref); terms.add(ref); } final Automaton a = DaciukMihovAutomatonBuilder.build(terms); return a; }
Example #9
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Merges all terms from another {@link BytesRefTermsSet} into this one.
 *
 * @throws UnsupportedOperationException if {@code terms} is any other
 *         {@link TermsSet} implementation
 */
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }
  BytesRefHash source = ((BytesRefTermsSet) terms).set;
  // One scratch ref is enough: set.add() copies the bytes it is given.
  BytesRef scratch = new BytesRef();
  for (int ord = 0; ord < source.size(); ord++) {
    source.get(ord, scratch);
    set.add(scratch);
  }
}
Example #10
Source File: GraphEdgeCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Build an automaton to represent the frontier query */ private Automaton buildAutomaton(BytesRefHash termBytesHash) { // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?) final TreeSet<BytesRef> terms = new TreeSet<BytesRef>(); for (int i = 0; i < termBytesHash.size(); i++) { BytesRef ref = new BytesRef(); termBytesHash.get(i, ref); terms.add(ref); } final Automaton a = DaciukMihovAutomatonBuilder.build(terms); return a; }
Example #11
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Deserialises this terms set from the stream: a pruned flag, an int count,
 * then that many length-prefixed terms. Rebuilds the backing pool and hash.
 *
 * @throws IOException on stream read failure
 */
@Override
public void readFrom(StreamInput in) throws IOException {
  this.setIsPruned(in.readBoolean());
  int size = in.readInt();

  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  // Fixed: loop counter was declared `long` against an int bound — needless
  // widening, and inconsistent with the sibling readFromBytes() loop.
  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
Example #12
Source File: SortedDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Iterator over buffered sorted doc values. Note: {@code docToOrd} is
 * consumed via its iterator; {@code ordMap} remaps buffered ords to sorted
 * ords.
 */
public BufferedSortedDocValues(BytesRefHash hash, int valueCount,
                               PackedLongValues docToOrd, int[] sortedValues,
                               int[] ordMap, DocIdSetIterator docsWithField) {
  this.hash = hash;
  this.valueCount = valueCount;
  this.sortedValues = sortedValues;
  this.iter = docToOrd.iterator();
  this.ordMap = ordMap;
  this.docsWithField = docsWithField;
}
Example #13
Source File: SortedDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Buffers SORTED doc values for one field, tracking all allocations against
 * the writer's shared {@code iwBytesUsed} counter.
 */
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  // Both the byte pool and the hash's start array report into iwBytesUsed.
  hash = new BytesRefHash(
      new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  // Account for the initial footprint of the packed builder and doc set.
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
Example #14
Source File: SortedSetDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Iterator over buffered SORTED_SET doc values. {@code maxCount} bounds the
 * number of ords any single document can carry, sizing the scratch buffer.
 */
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap,
                                  BytesRefHash hash, PackedLongValues ords,
                                  PackedLongValues ordCounts, int maxCount,
                                  DocIdSetIterator docsWithField) {
  this.currentDoc = new int[maxCount]; // per-document ord scratch space
  this.sortedValues = sortedValues;
  this.ordMap = ordMap;
  this.hash = hash;
  this.ordsIter = ords.iterator();
  this.ordCountsIter = ordCounts.iterator();
  this.docsWithField = docsWithField;
}
Example #15
Source File: SortedSetDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Buffers SORTED_SET doc values for one field; all allocations are tracked
 * against the writer's shared {@code iwBytesUsed} counter.
 */
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  // Byte pool and start array both report into iwBytesUsed.
  hash = new BytesRefHash(
      new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);        // per-value ords
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); // ords per doc
  docsWithField = new DocsWithFieldSet();
  // Account for the initial footprint of both builders.
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
Example #16
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Copies the contents of a {@link BytesRefHash} into a plain array, one
 * freshly-allocated {@link BytesRef} per entry, indexed by ord.
 */
private static BytesRef[] convertHash(BytesRefHash hash) {
  final int count = hash.size();
  BytesRef[] out = new BytesRef[count];
  for (int ord = 0; ord < count; ord++) {
    // hash.get() fills and returns the ref we pass in.
    out[ord] = hash.get(ord, new BytesRef());
  }
  return out;
}
Example #17
Source File: MemoryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Per-field state for the in-memory index: the field's terms hash (backed by
 * the shared byte pool) plus doc-values producers.
 */
private Info(FieldInfo fieldInfo, ByteBlockPool byteBlockPool) {
  this.fieldInfo = fieldInfo;
  this.sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
  // Fixed: removed stray empty statement (double semicolon) after this call.
  this.terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
  this.binaryProducer = new BinaryDocValuesProducer();
  this.numericProducer = new NumericDocValuesProducer();
}
Example #18
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Collects terms from a {@link QueryTree} and maps them per-field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree querytree) {
  Map<String, BytesRefHash> termsByField = new HashMap<>();
  // Lazily create one hash per field as terms are emitted by the tree.
  querytree.collectTerms((field, term) ->
      termsByField.computeIfAbsent(field, f -> new BytesRefHash()).add(term));
  return termsByField;
}
Example #19
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link Document} from the terms extracted from a query
 */
protected Document buildQueryDocument(QueryTree querytree) {
  Document doc = new Document();
  for (Map.Entry<String, BytesRefHash> entry : collectTerms(querytree).entrySet()) {
    // One indexed field per source field, streaming its collected terms.
    doc.add(new Field(entry.getKey(),
        new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())),
        QUERYFIELDTYPE));
  }
  return doc;
}
Example #20
Source File: QueryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns true if the given term was seen in the given field; fields with no
 * collected terms reject everything.
 */
@Override
public boolean test(String field, BytesRef term) {
  BytesRefHash fieldTerms = termsHash.get(field);
  // find() returns -1 when the term is absent.
  return fieldTerms != null && fieldTerms.find(term) != -1;
}
Example #21
Source File: QueryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Snapshots every term of every field of the given reader into per-field
 * {@link BytesRefHash}es, for fast membership tests later.
 *
 * @throws IOException on index access failure
 */
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash fieldTerms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t == null) {
        continue; // field has no postings in this leaf
      }
      TermsEnum te = t.iterator();
      for (BytesRef term = te.next(); term != null; term = te.next()) {
        fieldTerms.add(term);
      }
    }
  }
}
Example #22
Source File: SeekingTermSetTermsEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Constructor
 *
 * @param tenum the wrapped enum to seek within
 * @param terms the term set to restrict iteration to
 * @param ords  sorted ord map over {@code terms}
 */
public SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) {
  super(tenum);
  this.terms = terms;
  this.ords = ords;
  // Cache the last (largest) term so iteration knows when to stop.
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  // Prime the first seek target.
  seekTerm = terms.get(ords[upto], spare);
}
Example #23
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Records a term against its field, lazily creating the field's hash. */
@Override
public void addTerm(String field, BytesRef term) {
  terms.computeIfAbsent(field, f -> new BytesRefHash()).add(term);
}
Example #24
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Iterates a {@link BytesRefHash} in sorted term order (sorts it eagerly). */
BytesRefHashIterator(BytesRefHash terms) {
  this.terms = terms;
  // sort() returns the ord map we will iterate over.
  this.sortedTerms = terms.sort();
}
Example #25
Source File: SynonymMap.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * @param fst                  the synonym transducer
 * @param words                the interned output words referenced by the FST
 * @param maxHorizontalContext longest rule length, in tokens
 */
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext) {
  this.fst = fst;
  this.words = words;
  this.maxHorizontalContext = maxHorizontalContext;
}
Example #26
Source File: GraphEdgeCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Collector that accumulates frontier terms into a fresh {@link BytesRefHash}. */
GraphTermsCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
  super(collectField, skipSet, leafNodes);
  this.collectorTerms = new BytesRefHash();
}
Example #27
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates an empty terms set whose allocations are tracked by
 * {@code bytesUsed} and guarded by the given circuit breaker.
 */
public BytesRefTermsSet(final CircuitBreaker breaker) {
  super(breaker);
  this.bytesUsed = Counter.newCounter();
  // The pool reports every block allocation into bytesUsed.
  this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  this.set = new BytesRefHash(pool);
}
Example #28
Source File: TermsWithScoreCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the terms gathered so far by this collector. */
@Override
public BytesRefHash getCollectedTerms() {
  return collectedTerms;
}
Example #29
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 4 votes |
/** Exposes the underlying {@link BytesRefHash} backing this terms set. */
public BytesRefHash getBytesRefHash() {
  return set;
}
Example #30
Source File: TermsCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the hash of terms collected by this collector. */
public BytesRefHash getCollectorTerms() {
  return collectorTerms;
}