org.apache.lucene.util.BytesRefHash Java Examples
The following examples show how to use
org.apache.lucene.util.BytesRefHash.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
private void readFromBytes(BytesRef bytes) { // Read pruned flag this.setIsPruned(bytes.bytes[bytes.offset++] == 1 ? true : false); // Read size fo the set int size = Bytes.readInt(bytes); // Read terms bytesUsed = Counter.newCounter(); pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); set = new BytesRefHash(pool); BytesRef reusable = new BytesRef(); for (int i = 0; i < size; i++) { Bytes.readBytesRef(bytes, reusable); set.add(reusable); } }
Example #2
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Assembles the presearcher query: one MUST clause per pass, where each
 * pass is a disjunction (SHOULD) of per-field term-set queries.
 */
@Override
public Query build() {
  // Materialise each field's BytesRefHash into a sorted-friendly array once,
  // so the per-pass loop below can reuse it.
  Map<String, BytesRef[]> collectedTerms = new HashMap<>();
  for (Map.Entry<String, BytesRefHash> entry : terms.entrySet()) {
    collectedTerms.put(entry.getKey(), convertHash(entry.getValue()));
  }

  BooleanQuery.Builder parent = new BooleanQuery.Builder();
  for (int pass = 0; pass < passes; pass++) {
    BooleanQuery.Builder perPass = new BooleanQuery.Builder();
    for (String fieldName : terms.keySet()) {
      perPass.add(
          new TermInSetQuery(field(fieldName, pass), collectedTerms.get(fieldName)),
          BooleanClause.Occur.SHOULD);
    }
    parent.add(perPass.build(), BooleanClause.Occur.MUST);
  }
  return parent.build();
}
Example #3
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public Document buildQueryDocument(QueryTree querytree) { Document doc = new Document(); for (int i = 0; i < passes; i++) { Map<String, BytesRefHash> fieldTerms = collectTerms(querytree); for (Map.Entry<String, BytesRefHash> entry : fieldTerms.entrySet()) { // we add the index terms once under a suffixed field for the multipass query, and // once under the plan field name for the TermsEnumTokenFilter doc.add(new Field(field(entry.getKey(), i), new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE)); doc.add(new Field(entry.getKey(), new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())), QUERYFIELDTYPE)); } querytree.advancePhase(minWeight); } return doc; }
Example #4
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates a query matching documents whose {@code toField} contains one of
 * the collected terms, scoring them with the pre-computed per-term scores.
 * Sorts the terms hash up front and caches the resulting ord map.
 */
TermsIncludingScoreQuery(ScoreMode scoreMode, String toField,
                         boolean multipleValuesPerDocument, BytesRefHash terms,
                         float[] scores, String fromField, Query fromQuery,
                         Object indexReaderContextId) {
  this.scoreMode = scoreMode;
  this.toField = toField;
  this.multipleValuesPerDocument = multipleValuesPerDocument;
  this.terms = terms;
  this.scores = scores;
  this.ords = terms.sort(); // sorted ord map, aligned with `scores` lookups
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.topReaderContextId = indexReaderContextId;
  // Pre-compute the RAM estimate once; the query is immutable after this.
  this.ramBytesUsed = BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(fromField)
      + RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED)
      + RamUsageEstimator.sizeOfObject(ords)
      + RamUsageEstimator.sizeOfObject(scores)
      + RamUsageEstimator.sizeOfObject(terms)
      + RamUsageEstimator.sizeOfObject(toField);
}
Example #5
Source File: TermsQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * @param toField              The field that should contain terms that are
 *                             specified in the next parameter.
 * @param terms                The terms that matching documents should have.
 *                             The terms must be sorted by natural order.
 * @param indexReaderContextId Refers to the top level index reader used to
 *                             create the set of terms in the previous parameter.
 */
TermsQuery(String toField, BytesRefHash terms, String fromField, Query fromQuery,
           Object indexReaderContextId) {
  super(toField);
  this.terms = terms;
  ords = terms.sort(); // ord map into the sorted hash
  this.fromField = fromField;
  this.fromQuery = fromQuery;
  this.indexReaderContextId = indexReaderContextId;
  // Immutable after construction, so the RAM estimate is computed once here.
  this.ramBytesUsed = BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(field)
      + RamUsageEstimator.sizeOfObject(fromField)
      + RamUsageEstimator.sizeOfObject(fromQuery, RamUsageEstimator.QUERY_DEFAULT_RAM_BYTES_USED)
      + RamUsageEstimator.sizeOfObject(ords)
      + RamUsageEstimator.sizeOfObject(terms);
}
Example #6
Source File: GenericTermsCollector.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Adapts a plain {@link TermsCollector} to the {@link GenericTermsCollector}
 * interface. Scores are not tracked by the wrapped collector, so
 * {@code getScoresPerTerm()} always throws.
 */
static GenericTermsCollector wrap(final TermsCollector<?> collector) {
  return new GenericTermsCollector() {

    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      // Straight delegation.
      return collector.getLeafCollector(context);
    }

    @Override
    public org.apache.lucene.search.ScoreMode scoreMode() {
      return collector.scoreMode();
    }

    @Override
    public BytesRefHash getCollectedTerms() {
      return collector.getCollectorTerms();
    }

    @Override
    public float[] getScoresPerTerm() {
      throw new UnsupportedOperationException("scores are not available for " + collector);
    }
  };
}
Example #7
Source File: MemoryIndex.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Binary-searches the ord array for term {@code b}, dereferencing ords
 * through {@code hash} into the scratch ref {@code bytesRef}.
 *
 * @return the matching index, or {@code -(insertionPoint + 1)} if absent
 *         (same convention as {@link java.util.Arrays#binarySearch})
 */
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low, int high,
                               BytesRefHash hash, int[] ords) {
  int mid = 0;
  while (low <= high) {
    mid = (low + high) >>> 1; // unsigned shift avoids int overflow on (low + high)
    hash.get(ords[mid], bytesRef); // load the candidate term into the scratch ref
    final int cmp = bytesRef.compareTo(b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  assert bytesRef.compareTo(b) != 0; // not found: last probe must differ
  return -(low + 1);
}
Example #8
Source File: GraphQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/** Build an automaton to represent the frontier query */ private Automaton buildAutomaton(BytesRefHash termBytesHash) { // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?) final TreeSet<BytesRef> terms = new TreeSet<BytesRef>(); for (int i = 0 ; i < termBytesHash.size(); i++) { BytesRef ref = new BytesRef(); termBytesHash.get(i, ref); terms.add(ref); } final Automaton a = DaciukMihovAutomatonBuilder.build(terms); return a; }
Example #9
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Merges all terms from another {@link BytesRefTermsSet} into this one.
 *
 * @throws UnsupportedOperationException if {@code terms} is any other
 *         {@link TermsSet} implementation
 */
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }
  BytesRefHash source = ((BytesRefTermsSet) terms).set;
  // One scratch ref is enough: set.add() copies the bytes it is given.
  BytesRef scratch = new BytesRef();
  for (int ord = 0; ord < source.size(); ord++) {
    source.get(ord, scratch);
    set.add(scratch);
  }
}
Example #10
Source File: GraphEdgeCollector.java From lucene-solr with Apache License 2.0 | 5 votes |
/** * Build an automaton to represent the frontier query */ private Automaton buildAutomaton(BytesRefHash termBytesHash) { // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?) final TreeSet<BytesRef> terms = new TreeSet<BytesRef>(); for (int i = 0; i < termBytesHash.size(); i++) { BytesRef ref = new BytesRef(); termBytesHash.get(i, ref); terms.add(ref); } final Automaton a = DaciukMihovAutomatonBuilder.build(terms); return a; }
Example #11
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Deserialises this terms set from the stream: a pruned flag, an int count,
 * then that many length-prefixed terms. Rebuilds the backing pool and hash.
 *
 * @throws IOException on stream read failure
 */
@Override
public void readFrom(StreamInput in) throws IOException {
  this.setIsPruned(in.readBoolean());
  int size = in.readInt();

  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);

  // Fixed: loop counter was declared `long` against an int bound — needless
  // widening, and inconsistent with the sibling readFromBytes() loop.
  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
Example #12
Source File: SortedDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Iterator over buffered sorted doc values. Note: {@code docToOrd} is
 * consumed via its iterator; {@code ordMap} remaps buffered ords to sorted
 * ords.
 */
public BufferedSortedDocValues(BytesRefHash hash, int valueCount,
                               PackedLongValues docToOrd, int[] sortedValues,
                               int[] ordMap, DocIdSetIterator docsWithField) {
  this.hash = hash;
  this.valueCount = valueCount;
  this.sortedValues = sortedValues;
  this.iter = docToOrd.iterator();
  this.ordMap = ordMap;
  this.docsWithField = docsWithField;
}
Example #13
Source File: SortedDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Buffers SORTED doc values for one field, tracking all allocations against
 * the writer's shared {@code iwBytesUsed} counter.
 */
public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  // Both the byte pool and the hash's start array report into iwBytesUsed.
  hash = new BytesRefHash(
      new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  // Account for the initial footprint of the packed builder and doc set.
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
Example #14
Source File: SortedSetDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Iterator over buffered SORTED_SET doc values. {@code maxCount} bounds the
 * number of ords any single document can carry, sizing the scratch buffer.
 */
public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap,
                                  BytesRefHash hash, PackedLongValues ords,
                                  PackedLongValues ordCounts, int maxCount,
                                  DocIdSetIterator docsWithField) {
  this.currentDoc = new int[maxCount]; // per-document ord scratch space
  this.sortedValues = sortedValues;
  this.ordMap = ordMap;
  this.hash = hash;
  this.ordsIter = ords.iterator();
  this.ordCountsIter = ordCounts.iterator();
  this.docsWithField = docsWithField;
}
Example #15
Source File: SortedSetDocValuesWriter.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Buffers SORTED_SET doc values for one field; all allocations are tracked
 * against the writer's shared {@code iwBytesUsed} counter.
 */
public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  // Byte pool and start array both report into iwBytesUsed.
  hash = new BytesRefHash(
      new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
      BytesRefHash.DEFAULT_CAPACITY,
      new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);        // per-value ords
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT); // ords per doc
  docsWithField = new DocsWithFieldSet();
  // Account for the initial footprint of both builders.
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}
Example #16
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Copies the contents of a {@link BytesRefHash} into a plain array, one
 * freshly-allocated {@link BytesRef} per entry, indexed by ord.
 */
private static BytesRef[] convertHash(BytesRefHash hash) {
  final int count = hash.size();
  BytesRef[] out = new BytesRef[count];
  for (int ord = 0; ord < count; ord++) {
    // hash.get() fills and returns the ref we pass in.
    out[ord] = hash.get(ord, new BytesRef());
  }
  return out;
}
Example #17
Source File: MemoryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Per-field state for the in-memory index: the field's terms hash (backed by
 * the shared byte pool) plus doc-values producers.
 */
private Info(FieldInfo fieldInfo, ByteBlockPool byteBlockPool) {
  this.fieldInfo = fieldInfo;
  this.sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
  // Fixed: removed stray empty statement (double semicolon) after this call.
  this.terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
  this.binaryProducer = new BinaryDocValuesProducer();
  this.numericProducer = new NumericDocValuesProducer();
}
Example #18
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Collects terms from a {@link QueryTree} and maps them per-field
 */
protected Map<String, BytesRefHash> collectTerms(QueryTree querytree) {
  Map<String, BytesRefHash> termsByField = new HashMap<>();
  // Lazily create one hash per field as terms are emitted by the tree.
  querytree.collectTerms((field, term) ->
      termsByField.computeIfAbsent(field, f -> new BytesRefHash()).add(term));
  return termsByField;
}
Example #19
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a {@link Document} from the terms extracted from a query
 */
protected Document buildQueryDocument(QueryTree querytree) {
  Document doc = new Document();
  for (Map.Entry<String, BytesRefHash> entry : collectTerms(querytree).entrySet()) {
    // One indexed field per source field, streaming its collected terms.
    doc.add(new Field(entry.getKey(),
        new TermsEnumTokenStream(new BytesRefHashIterator(entry.getValue())),
        QUERYFIELDTYPE));
  }
  return doc;
}
Example #20
Source File: QueryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Returns true if the given term was seen in the given field; fields with no
 * collected terms reject everything.
 */
@Override
public boolean test(String field, BytesRef term) {
  BytesRefHash fieldTerms = termsHash.get(field);
  // find() returns -1 when the term is absent.
  return fieldTerms != null && fieldTerms.find(term) != -1;
}
Example #21
Source File: QueryIndex.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Snapshots every term of every field of the given reader into per-field
 * {@link BytesRefHash}es, for fast membership tests later.
 *
 * @throws IOException on index access failure
 */
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash fieldTerms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t == null) {
        continue; // field has no postings in this leaf
      }
      TermsEnum te = t.iterator();
      for (BytesRef term = te.next(); term != null; term = te.next()) {
        fieldTerms.add(term);
      }
    }
  }
}
Example #22
Source File: SeekingTermSetTermsEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Constructor
 *
 * @param tenum the wrapped enum to seek within
 * @param terms the term set to restrict iteration to
 * @param ords  sorted ord map over {@code terms}
 */
public SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefHash terms, int[] ords) {
  super(tenum);
  this.terms = terms;
  this.ords = ords;
  // Cache the last (largest) term so iteration knows when to stop.
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  // Prime the first seek target.
  seekTerm = terms.get(ords[upto], spare);
}
Example #23
Source File: MultipassTermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Records a term against its field, lazily creating the field's hash. */
@Override
public void addTerm(String field, BytesRef term) {
  terms.computeIfAbsent(field, f -> new BytesRefHash()).add(term);
}
Example #24
Source File: TermFilteredPresearcher.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Iterates a {@link BytesRefHash} in sorted term order (sorts it eagerly). */
BytesRefHashIterator(BytesRefHash terms) {
  this.terms = terms;
  // sort() returns the ord map we will iterate over.
  this.sortedTerms = terms.sort();
}
Example #25
Source File: SynonymMap.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * @param fst                  the synonym transducer
 * @param words                the interned output words referenced by the FST
 * @param maxHorizontalContext longest rule length, in tokens
 */
public SynonymMap(FST<BytesRef> fst, BytesRefHash words, int maxHorizontalContext) {
  this.fst = fst;
  this.words = words;
  this.maxHorizontalContext = maxHorizontalContext;
}
Example #26
Source File: GraphEdgeCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Collector that accumulates frontier terms into a fresh {@link BytesRefHash}. */
GraphTermsCollector(SchemaField collectField, DocSet skipSet, DocSet leafNodes) {
  super(collectField, skipSet, leafNodes);
  this.collectorTerms = new BytesRefHash();
}
Example #27
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 4 votes |
/**
 * Creates an empty terms set whose allocations are tracked by
 * {@code bytesUsed} and guarded by the given circuit breaker.
 */
public BytesRefTermsSet(final CircuitBreaker breaker) {
  super(breaker);
  this.bytesUsed = Counter.newCounter();
  // The pool reports every block allocation into bytesUsed.
  this.pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  this.set = new BytesRefHash(pool);
}
Example #28
Source File: TermsWithScoreCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the terms gathered so far by this collector. */
@Override
public BytesRefHash getCollectedTerms() {
  return collectedTerms;
}
Example #29
Source File: BytesRefTermsSet.java From siren-join with GNU Affero General Public License v3.0 | 4 votes |
/** Exposes the underlying {@link BytesRefHash} backing this terms set. */
public BytesRefHash getBytesRefHash() {
  return set;
}
Example #30
Source File: TermsCollector.java From lucene-solr with Apache License 2.0 | 4 votes |
/** Returns the hash of terms collected by this collector. */
public BytesRefHash getCollectorTerms() {
  return collectorTerms;
}