org.apache.lucene.util.BytesRef#compareTo

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Binary-searches the terms of {@code docValues} for {@code key}, looking only at ordinals
 * from {@code startOrd} through {@code getValueCount()-1}.
 *
 * <p>The search relies on terms being returned in ascending order of ordinal.
 *
 * @param docValues doc values whose terms are probed via {@code lookupOrd}
 * @param key term to locate
 * @param startOrd lowest ordinal to consider
 * @return the ordinal of {@code key} when present, otherwise {@code -(insertionPoint + 1)}
 * @throws IOException propagated from the underlying doc values
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long lo = startOrd;
  long hi = docValues.getValueCount() - 1;

  while (lo <= hi) {
    // Unsigned shift keeps the midpoint correct even if lo + hi overflows.
    final long probe = (lo + hi) >>> 1;
    final int order = docValues.lookupOrd(probe).compareTo(key);

    if (order == 0) {
      return probe; // exact match
    }
    if (order < 0) {
      lo = probe + 1;
    } else {
      hi = probe - 1;
    }
  }

  // Not present: encode the insertion point as a negative value.
  return -(lo + 1);
}

Source File: SortedSetDocValues.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Looks up {@code key} with a binary search over ordinals {@code 0} through
 * {@code getValueCount()-1}.
 *
 * @param key Key to look up
 * @return the ordinal of {@code key} if it exists, else {@code -insertionPoint-1},
 *         like {@code Arrays.binarySearch}
 * @throws IOException propagated from the ordinal lookup
 */
public long lookupTerm(BytesRef key) throws IOException {
  long lower = 0;
  long upper = getValueCount() - 1;

  while (lower <= upper) {
    final long middle = (lower + upper) >>> 1;
    final int comparison = lookupOrd(middle).compareTo(key);

    if (comparison > 0) {
      // Term at middle sorts after the key: continue in the lower half.
      upper = middle - 1;
    } else if (comparison < 0) {
      // Term at middle sorts before the key: continue in the upper half.
      lower = middle + 1;
    } else {
      return middle;
    }
  }

  // Key is absent; lower is where it would be inserted.
  return -(lower + 1);
}

Source File: FuzzyTermsEnum.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Hook invoked when the max non-competitive boost has changed; lowers {@code maxEdits} as far
 * as the current {@code bottom} boost allows and, if needed, swaps in a new {@code actualEnum}.
 *
 * @param lastTerm the last term encountered, or {@code null}; a {@code null} value always
 *                 causes {@code actualEnum} to be rebuilt
 * @throws IOException propagated from {@code getAutomatonEnum}
 */
private void bottomChanged(BytesRef lastTerm) throws IOException {
  final int previousMaxEdits = maxEdits;

  // Whether the last term seen is lexicographically equal to or after the bottom term in the PQ.
  // With no bottom term at all this is treated as true.
  final boolean termAfter;
  if (bottomTerm == null) {
    termAfter = true;
  } else {
    termAfter = lastTerm != null && lastTerm.compareTo(bottomTerm) >= 0;
  }

  // Keep lowering the edit distance while its best possible boost cannot beat the bottom boost.
  while (maxEdits > 0) {
    final float maxBoost = 1.0f - ((float) maxEdits / (float) termLength);
    final boolean stillCompetitive = bottom < maxBoost || (bottom == maxBoost && !termAfter);
    if (stillCompetitive) {
      break;
    }
    maxEdits--;
  }

  // Rebuilding the enum at a smaller edit distance is a significant speedup: once the queue of
  // top-scoring terms (50 by default) is full and its worst entry is e.g. ed=1, there is no
  // point in continuing to run the ed=2 automaton.
  if (lastTerm == null || previousMaxEdits != maxEdits) {
    actualEnum = getAutomatonEnum(maxEdits, lastTerm);
  }
}

Source File: TermsQParserPlugin.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Finds the ordinal of {@code key} in {@code docValues} by binary search, restricted to the
 * ordinal range {@code [startOrd, getValueCount()-1]}.
 *
 * @param docValues source of the terms, accessed by ordinal
 * @param key term being searched for
 * @param startOrd ordinal at which the search window begins
 * @return the matching ordinal, or {@code -(insertionPoint + 1)} when {@code key} is absent
 * @throws IOException propagated from the underlying doc values
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long first = startOrd;
  long last = docValues.getValueCount() - 1;

  while (first <= last) {
    final long center = (first + last) >>> 1;
    final BytesRef candidate = docValues.lookupOrd(center);
    final int sign = candidate.compareTo(key);

    if (sign > 0) {
      last = center - 1;
      continue;
    }
    if (sign < 0) {
      first = center + 1;
      continue;
    }
    return center; // key found
  }

  return -(first + 1); // key not found
}

Source File: BlockReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Compares the searched term to the middle term of the block.
 * If the searched term is lexicographically equal or after the middle term
 * then jumps to the second half of the block directly.
 * <p>
 * When the searched term sorts before the middle term, the read position and
 * line index are reset to the first line of the block. When the reader is not
 * positioned on the first line, nothing is read and {@code -1} is returned.
 *
 * @param searchedTerm The term being looked for.
 * @return The comparison between the searched term and the middle term.
 * @throws IOException Declared by this method; presumably raised while reading
 *                     the middle line (to be confirmed against readLineInBlock()).
 */
protected int compareToMiddleAndJump(BytesRef searchedTerm) throws IOException {
  if (lineIndexInBlock != 0) {
    // Don't try to compare and jump if we are not positioned at the first line.
    // This can happen if we seek in the same current block and we continue
    // scanning from the current line (see initializeHeader()).
    return -1;
  }
  // Move the buffer to the middle line and mark it as the current line before reading it.
  blockReadBuffer.skipBytes(blockHeader.getMiddleLineOffset());
  lineIndexInBlock = blockHeader.getMiddleLineIndex();
  readLineInBlock();
  if (blockLine == null) {
    // The header points at a middle line, so failing to read one means the block is corrupt.
    throw newCorruptIndexException("Illegal absence of line at the middle of the block", null);
  }
  int compare = searchedTerm.compareTo(term());
  if (compare < 0) {
    // Searched term is in the first half: rewind to the first line of the block.
    blockReadBuffer.setPosition(blockFirstLineStart);
    lineIndexInBlock = 0;
  }
  return compare;
}

Source File: LegacySortedSetDocValues.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Binary search for {@code key} across all ordinals of this instance.
 *
 * @param key Key to look up
 * @return the ordinal of {@code key} if it exists; otherwise {@code -insertionPoint-1},
 *         mirroring {@code Arrays.binarySearch}
 */
public long lookupTerm(BytesRef key) {
  long from = 0;
  long to = getValueCount() - 1;

  while (from <= to) {
    final long pivot = (from + to) >>> 1;
    final int relation = lookupOrd(pivot).compareTo(key);

    if (relation == 0) {
      return pivot; // key found
    }

    if (relation < 0) {
      // Pivot term is smaller than the key: discard the lower half.
      from = pivot + 1;
    } else {
      // Pivot term is larger than the key: discard the upper half.
      to = pivot - 1;
    }
  }

  return -(from + 1); // key not found
}

Source File: TestMultiTermsEnum.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns a filtered view over the delegate's terms that accepts only terms equal to
 * {@code value}.
 */
@Override
public TermsEnum iterator() throws IOException {
  return new FilteredTermsEnum(delegate.iterator()) {

    /** Accepts a term equal to {@code value}, ends on a larger one, seeks on a smaller one. */
    @Override
    protected AcceptStatus accept(BytesRef term) {
      final int order = term.compareTo(value);
      if (order == 0) {
        return AcceptStatus.YES;
      }
      if (order > 0) {
        return AcceptStatus.END;
      }
      // Probably unreachable in practice: callers are expected to use nextSeekTerm
      // to obtain the initial term to seek to.
      return AcceptStatus.NO_AND_SEEK;
    }

    /** Seeks to {@code value} while the current term is absent or before it; otherwise stops. */
    @Override
    protected BytesRef nextSeekTerm(BytesRef currentTerm) {
      final boolean beforeValue = currentTerm == null || currentTerm.compareTo(value) < 0;
      return beforeValue ? value : null;
    }
  };
}

Source File: TestSTBlockReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Test override that selects the element at index {@code lines.size() >> 1} of {@code lines}
 * as the current block line, compares the searched term against {@code term()}, and resets
 * the line index to 0 when the searched term sorts before it.
 *
 * @param searchedTerm The term being looked for.
 * @return The result of {@code searchedTerm.compareTo(term())}.
 */
@Override
protected int compareToMiddleAndJump(BytesRef searchedTerm) {
  // Position on the middle line; presumably term() reflects blockLine (confirm in the parent class).
  blockLine = lines.get(lines.size() >> 1);
  lineIndexInBlock = blockHeader.getMiddleLineIndex();
  int compare = searchedTerm.compareTo(term());
  if (compare < 0) {
    // Searched term is before the middle term: go back to the first line index.
    lineIndexInBlock = 0;
  }
  return compare;
}

Source File: DocValuesStats.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Folds the current binary value of {@code sdv} into the running {@code max} and {@code min}.
 *
 * @param count not used by this implementation
 * @throws IOException propagated from reading the binary value
 */
@Override
protected void doAccumulate(int count) throws IOException {
  final BytesRef current = sdv.binaryValue();

  // The first value seen, or any value above the running maximum, becomes the new maximum.
  final boolean raisesMax = max == null || current.compareTo(max) > 0;
  if (raisesMax) {
    max = copyFrom(current, max);
  }

  // Likewise for the running minimum.
  final boolean lowersMin = min == null || current.compareTo(min) < 0;
  if (lowersMin) {
    min = copyFrom(current, min);
  }
}

Source File: TestAutomaton.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Randomized test of {@code makeBinaryInterval}: builds an automaton for a random interval
 * with random inclusiveness flags, then checks 500 random terms against the membership
 * computed directly from {@code BytesRef.compareTo}.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    // The draw order (min term, min flag, max term, max flag) is kept as-is.
    final BytesRef minTerm = TestUtil.randomBinaryTerm(random());
    final boolean minInclusive = random().nextBoolean();
    final BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
    final boolean maxInclusive = random().nextBoolean();

    final Automaton a = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);

    for (int probe = 0; probe < 500; probe++) {
      final BytesRef term = TestUtil.randomBinaryTerm(random());
      final int minCmp = minTerm.compareTo(term);
      final int maxCmp = maxTerm.compareTo(term);

      // The term must lie between the bounds, and a term sitting exactly on a bound is
      // accepted only when that bound is inclusive.
      final boolean withinBounds = minCmp <= 0 && maxCmp >= 0;
      final boolean lowerEdgeOk = minCmp != 0 || minInclusive;
      final boolean upperEdgeOk = maxCmp != 0 || maxInclusive;
      final boolean expected = withinBounds && lowerEdgeOk && upperEdgeOk;

      if (VERBOSE) {
        System.out.println("  check term=" + term + " expected=" + expected);
      }
      final IntsRefBuilder intsBuilder = new IntsRefBuilder();
      Util.toIntsRef(term, intsBuilder);
      assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
    }
  }
}

Source File: SeekingTermSetTermsEnum.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Decides whether {@code term} from the wrapped enum is one of the query terms, advancing
 * the cursor {@code upto} over {@code ords} and setting {@code seekTerm} as it goes.
 * <p>
 * Returns {@code END} once {@code term} is greater than {@code lastTerm}. Otherwise the
 * query term at {@code ords[upto]} is compared with {@code term}; on a match the result is
 * {@code YES} (last query term) or {@code YES_AND_SEEK} (more query terms remain). On a
 * mismatch the cursor is advanced until a query term that is not less than {@code term} is
 * found, or the query terms run out.
 * <p>
 * NOTE(review): this presumably requires {@code ords} to enumerate the query terms in
 * ascending order -- confirm against the code that builds {@code ords}.
 *
 * @param term the current term of the wrapped enum
 * @return the accept status for {@code term}
 * @throws IOException declared by the overridden method
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
  // Past the largest query term: nothing further can match.
  if (term.compareTo(lastTerm) > 0) {
    return AcceptStatus.END;
  }

  BytesRef currentTerm = terms.get(ords[upto], spare);
  if (term.compareTo(currentTerm) == 0) {
    if (upto == lastElement) {
      return AcceptStatus.YES;
    } else {
      // Match, and more query terms remain: move on and seek to the next one.
      seekTerm = terms.get(ords[++upto], spare);
      return AcceptStatus.YES_AND_SEEK;
    }
  } else {
    if (upto == lastElement) {
      return AcceptStatus.NO;
    } else { // Our current term doesn't match the given term.
      int cmp;
      do { // We maybe are behind the given term by more than one step. Keep incrementing till we're the same or higher.
        if (upto == lastElement) {
          return AcceptStatus.NO;
        }
        // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of
        // our terms so we don't do a binary search here
        seekTerm = terms.get(ords[++upto], spare);
      } while ((cmp = seekTerm.compareTo(term)) < 0);
      if (cmp == 0) {
        // Caught up exactly with the given term.
        if (upto == lastElement) {
          return AcceptStatus.YES;
        }
        seekTerm = terms.get(ords[++upto], spare);
        return AcceptStatus.YES_AND_SEEK;
      } else {
        // The next query term is beyond the given term: reject and seek to it.
        return AcceptStatus.NO_AND_SEEK;
      }
    }
  }
}

Source File: TestICUCollationDocValuesField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * For every document, checks that range membership decided by {@code collator} on the stored
 * string agrees with membership decided by comparing the document's "collated" doc value
 * against {@code startBR} and {@code endBR}.
 *
 * @param is searcher over the index under test
 * @param startPoint lower end of the range as a string (inclusive)
 * @param endPoint upper end of the range as a string (inclusive)
 * @param startBR lower end of the range as bytes (inclusive)
 * @param endBR upper end of the range as bytes (inclusive)
 * @param collator collator used for the string comparison
 */
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
  final SortedDocValues collated = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
  for (int docID = 0; docID < is.getIndexReader().maxDoc(); docID++) {
    final String text = is.doc(docID).getField("field").stringValue();
    final boolean collatorAccepts =
        collator.compare(text, startPoint) >= 0 && collator.compare(text, endPoint) <= 0;

    // The doc values iterator is expected to visit every document in order.
    assertEquals(docID, collated.nextDoc());

    final BytesRef key = collated.binaryValue();
    final boolean luceneAccepts = key.compareTo(startBR) >= 0 && key.compareTo(endBR) <= 0;
    assertEquals(collatorAccepts, luceneAccepts);
  }
}

Source File: MultiTerms.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Returns the largest of the maximum terms reported by the entries of {@code subs}, or
 * {@code null} when {@code subs} is empty.
 *
 * <p>NOTE(review): a {@code null} maximum from any entry other than the first would fail on
 * the comparison below -- confirm that entries always report a non-null maximum.
 *
 * @throws IOException propagated from the per-entry {@code getMax()} calls
 */
@Override
public BytesRef getMax() throws IOException {
  BytesRef largest = null;
  for (Terms sub : subs) {
    final BytesRef candidate = sub.getMax();
    // Skip candidates that do not exceed the best one seen so far.
    if (largest != null && candidate.compareTo(largest) <= 0) {
      continue;
    }
    largest = candidate;
  }
  return largest;
}

Source File: TestTerms.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes documents made of random binary tokens while tracking the smallest and largest
 * token by {@code BytesRef.compareTo}, then asserts that {@code Terms.getMin()} and
 * {@code Terms.getMax()} for the field report exactly those tokens.
 */
public void testTermMinMaxRandom() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  // Expected extremes, updated as each random token is generated.
  BytesRef minTerm = null;
  BytesRef maxTerm = null;
  for(int i=0;i<numDocs;i++ ){
    Document doc = new Document();
    // The field's text is empty; its tokens come from the canned token stream set below.
    Field field = new TextField("field", "", Field.Store.NO);
    doc.add(field);
    //System.out.println("  doc " + i);
    CannedBinaryTokenStream.BinaryToken[] tokens = new CannedBinaryTokenStream.BinaryToken[atLeast(10)];
    for(int j=0;j<tokens.length;j++) {
      // Each token is 1 to 20 random bytes.
      byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 20)];
      random().nextBytes(bytes);
      BytesRef tokenBytes = new BytesRef(bytes);
      //System.out.println("    token " + tokenBytes);
      if (minTerm == null || tokenBytes.compareTo(minTerm) < 0) {
        //System.out.println("      ** new min");
        minTerm = tokenBytes;
      }
      if (maxTerm == null || tokenBytes.compareTo(maxTerm) > 0) {
        //System.out.println("      ** new max");
        maxTerm = tokenBytes;
      }
      tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes);
    }
    field.setTokenStream(new CannedBinaryTokenStream(tokens));
    w.addDocument(doc);
  }

  // Read back the indexed terms and compare their extremes with the tracked ones.
  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  assertEquals(minTerm, terms.getMin());
  assertEquals(maxTerm, terms.getMax());
  
  r.close();
  w.close();
  dir.close();
}

Source File: MergeSortRowIdLookup.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Orders readers by the current term of their terms enum.
 *
 * @param o the reader to compare against
 * @return the result of comparing this reader's current term with {@code o}'s current term
 * @throws RuntimeException wrapping any {@link IOException} raised while fetching a term
 */
@Override
public int compareTo(TermsEnumReader o) {
  final BytesRef mine;
  final BytesRef theirs;
  try {
    mine = _termsEnum.term();
    theirs = o._termsEnum.term();
  } catch (IOException e) {
    // compareTo cannot declare checked exceptions, so rethrow unchecked with the cause kept.
    throw new RuntimeException(e);
  }
  return mine.compareTo(theirs);
}

Source File: MergeSortRowIdLookup.java From incubator-retired-blur with Apache License 2.0

5 votes

/**
 * Moves every reader whose current term sorts before {@code rowId} forward by calling
 * {@code seekCeil(rowId)}; readers already at or past {@code rowId} are left untouched.
 *
 * @param termsEnumList readers to advance
 * @param rowId the row id to advance to
 * @throws IOException propagated from the terms enums
 */
private static void advance(List<TermsEnumReader> termsEnumList, BytesRef rowId) throws IOException {
  for (TermsEnumReader reader : termsEnumList) {
    final boolean alreadyThere = reader._termsEnum.term().compareTo(rowId) >= 0;
    if (alreadyThere) {
      continue;
    }
    reader._termsEnum.seekCeil(rowId);
  }
}

Source File: LegacyNumericRangeQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Accepts {@code term} when it does not exceed the current upper bound, stepping through the
 * queued sub-ranges in {@code rangeBounds} as needed.
 *
 * @param term the term to test
 * @return {@code YES} if {@code term} is at or below the current upper bound, {@code END} if
 *         no sub-ranges remain, or {@code NO_AND_SEEK} if {@code term} is below the next
 *         sub-range's lower bound
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  for (;;) {
    final boolean withinCurrent = currentUpperBound != null && term.compareTo(currentUpperBound) <= 0;
    if (withinCurrent) {
      return AcceptStatus.YES;
    }
    if (rangeBounds.isEmpty()) {
      return AcceptStatus.END;
    }
    // Peek at the next sub-range; seek only if the current term is smaller than its lower bound.
    if (term.compareTo(rangeBounds.getFirst()) < 0) {
      return AcceptStatus.NO_AND_SEEK;
    }
    // The next lower bound is less than or equal to the current term: step forward without seeking.
    nextRange();
  }
}

Source File: IntersectBlockReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * True if the current state of the automata is best iterated linearly (without seeking),
 * i.e. the {@code linear} flag is set and {@code term} is strictly below
 * {@code linearUpperBound}.
 *
 * @param term the term to test against the linear upper bound
 */
protected boolean isLinearState(BytesRef term) {
  if (!linear) {
    return false;
  }
  return term.compareTo(linearUpperBound) < 0;
}

Source File: TermRangeQueryExpression.java From incubator-atlas with Apache License 2.0

4 votes

/**
 * Checks whether a value satisfies the upper bound of this range.
 *
 * <p>With no upper term the range is open-ended and every value passes. Otherwise a value
 * equal to the upper term passes only when the upper bound is inclusive. The previous
 * implementation had the two cases swapped: an inclusive bound rejected the bound value
 * itself and an exclusive bound accepted it.
 *
 * @param valueBytes the value to test
 * @return {@code true} if {@code valueBytes} is within the upper bound
 */
private boolean compareUpperBound(BytesRef valueBytes) {
    if (m_upperTerm == null) {
        return true;
    }
    final int cmp = valueBytes.compareTo(m_upperTerm);
    // Inclusive: value <= upper. Exclusive: value < upper.
    return m_upperInclusive ? cmp <= 0 : cmp < 0;
}

Source File: BlockReader.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Indicates whether the searched term is beyond the last term of the field. This can only be
 * the case when the current block is the field's last block.
 *
 * @param searchedTerm The term being looked for.
 * @param blockStartFP The current block start file pointer.
 * @return {@code true} if the current block is the last one and {@code searchedTerm} sorts
 *         after the field's last term.
 */
protected boolean isBeyondLastTerm(BytesRef searchedTerm, long blockStartFP) {
  if (blockStartFP != fieldMetadata.getLastBlockStartFP()) {
    return false;
  }
  return searchedTerm.compareTo(fieldMetadata.getLastTerm()) > 0;
}

Java Code Examples for org.apache.lucene.util.BytesRef#compareTo()