org.apache.lucene.util.BytesRefBuilder Java Examples

Source File: SearchGroupsResultTransformer.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Rebuilds a single {@link SearchGroup} from its serialized form.
 *
 * <p>The group value stays {@code null} when {@code groupValue} is {@code null}. Otherwise it is
 * converted through the group field's type when a field is given, or wrapped directly in a
 * {@link BytesRef} when it is not. Every raw sort value is then passed through
 * {@code ShardResultTransformerUtils.unmarshalSortValue} together with the schema field of the
 * matching sort (or {@code null} when that sort has no field name or the schema has no such field).
 *
 * @param groupField schema field the grouping was done on, may be {@code null}
 * @param groupValue readable group value, may be {@code null}
 * @param groupSortField sort fields, indexed in parallel with {@code rawSearchGroupData}
 * @param rawSearchGroupData serialized sort values of the group
 * @return the populated search group
 */
@SuppressWarnings({"rawtypes"})
private SearchGroup<BytesRef> deserializeOneSearchGroup(SchemaField groupField, String groupValue,
    SortField[] groupSortField, List<Comparable> rawSearchGroupData) {
  final SearchGroup<BytesRef> result = new SearchGroup<>();

  if (groupValue == null) {
    result.groupValue = null;
  } else if (groupField == null) {
    result.groupValue = new BytesRef(groupValue);
  } else {
    final BytesRefBuilder indexed = new BytesRefBuilder();
    groupField.getType().readableToIndexed(groupValue, indexed);
    result.groupValue = indexed.get();
  }

  // Unmarshal in place: the array stored on the group is the one being rewritten.
  final Object[] sortValues = rawSearchGroupData.toArray(new Comparable[rawSearchGroupData.size()]);
  result.sortValues = sortValues;
  for (int i = 0; i < sortValues.length; i++) {
    final String sortFieldName = groupSortField[i].getField();
    final SchemaField field = sortFieldName == null ? null : searcher.getSchema().getFieldOrNull(sortFieldName);
    sortValues[i] = ShardResultTransformerUtils.unmarshalSortValue(sortValues[i], field);
  }
  return result;
}

Source File: GroupConverter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Converts search groups keyed by {@link MutableValue} into search groups keyed by {@link BytesRef}.
 *
 * <p>Sort values are carried over by reference. A group value that does not exist becomes
 * {@code null}; an existing one is turned into a string and converted with the field type's
 * {@code readableToIndexed}.
 *
 * @param field schema field whose type performs the conversion
 * @param values groups to convert, may be {@code null}
 * @return the converted groups in iteration order, or {@code null} if {@code values} is {@code null}
 */
static Collection<SearchGroup<BytesRef>> fromMutable(SchemaField field, Collection<SearchGroup<MutableValue>> values) {
  if (values == null) {
    return null;
  }
  final FieldType type = field.getType();
  final List<SearchGroup<BytesRef>> convertedGroups = new ArrayList<>(values.size());
  for (SearchGroup<MutableValue> source : values) {
    final SearchGroup<BytesRef> target = new SearchGroup<>();
    target.sortValues = source.sortValues;
    if (!source.groupValue.exists) {
      target.groupValue = null;
    } else {
      final BytesRefBuilder indexed = new BytesRefBuilder();
      type.readableToIndexed(Utils.OBJECT_TO_STRING.apply(source.groupValue.toObject()), indexed);
      target.groupValue = indexed.get();
    }
    convertedGroups.add(target);
  }
  return convertedGroups;
}

Source File: TermBuilder.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Encodes {@code value} as a prefix-coded long term with shift 0.
 *
 * @param value the number to encode; unboxed, so it must not be {@code null}
 * @return the encoded bytes
 */
@Override
public BytesRef term(Long value) {
    final BytesRefBuilder encoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, encoded);
    return encoded.get();
}

Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Encodes every int in [-100000, 100000) with shift 0 and checks that each encoding sorts
 * strictly after the previous one (as bytes and as a UTF-8 string) and decodes back to the
 * original int.
 */
public void testIntConversionAndOrdering() throws Exception {
  // generate a series of encoded ints, each numerical one bigger than the one before
  BytesRefBuilder act = new BytesRefBuilder();
  BytesRefBuilder last = new BytesRefBuilder();
  for (int i=-100000; i<100000; i++) {
    LegacyNumericUtils.intToPrefixCoded(i, 0, act);
    // `last` is never null, so the ordering is asserted on every iteration, including the
    // first one where `last` is still the empty builder.
    assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 );
    assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 );
    // test is back and forward conversion works
    assertEquals("forward and back conversion should generate same int", i, LegacyNumericUtils.prefixCodedToInt(act.get()));
    // next step
    last.copyBytes(act.get());
  }
}

Source File: Correction.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Joins the terms of all candidates with {@code separator}, optionally wrapping runs of
 * corrected (non user-input) candidates in highlight tags.
 *
 * <p>When {@code preTag} is {@code null}, or a candidate is user input, its term is used as is.
 * Otherwise {@code preTag} is prepended if the candidate is the first one or follows a user-input
 * candidate, and {@code postTag} is appended if it is the last one or precedes a user-input
 * candidate.
 *
 * @param separator bytes placed between adjacent terms
 * @param result builder that receives the joined output
 * @param preTag opening highlight tag, or {@code null} to disable highlighting
 * @param postTag closing highlight tag; dereferenced whenever {@code preTag} is non-null and a
 *        candidate is not user input
 * @return the joined bytes as produced by {@code SuggestUtils.join}
 */
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) {
    BytesRef[] toJoin = new BytesRef[this.candidates.length];
    // One separator sits between each adjacent pair of terms, i.e. (n - 1) separators.
    // The parentheses matter: without them the expression is (separator.length * n) - 1, which
    // is wrong for any separator whose length is not 1 and negative for an empty candidate list.
    int len = separator.length * Math.max(0, toJoin.length - 1);
    for (int i = 0; i < toJoin.length; i++) {
        Candidate candidate = candidates[i];
        if (preTag == null || candidate.userInput) {
            toJoin[i] = candidate.term;
        } else {
            final int maxLen = preTag.length + postTag.length + candidate.term.length;
            // pre-size for the worst case: both tags plus the term
            final BytesRefBuilder highlighted = new BytesRefBuilder();
            highlighted.grow(maxLen);
            if (i == 0 || candidates[i-1].userInput) {
                highlighted.append(preTag);
            }
            highlighted.append(candidate.term);
            if (toJoin.length == i + 1 || candidates[i+1].userInput) {
                highlighted.append(postTag);
            }
            toJoin[i] = highlighted.get();
        }
        len += toJoin[i].length;
    }
    result.grow(len);
    return SuggestUtils.join(separator, result, toJoin);
}

Source File: CommonTermsQueryParser.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Analyzes {@code queryString} for {@code field} and adds one term per emitted token to
 * {@code query}.
 *
 * @return {@code query} with both minimum-should-match settings applied, or {@code null} when
 *         the analyzer produced no tokens
 * @throws IOException if the token stream fails
 */
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
        Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
    // Logic similar to QueryParser#getFieldQuery
    int termCount = 0;
    try (TokenStream tokens = analyzer.tokenStream(field, queryString.toString())) {
        tokens.reset();
        final CharTermAttribute termAttribute = tokens.addAttribute(CharTermAttribute.class);
        final BytesRefBuilder utf8 = new BytesRefBuilder();
        for (; tokens.incrementToken(); termCount++) {
            // UTF-8
            utf8.copyChars(termAttribute);
            query.add(new Term(field, utf8.toBytesRef()));
        }
    }

    if (termCount > 0) {
        query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
        query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
        return query;
    }
    return null;
}

Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Encodes every long in [-100000, 100000) with shift 0 and checks that each encoding sorts
 * strictly after the previous one (as bytes and as a UTF-8 string) and decodes back to the
 * original long.
 */
public void testLongConversionAndOrdering() throws Exception {
  // generate a series of encoded longs, each numerical one bigger than the one before
  BytesRefBuilder last = new BytesRefBuilder();
  BytesRefBuilder act = new BytesRefBuilder();
  for (long l=-100000L; l<100000L; l++) {
    LegacyNumericUtils.longToPrefixCoded(l, 0, act);
    // `last` is never null, so the ordering is asserted on every iteration, including the
    // first one where `last` is still the empty builder.
    assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 );
    assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 );
    // test is back and forward conversion works
    assertEquals("forward and back conversion should generate same long", l, LegacyNumericUtils.prefixCodedToLong(act.get()));
    // next step
    last.copyBytes(act);
  }
}

Source File: FSTCompletionBuilder.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Compiles the entries supplied by {@code sorter} into an automaton.
 *
 * <p>An entry that compares equal to the most recently added one is not added again. The
 * comparison buffer starts out empty, so a leading entry equal to the empty buffer is skipped too.
 *
 * @param sorter source of the entries
 * @return the compiled automaton, or {@code null} if the sorter yielded no entries
 * @throws IOException if iterating the entries or compiling fails
 */
private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException {
  final Outputs<Object> outputs = NoOutputs.getSingleton();
  final Object noOutput = outputs.getNoOutput();
  final FSTCompiler<Object> compiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs)
      .shareMaxTailLength(shareMaxTailLength).build();

  final BytesRefBuilder previous = new BytesRefBuilder();
  final IntsRefBuilder intsScratch = new IntsRefBuilder();
  final BytesRefIterator entries = sorter.iterator();
  int seen = 0;
  for (BytesRef current = entries.next(); current != null; current = entries.next()) {
    seen++;
    final boolean sameAsPrevious = previous.get().compareTo(current) == 0;
    if (!sameAsPrevious) {
      compiler.add(Util.toIntsRef(current, intsScratch), noOutput);
      previous.copyBytes(current);
    }
  }

  if (seen == 0) {
    return null;
  }
  return compiler.compile();
}

Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
 * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
 * After encoding, {@code bytes.offset} will always be 0.
 * @param val the numeric value
 * @param shift how many bits to strip from the right
 * @param bytes will contain the encoded value
 * @throws IllegalArgumentException if {@code shift} is outside 0..63
 */
public static void longToPrefixCoded(final long val, final int shift, final BytesRefBuilder bytes) {
  // any bit set outside the low six means shift is not in 0..63
  final boolean shiftInRange = (shift & ~0x3f) == 0;
  if (!shiftInRange) {
    throw new IllegalArgumentException("Illegal shift value, must be 0..63; got shift=" + shift);
  }
  final int payloadBytes = (((63-shift)*37)>>8) + 1;    // i/7 is the same as (i*37)>>8 for i in 0..63
  bytes.setLength(payloadBytes + 1);   // one extra for the byte that contains the shift info
  bytes.grow(BUF_SIZE_LONG);
  bytes.setByteAt(0, (byte)(SHIFT_START_LONG + shift));
  // flip the sign bit, then drop the low `shift` bits
  long remaining = (val ^ 0x8000000000000000L) >>> shift;
  for (int pos = payloadBytes; pos > 0; pos--) {
    // Store 7 bits per byte for compatibility
    // with UTF-8 encoding of terms; filled from the last byte towards the first
    bytes.setByteAt(pos, (byte)(remaining & 0x7f));
    remaining >>>= 7;
  }
}

Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns prefix coded bits after reducing the precision by <code>shift</code> bits.
 * This method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}.
 * After encoding, {@code bytes.offset} will always be 0.
 * @param val the numeric value
 * @param shift how many bits to strip from the right
 * @param bytes will contain the encoded value
 * @throws IllegalArgumentException if {@code shift} is outside 0..31
 */
public static void intToPrefixCoded(final int val, final int shift, final BytesRefBuilder bytes) {
  // any bit set outside the low five means shift is not in 0..31
  final boolean shiftInRange = (shift & ~0x1f) == 0;
  if (!shiftInRange) {
    throw new IllegalArgumentException("Illegal shift value, must be 0..31; got shift=" + shift);
  }
  final int payloadBytes = (((31-shift)*37)>>8) + 1;    // i/7 is the same as (i*37)>>8 for i in 0..63
  bytes.setLength(payloadBytes + 1);   // one extra for the byte that contains the shift info
  bytes.grow(LegacyNumericUtils.BUF_SIZE_LONG);  // use the max
  bytes.setByteAt(0, (byte)(SHIFT_START_INT + shift));
  // flip the sign bit, then drop the low `shift` bits
  int remaining = (val ^ 0x80000000) >>> shift;
  for (int pos = payloadBytes; pos > 0; pos--) {
    // Store 7 bits per byte for compatibility
    // with UTF-8 encoding of terms; filled from the last byte towards the first
    bytes.setByteAt(pos, (byte)(remaining & 0x7f));
    remaining >>>= 7;
  }
}

Source File: TrieField.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes the prefix-coded (shift 0) form of the field's numeric value into {@code bytes},
 * choosing the int or long encoding according to this field's {@code type}.
 *
 * @param f field whose numeric value is encoded
 * @param bytes receives the encoded value
 * @throws SolrException if the field has no numeric value or the type is not handled
 */
private void storedToIndexed(IndexableField f, final BytesRefBuilder bytes) {
  final Number number = f.numericValue();
  if (number == null) {
    // the old BinaryField encoding is no longer supported
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name());
  }

  switch (type) {
    case INTEGER:
      LegacyNumericUtils.intToPrefixCoded(number.intValue(), 0, bytes);
      break;
    case FLOAT:
      LegacyNumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(number.floatValue()), 0, bytes);
      break;
    case LONG: // shares the long encoding with DATE
    case DATE:
      LegacyNumericUtils.longToPrefixCoded(number.longValue(), 0, bytes);
      break;
    case DOUBLE:
      LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(number.doubleValue()), 0, bytes);
      break;
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name());
  }
}

Source File: EnumField.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the indexable fields for {@code value}.
 *
 * <p>Without doc values only the field from {@code createField} is returned. With doc values a
 * second field is added: a {@link SortedSetDocValuesField} holding the indexed form of the enum's
 * int value for multi-valued fields, otherwise a {@link NumericDocValuesField} holding the
 * created field's numeric value.
 *
 * @param sf schema field being populated
 * @param value raw value to index
 * @return the fields to add to the document
 */
@Override
public List<IndexableField> createFields(SchemaField sf, Object value) {
  if (!sf.hasDocValues()) {
    return Collections.singletonList(createField(sf, value));
  }

  final List<IndexableField> result = new ArrayList<>();
  final IndexableField indexed = createField(sf, value);
  result.add(indexed);

  final IndexableField docValues;
  if (sf.multiValued()) {
    final BytesRefBuilder encoded = new BytesRefBuilder();
    readableToIndexed(enumMapping.stringValueToIntValue(value.toString()).toString(), encoded);
    docValues = new SortedSetDocValuesField(sf.getName(), encoded.toBytesRef());
  } else {
    final long bits = indexed.numericValue().intValue();
    docValues = new NumericDocValuesField(sf.getName(), bits);
  }
  result.add(docValues);
  return result;
}

Source File: TrieBuilder.java From ambiverse-nlu with Apache License 2.0

6 votes

/**
 * Builds an FST that maps each string, in iteration order, to a running counter starting at 0.
 *
 * <p>An {@link AssertionError} raised while adding a string is logged at debug level and the
 * string is skipped; the remaining strings are still added.
 *
 * @param sortedStrings the strings to add, iterated in the set's order
 * @return the finished FST
 * @throws IOException if the underlying builder fails
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  final BytesRefBuilder utf8Scratch = new BytesRefBuilder();
  final IntsRefBuilder intsScratch = new IntsRefBuilder();
  long nextOutput = 0;
  for (final String mention : sortedStrings) {
    utf8Scratch.copyChars(mention);
    try {
      fstBuilder.add(Util.toIntsRef(utf8Scratch.get(), intsScratch), nextOutput++);
    } catch (java.lang.AssertionError ae) {
      logger.debug("Assertion error for mention " + mention);
    }
  }
  return fstBuilder.finish();
}

Source File: DocumentBuilder.java From modernmt with Apache License 2.0

6 votes

/**
 * Creates a term for {@code field} whose bytes are the prefix-coded (shift 0) form of
 * {@code value}.
 */
private static Term makeLongTerm(long value, String field) {
    final BytesRefBuilder encoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(value, 0, encoded);
    final BytesRef termBytes = encoded.toBytesRef();
    return new Term(field, termBytes);
}

Source File: SimpleTextPointsReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads a clone of the data input line by line through a checksumming wrapper until the
 * expected footer position is reached, then validates the footer.
 *
 * @throws CorruptIndexException if a line ends past the expected footer start
 * @throws IOException on read failure
 */
@Override
public void checkIntegrity() throws IOException {
  final BytesRefBuilder lineBuffer = new BytesRefBuilder();
  final IndexInput cloned = dataIn.clone();
  cloned.seek(0);

  // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM):
  final long footerStartPos = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21);
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(cloned);

  // At least one line is always read before the position is checked.
  do {
    SimpleTextUtil.readLine(input, lineBuffer);
  } while (input.getFilePointer() < footerStartPos);

  // Make sure we landed at precisely the right location:
  if (input.getFilePointer() != footerStartPos) {
    throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input);
  }
  SimpleTextUtil.checkFooter(input);
}

Source File: GroupConverter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Converts top groups keyed by {@link MutableValue} into top groups keyed by {@link BytesRef}.
 *
 * <p>Every per-group statistic and the group-level sorts and counts are copied unchanged; only
 * the group value is converted. A group value that does not exist becomes {@code null}.
 *
 * @param field schema field whose type performs the conversion
 * @param values top groups to convert, may be {@code null}
 * @return the converted top groups, or {@code null} if {@code values} is {@code null}
 */
@SuppressWarnings({"unchecked", "rawtypes"})
static TopGroups<BytesRef> fromMutable(SchemaField field, TopGroups<MutableValue> values) {
  if (values == null) {
    return null;
  }

  final FieldType type = field.getType();
  final int groupCount = values.groups.length;
  final GroupDocs<BytesRef>[] converted = new GroupDocs[groupCount];

  for (int i = 0; i < groupCount; i++) {
    final GroupDocs<MutableValue> source = values.groups[i];
    BytesRef groupValue = null;
    if (source.groupValue.exists) {
      final BytesRefBuilder indexed = new BytesRefBuilder();
      type.readableToIndexed(Utils.OBJECT_TO_STRING.apply(source.groupValue.toObject()), indexed);
      groupValue = indexed.get();
    }
    converted[i] = new GroupDocs<>(source.score, source.maxScore, source.totalHits, source.scoreDocs, groupValue, source.groupSortValues);
  }

  return new TopGroups<>(values.groupSort, values.withinGroupSort, values.totalHitCount, values.totalGroupedHitCount, converted, values.maxScore);
}

Source File: SimpleTextDocValuesReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads a clone of the data input line by line through a checksumming wrapper until the
 * expected footer position is reached, then validates the footer.
 *
 * @throws CorruptIndexException if a line ends past the expected footer start
 * @throws IOException on read failure
 */
@Override
public void checkIntegrity() throws IOException {
  final BytesRefBuilder lineBuffer = new BytesRefBuilder();
  final IndexInput cloned = data.clone();
  cloned.seek(0);
  // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM):
  final long footerStartPos = data.length() - (SimpleTextUtil.CHECKSUM.length + 21);
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(cloned);

  // At least one line is always read before the position is checked.
  do {
    SimpleTextUtil.readLine(input, lineBuffer);
  } while (input.getFilePointer() < footerStartPos);

  // Make sure we landed at precisely the right location:
  if (input.getFilePointer() != footerStartPos) {
    throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input);
  }
  SimpleTextUtil.checkFooter(input);
}

Source File: XJoinQParserPlugin.java From BioSolr with Apache License 2.0

6 votes

/**
 * Creates a transformer that turns a join id into term bytes.
 *
 * <p>The id's string form is converted with {@code ft.readableToIndexed} when a field type is
 * given, and copied as characters otherwise. The returned transformer keeps one builder that it
 * reuses for every call.
 *
 * @param ft field type used for the conversion, may be {@code null}
 */
static private Transformer transformer(final FieldType ft) {
  return new Transformer() {

    // scratch buffer shared by all transform() calls on this instance
    private final BytesRefBuilder term = new BytesRefBuilder();

    @Override
    public BytesRef transform(Object joinId) {
      final String readable = joinId.toString();
      // logic same as TermQParserPlugin
      if (ft == null) {
        term.copyChars(readable);
      } else {
        ft.readableToIndexed(readable, term);
      }
      return term.toBytesRef();
    }

  };
}

Source File: DateFieldMapper.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Parses {@code value} and returns its prefix-coded long form for searching.
 *
 * @param value the value to parse via {@code parseValue}
 * @return the encoded bytes
 */
@Override
public BytesRef indexedValueForSearch(Object value) {
    final long parsed = parseValue(value);
    final BytesRefBuilder encoded = new BytesRefBuilder();
    NumericUtils.longToPrefixCoded(parsed, 0, encoded); // 0 because of exact match
    return encoded.get();
}

Source File: ExpandComponent.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds a {@link TermInSetQuery} on {@code fname} from the numeric group values in
 * {@code groupSet}.
 *
 * <p>Each value is rendered with {@code numericToString} and converted to its indexed form by
 * {@code ft}. The term array is allocated with exactly {@code size} slots and filled in the
 * set's iteration order.
 *
 * @param fname field the query runs against
 * @param ft field type used for the conversion
 * @param size number of slots to allocate for terms
 * @param groupSet group values to include
 */
private Query getGroupQuery(String fname,
                         FieldType ft,
                         int size,
                         LongHashSet groupSet) {

  final BytesRef[] terms = new BytesRef[size];
  final BytesRefBuilder scratch = new BytesRefBuilder();
  int next = 0;

  for (Iterator<LongCursor> cursors = groupSet.iterator(); cursors.hasNext(); ) {
    final LongCursor cursor = cursors.next();
    final String readable = numericToString(ft, cursor.value);
    ft.readableToIndexed(readable, scratch);
    terms[next++] = scratch.toBytesRef();
  }

  return new TermInSetQuery(fname, terms);
}

Source File: BytesRefTermStream.java From siren-join with GNU Affero General Public License v3.0

6 votes

/**
 * Returns the next value of this stream as a prefix-coded long term (shift 0) and advances the
 * position by one.
 *
 * @return a new {@link BytesRef} holding the encoded value
 */
@Override
public BytesRef next() {
  BytesRefBuilder b = new BytesRefBuilder();
  // Keep the full 64 bits: narrowing to int here would discard the upper 32 bits before the
  // 64-bit encoding and produce the wrong term for any value outside the int range.
  final long value = (long) values.valueAt(this.count++);
  NumericUtils.longToPrefixCoded(value, 0, b);
  return b.toBytesRef();
}

Source File: TrieField.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Parses the readable value according to this field's {@code type} and writes its prefix-coded
 * (shift 0) form into {@code result}.
 *
 * @param val readable value
 * @param result receives the encoded value
 * @throws SolrException if the type is not handled
 */
@Override
public void readableToIndexed(CharSequence val, BytesRefBuilder result) {
  final String text = val.toString();
  switch (type) {
    case INTEGER: {
      final int parsed = parseIntFromUser(null, text);
      LegacyNumericUtils.intToPrefixCoded(parsed, 0, result);
      break;
    }
    case FLOAT: {
      final int sortable = NumericUtils.floatToSortableInt(parseFloatFromUser(null, text));
      LegacyNumericUtils.intToPrefixCoded(sortable, 0, result);
      break;
    }
    case LONG: {
      final long parsed = parseLongFromUser(null, text);
      LegacyNumericUtils.longToPrefixCoded(parsed, 0, result);
      break;
    }
    case DOUBLE: {
      final long sortable = NumericUtils.doubleToSortableLong(parseDoubleFromUser(null, text));
      LegacyNumericUtils.longToPrefixCoded(sortable, 0, result);
      break;
    }
    case DATE: {
      final long millis = DateMathParser.parseMath(null, text).getTime();
      LegacyNumericUtils.longToPrefixCoded(millis, 0, result);
      break;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
}

Source File: BytesRefs.java From crate with Apache License 2.0

5 votes

/**
 * Converts {@code value} to a {@link BytesRef}.
 *
 * @param value the object to convert, may be {@code null}
 * @param spare builder that receives the characters of {@code value.toString()} when
 *        {@code value} is neither {@code null} nor already a {@link BytesRef}
 * @return {@code null} for a {@code null} value, the value itself if it already is a
 *         {@link BytesRef}, otherwise the content of {@code spare}
 */
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
    if (value instanceof BytesRef) {
        return (BytesRef) value;
    }
    if (value == null) {
        return null;
    }
    final String text = value.toString();
    spare.copyChars(text);
    return spare.get();
}

Source File: SimpleTextBKDReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Seeks to {@code blockFP}, reads the block's count line and then that many doc id lines into
 * {@code docIDs}, starting at index 0.
 *
 * @param in input to read from
 * @param blockFP file pointer of the block
 * @param docIDs destination array
 * @return the number of doc ids read
 * @throws IOException on read failure
 */
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);
  readLine(in, line);
  final int docCount = parseInt(line, BLOCK_COUNT);
  for (int slot = 0; slot < docCount; slot++) {
    readLine(in, line);
    docIDs[slot] = parseInt(line, BLOCK_DOC_ID);
  }
  return docCount;
}

Source File: TrieField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * String-returning variant: encodes {@code val} through the builder-based
 * {@code readableToIndexed} and returns the encoded bytes decoded as UTF-8.
 */
@Override
public String readableToIndexed(String val) {
  // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts!
  final BytesRefBuilder encoded = new BytesRefBuilder();
  readableToIndexed(val, encoded);
  final BytesRef indexed = encoded.get();
  return indexed.utf8ToString();
}

Source File: SimpleTextBKDReader.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Seeks to {@code blockFP}, reads the block's count line, announces the count to the visitor via
 * {@code grow} and then passes each doc id of the block to {@code visit}.
 *
 * @param in input to read from
 * @param blockFP file pointer of the block
 * @param visitor receives the doc ids
 * @throws IOException on read failure
 */
void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);
  readLine(in, line);
  final int docCount = parseInt(line, BLOCK_COUNT);
  visitor.grow(docCount);
  for (int remaining = docCount; remaining > 0; remaining--) {
    readLine(in, line);
    final int docID = parseInt(line, BLOCK_DOC_ID);
    visitor.visit(docID);
  }
}

Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0

5 votes

public TermsJoinKeyCollector(FieldType fieldType, Terms terms, SolrIndexSearcher searcher) throws IOException {
  this.fieldType = fieldType;
  this.searcher = searcher;

  termsEnum = terms.iterator();
  bytes = new BytesRefBuilder();

  bitSet = new FixedBitSet(searcher.maxDoc());
}

Source File: BBoxStrategy.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates an exact-match query for {@code number} on {@code field}.
 *
 * <p>Uses a {@link DoublePoint} query when point values are available, otherwise a term query on
 * the prefix-coded sortable long when a legacy numeric field type is configured.
 *
 * @throws UnsupportedOperationException if neither representation is available
 */
private Query makeNumberTermQuery(String field, double number) {
  if (hasPointVals) {
    return DoublePoint.newExactQuery(field, number);
  }
  if (legacyNumericFieldType == null) {
    throw new UnsupportedOperationException("An index is required for this operation.");
  }
  final BytesRefBuilder encoded = new BytesRefBuilder();
  final long sortable = NumericUtils.doubleToSortableLong(number);
  LegacyNumericUtils.longToPrefixCoded(sortable, 0, encoded);
  return new TermQuery(new Term(field, encoded.get()));
}

Source File: SimpleTextLiveDocsFormat.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes the live docs of a segment as text: a size line, one line per set bit, an end marker
 * and a checksum.
 *
 * <p>The output is closed normally after a complete write; if anything fails before that, it is
 * closed via {@code IOUtils.closeWhileHandlingException}.
 *
 * @param bits live-docs bits to write
 * @param dir directory that receives the file
 * @param info segment the file belongs to; supplies the segment name and next deletion generation
 * @param newDelCount not used by this implementation
 * @param context IO context for creating the output
 * @throws IOException on write failure
 */
@Override
public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
  final int bitCount = bits.length();
  final BytesRefBuilder scratch = new BytesRefBuilder();

  final String liveDocsFile = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen());
  IndexOutput output = null;
  boolean completed = false;
  try {
    output = dir.createOutput(liveDocsFile, context);
    SimpleTextUtil.write(output, SIZE);
    SimpleTextUtil.write(output, Integer.toString(bitCount), scratch);
    SimpleTextUtil.writeNewline(output);

    for (int doc = 0; doc < bitCount; doc++) {
      if (!bits.get(doc)) {
        continue;
      }
      SimpleTextUtil.write(output, DOC);
      SimpleTextUtil.write(output, Integer.toString(doc), scratch);
      SimpleTextUtil.writeNewline(output);
    }

    SimpleTextUtil.write(output, END);
    SimpleTextUtil.writeNewline(output);
    SimpleTextUtil.writeChecksum(output, scratch);
    completed = true;
  } finally {
    if (!completed) {
      IOUtils.closeWhileHandlingException(output);
    } else {
      IOUtils.close(output);
    }
  }
}

Source File: FieldComparator.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates this, with control over how missing values are sorted.
 *
 * @param numHits number of slots to allocate in each per-hit array
 * @param field name of the field being compared
 * @param sortMissingLast pass {@code true} to put missing values at the end; this sets the
 *        missing comparison result to 1 and the missing ord to {@link Integer#MAX_VALUE},
 *        otherwise both are -1
 */
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) {
  this.field = field;
  ords = new int[numHits];
  values = new BytesRef[numHits];
  tempBRs = new BytesRefBuilder[numHits];
  readerGen = new int[numHits];
  missingSortCmp = sortMissingLast ? 1 : -1;
  missingOrd = sortMissingLast ? Integer.MAX_VALUE : -1;
}

org.apache.lucene.util.BytesRefBuilder Java Examples