org.apache.lucene.util.BytesRefBuilder Java Examples
The following examples show how to use
org.apache.lucene.util.BytesRefBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SearchGroupsResultTransformer.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Rebuilds one {@link SearchGroup} from its serialized representation.
 *
 * @param groupField schema field whose type converts the readable group value into its indexed
 *     form; when {@code null} the group value is wrapped in a {@link BytesRef} directly
 * @param groupValue readable group value; {@code null} leaves the group value {@code null}
 * @param groupSortField sort fields, indexed in parallel with {@code rawSearchGroupData}
 * @param rawSearchGroupData marshalled sort values for the group
 * @return the populated search group
 */
@SuppressWarnings({"rawtypes"})
private SearchGroup<BytesRef> deserializeOneSearchGroup(SchemaField groupField, String groupValue,
    SortField[] groupSortField, List<Comparable> rawSearchGroupData) {
  SearchGroup<BytesRef> group = new SearchGroup<>();

  if (groupValue == null) {
    group.groupValue = null;
  } else if (groupField == null) {
    group.groupValue = new BytesRef(groupValue);
  } else {
    BytesRefBuilder indexed = new BytesRefBuilder();
    groupField.getType().readableToIndexed(groupValue, indexed);
    group.groupValue = indexed.get();
  }

  group.sortValues = rawSearchGroupData.toArray(new Comparable[rawSearchGroupData.size()]);
  for (int idx = 0; idx < group.sortValues.length; idx++) {
    // A sort field without a field name has no schema field to unmarshal against.
    String sortFieldName = groupSortField[idx].getField();
    SchemaField sortSchemaField = null;
    if (sortFieldName != null) {
      sortSchemaField = searcher.getSchema().getFieldOrNull(sortFieldName);
    }
    group.sortValues[idx] =
        ShardResultTransformerUtils.unmarshalSortValue(group.sortValues[idx], sortSchemaField);
  }
  return group;
}
Example #2
Source File: GroupConverter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Converts search groups keyed by {@link MutableValue} into groups keyed by the indexed
 * {@link BytesRef} form of the same value.
 *
 * @param field schema field whose type performs the readable-to-indexed conversion
 * @param values groups to convert; may be {@code null}
 * @return the converted groups in iteration order, or {@code null} when {@code values} is null
 */
static Collection<SearchGroup<BytesRef>> fromMutable(SchemaField field,
    Collection<SearchGroup<MutableValue>> values) {
  if (values == null) {
    return null;
  }
  final FieldType type = field.getType();
  final List<SearchGroup<BytesRef>> convertedGroups = new ArrayList<>(values.size());
  for (SearchGroup<MutableValue> source : values) {
    final SearchGroup<BytesRef> target = new SearchGroup<>();
    target.sortValues = source.sortValues;
    // A group whose mutable value does not exist keeps a null group value.
    target.groupValue = null;
    if (source.groupValue.exists) {
      final String readable = Utils.OBJECT_TO_STRING.apply(source.groupValue.toObject());
      final BytesRefBuilder indexed = new BytesRefBuilder();
      type.readableToIndexed(readable, indexed);
      target.groupValue = indexed.get();
    }
    convertedGroups.add(target);
  }
  return convertedGroups;
}
Example #3
Source File: TermBuilder.java From Elasticsearch with Apache License 2.0 | 6 votes |
/**
 * Encodes a long as a prefix-coded term with a shift of 0.
 *
 * @param value the value to encode
 * @return the bytes held by a freshly filled builder
 */
@Override
public BytesRef term(Long value) {
  final BytesRefBuilder encoded = new BytesRefBuilder();
  final long raw = value;
  NumericUtils.longToPrefixCoded(raw, 0, encoded);
  return encoded.get();
}
Example #4
Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testIntConversionAndOrdering() throws Exception { // generate a series of encoded ints, each numerical one bigger than the one before BytesRefBuilder act = new BytesRefBuilder(); BytesRefBuilder last = new BytesRefBuilder(); for (int i=-100000; i<100000; i++) { LegacyNumericUtils.intToPrefixCoded(i, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 ); assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 ); } // test is back and forward conversion works assertEquals("forward and back conversion should generate same int", i, LegacyNumericUtils.prefixCodedToInt(act.get())); // next step last.copyBytes(act.get()); } }
Example #5
Source File: Correction.java From Elasticsearch with Apache License 2.0 | 6 votes |
public BytesRef join(BytesRef separator, BytesRefBuilder result, BytesRef preTag, BytesRef postTag) { BytesRef[] toJoin = new BytesRef[this.candidates.length]; int len = separator.length * this.candidates.length - 1; for (int i = 0; i < toJoin.length; i++) { Candidate candidate = candidates[i]; if (preTag == null || candidate.userInput) { toJoin[i] = candidate.term; } else { final int maxLen = preTag.length + postTag.length + candidate.term.length; final BytesRefBuilder highlighted = new BytesRefBuilder();// just allocate once highlighted.grow(maxLen); if (i == 0 || candidates[i-1].userInput) { highlighted.append(preTag); } highlighted.append(candidate.term); if (toJoin.length == i + 1 || candidates[i+1].userInput) { highlighted.append(postTag); } toJoin[i] = highlighted.get(); } len += toJoin[i].length; } result.grow(len); return SuggestUtils.join(separator, result, toJoin); }
Example #6
Source File: CommonTermsQueryParser.java From Elasticsearch with Apache License 2.0 | 6 votes |
private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext, Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException { // Logic similar to QueryParser#getFieldQuery int count = 0; try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) { source.reset(); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); BytesRefBuilder builder = new BytesRefBuilder(); while (source.incrementToken()) { // UTF-8 builder.copyChars(termAtt); query.add(new Term(field, builder.toBytesRef())); count++; } } if (count == 0) { return null; } query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch); query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch); return query; }
Example #7
Source File: TestLegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testLongConversionAndOrdering() throws Exception { // generate a series of encoded longs, each numerical one bigger than the one before BytesRefBuilder last = new BytesRefBuilder(); BytesRefBuilder act = new BytesRefBuilder(); for (long l=-100000L; l<100000L; l++) { LegacyNumericUtils.longToPrefixCoded(l, 0, act); if (last!=null) { // test if smaller assertTrue("actual bigger than last (BytesRef)", last.get().compareTo(act.get()) < 0 ); assertTrue("actual bigger than last (as String)", last.get().utf8ToString().compareTo(act.get().utf8ToString()) < 0 ); } // test is back and forward conversion works assertEquals("forward and back conversion should generate same long", l, LegacyNumericUtils.prefixCodedToLong(act.get())); // next step last.copyBytes(act); } }
Example #8
Source File: FSTCompletionBuilder.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Builds the final automaton from a list of entries. */ private FST<Object> buildAutomaton(BytesRefSorter sorter) throws IOException { // Build the automaton. final Outputs<Object> outputs = NoOutputs.getSingleton(); final Object empty = outputs.getNoOutput(); final FSTCompiler<Object> fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) .shareMaxTailLength(shareMaxTailLength).build(); BytesRefBuilder scratch = new BytesRefBuilder(); BytesRef entry; final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); int count = 0; BytesRefIterator iter = sorter.iterator(); while((entry = iter.next()) != null) { count++; if (scratch.get().compareTo(entry) != 0) { fstCompiler.add(Util.toIntsRef(entry, scratchIntsRef), empty); scratch.copyBytes(entry); } } return count == 0 ? null : fstCompiler.compile(); }
Example #9
Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns prefix coded bits after reducing the precision by <code>shift</code> bits. * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}. * After encoding, {@code bytes.offset} will always be 0. * @param val the numeric value * @param shift how many bits to strip from the right * @param bytes will contain the encoded value */ public static void longToPrefixCoded(final long val, final int shift, final BytesRefBuilder bytes) { // ensure shift is 0..63 if ((shift & ~0x3f) != 0) { throw new IllegalArgumentException("Illegal shift value, must be 0..63; got shift=" + shift); } int nChars = (((63-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.setLength(nChars+1); // one extra for the byte that contains the shift info bytes.grow(BUF_SIZE_LONG); bytes.setByteAt(0, (byte)(SHIFT_START_LONG + shift)); long sortableBits = val ^ 0x8000000000000000L; sortableBits >>>= shift; while (nChars > 0) { // Store 7 bits per byte for compatibility // with UTF-8 encoding of terms bytes.setByteAt(nChars--, (byte)(sortableBits & 0x7f)); sortableBits >>>= 7; } }
Example #10
Source File: LegacyNumericUtils.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Returns prefix coded bits after reducing the precision by <code>shift</code> bits. * This is method is used by {@link org.apache.solr.legacy.LegacyNumericTokenStream}. * After encoding, {@code bytes.offset} will always be 0. * @param val the numeric value * @param shift how many bits to strip from the right * @param bytes will contain the encoded value */ public static void intToPrefixCoded(final int val, final int shift, final BytesRefBuilder bytes) { // ensure shift is 0..31 if ((shift & ~0x1f) != 0) { throw new IllegalArgumentException("Illegal shift value, must be 0..31; got shift=" + shift); } int nChars = (((31-shift)*37)>>8) + 1; // i/7 is the same as (i*37)>>8 for i in 0..63 bytes.setLength(nChars+1); // one extra for the byte that contains the shift info bytes.grow(LegacyNumericUtils.BUF_SIZE_LONG); // use the max bytes.setByteAt(0, (byte)(SHIFT_START_INT + shift)); int sortableBits = val ^ 0x80000000; sortableBits >>>= shift; while (nChars > 0) { // Store 7 bits per byte for compatibility // with UTF-8 encoding of terms bytes.setByteAt(nChars--, (byte)(sortableBits & 0x7f)); sortableBits >>>= 7; } }
Example #11
Source File: TrieField.java From lucene-solr with Apache License 2.0 | 6 votes |
private void storedToIndexed(IndexableField f, final BytesRefBuilder bytes) { final Number val = f.numericValue(); if (val != null) { switch (type) { case INTEGER: LegacyNumericUtils.intToPrefixCoded(val.intValue(), 0, bytes); break; case FLOAT: LegacyNumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(val.floatValue()), 0, bytes); break; case LONG: //fallthrough! case DATE: LegacyNumericUtils.longToPrefixCoded(val.longValue(), 0, bytes); break; case DOUBLE: LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(val.doubleValue()), 0, bytes); break; default: throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + f.name()); } } else { // the old BinaryField encoding is no longer supported throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid field contents: "+f.name()); } }
Example #12
Source File: EnumField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates the indexable fields for a value of this enum field.
 *
 * <p>Without doc values only the field from {@code createField} is returned. With doc values a
 * second field is added: a {@link SortedSetDocValuesField} for multi-valued fields, otherwise a
 * {@link NumericDocValuesField} holding the first field's int value.
 *
 * @param sf schema field being indexed
 * @param value the external value
 * @return the fields to index
 */
@Override
public List<IndexableField> createFields(SchemaField sf, Object value) {
  if (!sf.hasDocValues()) {
    return Collections.singletonList(createField(sf, value));
  }

  final List<IndexableField> result = new ArrayList<>();
  final IndexableField primary = createField(sf, value);
  result.add(primary);

  if (!sf.multiValued()) {
    final long docValue = primary.numericValue().intValue();
    result.add(new NumericDocValuesField(sf.getName(), docValue));
    return result;
  }

  final String readableOrdinal = enumMapping.stringValueToIntValue(value.toString()).toString();
  final BytesRefBuilder indexed = new BytesRefBuilder();
  readableToIndexed(readableOrdinal, indexed);
  result.add(new SortedSetDocValuesField(sf.getName(), indexed.toBytesRef()));
  return result;
}
Example #13
Source File: TrieBuilder.java From ambiverse-nlu with Apache License 2.0 | 6 votes |
/**
 * Builds an FST that maps each string to a running counter value.
 *
 * <p>An {@link AssertionError} raised while adding a string is logged at debug level and the
 * string is skipped; the loop then continues with the next one.
 *
 * @param sortedStrings strings to add, in iteration order
 * @return the finished FST
 * @throws IOException if building the FST fails
 */
public static FST<Long> buildTrie(Set<String> sortedStrings) throws IOException {
  final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
  final BytesRefBuilder utf8Scratch = new BytesRefBuilder();
  final IntsRefBuilder intsScratch = new IntsRefBuilder();

  long nextOutput = 0;
  for (final String mention : sortedStrings) {
    utf8Scratch.copyChars(mention);
    try {
      fstBuilder.add(Util.toIntsRef(utf8Scratch.get(), intsScratch), nextOutput++);
    } catch (AssertionError ae) {
      logger.debug("Assertion error for mention " + mention);
    }
  }
  return fstBuilder.finish();
}
Example #14
Source File: DocumentBuilder.java From modernmt with Apache License 2.0 | 6 votes |
/**
 * Creates a term for {@code field} whose bytes are the prefix-coded (shift 0) form of
 * {@code value}.
 *
 * @param value the long to encode
 * @param field the term's field name
 * @return the new term
 */
private static Term makeLongTerm(long value, String field) {
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.longToPrefixCoded(value, 0, encoded);
  final BytesRef termBytes = encoded.toBytesRef();
  return new Term(field, termBytes);
}
Example #15
Source File: SimpleTextPointsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void checkIntegrity() throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); IndexInput clone = dataIn.clone(); clone.seek(0); // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM): long footerStartPos = dataIn.length() - (SimpleTextUtil.CHECKSUM.length + 21); ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); while (true) { SimpleTextUtil.readLine(input, scratch); if (input.getFilePointer() >= footerStartPos) { // Make sure we landed at precisely the right location: if (input.getFilePointer() != footerStartPos) { throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input); } SimpleTextUtil.checkFooter(input); break; } } }
Example #16
Source File: GroupConverter.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Converts top groups keyed by {@link MutableValue} into top groups keyed by the indexed
 * {@link BytesRef} form of the same value.
 *
 * @param field schema field whose type performs the readable-to-indexed conversion
 * @param values top groups to convert; may be {@code null}
 * @return the converted top groups, or {@code null} when {@code values} is null
 */
@SuppressWarnings({"unchecked", "rawtypes"})
static TopGroups<BytesRef> fromMutable(SchemaField field, TopGroups<MutableValue> values) {
  if (values == null) {
    return null;
  }

  final FieldType type = field.getType();
  final int groupCount = values.groups.length;
  final GroupDocs<BytesRef>[] converted = new GroupDocs[groupCount];
  for (int g = 0; g < groupCount; g++) {
    final GroupDocs<MutableValue> source = values.groups[g];

    // A group whose mutable value does not exist keeps a null group value.
    BytesRef indexedValue = null;
    if (source.groupValue.exists) {
      final String readable = Utils.OBJECT_TO_STRING.apply(source.groupValue.toObject());
      final BytesRefBuilder indexed = new BytesRefBuilder();
      type.readableToIndexed(readable, indexed);
      indexedValue = indexed.get();
    }

    converted[g] = new GroupDocs<>(source.score, source.maxScore, source.totalHits,
        source.scoreDocs, indexedValue, source.groupSortValues);
  }

  return new TopGroups<>(values.groupSort, values.withinGroupSort, values.totalHitCount,
      values.totalGroupedHitCount, converted, values.maxScore);
}
Example #17
Source File: SimpleTextDocValuesReader.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public void checkIntegrity() throws IOException { BytesRefBuilder scratch = new BytesRefBuilder(); IndexInput clone = data.clone(); clone.seek(0); // checksum is fixed-width encoded with 20 bytes, plus 1 byte for newline (the space is included in SimpleTextUtil.CHECKSUM): long footerStartPos = data.length() - (SimpleTextUtil.CHECKSUM.length + 21); ChecksumIndexInput input = new BufferedChecksumIndexInput(clone); while (true) { SimpleTextUtil.readLine(input, scratch); if (input.getFilePointer() >= footerStartPos) { // Make sure we landed at precisely the right location: if (input.getFilePointer() != footerStartPos) { throw new CorruptIndexException("SimpleText failure: footer does not start at expected position current=" + input.getFilePointer() + " vs expected=" + footerStartPos, input); } SimpleTextUtil.checkFooter(input); break; } } }
Example #18
Source File: XJoinQParserPlugin.java From BioSolr with Apache License 2.0 | 6 votes |
static private Transformer transformer(final FieldType ft) { return new Transformer() { BytesRefBuilder term = new BytesRefBuilder(); @Override public BytesRef transform(Object joinId) { String joinStr = joinId.toString(); // logic same as TermQParserPlugin if (ft != null) { ft.readableToIndexed(joinStr, term); } else { term.copyChars(joinStr); } return term.toBytesRef(); } }; }
Example #19
Source File: DateFieldMapper.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override public BytesRef indexedValueForSearch(Object value) { BytesRefBuilder bytesRef = new BytesRefBuilder(); NumericUtils.longToPrefixCoded(parseValue(value), 0, bytesRef); // 0 because of exact match return bytesRef.get(); }
Example #20
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Builds a {@link TermInSetQuery} over the indexed form of every value in {@code groupSet}.
 *
 * @param fname field the query targets
 * @param ft field type used to stringify and index-encode each value
 * @param size length of the term array handed to the query
 * @param groupSet numeric group values
 * @return the query
 */
private Query getGroupQuery(String fname, FieldType ft, int size, LongHashSet groupSet) {
  final BytesRef[] terms = new BytesRef[size];
  final BytesRefBuilder scratch = new BytesRefBuilder();
  int next = 0;
  for (LongCursor cursor : groupSet) {
    final String readable = numericToString(ft, cursor.value);
    ft.readableToIndexed(readable, scratch);
    // toBytesRef() is called per value, while the builder itself is reused across iterations.
    terms[next++] = scratch.toBytesRef();
  }
  return new TermInSetQuery(fname, terms);
}
Example #21
Source File: BytesRefTermStream.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Returns the prefix-coded (shift 0) bytes of the value at the current position and advances
 * {@code count} by one.
 *
 * @return the encoded value
 */
@Override
public BytesRef next() {
  // NOTE(review): the value is narrowed to int before being encoded with the long encoder;
  // presumably the backing values fit in an int — confirm against the callers.
  final int narrowed = (int) values.valueAt(this.count++);
  final BytesRefBuilder encoded = new BytesRefBuilder();
  NumericUtils.longToPrefixCoded(narrowed, 0, encoded);
  return encoded.toBytesRef();
}
Example #22
Source File: TrieField.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Parses a readable value according to this field's {@code type} and writes its prefix-coded
 * (shift 0) form into {@code result}.
 *
 * @param val the readable value
 * @param result receives the encoded value
 * @throws SolrException if the type is not one of the handled trie types
 */
@Override
public void readableToIndexed(CharSequence val, BytesRefBuilder result) {
  final String text = val.toString();
  switch (type) {
    case INTEGER: {
      final int parsed = parseIntFromUser(null, text);
      LegacyNumericUtils.intToPrefixCoded(parsed, 0, result);
      break;
    }
    case FLOAT: {
      final int sortable = NumericUtils.floatToSortableInt(parseFloatFromUser(null, text));
      LegacyNumericUtils.intToPrefixCoded(sortable, 0, result);
      break;
    }
    case LONG: {
      final long parsed = parseLongFromUser(null, text);
      LegacyNumericUtils.longToPrefixCoded(parsed, 0, result);
      break;
    }
    case DOUBLE: {
      final long sortable = NumericUtils.doubleToSortableLong(parseDoubleFromUser(null, text));
      LegacyNumericUtils.longToPrefixCoded(sortable, 0, result);
      break;
    }
    case DATE: {
      final long millis = DateMathParser.parseMath(null, text).getTime();
      LegacyNumericUtils.longToPrefixCoded(millis, 0, result);
      break;
    }
    default:
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown type for trie field: " + type);
  }
}
Example #23
Source File: BytesRefs.java From crate with Apache License 2.0 | 5 votes |
/**
 * Converts an arbitrary value to a {@link BytesRef}.
 *
 * @param value the value; a {@code BytesRef} is returned unchanged, {@code null} yields
 *     {@code null}, anything else is converted through its {@code toString()}
 * @param spare builder that receives the characters of non-{@code BytesRef} values
 * @return {@code value} itself, {@code null}, or the bytes held by {@code spare}
 */
public static BytesRef toBytesRef(Object value, BytesRefBuilder spare) {
  if (value instanceof BytesRef) {
    return (BytesRef) value;
  }
  if (value == null) {
    return null;
  }
  final String text = value.toString();
  spare.copyChars(text);
  return spare.get();
}
Example #24
Source File: SimpleTextBKDReader.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Reads a block of doc ids starting at {@code blockFP}.
 *
 * <p>The first line read holds the count; each following line holds one doc id.
 *
 * @param in input to read from
 * @param blockFP file pointer where the block starts
 * @param docIDs receives the doc ids, starting at index 0
 * @return the number of doc ids read
 * @throws IOException if reading fails
 */
int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);

  readLine(in, line);
  final int total = parseInt(line, BLOCK_COUNT);

  int filled = 0;
  while (filled < total) {
    readLine(in, line);
    docIDs[filled] = parseInt(line, BLOCK_DOC_ID);
    filled++;
  }
  return total;
}
Example #25
Source File: TrieField.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public String readableToIndexed(String val) { // TODO: Numeric should never be handled as String, that may break in future lucene versions! Change to use BytesRef for term texts! final BytesRefBuilder bytes = new BytesRefBuilder(); readableToIndexed(val, bytes); return bytes.get().utf8ToString(); }
Example #26
Source File: SimpleTextBKDReader.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Feeds every doc id of the block starting at {@code blockFP} to {@code visitor}.
 *
 * <p>The first line read holds the count, which is passed to {@code visitor.grow} before any
 * doc id is visited; each following line holds one doc id.
 *
 * @param in input to read from
 * @param blockFP file pointer where the block starts
 * @param visitor receives the doc ids
 * @throws IOException if reading fails
 */
void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  in.seek(blockFP);

  readLine(in, line);
  final int total = parseInt(line, BLOCK_COUNT);
  visitor.grow(total);

  int remaining = total;
  while (remaining > 0) {
    readLine(in, line);
    final int docID = parseInt(line, BLOCK_DOC_ID);
    visitor.visit(docID);
    remaining--;
  }
}
Example #27
Source File: CrossCollectionJoinQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
public TermsJoinKeyCollector(FieldType fieldType, Terms terms, SolrIndexSearcher searcher) throws IOException { this.fieldType = fieldType; this.searcher = searcher; termsEnum = terms.iterator(); bytes = new BytesRefBuilder(); bitSet = new FixedBitSet(searcher.maxDoc()); }
Example #28
Source File: BBoxStrategy.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates an exact-match query for {@code number} on {@code field}.
 *
 * <p>Uses a {@link DoublePoint} query when point values are available, otherwise a term query
 * on the prefix-coded sortable long when a legacy numeric field type is configured.
 *
 * @param field the field to query
 * @param number the value to match
 * @return the query
 * @throws UnsupportedOperationException if neither index form is available
 */
private Query makeNumberTermQuery(String field, double number) {
  if (hasPointVals) {
    return DoublePoint.newExactQuery(field, number);
  }
  if (legacyNumericFieldType == null) {
    throw new UnsupportedOperationException("An index is required for this operation.");
  }

  final long sortableBits = NumericUtils.doubleToSortableLong(number);
  final BytesRefBuilder encoded = new BytesRefBuilder();
  LegacyNumericUtils.longToPrefixCoded(sortableBits, 0, encoded);
  return new TermQuery(new Term(field, encoded.get()));
}
Example #29
Source File: SimpleTextLiveDocsFormat.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writes the live-docs bits as text: a size line, one line per set bit, an end marker and a
 * checksum.
 *
 * @param bits the bits to write; one doc line is emitted for every index where {@code get} is true
 * @param dir directory in which the output file is created
 * @param info segment info supplying the segment name and next deletion generation for the file name
 * @param newDelCount not used by this implementation
 * @param context IO context passed to {@code createOutput}
 * @throws IOException if creating, writing or closing the output fails
 */
@Override
public void writeLiveDocs(Bits bits, Directory dir, SegmentCommitInfo info, int newDelCount, IOContext context) throws IOException {
  final int size = bits.length();
  final BytesRefBuilder scratch = new BytesRefBuilder();
  final String fileName = IndexFileNames.fileNameFromGeneration(info.info.name, LIVEDOCS_EXTENSION, info.getNextDelGen());

  // try-with-resources replaces the manual success flag: the output is always closed, and a
  // failure while closing after an earlier failure is attached as a suppressed exception
  // instead of masking the original one.
  try (IndexOutput out = dir.createOutput(fileName, context)) {
    SimpleTextUtil.write(out, SIZE);
    SimpleTextUtil.write(out, Integer.toString(size), scratch);
    SimpleTextUtil.writeNewline(out);

    for (int i = 0; i < size; ++i) {
      if (bits.get(i)) {
        SimpleTextUtil.write(out, DOC);
        SimpleTextUtil.write(out, Integer.toString(i), scratch);
        SimpleTextUtil.writeNewline(out);
      }
    }

    SimpleTextUtil.write(out, END);
    SimpleTextUtil.writeNewline(out);
    SimpleTextUtil.writeChecksum(out, scratch);
  }
}
Example #30
Source File: FieldComparator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Creates this comparator with control over how missing values are sorted.
 *
 * @param numHits number of slots; sizes every per-slot array
 * @param field name of the field being compared
 * @param sortMissingLast pass {@code true} to put missing values at the end
 */
public TermOrdValComparator(int numHits, String field, boolean sortMissingLast) {
  this.field = field;

  // One entry per slot in each parallel array.
  ords = new int[numHits];
  values = new BytesRef[numHits];
  tempBRs = new BytesRefBuilder[numHits];
  readerGen = new int[numHits];

  missingSortCmp = sortMissingLast ? 1 : -1;
  missingOrd = sortMissingLast ? Integer.MAX_VALUE : -1;
}