org.apache.lucene.util.StringHelper Java Examples
The following examples show how to use
org.apache.lucene.util.StringHelper.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Commits a {@link SegmentInfos} holding a single segment written as
 * {@code Version.LUCENE_9_0_0}, reads the latest commit back and checks the
 * reported minimum segment version and commit version.
 *
 * @throws IOException if writing or reading the commit fails
 */
public void testVersionsOneSegment() throws IOException {
  // try-with-resources: the directory is released even when an assertion or I/O call fails.
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, codec,
        Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);
    sis.commit(dir);

    // Re-read what was just committed and verify the version bookkeeping.
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
  }
}
Example #2
Source File: CodecUtil.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Retrieves the full index header from the provided {@link IndexInput}.
 * The input is positioned at offset 0 first, the header fields are scanned to
 * work out the header's total size, and then the whole header is read again
 * from offset 0 into a fresh array.
 *
 * @param in input to read the header from
 * @return the raw header bytes
 * @throws CorruptIndexException if the first int of the file is not {@code CODEC_MAGIC}
 * @throws IOException if reading from {@code in} fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }

  final String codecName = in.readString();
  // The next int is consumed only to advance the pointer (presumably the format version -- confirm).
  in.readInt();
  // Jump over the fixed-size id without reading it.
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  // One unsigned byte carries the suffix length.
  final int suffixLength = in.readByte() & 0xFF;

  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}
Example #3
Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0 | 6 votes |
/** * Sole constructor. * @param info * {@link SegmentInfo} that we wrap * @param delCount * number of deleted documents in this segment * @param delGen * deletion generation number (used to name deletion files) * @param fieldInfosGen * FieldInfos generation number (used to name field-infos files) * @param docValuesGen * DocValues generation number (used to name doc-values updates files) * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()} */ public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) { this.info = info; this.delCount = delCount; this.softDelCount = softDelCount; this.delGen = delGen; this.nextWriteDelGen = delGen == -1 ? 1 : delGen + 1; this.fieldInfosGen = fieldInfosGen; this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1; this.docValuesGen = docValuesGen; this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1; this.id = id; if (id != null && id.length != StringHelper.ID_LENGTH) { throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); } }
Example #4
Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns a description of this segment: the wrapped info's description for
 * {@code delCount + pendingDelCount}, followed by one marker for every
 * generation that is set (not -1), the soft-delete count when positive, and
 * the id when present.
 *
 * @param pendingDelCount number of pending deletes to add to the stored delete count
 * @return the description string
 */
public String toString(int pendingDelCount) {
  final String base = info.toString(delCount + pendingDelCount);

  // Collect the optional markers separately and join them onto the base at the end.
  final StringBuilder extras = new StringBuilder();
  if (delGen != -1) {
    extras.append(":delGen=").append(delGen);
  }
  if (fieldInfosGen != -1) {
    extras.append(":fieldInfosGen=").append(fieldInfosGen);
  }
  if (docValuesGen != -1) {
    extras.append(":dvGen=").append(docValuesGen);
  }
  if (softDelCount > 0) {
    extras.append(" :softDel=").append(softDelCount);
  }
  if (this.id != null) {
    extras.append(" :id=").append(StringHelper.idToString(id));
  }

  if (extras.length() == 0) {
    return base;
  }
  return base + extras;
}
Example #5
Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Test versions: for every version from {@code getVersions()}, writes a
 * segment info once with that version as its minimum version and once with a
 * {@code null} minimum version, reads it back and checks both values.
 * When {@code supportsMinVersion()} is false the minimum version read back
 * must be {@code null}.
 */
public void testVersions() throws Exception {
  Codec codec = getCodec();
  for (Version v : getVersions()) {
    for (Version minV : new Version[] { v, null}) {
      // try-with-resources: a failing assertion no longer leaks the directory of this iteration.
      try (Directory dir = newDirectory()) {
        byte[] id = StringHelper.randomId();
        SegmentInfo info = new SegmentInfo(dir, v, minV, "_123", 1, false, codec,
            Collections.<String,String>emptyMap(), id, Collections.emptyMap(), null);
        info.setFiles(Collections.<String>emptySet());
        codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

        SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
        // Expected value goes first so that failure messages read correctly.
        assertEquals(v, info2.getVersion());
        if (supportsMinVersion()) {
          assertEquals(minV, info2.getMinVersion());
        } else {
          assertNull(info2.getMinVersion());
        }
      }
    }
  }
}
Example #6
Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test attributes map */ public void testAttributes() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); Map<String,String> attributes = new HashMap<>(); attributes.put("key1", "value1"); attributes.put("key2", "value2"); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec, Collections.emptyMap(), id, attributes, null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(attributes, info2.getAttributes()); // attributes map should be immutable expectThrows(UnsupportedOperationException.class, () -> { info2.getAttributes().put("bogus", "bogus"); }); dir.close(); }
Example #7
Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Test diagnostics map */ public void testDiagnostics() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); Map<String,String> diagnostics = new HashMap<>(); diagnostics.put("key1", "value1"); diagnostics.put("key2", "value2"); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec, diagnostics, id, Collections.emptyMap(), null); info.setFiles(Collections.<String>emptySet()); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(diagnostics, info2.getDiagnostics()); // diagnostics map should be immutable expectThrows(UnsupportedOperationException.class, () -> { info2.getDiagnostics().put("bogus", "bogus"); }); dir.close(); }
Example #8
Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Tests SI writer adds itself to files... */ public void testAddsSelfToFiles() throws Exception { Directory dir = newDirectory(); Codec codec = getCodec(); byte id[] = StringHelper.randomId(); SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec, Collections.emptyMap(), id, Collections.emptyMap(), null); Set<String> originalFiles = Collections.singleton("_123.a"); info.setFiles(originalFiles); codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT); Set<String> modifiedFiles = info.files(); assertTrue(modifiedFiles.containsAll(originalFiles)); assertTrue("did you forget to add yourself to files()", modifiedFiles.size() > originalFiles.size()); SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT); assertEquals(info.files(), info2.files()); // files set should be immutable expectThrows(UnsupportedOperationException.class, () -> { info2.files().add("bogus"); }); dir.close(); }
Example #9
Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Writes an index header whose suffix is 255 characters long, then checks
 * that the header verifies, that verification consumes the whole file, and
 * that the consumed length equals {@code CodecUtil.indexHeaderLength} for the
 * same codec name and suffix.
 */
public void testWriteVeryLongSuffix() throws Exception {
  final StringBuilder suffixBuilder = new StringBuilder();
  int remaining = 255;
  while (remaining-- > 0) {
    suffixBuilder.append('a');
  }

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput output = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  final byte[] headerId = StringHelper.randomId();
  CodecUtil.writeIndexHeader(output, "foobar", 5, headerId, suffixBuilder.toString());
  output.close();

  final IndexInput input = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  CodecUtil.checkIndexHeader(input, "foobar", 5, 5, headerId, suffixBuilder.toString());
  // The header is the only content, so the pointer must sit at the end of the file.
  assertEquals(input.getFilePointer(), input.length());
  assertEquals(input.getFilePointer(), CodecUtil.indexHeaderLength("foobar", suffixBuilder.toString()));
  input.close();
}
Example #10
Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Scans the input line by line. For every line that starts with
 * {@code FIELD} it records the field name (the rest of the line, decoded as
 * UTF-8) together with the file pointer reached after reading that line.
 * Scanning ends at the {@code END} line, where the footer is checked.
 *
 * @param in input to scan
 * @return field name to file pointer, sorted by field name
 * @throws IOException if reading fails or the footer check fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> fieldOffsets = new TreeMap<>();

  for (;;) {
    SimpleTextUtil.readLine(checksumInput, line);

    if (line.get().equals(END)) {
      SimpleTextUtil.checkFooter(checksumInput);
      return fieldOffsets;
    }

    if (StringHelper.startsWith(line.get(), FIELD)) {
      // The name is whatever follows the FIELD marker on this line.
      final int nameLength = line.length() - FIELD.length;
      final String fieldName = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      fieldOffsets.put(fieldName, checksumInput.getFilePointer());
    }
  }
}
Example #11
Source File: TestPendingDeletes.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Deletes the three documents of a segment one at a time, optionally writing
 * live docs after each delete, and checks that {@code isFullyDeleted} only
 * reports true once the last document (index 2) has been deleted.
 *
 * @throws IOException if writing field infos or live docs fails
 */
public void testIsFullyDeleted() throws IOException {
  // The original never closed the directory; try-with-resources releases it on every path.
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    FieldInfos fieldInfos = FieldInfos.EMPTY;
    si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);

    PendingDeletes deletes = newPendingDeletes(commitInfo);
    for (int i = 0; i < 3; i++) {
      assertTrue(deletes.delete(i));
      if (random().nextBoolean()) {
        assertTrue(deletes.writeLiveDocs(dir));
      }
      assertEquals(i == 2, deletes.isFullyDeleted(() -> null));
    }
  }
}
Example #12
Source File: TestMergePolicy.java From lucene-solr with Apache License 2.0 | 6 votes |
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir, int numMerges) { MergePolicy.MergeSpecification ms = new MergePolicy.MergeSpecification(); for (int ii = 0; ii < numMerges; ++ii) { final SegmentInfo si = new SegmentInfo( dir, // dir Version.LATEST, // version Version.LATEST, // min version TestUtil.randomSimpleString(random()), // name random().nextInt(1000), // maxDoc random().nextBoolean(), // isCompoundFile null, // codec Collections.emptyMap(), // diagnostics TestUtil.randomSimpleString(// id random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII), Collections.emptyMap(), // attributes null /* indexSort */); final List<SegmentCommitInfo> segments = new LinkedList<SegmentCommitInfo>(); segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId())); ms.add(new MergePolicy.OneMerge(segments)); } return ms; }
Example #13
Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks that a {@link SegmentCommitInfo} starts with the id it was given,
 * that each of {@code advanceDelGen}, {@code advanceDocValuesGen} and
 * {@code advanceFieldInfosGen} leaves it with a different id, and that a
 * clone keeps its id when the original is advanced afterwards.
 */
public void testIDChangesOnAdvance() throws IOException {
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] previousId = StringHelper.randomId();
    final SegmentInfo segment = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
        Collections.<String, String>emptyMap(), StringHelper.randomId(), Collections.<String, String>emptyMap(), null);
    final SegmentCommitInfo commitInfo = new SegmentCommitInfo(segment, 0, 0, -1, -1, -1, previousId);
    assertEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));

    // Advancing the deletion generation must produce a new id.
    commitInfo.advanceDelGen();
    assertNotEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));

    // Same for the doc-values generation.
    previousId = commitInfo.getId();
    commitInfo.advanceDocValuesGen();
    assertNotEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));

    // Same for the field-infos generation.
    previousId = commitInfo.getId();
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));

    // A clone starts out with the id of the original.
    final SegmentCommitInfo copy = commitInfo.clone();
    previousId = commitInfo.getId();
    assertEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));
    assertEquals(StringHelper.idToString(previousId), StringHelper.idToString(copy.getId()));

    // Advancing the original afterwards must not touch the clone.
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(previousId), StringHelper.idToString(commitInfo.getId()));
    assertEquals("clone changed but shouldn't", StringHelper.idToString(previousId), StringHelper.idToString(copy.getId()));
  }
}
Example #14
Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Commits a {@link SegmentInfos} holding two segments written as
 * {@code Version.LUCENE_9_0_0}, reads the latest commit back and checks the
 * reported versions and that both segment-commit ids survive the round trip.
 *
 * @throws IOException if writing or reading the commit fails
 */
public void testVersionsTwoSegments() throws IOException {
  // try-with-resources: the directory is released even when an assertion or I/O call fails.
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);

    // First segment, "_0".
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, codec,
        Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    // Second segment, "_1".
    info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false, codec,
        Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    sis.commit(dir);
    // Capture the ids after the commit, before re-reading.
    byte[] commitInfoId0 = sis.info(0).getId();
    byte[] commitInfoId1 = sis.info(1).getId();

    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
    assertEquals(StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId()));
    assertEquals(StringHelper.idToString(commitInfoId1), StringHelper.idToString(sis.info(1).getId()));
  }
}
Example #15
Source File: AutomatonTermsEnum.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Decides whether {@code term} is accepted. A term is accepted when it ends
 * with the common suffix (if one is set) and the automaton accepts its bytes;
 * the automaton is only run when the suffix check passes.
 *
 * @param term candidate term
 * @return {@code YES} / {@code YES_AND_SEEK} for accepted terms (the former
 *         in linear mode); for rejected terms {@code NO} while in linear mode
 *         and below {@code linearUpperBound}, otherwise {@code NO_AND_SEEK}
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixMatches = commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);
  if (suffixMatches && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }

  // Both rejection paths (suffix mismatch, automaton mismatch) answer the same way.
  final boolean keepScanningLinearly = linear && term.compareTo(linearUpperBound) < 0;
  if (keepScanningLinearly) {
    return AcceptStatus.NO;
  }
  return AcceptStatus.NO_AND_SEEK;
}
Example #16
Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writing an index header with the suffix {@code "\u1234"} must be rejected
 * with an {@link IllegalArgumentException}.
 */
public void testWriteNonAsciiSuffix() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput target = new ByteBuffersIndexOutput(buffer, "temp", "temp");

  expectThrows(IllegalArgumentException.class, () -> {
    CodecUtil.writeIndexHeader(target, "foobar", 5, StringHelper.randomId(), "\u1234");
  });
}
Example #17
Source File: TestOneMergeWrappingMergePolicy.java From lucene-solr with Apache License 2.0 | 5 votes |
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir) { MergePolicy.MergeSpecification ms; if (0 < random().nextInt(10)) { // ~ 1 in 10 times return null ms = new MergePolicy.MergeSpecification(); // append up to 10 (random non-sensical) one merge objects for (int ii = 0; ii < random().nextInt(10); ++ii) { final SegmentInfo si = new SegmentInfo( dir, // dir Version.LATEST, // version Version.LATEST, // min version TestUtil.randomSimpleString(random()), // name random().nextInt(), // maxDoc random().nextBoolean(), // isCompoundFile null, // codec Collections.emptyMap(), // diagnostics TestUtil.randomSimpleString(// id random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII), Collections.emptyMap(), // attributes null /* indexSort */); final List<SegmentCommitInfo> segments = new LinkedList<SegmentCommitInfo>(); segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId())); ms.add(new MergePolicy.OneMerge(segments)); } } return null; }
Example #18
Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writing an index header with a 256-character suffix must be rejected with
 * an {@link IllegalArgumentException}.
 */
public void testWriteTooLongSuffix() throws Exception {
  final StringBuilder suffixBuilder = new StringBuilder();
  int remaining = 256;
  while (remaining-- > 0) {
    suffixBuilder.append('a');
  }

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput target = new ByteBuffersIndexOutput(buffer, "temp", "temp");

  expectThrows(IllegalArgumentException.class, () -> {
    CodecUtil.writeIndexHeader(target, "foobar", 5, StringHelper.randomId(), suffixBuilder.toString());
  });
}
Example #19
Source File: TestIndexWriterThreadsToSegments.java From lucene-solr with Apache License 2.0 | 5 votes |
byte[] readSegmentInfoID(Directory dir, String file) throws IOException { try (IndexInput in = dir.openInput(file, IOContext.DEFAULT)) { in.readInt(); // magic in.readString(); // codec name in.readInt(); // version byte id[] = new byte[StringHelper.ID_LENGTH]; in.readBytes(id, 0, id.length); return id; } }
Example #20
Source File: TestPendingDeletes.java From lucene-solr with Apache License 2.0 | 5 votes |
public void testDeleteDoc() throws IOException { Directory dir = new ByteBuffersDirectory(); SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(), Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null); SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId()); PendingDeletes deletes = newPendingDeletes(commitInfo); assertNull(deletes.getLiveDocs()); int docToDelete = TestUtil.nextInt(random(), 0, 7); assertTrue(deletes.delete(docToDelete)); assertNotNull(deletes.getLiveDocs()); assertEquals(1, deletes.numPendingDeletes()); Bits liveDocs = deletes.getLiveDocs(); assertFalse(liveDocs.get(docToDelete)); assertFalse(deletes.delete(docToDelete)); // delete again assertTrue(liveDocs.get(8)); assertTrue(deletes.delete(8)); assertTrue(liveDocs.get(8)); // we have a snapshot assertEquals(2, deletes.numPendingDeletes()); assertTrue(liveDocs.get(9)); assertTrue(deletes.delete(9)); assertTrue(liveDocs.get(9)); // now make sure new live docs see the deletions liveDocs = deletes.getLiveDocs(); assertFalse(liveDocs.get(9)); assertFalse(liveDocs.get(8)); assertFalse(liveDocs.get(docToDelete)); assertEquals(3, deletes.numPendingDeletes()); dir.close(); }
Example #21
Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Writes an index header followed by a string, then seeks to
 * {@code CodecUtil.indexHeaderLength} for the same codec name and suffix and
 * checks that the string is read back from exactly that position.
 */
public void testSegmentHeaderLength() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();

  final IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(writer, "FooBar", 5, StringHelper.randomId(), "xyz");
  writer.writeString("this is the data");
  writer.close();

  final IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  // Skip the header using the computed length; the payload must start right there.
  reader.seek(CodecUtil.indexHeaderLength("FooBar", "xyz"));
  assertEquals("this is the data", reader.readString());
  reader.close();
}
Example #22
Source File: TestDoc.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Merges the two given segments into a new segment named {@code merged} and
 * returns a commit info for it.
 *
 * <p>Both segment readers are now opened in a try-with-resources statement,
 * so they are closed even if the merge throws; the original leaked them on
 * failure. The unused {@code MergeState} local and a stray empty statement
 * were removed.
 *
 * @param dir directory the compound file is written to when {@code useCompoundFile} is set
 * @param si1 first segment to merge; its directory also hosts the merged segment's files
 * @param si2 second segment to merge
 * @param merged name of the merged segment
 * @param useCompoundFile whether to write a compound file and delete the individual files
 * @return commit info wrapping the merged segment
 * @throws Exception if opening the readers, merging or writing fails
 */
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
    throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));
  final Codec codec = Codec.getDefault();
  // Tracks every file the merger creates so the file list can be recorded on the segment.
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec,
      Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  try (SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
       SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context)) {
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
        si, InfoStream.getDefault(), trackingDir,
        new FieldInfos.FieldNumbers(null), context);
    merger.merge();
  }

  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // Capture the individual files before the compound format rewrites the file list.
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}
Example #23
Source File: DocTermOrds.java From lucene-solr with Apache License 2.0 | 5 votes |
private BytesRef setTerm() throws IOException { term = termsEnum.term(); //System.out.println(" setTerm() term=" + term.utf8ToString() + " vs prefix=" + (prefix == null ? "null" : prefix.utf8ToString())); if (prefix != null && !StringHelper.startsWith(term, prefix)) { term = null; } return term; }
Example #24
Source File: CodecUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Expert: reads {@code StringHelper.ID_LENGTH} bytes from the input and
 * verifies that they equal the expected object id.
 *
 * @param in input positioned at the id
 * @param expectedID id the bytes must match
 * @return the id bytes that were read
 * @throws CorruptIndexException if the bytes read differ from {@code expectedID}
 * @throws IOException if reading fails
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);

  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}
Example #25
Source File: MultiPhrasePrefixQuery.java From crate with Apache License 2.0 | 5 votes |
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException { // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually. List<LeafReaderContext> leaves = reader.leaves(); for (LeafReaderContext leaf : leaves) { Terms _terms = leaf.reader().terms(field); if (_terms == null) { continue; } TermsEnum termsEnum = _terms.iterator(); TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes()); if (TermsEnum.SeekStatus.END == seekStatus) { continue; } for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { if (!StringHelper.startsWith(term, prefix.bytes())) { break; } terms.add(new Term(field, BytesRef.deepCopyOf(term))); if (terms.size() >= maxExpansions) { return; } } } }
Example #26
Source File: ModuloBucketBuilder.java From Elasticsearch with Apache License 2.0 | 5 votes |
private static int hashCode(@Nullable Object value) { if (value == null) { return 0; } if (value instanceof BytesRef) { // since lucene 4.8 // BytesRef.hashCode() uses a random seed across different jvm // which causes the hashCode / routing to be different on each node // this breaks the group by redistribution logic - need to use a fixed seed here // to be consistent. return StringHelper.murmurhash3_x86_32(((BytesRef) value), 1); } return value.hashCode(); }
Example #27
Source File: SrndPrefixQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public void visitMatchingTerms( IndexReader reader, String fieldName, MatchingTermVisitor mtv) throws IOException { /* inspired by PrefixQuery.rewrite(): */ Terms terms = MultiTerms.getTerms(reader, fieldName); if (terms != null) { TermsEnum termsEnum = terms.iterator(); boolean skip = false; TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix())); if (status == TermsEnum.SeekStatus.FOUND) { mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName)); } else if (status == TermsEnum.SeekStatus.NOT_FOUND) { if (StringHelper.startsWith(termsEnum.term(), prefixRef)) { mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString())); } else { skip = true; } } else { // EOF skip = true; } if (!skip) { while(true) { BytesRef text = termsEnum.next(); if (text != null && StringHelper.startsWith(text, prefixRef)) { mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString())); } else { break; } } } } }
Example #28
Source File: SrndTruncQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Visits every term of {@code fieldName} that starts with the prefix and
 * whose remainder (the text after the first {@code prefix.length()}
 * characters) matches {@code pattern}. Scanning stops at the first term
 * that does not start with the prefix.
 *
 * @param reader reader to take the terms from
 * @param fieldName field whose terms are scanned
 * @param mtv visitor invoked once per matching term
 * @throws IOException if reading terms fails
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }

  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator();
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Starting point: the prefix itself when it is a term, else the term the seek landed on, else nothing.
    BytesRef text = status == TermsEnum.SeekStatus.FOUND ? prefixRef
                  : status == TermsEnum.SeekStatus.NOT_FOUND ? termsEnum.term()
                  : null;

    for (; text != null && StringHelper.startsWith(text, prefixRef); text = termsEnum.next()) {
      final String textString = text.utf8ToString();
      matcher.reset(textString.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, textString));
      }
    }
  } finally {
    matcher.reset();
  }
}
Example #29
Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Verifies the footer: the next line must start with {@code CHECKSUM}, the
 * value after that marker must equal the input's checksum (taken before the
 * line is read and formatted as a zero-padded 20-digit decimal), and the
 * footer line must be the last thing in the file.
 *
 * @param input input positioned at the footer line
 * @throws CorruptIndexException if any of the three checks fails
 * @throws IOException if reading fails
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  final BytesRefBuilder line = new BytesRefBuilder();
  // Must be captured before the footer line itself is consumed.
  final String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
  readLine(input, line);

  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString(), input);
  }

  final int valueLength = line.length() - CHECKSUM.length;
  final String actualChecksum = new BytesRef(line.bytes(), CHECKSUM.length, valueLength).utf8ToString();
  if (expectedChecksum.equals(actualChecksum) == false) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum + " != " + expectedChecksum, input);
  }

  if (input.length() != input.getFilePointer()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}
Example #30
Source File: MultiPhrasePrefixQuery.java From Elasticsearch with Apache License 2.0 | 5 votes |
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException { // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually. List<LeafReaderContext> leaves = reader.leaves(); for (LeafReaderContext leaf : leaves) { Terms _terms = leaf.reader().terms(field); if (_terms == null) { continue; } TermsEnum termsEnum = _terms.iterator(); TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes()); if (TermsEnum.SeekStatus.END == seekStatus) { continue; } for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { if (!StringHelper.startsWith(term, prefix.bytes())) { break; } terms.add(new Term(field, BytesRef.deepCopyOf(term))); if (terms.size() >= maxExpansions) { return; } } } }