org.apache.lucene.util.StringHelper Java Examples

Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Commits a single segment written with {@code Version.LUCENE_9_0_0}, re-reads the latest
 * commit and checks the reported minimum segment version and commit version.
 */
public void testVersionsOneSegment() throws IOException {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
                                       Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());

    sis.add(commitInfo);
    sis.commit(dir);
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
  }
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Reads the complete index header from the start of {@code in} and returns its raw bytes.
 * The input is rewound to position 0 twice: once to inspect the header fields and once to
 * copy the whole header, so on return the file pointer sits just past the copied bytes.
 *
 * @param in input to read from; its current position is ignored
 * @return the header bytes
 * @throws CorruptIndexException if the first int is not {@code CODEC_MAGIC}
 * @throws IOException if reading fails
 */
public static byte[] readIndexHeader(IndexInput in) throws IOException {
  in.seek(0);
  final int magic = in.readInt();
  if (magic != CODEC_MAGIC) {
    throw new CorruptIndexException("codec header mismatch: actual header=" + magic + " vs expected header=" + CODEC_MAGIC, in);
  }
  final String codecName = in.readString();
  // the int after the codec name is consumed but its value is not needed here
  in.readInt();
  // jump over the id bytes to reach the suffix-length byte
  final long afterId = in.getFilePointer() + StringHelper.ID_LENGTH;
  in.seek(afterId);
  final int suffixLength = Byte.toUnsignedInt(in.readByte());
  final int totalLength = headerLength(codecName) + StringHelper.ID_LENGTH + 1 + suffixLength;
  final byte[] header = new byte[totalLength];
  in.seek(0);
  in.readBytes(header, 0, header.length);
  return header;
}

Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Sole constructor.
 * @param info
 *          {@link SegmentInfo} that we wrap
 * @param delCount
 *          number of deleted documents in this segment
 * @param delGen
 *          deletion generation number (used to name deletion files)
 * @param fieldInfosGen
 *          FieldInfos generation number (used to name field-infos files)
 * @param docValuesGen
 *          DocValues generation number (used to name doc-values updates files)
 * @param id Id that uniquely identifies this segment commit. This id must be 16 bytes long. See {@link StringHelper#randomId()}
 */
public SegmentCommitInfo(SegmentInfo info, int delCount, int softDelCount, long delGen, long fieldInfosGen, long docValuesGen, byte[] id) {
  this.info = info;
  this.delCount = delCount;
  this.softDelCount = softDelCount;
  this.delGen = delGen;
  this.nextWriteDelGen = delGen == -1 ? 1 : delGen + 1;
  this.fieldInfosGen = fieldInfosGen;
  this.nextWriteFieldInfosGen = fieldInfosGen == -1 ? 1 : fieldInfosGen + 1;
  this.docValuesGen = docValuesGen;
  this.nextWriteDocValuesGen = docValuesGen == -1 ? 1 : docValuesGen + 1;
  this.id = id;
  if (id != null && id.length != StringHelper.ID_LENGTH) {
    throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
  }
}

Source File: SegmentCommitInfo.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Returns a description of this segment.
 * The text starts with {@code info.toString(delCount + pendingDelCount)} and is followed by
 * one tag per generation that is not {@code -1}, the soft-delete count when positive, and
 * the id when present.
 *
 * @param pendingDelCount added to {@code delCount} before delegating to the wrapped info
 */
public String toString(int pendingDelCount) {
  final String base = info.toString(delCount + pendingDelCount);
  final StringBuilder tags = new StringBuilder();
  if (delGen != -1) {
    tags.append(":delGen=").append(delGen);
  }
  if (fieldInfosGen != -1) {
    tags.append(":fieldInfosGen=").append(fieldInfosGen);
  }
  if (docValuesGen != -1) {
    tags.append(":dvGen=").append(docValuesGen);
  }
  if (softDelCount > 0) {
    tags.append(" :softDel=").append(softDelCount);
  }
  if (this.id != null) {
    tags.append(" :id=").append(StringHelper.idToString(id));
  }

  // with no tags the base string is returned untouched
  return tags.length() == 0 ? base : base + tags;
}

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Test versions: for every version from {@code getVersions()}, writes a segment info with that
 * version and with a min version of either the same version or {@code null}, reads it back and
 * checks both values survive the round trip (min version only if {@code supportsMinVersion()}).
 */
public void testVersions() throws Exception {
  Codec codec = getCodec();
  for (Version v : getVersions()) {
    for (Version minV : new Version[] { v, null}) {
      // try-with-resources: the directory is closed even if an assertion below fails
      try (Directory dir = newDirectory()) {
        byte[] id = StringHelper.randomId();
        SegmentInfo info = new SegmentInfo(dir, v, minV, "_123", 1, false, codec,
                                           Collections.<String,String>emptyMap(), id, Collections.emptyMap(), null);
        info.setFiles(Collections.<String>emptySet());
        codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
        SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
        // expected value first, so failure messages read the right way round
        assertEquals(v, info2.getVersion());
        if (supportsMinVersion()) {
          assertEquals(minV, info2.getMinVersion());
        } else {
          assertNull(info2.getMinVersion());
        }
      }
    }
  }
}

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Test attributes map: writes a segment info carrying two attributes, reads it back, and checks
 * that the attributes are equal and that the map returned by the read info rejects modification.
 */
public void testAttributes() throws Exception {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    Map<String,String> attributes = new HashMap<>();
    attributes.put("key1", "value1");
    attributes.put("key2", "value2");
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       Collections.emptyMap(), id, attributes, null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(attributes, info2.getAttributes());

    // attributes map should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.getAttributes().put("bogus", "bogus");
    });
  }
}

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Test diagnostics map: writes a segment info carrying two diagnostics entries, reads it back,
 * and checks that the entries are equal and that the map returned by the read info rejects
 * modification.
 */
public void testDiagnostics() throws Exception {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    Map<String,String> diagnostics = new HashMap<>();
    diagnostics.put("key1", "value1");
    diagnostics.put("key2", "value2");
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       diagnostics, id, Collections.emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(diagnostics, info2.getDiagnostics());

    // diagnostics map should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.getDiagnostics().put("bogus", "bogus");
    });
  }
}

Source File: BaseSegmentInfoFormatTestCase.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Tests SI writer adds itself to files: after writing, {@code info.files()} must contain the
 * original file plus at least one more entry, the re-read info must report the same files, and
 * the file set of the re-read info must reject modification.
 */
public void testAddsSelfToFiles() throws Exception {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (Directory dir = newDirectory()) {
    Codec codec = getCodec();
    byte[] id = StringHelper.randomId();
    SegmentInfo info = new SegmentInfo(dir, getVersions()[0], getVersions()[0], "_123", 1, false, codec,
                                       Collections.emptyMap(), id, Collections.emptyMap(), null);
    Set<String> originalFiles = Collections.singleton("_123.a");
    info.setFiles(originalFiles);
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);

    Set<String> modifiedFiles = info.files();
    assertTrue(modifiedFiles.containsAll(originalFiles));
    assertTrue("did you forget to add yourself to files()", modifiedFiles.size() > originalFiles.size());

    SegmentInfo info2 = codec.segmentInfoFormat().read(dir, "_123", id, IOContext.DEFAULT);
    assertEquals(info.files(), info2.files());

    // files set should be immutable
    expectThrows(UnsupportedOperationException.class, () -> {
      info2.files().add("bogus");
    });
  }
}

Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Writes an index header whose suffix is 255 characters long, then verifies that the header
 * checks out, that it fills the whole output, and that its length matches
 * {@code CodecUtil.indexHeaderLength}.
 */
public void testWriteVeryLongSuffix() throws Exception {
  final StringBuilder builder = new StringBuilder();
  int remaining = 255;
  while (remaining-- > 0) {
    builder.append('a');
  }
  final String suffix = builder.toString();

  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput output = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  final byte[] id = StringHelper.randomId();
  CodecUtil.writeIndexHeader(output, "foobar", 5, id, suffix);
  output.close();

  final IndexInput input = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  CodecUtil.checkIndexHeader(input, "foobar", 5, 5, id, suffix);
  // the header is the only content, so the pointer must be at the end of the input
  assertEquals(input.getFilePointer(), input.length());
  assertEquals(input.getFilePointer(), CodecUtil.indexHeaderLength("foobar", suffix));
  input.close();
}

Source File: SimpleTextFieldsReader.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Scans {@code in} line by line until the {@code END} line. For every line that starts with
 * {@code FIELD}, the remainder of the line (decoded as UTF-8) is mapped to the file pointer
 * right after that line. Once {@code END} is seen the footer is checked.
 *
 * @param in input to scan; it is wrapped in a checksumming input
 * @return field names mapped to the file pointer following their {@code FIELD} line
 * @throws IOException if reading or the footer check fails
 */
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  final ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
  final BytesRefBuilder line = new BytesRefBuilder();
  final TreeMap<String,Long> offsets = new TreeMap<>();

  for (SimpleTextUtil.readLine(input, line); !line.get().equals(END); SimpleTextUtil.readLine(input, line)) {
    if (StringHelper.startsWith(line.get(), FIELD)) {
      final int nameLength = line.length() - FIELD.length;
      final String name = new String(line.bytes(), FIELD.length, nameLength, StandardCharsets.UTF_8);
      offsets.put(name, input.getFilePointer());
    }
  }

  SimpleTextUtil.checkFooter(input);
  return offsets;
}

Source File: TestPendingDeletes.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Deletes the three documents of a 3-doc segment one at a time, randomly writing live docs in
 * between, and checks that {@code isFullyDeleted} only turns true after the last delete.
 */
public void testIsFullyDeleted() throws IOException {
  // the directory was never closed before; try-with-resources releases it on every path
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    FieldInfos fieldInfos = FieldInfos.EMPTY;
    si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
    PendingDeletes deletes = newPendingDeletes(commitInfo);
    for (int i = 0; i < 3; i++) {
      assertTrue(deletes.delete(i));
      if (random().nextBoolean()) {
        assertTrue(deletes.writeLiveDocs(dir));
      }
      assertEquals(i == 2, deletes.isFullyDeleted(() -> null));
    }
  }
}

Source File: TestMergePolicy.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds a merge specification holding {@code numMerges} one-segment merges, each over a
 * freshly created segment with random name, maxDoc, compound-file flag and id.
 *
 * @param dir directory handed to every created {@code SegmentInfo}
 * @param numMerges number of {@code OneMerge} entries to add
 * @return the populated specification
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir, int numMerges) {
  final MergePolicy.MergeSpecification spec = new MergePolicy.MergeSpecification();
  int created = 0;
  while (created < numMerges) {
    // random values are drawn in constructor-argument order
    final String name = TestUtil.randomSimpleString(random());
    final int maxDoc = random().nextInt(1000);
    final boolean isCompoundFile = random().nextBoolean();
    final byte[] segmentId = TestUtil
        .randomSimpleString(random(), StringHelper.ID_LENGTH, StringHelper.ID_LENGTH)
        .getBytes(StandardCharsets.US_ASCII);

    final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, name, maxDoc, isCompoundFile,
        null /* codec */, Collections.emptyMap() /* diagnostics */, segmentId,
        Collections.emptyMap() /* attributes */, null /* indexSort */);

    final List<SegmentCommitInfo> segments = new LinkedList<>();
    segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId()));
    spec.add(new MergePolicy.OneMerge(segments));
    ++created;
  }
  return spec;
}

Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the id reported by a {@code SegmentCommitInfo} differs after each
 * {@code advance*Gen()} call, and that a clone keeps the id it had when it was cloned.
 */
public void testIDChangesOnAdvance() throws IOException {
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte id[] = StringHelper.randomId();
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
        Collections.<String, String>emptyMap(), StringHelper.randomId(), Collections.<String, String>emptyMap(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, id);
    // right after construction the commit info reports the id it was given
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    commitInfo.advanceDelGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));

    // re-snapshot the current id before each further advance
    id = commitInfo.getId();
    commitInfo.advanceDocValuesGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));

    id = commitInfo.getId();
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    // clone first, then snapshot: both the original and the clone must report this id
    SegmentCommitInfo clone = commitInfo.clone();
    id = commitInfo.getId();
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    assertEquals(StringHelper.idToString(id), StringHelper.idToString(clone.getId()));

    // advancing the original must change its id but leave the clone's id alone
    commitInfo.advanceFieldInfosGen();
    assertNotEquals(StringHelper.idToString(id), StringHelper.idToString(commitInfo.getId()));
    assertEquals("clone changed but shouldn't", StringHelper.idToString(id), StringHelper.idToString(clone.getId()));
  }
}

Source File: TestSegmentInfos.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Commits two segments written with {@code Version.LUCENE_9_0_0}, re-reads the latest commit and
 * checks the reported versions as well as that each segment commit id survives the round trip.
 */
public void testVersionsTwoSegments() throws IOException {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (BaseDirectoryWrapper dir = newDirectory()) {
    dir.setCheckIndexOnClose(false);
    byte[] id = StringHelper.randomId();
    Codec codec = Codec.getDefault();

    SegmentInfos sis = new SegmentInfos(Version.LATEST.major);
    SegmentInfo info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_0", 1, false, Codec.getDefault(),
                                       Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    info = new SegmentInfo(dir, Version.LUCENE_9_0_0, Version.LUCENE_9_0_0, "_1", 1, false, Codec.getDefault(),
                           Collections.<String,String>emptyMap(), id, Collections.<String,String>emptyMap(), null);
    info.setFiles(Collections.<String>emptySet());
    codec.segmentInfoFormat().write(dir, info, IOContext.DEFAULT);
    commitInfo = new SegmentCommitInfo(info, 0, 0, -1, -1, -1, StringHelper.randomId());
    sis.add(commitInfo);

    sis.commit(dir);
    // capture the ids before sis is replaced by the re-read commit
    byte[] commitInfoId0 = sis.info(0).getId();
    byte[] commitInfoId1 = sis.info(1).getId();
    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(Version.LUCENE_9_0_0, sis.getMinSegmentLuceneVersion());
    assertEquals(Version.LATEST, sis.getCommitLuceneVersion());
    assertEquals(StringHelper.idToString(commitInfoId0), StringHelper.idToString(sis.info(0).getId()));
    assertEquals(StringHelper.idToString(commitInfoId1), StringHelper.idToString(sis.info(1).getId()));
  }
}

Source File: AutomatonTermsEnum.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Decides whether {@code term} is accepted. A term is accepted only when it ends with the
 * common suffix (if one is set) and the run automaton matches its bytes.
 * The "seek" variants of the status are returned whenever the enum is not in linear mode, or,
 * for rejections, when the term is not below {@code linearUpperBound}.
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  final boolean suffixMatches = commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef);
  if (suffixMatches && runAutomaton.run(term.bytes, term.offset, term.length)) {
    return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
  }

  // both rejection paths (suffix mismatch, automaton mismatch) share the same answer
  final boolean stillInLinearRange = linear && term.compareTo(linearUpperBound) < 0;
  if (stillInLinearRange) {
    return AcceptStatus.NO;
  }
  return AcceptStatus.NO_AND_SEEK;
}

Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/** Writing an index header with the suffix {@code "\u1234"} must throw {@code IllegalArgumentException}. */
public void testWriteNonAsciiSuffix() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput output = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(
      IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(output, "foobar", 5, StringHelper.randomId(), "\u1234"));
}

Source File: TestOneMergeWrappingMergePolicy.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a random merge specification, or {@code null} in roughly one out of ten calls.
 * A non-null result holds a random number of one-segment merges over nonsensical,
 * randomly populated segments.
 *
 * @param dir directory handed to every created {@code SegmentInfo}
 * @return the populated specification, or {@code null}
 */
private static MergePolicy.MergeSpecification createRandomMergeSpecification(Directory dir) {
  MergePolicy.MergeSpecification ms;
  if (0 < random().nextInt(10)) { // ~ 1 in 10 times return null
    ms = new MergePolicy.MergeSpecification();
    // append up to 10 (random non-sensical) one merge objects;
    // note the loop bound is re-drawn on every iteration
    for (int ii = 0; ii < random().nextInt(10); ++ii) {
      final SegmentInfo si = new SegmentInfo(
          dir, // dir
          Version.LATEST, // version
          Version.LATEST, // min version
          TestUtil.randomSimpleString(random()), // name
          random().nextInt(), // maxDoc
          random().nextBoolean(), // isCompoundFile
          null, // codec
          Collections.emptyMap(), // diagnostics
          TestUtil.randomSimpleString(// id
              random(),
              StringHelper.ID_LENGTH,
              StringHelper.ID_LENGTH).getBytes(StandardCharsets.US_ASCII),
          Collections.emptyMap(), // attributes
          null /* indexSort */);
      final List<SegmentCommitInfo> segments = new LinkedList<>();
      segments.add(new SegmentCommitInfo(si, 0, 0, 0, 0, 0, StringHelper.randomId()));
      ms.add(new MergePolicy.OneMerge(segments));
    }
  } else {
    ms = null;
  }
  // previously this was an unconditional "return null", which discarded the built specification
  return ms;
}

Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/** Writing an index header with a 256-character suffix must throw {@code IllegalArgumentException}. */
public void testWriteTooLongSuffix() throws Exception {
  final StringBuilder tooLong = new StringBuilder();
  int remaining = 256;
  while (remaining-- > 0) {
    tooLong.append('a');
  }
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput output = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  expectThrows(
      IllegalArgumentException.class,
      () -> CodecUtil.writeIndexHeader(output, "foobar", 5, StringHelper.randomId(), tooLong.toString()));
}

Source File: TestIndexWriterThreadsToSegments.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Opens {@code file} in {@code dir}, skips the leading magic int, codec name and version int,
 * and returns the {@code StringHelper.ID_LENGTH} bytes that follow.
 *
 * @throws IOException if the file cannot be opened or read
 */
byte[] readSegmentInfoID(Directory dir, String file) throws IOException {
  try (IndexInput input = dir.openInput(file, IOContext.DEFAULT)) {
    // skip, in order: magic, codec name, version
    input.readInt();
    input.readString();
    input.readInt();

    final byte[] segmentId = new byte[StringHelper.ID_LENGTH];
    input.readBytes(segmentId, 0, segmentId.length);
    return segmentId;
  }
}

Source File: TestPendingDeletes.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Deletes three documents of a 10-doc segment and checks the pending-delete count, that a
 * repeated delete is rejected, that a previously obtained live-docs instance still shows the
 * documents as live, and that a freshly obtained one reflects all deletions.
 */
public void testDeleteDoc() throws IOException {
  // try-with-resources: the directory is closed even if an assertion below fails
  try (Directory dir = new ByteBuffersDirectory()) {
    SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 10, false, Codec.getDefault(),
        Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
    SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1, StringHelper.randomId());
    PendingDeletes deletes = newPendingDeletes(commitInfo);
    assertNull(deletes.getLiveDocs());
    // drawn from 0..7 so it never collides with docs 8 and 9 used below
    int docToDelete = TestUtil.nextInt(random(), 0, 7);
    assertTrue(deletes.delete(docToDelete));
    assertNotNull(deletes.getLiveDocs());
    assertEquals(1, deletes.numPendingDeletes());

    Bits liveDocs = deletes.getLiveDocs();
    assertFalse(liveDocs.get(docToDelete));
    assertFalse(deletes.delete(docToDelete)); // delete again

    assertTrue(liveDocs.get(8));
    assertTrue(deletes.delete(8));
    assertTrue(liveDocs.get(8)); // we have a snapshot
    assertEquals(2, deletes.numPendingDeletes());

    assertTrue(liveDocs.get(9));
    assertTrue(deletes.delete(9));
    assertTrue(liveDocs.get(9));

    // now make sure new live docs see the deletions
    liveDocs = deletes.getLiveDocs();
    assertFalse(liveDocs.get(9));
    assertFalse(liveDocs.get(8));
    assertFalse(liveDocs.get(docToDelete));
    assertEquals(3, deletes.numPendingDeletes());
  }
}

Source File: TestCodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes an index header followed by a string, then seeks to
 * {@code CodecUtil.indexHeaderLength} and checks that the string is read back from there.
 */
public void testSegmentHeaderLength() throws Exception {
  final ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
  final IndexOutput writer = new ByteBuffersIndexOutput(buffer, "temp", "temp");
  CodecUtil.writeIndexHeader(writer, "FooBar", 5, StringHelper.randomId(), "xyz");
  writer.writeString("this is the data");
  writer.close();

  final IndexInput reader = new ByteBuffersIndexInput(buffer.toDataInput(), "temp");
  final long payloadStart = CodecUtil.indexHeaderLength("FooBar", "xyz");
  reader.seek(payloadStart);
  assertEquals("this is the data", reader.readString());
  reader.close();
}

Source File: TestDoc.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Merges the two given segments into a new segment named {@code merged} and returns a commit
 * info for it. When {@code useCompoundFile} is set, the merged files are written as a compound
 * file into {@code dir} and the individual files are deleted from {@code si1.info.dir}.
 *
 * @param dir directory the compound file is written to
 * @param si1 first segment to merge; its directory also receives the merged segment
 * @param si2 second segment to merge
 * @param merged name of the merged segment
 * @param useCompoundFile whether to pack the merged files into a compound file
 * @return commit info wrapping the merged segment
 */
private SegmentCommitInfo merge(Directory dir, SegmentCommitInfo si1, SegmentCommitInfo si2, String merged, boolean useCompoundFile)
  throws Exception {
  IOContext context = newIOContext(random(), new IOContext(new MergeInfo(-1, -1, false, -1)));

  final Codec codec = Codec.getDefault();
  TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(si1.info.dir);
  final SegmentInfo si = new SegmentInfo(si1.info.dir, Version.LATEST, null, merged, -1, false, codec, Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);

  // try-with-resources: both readers are released even if the merge throws
  try (SegmentReader r1 = new SegmentReader(si1, Version.LATEST.major, context);
       SegmentReader r2 = new SegmentReader(si2, Version.LATEST.major, context)) {
    SegmentMerger merger = new SegmentMerger(Arrays.<CodecReader>asList(r1, r2),
                                             si, InfoStream.getDefault(), trackingDir,
                                             new FieldInfos.FieldNumbers(null), context);
    // the returned merge state is not needed here
    merger.merge();
  }
  si.setFiles(new HashSet<>(trackingDir.getCreatedFiles()));

  if (useCompoundFile) {
    // remember the pre-compound files so they can be removed once the compound file exists
    Collection<String> filesToDelete = si.files();
    codec.compoundFormat().write(dir, si, context);
    si.setUseCompoundFile(true);
    for (String name : filesToDelete) {
      si1.info.dir.deleteFile(name);
    }
  }

  return new SegmentCommitInfo(si, 0, 0, -1L, -1L, -1L, StringHelper.randomId());
}

Source File: DocTermOrds.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Loads the current term of {@code termsEnum} into the {@code term} field, clearing it to
 * {@code null} when a prefix is set and the term does not start with it.
 *
 * @return the new value of {@code term}
 */
private BytesRef setTerm() throws IOException {
  term = termsEnum.term();
  final boolean withinPrefix = prefix == null || StringHelper.startsWith(term, prefix);
  if (!withinPrefix) {
    term = null;
  }
  return term;
}

Source File: CodecUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Expert: reads {@code StringHelper.ID_LENGTH} bytes from {@code in} and verifies that they
 * equal {@code expectedID}.
 *
 * @param in input positioned at the id
 * @param expectedID id the bytes must match
 * @return the id bytes that were read
 * @throws CorruptIndexException if the bytes read differ from {@code expectedID}
 * @throws IOException if reading fails
 */
public static byte[] checkIndexHeaderID(DataInput in, byte[] expectedID) throws IOException {
  final byte[] actualID = new byte[StringHelper.ID_LENGTH];
  in.readBytes(actualID, 0, actualID.length);
  if (Arrays.equals(actualID, expectedID)) {
    return actualID;
  }
  throw new CorruptIndexException("file mismatch, expected id=" + StringHelper.idToString(expectedID)
                                                     + ", got=" + StringHelper.idToString(actualID), in);
}

Source File: MultiPhrasePrefixQuery.java From crate with Apache License 2.0

5 votes

/**
 * Collects into {@code terms} every term of {@code field} that starts with the bytes of
 * {@code prefix}, visiting the reader leaf by leaf. Collection stops as soon as {@code terms}
 * holds at least {@code maxExpansions} entries.
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used, but it would merge the terms of all segments into a single
    // terms instance, which is very expensive; iterating over each leaf individually avoids that.
    for (LeafReaderContext context : reader.leaves()) {
        final Terms leafTerms = context.reader().terms(field);
        if (leafTerms == null) {
            continue;
        }

        final TermsEnum termsEnum = leafTerms.iterator();
        if (termsEnum.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        BytesRef current = termsEnum.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = termsEnum.next();
        }
    }
}

Source File: ModuloBucketBuilder.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Hash used for bucketing: {@code 0} for {@code null}, a murmur3 hash with the fixed seed
 * {@code 1} for a {@code BytesRef}, and {@code value.hashCode()} for anything else.
 */
private static int hashCode(@Nullable Object value) {
    if (value instanceof BytesRef) {
        // Since lucene 4.8, BytesRef.hashCode() uses a random seed that differs between JVMs,
        // so the hashCode / routing would differ on each node and break the group by
        // redistribution logic. A fixed seed keeps the result consistent.
        return StringHelper.murmurhash3_x86_32((BytesRef) value, 1);
    }
    return value == null ? 0 : value.hashCode();
}

Source File: SrndPrefixQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Visits every term of {@code fieldName} that starts with the prefix, in the order the terms
 * enum returns them, starting at the first term at or after the prefix.
 * Does nothing when the field has no terms.
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }
  final TermsEnum termsEnum = terms.iterator();
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));

  final boolean firstTermMatched;
  if (status == TermsEnum.SeekStatus.FOUND) {
    // exact hit on the prefix itself
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
    firstTermMatched = true;
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
    firstTermMatched = true;
  } else {
    // either the enum is exhausted or the first candidate is already past the prefix
    firstTermMatched = false;
  }

  if (firstTermMatched) {
    for (BytesRef text = termsEnum.next();
         text != null && StringHelper.startsWith(text, prefixRef);
         text = termsEnum.next()) {
      mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
    }
  }
}

Source File: SrndTruncQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Visits every term of {@code fieldName} that starts with {@code prefixRef} and whose
 * remainder after the prefix matches {@code pattern}. Scanning stops at the first term that no
 * longer starts with the prefix. The matcher is reset when scanning ends, normally or not.
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms == null) {
    return;
  }
  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = terms.iterator();

    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
    BytesRef text = null; // stays null when the seek ran off the end
    if (status == TermsEnum.SeekStatus.FOUND) {
      text = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      text = termsEnum.term();
    }

    while (text != null && StringHelper.startsWith(text, prefixRef)) {
      final String textString = text.utf8ToString();
      // only the part after the prefix is matched against the pattern
      matcher.reset(textString.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, textString));
      }
      text = termsEnum.next();
    }
  } finally {
    matcher.reset();
  }
}

Source File: SimpleTextUtil.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Verifies the footer of a SimpleText file: the next line must start with {@code CHECKSUM},
 * the value after that marker must equal the input's checksum formatted as a 20-digit
 * zero-padded decimal, and nothing may follow the footer line.
 *
 * @param input input positioned at the footer line
 * @throws CorruptIndexException if any of the three checks fails
 * @throws IOException if reading fails
 */
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  // taken before the footer line is read (presumably reading would advance the running checksum)
  final String expected = String.format(Locale.ROOT, "%020d", input.getChecksum());
  final BytesRefBuilder line = new BytesRefBuilder();
  readLine(input, line);
  if (!StringHelper.startsWith(line.get(), CHECKSUM)) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got " + line.get().utf8ToString(), input);
  }
  final int valueOffset = CHECKSUM.length;
  final String actual = new BytesRef(line.bytes(), valueOffset, line.length() - valueOffset).utf8ToString();
  if (expected.equals(actual) == false) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actual + " != " + expected, input);
  }
  if (input.getFilePointer() != input.length()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}

Source File: MultiPhrasePrefixQuery.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Adds to {@code terms} each term of {@code field} whose bytes start with the bytes of
 * {@code prefix}, one index leaf at a time, and returns early once {@code terms} has reached
 * {@code maxExpansions} entries.
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // A SlowCompositeReaderWrapper would merge all terms from each segment into one terms instance,
    // which is very expensive, so each leaf is walked on its own instead.
    final List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        final Terms fieldTerms = leaf.reader().terms(field);
        if (fieldTerms != null) {
            final TermsEnum termsEnum = fieldTerms.iterator();
            final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
            if (seekStatus != TermsEnum.SeekStatus.END) {
                BytesRef candidate = termsEnum.term();
                // stop at the first term that is missing or no longer carries the prefix
                while (candidate != null && StringHelper.startsWith(candidate, prefix.bytes())) {
                    terms.add(new Term(field, BytesRef.deepCopyOf(candidate)));
                    if (terms.size() >= maxExpansions) {
                        return;
                    }
                    candidate = termsEnum.next();
                }
            }
        }
    }
}

org.apache.lucene.util.StringHelper Java Examples