org.apache.lucene.store.ByteArrayDataInput Java Examples
The following examples show how to use org.apache.lucene.store.ByteArrayDataInput, a DataInput implementation backed by a plain byte[] that reads entirely from memory and, by design, omits low-level bounds checks. Each example is drawn from a real project; the source file, project, and license are noted above every snippet.
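Before the project examples, here is a minimal self-contained round trip written for this page (the class name and the values are illustrative, not taken from any project below): ByteArrayDataOutput encodes values into a caller-supplied buffer, and ByteArrayDataInput reads them back.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ByteArrayDataInputDemo {
  public static void main(String[] args) throws IOException {
    // Encode a vInt and a string into a fixed-size buffer.
    byte[] buffer = new byte[64];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeVInt(42);
    out.writeString("hello");

    // Wrap only the written slice and read the values back.
    ByteArrayDataInput in = new ByteArrayDataInput(buffer, 0, out.getPosition());
    System.out.println(in.readVInt());   // 42
    System.out.println(in.readString()); // hello
  }
}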
Example #1
Source File: OrdsSegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  assert numBytes > 0;
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
  // System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + " shift=" + (nextFloorTermOrd - termOrdOrig));
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #2
Source File: TestBlockPostingsFormat.java From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene50SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
Example #3
Source File: TestFSTDirectAddressing.java From lucene-solr with Apache License 2.0
private static void countFSTArcs(String fstFilePath) throws IOException {
  byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
  DataInput in = new ByteArrayDataInput(buf);
  FST<BytesRef> fst = new FST<>(in, in, ByteSequenceOutputs.getSingleton());
  BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
  int binarySearchArcCount = 0, directAddressingArcCount = 0, listArcCount = 0;
  while (fstEnum.next() != null) {
    if (fstEnum.arcs[fstEnum.upto].bytesPerArc() == 0) {
      listArcCount++;
    } else if (fstEnum.arcs[fstEnum.upto].nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) {
      directAddressingArcCount++;
    } else {
      binarySearchArcCount++;
    }
  }
  System.out.println("direct addressing arcs = " + directAddressingArcCount
      + ", binary search arcs = " + binarySearchArcCount
      + ", list arcs = " + listArcCount);
}
Example #4
Source File: TestLucene84PostingsFormat.java From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene84SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene84ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
Example #5
Source File: CompressingTermVectorsReader.java From lucene-solr with Apache License 2.0
void reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs,
    int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
    int[] payloadIndex, BytesRef payloads, ByteArrayDataInput in) {
  this.numTerms = numTerms;
  this.prefixLengths = prefixLengths;
  this.suffixLengths = suffixLengths;
  this.termFreqs = termFreqs;
  this.positionIndex = positionIndex;
  this.positions = positions;
  this.startOffsets = startOffsets;
  this.lengths = lengths;
  this.payloadIndex = payloadIndex;
  this.payloads = payloads;
  this.in = in;
  startPos = in.getPosition();
  reset();
}
Example #6
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the contexts at the current position */
protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); // skip to context set size
  short ctxSetSize = tmpInput.readShort();
  scratch.length -= 2;
  final Set<BytesRef> contextSet = new HashSet<>();
  for (short i = 0; i < ctxSetSize; i++) {
    tmpInput.setPosition(scratch.offset + scratch.length - 2);
    short curContextLength = tmpInput.readShort();
    scratch.length -= 2;
    tmpInput.setPosition(scratch.offset + scratch.length - curContextLength);
    BytesRef contextSpare = new BytesRef(curContextLength);
    tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
    contextSpare.length = curContextLength;
    contextSet.add(contextSpare);
    scratch.length -= curContextLength;
  }
  return contextSet;
}
Example #7
Source File: Stemmer.java From lucene-solr with Apache License 2.0
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
 * @param dictionary Dictionary that will be used to create the stems
 */
public Stemmer(Dictionary dictionary) {
  this.dictionary = dictionary;
  this.affixReader = new ByteArrayDataInput(dictionary.affixData);
  for (int level = 0; level < 3; level++) {
    if (dictionary.prefixes != null) {
      prefixArcs[level] = new FST.Arc<>();
      prefixReaders[level] = dictionary.prefixes.getBytesReader();
    }
    if (dictionary.suffixes != null) {
      suffixArcs[level] = new FST.Arc<>();
      suffixReaders[level] = dictionary.suffixes.getBytesReader();
    }
  }
  formStep = dictionary.hasStemExceptions ? 2 : 1;
}
Example #8
Source File: SegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
  this.ste = ste;
  this.ord = ord;
  this.state = ste.fr.parent.postingsReader.newTermState();
  this.state.totalTermFreq = -1;
  this.version = ste.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
Example #9
Source File: BlockTreeTermsReader.java From incubator-retired-blur with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #10
Source File: CompressingTermVectorsReader.java From lucene-solr with Apache License 2.0
@Override
public TermsEnum iterator() throws IOException {
  TVTermsEnum termsEnum = new TVTermsEnum();
  termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex,
      positions, startOffsets, lengths, payloadIndex, payloadBytes,
      new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
  return termsEnum;
}
Example #11
Source File: IntersectTermsEnumFrame.java From lucene-solr with Apache License 2.0
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
  this.ite = ite;
  this.ord = ord;
  this.termState = ite.fr.parent.postingsReader.newTermState();
  this.termState.totalTermFreq = -1;
  this.version = ite.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
Example #12
Source File: SegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #13
Source File: BaseSynonymParserTestCase.java From lucene-solr with Apache License 2.0
/**
 * Helper method to validate synonym parsing.
 *
 * @param synonymMap  the generated synonym map after parsing
 * @param word        word (phrase) we are validating the synonyms for. Should be the value that comes out of the analyzer.
 *                    All spaces will be replaced by word separators.
 * @param includeOrig if synonyms should include original
 * @param synonyms    actual synonyms. All word separators are replaced with a single space.
 */
public static void assertEntryEquals(SynonymMap synonymMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
  word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  BytesRef value = Util.get(synonymMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
  assertNotNull("No synonyms found for: " + word, value);

  ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
  final int code = bytesReader.readVInt();
  final boolean keepOrig = (code & 0x1) == 0;
  assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
      includeOrig, keepOrig);

  final int count = code >>> 1;
  assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count,
      synonyms.length, count);

  Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));
  BytesRef scratchBytes = new BytesRef();
  for (int i = 0; i < count; i++) {
    synonymMap.words.get(bytesReader.readVInt(), scratchBytes);
    String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
    assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
  }
}
Example #14
Source File: IDVersionSegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #15
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the payload at the current position */
protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); // skip to payload size
  short payloadLength = tmpInput.readShort(); // read payload size
  assert payloadLength >= 0 : payloadLength;
  tmpInput.setPosition(scratch.offset + scratch.length - 2 - payloadLength); // setPosition to start of payload
  BytesRef payloadScratch = new BytesRef(payloadLength);
  tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
  payloadScratch.length = payloadLength;
  scratch.length -= 2;             // payload length info (short)
  scratch.length -= payloadLength; // payload
  return payloadScratch;
}
Example #16
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 8); // skip past the suggestion bytes to the trailing weight
  scratch.length -= Long.BYTES;           // strip the 8-byte weight from the scratch entry
  return tmpInput.readLong();
}
Example #17
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
@Override
public BytesRef next() throws IOException {
  boolean success = false;
  if (done) {
    return null;
  }
  try {
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef bytes = reader.next();
    if (bytes != null) {
      weight = decode(bytes, input);
      if (hasPayloads) {
        payload = decodePayload(bytes, input);
      }
      if (hasContexts) {
        contexts = decodeContexts(bytes, input);
      }
      success = true;
      return bytes;
    }
    close();
    success = done = true;
    return null;
  } finally {
    if (!success) {
      done = true;
      close();
    }
  }
}
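Taken together, decode, decodePayload, and decodeContexts above peel fields off the end of a single scratch buffer, which pins down the entry layout: suggestion bytes first, then each context followed by its short length and a short context count, then the payload followed by its short length, and finally the long weight. Below is a sketch of a matching encoder; the class and helper names and the use of ByteBuffersDataOutput are illustrative assumptions (Lucene's real writer lives in SortedInputIterator and may differ in details), and it assumes both payloads and contexts are enabled.

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.util.BytesRef;

public class SortedEntryEncoderSketch {
  // Hypothetical helper, inferred from the three decoders above.
  static byte[] encodeEntry(BytesRef suggestion, Set<BytesRef> contexts,
                            BytesRef payload, long weight) throws IOException {
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    out.writeBytes(suggestion.bytes, suggestion.offset, suggestion.length);
    // Contexts: each context's bytes, then its length as a short, then the count.
    // (decodeContexts reads them back to front, so a Set's order is irrelevant.)
    for (BytesRef ctx : contexts) {
      out.writeBytes(ctx.bytes, ctx.offset, ctx.length);
      out.writeShort((short) ctx.length);
    }
    out.writeShort((short) contexts.size());
    // Payload: bytes followed by its length as a short.
    out.writeBytes(payload.bytes, payload.offset, payload.length);
    out.writeShort((short) payload.length);
    // Weight goes last: decode() reads it first, from the very end of the entry.
    out.writeLong(weight);
    return out.toArrayCopy();
  }
}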
Example #18
Source File: WFSTCompletionLookup.java From lucene-solr with Apache License 2.0
@Override
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  scratch.length -= 4; // int
  // skip suggestion:
  tmpInput.reset(scratch.bytes, scratch.offset + scratch.length, 4);
  return tmpInput.readInt();
}
Example #19
Source File: BlockReader.java From lucene-solr with Apache License 2.0
protected void initializeBlockReadLazily() throws IOException {
  if (blockStartFP == -1) {
    blockInput = blockInput.clone();
    blockHeaderReader = createBlockHeaderSerializer();
    blockLineReader = createBlockLineSerializer();
    blockReadBuffer = new ByteArrayDataInput();
    termStatesReadBuffer = new ByteArrayDataInput();
    termStateSerializer = createDeltaBaseTermStateSerializer();
    scratchBlockBytes = new BytesRef();
    scratchBlockLine = new BlockLine(new TermBytes(0, scratchBlockBytes), 0);
  }
}
Example #20
Source File: UniformSplitTermsReader.java From lucene-solr with Apache License 2.0
protected Collection<FieldMetadata> readEncodedFieldsMetadata(int numFields, DataInput metadataInput,
    BlockDecoder blockDecoder, FieldInfos fieldInfos, FieldMetadata.Serializer fieldMetadataReader,
    int maxNumDocs) throws IOException {
  long encodedLength = metadataInput.readVLong();
  if (encodedLength < 0) {
    throw new CorruptIndexException("Illegal encoded length: " + encodedLength, metadataInput);
  }
  BytesRef decodedBytes = blockDecoder.decode(metadataInput, encodedLength);
  DataInput decodedMetadataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
  return readUnencodedFieldsMetadata(numFields, decodedMetadataInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}
Example #21
Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0
BaseTermsEnum() throws IOException {
  this.state = postingsReader.newTermState();
  this.bytesReader = new ByteArrayDataInput();
  // NOTE: metadata will only be initialized in child class
}
Example #22
Source File: AbstractTestCompressionMode.java From lucene-solr with Apache License 2.0
static byte[] decompress(Decompressor decompressor, byte[] compressed, int originalLength) throws IOException {
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, 0, originalLength, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
Example #23
Source File: AbstractTestCompressionMode.java From lucene-solr with Apache License 2.0
byte[] decompress(byte[] compressed, int originalLength, int offset, int length) throws IOException {
  Decompressor decompressor = mode.newDecompressor();
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
Example #24
Source File: FSTCompletionLookup.java From lucene-solr with Apache License 2.0
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
  ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
  IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
  String tempSortedFileName = null;

  OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  OfflineSorter.ByteSequencesReader reader = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  count = 0;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    int inputLineCount = 0;
    while ((spare = iterator.next()) != null) {
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }
      output.reset(buffer);
      output.writeInt(encodeWeight(iterator.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
      inputLineCount++;
    }
    CodecUtil.writeFooter(tempInput);
    writer.close();

    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    tempSortedFileName = sorter.sort(tempInput.getName());
    tempDir.deleteFile(tempInput.getName());

    FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, externalSorter, sharedTailLength);

    reader = new OfflineSorter.ByteSequencesReader(
        tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName);
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef tmp2 = new BytesRef();
    while (true) {
      BytesRef scratch = reader.next();
      if (scratch == null) {
        break;
      }
      input.reset(scratch.bytes, scratch.offset, scratch.length);
      int currentScore = input.readInt();

      int bucket;
      if (line > 0 && currentScore == previousScore) {
        bucket = previousBucket;
      } else {
        bucket = (int) (line * buckets / inputLineCount);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      tmp2.bytes = scratch.bytes;
      tmp2.offset = scratch.offset + input.getPosition();
      tmp2.length = scratch.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
      count++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion = new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
  } finally {
    IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
    IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
  }
}
Example #25
Source File: Test2BBinaryDocValues.java From lucene-solr with Apache License 2.0
public void testVariableBinary() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setCodec(TestUtil.getDefaultCodec()));

  Document doc = new Document();
  byte[] bytes = new byte[4];
  ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
  BytesRef data = new BytesRef(bytes);
  BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
  doc.add(dvField);

  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    encoder.reset(bytes);
    encoder.writeVInt(i % 65535); // 1, 2, or 3 bytes
    data.length = encoder.getPosition();
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }

  w.forceMerge(1);
  w.close();

  System.out.println("verifying...");
  System.out.flush();

  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  ByteArrayDataInput input = new ByteArrayDataInput();
  for (LeafReaderContext context : r.leaves()) {
    LeafReader reader = context.reader();
    BinaryDocValues dv = reader.getBinaryDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertEquals(i, dv.nextDoc());
      final BytesRef term = dv.binaryValue();
      input.reset(term.bytes, term.offset, term.length);
      assertEquals(expectedValue % 65535, input.readVInt());
      assertTrue(input.eof());
      expectedValue++;
    }
  }

  r.close();
  dir.close();
}
Example #26
Source File: LZ4TestCase.java From lucene-solr with Apache License 2.0
private void doTest(byte[] data, int offset, int length, LZ4.HashTable hashTable) throws IOException {
  ByteBuffersDataOutput out = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out, hashTable);
  byte[] compressed = out.toArrayCopy();

  int off = 0;
  int decompressedOff = 0;
  for (;;) {
    final int token = compressed[off++] & 0xFF;
    int literalLen = token >>> 4;
    if (literalLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        literalLen += 0xFF;
        ++off;
      }
      literalLen += compressed[off++] & 0xFF;
    }
    // skip literals
    off += literalLen;
    decompressedOff += literalLen;

    // check that the stream ends with literals and that there are at least
    // 5 of them
    if (off == compressed.length) {
      assertEquals(length, decompressedOff);
      assertTrue("lastLiterals=" + literalLen + ", bytes=" + length,
          literalLen >= LZ4.LAST_LITERALS || literalLen == length);
      break;
    }

    final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
    // check that match dec is not 0
    assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);

    int matchLen = token & 0x0F;
    if (matchLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        matchLen += 0xFF;
        ++off;
      }
      matchLen += compressed[off++] & 0xFF;
    }
    matchLen += LZ4.MIN_MATCH;

    // if the match ends prematurely, the next sequence should not have
    // literals or this means we are wasting space
    if (decompressedOff + matchLen < length - LZ4.LAST_LITERALS) {
      final boolean moreCommonBytes = data[offset + decompressedOff + matchLen]
          == data[offset + decompressedOff - matchDec + matchLen];
      final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
      assertTrue(moreCommonBytes == false || nextSequenceHasLiterals == false);
    }

    decompressedOff += matchLen;
  }
  assertEquals(length, decompressedOff);

  // Compress once again with the same hash table to test reuse
  ByteBuffersDataOutput out2 = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out2, hashTable);
  assertArrayEquals(compressed, out2.toArrayCopy());

  // Now restore and compare bytes
  byte[] restored = new byte[length + random().nextInt(10)];
  LZ4.decompress(new ByteArrayDataInput(compressed), length, restored);
  assertArrayEquals(ArrayUtil.copyOfSubArray(data, offset, offset + length),
      ArrayUtil.copyOfSubArray(restored, 0, length));
}
Example #27
Source File: BlockTermsReader.java From lucene-solr with Apache License 2.0
private boolean nextBlock() throws IOException {
  // TODO: we still lazy-decode the byte[] for each
  // term (the suffix), but, if we decoded
  // all N terms up front then seeking could do a fast
  // bsearch w/in the block...

  //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
  state.blockFilePointer = in.getFilePointer();
  blockTermCount = in.readVInt();
  //System.out.println("  blockTermCount=" + blockTermCount);
  if (blockTermCount == 0) {
    return false;
  }
  termBlockPrefix = in.readVInt();

  // term suffixes:
  int len = in.readVInt();
  if (termSuffixes.length < len) {
    termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  termSuffixes len=" + len);
  in.readBytes(termSuffixes, 0, len);
  termSuffixesReader.reset(termSuffixes, 0, len);

  // docFreq, totalTermFreq
  len = in.readVInt();
  if (docFreqBytes.length < len) {
    docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  freq bytes len=" + len);
  in.readBytes(docFreqBytes, 0, len);
  freqReader.reset(docFreqBytes, 0, len);

  // metadata
  len = in.readVInt();
  if (bytes == null) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
    bytesReader = new ByteArrayDataInput();
  } else if (bytes.length < len) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  in.readBytes(bytes, 0, len);
  bytesReader.reset(bytes, 0, len);

  metaDataUpto = 0;
  state.termBlockOrd = 0;
  indexIsCurrent = false;
  //System.out.println("  indexIsCurrent=" + indexIsCurrent);

  return true;
}
Example #28
Source File: Store.java From crate with Apache License 2.0
public long getStoredChecksum() {
  return new ByteArrayDataInput(checksum).readLong();
}
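A minimal sketch of the matching write side follows. The class name, the 8-byte buffer, and the literal value are illustrative assumptions, not taken from the crate Store source; it only demonstrates that a long written by ByteArrayDataOutput is recovered by the one-liner above.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ChecksumRoundTrip {
  public static void main(String[] args) throws IOException {
    // Write a long into an 8-byte buffer, the shape getStoredChecksum() expects.
    byte[] checksum = new byte[Long.BYTES];
    new ByteArrayDataOutput(checksum).writeLong(0xCAFEBABEL);

    // Read it back exactly as getStoredChecksum() does.
    long stored = new ByteArrayDataInput(checksum).readLong();
    System.out.println(Long.toHexString(stored)); // cafebabe
  }
}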