org.apache.lucene.store.ByteArrayDataInput Java Examples
The following examples show how to use org.apache.lucene.store.ByteArrayDataInput, a DataInput implementation backed by a plain byte[] that reads entirely from memory and, by design, omits low-level bounds checks. Each example is drawn from a real project; the source file, project, and license are noted above every snippet.
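Before the project examples, here is a minimal self-contained round trip written for this page (the class name and the values are illustrative, not taken from any project below): ByteArrayDataOutput encodes values into a caller-supplied buffer, and ByteArrayDataInput reads them back.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ByteArrayDataInputDemo {
  public static void main(String[] args) throws IOException {
    // Encode a vInt and a string into a fixed-size buffer.
    byte[] buffer = new byte[64];
    ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeVInt(42);
    out.writeString("hello");

    // Wrap only the written slice and read the values back.
    ByteArrayDataInput in = new ByteArrayDataInput(buffer, 0, out.getPosition());
    System.out.println(in.readVInt());   // 42
    System.out.println(in.readString()); // hello
  }
}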
Example #1
Source File: OrdsSegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  assert numBytes > 0;
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  nextFloorTermOrd = termOrdOrig + floorDataReader.readVLong();
  // System.out.println("  setFloorData ord=" + ord + " nextFloorTermOrd=" + nextFloorTermOrd + " shift=" + (nextFloorTermOrd - termOrdOrig));
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #2
Source File: TestBlockPostingsFormat.java From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene50SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene50ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
Example #3
Source File: TestFSTDirectAddressing.java From lucene-solr with Apache License 2.0
private static void countFSTArcs(String fstFilePath) throws IOException {
  byte[] buf = Files.readAllBytes(Paths.get(fstFilePath));
  DataInput in = new ByteArrayDataInput(buf);
  FST<BytesRef> fst = new FST<>(in, in, ByteSequenceOutputs.getSingleton());
  BytesRefFSTEnum<BytesRef> fstEnum = new BytesRefFSTEnum<>(fst);
  int binarySearchArcCount = 0, directAddressingArcCount = 0, listArcCount = 0;
  while (fstEnum.next() != null) {
    if (fstEnum.arcs[fstEnum.upto].bytesPerArc() == 0) {
      listArcCount++;
    } else if (fstEnum.arcs[fstEnum.upto].nodeFlags() == FST.ARCS_FOR_DIRECT_ADDRESSING) {
      directAddressingArcCount++;
    } else {
      binarySearchArcCount++;
    }
  }
  System.out.println("direct addressing arcs = " + directAddressingArcCount
      + ", binary search arcs = " + binarySearchArcCount
      + ", list arcs = " + listArcCount);
}
Example #4
Source File: TestLucene84PostingsFormat.java From lucene-solr with Apache License 2.0
private void doTestImpactSerialization(List<Impact> impacts) throws IOException {
  CompetitiveImpactAccumulator acc = new CompetitiveImpactAccumulator();
  for (Impact impact : impacts) {
    acc.add(impact.freq, impact.norm);
  }
  try (Directory dir = newDirectory()) {
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      Lucene84SkipWriter.writeImpacts(acc, out);
    }
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      byte[] b = new byte[Math.toIntExact(in.length())];
      in.readBytes(b, 0, b.length);
      List<Impact> impacts2 = Lucene84ScoreSkipReader.readImpacts(new ByteArrayDataInput(b), new MutableImpactList());
      assertEquals(impacts, impacts2);
    }
  }
}
Example #5
Source File: CompressingTermVectorsReader.java From lucene-solr with Apache License 2.0
void reset(int numTerms, int flags, int[] prefixLengths, int[] suffixLengths, int[] termFreqs,
    int[] positionIndex, int[] positions, int[] startOffsets, int[] lengths,
    int[] payloadIndex, BytesRef payloads, ByteArrayDataInput in) {
  this.numTerms = numTerms;
  this.prefixLengths = prefixLengths;
  this.suffixLengths = suffixLengths;
  this.termFreqs = termFreqs;
  this.positionIndex = positionIndex;
  this.positions = positions;
  this.startOffsets = startOffsets;
  this.lengths = lengths;
  this.payloadIndex = payloadIndex;
  this.payloads = payloads;
  this.in = in;
  startPos = in.getPosition();
  reset();
}
Example #6
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the contexts at the current position */
protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); // skip to context set size
  short ctxSetSize = tmpInput.readShort();
  scratch.length -= 2;
  final Set<BytesRef> contextSet = new HashSet<>();
  for (short i = 0; i < ctxSetSize; i++) {
    tmpInput.setPosition(scratch.offset + scratch.length - 2);
    short curContextLength = tmpInput.readShort();
    scratch.length -= 2;
    tmpInput.setPosition(scratch.offset + scratch.length - curContextLength);
    BytesRef contextSpare = new BytesRef(curContextLength);
    tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
    contextSpare.length = curContextLength;
    contextSet.add(contextSpare);
    scratch.length -= curContextLength;
  }
  return contextSet;
}
Example #7
Source File: Stemmer.java From lucene-solr with Apache License 2.0
/**
 * Constructs a new Stemmer which will use the provided Dictionary to create its stems.
 *
 * @param dictionary Dictionary that will be used to create the stems
 */
public Stemmer(Dictionary dictionary) {
  this.dictionary = dictionary;
  this.affixReader = new ByteArrayDataInput(dictionary.affixData);
  for (int level = 0; level < 3; level++) {
    if (dictionary.prefixes != null) {
      prefixArcs[level] = new FST.Arc<>();
      prefixReaders[level] = dictionary.prefixes.getBytesReader();
    }
    if (dictionary.suffixes != null) {
      suffixArcs[level] = new FST.Arc<>();
      suffixReaders[level] = dictionary.suffixes.getBytesReader();
    }
  }
  formStep = dictionary.hasStemExceptions ? 2 : 1;
}
Example #8
Source File: SegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public SegmentTermsEnumFrame(SegmentTermsEnum ste, int ord) throws IOException {
  this.ste = ste;
  this.ord = ord;
  this.state = ste.fr.parent.postingsReader.newTermState();
  this.state.totalTermFreq = -1;
  this.version = ste.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
Example #9
Source File: BlockTreeTermsReader.java From incubator-retired-blur with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #10
Source File: CompressingTermVectorsReader.java From lucene-solr with Apache License 2.0
@Override
public TermsEnum iterator() throws IOException {
  TVTermsEnum termsEnum = new TVTermsEnum();
  termsEnum.reset(numTerms, flags, prefixLengths, suffixLengths, termFreqs, positionIndex,
      positions, startOffsets, lengths, payloadIndex, payloadBytes,
      new ByteArrayDataInput(termBytes.bytes, termBytes.offset, termBytes.length));
  return termsEnum;
}
Example #11
Source File: IntersectTermsEnumFrame.java From lucene-solr with Apache License 2.0
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
  this.ite = ite;
  this.ord = ord;
  this.termState = ite.fr.parent.postingsReader.newTermState();
  this.termState.totalTermFreq = -1;
  this.version = ite.fr.parent.version;
  if (version >= BlockTreeTermsReader.VERSION_COMPRESSED_SUFFIXES) {
    suffixLengthBytes = new byte[32];
    suffixLengthsReader = new ByteArrayDataInput();
  } else {
    suffixLengthBytes = null;
    suffixLengthsReader = suffixesReader;
  }
}
Example #12
Source File: SegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #13
Source File: BaseSynonymParserTestCase.java From lucene-solr with Apache License 2.0
/**
 * Helper method to validate synonym parsing.
 *
 * @param synonymMap  the generated synonym map after parsing
 * @param word        word (phrase) we are validating the synonyms for. Should be the value that comes out of the analyzer.
 *                    All spaces will be replaced by word separators.
 * @param includeOrig if synonyms should include original
 * @param synonyms    actual synonyms. All word separators are replaced with a single space.
 */
public static void assertEntryEquals(SynonymMap synonymMap, String word, boolean includeOrig, String[] synonyms) throws Exception {
  word = word.replace(' ', SynonymMap.WORD_SEPARATOR);
  BytesRef value = Util.get(synonymMap.fst, Util.toUTF32(new CharsRef(word), new IntsRefBuilder()));
  assertNotNull("No synonyms found for: " + word, value);

  ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
  final int code = bytesReader.readVInt();
  final boolean keepOrig = (code & 0x1) == 0;
  assertEquals("Include original different than expected. Expected " + includeOrig + " was " + keepOrig,
      includeOrig, keepOrig);

  final int count = code >>> 1;
  assertEquals("Invalid synonym count. Expected " + synonyms.length + " was " + count,
      synonyms.length, count);

  Set<String> synonymSet = new HashSet<>(Arrays.asList(synonyms));
  BytesRef scratchBytes = new BytesRef();
  for (int i = 0; i < count; i++) {
    synonymMap.words.get(bytesReader.readVInt(), scratchBytes);
    String synonym = scratchBytes.utf8ToString().replace(SynonymMap.WORD_SEPARATOR, ' ');
    assertTrue("Unexpected synonym found: " + synonym, synonymSet.contains(synonym));
  }
}
Example #14
Source File: IDVersionSegmentTermsEnumFrame.java From lucene-solr with Apache License 2.0
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
  final int numBytes = source.length - (in.getPosition() - source.offset);
  if (numBytes > floorData.length) {
    floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
  }
  System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
  floorDataReader.reset(floorData, 0, numBytes);
  numFollowFloorBlocks = floorDataReader.readVInt();
  nextFloorLabel = floorDataReader.readByte() & 0xff;
  //if (DEBUG) {
  //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
  //}
}
Example #15
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the payload at the current position */
protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 2); // skip to payload size
  short payloadLength = tmpInput.readShort(); // read payload size
  assert payloadLength >= 0 : payloadLength;
  tmpInput.setPosition(scratch.offset + scratch.length - 2 - payloadLength); // setPosition to start of payload
  BytesRef payloadScratch = new BytesRef(payloadLength);
  tmpInput.readBytes(payloadScratch.bytes, 0, payloadLength); // read payload
  payloadScratch.length = payloadLength;
  scratch.length -= 2;             // payload length info (short)
  scratch.length -= payloadLength; // payload
  return payloadScratch;
}
Example #16
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
/** decodes the weight at the current position */
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  tmpInput.reset(scratch.bytes, scratch.offset, scratch.length);
  tmpInput.skipBytes(scratch.length - 8); // skip past the suggestion bytes to the trailing weight
  scratch.length -= Long.BYTES;           // strip the 8-byte weight from the scratch entry
  return tmpInput.readLong();
}
Example #17
Source File: SortedInputIterator.java From lucene-solr with Apache License 2.0
@Override
public BytesRef next() throws IOException {
  boolean success = false;
  if (done) {
    return null;
  }
  try {
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef bytes = reader.next();
    if (bytes != null) {
      weight = decode(bytes, input);
      if (hasPayloads) {
        payload = decodePayload(bytes, input);
      }
      if (hasContexts) {
        contexts = decodeContexts(bytes, input);
      }
      success = true;
      return bytes;
    }
    close();
    success = done = true;
    return null;
  } finally {
    if (!success) {
      done = true;
      close();
    }
  }
}
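Taken together, decode, decodePayload, and decodeContexts above peel fields off the end of a single scratch buffer, which pins down the entry layout: suggestion bytes first, then each context followed by its short length and a short context count, then the payload followed by its short length, and finally the long weight. Below is a sketch of a matching encoder; the class and helper names and the use of ByteBuffersDataOutput are illustrative assumptions (Lucene's real writer lives in SortedInputIterator and may differ in details), and it assumes both payloads and contexts are enabled.

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.util.BytesRef;

public class SortedEntryEncoderSketch {
  // Hypothetical helper, inferred from the three decoders above.
  static byte[] encodeEntry(BytesRef suggestion, Set<BytesRef> contexts,
                            BytesRef payload, long weight) throws IOException {
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    out.writeBytes(suggestion.bytes, suggestion.offset, suggestion.length);
    // Contexts: each context's bytes, then its length as a short, then the count.
    // (decodeContexts reads them back to front, so a Set's order is irrelevant.)
    for (BytesRef ctx : contexts) {
      out.writeBytes(ctx.bytes, ctx.offset, ctx.length);
      out.writeShort((short) ctx.length);
    }
    out.writeShort((short) contexts.size());
    // Payload: bytes followed by its length as a short.
    out.writeBytes(payload.bytes, payload.offset, payload.length);
    out.writeShort((short) payload.length);
    // Weight goes last: decode() reads it first, from the very end of the entry.
    out.writeLong(weight);
    return out.toArrayCopy();
  }
}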
Example #18
Source File: WFSTCompletionLookup.java From lucene-solr with Apache License 2.0
@Override
protected long decode(BytesRef scratch, ByteArrayDataInput tmpInput) {
  scratch.length -= 4; // int
  // skip suggestion:
  tmpInput.reset(scratch.bytes, scratch.offset + scratch.length, 4);
  return tmpInput.readInt();
}
Example #19
Source File: BlockReader.java From lucene-solr with Apache License 2.0
protected void initializeBlockReadLazily() throws IOException {
  if (blockStartFP == -1) {
    blockInput = blockInput.clone();
    blockHeaderReader = createBlockHeaderSerializer();
    blockLineReader = createBlockLineSerializer();
    blockReadBuffer = new ByteArrayDataInput();
    termStatesReadBuffer = new ByteArrayDataInput();
    termStateSerializer = createDeltaBaseTermStateSerializer();
    scratchBlockBytes = new BytesRef();
    scratchBlockLine = new BlockLine(new TermBytes(0, scratchBlockBytes), 0);
  }
}
Example #20
Source File: UniformSplitTermsReader.java From lucene-solr with Apache License 2.0
protected Collection<FieldMetadata> readEncodedFieldsMetadata(int numFields, DataInput metadataInput,
    BlockDecoder blockDecoder, FieldInfos fieldInfos, FieldMetadata.Serializer fieldMetadataReader,
    int maxNumDocs) throws IOException {
  long encodedLength = metadataInput.readVLong();
  if (encodedLength < 0) {
    throw new CorruptIndexException("Illegal encoded length: " + encodedLength, metadataInput);
  }
  BytesRef decodedBytes = blockDecoder.decode(metadataInput, encodedLength);
  DataInput decodedMetadataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
  return readUnencodedFieldsMetadata(numFields, decodedMetadataInput, fieldInfos, fieldMetadataReader, maxNumDocs);
}
Example #21
Source File: FSTTermsReader.java From lucene-solr with Apache License 2.0
BaseTermsEnum() throws IOException {
  this.state = postingsReader.newTermState();
  this.bytesReader = new ByteArrayDataInput();
  // NOTE: metadata will only be initialized in child class
}
Example #22
Source File: AbstractTestCompressionMode.java From lucene-solr with Apache License 2.0
static byte[] decompress(Decompressor decompressor, byte[] compressed, int originalLength) throws IOException {
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, 0, originalLength, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
Example #23
Source File: AbstractTestCompressionMode.java From lucene-solr with Apache License 2.0
byte[] decompress(byte[] compressed, int originalLength, int offset, int length) throws IOException {
  Decompressor decompressor = mode.newDecompressor();
  final BytesRef bytes = new BytesRef();
  decompressor.decompress(new ByteArrayDataInput(compressed), originalLength, offset, length, bytes);
  return BytesRef.deepCopyOf(bytes).bytes;
}
Example #24
Source File: FSTCompletionLookup.java From lucene-solr with Apache License 2.0
@Override
public void build(InputIterator iterator) throws IOException {
  if (iterator.hasPayloads()) {
    throw new IllegalArgumentException("this suggester doesn't support payloads");
  }
  if (iterator.hasContexts()) {
    throw new IllegalArgumentException("this suggester doesn't support contexts");
  }

  OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix);
  ExternalRefSorter externalSorter = new ExternalRefSorter(sorter);
  IndexOutput tempInput = tempDir.createTempOutput(tempFileNamePrefix, "input", IOContext.DEFAULT);
  String tempSortedFileName = null;

  OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
  OfflineSorter.ByteSequencesReader reader = null;

  // Push floats up front before sequences to sort them. For now, assume they are non-negative.
  // If negative floats are allowed some trickery needs to be done to find their byte order.
  count = 0;
  try {
    byte[] buffer = new byte[0];
    ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
    BytesRef spare;
    int inputLineCount = 0;
    while ((spare = iterator.next()) != null) {
      if (spare.length + 4 >= buffer.length) {
        buffer = ArrayUtil.grow(buffer, spare.length + 4);
      }
      output.reset(buffer);
      output.writeInt(encodeWeight(iterator.weight()));
      output.writeBytes(spare.bytes, spare.offset, spare.length);
      writer.write(buffer, 0, output.getPosition());
      inputLineCount++;
    }
    CodecUtil.writeFooter(tempInput);
    writer.close();

    // We don't know the distribution of scores and we need to bucket them, so we'll sort
    // and divide into equal buckets.
    tempSortedFileName = sorter.sort(tempInput.getName());
    tempDir.deleteFile(tempInput.getName());

    FSTCompletionBuilder builder = new FSTCompletionBuilder(buckets, externalSorter, sharedTailLength);

    reader = new OfflineSorter.ByteSequencesReader(
        tempDir.openChecksumInput(tempSortedFileName, IOContext.READONCE), tempSortedFileName);
    long line = 0;
    int previousBucket = 0;
    int previousScore = 0;
    ByteArrayDataInput input = new ByteArrayDataInput();
    BytesRef tmp2 = new BytesRef();
    while (true) {
      BytesRef scratch = reader.next();
      if (scratch == null) {
        break;
      }
      input.reset(scratch.bytes, scratch.offset, scratch.length);
      int currentScore = input.readInt();

      int bucket;
      if (line > 0 && currentScore == previousScore) {
        bucket = previousBucket;
      } else {
        bucket = (int) (line * buckets / inputLineCount);
      }
      previousScore = currentScore;
      previousBucket = bucket;

      // Only append the input, discard the weight.
      tmp2.bytes = scratch.bytes;
      tmp2.offset = scratch.offset + input.getPosition();
      tmp2.length = scratch.length - input.getPosition();
      builder.add(tmp2, bucket);

      line++;
      count++;
    }

    // The two FSTCompletions share the same automaton.
    this.higherWeightsCompletion = builder.build();
    this.normalCompletion = new FSTCompletion(higherWeightsCompletion.getFST(), false, exactMatchFirst);
  } finally {
    IOUtils.closeWhileHandlingException(reader, writer, externalSorter);
    IOUtils.deleteFilesIgnoringExceptions(tempDir, tempInput.getName(), tempSortedFileName);
  }
}
Example #25
Source File: Test2BBinaryDocValues.java From lucene-solr with Apache License 2.0
public void testVariableBinary() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BVariableBinary"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper) dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriter w = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setRAMBufferSizeMB(256.0)
          .setMergeScheduler(new ConcurrentMergeScheduler())
          .setMergePolicy(newLogMergePolicy(false, 10))
          .setOpenMode(IndexWriterConfig.OpenMode.CREATE)
          .setCodec(TestUtil.getDefaultCodec()));

  Document doc = new Document();
  byte[] bytes = new byte[4];
  ByteArrayDataOutput encoder = new ByteArrayDataOutput(bytes);
  BytesRef data = new BytesRef(bytes);
  BinaryDocValuesField dvField = new BinaryDocValuesField("dv", data);
  doc.add(dvField);

  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    encoder.reset(bytes);
    encoder.writeVInt(i % 65535); // 1, 2, or 3 bytes
    data.length = encoder.getPosition();
    w.addDocument(doc);
    if (i % 100000 == 0) {
      System.out.println("indexed: " + i);
      System.out.flush();
    }
  }

  w.forceMerge(1);
  w.close();

  System.out.println("verifying...");
  System.out.flush();

  DirectoryReader r = DirectoryReader.open(dir);
  int expectedValue = 0;
  ByteArrayDataInput input = new ByteArrayDataInput();
  for (LeafReaderContext context : r.leaves()) {
    LeafReader reader = context.reader();
    BinaryDocValues dv = reader.getBinaryDocValues("dv");
    for (int i = 0; i < reader.maxDoc(); i++) {
      assertEquals(i, dv.nextDoc());
      final BytesRef term = dv.binaryValue();
      input.reset(term.bytes, term.offset, term.length);
      assertEquals(expectedValue % 65535, input.readVInt());
      assertTrue(input.eof());
      expectedValue++;
    }
  }

  r.close();
  dir.close();
}
Example #26
Source File: LZ4TestCase.java From lucene-solr with Apache License 2.0
private void doTest(byte[] data, int offset, int length, LZ4.HashTable hashTable) throws IOException {
  ByteBuffersDataOutput out = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out, hashTable);
  byte[] compressed = out.toArrayCopy();

  int off = 0;
  int decompressedOff = 0;
  for (;;) {
    final int token = compressed[off++] & 0xFF;
    int literalLen = token >>> 4;
    if (literalLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        literalLen += 0xFF;
        ++off;
      }
      literalLen += compressed[off++] & 0xFF;
    }
    // skip literals
    off += literalLen;
    decompressedOff += literalLen;

    // check that the stream ends with literals and that there are at least
    // 5 of them
    if (off == compressed.length) {
      assertEquals(length, decompressedOff);
      assertTrue("lastLiterals=" + literalLen + ", bytes=" + length,
          literalLen >= LZ4.LAST_LITERALS || literalLen == length);
      break;
    }

    final int matchDec = (compressed[off++] & 0xFF) | ((compressed[off++] & 0xFF) << 8);
    // check that match dec is not 0
    assertTrue(matchDec + " " + decompressedOff, matchDec > 0 && matchDec <= decompressedOff);

    int matchLen = token & 0x0F;
    if (matchLen == 0x0F) {
      while (compressed[off] == (byte) 0xFF) {
        matchLen += 0xFF;
        ++off;
      }
      matchLen += compressed[off++] & 0xFF;
    }
    matchLen += LZ4.MIN_MATCH;

    // if the match ends prematurely, the next sequence should not have
    // literals or this means we are wasting space
    if (decompressedOff + matchLen < length - LZ4.LAST_LITERALS) {
      final boolean moreCommonBytes = data[offset + decompressedOff + matchLen]
          == data[offset + decompressedOff - matchDec + matchLen];
      final boolean nextSequenceHasLiterals = ((compressed[off] & 0xFF) >>> 4) != 0;
      assertTrue(moreCommonBytes == false || nextSequenceHasLiterals == false);
    }

    decompressedOff += matchLen;
  }
  assertEquals(length, decompressedOff);

  // Compress once again with the same hash table to test reuse
  ByteBuffersDataOutput out2 = new ByteBuffersDataOutput();
  LZ4.compress(data, offset, length, out2, hashTable);
  assertArrayEquals(compressed, out2.toArrayCopy());

  // Now restore and compare bytes
  byte[] restored = new byte[length + random().nextInt(10)];
  LZ4.decompress(new ByteArrayDataInput(compressed), length, restored);
  assertArrayEquals(ArrayUtil.copyOfSubArray(data, offset, offset + length),
      ArrayUtil.copyOfSubArray(restored, 0, length));
}
Example #27
Source File: BlockTermsReader.java From lucene-solr with Apache License 2.0
private boolean nextBlock() throws IOException {
  // TODO: we still lazy-decode the byte[] for each
  // term (the suffix), but, if we decoded
  // all N terms up front then seeking could do a fast
  // bsearch w/in the block...

  //System.out.println("BTR.nextBlock() fp=" + in.getFilePointer() + " this=" + this);
  state.blockFilePointer = in.getFilePointer();
  blockTermCount = in.readVInt();
  //System.out.println("  blockTermCount=" + blockTermCount);
  if (blockTermCount == 0) {
    return false;
  }
  termBlockPrefix = in.readVInt();

  // term suffixes:
  int len = in.readVInt();
  if (termSuffixes.length < len) {
    termSuffixes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  termSuffixes len=" + len);
  in.readBytes(termSuffixes, 0, len);
  termSuffixesReader.reset(termSuffixes, 0, len);

  // docFreq, totalTermFreq
  len = in.readVInt();
  if (docFreqBytes.length < len) {
    docFreqBytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  //System.out.println("  freq bytes len=" + len);
  in.readBytes(docFreqBytes, 0, len);
  freqReader.reset(docFreqBytes, 0, len);

  // metadata
  len = in.readVInt();
  if (bytes == null) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
    bytesReader = new ByteArrayDataInput();
  } else if (bytes.length < len) {
    bytes = new byte[ArrayUtil.oversize(len, 1)];
  }
  in.readBytes(bytes, 0, len);
  bytesReader.reset(bytes, 0, len);

  metaDataUpto = 0;
  state.termBlockOrd = 0;
  indexIsCurrent = false;
  //System.out.println("  indexIsCurrent=" + indexIsCurrent);

  return true;
}
Example #28
Source File: Store.java From crate with Apache License 2.0
public long getStoredChecksum() {
  return new ByteArrayDataInput(checksum).readLong();
}
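A minimal sketch of the matching write side follows. The class name, the 8-byte buffer, and the literal value are illustrative assumptions, not taken from the crate Store source; it only demonstrates that a long written by ByteArrayDataOutput is recovered by the one-liner above.

import java.io.IOException;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public class ChecksumRoundTrip {
  public static void main(String[] args) throws IOException {
    // Write a long into an 8-byte buffer, the shape getStoredChecksum() expects.
    byte[] checksum = new byte[Long.BYTES];
    new ByteArrayDataOutput(checksum).writeLong(0xCAFEBABEL);

    // Read it back exactly as getStoredChecksum() does.
    long stored = new ByteArrayDataInput(checksum).readLong();
    System.out.println(Long.toHexString(stored)); // cafebabe
  }
}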