org.apache.lucene.util.packed.PackedInts Java Examples

Source File: BestBucketsDeferringCollector.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Finishes the leaf collected so far, then starts recording for {@code ctx}: fresh packed
 * builders are created for the doc-id deltas and for the bucket ordinals.
 *
 * @param ctx the leaf that is about to be collected
 * @return a collector that appends every (doc, bucket) pair to the per-leaf builders and
 *         keeps {@code maxBucket} up to date
 */
@Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx) throws IOException {
    finishLeaf();

    context = ctx;
    docDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT);
    buckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT);

    return new LeafBucketCollector() {
        // Doc id passed to the previous collect() call; 0 before the first call.
        int previousDoc = 0;

        @Override
        public void collect(int doc, long bucket) throws IOException {
            // Record the gap to the previous doc rather than the absolute doc id.
            final int delta = doc - previousDoc;
            docDeltas.add(delta);
            buckets.add(bucket);
            previousDoc = doc;
            if (bucket > maxBucket) {
                maxBucket = bucket;
            }
        }
    };
}

Source File: DiskDocValuesConsumer.java From incubator-retired-blur with Apache License 2.0

6 votes

/**
 * Writes a sorted-set field as three parts: the ord -> bytes dictionary (binary field), the
 * flat stream of ords (numeric field), and a monotonic index holding, for each doc, the
 * running total of ord counts up to and including that doc.
 *
 * @param field         field being written
 * @param values        term bytes, passed on to {@code addBinaryField}
 * @param docToOrdCount number of ords per document
 * @param ords          the stream of ords, passed on to {@code addNumericField}
 */
@Override
public void addSortedSetField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrdCount, Iterable<Number> ords) throws IOException {
  // Header entry for the sorted-set field itself.
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.SORTED_SET);

  // ord -> byte[] goes out as a binary field.
  addBinaryField(field, values);

  // The stream of ords goes out as a numeric field.
  // NOTE: we could return an iterator that delta-encodes these within a doc
  addNumericField(field, ords);

  // doc -> ord count is stored as an absolute index into the ord stream.
  meta.writeVInt(field.number);
  meta.writeByte(DiskDocValuesFormat.NUMERIC);
  meta.writeVInt(PackedInts.VERSION_CURRENT);
  meta.writeLong(data.getFilePointer());
  meta.writeVLong(maxDoc);
  meta.writeVInt(BLOCK_SIZE);

  final MonotonicBlockPackedWriter ordIndexWriter = new MonotonicBlockPackedWriter(data, BLOCK_SIZE);
  long runningOrdCount = 0;
  for (Number ordCount : docToOrdCount) {
    runningOrdCount += ordCount.longValue();
    ordIndexWriter.add(runningOrdCount);
  }
  ordIndexWriter.finish();
}

Source File: TestLSBRadixSorter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Sorts the first {@code len} entries of {@code arr} with {@code sorter} and checks the result
 * against {@link Arrays#sort(int[])} applied to a copy of the same range.
 *
 * @param sorter sorter under test
 * @param arr    values to sort; sorted in place by the sorter
 * @param len    number of leading entries of {@code arr} to sort
 */
public void test(LSBRadixSorter sorter, int[] arr, int len) {
  final int[] sortedReference = ArrayUtil.copyOfSubArray(arr, 0, len);
  Arrays.sort(sortedReference);

  // Smallest bit width that can hold every value in the range.
  int bitsNeeded = 0;
  for (int idx = 0; idx < len; ++idx) {
    final int bitsForValue = PackedInts.bitsRequired(arr[idx]);
    if (bitsForValue > bitsNeeded) {
      bitsNeeded = bitsForValue;
    }
  }

  // Randomly pass a width somewhere between the minimum and 32.
  if (random().nextBoolean()) {
    bitsNeeded = TestUtil.nextInt(random(), bitsNeeded, 32);
  }

  sorter.sort(bitsNeeded, arr, len);
  assertArrayEquals(sortedReference, ArrayUtil.copyOfSubArray(arr, 0, len));
}

Source File: LZ4.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Points this instance at {@code bytes[off, off + len)} and sizes the hash table for that
 * range. The table is reallocated only when it is missing, has too few slots, or stores too
 * few bits per value; otherwise it is reused without being cleared.
 *
 * @param bytes buffer to work on
 * @param off   start offset in {@code bytes}
 * @param len   number of bytes starting at {@code off}
 */
@Override
void reset(byte[] bytes, int off, int len) {
  Objects.checkFromIndexSize(off, len, bytes.length);
  this.bytes = bytes;
  this.base = off;
  this.lastOff = off - 1;
  this.end = off + len;

  final int bitsPerOffset = PackedInts.bitsRequired(len - LAST_LITERALS);
  // Number of bits needed to represent (bitsPerOffset - 1).
  final int bitsPerOffsetLog = Integer.SIZE - Integer.numberOfLeadingZeros(bitsPerOffset - 1);
  hashLog = MEMORY_USAGE + 3 - bitsPerOffsetLog;

  final int requiredSlots = 1 << hashLog;
  final boolean mustReallocate = hashTable == null
      || hashTable.size() < requiredSlots
      || hashTable.getBitsPerValue() < bitsPerOffset;
  if (mustReallocate) {
    hashTable = PackedInts.getMutable(requiredSlots, bitsPerOffset, PackedInts.DEFAULT);
  } else {
    // Avoid calling hashTable.clear(), this makes it costly to compress many short sequences otherwise.
    // Instead, get() checks that references are less than the current offset.
    get(off); // this sets the hashTable for the first 4 bytes as a side-effect
  }
}

Source File: FixedGapTermsIndexWriter.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Creates the terms index output for the segment and writes its header: the codec index
 * header, the term index interval, the packed-ints version and the block size.
 *
 * @param state             segment write state providing directory, segment info and context
 * @param termIndexInterval interval to record in the header; must be positive
 * @throws IllegalArgumentException if {@code termIndexInterval <= 0}
 * @throws IOException if creating the output or writing the header fails; the output is then
 *                     closed before the exception propagates
 */
public FixedGapTermsIndexWriter(SegmentWriteState state, int termIndexInterval) throws IOException {
  if (termIndexInterval <= 0) {
    throw new IllegalArgumentException("invalid termIndexInterval: " + termIndexInterval);
  }
  this.termIndexInterval = termIndexInterval;
  out = state.directory.createOutput(
      IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION),
      state.context);

  boolean headerWritten = false;
  try {
    CodecUtil.writeIndexHeader(out, CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
    out.writeVInt(termIndexInterval);
    out.writeVInt(PackedInts.VERSION_CURRENT);
    out.writeVInt(BLOCKSIZE);
    headerWritten = true;
  } finally {
    // Do not leak the freshly created output if any header write failed.
    if (headerWritten == false) {
      IOUtils.closeWhileHandlingException(out);
    }
  }
}

Source File: PackedArrayIndexFieldData.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Estimates the memory, in bytes, that one page of ordinals would take.
 *
 * @param values                  source of the values looked up at the page's min/max ordinals
 * @param acceptableOverheadRatio overhead ratio handed to {@code PackedInts.fastestFormatAndBits}
 * @param pageSize                number of values in the page
 * @param pageMinOrdinal          ordinal whose value is the page minimum
 * @param pageMaxOrdinal          ordinal whose value is the page maximum, or
 *                                {@code Long.MIN_VALUE} for an empty page
 * @return estimated size of the page in bytes
 */
private long getPageMemoryUsage(PackedLongValues values, float acceptableOverheadRatio, int pageSize, long pageMinOrdinal, long pageMaxOrdinal) {
    if (pageMaxOrdinal == Long.MIN_VALUE) {
        // empty page - will use the null reader which just stores size
        return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
    }

    final long pageMinValue = values.get(pageMinOrdinal);
    final long pageMaxValue = values.get(pageMaxOrdinal);
    final long pageDelta = pageMaxValue - pageMinValue;
    if (pageDelta == 0) {
        // empty page
        return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT);
    }

    // A negative delta means the subtraction overflowed, so all 64 bits are needed.
    final int bitsRequired = pageDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta);
    final PackedInts.FormatAndBits formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio);
    long pageMemorySize = 0;
    pageMemorySize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG;
    pageMemorySize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage
    return pageMemorySize;
}

Source File: SortedNumericDocValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public SortedNumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: HyperLogLogPlusPlus.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Compute the required precision so that <code>count</code> distinct entries
 * would be counted with linear counting.
 *
 * @param count number of distinct entries that should still be counted linearly
 * @return the computed precision, clamped to {@code [MIN_PRECISION, MAX_PRECISION]}
 */
public static int precisionFromThreshold(long count) {
    // Hash table slots needed to hold `count` entries at the maximum load factor.
    final long hashTableEntries = (long) Math.ceil(count / MAX_LOAD_FACTOR);
    final int unclamped = PackedInts.bitsRequired(hashTableEntries * RamUsageEstimator.NUM_BYTES_INT);
    return Math.min(Math.max(unclamped, MIN_PRECISION), MAX_PRECISION);
}

Source File: TestTimSorterWorstCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Create an array for the given list of runs: a 1-bit-per-value array of {@code length}
 * entries in which the last index of every run is set to 1, after which the final entry
 * ({@code length - 1}) is reset to 0.
 */
private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
  final PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
  int runEnd = -1;
  for (int runLength : runs) {
    runEnd += runLength;
    array.set(runEnd, 1);
  }
  array.set(length - 1, 0);
  return array;
}

Source File: OrdinalsBuilder.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates a builder whose ordinals store starts with a bit width derived from
 * {@code numTerms}.
 *
 * @param numTerms                number of terms; a negative value selects the fallback of
 *                                8 bits per value
 * @param maxDoc                  passed to the ordinals store and kept on the builder
 * @param acceptableOverheadRatio overhead ratio passed to the ordinals store
 */
public OrdinalsBuilder(long numTerms, int maxDoc, float acceptableOverheadRatio) throws IOException {
    this.maxDoc = maxDoc;
    final int startBitsPerValue = numTerms >= 0 ? PackedInts.bitsRequired(numTerms) : 8;
    ordinals = new OrdinalsStore(maxDoc, startBitsPerValue, acceptableOverheadRatio);
    spare = new LongsRef();
}

Source File: NodeHash.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Replaces {@code table} with one twice as large and re-inserts every non-zero address of
 * the old table through {@code addNew}. Slots holding 0 are skipped.
 */
private void rehash() throws IOException {
  final PagedGrowableWriter previous = table;

  table = new PagedGrowableWriter(2 * previous.size(), 1 << 30, PackedInts.bitsRequired(count), PackedInts.COMPACT);
  mask = table.size() - 1;

  for (long slot = 0; slot < previous.size(); slot++) {
    final long address = previous.get(slot);
    if (address == 0) {
      continue;
    }
    addNew(address);
  }
}

Source File: DocValuesFieldUpdates.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Initialises the shared state of a doc-values update set.
 *
 * @param maxDoc used to size the per-entry bit width ({@code bitsRequired(maxDoc - 1) + SHIFT})
 * @param delGen deletion generation stored on this instance
 * @param field  name of the field being updated
 * @param type   doc-values type; must not be {@code null}
 * @throws NullPointerException if {@code type} is {@code null}
 */
protected DocValuesFieldUpdates(int maxDoc, long delGen, String field, DocValuesType type) {
  if (type == null) {
    throw new NullPointerException("DocValuesType must not be null");
  }
  this.maxDoc = maxDoc;
  this.delGen = delGen;
  this.field = field;
  this.type = type;
  // Wide enough for the largest doc id, plus SHIFT extra bits.
  this.bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
  this.docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
}

Source File: NormValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  docsWithField = new DocsWithFieldSet();
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: IndexSorter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds one comparable provider per reader. Each provider returns the global ordinal of
 * the document's value, or a sentinel for documents without a value:
 * {@code Integer.MAX_VALUE} when {@code missingValue} is {@code SortField.STRING_LAST},
 * otherwise {@code Integer.MIN_VALUE}.
 *
 * @param readers readers to build providers for
 * @return providers, indexed like {@code readers}
 */
@Override
public ComparableProvider[] getComparableProviders(List<? extends LeafReader> readers) throws IOException {
  final int readerCount = readers.size();

  // Pull the sorted doc values of every reader first; the ordinal map needs all of them.
  final SortedDocValues[] perReaderValues = new SortedDocValues[readerCount];
  for (int i = 0; i < readerCount; i++) {
    perReaderValues[i] = valuesProvider.get(readers.get(i));
  }
  final OrdinalMap ordinalMap = OrdinalMap.build(null, perReaderValues, PackedInts.DEFAULT);

  final int missingOrd = missingValue == SortField.STRING_LAST ? Integer.MAX_VALUE : Integer.MIN_VALUE;

  final ComparableProvider[] providers = new ComparableProvider[readerCount];
  for (int readerIndex = 0; readerIndex < readerCount; readerIndex++) {
    final SortedDocValues segmentValues = perReaderValues[readerIndex];
    final LongValues globalOrds = ordinalMap.getGlobalOrds(readerIndex);
    // translate segment's ord to global ord space, or fall back to the missing sentinel
    providers[readerIndex] = docID ->
        segmentValues.advanceExact(docID) ? globalOrds.get(segmentValues.ordValue()) : missingOrd;
  }
  return providers;
}

Source File: NumericDocValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: MultiDocValues.java From lucene-solr with Apache License 2.0

5 votes

/** Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedSetDocValues(String)}
 * </p>
 * <p>
 * Returns {@code null} when the reader has no leaves or when no leaf has values for the
 * field; a single-leaf reader returns that leaf's values directly.
 * </p>
 */
public static SortedSetDocValues getSortedSetValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int leafCount = leaves.size();

  if (leafCount == 0) {
    return null;
  }
  if (leafCount == 1) {
    return leaves.get(0).reader().getSortedSetDocValues(field);
  }

  final SortedSetDocValues[] perLeaf = new SortedSetDocValues[leafCount];
  final int[] docStarts = new int[leafCount + 1];
  boolean sawRealValues = false;
  long costSum = 0;
  for (int leaf = 0; leaf < leafCount; leaf++) {
    final LeafReaderContext leafContext = leaves.get(leaf);
    final SortedSetDocValues leafValues = leafContext.reader().getSortedSetDocValues(field);
    if (leafValues != null) {
      sawRealValues = true;
      costSum += leafValues.cost();
      perLeaf[leaf] = leafValues;
    } else {
      // Leaves without the field contribute an empty instance.
      perLeaf[leaf] = DocValues.emptySortedSet();
    }
    docStarts[leaf] = leafContext.docBase;
  }
  docStarts[leafCount] = r.maxDoc();

  if (!sawRealValues) {
    return null;
  }

  final IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
  final IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
  final OrdinalMap mapping = OrdinalMap.build(owner, perLeaf, PackedInts.DEFAULT);
  return new MultiSortedSetDocValues(perLeaf, docStarts, mapping, costSum);
}

Source File: MultiDocValues.java From lucene-solr with Apache License 2.0

5 votes

/** Returns a SortedDocValues for a reader's docvalues (potentially doing extremely slow things).
 * <p>
 * This is an extremely slow way to access sorted values. Instead, access them per-segment
 * with {@link LeafReader#getSortedDocValues(String)}
 * </p>
 * <p>
 * Returns {@code null} when the reader has no leaves or when no leaf has values for the
 * field; a single-leaf reader returns that leaf's values directly.
 * </p>
 */
public static SortedDocValues getSortedValues(final IndexReader r, final String field) throws IOException {
  final List<LeafReaderContext> leaves = r.leaves();
  final int leafCount = leaves.size();

  if (leafCount == 0) {
    return null;
  }
  if (leafCount == 1) {
    return leaves.get(0).reader().getSortedDocValues(field);
  }

  final SortedDocValues[] perLeaf = new SortedDocValues[leafCount];
  final int[] docStarts = new int[leafCount + 1];
  boolean sawRealValues = false;
  long costSum = 0;
  for (int leaf = 0; leaf < leafCount; leaf++) {
    final LeafReaderContext leafContext = leaves.get(leaf);
    final SortedDocValues leafValues = leafContext.reader().getSortedDocValues(field);
    if (leafValues != null) {
      sawRealValues = true;
      costSum += leafValues.cost();
      perLeaf[leaf] = leafValues;
    } else {
      // Leaves without the field contribute an empty instance.
      perLeaf[leaf] = DocValues.emptySorted();
    }
    docStarts[leaf] = leafContext.docBase;
  }
  docStarts[leafCount] = r.maxDoc();

  if (!sawRealValues) {
    return null;
  }

  final IndexReader.CacheHelper cacheHelper = r.getReaderCacheHelper();
  final IndexReader.CacheKey owner = cacheHelper == null ? null : cacheHelper.getKey();
  final OrdinalMap mapping = OrdinalMap.build(owner, perLeaf, PackedInts.DEFAULT);
  return new MultiSortedDocValues(perLeaf, docStarts, mapping, costSum);
}

Source File: MultiOrdinals.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Return true if this impl is going to be smaller than {@link SinglePackedOrdinals} by at least 20%.
 *
 * @param maxDoc                  number of documents
 * @param numDocsWithValue        number of documents that have a value
 * @param numOrds                 number of ordinals
 * @param acceptableOverheadRatio overhead ratio passed to {@code PackedInts.fastestFormatAndBits}
 */
public static boolean significantlySmallerThanSinglePackedOrdinals(int maxDoc, int numDocsWithValue, long numOrds, float acceptableOverheadRatio) {
    final int rawBitsPerOrd = PackedInts.bitsRequired(numOrds);
    final int bitsPerOrd = PackedInts.fastestFormatAndBits(numDocsWithValue, rawBitsPerOrd, acceptableOverheadRatio).bitsPerValue;

    // Compute the worst-case number of bits per value for offsets in the worst case, eg. if no docs have a value at the
    // beginning of the block and all docs have one at the end of the block
    final float avgValuesPerDoc = (float) numDocsWithValue / maxDoc;
    final int maxDelta = (int) Math.ceil(OFFSETS_PAGE_SIZE * (1 - avgValuesPerDoc) * avgValuesPerDoc);
    final int rawBitsPerOffset = PackedInts.bitsRequired(maxDelta) + 1; // +1 because of the sign
    final int bitsPerOffset = PackedInts.fastestFormatAndBits(maxDoc, rawBitsPerOffset, acceptableOverheadRatio).bitsPerValue;

    // Both estimates are counts of bits (values * bits per value); only their ratio matters.
    final long expectedMultiSize = (long) numDocsWithValue * bitsPerOrd + (long) maxDoc * bitsPerOffset;
    final long expectedSingleSize = (long) maxDoc * bitsPerOrd;
    return expectedMultiSize < 0.8f * expectedSingleSize;
}

Source File: ParentChildIndexFieldData.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds an ordinal map over the {@code parentType} ordinals of every segment-level field
 * data instance.
 *
 * @param atomicFD   per-segment field data, one entry per segment
 * @param parentType parent type whose ordinals are requested from each entry
 * @return the ordinal map built with {@code PackedInts.DEFAULT} and no owner
 */
private static OrdinalMap buildOrdinalMap(AtomicParentChildFieldData[] atomicFD, String parentType) throws IOException {
    final int segmentCount = atomicFD.length;
    final SortedDocValues[] perSegmentOrdinals = new SortedDocValues[segmentCount];
    for (int segment = 0; segment < segmentCount; ++segment) {
        perSegmentOrdinals[segment] = atomicFD[segment].getOrdinalsValues(parentType);
    }
    return OrdinalMap.build(null, perSegmentOrdinals, PackedInts.DEFAULT);
}

Source File: MergeState.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds a doc map that, for every doc id in {@code [0, maxDoc)}, stores the id minus the
 * number of deleted documents that precede it. Deleted documents also get an entry.
 *
 * @param maxDoc   number of doc ids to map
 * @param liveDocs bit set in which {@code false} marks a deleted document
 * @return the monotonic packed doc map
 */
static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
  final PackedLongValues.Builder mapBuilder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int deletedSoFar = 0;
  for (int docId = 0; docId < maxDoc; ++docId) {
    mapBuilder.add(docId - deletedSoFar);
    if (!liveDocs.get(docId)) {
      deletedSoFar++;
    }
  }
  return mapBuilder.build();
}

Source File: OrdinalsBuilder.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds an {@link Ordinals} instance from the builders current state.
 * <p>
 * {@link MultiOrdinals} is chosen when it is forced through the settings, when any document
 * has more than one value, or when it is estimated to be significantly smaller; otherwise a
 * {@link SinglePackedOrdinals} is returned.
 *
 * @param settings source of the {@code acceptable_overhead_ratio} and force-multi options
 */
public Ordinals build(Settings settings) {
    final float acceptableOverheadRatio = settings.getAsFloat("acceptable_overhead_ratio", PackedInts.FASTEST);
    final boolean forceMultiOrdinals = settings.getAsBoolean(FORCE_MULTI_ORDINALS, false);

    // MultiOrdinals can be smaller than SinglePackedOrdinals for sparse fields
    final boolean useMultiOrdinals = forceMultiOrdinals
            || numMultiValuedDocs > 0
            || MultiOrdinals.significantlySmallerThanSinglePackedOrdinals(maxDoc, numDocsWithValue, getValueCount(), acceptableOverheadRatio);
    if (useMultiOrdinals) {
        return new MultiOrdinals(this, acceptableOverheadRatio);
    }
    return new SinglePackedOrdinals(this, acceptableOverheadRatio);
}

Source File: BaseCompressingDocValuesFormatTestCase.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes 300 values of the form {@code base + k * day}, force-merges and measures the
 * directory size, then adds 50 more such values, force-merges again and checks that the
 * directory grew by less than plain bit-packing of the 50 new values would have cost.
 */
public void testDateCompression() throws IOException {
  try (final Directory dir = new ByteBuffersDirectory()) {
    final IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
    // The writer is a resource too: close it (before the directory) even if an assertion fails.
    try (final IndexWriter iwriter = new IndexWriter(dir, iwc)) {
      final long base = 13; // prime
      final long day = 1000L * 60 * 60 * 24;

      // A single document instance is reused; only the field value changes between adds.
      final Document doc = new Document();
      final NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
      doc.add(dvf);
      for (int i = 0; i < 300; ++i) {
        dvf.setLongValue(base + random().nextInt(1000) * day);
        iwriter.addDocument(doc);
      }
      iwriter.forceMerge(1);
      final long size1 = dirSize(dir);
      for (int i = 0; i < 50; ++i) {
        dvf.setLongValue(base + random().nextInt(1000) * day);
        iwriter.addDocument(doc);
      }
      iwriter.forceMerge(1);
      final long size2 = dirSize(dir);
      // make sure the new longs cost less than if they had only been packed
      assertTrue(size2 < size1 + (PackedInts.bitsRequired(day) * 50) / 8);
    }
  }
}

Source File: BinaryDocValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public BinaryDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.bytes = new PagedBytes(BLOCK_BITS);
  this.bytesOut = bytes.getDataOutput();
  this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  this.iwBytesUsed = iwBytesUsed;
  this.docsWithField = new DocsWithFieldSet();
  this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: SortedSetDocValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
  pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: SortedDocValuesWriter.java From lucene-solr with Apache License 2.0

5 votes

public SortedDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
  this.fieldInfo = fieldInfo;
  this.iwBytesUsed = iwBytesUsed;
  hash = new BytesRefHash(
      new ByteBlockPool(
          new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
          BytesRefHash.DEFAULT_CAPACITY,
          new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
  pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
  docsWithField = new DocsWithFieldSet();
  bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
  iwBytesUsed.addAndGet(bytesUsed);
}

Source File: LegacyFieldsIndexReader.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Reads the whole fields index from {@code fieldsIndexIn} into per-block arrays.
 * <p>
 * The input starts with the packed-ints version, followed by blocks. Each block begins with
 * its chunk count; a chunk count of 0 ends the input. For every block the doc base, average
 * docs per chunk and packed doc-base deltas are read, then the start pointer, average chunk
 * size and packed start-pointer deltas.
 *
 * @param fieldsIndexIn input positioned at the start of the fields index
 * @param si            segment info; only {@code maxDoc()} is read here
 * @throws CorruptIndexException if a block declares more than 32 bits per doc base or more
 *                               than 64 bits per start pointer
 * @throws IOException           if reading from the input fails
 */
LegacyFieldsIndexReader(IndexInput fieldsIndexIn, SegmentInfo si) throws IOException {
  maxDoc = si.maxDoc();
  // Per-block arrays start at 16 entries and are grown together when full.
  int[] docBases = new int[16];
  long[] startPointers = new long[16];
  int[] avgChunkDocs = new int[16];
  long[] avgChunkSizes = new long[16];
  PackedInts.Reader[] docBasesDeltas = new PackedInts.Reader[16];
  PackedInts.Reader[] startPointersDeltas = new PackedInts.Reader[16];

  final int packedIntsVersion = fieldsIndexIn.readVInt();

  int blockCount = 0;

  for (;;) {
    final int numChunks = fieldsIndexIn.readVInt();
    // A block with zero chunks terminates the index.
    if (numChunks == 0) {
      break;
    }
    // Grow all six parallel arrays to the same new size so they stay aligned.
    if (blockCount == docBases.length) {
      final int newSize = ArrayUtil.oversize(blockCount + 1, 8);
      docBases = ArrayUtil.growExact(docBases, newSize);
      startPointers = ArrayUtil.growExact(startPointers, newSize);
      avgChunkDocs = ArrayUtil.growExact(avgChunkDocs, newSize);
      avgChunkSizes = ArrayUtil.growExact(avgChunkSizes, newSize);
      docBasesDeltas = ArrayUtil.growExact(docBasesDeltas, newSize);
      startPointersDeltas = ArrayUtil.growExact(startPointersDeltas, newSize);
    }

    // doc bases
    docBases[blockCount] = fieldsIndexIn.readVInt();
    avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
    final int bitsPerDocBase = fieldsIndexIn.readVInt();
    if (bitsPerDocBase > 32) {
      throw new CorruptIndexException("Corrupted bitsPerDocBase: " + bitsPerDocBase, fieldsIndexIn);
    }
    docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);

    // start pointers
    startPointers[blockCount] = fieldsIndexIn.readVLong();
    avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
    final int bitsPerStartPointer = fieldsIndexIn.readVInt();
    if (bitsPerStartPointer > 64) {
      throw new CorruptIndexException("Corrupted bitsPerStartPointer: " + bitsPerStartPointer, fieldsIndexIn);
    }
    startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);

    ++blockCount;
  }

  // Trim every array to the number of blocks actually read.
  this.docBases = ArrayUtil.copyOfSubArray(docBases, 0, blockCount);
  this.startPointers = ArrayUtil.copyOfSubArray(startPointers, 0, blockCount);
  this.avgChunkDocs = ArrayUtil.copyOfSubArray(avgChunkDocs, 0, blockCount);
  this.avgChunkSizes = ArrayUtil.copyOfSubArray(avgChunkSizes, 0, blockCount);
  this.docBasesDeltas = ArrayUtil.copyOfSubArray(docBasesDeltas, 0, blockCount);
  this.startPointersDeltas = ArrayUtil.copyOfSubArray(startPointersDeltas, 0, blockCount);
}

Source File: BinaryDocValuesFieldUpdates.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates an empty set of binary doc-values updates for {@code field}.
 *
 * @param delGen deletion generation, forwarded to the superclass
 * @param field  name of the field being updated
 * @param maxDoc forwarded to the superclass
 */
public BinaryDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
  super(maxDoc, delGen, field, DocValuesType.BINARY);
  // Offsets and lengths both start as one-entry writers at 1 bit per value.
  this.offsets = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  this.lengths = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
  this.values = new BytesRefBuilder();
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Un-inverts the terms of {@code key.field} into a per-document binary value: every term is
 * copied into paged bytes and each document in the term's postings is pointed at that copy.
 * <p>
 * Offset 0 is reserved for "no value" (an empty {@code BytesRef} is written first), which is
 * what the derived {@code docsWithField} bits test for. A document listed under several
 * terms ends up pointing at the last term enumerated for it.
 *
 * @param reader leaf reader to load terms from
 * @param key    cache key; {@code key.field} names the field and {@code key.custom} holds the
 *               acceptable overhead ratio as a {@code Float}
 * @return the binary doc values built over the paged bytes and the doc-to-offset reader
 */
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  // TODO: would be nice to first check if DocTermsIndex
  // was already cached for this field and then return
  // that instead, to avoid insanity

  final int maxDoc = reader.maxDoc();
  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  // At most one term per document is loaded.
  final int termCountHardLimit = maxDoc;

  // Holds the actual term data, expanded.
  final PagedBytes bytes = new PagedBytes(15);

  int startBPV;

  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        numUniqueTerms = termCountHardLimit;
      }
      startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
    } else {
      startBPV = 1;
    }
  } else {
    startBPV = 1;
  }

  final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
  
  // pointer==0 means not set
  bytes.copyUsingLengthPrefix(new BytesRef());

  if (terms != null) {
    int termCount = 0;
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while(true) {
      if (termCount++ == termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        break;
      }

      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      // Copy the term once, then point every document in its postings at that copy.
      final long pointer = bytes.copyUsingLengthPrefix(term);
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        docToOffset.set(docID, pointer);
      }
    }
  }

  final PackedInts.Reader offsetReader = docToOffset.getMutable();
  // A document has a value exactly when its offset is non-zero.
  Bits docsWithField = new Bits() {
    @Override
    public boolean get(int index) {
      return offsetReader.get(index) != 0;
    }

    @Override
    public int length() {
      return maxDoc;
    }
  };

  wrapper.setDocsWithField(reader, key.field, docsWithField, null);
  // maybe an int-only impl?
  return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField);
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns the binary values of {@code field} for {@code reader}, delegating to the
 * three-argument overload with {@code PackedInts.FAST} as the acceptable overhead ratio.
 *
 * @param reader leaf reader to load values from
 * @param field  name of the field
 */
public BinaryDocValues getTerms(LeafReader reader, String field) throws IOException {
  final float acceptableOverheadRatio = PackedInts.FAST;
  return getTerms(reader, field, acceptableOverheadRatio);
}

Source File: FieldCacheImpl.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Stores the three inputs on the instance without copying them.
 *
 * @param bytes         reader over the term bytes
 * @param docToOffset   reader consulted per document to obtain an offset
 * @param docsWithField bits over the documents of the field
 */
public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, Bits docsWithField) {
  this.docsWithField = docsWithField;
  this.docToOffset = docToOffset;
  this.bytes = bytes;
}

org.apache.lucene.util.packed.PackedInts Java Examples