org.apache.hadoop.hbase.KeyValue#getKeyValueDataStructureSize

Source File: SchemaUtil.java From phoenix with Apache License 2.0

6 votes

/**
 * Rough per-row size estimate for a PTable: sums the KeyValue footprint of every
 * non-primary-key column and then adds one value-less "empty" KeyValue.
 * TODO: keep row count in stats table and use total size / row count instead
 * @param table the table whose row size is being estimated
 * @return estimate of size in bytes of a row
 */
public static long estimateRowSize(PTable table) {
    final int rowKeyLength = estimateKeyLength(table);
    long total = 0;
    for (PColumn col : table.getColumns()) {
        if (SchemaUtil.isPKColumn(col)) {
            // Primary-key columns do not contribute a KeyValue of their own here.
            continue;
        }
        PDataType dataType = col.getDataType();
        Integer declaredLength = col.getMaxLength();
        // Variable-width types use a fixed guess; fixed-width types use the declared
        // max length when present, otherwise the type's own byte size.
        int valueLength;
        if (!dataType.isFixedWidth()) {
            valueLength = VAR_KV_LENGTH_ESTIMATE;
        } else if (declaredLength != null) {
            valueLength = declaredLength;
        } else {
            valueLength = dataType.getByteSize();
        }
        int familyLength = col.getFamilyName().getBytes().length;
        int qualifierLength = col.getName().getBytes().length;
        total += KeyValue.getKeyValueDataStructureSize(rowKeyLength, familyLength, qualifierLength, valueLength);
    }
    // Account for the empty key value, which carries no value bytes.
    int emptyFamilyLength = getEmptyColumnFamily(table).length;
    int emptyQualifierLength = QueryConstants.EMPTY_COLUMN_BYTES.length;
    total += KeyValue.getKeyValueDataStructureSize(rowKeyLength, emptyFamilyLength, emptyQualifierLength, 0);
    return total;
}

Source File: SchemaUtil.java From phoenix with Apache License 2.0

6 votes

/**
 * Rough per-row size estimate for a PTable: sums the KeyValue footprint of every
 * non-primary-key column and then adds one value-less "empty" KeyValue whose
 * qualifier is obtained from {@code EncodedColumnsUtil.getEmptyKeyValueInfo(table)}.
 * TODO: keep row count in stats table and use total size / row count instead
 * @param table the table whose row size is being estimated
 * @return estimate of size in bytes of a row
 */
public static long estimateRowSize(PTable table) {
    final int rowKeyLength = estimateKeyLength(table);
    long total = 0;
    for (PColumn col : table.getColumns()) {
        if (SchemaUtil.isPKColumn(col)) {
            // Primary-key columns do not contribute a KeyValue of their own here.
            continue;
        }
        PDataType dataType = col.getDataType();
        Integer declaredLength = col.getMaxLength();
        // Variable-width types use a fixed guess; fixed-width types use the declared
        // max length when present, otherwise the type's own byte size.
        int valueLength;
        if (!dataType.isFixedWidth()) {
            valueLength = VAR_KV_LENGTH_ESTIMATE;
        } else if (declaredLength != null) {
            valueLength = declaredLength;
        } else {
            valueLength = dataType.getByteSize();
        }
        int familyLength = col.getFamilyName().getBytes().length;
        int qualifierLength = col.getName().getBytes().length;
        total += KeyValue.getKeyValueDataStructureSize(rowKeyLength, familyLength, qualifierLength, valueLength);
    }
    // Account for the empty key value, which carries no value bytes.
    byte[] emptyQualifier = EncodedColumnsUtil.getEmptyKeyValueInfo(table).getFirst();
    int emptyFamilyLength = getEmptyColumnFamily(table).length;
    total += KeyValue.getKeyValueDataStructureSize(rowKeyLength, emptyFamilyLength, emptyQualifier.length, 0);
    return total;
}

Source File: Result.java From hbase with Apache License 2.0

5 votes

/**
 * Searches for the position of the latest value for the specified column.
 * <p>
 * A "first on row" search key is built from the row of {@code kvs[0]} plus the given
 * family and qualifier, and {@code kvs} is binary-searched with
 * {@code CellComparator.getInstance()}; the array must therefore already be sorted by
 * that comparator. When there is no exact match the insertion point is returned, so the
 * cell at the returned index is not guaranteed to belong to the requested column --
 * callers must verify family and qualifier themselves.
 *
 * @param kvs the array to search; a {@code null} or empty array yields -1
 * @param family family name
 * @param foffset family offset
 * @param flength family length
 * @param qualifier column qualifier
 * @param qoffset qualifier offset
 * @param qlength qualifier length
 *
 * @return the index of the first cell at or after the requested column, or -1 if the
 *         array is null/empty or every cell sorts before the requested column
 */
protected int binarySearch(final Cell [] kvs,
    final byte [] family, final int foffset, final int flength,
    final byte [] qualifier, final int qoffset, final int qlength) {

  // Nothing to search; this also protects the kvs[0] accesses below.
  if (kvs == null || kvs.length == 0) {
    return -1;
  }

  double keyValueSize = (double)
      KeyValue.getKeyValueDataStructureSize(kvs[0].getRowLength(), flength, qlength, 0);

  // Reuse the scratch buffer held in localBuffer, growing it only when the search key
  // would not fit.
  byte[] buffer = localBuffer.get();
  if (buffer == null || keyValueSize > buffer.length) {
    // pad to the smallest multiple of the pad width
    buffer = new byte[(int) Math.ceil(keyValueSize / PAD_WIDTH) * PAD_WIDTH];
    localBuffer.set(buffer);
  }

  Cell searchTerm = KeyValueUtil.createFirstOnRow(buffer, 0,
      kvs[0].getRowArray(), kvs[0].getRowOffset(), kvs[0].getRowLength(),
      family, foffset, flength,
      qualifier, qoffset, qlength);

  // A negative result from Arrays.binarySearch encodes (-(insertion point) - 1).
  int pos = Arrays.binarySearch(kvs, searchTerm, CellComparator.getInstance());
  // The original author notes an exact match is never expected here.
  if (pos < 0) {
    pos = (pos+1) * -1;
    // pos is now insertion point
  }
  if (pos == kvs.length) {
    return -1; // doesn't exist
  }
  return pos;
}

Source File: TestByteRangeWithKVSerialization.java From hbase with Apache License 2.0

5 votes

/**
 * Reads one serialized cell from {@code pbr}, starting at its current position.
 * The layout consumed is: int key length, int value length, key and value bytes,
 * a two-byte tags length, the tag bytes, and finally a vlong that is applied to the
 * returned KeyValue as its sequence id.
 *
 * @param pbr range positioned at the start of a serialized cell; its position is
 *            advanced past the cell and the trailing vlong
 * @return a KeyValue over the range's backing bytes, with its sequence id set
 */
static KeyValue readCell(PositionedByteRange pbr) throws Exception {
  final int cellStart = pbr.getPosition();
  final int keyLength = pbr.getInt();
  final int valueLength = pbr.getInt();
  // Jump over the key and value bytes; only their lengths are needed here.
  pbr.setPosition(pbr.getPosition() + keyLength + valueLength);
  // Tags length is stored as two bytes, high byte first.
  final int tagsHigh = pbr.get() & 0xff;
  final int tagsLow = pbr.get() & 0xff;
  final int tagsLength = (tagsHigh << 8) | tagsLow;
  // Jump over the tag bytes.
  pbr.setPosition(pbr.getPosition() + tagsLength);
  final long sequenceId = pbr.getVLong();
  KeyValue cell = new KeyValue(pbr.getBytes(), cellStart,
      (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength));
  cell.setSequenceId(sequenceId);
  return cell;
}

Source File: EncodedDataBlock.java From hbase with Apache License 2.0

4 votes

/**
 * Provides access to compressed value.
 * <p>
 * The encoded data is decoded lazily on the first call to {@code next()}; after that
 * the iterator walks the decoded buffer cell by cell.
 *
 * @param headerSize header size of the block.
 * @return Forwards sequential iterator. Its {@code next()} throws
 *         {@link java.util.NoSuchElementException} once all cells have been returned,
 *         and {@code remove()} is not supported.
 */
public Iterator<Cell> getIterator(int headerSize) {
  final int rawSize = rawKVs.length;
  byte[] encodedDataWithHeader = getEncodedData();
  // Skip the block header plus one short that precedes the encoded cells.
  int bytesToSkip = headerSize + Bytes.SIZEOF_SHORT;
  ByteArrayInputStream bais = new ByteArrayInputStream(encodedDataWithHeader,
      bytesToSkip, encodedDataWithHeader.length - bytesToSkip);
  final DataInputStream dis = new DataInputStream(bais);

  return new Iterator<Cell>() {
    // Decoded cells; null until the first next() call triggers decoding.
    private ByteBuffer decompressedData = null;
    // Per-cell flags consumed in step with the cells when tags are included.
    private Iterator<Boolean> it = isTagsLenZero.iterator();

    @Override
    public boolean hasNext() {
      if (decompressedData == null) {
        // Nothing decoded yet: there is something to return iff raw data exists.
        return rawSize > 0;
      }
      return decompressedData.hasRemaining();
    }

    @Override
    public Cell next() {
      // Honor the Iterator contract instead of failing later with a buffer or
      // decoder exception when the iteration is exhausted.
      if (!hasNext()) {
        throw new java.util.NoSuchElementException("No more cells in the encoded block");
      }
      if (decompressedData == null) {
        try {
          decompressedData = dataBlockEncoder.decodeKeyValues(dis, dataBlockEncoder
              .newDataBlockDecodingContext(meta));
        } catch (IOException e) {
          throw new RuntimeException("Problem with data block encoder, " +
              "most likely it requested more bytes than are available.", e);
        }
        decompressedData.rewind();
      }
      int offset = decompressedData.position();
      int klen = decompressedData.getInt();
      int vlen = decompressedData.getInt();
      int tagsLen = 0;
      ByteBufferUtils.skip(decompressedData, klen + vlen);
      // Read the tag length in case when stream contain tags
      if (meta.isIncludesTags()) {
        boolean noTags = true;
        if (it.hasNext()) {
          noTags = it.next();
        }
        // ROW_INDEX_V1 will not put tagsLen back in cell if it is zero, there is no need
        // to read short here.
        if (!(encoding.equals(DataBlockEncoding.ROW_INDEX_V1) && noTags)) {
          // Two-byte tags length, high byte first.
          tagsLen = ((decompressedData.get() & 0xff) << 8) ^ (decompressedData.get() & 0xff);
          ByteBufferUtils.skip(decompressedData, tagsLen);
        }
      }
      // The KeyValue is a view over the decoded buffer's backing array.
      KeyValue kv = new KeyValue(decompressedData.array(), offset,
          (int) KeyValue.getKeyValueDataStructureSize(klen, vlen, tagsLen));
      if (meta.isIncludesMvcc()) {
        long mvccVersion = ByteBufferUtils.readVLong(decompressedData);
        kv.setSequenceId(mvccVersion);
      }
      return kv;
    }

    @Override
    public void remove() {
      throw new NotImplementedException("remove() is not supported!");
    }

    @Override
    public String toString() {
      return "Iterator of: " + dataBlockEncoder.getClass().getName();
    }

  };
}

Source File: PArrayDataTypeEncoder.java From phoenix with Apache License 2.0

4 votes

/**
 * Estimates the encoded size of a row in which each column family's values are packed
 * into a single cell. For every family the packed cell size is the sum of the value
 * bytes, the bytes used to mark runs of null/absent columns, the offset array and fixed
 * overhead; that cell is then sized as a KeyValue and added to the row total.
 *
 * @param table the table whose column families are iterated
 * @param rowLength length in bytes of the row key, passed through to the KeyValue size
 * @param colValueMap map from column to value; a column that is missing from the map,
 *            or mapped to {@code null} or an empty array, is counted as a null
 * @return estimated encoded size
 */
public static int getEstimatedByteSize(PTable table, int rowLength,
        Map<PColumn, byte[]> colValueMap) {
    // iterate over column families
    int rowSize = 0;
    for (PColumnFamily family : table.getColumnFamilies()) {
        Collection<PColumn> columns = family.getColumns();
        // we add a non null value to the start so that we can represent absent values in the array with negative offsets
        int numColumns = columns.size() + 1;
        int cellSize = 1;
        int nulls = 0;
        int maxOffset = 0;
        // iterate over columns
        for (PColumn column : columns) {
            byte[] colValue = colValueMap.get(column);
            if (colValue == null || colValue.length == 0) {
                // absent, null and empty values are all counted as one more pending null
                ++nulls;
                maxOffset = cellSize;
            } else {
                // count the bytes written to serialize nulls: 1 + ceil(nulls / 255).
                // The ceiling is computed in integer arithmetic; the previous
                // Math.ceil(nulls / 255) divided as ints first, so it never rounded up.
                if (nulls > 0) {
                    cellSize += 1 + (nulls + 254) / 255;
                    nulls = 0;
                }
                maxOffset = cellSize;
                cellSize += colValue.length;
            }
        }
        // count the bytes used for the offset array
        cellSize +=
                PArrayDataType.useShortForOffsetArray(maxOffset,
                    PArrayDataType.IMMUTABLE_SERIALIZATION_VERSION)
                            ? numColumns * Bytes.SIZEOF_SHORT
                            : numColumns * Bytes.SIZEOF_INT;
        // NOTE(review): 4 extra fixed bytes, purpose not stated in the original -- confirm
        cellSize += 4;
        // count the bytes used for header information
        cellSize += 5;
        // add the size of the single cell containing all column values
        rowSize +=
                KeyValue.getKeyValueDataStructureSize(rowLength,
                    family.getName().getBytes().length,
                    QueryConstants.SINGLE_KEYVALUE_COLUMN_QUALIFIER_BYTES.length, cellSize);
    }
    return rowSize;
}

Java Code Examples for org.apache.hadoop.hbase.KeyValue#getKeyValueDataStructureSize()