io.netty.buffer.ArrowBuf Java Examples
The following examples show how to use io.netty.buffer.ArrowBuf.
They are drawn from open source projects; each example notes its original project, source file, and license so you can view it in context.
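Before the examples, here is a minimal sketch of the basic ArrowBuf lifecycle: a buffer is leased from an allocator, read and written through absolute-index accessors, and closed to release its reference-counted off-heap memory. The allocator limit and buffer size are arbitrary, and the sketch assumes a pre-1.0 Arrow release in which ArrowBuf still lives in the io.netty.buffer package.

import io.netty.buffer.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;

public class ArrowBufLifecycle {
  public static void main(String[] args) {
    // Root allocator with an arbitrary 1 MiB limit; it tracks every buffer it hands out.
    try (RootAllocator allocator = new RootAllocator(1024 * 1024)) {
      ArrowBuf buf = allocator.buffer(64);   // lease 64 bytes of off-heap memory
      try {
        buf.setLong(0, 42L);                 // absolute-index write
        long value = buf.getLong(0);         // absolute-index read
        System.out.println("read back: " + value);
      } finally {
        buf.close();                         // decrement the refcount, returning the memory
      }
    }
  }
}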
Example #1
Source File: ArrowSchemaConverter.java From spark-bigquery-connector with Apache License 2.0
@Override
final UTF8String getUTF8String(int rowId) {
  accessor.get(rowId, stringResult);
  if (stringResult.isSet == 0) {
    return null;
  } else {
    ArrowBuf offsets = accessor.getOffsetBuffer();
    int index = rowId * VarCharVector.OFFSET_WIDTH;
    int start = offsets.getInt(index);
    int end = offsets.getInt(index + VarCharVector.OFFSET_WIDTH);

    /* Because the result is accessed lazily, we could lose the data if the memory
     * address is corrupted. It might be safer to copy into a byte array, but we
     * avoid that for performance reasons. */
    return UTF8String.fromAddress(/* base = */ null,
        stringResult.buffer.memoryAddress() + start,
        end - start);
  }
}
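The offset arithmetic above is the standard Arrow layout for variable-width data: entry i of the offset buffer marks where value i starts in the data buffer, and entry i + 1 marks where it ends. A minimal standalone sketch of the same access pattern; the vector name and contents are made up for illustration:

import java.nio.charset.StandardCharsets;

import io.netty.buffer.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;

public class OffsetBufferRead {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         VarCharVector vector = new VarCharVector("names", allocator)) {
      vector.allocateNew();
      vector.setSafe(0, "arrow".getBytes(StandardCharsets.UTF_8));
      vector.setValueCount(1);

      ArrowBuf offsets = vector.getOffsetBuffer();
      int start = offsets.getInt(0);                        // start of value 0
      int end = offsets.getInt(VarCharVector.OFFSET_WIDTH); // end of value 0

      byte[] bytes = new byte[end - start];
      vector.getDataBuffer().getBytes(start, bytes);        // copy the value bytes out
      System.out.println(new String(bytes, StandardCharsets.UTF_8)); // prints "arrow"
    }
  }
}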
Example #2
Source File: BlockAllocatorImpl.java From aws-athena-query-federation with Apache License 2.0
/**
 * Creates an ArrowBuf and registers it for later clean up if the ArrowBuf isn't
 * explicitly closed by the caller.
 *
 * @see com.amazonaws.athena.connector.lambda.data.BlockAllocator
 */
public ArrowBuf createBuffer(int size)
{
  ArrowBuf buffer = null;
  try {
    buffer = rootAllocator.buffer(size);
    arrowBufs.add(buffer);
    return buffer;
  }
  catch (Exception ex) {
    if (buffer != null) {
      buffer.close();
    }
    throw ex;
  }
}
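A caller-side sketch of the register-then-return pattern this method enables. The no-argument constructor and AutoCloseable behavior of BlockAllocatorImpl are assumptions here, so check the SDK version you are using:

import com.amazonaws.athena.connector.lambda.data.BlockAllocatorImpl;
import io.netty.buffer.ArrowBuf;

public class RegisteredBufferExample {
  public static void main(String[] args) throws Exception {
    try (BlockAllocatorImpl allocator = new BlockAllocatorImpl()) {
      ArrowBuf buf = allocator.createBuffer(1024);
      buf.setInt(0, 7);
      // No explicit buf.close() needed: the allocator registered the buffer
      // and will close it when the try block closes the allocator.
    }
  }
}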
Example #3
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
private void setNextNValuesInVector(
    int typeWidth, NullabilityHolder nullabilityHolder,
    ValuesAsBytesReader valuesReader, int bufferIdx, FieldVector vector, int numValues) {
  ArrowBuf validityBuffer = vector.getValidityBuffer();
  if (currentValue == maxDefLevel) {
    ByteBuffer buffer = valuesReader.getBuffer(numValues * typeWidth);
    vector.getDataBuffer().setBytes(bufferIdx * typeWidth, buffer);
    nullabilityHolder.setNotNulls(bufferIdx, numValues);
    if (setArrowValidityVector) {
      for (int i = 0; i < numValues; i++) {
        BitVectorHelper.setValidityBitToOne(validityBuffer, bufferIdx + i);
      }
    }
  } else {
    setNulls(nullabilityHolder, bufferIdx, numValues, validityBuffer);
  }
}
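The validity buffer manipulated here is a bitmap with one bit per slot (1 = set, 0 = null). A small sketch of flipping and reading those bits directly; the availability of BitVectorHelper.get and the 8-byte buffer size are assumptions made to illustrate the layout:

import io.netty.buffer.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BitVectorHelper;

public class ValidityBits {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
      ArrowBuf validity = allocator.buffer(8);            // room for 64 validity bits
      validity.setZero(0, 8);                             // start with every slot null
      BitVectorHelper.setValidityBitToOne(validity, 3);   // mark slot 3 non-null
      System.out.println(BitVectorHelper.get(validity, 3)); // 1
      System.out.println(BitVectorHelper.get(validity, 4)); // 0
      validity.close();
    }
  }
}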
Example #4
Source File: ArrowSchemaConverter.java From spark-bigquery-connector with Apache License 2.0
@Override
final ColumnarArray getArray(int rowId) {
  ArrowBuf offsets = accessor.getOffsetBuffer();
  int index = rowId * ListVector.OFFSET_WIDTH;
  int start = offsets.getInt(index);
  int end = offsets.getInt(index + ListVector.OFFSET_WIDTH);
  return new ColumnarArray(arrayData, start, end - start);
}
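Examples #6 and #7 below repeat this same pattern. In the Arrow layout, a list vector's offset buffer stores, for each row, the [start, end) range of that row's elements in the child vector. A standalone sketch; the vector name and values are illustrative:

import io.netty.buffer.ArrowBuf;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

public class ListOffsets {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
         ListVector list = ListVector.empty("nums", allocator)) {
      UnionListWriter writer = list.getWriter();
      writer.setPosition(0);
      writer.startList();
      writer.writeInt(1);
      writer.writeInt(2);
      writer.writeInt(3);
      writer.endList();
      list.setValueCount(1);

      ArrowBuf offsets = list.getOffsetBuffer();
      int start = offsets.getInt(0);
      int end = offsets.getInt(ListVector.OFFSET_WIDTH);
      // row 0's elements live at child positions [start, end), here [0, 3)
      System.out.println("[" + start + ", " + end + ")");
    }
  }
}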
Example #5
Source File: BlockAllocatorImpl.java From aws-athena-query-federation with Apache License 2.0
/**
 * Attempts to close all buffers allocated by this BlockAllocator.
 */
@VisibleForTesting
protected synchronized void closeBuffers()
{
  logger.debug("closeBuffers: {}", arrowBufs.size());
  for (ArrowBuf next : arrowBufs) {
    try {
      next.close();
    }
    catch (Exception ex) {
      logger.warn("closeBuffers: Error closing buffer", ex);
    }
  }
  arrowBufs.clear();
}
Example #6
Source File: FlightArrowColumnVector.java From flight-spark-source with Apache License 2.0
@Override
final ColumnarArray getArray(int rowId) {
  ArrowBuf offsets = accessor.getOffsetBuffer();
  int index = rowId * ListVector.OFFSET_WIDTH;
  int start = offsets.getInt(index);
  int end = offsets.getInt(index + ListVector.OFFSET_WIDTH);
  return new ColumnarArray(arrayData, start, end - start);
}
Example #7
Source File: ArrowVectorAccessors.java From iceberg with Apache License 2.0
@Override
final ColumnarArray getArray(int rowId) {
  ArrowBuf offsets = vector.getOffsetBuffer();
  int index = rowId * ListVector.OFFSET_WIDTH;
  int start = offsets.getInt(index);
  int end = offsets.getInt(index + ListVector.OFFSET_WIDTH);
  return new ColumnarArray(arrayData, start, end - start);
}
Example #8
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
private void setNulls(NullabilityHolder nullabilityHolder, int idx, int numValues, ArrowBuf validityBuffer) {
  nullabilityHolder.setNulls(idx, numValues);
  if (setArrowValidityVector) {
    for (int i = 0; i < numValues; i++) {
      BitVectorHelper.setValidityBit(validityBuffer, idx + i, 0);
    }
  }
}
Example #9
Source File: ArrowWrite.java From ArrowExample with Apache License 2.0
private void showFieldLayout(Field field, FieldVector fieldVector) {
  // per-field execution
  TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType());
  List<BufferLayout.BufferType> vectorTypes = typeLayout.getBufferTypes();
  ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()];
  if (vectorTypes.size() != vectorBuffers.length) {
    throw new IllegalArgumentException("vector types and vector buffers are not the same size: "
        + vectorTypes.size() + " != " + vectorBuffers.length);
  }
  System.out.println(" ----- [ " + field.toString() + " ] -------- ");
  System.out.println("FieldVector type: " + fieldVector.getClass().getCanonicalName());
  System.out.println("TypeLayout is " + typeLayout.toString() + " vectorSize is " + vectorTypes.size());
  for (int i = 0; i < vectorTypes.size(); i++) {
    /* The entries in the vector type tell how to locate the buffers: primitive types
     * have only 2 (validity and data), whereas binary types have 3 (validity, offset
     * and data). Presumably, dropping the nullable part would leave only 1 and 2
     * buffer types respectively -- TODO: confirm. */
    System.out.println(" \t vector type entries [" + i + "] " + vectorTypes.get(i).toString());
  }
  System.out.println("*********************************************");
  // fieldVector.allocateNew();
  // fieldVector.getMutator().setValueCount(this.entries);
  // System.out.println(" Setting up the mutator count to be " + this.entries
  //     + " ** " + fieldVector.getMutator().getClass().getCanonicalName()
  //     + " >>>>> " + field.getFieldType().getType().getTypeID());
  // /* based upon the schema */
  // for (int v = 0; v < vectorTypes.size(); v++) {
  //   /* we get the specific ArrowVectorType and associated BufferBacked */
  //   ArrowVectorType vectorType = vectorTypes.get(v);
  //   BufferBacked bufferBacked = fieldInnerVectors.get(v);
  //   /* I don't like this explicit casting */
  //   ValueVector valueVector = (ValueVector) bufferBacked;
  //   System.out.println("\t valueVector minor type is : " + valueVector.getMinorType());
  //   valueVector.setInitialCapacity(this.entries);
  //   valueVector.allocateNew();
  //   valueVector.clear();
  // }
}
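The comment's open question can be checked directly: TypeLayout is computed from the ArrowType alone, so nullability does not change the reported buffers. A quick sketch of the buffer counts for two common types (exact output strings vary by Arrow version; the counts are the point):

import org.apache.arrow.vector.TypeLayout;
import org.apache.arrow.vector.types.pojo.ArrowType;

public class BufferCounts {
  public static void main(String[] args) {
    // Fixed-width int: 2 buffers (validity + data).
    TypeLayout intLayout = TypeLayout.getTypeLayout(new ArrowType.Int(32, true));
    System.out.println(intLayout.getBufferTypes().size()); // 2

    // Variable-width utf8: 3 buffers (validity + offset + data).
    TypeLayout utf8Layout = TypeLayout.getTypeLayout(new ArrowType.Utf8());
    System.out.println(utf8Layout.getBufferTypes().size()); // 3
  }
}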
Example #10
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
public void readBatchOfTimestampMillis(final FieldVector vector, final int startOffset,
                                       final int typeWidth, final int numValsToRead,
                                       NullabilityHolder nullabilityHolder,
                                       ValuesAsBytesReader valuesReader) {
  int bufferIdx = startOffset;
  int left = numValsToRead;
  while (left > 0) {
    if (this.currentCount == 0) {
      this.readNextGroup();
    }
    int numValues = Math.min(left, this.currentCount);
    switch (mode) {
      case RLE:
        ArrowBuf validityBuffer = vector.getValidityBuffer();
        if (currentValue == maxDefLevel) {
          for (int i = 0; i < numValues; i++) {
            // each value goes to its own slot; * 1000 widens millis to micros
            vector.getDataBuffer().setLong((bufferIdx + i) * typeWidth, valuesReader.readLong() * 1000);
          }
          nullabilityHolder.setNotNulls(bufferIdx, numValues);
          if (setArrowValidityVector) {
            for (int i = 0; i < numValues; i++) {
              BitVectorHelper.setValidityBitToOne(validityBuffer, bufferIdx + i);
            }
          }
        } else {
          setNulls(nullabilityHolder, bufferIdx, numValues, validityBuffer);
        }
        bufferIdx += numValues;
        break;
      case PACKED:
        for (int i = 0; i < numValues; i++) {
          if (packedValuesBuffer[packedValuesBufferIdx++] == maxDefLevel) {
            vector.getDataBuffer().setLong(bufferIdx * typeWidth, valuesReader.readLong() * 1000);
            nullabilityHolder.setNotNull(bufferIdx);
            if (setArrowValidityVector) {
              BitVectorHelper.setValidityBitToOne(vector.getValidityBuffer(), bufferIdx);
            }
          } else {
            setNull(nullabilityHolder, bufferIdx, vector.getValidityBuffer());
          }
          bufferIdx++;
        }
        break;
    }
    left -= numValues;
    currentCount -= numValues;
  }
}
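The two switch arms mirror Parquet's RLE/bit-packed hybrid encoding of definition levels: an RLE run means the next numValues positions all share one definition level, so the null/not-null decision is made once for the whole run, while a PACKED group carries an individual level per position and each slot is tested against maxDefLevel separately. The readLong() * 1000 converts the column's millisecond timestamps to the microsecond unit Iceberg uses in its Arrow vectors.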
Example #11
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
public void readBatchOfDictionaryEncodedLongs(
    final FieldVector vector, final int startOffset, final int typeWidth, final int numValsToRead,
    NullabilityHolder nullabilityHolder,
    VectorizedDictionaryEncodedParquetValuesReader dictionaryEncodedValuesReader,
    Dictionary dict) {
  int idx = startOffset;
  int left = numValsToRead;
  while (left > 0) {
    if (this.currentCount == 0) {
      this.readNextGroup();
    }
    int numValues = Math.min(left, this.currentCount);
    ArrowBuf validityBuffer = vector.getValidityBuffer();
    switch (mode) {
      case RLE:
        if (currentValue == maxDefLevel) {
          dictionaryEncodedValuesReader.readBatchOfDictionaryEncodedLongs(
              vector, idx, numValues, dict, nullabilityHolder, typeWidth);
        } else {
          setNulls(nullabilityHolder, idx, numValues, validityBuffer);
        }
        idx += numValues;
        break;
      case PACKED:
        for (int i = 0; i < numValues; i++) {
          if (packedValuesBuffer[packedValuesBufferIdx++] == maxDefLevel) {
            vector.getDataBuffer().setLong(
                idx * typeWidth, dict.decodeToLong(dictionaryEncodedValuesReader.readInteger()));
            nullabilityHolder.setNotNull(idx);
            if (setArrowValidityVector) {
              BitVectorHelper.setValidityBitToOne(vector.getValidityBuffer(), idx);
            }
          } else {
            setNull(nullabilityHolder, idx, validityBuffer);
          }
          idx++;
        }
        break;
    }
    left -= numValues;
    currentCount -= numValues;
  }
}
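Note the division of labor in the dictionary-encoded readers (this one and the two that follow): an RLE run whose definition level equals maxDefLevel is delegated wholesale to the dictionary-encoded batch reader, while a PACKED group decodes one dictionary id at a time, mapping it to a concrete value with dict.decodeToLong (or decodeToFloat, and so on) before writing it into the vector's data buffer.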
Example #12
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
public void readBatchOfDictionaryEncodedTimestampMillis(
    final FieldVector vector, final int startOffset, final int typeWidth, final int numValsToRead,
    NullabilityHolder nullabilityHolder,
    VectorizedDictionaryEncodedParquetValuesReader dictionaryEncodedValuesReader,
    Dictionary dict) {
  int idx = startOffset;
  int left = numValsToRead;
  while (left > 0) {
    if (this.currentCount == 0) {
      this.readNextGroup();
    }
    int numValues = Math.min(left, this.currentCount);
    ArrowBuf validityBuffer = vector.getValidityBuffer();
    switch (mode) {
      case RLE:
        if (currentValue == maxDefLevel) {
          dictionaryEncodedValuesReader.readBatchOfDictionaryEncodedTimestampMillis(
              vector, idx, numValues, dict, nullabilityHolder, typeWidth);
        } else {
          setNulls(nullabilityHolder, idx, numValues, validityBuffer);
        }
        idx += numValues;
        break;
      case PACKED:
        for (int i = 0; i < numValues; i++) {
          if (packedValuesBuffer[packedValuesBufferIdx++] == maxDefLevel) {
            vector.getDataBuffer().setLong(
                idx * typeWidth,
                dict.decodeToLong(dictionaryEncodedValuesReader.readInteger()) * 1000);
            nullabilityHolder.setNotNull(idx);
            if (setArrowValidityVector) {
              BitVectorHelper.setValidityBitToOne(vector.getValidityBuffer(), idx);
            }
          } else {
            setNull(nullabilityHolder, idx, validityBuffer);
          }
          idx++;
        }
        break;
    }
    left -= numValues;
    currentCount -= numValues;
  }
}
Example #13
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
public void readBatchOfDictionaryEncodedFloats(
    final FieldVector vector, final int startOffset, final int typeWidth, final int numValsToRead,
    NullabilityHolder nullabilityHolder,
    VectorizedDictionaryEncodedParquetValuesReader dictionaryEncodedValuesReader,
    Dictionary dict) {
  int idx = startOffset;
  int left = numValsToRead;
  while (left > 0) {
    if (this.currentCount == 0) {
      this.readNextGroup();
    }
    int num = Math.min(left, this.currentCount);
    ArrowBuf validityBuffer = vector.getValidityBuffer();
    switch (mode) {
      case RLE:
        if (currentValue == maxDefLevel) {
          dictionaryEncodedValuesReader.readBatchOfDictionaryEncodedFloats(
              vector, idx, num, dict, nullabilityHolder, typeWidth);
        } else {
          setNulls(nullabilityHolder, idx, num, validityBuffer);
        }
        idx += num;
        break;
      case PACKED:
        for (int i = 0; i < num; i++) {
          if (packedValuesBuffer[packedValuesBufferIdx++] == maxDefLevel) {
            vector.getDataBuffer()
                .setFloat(idx * typeWidth, dict.decodeToFloat(dictionaryEncodedValuesReader.readInteger()));
            nullabilityHolder.setNotNull(idx);
            if (setArrowValidityVector) {
              BitVectorHelper.setValidityBitToOne(vector.getValidityBuffer(), idx);
            }
          } else {
            setNull(nullabilityHolder, idx, validityBuffer);
          }
          idx++;
        }
        break;
    }
    left -= num;
    currentCount -= num;
  }
}
Example #14
Source File: VectorizedParquetDefinitionLevelReader.java From iceberg with Apache License 2.0
private void setNull(NullabilityHolder nullabilityHolder, int bufferIdx, ArrowBuf validityBuffer) {
  nullabilityHolder.setNull(bufferIdx);
  if (setArrowValidityVector) {
    BitVectorHelper.setValidityBit(validityBuffer, bufferIdx, 0);
  }
}
Example #15
Source File: BlockAllocator.java From aws-athena-query-federation with Apache License 2.0
/**
 * Creates an empty Apache Arrow buffer of the requested size. This is useful when working
 * with certain Apache Arrow types directly.
 *
 * @param size The number of bytes to reserve for the requested buffer.
 * @return The resulting Apache Arrow buffer.
 * @note Once created, the buffer is also registered with this BlockAllocator such that
 *       closing this BlockAllocator also closes this buffer, freeing its Apache Arrow resources.
 */
ArrowBuf createBuffer(int size);