org.apache.spark.sql.types.StructType#size

Source File: DBClientWrapper.java From spark-data-sources with MIT License

6 votes

/**
 * Converts a Spark row into an {@code edb.common.Row}.
 *
 * <p>Each field of {@code type} whose data type is string, double or long is copied
 * (by position) from {@code row} into the result under the field's name. Fields of any
 * other data type are skipped, so the result may have fewer fields than {@code type}.
 *
 * @param row  the Spark row to read values from
 * @param type the schema describing the columns of {@code row}
 * @return a newly created row holding the supported fields
 */
public static edb.common.Row sparkToDBRow(org.apache.spark.sql.Row row, StructType type) {
    final edb.common.Row result = new edb.common.Row();
    final StructField[] columns = type.fields();
    final int columnCount = type.size();

    for (int idx = 0; idx < columnCount; idx++) {
        final StructField column = columns[idx];

        if (column.dataType() == DataTypes.StringType) {
            result.addField(new edb.common.Row.StringField(column.name(), row.getString(idx)));
            continue;
        }
        if (column.dataType() == DataTypes.DoubleType) {
            result.addField(new edb.common.Row.DoubleField(column.name(), row.getDouble(idx)));
            continue;
        }
        if (column.dataType() == DataTypes.LongType) {
            result.addField(new edb.common.Row.Int64Field(column.name(), row.getLong(idx)));
            continue;
        }
        // TODO: type leakage -- any other data type is currently dropped without notice.
    }

    return result;
}

Source File: UnsafeFixedWidthAggregationMap.java From indexr with Apache License 2.0

6 votes

/**
 * Builds an aggregation map whose values are fixed-width aggregation buffers.
 *
 * @param emptyAggregationBuffer  the default value for new keys (a "zero" of the agg. function)
 * @param aggregationBufferSchema the schema of the aggregation buffer, used for row conversion.
 * @param groupingKeySchema       the schema of the grouping key, used for row conversion.
 * @param taskMemoryManager       the memory manager handed to the backing {@code BytesToBytesMap}.
 * @param initialCapacity         the initial capacity of the map (a sizing hint to avoid re-hashing).
 * @param pageSizeBytes           the data page size, in bytes; limits the maximum record size.
 * @param enablePerfMetrics       if true, performance metrics will be recorded (has minor perf impact)
 */
public UnsafeFixedWidthAggregationMap(
        InternalRow emptyAggregationBuffer,
        StructType aggregationBufferSchema,
        StructType groupingKeySchema,
        TaskMemoryManager taskMemoryManager,
        int initialCapacity,
        long pageSizeBytes,
        boolean enablePerfMetrics) {
    // Plain configuration captured as-is.
    this.aggregationBufferSchema = aggregationBufferSchema;
    this.groupingKeySchema = groupingKeySchema;
    this.enablePerfMetrics = enablePerfMetrics;

    // Reusable row with one slot per aggregation-buffer field.
    final int bufferFieldCount = aggregationBufferSchema.size();
    this.currentAggregationBuffer = new UnsafeRow(bufferFieldCount);

    this.groupingKeyProjection = UnsafeProjection.createFromSchema(groupingKeySchema);
    this.map = new BytesToBytesMap(
            taskMemoryManager, initialCapacity, pageSizeBytes, enablePerfMetrics);

    // Project the caller's "zero" buffer through the buffer schema and keep its bytes
    // as the initial value for new keys.
    this.emptyAggregationBuffer = UnsafeProjection
            .createFromSchema(aggregationBufferSchema)
            .apply(emptyAggregationBuffer)
            .getBytes();
}

Source File: Reader.java From iceberg with Apache License 2.0

5 votes

/**
 * Records the data type of every field of {@code struct}, in field order.
 *
 * @param struct the Spark struct type whose field types are captured
 */
StructLikeInternalRow(StructType struct) {
  this.types = new DataType[struct.size()];
  int position = 0;
  for (StructField field : struct.fields()) {
    types[position] = field.dataType();
    position += 1;
  }
}

Source File: SortPrefixUtils.java From indexr with Apache License 2.0

5 votes

/**
 * Returns a prefix comparator that orders by the first field of {@code schema}, ascending.
 * For a schema without fields, the returned comparator treats every pair as equal.
 */
public static PrefixComparator getPrefixComparator(StructType schema) {
    if (schema.size() == 0) {
        // Nothing to compare on: all prefixes are considered equal.
        return (a, b) -> 0;
    }

    final BoundReference firstField = new BoundReference(0, schema.get(0).dataType);
    final SortOrder ascendingOnFirst =
            new SortOrder(firstField, SortOrder.SortDirection.Ascending);
    return getPrefixComparator(ascendingOnFirst);
}

Source File: BitemporalHistoryPlanner.java From envelope with Apache License 2.0

5 votes

/**
 * Builds a row over the combined "system effective from" and "system effective to" schemas,
 * starting with every value null, and then lets each of the two time models set its current
 * system time on it.
 *
 * <p>Note: {@code currentSystemTimeMillis} is not read by this method.
 */
private Row getCurrentSystemTimeRow(long currentSystemTimeMillis) {
  // Schema = "from" model fields followed by the "to" model fields.
  StructType combinedSchema = SchemaUtils.appendFields(
      systemEffectiveFromTimeModel.getSchema(),
      Lists.newArrayList(systemEffectiveToTimeModel.getSchema().fields()));

  Row emptyRow = new RowWithSchema(combinedSchema, new Object[combinedSchema.size()]);
  Row withFromTime = systemEffectiveFromTimeModel.setCurrentSystemTime(emptyRow);
  return systemEffectiveToTimeModel.setCurrentSystemTime(withFromTime);
}

Source File: ExternalTableUtils.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * When {@code storedAs} equals {@code "a"} (compared after lower-casing), rewrites every
 * {@code DateType} column of {@code schema} as a {@code StringType} column with the same
 * name, nullability and metadata. For any other {@code storedAs} the schema is untouched.
 *
 * <p>The replacement is written into the array returned by {@code schema.fields()}, and the
 * same {@code schema} instance is returned.
 *
 * @param schema   the schema to adjust
 * @param storedAs the storage format code; must not be null
 * @return the {@code schema} argument
 */
public static StructType supportAvroDateType(StructType schema, String storedAs) {
    if (!storedAs.toLowerCase().equals("a")) {
        return schema;
    }

    final StructField[] columns = schema.fields();
    for (int idx = 0; idx < schema.size(); idx++) {
        final StructField current = columns[idx];
        if (!current.dataType().equals(DataTypes.DateType)) {
            continue;
        }
        // Same column, but typed as a string instead of a date.
        columns[idx] = DataTypes.createStructField(
                current.name(), DataTypes.StringType, current.nullable(), current.metadata());
    }
    return schema;
}

Source File: SpliceOrcNewInputFormat.java From spliceengine with GNU Affero General Public License v3.0

5 votes

/**
 * Pairs every selected position of {@code columnIds} with the next field of {@code rowStruct}.
 *
 * <p>Positions whose column id is {@code -1} are skipped. Each remaining position {@code i}
 * is mapped, in order, to the next unused entry of {@code rowStruct.fields()}. The number of
 * non-{@code -1} column ids must equal the number of fields in {@code rowStruct}.
 *
 * @param columnIds the column ids; {@code -1} marks a position that is not selected
 * @param rowStruct the struct whose fields are assigned to the selected positions
 * @return a map from position in {@code columnIds} to the matching struct entry
 * @throws IOException if the number of selected column ids differs from the number of
 *                     struct fields (whether there are too few or too many)
 */
public static Map<Integer,DataType> getColumnsAndTypes(List<Integer> columnIds, StructType rowStruct) throws IOException {
    int structTypeSize = rowStruct.size();
    int columnIdsSize = columnIds.size();
    // NOTE(review): the values stored below are the struct's field objects, not the result of
    // dataType(), even though the declared return type is Map<Integer,DataType>. The raw map
    // hides that mismatch. Left unchanged because callers are not visible here -- confirm
    // whether this should store fields()[j].dataType() instead.
    Map columnsAndTypes = new HashMap<>();
    boolean tooManyColumnIds = false;
    for (int i = 0, j = 0; i < columnIdsSize; i++) {
        if (columnIds.get(i) == -1) {
            continue;
        }
        if (j >= structTypeSize) {
            // More selected ids than struct fields: stop before indexing past the end of
            // the fields array so the mismatch is reported as an IOException below.
            tooManyColumnIds = true;
            break;
        }
        columnsAndTypes.put(i, rowStruct.fields()[j]);
        j++;
    }
    if (tooManyColumnIds || columnsAndTypes.size() != structTypeSize) {
        throw new IOException(String.format("Column IDS do not match the underlying struct columnIds(%s), struct(%s)",columnIds,rowStruct.json()));
    }
    return columnsAndTypes;
}

Source File: UnsafeKVExternalSorter.java From indexr with Apache License 2.0

4 votes

/**
 * Creates a key/value external sorter, optionally seeded with the records already held in
 * a {@code BytesToBytesMap}.
 *
 * <p>The prefix generator, prefix comparator and record ordering are all derived from
 * {@code keySchema}. When {@code map} is {@code null} a fresh sorter is created. Otherwise
 * every entry of {@code map} is registered with an in-memory sorter backed by
 * {@code map.getArray()}, the external sorter is created on top of that in-memory sorter,
 * and {@code map} is reset before the constructor returns.
 *
 * @param keySchema     schema of the keys; drives ordering and prefix computation
 * @param valueSchema   schema of the values; only stored by this constructor
 * @param pageSizeBytes page size, in bytes, passed through to the underlying sorter
 * @param map           existing map whose records should be sorted, or {@code null} for none
 * @throws IOException  declared by this constructor; presumably raised by the underlying
 *                      sorter creation -- TODO confirm which call throws
 */
public UnsafeKVExternalSorter(
        StructType keySchema,
        StructType valueSchema,
        //BlockManager blockManager,
        long pageSizeBytes,
        @Nullable BytesToBytesMap map) throws IOException {
    this.keySchema = keySchema;
    this.valueSchema = valueSchema;
    // NOTE(review): taskContext is dereferenced below without a null check; this assumes
    // TaskContext.get() returns a non-null context here -- confirm.
    final TaskContext taskContext = TaskContext.get();

    // Everything needed to compare records is derived from the key schema only.
    prefixComputer = SortPrefixUtils.createPrefixGenerator(keySchema);
    PrefixComparator prefixComparator = SortPrefixUtils.getPrefixComparator(keySchema);
    BaseOrdering ordering = BaseOrdering.create(keySchema);
    KVComparator recordComparator = new KVComparator(ordering, keySchema.size());

    TaskMemoryManager taskMemoryManager = taskContext.taskMemoryManager();

    if (map == null) {
        // No pre-existing records: start with an empty sorter.
        sorter = UnsafeExternalSorter.create(
                taskMemoryManager,
                //blockManager,
                taskContext,
                recordComparator,
                prefixComparator,
    /* initialSize */ 4096,
                pageSizeBytes);
    } else {
        // During spilling, the array in map will not be used, so we can borrow that and use it
        // as the underlying array for the in-memory sorter (it's always large enough).
        // Since we will not grow the array, it's fine to pass `null` as consumer.
        final UnsafeInMemorySorter inMemSorter = new UnsafeInMemorySorter(
                null, taskMemoryManager, recordComparator, prefixComparator, map.getArray());

        // We cannot use the destructive iterator here because we are reusing the existing memory
        // pages in BytesToBytesMap to hold records during sorting.
        // The only new memory we are allocating is the pointer/prefix array.
        BytesToBytesMap.MapIterator iter = map.iterator();
        final int numKeyFields = keySchema.size();
        // Single row object re-pointed at each key in turn to compute its prefix.
        UnsafeRow row = new UnsafeRow(numKeyFields);
        while (iter.hasNext()) {
            final BytesToBytesMap.Location loc = iter.next();
            final Object baseObject = loc.getKeyAddress().getBaseObject();
            final long baseOffset = loc.getKeyAddress().getBaseOffset();

            // Get encoded memory address
            // baseObject + baseOffset point to the beginning of the key data in the map, but
            // the KV-pair's length data is stored in the word immediately before that address
            MemoryBlock page = loc.getMemoryPage();
            long address = taskMemoryManager.encodePageNumberAndOffset(page, baseOffset - 8);

            // Compute prefix
            row.pointTo(baseObject, baseOffset, loc.getKeyLength());
            final long prefix = prefixComputer.computePrefix(row);

            inMemSorter.insertRecord(address, prefix);
        }

        // A new KVComparator instance (same ordering and key width as recordComparator) is
        // created for the external sorter rather than sharing the one given to inMemSorter.
        sorter = UnsafeExternalSorter.createWithExistingInMemorySorter(
                taskMemoryManager,
                //blockManager,
                taskContext,
                new KVComparator(ordering, keySchema.size()),
                prefixComparator,
    /* initialSize */ 4096,
                pageSizeBytes,
                inMemSorter);

        // reset the map, so we can re-use it to insert new records. the inMemSorter will not be
        // used anymore, so the underlying array can be used by the map again.
        map.reset();
    }
}

Source File: ColumnarBatch.java From spliceengine with GNU Affero General Public License v3.0

4 votes

private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) {
    this.numRows = maxRows;
    this.columns = new WritableColumnVector[schema.size()];
}

Source File: ColumnarBatch.java From spliceengine with GNU Affero General Public License v3.0

4 votes

private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) {
    this.numRows = maxRows;
    this.columns = new WritableColumnVector[schema.size()];
}

Source File: ColumnarBatch.java From spliceengine with GNU Affero General Public License v3.0

4 votes

private ColumnarBatch(StructType schema, int maxRows, MemoryMode memMode) {
    this.numRows = maxRows;
    this.columns = new WritableColumnVector[schema.size()];
}

Java Code Examples for org.apache.spark.sql.types.StructType#size()