Java Code Examples for org.datavec.api.transform.schema.Schema#numColumns()

The following examples show how to use org.datavec.api.transform.schema.Schema#numColumns(). Each example is taken from an open-source project; the source file, project, and license are noted above it.
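
For orientation, here is a minimal, self-contained sketch of the numColumns() contract before the project examples. The column names are hypothetical; the Schema.Builder, getName(int), and getType(int) calls are the standard DataVec API. numColumns() returns the number of columns in the schema, and the examples below all use it as the loop bound when iterating columns by index.

import org.datavec.api.transform.schema.Schema;

public class NumColumnsSketch {
    public static void main(String[] args) {
        // A hypothetical three-column schema built with the DataVec Schema.Builder
        Schema schema = new Schema.Builder()
                .addColumnString("name")
                .addColumnInteger("age")
                .addColumnDouble("score")
                .build();

        // numColumns() == 3 here; valid column indices are [0, numColumns())
        for (int i = 0; i < schema.numColumns(); i++) {
            System.out.println(i + ": " + schema.getName(i) + " (" + schema.getType(i) + ")");
        }
    }
}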
Example 1
Source File: BaseJsonArrayConverter.java    From konduit-serving with Apache License 2.0
protected Pair<Map<Integer, Integer>, List<? extends Map<FieldName, ?>>> doTransformProcessConvertPmmlWithErrors(Schema schema, JsonArray jsonArray, TransformProcess transformProcess, DataPipelineErrorHandler dataPipelineErrorHandler) {
    Schema outputSchema = transformProcess.getFinalSchema();

    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but it does not match the target input schema");
    }

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    Pair<Map<Integer, Integer>, ArrowWritableRecordBatch> convertWithErrors = convertWithErrors(schema, jsonArray, transformProcess, dataPipelineErrorHandler);
    ArrowWritableRecordBatch conversion = convertWithErrors.getRight();
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }

        ret.add(record);
    }

    return Pair.of(convertWithErrors.getKey(), ret);
}
 
Example 2
Source File: ArrowConverter.java    From DataVec with Apache License 2.0
/**
 * Convert a set of input strings to arrow columns
 * @param bufferAllocator the buffer allocator to use
 * @param schema the schema to use
 * @param dataVecRecord the collection of input strings to process
 * @return the created vectors
 */
public static List<FieldVector> toArrowColumnsString(final BufferAllocator bufferAllocator, final Schema schema, List<List<String>> dataVecRecord) {
    int numRows = dataVecRecord.size();

    List<FieldVector> ret = createFieldVectors(bufferAllocator,schema,numRows);
    // TODO: the iteration scheme here may need to change

    for(int j = 0; j < schema.numColumns(); j++) {
        FieldVector fieldVector = ret.get(j);
        for(int row = 0; row < numRows; row++) {
            String writable = dataVecRecord.get(row).get(j);
            setValue(schema.getType(j),fieldVector,writable,row);
        }

    }

    return ret;
}
 
Example 3
Source File: JsonArrayMapConverter.java    From konduit-serving with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public List<? extends Map<FieldName, ?>> convertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    if (transformProcess != null) {
        return doTransformProcessConvertPmml(schema, jsonArray, transformProcess);
    }

    List<FieldName> fieldNames = getNameRepresentationFor(schema);

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());

    for (int i = 0; i < jsonArray.size(); i++) {
        JsonObject jsonObject = jsonArray.getJsonObject(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < schema.numColumns(); j++) {
            record.put(fieldNames.get(j), jsonObject.getValue(schema.getName(j)));
        }

        ret.add(record);
    }

    return ret;
}
 
Example 4
Source File: ArrowConverter.java    From deeplearning4j with Apache License 2.0
/**
 * Given a buffer allocator and datavec schema,
 * convert the passed in batch of records
 * to a set of arrow columns
 * @param bufferAllocator the buffer allocator to use
 * @param schema the schema to convert
 * @param dataVecRecord the data vec record batch to convert
 * @return the converted list of {@link FieldVector}
 */
public static List<FieldVector> toArrowColumns(final BufferAllocator bufferAllocator, final Schema schema, List<List<Writable>> dataVecRecord) {
    int numRows = dataVecRecord.size();

    List<FieldVector> ret = createFieldVectors(bufferAllocator,schema,numRows);

    for(int j = 0; j < schema.numColumns(); j++) {
        FieldVector fieldVector = ret.get(j);
        int row = 0;
        for(List<Writable> record : dataVecRecord) {
            Writable writable = record.get(j);
            setValue(schema.getType(j),fieldVector,writable,row);
            row++;
        }

    }

    return ret;
}
 
Example 5
Source File: ArrowConverter.java    From DataVec with Apache License 2.0
private static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator, Schema schema, int numRows) {
    List<FieldVector> ret = new ArrayList<>(schema.numColumns());

    for(int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getType(i)) {
            case Integer: ret.add(intVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Long: ret.add(longVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Double: ret.add(doubleVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Float: ret.add(floatVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Boolean: ret.add(booleanVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case String: ret.add(stringVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Categorical: ret.add(stringVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            case Time: ret.add(timeVectorOf(bufferAllocator,schema.getName(i),numRows)); break;
            default: throw new IllegalArgumentException("Illegal type found " + schema.getType(i));
        }
    }

    return ret;
}
 
Example 6
Source File: BaseSequenceExpansionTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    //Same schema *except* for the expanded columns

    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for(String s : requiredColumns){
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    int modColumnIdx = 0;
    for(ColumnMetaData m : inputSchema.getColumnMetaData()){

        if(requiredColumns.contains(m.getName())){
            //Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            //Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
 
Example 7
Source File: DataFrames.java    From DataVec with Apache License 2.0
/**
 * Convert a DataVec schema to a
 * Spark struct type
 *
 * @param schema the schema to convert
 * @return the equivalent Spark StructType
 */
public static StructType fromSchema(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns()];
    for (int i = 0; i < structFields.length; i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i] =
                                new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                                "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
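
A hedged usage sketch for fromSchema (the schema and its column names are hypothetical, and the snippet assumes the example's imports). Since the method only supports numeric column types, a schema of doubles and integers converts cleanly, and the resulting StructType has exactly schema.numColumns() fields.

Schema schema = new Schema.Builder()
        .addColumnDouble("x")
        .addColumnInteger("y")
        .build();
StructType structType = DataFrames.fromSchema(schema);
// structType.fields().length == schema.numColumns() == 2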
 
Example 8
Source File: RemoveColumnsTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    int nToRemove = columnsToRemove.length;
    int newNumColumns = schema.numColumns() - nToRemove;
    if (newNumColumns <= 0)
        throw new IllegalStateException("Number of columns after executing operation is " + newNumColumns
                        + " (is <= 0). " + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                        + Arrays.toString(columnsToRemove));

    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> set = new HashSet<>();
    Collections.addAll(set, columnsToRemove);

    List<ColumnMetaData> newMeta = new ArrayList<>(newNumColumns);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (!set.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 9
Source File: DataFrames.java    From deeplearning4j with Apache License 2.0
/**
 * Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 * of this record in the original time series.<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(Dataset)}
 *
 * @param schema Schema to convert
 * @return StructType for the schema
 */
public static StructType fromSchemaSequence(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns() + 2];

    structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());

    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i + 2] =
                                new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException(
                                "This api should not be used with strings , binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
 
Example 10
Source File: CategoricalToOneHotTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            //Convert this to one-hot:
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
 
Example 11
Source File: PmmlInferenceExecutionerStepRunner.java    From konduit-serving with Apache License 2.0
@Override
public Record[] transform(Record[] input) {
    Schema schema = pipelineStep.inputSchemaForName("default");
    List<Map<FieldName, Object>> pmmlInput = new ArrayList<>(input.length);
    List<FieldName> fieldNames = new ArrayList<>();
    for (int i = 0; i < schema.numColumns(); i++) {
        fieldNames.add(FieldName.create(schema.getName(i)));
    }

    for (Record record : input) {
        Map<FieldName, Object> pmmlRecord = new LinkedHashMap<>();
        for (int i = 0; i < record.getRecord().size(); i++) {
            pmmlRecord.put(fieldNames.get(i), WritableValueRetriever.getUnderlyingValue(record.getRecord().get(i)));
        }

        pmmlInput.add(pmmlRecord);
    }

    List<Map<FieldName, Object>> execute = pmmlInferenceExecutioner.execute(pmmlInput);
    Record[] ret = new Record[1];
    String json = ObjectMappers.toJson(execute);

    ret[0] = new org.datavec.api.records.impl.Record(Collections.singletonList(new Text(json)), null);

    return ret;
}
 
Example 12
Source File: IntegerToOneHotTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            //Convert this to one-hot:
            for (int x = minValue; x <= maxValue; x++) {
                String newName = s + "[" + x + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
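
A quick sanity check for the transform above, as a sketch (hypothetical column name; IntegerToOneHotTransform(columnName, minValue, maxValue) is the DataVec constructor, and the snippet assumes the example's imports). One-hot expansion replaces the single integer column with maxValue - minValue + 1 binary columns, which is exactly how numColumns() grows:

Schema in = new Schema.Builder()
        .addColumnInteger("digit", 0, 9)   // integer column with allowed range [0, 9]
        .build();
Schema out = new IntegerToOneHotTransform("digit", 0, 9).transform(in);
// in.numColumns() == 1, out.numColumns() == 10 (columns "digit[0]" .. "digit[9]")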
 
Example 13
Source File: ArrowUtils.java    From konduit-serving with Apache License 2.0
public static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator, Schema schema, int numRows) {
    List<FieldVector> ret = new ArrayList<>(schema.numColumns());

    for (int i = 0; i < schema.numColumns(); ++i) {
        switch (schema.getType(i)) {
            case Integer:
                ret.add(intVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Float:
                ret.add(floatVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Double:
                ret.add(doubleVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Long:
                ret.add(longVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case NDArray:
                ret.add(ndarrayVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Boolean:
                ret.add(booleanVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Categorical:
            case String:
                ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Time:
                ret.add(timeVectorOf(bufferAllocator, schema.getName(i), numRows));
                break;
            case Bytes:
            default:
                throw new IllegalArgumentException("Illegal type found for creation of field vectors: " + schema.getType(i));
        }
    }

    return ret;
}
 
Example 14
Source File: JsonArrayMapConverter.java    From konduit-serving with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public Pair<Map<Integer, Integer>, List<? extends Map<FieldName, ?>>> convertPmmlWithErrors(Schema schema, JsonArray jsonArray, TransformProcess transformProcess, DataPipelineErrorHandler dataPipelineErrorHandler) {
    if (transformProcess != null) {
        return doTransformProcessConvertPmmlWithErrors(schema, jsonArray, transformProcess, dataPipelineErrorHandler);
    }

    List<FieldName> fieldNames = getNameRepresentationFor(schema);

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    Map<Integer, Integer> mapping = new LinkedHashMap<>();
    int numSucceeded = 0;
    for (int i = 0; i < jsonArray.size(); i++) {
        try {
            JsonObject jsonObject = jsonArray.getJsonObject(i);
            if (jsonObject.size() != schema.numColumns()) {
                throw new IllegalArgumentException("Found illegal item at row " + i);
            }
            Map<FieldName, Object> record = new LinkedHashMap<>();
            for (int j = 0; j < schema.numColumns(); j++) {
                record.put(fieldNames.get(j), jsonObject.getValue(schema.getName(j)));
            }

            mapping.put(numSucceeded, i);
            numSucceeded++;
            ret.add(record);
        } catch (Exception e) {
            dataPipelineErrorHandler.onError(e, jsonArray.getJsonObject(i), i);
        }
    }

    return Pair.of(mapping, ret);
}
 
Example 15
Source File: SchemaTypeUtils.java    From konduit-serving with Apache License 2.0
/**
 * Create a mapping of name {@link SchemaType}
 * based on the {@link Schema}
 *
 * @param schema the schema to decompose
 * @return the map of name to {@link SchemaType}
 */
public static Map<String, SchemaType> typeMappingsForSchema(Schema schema) {
    Map<String, SchemaType> ret = new LinkedHashMap<>();
    for (int i = 0; i < schema.numColumns(); i++) {
        ret.put(schema.getName(i), schemaTypeForColumnType(schema.getType(i)));
    }

    return ret;
}
 
Example 16
Source File: ParseDoubleTransform.java    From DataVec with Apache License 2.0
/**
 * Get the output schema for this transformation, given an input schema
 *
 * @param inputSchema the input schema
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder newSchema = new Schema.Builder();
    for (int i = 0; i < inputSchema.numColumns(); i++) {
        if (inputSchema.getType(i) == ColumnType.String) {
            newSchema.addColumnDouble(inputSchema.getMetaData(i).getName());
        } else {
            newSchema.addColumn(inputSchema.getMetaData(i));
        }
    }
    return newSchema.build();
}
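
A hedged usage sketch (hypothetical column names, assuming ParseDoubleTransform's no-argument constructor and the example's imports). The transform keeps numColumns() fixed and only retypes String columns to Double:

Schema input = new Schema.Builder()
        .addColumnString("priceAsString")
        .addColumnLong("timestamp")
        .build();
Schema output = new ParseDoubleTransform().transform(input);
// output.numColumns() == input.numColumns() == 2; "priceAsString" is now a Double column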
 
Example 17
Source File: RegressionOutputAdapter.java    From konduit-serving with Apache License 2.0
/**
 * Create the output adapter
 * with the given output schema
 *
 * @param schema the schema of the output
 */
public RegressionOutputAdapter(Schema schema) {
    this.schema = schema;
    fieldNames = new ArrayList<>(schema.numColumns());
    for (int i = 0; i < schema.numColumns(); i++) {
        fieldNames.add(FieldName.create(schema.getName(i)));
    }
}
 
Example 18
Source File: PivotTransform.java    From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                        + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            //Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";

                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);

                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; //Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    //Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean:
                defaultValue = new Text("false");
                break;
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 19
Source File: PivotTransform.java    From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                        + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            //Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";

                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);

                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; //Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    //Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean:
                defaultValue = new Text("false");
                break;
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
 
Example 20
Source File: BaseJsonArrayConverter.java    From konduit-serving with Apache License 2.0
protected List<Map<FieldName, Object>> doTransformProcessConvertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    Schema outputSchema = transformProcess.getFinalSchema();

    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but it does not match the target input schema");
    }

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    ArrowWritableRecordBatch conversion = convert(schema, jsonArray, transformProcess);
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }

        ret.add(record);
    }

    return ret;
}