Java Code Examples for org.datavec.api.transform.schema.Schema#numColumns()
The following examples show how to use org.datavec.api.transform.schema.Schema#numColumns().
All examples are drawn from open source projects; the source file and project for each appears above the example.
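For orientation, here is a minimal, self-contained sketch (column names are illustrative) showing what numColumns() reports for a hand-built schema: it simply returns the column count, and column indices run from 0 to numColumns() - 1.

import org.datavec.api.transform.schema.Schema;

public class NumColumnsDemo {
    public static void main(String[] args) {
        // Build a three-column schema with the standard Schema.Builder
        Schema schema = new Schema.Builder()
                .addColumnString("name")
                .addColumnInteger("age")
                .addColumnDouble("score")
                .build();

        System.out.println(schema.numColumns()); // prints 3

        // The iteration pattern used throughout the examples below
        for (int i = 0; i < schema.numColumns(); i++) {
            System.out.println(i + ": " + schema.getName(i) + " (" + schema.getType(i) + ")");
        }
    }
}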
Example 1
Source File: BaseJsonArrayConverter.java From konduit-serving with Apache License 2.0
protected Pair<Map<Integer, Integer>, List<? extends Map<FieldName, ?>>> doTransformProcessConvertPmmlWithErrors(Schema schema, JsonArray jsonArray, TransformProcess transformProcess, DataPipelineErrorHandler dataPipelineErrorHandler) {
    Schema outputSchema = transformProcess.getFinalSchema();
    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but does not match target input schema");
    }

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    Pair<Map<Integer, Integer>, ArrowWritableRecordBatch> convertWithErrors = convertWithErrors(schema, jsonArray, transformProcess, dataPipelineErrorHandler);
    ArrowWritableRecordBatch conversion = convertWithErrors.getRight();
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }
        ret.add(record);
    }

    return Pair.of(convertWithErrors.getKey(), ret);
}
Example 2
Source File: ArrowConverter.java From DataVec with Apache License 2.0
/**
 * Convert a set of input strings to arrow columns
 *
 * @param bufferAllocator the buffer allocator to use
 * @param schema the schema to use
 * @param dataVecRecord the collection of input strings to process
 * @return the created vectors
 */
public static List<FieldVector> toArrowColumnsString(final BufferAllocator bufferAllocator, final Schema schema, List<List<String>> dataVecRecord) {
    int numRows = dataVecRecord.size();
    List<FieldVector> ret = createFieldVectors(bufferAllocator, schema, numRows);
    // TODO: change iteration scheme
    for (int j = 0; j < schema.numColumns(); j++) {
        FieldVector fieldVector = ret.get(j);
        for (int row = 0; row < numRows; row++) {
            String writable = dataVecRecord.get(row).get(j);
            setValue(schema.getType(j), fieldVector, writable, row);
        }
    }

    return ret;
}
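A hedged usage sketch for the method above; the allocator setup follows the standard Arrow pattern, and the schema and row values are illustrative:

import java.util.Arrays;
import java.util.List;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.datavec.api.transform.schema.Schema;
import org.datavec.arrow.ArrowConverter;

public class ToArrowColumnsStringDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnInteger("id")
                .addColumnDouble("value")
                .build();

        // Each inner list is one row, one string per schema column
        List<List<String>> rows = Arrays.asList(
                Arrays.asList("1", "2.5"),
                Arrays.asList("2", "3.5"));

        try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE)) {
            List<FieldVector> columns = ArrowConverter.toArrowColumnsString(allocator, schema, rows);
            System.out.println(columns.size()); // one vector per column: schema.numColumns() == 2

            // Release vector buffers before the allocator closes
            for (FieldVector v : columns) {
                v.close();
            }
        }
    }
}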
Example 3
Source File: JsonArrayMapConverter.java From konduit-serving with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public List<? extends Map<FieldName, ?>> convertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    if (transformProcess != null) {
        return doTransformProcessConvertPmml(schema, jsonArray, transformProcess);
    }

    List<FieldName> fieldNames = getNameRepresentationFor(schema);
    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    for (int i = 0; i < jsonArray.size(); i++) {
        JsonObject jsonObject = jsonArray.getJsonObject(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < schema.numColumns(); j++) {
            record.put(fieldNames.get(j), jsonObject.getValue(schema.getName(j)));
        }
        ret.add(record);
    }

    return ret;
}
Example 4
Source File: ArrowConverter.java From deeplearning4j with Apache License 2.0
/**
 * Given a buffer allocator and DataVec schema,
 * convert the passed-in batch of records to a set of arrow columns
 *
 * @param bufferAllocator the buffer allocator to use
 * @param schema the schema to convert
 * @param dataVecRecord the DataVec record batch to convert
 * @return the converted list of {@link FieldVector}
 */
public static List<FieldVector> toArrowColumns(final BufferAllocator bufferAllocator, final Schema schema, List<List<Writable>> dataVecRecord) {
    int numRows = dataVecRecord.size();
    List<FieldVector> ret = createFieldVectors(bufferAllocator, schema, numRows);
    for (int j = 0; j < schema.numColumns(); j++) {
        FieldVector fieldVector = ret.get(j);
        int row = 0;
        for (List<Writable> record : dataVecRecord) {
            Writable writable = record.get(j);
            setValue(schema.getType(j), fieldVector, writable, row);
            row++;
        }
    }

    return ret;
}
Example 5
Source File: ArrowConverter.java From DataVec with Apache License 2.0
private static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator, Schema schema, int numRows) {
    List<FieldVector> ret = new ArrayList<>(schema.numColumns());
    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getType(i)) {
            case Integer: ret.add(intVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Long: ret.add(longVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Double: ret.add(doubleVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Float: ret.add(floatVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Boolean: ret.add(booleanVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case String: ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Categorical: ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Time: ret.add(timeVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            default: throw new IllegalArgumentException("Illegal type found " + schema.getType(i));
        }
    }
    return ret;
}
Example 6
Source File: BaseSequenceExpansionTransform.java From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    // Same schema *except* for the expanded columns
    List<ColumnMetaData> meta = new ArrayList<>(inputSchema.numColumns());

    List<ColumnMetaData> oldMetaToExpand = new ArrayList<>();
    for (String s : requiredColumns) {
        oldMetaToExpand.add(inputSchema.getMetaData(s));
    }
    List<ColumnMetaData> newMetaToExpand = expandedColumnMetaDatas(oldMetaToExpand, expandedColumnNames);

    int modColumnIdx = 0;
    for (ColumnMetaData m : inputSchema.getColumnMetaData()) {
        if (requiredColumns.contains(m.getName())) {
            // Possibly changed column (expanded)
            meta.add(newMetaToExpand.get(modColumnIdx++));
        } else {
            // Unmodified column
            meta.add(m);
        }
    }

    return inputSchema.newSchema(meta);
}
Example 7
Source File: DataFrames.java From DataVec with Apache License 2.0
/**
 * Convert a DataVec schema to a Spark struct type
 *
 * @param schema the schema to convert
 * @return the Spark struct type
 */
public static StructType fromSchema(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns()];
    for (int i = 0; i < structFields.length; i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i] = new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException("This API should not be used with strings, binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
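A brief usage sketch (schema contents are illustrative; DataFrames here is the Spark utility class shown above):

import org.apache.spark.sql.types.StructType;
import org.datavec.api.transform.schema.Schema;
import org.datavec.spark.transform.DataFrames;

public class FromSchemaDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnDouble("price")
                .addColumnInteger("count")
                .build();

        StructType structType = DataFrames.fromSchema(schema);
        // One StructField per schema column
        System.out.println(structType.fields().length == schema.numColumns()); // true
    }
}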
Example 8
Source File: RemoveColumnsTransform.java From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    int nToRemove = columnsToRemove.length;
    int newNumColumns = schema.numColumns() - nToRemove;
    if (newNumColumns <= 0)
        throw new IllegalStateException("Number of columns after executing operation is " + newNumColumns
                + " (is <= 0). origColumns = " + schema.getColumnNames()
                + ", toRemove = " + Arrays.toString(columnsToRemove));

    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    Set<String> set = new HashSet<>();
    Collections.addAll(set, columnsToRemove);

    List<ColumnMetaData> newMeta = new ArrayList<>(newNumColumns);

    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> metaIter = origMeta.iterator();

    while (namesIter.hasNext()) {
        String n = namesIter.next();
        ColumnMetaData t = metaIter.next();
        if (!set.contains(n)) {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
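In practice this transform is usually applied through a TransformProcess rather than called directly; a minimal sketch (column names are illustrative):

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class RemoveColumnsDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnString("keep")
                .addColumnString("drop")
                .build();

        TransformProcess tp = new TransformProcess.Builder(schema)
                .removeColumns("drop")
                .build();

        // getFinalSchema() invokes transform(Schema) above; numColumns() shrinks from 2 to 1
        System.out.println(tp.getFinalSchema().numColumns());
    }
}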
Example 9
Source File: DataFrames.java From deeplearning4j with Apache License 2.0
/**
 * Convert the DataVec sequence schema to a StructType for Spark, for example for use in
 * {@link #toDataFrameSequence(Schema, JavaRDD)}.
 * <b>Note</b>: as per {@link #toDataFrameSequence(Schema, JavaRDD)}, the StructType has two additional columns added to it:<br>
 * - Column 0: Sequence UUID (name: {@link #SEQUENCE_UUID_COLUMN}) - a UUID for the original sequence<br>
 * - Column 1: Sequence index (name: {@link #SEQUENCE_INDEX_COLUMN}) - an index (integer, starting at 0) for the position
 *   of this record in the original time series<br>
 * These two columns are required if the data is to be converted back into a sequence at a later point, for example
 * using {@link #toRecordsSequence(Dataset)}
 *
 * @param schema Schema to convert
 * @return StructType for the schema
 */
public static StructType fromSchemaSequence(Schema schema) {
    StructField[] structFields = new StructField[schema.numColumns() + 2];

    structFields[0] = new StructField(SEQUENCE_UUID_COLUMN, DataTypes.StringType, false, Metadata.empty());
    structFields[1] = new StructField(SEQUENCE_INDEX_COLUMN, DataTypes.IntegerType, false, Metadata.empty());

    for (int i = 0; i < schema.numColumns(); i++) {
        switch (schema.getColumnTypes().get(i)) {
            case Double:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.DoubleType, false, Metadata.empty());
                break;
            case Integer:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.IntegerType, false, Metadata.empty());
                break;
            case Long:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.LongType, false, Metadata.empty());
                break;
            case Float:
                structFields[i + 2] = new StructField(schema.getName(i), DataTypes.FloatType, false, Metadata.empty());
                break;
            default:
                throw new IllegalStateException("This API should not be used with strings, binary data or ndarrays. This is only for columnar data");
        }
    }
    return new StructType(structFields);
}
Example 10
Source File: CategoricalToOneHotTransform.java From DataVec with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            // Convert this to one-hot:
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
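A minimal sketch of the schema change this produces, using the TransformProcess builder shortcut (column and state names are illustrative):

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class OneHotDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnCategorical("color", "red", "green", "blue")
                .build();

        TransformProcess tp = new TransformProcess.Builder(schema)
                .categoricalToOneHot("color")
                .build();

        // One categorical column expands to color[red], color[green], color[blue]
        System.out.println(tp.getFinalSchema().numColumns()); // 3
        System.out.println(tp.getFinalSchema().getColumnNames());
    }
}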
Example 11
Source File: PmmlInferenceExecutionerStepRunner.java From konduit-serving with Apache License 2.0
@Override
public Record[] transform(Record[] input) {
    Schema schema = pipelineStep.inputSchemaForName("default");
    List<Map<FieldName, Object>> pmmlInput = new ArrayList<>(input.length);
    List<FieldName> fieldNames = new ArrayList<>();
    for (int i = 0; i < schema.numColumns(); i++) {
        fieldNames.add(FieldName.create(schema.getName(i)));
    }

    for (Record record : input) {
        Map<FieldName, Object> pmmlRecord = new LinkedHashMap<>();
        for (int i = 0; i < record.getRecord().size(); i++) {
            pmmlRecord.put(fieldNames.get(i), WritableValueRetriever.getUnderlyingValue(record.getRecord().get(i)));
        }
        pmmlInput.add(pmmlRecord);
    }

    List<Map<FieldName, Object>> execute = pmmlInferenceExecutioner.execute(pmmlInput);
    Record[] ret = new Record[1];
    String json = ObjectMappers.toJson(execute);
    ret[0] = new org.datavec.api.records.impl.Record(Collections.singletonList(new Text(json)), null);
    return ret;
}
Example 12
Source File: IntegerToOneHotTransform.java From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema schema) {
    List<String> origNames = schema.getColumnNames();
    List<ColumnMetaData> origMeta = schema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns());

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i++ == columnIdx) {
            // Convert this to one-hot:
            for (int x = minValue; x <= maxValue; x++) {
                String newName = s + "[" + x + "]";
                newMeta.add(new IntegerMetaData(newName, 0, 1));
            }
        } else {
            newMeta.add(t);
        }
    }

    return schema.newSchema(newMeta);
}
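A matching sketch for the integer variant, assuming the integerToOneHot builder shortcut (column name and value range are illustrative):

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class IntegerOneHotDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnInteger("rating")
                .build();

        TransformProcess tp = new TransformProcess.Builder(schema)
                .integerToOneHot("rating", 1, 5)
                .build();

        // rating expands to rating[1] .. rating[5]
        System.out.println(tp.getFinalSchema().numColumns()); // 5
    }
}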
Example 13
Source File: ArrowUtils.java From konduit-serving with Apache License 2.0
public static List<FieldVector> createFieldVectors(BufferAllocator bufferAllocator, Schema schema, int numRows) {
    List<FieldVector> ret = new ArrayList<>(schema.numColumns());
    for (int i = 0; i < schema.numColumns(); ++i) {
        switch (schema.getType(i)) {
            case Integer: ret.add(intVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Float: ret.add(floatVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Double: ret.add(doubleVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Long: ret.add(longVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case NDArray: ret.add(ndarrayVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Boolean: ret.add(booleanVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Categorical: ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case String: ret.add(stringVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Time: ret.add(timeVectorOf(bufferAllocator, schema.getName(i), numRows)); break;
            case Bytes:
            default:
                throw new IllegalArgumentException("Illegal type found for creation of field vectors: " + schema.getType(i));
        }
    }
    return ret;
}
Example 14
Source File: JsonArrayMapConverter.java From konduit-serving with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public Pair<Map<Integer, Integer>, List<? extends Map<FieldName, ?>>> convertPmmlWithErrors(Schema schema, JsonArray jsonArray, TransformProcess transformProcess, DataPipelineErrorHandler dataPipelineErrorHandler) {
    if (transformProcess != null) {
        return doTransformProcessConvertPmmlWithErrors(schema, jsonArray, transformProcess, dataPipelineErrorHandler);
    }

    List<FieldName> fieldNames = getNameRepresentationFor(schema);
    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    Map<Integer, Integer> mapping = new LinkedHashMap<>();
    int numSucceeded = 0;
    for (int i = 0; i < jsonArray.size(); i++) {
        try {
            JsonObject jsonObject = jsonArray.getJsonObject(i);
            if (jsonObject.size() != schema.numColumns()) {
                throw new IllegalArgumentException("Found illegal item at row " + i);
            }
            Map<FieldName, Object> record = new LinkedHashMap<>();
            for (int j = 0; j < schema.numColumns(); j++) {
                record.put(fieldNames.get(j), jsonObject.getValue(schema.getName(j)));
            }
            mapping.put(numSucceeded, i);
            numSucceeded++;
            ret.add(record);
        } catch (Exception e) {
            dataPipelineErrorHandler.onError(e, jsonArray.getJsonObject(i), i);
        }
    }

    return Pair.of(mapping, ret);
}
Example 15
Source File: SchemaTypeUtils.java From konduit-serving with Apache License 2.0
/**
 * Create a mapping of name to {@link SchemaType}
 * based on the {@link Schema}
 *
 * @param schema the schema to decompose
 * @return the map of name to {@link SchemaType}
 */
public static Map<String, SchemaType> typeMappingsForSchema(Schema schema) {
    Map<String, SchemaType> ret = new LinkedHashMap<>();
    for (int i = 0; i < schema.numColumns(); i++) {
        ret.put(schema.getName(i), schemaTypeForColumnType(schema.getType(i)));
    }
    return ret;
}
Example 16
Source File: ParseDoubleTransform.java From DataVec with Apache License 2.0
/**
 * Get the output schema for this transformation, given an input schema
 *
 * @param inputSchema the schema to transform
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder newSchema = new Schema.Builder();
    for (int i = 0; i < inputSchema.numColumns(); i++) {
        if (inputSchema.getType(i) == ColumnType.String) {
            newSchema.addColumnDouble(inputSchema.getMetaData(i).getName());
        } else {
            newSchema.addColumn(inputSchema.getMetaData(i));
        }
    }
    return newSchema.build();
}
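A minimal sketch wiring this transform into a TransformProcess (the column name is illustrative, and the import path for ParseDoubleTransform is assumed from the DataVec layout):

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.transform.parse.ParseDoubleTransform;

public class ParseDoubleDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnString("rawValue")
                .build();

        TransformProcess tp = new TransformProcess.Builder(schema)
                .transform(new ParseDoubleTransform())
                .build();

        // The String column becomes a Double column; numColumns() is unchanged
        System.out.println(tp.getFinalSchema().getType(0)); // Double
    }
}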
Example 17
Source File: RegressionOutputAdapter.java From konduit-serving with Apache License 2.0
/**
 * Create the output adapter with the given output schema
 *
 * @param schema the schema of the output
 */
public RegressionOutputAdapter(Schema schema) {
    this.schema = schema;
    fieldNames = new ArrayList<>(schema.numColumns());
    for (int i = 0; i < schema.numColumns(); i++) {
        fieldNames.add(FieldName.create(schema.getName(i)));
    }
}
Example 18
Source File: PivotTransform.java From DataVec with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            // Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);
                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; // Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    // Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String: defaultValue = new Text(""); break;
            case Integer: defaultValue = new IntWritable(0); break;
            case Long: defaultValue = new LongWritable(0); break;
            case Double: defaultValue = new DoubleWritable(0.0); break;
            case Float: defaultValue = new FloatWritable(0.0f); break;
            case Categorical: defaultValue = new NullWritable(); break;
            case Time: defaultValue = new LongWritable(0); break;
            case Bytes: throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean: defaultValue = new Text("false"); break;
            default: throw new UnsupportedOperationException("Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
Example 19
Source File: PivotTransform.java From deeplearning4j with Apache License 2.0
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(keyColumn) || !inputSchema.hasColumn(valueColumn)) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                + " in " + inputSchema.getColumnNames());
    }

    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    int i = 0;
    Iterator<String> namesIter = origNames.iterator();
    Iterator<ColumnMetaData> typesIter = origMeta.iterator();

    List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    while (namesIter.hasNext()) {
        String s = namesIter.next();
        ColumnMetaData t = typesIter.next();

        if (i == idxKey) {
            // Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                String newName = s + "[" + stateName + "]";
                ColumnMetaData newValueMeta = valueMeta.clone();
                newValueMeta.setName(newName);
                newMeta.add(newValueMeta);
            }
        } else if (i == idxValue) {
            i++;
            continue; // Skip column
        } else {
            newMeta.add(t);
        }
        i++;
    }

    // Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String: defaultValue = new Text(""); break;
            case Integer: defaultValue = new IntWritable(0); break;
            case Long: defaultValue = new LongWritable(0); break;
            case Double: defaultValue = new DoubleWritable(0.0); break;
            case Float: defaultValue = new FloatWritable(0.0f); break;
            case Categorical: defaultValue = new NullWritable(); break;
            case Time: defaultValue = new LongWritable(0); break;
            case Bytes: throw new UnsupportedOperationException("Cannot infer default value for bytes");
            case Boolean: defaultValue = new Text("false"); break;
            default: throw new UnsupportedOperationException("Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(newMeta);
}
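A minimal sketch of the pivot's schema effect; the column and state names are illustrative, and both the PivotTransform import path and its two-argument (key, value) constructor are assumptions here:

import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;
import org.datavec.api.transform.transform.column.PivotTransform;

public class PivotDemo {
    public static void main(String[] args) {
        Schema schema = new Schema.Builder()
                .addColumnCategorical("key", "a", "b")
                .addColumnInteger("value")
                .build();

        TransformProcess tp = new TransformProcess.Builder(schema)
                .transform(new PivotTransform("key", "value"))
                .build();

        // key and value collapse into one column per key state: key[a], key[b]
        System.out.println(tp.getFinalSchema().getColumnNames()); // [key[a], key[b]]
    }
}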
Example 20
Source File: BaseJsonArrayConverter.java From konduit-serving with Apache License 2.0
protected List<Map<FieldName, Object>> doTransformProcessConvertPmml(Schema schema, JsonArray jsonArray, TransformProcess transformProcess) {
    Schema outputSchema = transformProcess.getFinalSchema();
    if (!transformProcess.getInitialSchema().equals(schema)) {
        throw new IllegalArgumentException("Transform process specified, but does not match target input schema");
    }

    List<Map<FieldName, Object>> ret = new ArrayList<>(jsonArray.size());
    List<FieldName> fieldNames = getNameRepresentationFor(outputSchema);

    ArrowWritableRecordBatch conversion = convert(schema, jsonArray, transformProcess);
    for (int i = 0; i < conversion.size(); i++) {
        List<Writable> recordToMap = conversion.get(i);
        Map<FieldName, Object> record = new LinkedHashMap<>();
        for (int j = 0; j < outputSchema.numColumns(); j++) {
            record.put(fieldNames.get(j), WritableValueRetriever.getUnderlyingValue(recordToMap.get(j)));
        }
        ret.add(record);
    }

    return ret;
}