Java Code Examples for org.datavec.api.transform.schema.Schema#getColumnNames()

The following examples show how to use org.datavec.api.transform.schema.Schema#getColumnNames(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RemoveAllColumnsExceptForTransform.java    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output schema by retaining only the columns whose names appear in
 * {@code columnsToKeep}. Retained columns keep the relative order of the input schema.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the retained column metadata
 */
@Override
public Schema transform(Schema schema) {
    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Set<String> retained = new HashSet<>();
    Collections.addAll(retained, columnsToKeep);

    List<ColumnMetaData> keptMeta = new ArrayList<>(columnsToKeep.length);

    //Names and metadata are walked in lockstep: the k-th name belongs to the k-th metadata entry
    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();
        if (retained.contains(columnName)) {
            keptMeta.add(meta);
        }
    }

    return schema.newSchema(keptMeta);
}
 
Example 2
Source File: DuplicateColumnsTransform.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output schema: every input column is kept, and each column whose name is in
 * {@code columnsToDuplicateSet} is immediately followed by a renamed clone of its metadata.
 * Names for the clones are taken from {@code newColumnNames} in the order the duplicated
 * columns are encountered.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the expanded metadata list
 */
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> sourceMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> resultMeta = new ArrayList<>(sourceMeta.size() + newColumnNames.size());

    List<String> sourceNames = inputSchema.getColumnNames();

    int nextDuplicateIdx = 0;
    for (int col = 0; col < sourceMeta.size(); col++) {
        String columnName = sourceNames.get(col);
        resultMeta.add(sourceMeta.get(col));

        if (!columnsToDuplicateSet.contains(columnName)) {
            continue;
        }

        //Place the renamed copy directly after the original column
        String copyName = newColumnNames.get(nextDuplicateIdx);
        ColumnMetaData copy = sourceMeta.get(col).clone();
        copy.setName(copyName);
        resultMeta.add(copy);
        nextDuplicateIdx++;
    }

    return inputSchema.newSchema(resultMeta);
}
 
Example 3
Source File: CategoricalToIntegerTransform.java    From DataVec with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output schema in which the column at position {@code columnIdx} is replaced by an
 * {@code IntegerMetaData} carrying the same name and constructed with the arguments
 * {@code 0} and {@code stateNames.size() - 1} (presumably the allowed min/max - confirm against
 * {@code IntegerMetaData}). All other columns are passed through unchanged.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the converted metadata list
 */
@Override
public Schema transform(Schema schema) {
    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Iterator<String> nameIt = columnNames.iterator();
    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    List<ColumnMetaData> converted = new ArrayList<>(schema.numColumns());

    for (int position = 0; nameIt.hasNext(); position++) {
        //The name itself is not needed; the iterator is only advanced in step with the metadata
        nameIt.next();
        ColumnMetaData meta = metaIt.next();

        if (position != columnIdx) {
            converted.add(meta);
            continue;
        }

        //Convert this to integer
        int classCount = stateNames.size();
        converted.add(new IntegerMetaData(meta.getName(), 0, classCount - 1));
    }

    return schema.newSchema(converted);
}
 
Example 4
Source File: SequenceDifferenceTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the input schema and records the type of the column named {@code columnName}.
 *
 * @param inputSchema the schema this transform will operate on
 * @throws IllegalStateException if the schema has no column named {@code columnName}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    boolean columnPresent = inputSchema.hasColumn(columnName);
    if (!columnPresent) {
        String message = "Invalid input schema: does not have column with name \"" + columnName
                + "\"\n. All schema names: " + inputSchema.getColumnNames();
        throw new IllegalStateException(message);
    }

    //Column type is resolved before the schema reference is stored
    this.columnType = inputSchema.getMetaData(columnName).getColumnType();
    this.inputSchema = inputSchema;
}
 
Example 5
Source File: FirstDigitTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema with a categorical column named {@code outputColumn} whose states
 * are the digits "0" to "9", plus {@code OTHER_CATEGORY} when {@code mode} is
 * {@code Mode.INCLUDE_OTHER_CATEGORY}. If the output name differs from the input name, the
 * input column is kept and the new column is placed right after it; otherwise the new column
 * replaces the input column.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 */
@Override
public Schema transform(Schema inputSchema) {
    List<String> origNames = inputSchema.getColumnNames();
    List<ColumnMetaData> origMeta = inputSchema.getColumnMetaData();

    Preconditions.checkState(origNames.contains(inputColumn), "Input column with name \"%s\" not found in schema", inputColumn);
    Preconditions.checkState(inputColumn.equals(outputColumn) || !origNames.contains(outputColumn),
            "Output column with name \"%s\" already exists in schema (only allowable if input column == output column)", outputColumn);

    List<ColumnMetaData> outMeta = new ArrayList<>(origNames.size()+1);
    for (int col = 0; col < origNames.size(); col++) {
        String columnName = origNames.get(col);
        if (!columnName.equals(inputColumn)) {
            //Unrelated column: pass through
            outMeta.add(origMeta.get(col));
            continue;
        }

        //Keep the source column only when the result is written under a different name
        boolean replacesInput = outputColumn.equals(inputColumn);
        if (!replacesInput) {
            outMeta.add(origMeta.get(col));
        }

        List<String> digits;
        if (mode == Mode.INCLUDE_OTHER_CATEGORY) {
            digits = Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", OTHER_CATEGORY);
        } else {
            digits = Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
        }
        List<String> categories = Collections.unmodifiableList(digits);

        outMeta.add(new CategoricalMetaData(outputColumn, categories));
    }

    return inputSchema.newSchema(outMeta);
}
 
Example 6
Source File: StringListToCategoricalSetTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column named {@code columnName} is replaced by one
 * categorical column per entry of {@code newColumnNames}, each created with the states
 * "true" and "false". All other columns are passed through unchanged.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 * @throws IllegalStateException if the column to replace is not of type {@code ColumnType.String}
 */
@Override
public Schema transform(Schema inputSchema) {

    int targetIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> sourceMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> resultMeta = new ArrayList<>(sourceMeta.size() + newColumnNames.size() - 1);
    List<String> sourceNames = inputSchema.getColumnNames();

    Iterator<String> nameIt = sourceNames.iterator();

    int position = 0;
    for (ColumnMetaData meta : sourceMeta) {
        //The name is not used; its iterator is only advanced in step with the metadata
        nameIt.next();

        boolean isTarget = (position++ == targetIdx);
        if (!isTarget) {
            resultMeta.add(meta);
            continue;
        }

        //Replace String column with a set of binary/categorical columns
        if (meta.getColumnType() != ColumnType.String) {
            throw new IllegalStateException("Cannot convert non-string type");
        }

        for (int k = 0; k < newColumnNames.size(); k++) {
            resultMeta.add(new CategoricalMetaData(newColumnNames.get(k), "true", "false"));
        }
    }

    return inputSchema.newSchema(resultMeta);

}
 
Example 7
Source File: StringListToCountsNDArrayTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column named {@code columnName} is replaced by a single
 * {@code NDArrayMetaData} column named {@code newColumnName}, constructed with the one-element
 * array {@code {vocabulary.size()}} (presumably the array shape - confirm against
 * {@code NDArrayMetaData}). All other columns are passed through unchanged.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 * @throws IllegalStateException if the column to replace is not of type {@code ColumnType.String}
 */
@Override
public Schema transform(Schema inputSchema) {

    int targetIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> sourceMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> resultMeta = new ArrayList<>();
    List<String> sourceNames = inputSchema.getColumnNames();

    Iterator<String> nameIt = sourceNames.iterator();

    int position = 0;
    for (ColumnMetaData meta : sourceMeta) {
        //The name is not used; its iterator is only advanced in step with the metadata
        nameIt.next();

        boolean isTarget = (position++ == targetIdx);
        if (!isTarget) {
            resultMeta.add(meta);
            continue;
        }

        //Only a String column may be replaced by the NDArray column
        if (meta.getColumnType() != ColumnType.String) {
            throw new IllegalStateException("Cannot convert non-string type");
        }

        resultMeta.add(new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}));
    }

    return inputSchema.newSchema(resultMeta);

}
 
Example 8
Source File: IntegerToOneHotTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column at position {@code columnIdx} is expanded into
 * one integer column per value in {@code minValue..maxValue} (inclusive). Each new column is
 * named {@code originalName[value]} and created as {@code new IntegerMetaData(name, 0, 1)}.
 * All other columns are passed through unchanged.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the expanded metadata list
 */
@Override
public Schema transform(Schema schema) {
    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    List<ColumnMetaData> expanded = new ArrayList<>(schema.numColumns());

    int position = 0;
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();

        if (position++ != columnIdx) {
            expanded.add(meta);
            continue;
        }

        //Convert this to one-hot:
        for (int value = minValue; value <= maxValue; value++) {
            expanded.add(new IntegerMetaData(columnName + "[" + value + "]", 0, 1));
        }
    }

    return schema.newSchema(expanded);
}
 
Example 9
Source File: TestTransforms.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code PivotTransform("key","value")} produces the expected output column names
 * and types, and that mapping two example rows yields the expected pivoted values.
 */
@Test
public void testPivotTransform(){
    Schema inputSchema = new Schema.Builder()
            .addColumnString("otherCol")
            .addColumnCategorical("key", Arrays.asList("first","second","third"))
            .addColumnDouble("value")
            .addColumnDouble("otherCol2")
            .build();

    Transform pivot = new PivotTransform("key","value");
    pivot.setInputSchema(inputSchema);
    Schema outputSchema = pivot.transform(inputSchema);

    //Schema check: names first, then types
    List<String> expectedNames = Arrays.asList("otherCol", "key[first]", "key[second]", "key[third]", "otherCol2");
    assertEquals(expectedNames, outputSchema.getColumnNames());

    List<ColumnType> expectedTypes = Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Double,
            ColumnType.Double, ColumnType.Double);
    assertEquals(expectedTypes, outputSchema.getColumnTypes());

    //Expand (second,100) into (0,100,0). Leave the remaining columns as is
    List<Writable> secondInput = Arrays.<Writable>asList(new DoubleWritable(1), new Text("second"),
            new DoubleWritable(100), new DoubleWritable(-1));
    List<Writable> secondExpected = Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(0),
            new DoubleWritable(100), new DoubleWritable(0), new DoubleWritable(-1));
    List<Writable> secondActual = pivot.map(secondInput);
    assertEquals(secondExpected, secondActual);

    //Expand (third,200) into (0,0,200). Leave the remaining columns as is
    List<Writable> thirdInput = Arrays.<Writable>asList(new DoubleWritable(1), new Text("third"),
            new DoubleWritable(200), new DoubleWritable(-1));
    List<Writable> thirdExpected = Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(0),
            new DoubleWritable(0), new DoubleWritable(200), new DoubleWritable(-1));
    List<Writable> thirdActual = pivot.map(thirdInput);
    assertEquals(thirdExpected, thirdActual);
}
 
Example 10
Source File: CategoricalToOneHotTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column at position {@code columnIdx} is expanded into
 * one integer column per entry of {@code stateNames}. Each new column is named
 * {@code originalName[stateName]} and created as {@code new IntegerMetaData(name, 0, 1)}.
 * All other columns are passed through unchanged.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the expanded metadata list
 */
@Override
public Schema transform(Schema schema) {
    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    List<ColumnMetaData> expanded = new ArrayList<>(schema.numColumns());

    int position = 0;
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();

        if (position++ != columnIdx) {
            expanded.add(meta);
            continue;
        }

        //Convert this to one-hot:
        for (String stateName : stateNames) {
            expanded.add(new IntegerMetaData(columnName + "[" + stateName + "]", 0, 1));
        }
    }

    return schema.newSchema(expanded);
}
 
Example 11
Source File: DeriveColumnsFromTimeTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema: all input columns are kept, and the metadata for every entry of
 * {@code derivedColumns} is inserted directly after the column named {@code insertAfter}.
 * Only String and Integer derived columns are supported.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the extended metadata list
 * @throws IllegalStateException if a derived column has a type other than String or Integer
 */
@Override
public Schema transform(Schema inputSchema) {
    List<ColumnMetaData> sourceMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> resultMeta = new ArrayList<>(sourceMeta.size() + derivedColumns.size());

    List<String> sourceNames = inputSchema.getColumnNames();

    for (int col = 0; col < sourceMeta.size(); col++) {
        String columnName = sourceNames.get(col);
        resultMeta.add(sourceMeta.get(col));

        if (!insertAfter.equals(columnName)) {
            continue;
        }

        //Insert the derived columns here
        for (DerivedColumn derived : derivedColumns) {
            switch (derived.columnType) {
                case Integer:
                    //TODO: ranges... if it's a day, we know it must be 1 to 31, etc...
                    resultMeta.add(new IntegerMetaData(derived.columnName));
                    break;
                case String:
                    resultMeta.add(new StringMetaData(derived.columnName));
                    break;
                default:
                    throw new IllegalStateException("Unexpected column type: " + derived.columnType);
            }
        }
    }

    return inputSchema.newSchema(resultMeta);
}
 
Example 12
Source File: RemoveColumnsTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema with the columns named in {@code columnsToRemove} dropped.
 * The up-front size check is based purely on {@code columnsToRemove.length}, not on how many
 * of those names actually occur in the schema.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the remaining column metadata
 * @throws IllegalStateException if {@code schema.numColumns() - columnsToRemove.length <= 0}
 */
@Override
public Schema transform(Schema schema) {
    int remainingCount = schema.numColumns() - columnsToRemove.length;
    if (remainingCount <= 0) {
        throw new IllegalStateException("Number of columns after executing operation is " + remainingCount
                + " (is <= 0). " + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                + Arrays.toString(columnsToRemove));
    }

    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Set<String> dropped = new HashSet<>();
    Collections.addAll(dropped, columnsToRemove);

    List<ColumnMetaData> remainingMeta = new ArrayList<>(remainingCount);

    //Names and metadata are walked in lockstep
    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();
        if (dropped.contains(columnName)) {
            continue;
        }
        remainingMeta.add(meta);
    }

    return schema.newSchema(remainingMeta);
}
 
Example 13
Source File: DuplicateColumnsTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the input schema and rebuilds {@code columnIndexesToDuplicateSet} with the position
 * of every column listed in {@code columnsToDuplicate}.
 *
 * @param inputSchema the schema this transform will operate on
 * @throws IllegalStateException if a column to duplicate is not present in the schema
 */
@Override
public void setInputSchema(Schema inputSchema) {
    columnIndexesToDuplicateSet.clear();

    List<String> availableColumns = inputSchema.getColumnNames();
    for (String toDuplicate : columnsToDuplicate) {
        int position = availableColumns.indexOf(toDuplicate);
        if (position < 0) {
            //indexOf reports a missing element as -1
            throw new IllegalStateException("Invalid state: column to duplicate \"" + toDuplicate + "\" does not appear "
                    + "in input schema");
        }
        columnIndexesToDuplicateSet.add(position);
    }

    this.inputSchema = inputSchema;
}
 
Example 14
Source File: RemoveColumnsTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema with the columns named in {@code columnsToRemove} dropped.
 * The up-front size check is based purely on {@code columnsToRemove.length}, not on how many
 * of those names actually occur in the schema.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the remaining column metadata
 * @throws IllegalStateException if {@code schema.numColumns() - columnsToRemove.length <= 0}
 */
@Override
public Schema transform(Schema schema) {
    int remainingCount = schema.numColumns() - columnsToRemove.length;
    if (remainingCount <= 0) {
        throw new IllegalStateException("Number of columns after executing operation is " + remainingCount
                + " (is <= 0). " + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                + Arrays.toString(columnsToRemove));
    }

    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Set<String> dropped = new HashSet<>();
    Collections.addAll(dropped, columnsToRemove);

    List<ColumnMetaData> remainingMeta = new ArrayList<>(remainingCount);

    //Names and metadata are walked in lockstep
    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();
        if (dropped.contains(columnName)) {
            continue;
        }
        remainingMeta.add(meta);
    }

    return schema.newSchema(remainingMeta);
}
 
Example 15
Source File: CategoricalToOneHotTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column at position {@code columnIdx} is expanded into
 * one integer column per entry of {@code stateNames}. Each new column is named
 * {@code originalName[stateName]} and created as {@code new IntegerMetaData(name, 0, 1)}.
 * All other columns are passed through unchanged.
 *
 * @param schema the input schema
 * @return the result of {@code schema.newSchema(...)} applied to the expanded metadata list
 */
@Override
public Schema transform(Schema schema) {
    List<String> columnNames = schema.getColumnNames();
    List<ColumnMetaData> columnMeta = schema.getColumnMetaData();

    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    List<ColumnMetaData> expanded = new ArrayList<>(schema.numColumns());

    int position = 0;
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();

        if (position++ != columnIdx) {
            expanded.add(meta);
            continue;
        }

        //Convert this to one-hot:
        for (String stateName : stateNames) {
            expanded.add(new IntegerMetaData(columnName + "[" + stateName + "]", 0, 1));
        }
    }

    return schema.newSchema(expanded);
}
 
Example 16
Source File: StringListToCountsNDArrayTransform.java    From DataVec with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the output schema in which the column named {@code columnName} is replaced by a single
 * {@code NDArrayMetaData} column named {@code newColumnName}, constructed with the one-element
 * array {@code {vocabulary.size()}} (presumably the array shape - confirm against
 * {@code NDArrayMetaData}). All other columns are passed through unchanged.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 * @throws IllegalStateException if the column to replace is not of type {@code ColumnType.String}
 */
@Override
public Schema transform(Schema inputSchema) {

    int targetIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> sourceMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> resultMeta = new ArrayList<>();
    List<String> sourceNames = inputSchema.getColumnNames();

    Iterator<String> nameIt = sourceNames.iterator();

    int position = 0;
    for (ColumnMetaData meta : sourceMeta) {
        //The name is not used; its iterator is only advanced in step with the metadata
        nameIt.next();

        boolean isTarget = (position++ == targetIdx);
        if (!isTarget) {
            resultMeta.add(meta);
            continue;
        }

        //Only a String column may be replaced by the NDArray column
        if (meta.getColumnType() != ColumnType.String) {
            throw new IllegalStateException("Cannot convert non-string type");
        }

        resultMeta.add(new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}));
    }

    return inputSchema.newSchema(resultMeta);

}
 
Example 17
Source File: DuplicateColumnsTransform.java    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the input schema and rebuilds {@code columnIndexesToDuplicateSet} with the position
 * of every column listed in {@code columnsToDuplicate}.
 *
 * @param inputSchema the schema this transform will operate on
 * @throws IllegalStateException if a column to duplicate is not present in the schema
 */
@Override
public void setInputSchema(Schema inputSchema) {
    columnIndexesToDuplicateSet.clear();

    List<String> availableColumns = inputSchema.getColumnNames();
    for (String toDuplicate : columnsToDuplicate) {
        int position = availableColumns.indexOf(toDuplicate);
        if (position < 0) {
            //indexOf reports a missing element as -1
            throw new IllegalStateException("Invalid state: column to duplicate \"" + toDuplicate + "\" does not appear "
                    + "in input schema");
        }
        columnIndexesToDuplicateSet.add(position);
    }

    this.inputSchema = inputSchema;
}
 
Example 18
Source File: SequenceDifferenceTransform.java    From deeplearning4j with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the output schema in which the column named {@code columnName} is replaced by a
 * column named {@code newColumnName}. Integer, Long, Double and Float columns keep their
 * type; a Time column becomes a Long column. All other columns are passed through unchanged.
 *
 * @param inputSchema the input schema; must be a {@code SequenceSchema}
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 * @throws IllegalStateException if the column is missing, the schema is not a
 *         {@code SequenceSchema}, or the column type is not one of the supported types
 */
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(columnName)) {
        throw new IllegalStateException("Invalid input schema: does not have column with name \"" + columnName
                + "\"\n. All schema names: " + inputSchema.getColumnNames());
    }
    boolean isSequence = inputSchema instanceof SequenceSchema;
    if (!isSequence) {
        throw new IllegalStateException(
                "Invalid input schema: expected a SequenceSchema, got " + inputSchema.getClass());
    }

    List<ColumnMetaData> differenced = new ArrayList<>(inputSchema.numColumns());
    for (ColumnMetaData meta : inputSchema.getColumnMetaData()) {
        if (!columnName.equals(meta.getName())) {
            differenced.add(meta);
            continue;
        }

        ColumnMetaData replacement;
        switch (meta.getColumnType()) {
            case Integer:
                replacement = new IntegerMetaData(newColumnName);
                break;
            case Long:
            case Time: //not Time - time column isn't used for duration...
                replacement = new LongMetaData(newColumnName);
                break;
            case Double:
                replacement = new DoubleMetaData(newColumnName);
                break;
            case Float:
                replacement = new FloatMetaData(newColumnName);
                break;
            default:
                //Categorical, Bytes, String, Boolean and anything else cannot be differenced
                throw new IllegalStateException(
                        "Cannot perform sequence difference on column of type " + meta.getColumnType());
        }
        differenced.add(replacement);
    }

    return inputSchema.newSchema(differenced);
}
 
Example 19
Source File: SequenceDifferenceTransform.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the output schema in which the column named {@code columnName} is replaced by a
 * column named {@code newColumnName}. Integer, Long, Double and Float columns keep their
 * type; a Time column becomes a Long column. All other columns are passed through unchanged.
 *
 * @param inputSchema the input schema; must be a {@code SequenceSchema}
 * @return the result of {@code inputSchema.newSchema(...)} applied to the new metadata list
 * @throws IllegalStateException if the column is missing, the schema is not a
 *         {@code SequenceSchema}, or the column type is not one of the supported types
 */
@Override
public Schema transform(Schema inputSchema) {
    if (!inputSchema.hasColumn(columnName)) {
        throw new IllegalStateException("Invalid input schema: does not have column with name \"" + columnName
                + "\"\n. All schema names: " + inputSchema.getColumnNames());
    }
    boolean isSequence = inputSchema instanceof SequenceSchema;
    if (!isSequence) {
        throw new IllegalStateException(
                "Invalid input schema: expected a SequenceSchema, got " + inputSchema.getClass());
    }

    List<ColumnMetaData> differenced = new ArrayList<>(inputSchema.numColumns());
    for (ColumnMetaData meta : inputSchema.getColumnMetaData()) {
        if (!columnName.equals(meta.getName())) {
            differenced.add(meta);
            continue;
        }

        ColumnMetaData replacement;
        switch (meta.getColumnType()) {
            case Integer:
                replacement = new IntegerMetaData(newColumnName);
                break;
            case Long:
            case Time: //not Time - time column isn't used for duration...
                replacement = new LongMetaData(newColumnName);
                break;
            case Double:
                replacement = new DoubleMetaData(newColumnName);
                break;
            case Float:
                replacement = new FloatMetaData(newColumnName);
                break;
            default:
                //Categorical, Bytes, String, Boolean and anything else cannot be differenced
                throw new IllegalStateException(
                        "Cannot perform sequence difference on column of type " + meta.getColumnType());
        }
        differenced.add(replacement);
    }

    return inputSchema.newSchema(differenced);
}
 
Example 20
Source File: PivotTransform.java    From DataVec with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the pivoted output schema: the key column is replaced by one column per categorical
 * state, named {@code keyName[stateName]} and carrying a renamed clone of the value column's
 * metadata; the value column itself is dropped; all other columns are passed through.
 *
 * <p>Side effect: when the {@code defaultValue} field is {@code null}, it is assigned a value
 * chosen from the value column's type.
 *
 * @param inputSchema the input schema
 * @return the result of {@code inputSchema.newSchema(...)} applied to the pivoted metadata list
 * @throws UnsupportedOperationException if the key or value column is missing, or if no default
 *         value can be inferred for the value column's type
 */
@Override
public Schema transform(Schema inputSchema) {
    if (!(inputSchema.hasColumn(keyColumn) && inputSchema.hasColumn(valueColumn))) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", " + valueColumn
                + " in " + inputSchema.getColumnNames());
    }

    List<String> columnNames = inputSchema.getColumnNames();
    List<ColumnMetaData> columnMeta = inputSchema.getColumnMetaData();

    Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    List<ColumnMetaData> pivotedMeta = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);

    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    int position = 0;
    for (String columnName : columnNames) {
        ColumnMetaData meta = metaIt.next();
        int current = position++;

        if (current == idxKey) {
            //Convert this to a set of separate columns
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                ColumnMetaData stateColumn = valueMeta.clone();
                stateColumn.setName(columnName + "[" + stateName + "]");
                pivotedMeta.add(stateColumn);
            }
        } else if (current != idxValue) {
            pivotedMeta.add(meta);
        }
        //The value column falls through both branches and is therefore skipped
    }

    //Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Boolean:
                defaultValue = new Text("false");
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            default:
                throw new UnsupportedOperationException(
                        "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(pivotedMeta);
}