Java Code Examples for org.datavec.api.transform.schema.Schema#getColumnNames()
The following examples show how to use org.datavec.api.transform.schema.Schema#getColumnNames().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RemoveAllColumnsExceptForTransform.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Builds the output schema by retaining only the columns whose names appear in
 * {@code columnsToKeep}. Retained columns keep the relative order they had in the input.
 *
 * @param schema the input schema
 * @return the schema produced by {@code schema.newSchema} from the retained column metadata
 */
@Override
public Schema transform(Schema schema) {
    List<String> names = schema.getColumnNames();
    List<ColumnMetaData> metas = schema.getColumnMetaData();

    // Set of names to retain, for constant-time membership checks
    Set<String> retained = new HashSet<>();
    for (String keep : columnsToKeep) {
        retained.add(keep);
    }

    List<ColumnMetaData> kept = new ArrayList<>(columnsToKeep.length);

    // Walk names and metadata in lock-step
    Iterator<ColumnMetaData> metaCursor = metas.iterator();
    for (String colName : names) {
        ColumnMetaData colMeta = metaCursor.next();
        if (retained.contains(colName)) {
            kept.add(colMeta);
        }
    }

    return schema.newSchema(kept);
}
Example 2
Source File: DuplicateColumnsTransform.java From DataVec with Apache License 2.0 | 6 votes |
@Override public Schema transform(Schema inputSchema) { List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size()); List<String> oldNames = inputSchema.getColumnNames(); int dupCount = 0; for (int i = 0; i < oldMeta.size(); i++) { String current = oldNames.get(i); newMeta.add(oldMeta.get(i)); if (columnsToDuplicateSet.contains(current)) { //Duplicate the current columnName, and place it after... String dupName = newColumnNames.get(dupCount); ColumnMetaData m = oldMeta.get(i).clone(); m.setName(dupName); newMeta.add(m); dupCount++; } } return inputSchema.newSchema(newMeta); }
Example 3
Source File: CategoricalToIntegerTransform.java From DataVec with Apache License 2.0 | 6 votes |
@Override public Schema transform(Schema schema) { List<String> origNames = schema.getColumnNames(); List<ColumnMetaData> origMeta = schema.getColumnMetaData(); int i = 0; Iterator<String> namesIter = origNames.iterator(); Iterator<ColumnMetaData> typesIter = origMeta.iterator(); List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns()); while (namesIter.hasNext()) { String s = namesIter.next(); ColumnMetaData t = typesIter.next(); if (i++ == columnIdx) { //Convert this to integer int nClasses = stateNames.size(); newMeta.add(new IntegerMetaData(t.getName(), 0, nClasses - 1)); } else { newMeta.add(t); } } return schema.newSchema(newMeta); }
Example 4
Source File: SequenceDifferenceTransform.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Stores the input schema and records the column type of {@code columnName}.
 *
 * @param inputSchema the schema this transform will be applied to
 * @throws IllegalStateException if the schema has no column named {@code columnName}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    boolean columnPresent = inputSchema.hasColumn(columnName);
    if (!columnPresent) {
        String message = "Invalid input schema: does not have column with name \"" + columnName
                + "\"\n. All schema names: " + inputSchema.getColumnNames();
        throw new IllegalStateException(message);
    }

    this.columnType = inputSchema.getMetaData(columnName).getColumnType();
    this.inputSchema = inputSchema;
}
Example 5
Source File: FirstDigitTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Builds the output schema containing a categorical column named {@code outputColumn}
 * whose states are the digit strings "0" to "9", plus {@code OTHER_CATEGORY} when the mode
 * is {@code Mode.INCLUDE_OTHER_CATEGORY}. When the output name differs from the input name
 * the input column is kept and the new column is placed right after it; otherwise the new
 * column takes the input column's place.
 *
 * @param inputSchema the input schema
 * @return the schema produced by {@code inputSchema.newSchema} from the updated metadata list
 */
@Override
public Schema transform(Schema inputSchema) {
    List<String> names = inputSchema.getColumnNames();
    List<ColumnMetaData> metas = inputSchema.getColumnMetaData();

    boolean inputPresent = names.contains(inputColumn);
    Preconditions.checkState(inputPresent, "Input column with name \"%s\" not found in schema", inputColumn);

    boolean outputNameUsable = inputColumn.equals(outputColumn) || !names.contains(outputColumn);
    Preconditions.checkState(outputNameUsable,
            "Output column with name \"%s\" already exists in schema (only allowable if input column == output column)",
            outputColumn);

    int columnCount = names.size();
    List<ColumnMetaData> result = new ArrayList<>(columnCount + 1);
    for (int idx = 0; idx < columnCount; idx++) {
        if (!names.get(idx).equals(inputColumn)) {
            // Unrelated column: pass through
            result.add(metas.get(idx));
            continue;
        }

        // Keep the source column only when the result goes into a separate column
        if (!outputColumn.equals(inputColumn)) {
            result.add(metas.get(idx));
        }

        List<String> digits;
        if (mode == Mode.INCLUDE_OTHER_CATEGORY) {
            digits = Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", OTHER_CATEGORY);
        } else {
            digits = Arrays.asList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
        }
        result.add(new CategoricalMetaData(outputColumn, Collections.unmodifiableList(digits)));
    }

    return inputSchema.newSchema(result);
}
Example 6
Source File: StringListToCategoricalSetTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1); List<String> oldNames = inputSchema.getColumnNames(); Iterator<ColumnMetaData> typesIter = oldMeta.iterator(); Iterator<String> namesIter = oldNames.iterator(); int i = 0; while (typesIter.hasNext()) { ColumnMetaData t = typesIter.next(); String name = namesIter.next(); if (i++ == colIdx) { //Replace String column with a set of binary/categorical columns if (t.getColumnType() != ColumnType.String) throw new IllegalStateException("Cannot convert non-string type"); for (int j = 0; j < newColumnNames.size(); j++) { ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false"); newMeta.add(meta); } } else { newMeta.add(t); } } return inputSchema.newSchema(newMeta); }
Example 7
Source File: StringListToCountsNDArrayTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(); List<String> oldNames = inputSchema.getColumnNames(); Iterator<ColumnMetaData> typesIter = oldMeta.iterator(); Iterator<String> namesIter = oldNames.iterator(); int i = 0; while (typesIter.hasNext()) { ColumnMetaData t = typesIter.next(); String name = namesIter.next(); if (i++ == colIdx) { //Replace String column with a set of binary/integer columns if (t.getColumnType() != ColumnType.String) throw new IllegalStateException("Cannot convert non-string type"); ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}); newMeta.add(meta); } else { newMeta.add(t); } } return inputSchema.newSchema(newMeta); }
Example 8
Source File: IntegerToOneHotTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema schema) { List<String> origNames = schema.getColumnNames(); List<ColumnMetaData> origMeta = schema.getColumnMetaData(); int i = 0; Iterator<String> namesIter = origNames.iterator(); Iterator<ColumnMetaData> typesIter = origMeta.iterator(); List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns()); while (namesIter.hasNext()) { String s = namesIter.next(); ColumnMetaData t = typesIter.next(); if (i++ == columnIdx) { //Convert this to one-hot: for (int x = minValue; x <= maxValue; x++) { String newName = s + "[" + x + "]"; newMeta.add(new IntegerMetaData(newName, 0, 1)); } } else { newMeta.add(t); } } return schema.newSchema(newMeta); }
Example 9
Source File: TestTransforms.java From DataVec with Apache License 2.0 | 5 votes |
@Test public void testPivotTransform(){ Schema schema = new Schema.Builder() .addColumnString("otherCol") .addColumnCategorical("key", Arrays.asList("first","second","third")) .addColumnDouble("value") .addColumnDouble("otherCol2") .build(); Transform t = new PivotTransform("key","value"); t.setInputSchema(schema); Schema out = t.transform(schema); List<String> expNames = Arrays.asList("otherCol", "key[first]", "key[second]", "key[third]", "otherCol2"); List<String> actNames = out.getColumnNames(); assertEquals(expNames, actNames); List<ColumnType> columnTypesExp = Arrays.asList(ColumnType.String, ColumnType.Double, ColumnType.Double, ColumnType.Double, ColumnType.Double); assertEquals(columnTypesExp, out.getColumnTypes()); //Expand (second,100) into (0,100,0). Leave the remaining columns as is List<Writable> e1 = Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(0), new DoubleWritable(100), new DoubleWritable(0), new DoubleWritable(-1)); List<Writable> a1 = t.map(Arrays.<Writable>asList(new DoubleWritable(1), new Text("second"), new DoubleWritable(100), new DoubleWritable(-1))); assertEquals(e1,a1); //Expand (third,200) into (0,0,200). Leave the remaining columns as is List<Writable> e2 = Arrays.<Writable>asList(new DoubleWritable(1), new DoubleWritable(0), new DoubleWritable(0), new DoubleWritable(200), new DoubleWritable(-1)); List<Writable> a2 = t.map(Arrays.<Writable>asList(new DoubleWritable(1), new Text("third"), new DoubleWritable(200), new DoubleWritable(-1))); assertEquals(e2,a2); }
Example 10
Source File: CategoricalToOneHotTransform.java From DataVec with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema schema) { List<String> origNames = schema.getColumnNames(); List<ColumnMetaData> origMeta = schema.getColumnMetaData(); int i = 0; Iterator<String> namesIter = origNames.iterator(); Iterator<ColumnMetaData> typesIter = origMeta.iterator(); List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns()); while (namesIter.hasNext()) { String s = namesIter.next(); ColumnMetaData t = typesIter.next(); if (i++ == columnIdx) { //Convert this to one-hot: for (String stateName : stateNames) { String newName = s + "[" + stateName + "]"; newMeta.add(new IntegerMetaData(newName, 0, 1)); } } else { newMeta.add(t); } } return schema.newSchema(newMeta); }
Example 11
Source File: DeriveColumnsFromTimeTransform.java From DataVec with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + derivedColumns.size()); List<String> oldNames = inputSchema.getColumnNames(); for (int i = 0; i < oldMeta.size(); i++) { String current = oldNames.get(i); newMeta.add(oldMeta.get(i)); if (insertAfter.equals(current)) { //Insert the derived columns here for (DerivedColumn d : derivedColumns) { switch (d.columnType) { case String: newMeta.add(new StringMetaData(d.columnName)); break; case Integer: newMeta.add(new IntegerMetaData(d.columnName)); //TODO: ranges... if it's a day, we know it must be 1 to 31, etc... break; default: throw new IllegalStateException("Unexpected column type: " + d.columnType); } } } } return inputSchema.newSchema(newMeta); }
Example 12
Source File: RemoveColumnsTransform.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Builds the output schema without the columns named in {@code columnsToRemove}.
 * Remaining columns keep the relative order they had in the input.
 *
 * @param schema the input schema
 * @return the schema produced by {@code schema.newSchema} from the remaining column metadata
 * @throws IllegalStateException if the input column count minus the number of columns to
 *         remove is zero or negative
 */
@Override
public Schema transform(Schema schema) {
    int removeCount = columnsToRemove.length;
    int remainingCount = schema.numColumns() - removeCount;
    if (remainingCount <= 0) {
        String message = "Number of columns after executing operation is " + remainingCount + " (is <= 0). "
                + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                + Arrays.toString(columnsToRemove);
        throw new IllegalStateException(message);
    }

    List<String> names = schema.getColumnNames();
    List<ColumnMetaData> metas = schema.getColumnMetaData();

    // Set of names to drop, for constant-time membership checks
    Set<String> dropped = new HashSet<>();
    for (String remove : columnsToRemove) {
        dropped.add(remove);
    }

    List<ColumnMetaData> remaining = new ArrayList<>(remainingCount);

    // Walk names and metadata in lock-step
    Iterator<ColumnMetaData> metaCursor = metas.iterator();
    for (String colName : names) {
        ColumnMetaData colMeta = metaCursor.next();
        if (dropped.contains(colName)) {
            continue;
        }
        remaining.add(colMeta);
    }

    return schema.newSchema(remaining);
}
Example 13
Source File: DuplicateColumnsTransform.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Stores the input schema and rebuilds {@code columnIndexesToDuplicateSet} with the
 * position of each column named in {@code columnsToDuplicate}.
 *
 * @param inputSchema the schema this transform will be applied to
 * @throws IllegalStateException if a column to duplicate is not present in the schema
 */
@Override
public void setInputSchema(Schema inputSchema) {
    columnIndexesToDuplicateSet.clear();

    List<String> available = inputSchema.getColumnNames();
    for (String wanted : columnsToDuplicate) {
        int position = available.indexOf(wanted);
        if (position < 0) {
            throw new IllegalStateException(
                    "Invalid state: column to duplicate \"" + wanted + "\" does not appear in input schema");
        }
        columnIndexesToDuplicateSet.add(position);
    }

    this.inputSchema = inputSchema;
}
Example 14
Source File: RemoveColumnsTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Builds the output schema without the columns named in {@code columnsToRemove}.
 * Remaining columns keep the relative order they had in the input.
 *
 * @param schema the input schema
 * @return the schema produced by {@code schema.newSchema} from the remaining column metadata
 * @throws IllegalStateException if the input column count minus the number of columns to
 *         remove is zero or negative
 */
@Override
public Schema transform(Schema schema) {
    int removeCount = columnsToRemove.length;
    int remainingCount = schema.numColumns() - removeCount;
    if (remainingCount <= 0) {
        String message = "Number of columns after executing operation is " + remainingCount + " (is <= 0). "
                + "origColumns = " + schema.getColumnNames() + ", toRemove = "
                + Arrays.toString(columnsToRemove);
        throw new IllegalStateException(message);
    }

    List<String> names = schema.getColumnNames();
    List<ColumnMetaData> metas = schema.getColumnMetaData();

    // Set of names to drop, for constant-time membership checks
    Set<String> dropped = new HashSet<>();
    for (String remove : columnsToRemove) {
        dropped.add(remove);
    }

    List<ColumnMetaData> remaining = new ArrayList<>(remainingCount);

    // Walk names and metadata in lock-step
    Iterator<ColumnMetaData> metaCursor = metas.iterator();
    for (String colName : names) {
        ColumnMetaData colMeta = metaCursor.next();
        if (dropped.contains(colName)) {
            continue;
        }
        remaining.add(colMeta);
    }

    return schema.newSchema(remaining);
}
Example 15
Source File: CategoricalToOneHotTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema schema) { List<String> origNames = schema.getColumnNames(); List<ColumnMetaData> origMeta = schema.getColumnMetaData(); int i = 0; Iterator<String> namesIter = origNames.iterator(); Iterator<ColumnMetaData> typesIter = origMeta.iterator(); List<ColumnMetaData> newMeta = new ArrayList<>(schema.numColumns()); while (namesIter.hasNext()) { String s = namesIter.next(); ColumnMetaData t = typesIter.next(); if (i++ == columnIdx) { //Convert this to one-hot: for (String stateName : stateNames) { String newName = s + "[" + stateName + "]"; newMeta.add(new IntegerMetaData(newName, 0, 1)); } } else { newMeta.add(t); } } return schema.newSchema(newMeta); }
Example 16
Source File: StringListToCountsNDArrayTransform.java From DataVec with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(); List<String> oldNames = inputSchema.getColumnNames(); Iterator<ColumnMetaData> typesIter = oldMeta.iterator(); Iterator<String> namesIter = oldNames.iterator(); int i = 0; while (typesIter.hasNext()) { ColumnMetaData t = typesIter.next(); String name = namesIter.next(); if (i++ == colIdx) { //Replace String column with a set of binary/integer columns if (t.getColumnType() != ColumnType.String) throw new IllegalStateException("Cannot convert non-string type"); ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}); newMeta.add(meta); } else { newMeta.add(t); } } return inputSchema.newSchema(newMeta); }
Example 17
Source File: DuplicateColumnsTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Stores the input schema and rebuilds {@code columnIndexesToDuplicateSet} with the
 * position of each column named in {@code columnsToDuplicate}.
 *
 * @param inputSchema the schema this transform will be applied to
 * @throws IllegalStateException if a column to duplicate is not present in the schema
 */
@Override
public void setInputSchema(Schema inputSchema) {
    columnIndexesToDuplicateSet.clear();

    List<String> available = inputSchema.getColumnNames();
    for (String wanted : columnsToDuplicate) {
        int position = available.indexOf(wanted);
        if (position < 0) {
            throw new IllegalStateException(
                    "Invalid state: column to duplicate \"" + wanted + "\" does not appear in input schema");
        }
        columnIndexesToDuplicateSet.add(position);
    }

    this.inputSchema = inputSchema;
}
Example 18
Source File: SequenceDifferenceTransform.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override public Schema transform(Schema inputSchema) { if (!inputSchema.hasColumn(columnName)) { throw new IllegalStateException("Invalid input schema: does not have column with name \"" + columnName + "\"\n. All schema names: " + inputSchema.getColumnNames()); } if (!(inputSchema instanceof SequenceSchema)) { throw new IllegalStateException( "Invalid input schema: expected a SequenceSchema, got " + inputSchema.getClass()); } List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns()); for (ColumnMetaData m : inputSchema.getColumnMetaData()) { if (columnName.equals(m.getName())) { switch (m.getColumnType()) { case Integer: newMeta.add(new IntegerMetaData(newColumnName)); break; case Long: newMeta.add(new LongMetaData(newColumnName)); break; case Double: newMeta.add(new DoubleMetaData(newColumnName)); break; case Float: newMeta.add(new FloatMetaData(newColumnName)); break; case Time: newMeta.add(new LongMetaData(newColumnName)); //not Time - time column isn't used for duration... break; case Categorical: case Bytes: case String: case Boolean: default: throw new IllegalStateException( "Cannot perform sequence difference on column of type " + m.getColumnType()); } } else { newMeta.add(m); } } return inputSchema.newSchema(newMeta); }
Example 19
Source File: SequenceDifferenceTransform.java From DataVec with Apache License 2.0 | 4 votes |
@Override public Schema transform(Schema inputSchema) { if (!inputSchema.hasColumn(columnName)) { throw new IllegalStateException("Invalid input schema: does not have column with name \"" + columnName + "\"\n. All schema names: " + inputSchema.getColumnNames()); } if (!(inputSchema instanceof SequenceSchema)) { throw new IllegalStateException( "Invalid input schema: expected a SequenceSchema, got " + inputSchema.getClass()); } List<ColumnMetaData> newMeta = new ArrayList<>(inputSchema.numColumns()); for (ColumnMetaData m : inputSchema.getColumnMetaData()) { if (columnName.equals(m.getName())) { switch (m.getColumnType()) { case Integer: newMeta.add(new IntegerMetaData(newColumnName)); break; case Long: newMeta.add(new LongMetaData(newColumnName)); break; case Double: newMeta.add(new DoubleMetaData(newColumnName)); break; case Float: newMeta.add(new FloatMetaData(newColumnName)); break; case Time: newMeta.add(new LongMetaData(newColumnName)); //not Time - time column isn't used for duration... break; case Categorical: case Bytes: case String: case Boolean: default: throw new IllegalStateException( "Cannot perform sequence difference on column of type " + m.getColumnType()); } } else { newMeta.add(m); } } return inputSchema.newSchema(newMeta); }
Example 20
Source File: PivotTransform.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Builds the output schema for the pivot. The key column is replaced by one column per
 * categorical state, each a clone of the value column's metadata renamed to
 * {@code keyName[stateName]}. The value column is dropped and all other columns are passed
 * through unchanged.
 *
 * <p>Side effect: when {@code defaultValue} is {@code null}, it is assigned a default
 * chosen from the value column's type.
 *
 * @param inputSchema the input schema
 * @return the schema produced by {@code inputSchema.newSchema} from the pivoted metadata list
 * @throws UnsupportedOperationException if the key or value column is missing, or if no
 *         default value can be inferred for the value column's type
 */
@Override
public Schema transform(Schema inputSchema) {
    boolean bothPresent = inputSchema.hasColumn(keyColumn) && inputSchema.hasColumn(valueColumn);
    if (!bothPresent) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", "
                + valueColumn + " in " + inputSchema.getColumnNames());
    }

    List<String> names = inputSchema.getColumnNames();
    List<ColumnMetaData> metas = inputSchema.getColumnMetaData();
    Iterator<ColumnMetaData> metaCursor = metas.iterator();
    List<ColumnMetaData> result = new ArrayList<>(inputSchema.numColumns());

    int idxKey = inputSchema.getIndexOfColumn(keyColumn);
    int idxValue = inputSchema.getIndexOfColumn(valueColumn);
    ColumnMetaData valueMeta = inputSchema.getMetaData(idxValue);

    int position = 0;
    for (String colName : names) {
        ColumnMetaData colMeta = metaCursor.next();
        int current = position++;

        if (current == idxKey) {
            // Convert the key column to a set of separate columns, one per state
            List<String> stateNames = ((CategoricalMetaData) inputSchema.getMetaData(idxKey)).getStateNames();
            for (String stateName : stateNames) {
                ColumnMetaData pivoted = valueMeta.clone();
                pivoted.setName(colName + "[" + stateName + "]");
                result.add(pivoted);
            }
        } else if (current != idxValue) {
            result.add(colMeta);
        }
        // The value column itself is skipped
    }

    // Infer the default value if necessary
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Boolean:
                defaultValue = new Text("false");
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            default:
                throw new UnsupportedOperationException(
                        "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(result);
}