Java Code Examples for org.datavec.api.transform.schema.Schema#getIndexOfColumn()
The following examples show how to use
org.datavec.api.transform.schema.Schema#getIndexOfColumn().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CategoricalToIntegerTransform.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Binds this transform to the given input schema.
 *
 * <p>Delegates to the superclass, caches the index of {@code columnName}, and builds
 * {@code statesMap}, which maps each categorical state name to its position in the
 * column's list of state names.
 *
 * @param inputSchema schema of the data this transform will be applied to
 * @throws IllegalStateException if the metadata of {@code columnName} is not
 *         {@link CategoricalMetaData}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    columnIdx = inputSchema.getIndexOfColumn(columnName);
    ColumnMetaData meta = inputSchema.getMetaData(columnName);
    if (!(meta instanceof CategoricalMetaData)) {
        // This transform produces integers, not one-hot columns: report the actual conversion.
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to integer: column is not categorical (is: "
                        + meta.getColumnType() + ")");
    }

    this.stateNames = ((CategoricalMetaData) meta).getStateNames();
    this.statesMap = new HashMap<>(stateNames.size());
    for (int i = 0; i < stateNames.size(); i++) {
        this.statesMap.put(stateNames.get(i), i);
    }
}
Example 2
Source File: CategoricalToOneHotTransform.java From DataVec with Apache License 2.0 | 6 votes |
/**
 * Configures this transform for the supplied schema.
 *
 * <p>After delegating to the superclass, the column index is cached and a lookup from
 * state name to its ordinal position in the state-name list is populated.
 *
 * @param inputSchema schema describing the incoming data
 * @throws IllegalStateException if the column's metadata is not {@link CategoricalMetaData}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);
    columnIdx = inputSchema.getIndexOfColumn(columnName);

    final ColumnMetaData columnMeta = inputSchema.getMetaData(columnName);
    if (!(columnMeta instanceof CategoricalMetaData)) {
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: "
                        + columnMeta.getColumnType() + ")");
    }
    final CategoricalMetaData categoricalMeta = (CategoricalMetaData) columnMeta;

    stateNames = categoricalMeta.getStateNames();
    statesMap = new HashMap<>(stateNames.size());

    // Walk the state names in list order, numbering them from zero.
    int ordinal = 0;
    for (String state : stateNames) {
        statesMap.put(state, ordinal++);
    }
}
Example 3
Source File: CategoricalToOneHotTransform.java From deeplearning4j with Apache License 2.0 | 6 votes |
/**
 * Attaches the input schema to this transform and prepares the state lookup.
 *
 * <p>The index of {@code columnName} is stored, the column is verified to carry
 * {@link CategoricalMetaData}, and every state name is mapped to its list position.
 *
 * @param inputSchema the schema of the input data
 * @throws IllegalStateException when the column is not described by {@link CategoricalMetaData}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    super.setInputSchema(inputSchema);

    this.columnIdx = inputSchema.getIndexOfColumn(columnName);

    ColumnMetaData metaForColumn = inputSchema.getMetaData(columnName);
    final boolean categorical = metaForColumn instanceof CategoricalMetaData;
    if (!categorical) {
        throw new IllegalStateException("Cannot convert column \"" + columnName
                        + "\" from categorical to one-hot: column is not categorical (is: "
                        + metaForColumn.getColumnType() + ")");
    }

    this.stateNames = ((CategoricalMetaData) metaForColumn).getStateNames();

    final int numStates = this.stateNames.size();
    this.statesMap = new HashMap<>(numStates);
    for (int stateIdx = 0; stateIdx < numStates; ++stateIdx) {
        this.statesMap.put(this.stateNames.get(stateIdx), stateIdx);
    }
}
Example 4
Source File: FilterInvalidValues.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Stores the schema and, unless {@code filterAnyInvalid} is set, resolves the index of
 * every column named in {@code columnsToFilterIfInvalid}.
 *
 * @param schema schema of the data to be filtered
 */
@Override
public void setInputSchema(Schema schema) {
    this.schema = schema;

    if (filterAnyInvalid) {
        // No per-column indices are resolved in this mode.
        return;
    }

    columnIdxs = new int[columnsToFilterIfInvalid.length];
    int slot = 0;
    for (String column : columnsToFilterIfInvalid) {
        columnIdxs[slot++] = schema.getIndexOfColumn(column);
    }
}
Example 5
Source File: ConditionalCopyValueTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Validates that both configured columns exist in the schema, caches their indices,
 * and forwards the schema to the condition.
 *
 * @param inputSchema schema of the input data
 * @throws IllegalStateException if {@code columnToReplace} or {@code sourceColumn}
 *         is not present in {@code inputSchema}
 */
@Override
public void setInputSchema(Schema inputSchema) {
    // Checked in this order: the column to replace first, then the source column.
    for (String required : new String[] {columnToReplace, sourceColumn}) {
        if (!inputSchema.hasColumn(required)) {
            throw new IllegalStateException("Column \"" + required + "\" not found in input schema");
        }
    }

    this.columnToReplaceIdx = inputSchema.getIndexOfColumn(columnToReplace);
    this.sourceColumnIdx = inputSchema.getIndexOfColumn(sourceColumn);
    this.condition.setInputSchema(inputSchema);
}
Example 6
Source File: SequenceSplitTimeSeparation.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Verifies that {@code timeColumn} exists and is of type {@code ColumnType.Time},
 * then records its index and keeps a reference to the schema.
 *
 * @param inputSchema schema of the sequence data
 * @throws IllegalStateException if the column is missing or is not a time column
 */
@Override
public void setInputSchema(Schema inputSchema) {
    final boolean columnPresent = inputSchema.hasColumn(timeColumn);
    if (!columnPresent) {
        throw new IllegalStateException(
                        "Invalid state: schema does not have column " + "with name \"" + timeColumn + "\"");
    }

    final ColumnType actualType = inputSchema.getMetaData(timeColumn).getColumnType();
    if (actualType != ColumnType.Time) {
        throw new IllegalStateException("Invalid input schema: schema column \"" + timeColumn
                        + "\" is not a time column." + " (Is type: " + actualType + ")");
    }

    timeColumnIdx = inputSchema.getIndexOfColumn(timeColumn);
    schema = inputSchema;
}
Example 7
Source File: StringListToCountsNDArrayTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(); List<String> oldNames = inputSchema.getColumnNames(); Iterator<ColumnMetaData> typesIter = oldMeta.iterator(); Iterator<String> namesIter = oldNames.iterator(); int i = 0; while (typesIter.hasNext()) { ColumnMetaData t = typesIter.next(); String name = namesIter.next(); if (i++ == colIdx) { //Replace String column with a set of binary/integer columns if (t.getColumnType() != ColumnType.String) throw new IllegalStateException("Cannot convert non-string type"); ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()}); newMeta.add(meta); } else { newMeta.add(t); } } return inputSchema.newSchema(newMeta); }
Example 8
Source File: FirstDigitTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Passes the schema to the superclass, caches the index of {@code inputColumn},
 * and checks that the index is non-negative.
 *
 * @param schema schema of the input data
 */
@Override
public void setInputSchema(Schema schema) {
    super.setInputSchema(schema);

    final int resolvedIdx = schema.getIndexOfColumn(inputColumn);
    this.columnIdx = resolvedIdx;

    final boolean found = resolvedIdx >= 0;
    Preconditions.checkState(found, "Input column \"%s\" not found in schema", inputColumn);
}
Example 9
Source File: SequenceMovingWindowReduceTransform.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); //Approach here: The reducer gives us a schema for one time step -> simply convert this to a sequence schema... List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> meta = new ArrayList<>(oldMeta); ColumnMetaData m; switch (op) { case Min: case Max: case Range: case TakeFirst: case TakeLast: //Same type as input m = oldMeta.get(colIdx); m = m.clone(); m.setName(newColumnName); break; case Prod: case Sum: case Mean: case Stdev: //Double type m = new DoubleMetaData(newColumnName); break; case Count: case CountUnique: //Integer type m = new IntegerMetaData(newColumnName); break; default: throw new UnsupportedOperationException("Unknown op type: " + op); } meta.add(m); return new SequenceSchema(meta); }
Example 10
Source File: AnalyzeLocal.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Collects the distinct values found in one column of the given data.
 * The reader is read until {@code hasNext()} returns false.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, SequenceRecordReader)}
 *
 * @param columnName Name of the column to collect values from
 * @param schema     Schema used to resolve the column's index
 * @param data       Reader supplying the records
 * @return Set of the unique values seen in that column
 */
public static Set<Writable> getUnique(String columnName, Schema schema, RecordReader data) {
    final int columnIndex = schema.getIndexOfColumn(columnName);
    final Set<Writable> distinctValues = new HashSet<>();
    for (; data.hasNext(); ) {
        List<Writable> record = data.next();
        distinctValues.add(record.get(columnIndex));
    }
    return distinctValues;
}
Example 11
Source File: StringListToCategoricalSetTransform.java From DataVec with Apache License 2.0 | 5 votes |
@Override public Schema transform(Schema inputSchema) { int colIdx = inputSchema.getIndexOfColumn(columnName); List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData(); List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1); List<String> oldNames = inputSchema.getColumnNames(); Iterator<ColumnMetaData> typesIter = oldMeta.iterator(); Iterator<String> namesIter = oldNames.iterator(); int i = 0; while (typesIter.hasNext()) { ColumnMetaData t = typesIter.next(); String name = namesIter.next(); if (i++ == colIdx) { //Replace String column with a set of binary/categorical columns if (t.getColumnType() != ColumnType.String) throw new IllegalStateException("Cannot convert non-string type"); for (int j = 0; j < newColumnNames.size(); j++) { ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false"); newMeta.add(meta); } } else { newMeta.add(t); } } return inputSchema.newSchema(newMeta); }
Example 12
Source File: StringListToCategoricalSetTransform.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Remembers the input schema and caches the index of {@code columnName} within it.
 *
 * @param inputSchema schema of the input data
 */
@Override
public void setInputSchema(Schema inputSchema) {
    this.inputSchema = inputSchema;

    final int resolvedIdx = inputSchema.getIndexOfColumn(columnName);
    columnIdx = resolvedIdx;
}
Example 13
Source File: PivotTransform.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Builds the output schema for the pivot.
 *
 * <p>The key column is replaced by one column per state name of the key column, each
 * named {@code keyName[stateName]} and carrying a clone of the value column's metadata.
 * The value column itself is dropped; all other columns are passed through unchanged.
 * The key column's metadata is cast to {@link CategoricalMetaData}.
 *
 * <p>If {@code defaultValue} is null, it is set here based on the value column's type.
 *
 * @param inputSchema schema of the input data
 * @return a new schema created from {@code inputSchema} with the pivoted columns
 * @throws UnsupportedOperationException if the key or value column is missing, or if no
 *         default value can be inferred for the value column's type
 */
@Override
public Schema transform(Schema inputSchema) {
    if (!(inputSchema.hasColumn(keyColumn) && inputSchema.hasColumn(valueColumn))) {
        throw new UnsupportedOperationException("Key or value column not found: " + keyColumn + ", "
                        + valueColumn + " in " + inputSchema.getColumnNames());
    }

    final List<String> columnNames = inputSchema.getColumnNames();
    final List<ColumnMetaData> columnMeta = inputSchema.getColumnMetaData();

    final Iterator<String> nameIt = columnNames.iterator();
    final Iterator<ColumnMetaData> metaIt = columnMeta.iterator();

    final List<ColumnMetaData> pivotedMeta = new ArrayList<>(inputSchema.numColumns());

    final int keyIdx = inputSchema.getIndexOfColumn(keyColumn);
    final int valueIdx = inputSchema.getIndexOfColumn(valueColumn);
    final ColumnMetaData valueMeta = inputSchema.getMetaData(valueIdx);

    for (int pos = 0; nameIt.hasNext(); pos++) {
        final String currentName = nameIt.next();
        final ColumnMetaData currentMeta = metaIt.next();

        if (pos == keyIdx) {
            // Expand the key column into one column per categorical state
            CategoricalMetaData keyMeta = (CategoricalMetaData) inputSchema.getMetaData(keyIdx);
            for (String state : keyMeta.getStateNames()) {
                ColumnMetaData expanded = valueMeta.clone();
                expanded.setName(currentName + "[" + state + "]");
                pivotedMeta.add(expanded);
            }
            continue;
        }

        if (pos == valueIdx) {
            // The value column does not appear in the output
            continue;
        }

        pivotedMeta.add(currentMeta);
    }

    // Infer the default value if none was provided
    if (defaultValue == null) {
        switch (valueMeta.getColumnType()) {
            case String:
                defaultValue = new Text("");
                break;
            case Integer:
                defaultValue = new IntWritable(0);
                break;
            case Long:
            case Time:
                defaultValue = new LongWritable(0);
                break;
            case Double:
                defaultValue = new DoubleWritable(0.0);
                break;
            case Float:
                defaultValue = new FloatWritable(0.0f);
                break;
            case Categorical:
                defaultValue = new NullWritable();
                break;
            case Boolean:
                defaultValue = new Text("false");
                break;
            case Bytes:
                throw new UnsupportedOperationException("Cannot infer default value for bytes");
            default:
                throw new UnsupportedOperationException(
                                "Cannot infer default value for " + valueMeta.getColumnType());
        }
    }

    return inputSchema.newSchema(pivotedMeta);
}
Example 14
Source File: BaseColumnComparator.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Stores the sequence schema and caches the index of {@code columnName} within it.
 *
 * @param sequenceSchema schema of the sequences being compared
 */
@Override
public void setSchema(Schema sequenceSchema) {
    schema = sequenceSchema;

    final int resolvedIdx = sequenceSchema.getIndexOfColumn(columnName);
    columnIdx = resolvedIdx;
}
Example 15
Source File: StringListToCountsNDArrayTransform.java From DataVec with Apache License 2.0 | 4 votes |
/**
 * Keeps a reference to the input schema and looks up the position of {@code columnName}.
 *
 * @param inputSchema schema of the input data
 */
@Override
public void setInputSchema(Schema inputSchema) {
    this.inputSchema = inputSchema;

    // Resolve through the schema that was just stored.
    columnIdx = this.inputSchema.getIndexOfColumn(columnName);
}
Example 16
Source File: BaseColumnComparator.java From deeplearning4j with Apache License 2.0 | 4 votes |
/**
 * Assigns the schema used by this comparator and records where {@code columnName}
 * sits in that schema.
 *
 * @param sequenceSchema schema of the sequences being compared
 */
@Override
public void setSchema(Schema sequenceSchema) {
    this.schema = sequenceSchema;

    // Resolve through the schema that was just stored.
    columnIdx = this.schema.getIndexOfColumn(columnName);
}
Example 17
Source File: AnalyzeSpark.java From DataVec with Apache License 2.0 | 3 votes |
/**
 * Draws a sample of values from one column of the data, sampling without replacement.
 *
 * @param count      Number of values to sample
 * @param columnName Name of the column to sample from
 * @param schema     Schema used to resolve the column's index
 * @param data       Data to sample from
 * @return A list of sampled values
 */
public static List<Writable> sampleFromColumn(int count, String columnName, Schema schema,
                JavaRDD<List<Writable>> data) {
    final SelectColumnFunction selectColumn = new SelectColumnFunction(schema.getIndexOfColumn(columnName));
    return data.map(selectColumn).takeSample(false, count);
}
Example 18
Source File: AnalyzeSpark.java From deeplearning4j with Apache License 2.0 | 2 votes |
/**
 * Collects the distinct values of a single column.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, JavaRDD)}
 *
 * @param columnName Name of the column to collect values from
 * @param schema     Schema used to resolve the column's index
 * @param data       Data to collect values from
 * @return List of the unique values in that column
 */
public static List<Writable> getUnique(String columnName, Schema schema, JavaRDD<List<Writable>> data) {
    return data.map(new SelectColumnFunction(schema.getIndexOfColumn(columnName)))
                    .distinct()
                    .collect();
}
Example 19
Source File: AnalyzeSpark.java From DataVec with Apache License 2.0 | 2 votes |
/**
 * Finds the maximum value in the given column, ordering values with the comparator
 * obtained for the column's writable type.
 *
 * @param allData    All data
 * @param columnName Name of the column to get the maximum value for
 * @param schema     Schema of the data
 * @return Maximum value for the column
 */
public static Writable max(JavaRDD<List<Writable>> allData, String columnName, Schema schema) {
    final SelectColumnFunction selectColumn = new SelectColumnFunction(schema.getIndexOfColumn(columnName));
    final JavaRDD<Writable> columnValues = allData.map(selectColumn);
    return columnValues.max(Comparators.forType(schema.getType(columnName).getWritableType()));
}
Example 20
Source File: AnalyzeSpark.java From DataVec with Apache License 2.0 | 2 votes |
/**
 * Returns every distinct value that occurs in the named column.
 * For sequence data, use {@link #getUniqueSequence(List, Schema, JavaRDD)}
 *
 * @param columnName Name of the column whose values are examined
 * @param schema     Schema used to resolve the column's index
 * @param data       Data to examine
 * @return List of unique values
 */
public static List<Writable> getUnique(String columnName, Schema schema, JavaRDD<List<Writable>> data) {
    final SelectColumnFunction selector = new SelectColumnFunction(schema.getIndexOfColumn(columnName));
    final JavaRDD<Writable> distinctValues = data.map(selector).distinct();
    return distinctValues.collect();
}