Java Code Examples for org.datavec.api.transform.ColumnType#String
The following examples show how to use org.datavec.api.transform.ColumnType#String. They are drawn from open-source projects; the source file, project, and license are noted above each example.
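For orientation, here is a minimal sketch (not taken from any of the projects below) that builds a Schema with Schema.Builder and checks which columns have type ColumnType.String. All of the methods used (addColumnString, addColumnInteger, numColumns, getType, getName) also appear in the examples that follow.

Schema schema = new Schema.Builder()
        .addColumnString("name")      // ColumnType.String
        .addColumnInteger("age")      // ColumnType.Integer
        .build();

for (int i = 0; i < schema.numColumns(); i++) {
    if (schema.getType(i) == ColumnType.String) {
        System.out.println(schema.getName(i) + " is a String column");
    }
}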
Example 1
Source File: TestGeoReduction.java, from DataVec (Apache License 2.0)
@Test
public void testCustomReductions() {

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("1#5")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("2#6")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("3#7")));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new Text("4#8")));

    //Per-dimension sums: 1+2+3+4 = 10.0 and 5+6+7+8 = 26.0
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new Text("10.0#26.0"));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnString("coord").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Count).keyColumns("key")
                    .customReduction("coord", new CoordinatesReduction("coordSum", ReduceOp.Sum, "#")).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> aggregableReduceOp = reducer.aggregableReducer();
    for (List<Writable> l : inputs)
        aggregableReduceOp.accept(l);
    List<Writable> out = aggregableReduceOp.get();

    assertEquals(2, out.size());
    assertEquals(expected, out);

    //Check schema:
    String[] expNames = new String[] {"key", "coordSum"};
    ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(2, outSchema.numColumns());
    for (int i = 0; i < 2; i++) {
        assertEquals(expNames[i], outSchema.getName(i));
        assertEquals(expTypes[i], outSchema.getType(i));
    }
}
Example 2
Source File: StringListToCountsNDArrayTransform.java, from DataVec (Apache License 2.0)
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>();
    List<String> oldNames = inputSchema.getColumnNames();

    Iterator<ColumnMetaData> typesIter = oldMeta.iterator();
    Iterator<String> namesIter = oldNames.iterator();

    int i = 0;
    while (typesIter.hasNext()) {
        ColumnMetaData t = typesIter.next();
        String name = namesIter.next();
        if (i++ == colIdx) {
            //Replace the String column with a single NDArray column of width vocabulary.size()
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");
            ColumnMetaData meta = new NDArrayMetaData(newColumnName, new long[] {vocabulary.size()});
            newMeta.add(meta);
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);
}
Example 3
Source File: StringListToCategoricalSetTransform.java, from DataVec (Apache License 2.0)
@Override
public Schema transform(Schema inputSchema) {
    int colIdx = inputSchema.getIndexOfColumn(columnName);

    List<ColumnMetaData> oldMeta = inputSchema.getColumnMetaData();
    List<ColumnMetaData> newMeta = new ArrayList<>(oldMeta.size() + newColumnNames.size() - 1);
    List<String> oldNames = inputSchema.getColumnNames();

    Iterator<ColumnMetaData> typesIter = oldMeta.iterator();
    Iterator<String> namesIter = oldNames.iterator();

    int i = 0;
    while (typesIter.hasNext()) {
        ColumnMetaData t = typesIter.next();
        String name = namesIter.next();
        if (i++ == colIdx) {
            //Replace String column with a set of binary/categorical columns
            if (t.getColumnType() != ColumnType.String)
                throw new IllegalStateException("Cannot convert non-string type");
            for (int j = 0; j < newColumnNames.size(); j++) {
                ColumnMetaData meta = new CategoricalMetaData(newColumnNames.get(j), "true", "false");
                newMeta.add(meta);
            }
        } else {
            newMeta.add(t);
        }
    }

    return inputSchema.newSchema(newMeta);
}
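Examples 2 and 3 share the same guard-and-replace pattern: locate the target column by name, verify that it is a String column, and substitute new column metadata. The condensed sketch below extracts that pattern into a standalone helper; replaceStringColumn is a hypothetical name for illustration, not part of the DataVec API.

// Hypothetical helper: swap out the metadata of one column, failing fast
// if the target is not a String column.
static Schema replaceStringColumn(Schema input, String columnName, ColumnMetaData replacement) {
    int colIdx = input.getIndexOfColumn(columnName);
    List<ColumnMetaData> newMeta = new ArrayList<>(input.getColumnMetaData());
    if (newMeta.get(colIdx).getColumnType() != ColumnType.String)
        throw new IllegalStateException("Cannot convert non-string column: " + columnName);
    newMeta.set(colIdx, replacement);
    return input.newSchema(newMeta);
}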
Example 4
Source File: ParseDoubleTransform.java, from DataVec (Apache License 2.0)
/**
 * Get the output schema for this transformation, given an input schema
 *
 * @param inputSchema the schema of the input data
 */
@Override
public Schema transform(Schema inputSchema) {
    Schema.Builder newSchema = new Schema.Builder();
    for (int i = 0; i < inputSchema.numColumns(); i++) {
        if (inputSchema.getType(i) == ColumnType.String) {
            newSchema.addColumnDouble(inputSchema.getMetaData(i).getName());
        } else {
            newSchema.addColumn(inputSchema.getMetaData(i));
        }
    }
    return newSchema.build();
}
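In practice, a transform like this is applied through a TransformProcess. A minimal sketch, assuming the usual TransformProcess builder API and a no-argument ParseDoubleTransform constructor:

Schema inputSchema = new Schema.Builder()
        .addColumnString("price")
        .build();

TransformProcess tp = new TransformProcess.Builder(inputSchema)
        .transform(new ParseDoubleTransform())
        .build();

// Every String column in the input becomes a Double column in the output
Schema outputSchema = tp.getFinalSchema();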
Example 5
Source File: StringMetaData.java, from DataVec (Apache License 2.0)
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
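Because ColumnType is an enum, the result of getColumnType() can drive a switch statement. A small sketch; the per-case handling here is illustrative only:

ColumnMetaData meta = schema.getMetaData(0);
switch (meta.getColumnType()) {
    case String:
        // free-text column: e.g. tokenize or parse it
        break;
    case Integer:
    case Double:
        // numeric column: e.g. normalize it
        break;
    default:
        break;
}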
Example 6
Source File: StringAnalysis.java, from DataVec (Apache License 2.0)
@Override
public ColumnType getColumnType() {
    return ColumnType.String;
}
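A StringAnalysis is typically read back out of a DataAnalysis produced by an analysis run. The sketch below assumes a DataAnalysis instance named dataAnalysis and the getColumnAnalysis / getMinLength / getMaxLength accessors; treat those names as assumptions rather than confirmed API:

// Hedged sketch: pull the per-column analysis back out by column name.
StringAnalysis sa = (StringAnalysis) dataAnalysis.getColumnAnalysis("comment");
System.out.println("shortest value: " + sa.getMinLength());
System.out.println("longest value: " + sa.getMaxLength());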
Example 7
Source File: TestMultiOpReduce.java, from DataVec (Apache License 2.0)
@Test
public void testCustomReductions() {

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
                    new DoubleWritable(0)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
                    new DoubleWritable(1)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
                    new DoubleWritable(2)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
                    new DoubleWritable(3)));

    //CustomReduceTakeSecond (defined elsewhere in the test class) evidently returns the second
    //value it sees, hence "one" and 1.0 below; the int column sums to 1+2+3+4 = 10
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new Text("one"),
                    new DoubleWritable(1));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
                    .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
                    .customReduction("textCol", new CustomReduceTakeSecond())
                    .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> accumulator = reducer.aggregableReducer();
    for (int i = 0; i < inputs.size(); i++) {
        accumulator.accept(inputs.get(i));
    }
    List<Writable> out = accumulator.get();

    assertEquals(4, out.size());
    assertEquals(expected, out);

    //Check schema:
    String[] expNames = new String[] {"key", "sum(intCol)", "myCustomReduce(textCol)", "myCustomReduce(doubleCol)"};
    ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.Integer, ColumnType.String,
                    ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int i = 0; i < 4; i++) {
        assertEquals(expNames[i], outSchema.getName(i));
        assertEquals(expTypes[i], outSchema.getType(i));
    }
}
Example 8
Source File: TestMultiOpReduce.java, from DataVec (Apache License 2.0)
@Test
public void testCustomReductionsWithCondition() {

    List<List<Writable>> inputs = new ArrayList<>();
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(1), new Text("zero"),
                    new DoubleWritable(0)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(2), new Text("one"),
                    new DoubleWritable(1)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(3), new Text("two"),
                    new DoubleWritable(2)));
    inputs.add(Arrays.asList((Writable) new Text("someKey"), new IntWritable(4), new Text("three"),
                    new DoubleWritable(3)));

    //The conditional reduction counts the rows where textCol != "three": 3 of the 4 rows match
    List<Writable> expected = Arrays.asList((Writable) new Text("someKey"), new IntWritable(10), new IntWritable(3),
                    new DoubleWritable(1));

    Schema schema = new Schema.Builder().addColumnString("key").addColumnInteger("intCol")
                    .addColumnString("textCol").addColumnString("doubleCol").build();

    Reducer reducer = new Reducer.Builder(ReduceOp.Sum).keyColumns("key")
                    .conditionalReduction("textCol", "condTextCol", ReduceOp.Count,
                                    new StringColumnCondition("textCol", ConditionOp.NotEqual, "three"))
                    .customReduction("doubleCol", new CustomReduceTakeSecond()).build();

    reducer.setInputSchema(schema);

    IAggregableReduceOp<List<Writable>, List<Writable>> accumulator = reducer.aggregableReducer();
    for (int i = 0; i < inputs.size(); i++) {
        accumulator.accept(inputs.get(i));
    }
    List<Writable> out = accumulator.get();

    assertEquals(4, out.size());
    assertEquals(expected, out);

    //Check schema: the conditional Count column comes out as a Long
    String[] expNames = new String[] {"key", "sum(intCol)", "condTextCol", "myCustomReduce(doubleCol)"};
    ColumnType[] expTypes = new ColumnType[] {ColumnType.String, ColumnType.Integer, ColumnType.Long,
                    ColumnType.String};
    Schema outSchema = reducer.transform(schema);

    assertEquals(4, outSchema.numColumns());
    for (int i = 0; i < 4; i++) {
        assertEquals(expNames[i], outSchema.getName(i));
        assertEquals(expTypes[i], outSchema.getType(i));
    }
}