Java Code Examples for org.datavec.api.transform.TransformProcess#fromJson()

The following examples show how to use org.datavec.api.transform.TransformProcess#fromJson(). They are taken from open-source projects; the source file and license are noted above each example.
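
Before the project examples, here is a minimal, self-contained sketch of the toJson()/fromJson() round trip. The schema and the column name "value" are placeholders chosen for illustration and do not come from any of the projects below.

import org.datavec.api.transform.MathOp;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class TransformProcessJsonRoundTrip {
    public static void main(String[] args) {
        // A minimal schema with a single integer column ("value" is a placeholder name)
        Schema schema = new Schema.Builder().addColumnInteger("value").build();

        // A transform process that adds 1 to that column
        TransformProcess tp = new TransformProcess.Builder(schema)
                .integerMathOp("value", MathOp.Add, 1)
                .build();

        // Serialize to JSON, then restore an equivalent process from the JSON string
        String json = tp.toJson();
        TransformProcess restored = TransformProcess.fromJson(json);

        System.out.println(tp.equals(restored)); // expected: true
    }
}
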
Example 1
Source File: TestYamlJsonSerde.java    From DataVec with Apache License 2.0
@Test
public void testTransformProcessAndSchema() {

    Schema schema = new Schema.Builder().addColumnInteger("firstCol").addColumnNDArray("nd1a", new long[] {1, 10})
                    .addColumnNDArray("nd1b", new long[] {1, 10}).addColumnNDArray("nd2", new long[] {1, 100})
                    .addColumnNDArray("nd3", new long[] {-1, -1}).build();

    TransformProcess tp = new TransformProcess.Builder(schema).integerMathOp("firstCol", MathOp.Add, 1)
                    .ndArrayColumnsMathOpTransform("added", MathOp.Add, "nd1a", "nd1b")
                    .ndArrayMathFunctionTransform("nd2", MathFunction.SQRT)
                    .ndArrayScalarOpTransform("nd3", MathOp.Multiply, 2.0).build();

    String asJson = tp.toJson();
    String asYaml = tp.toYaml();

    TransformProcess fromJson = TransformProcess.fromJson(asJson);
    TransformProcess fromYaml = TransformProcess.fromYaml(asYaml);

    assertEquals(tp, fromJson);
    assertEquals(tp, fromYaml);
}
 
Example 2
Source File: TestCustomTransformJsonYaml.java    From DataVec with Apache License 2.0
@Test
public void testCustomTransform() {

    Schema schema = new Schema.Builder().addColumnInteger("firstCol").addColumnDouble("secondCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema).integerMathOp("firstCol", MathOp.Add, 1)
                    .transform(new CustomTransform("secondCol", 3.14159))
                    .doubleMathOp("secondCol", MathOp.Multiply, 2.0).filter(new CustomFilter(123))
                    .filter(new CustomCondition("someArg")).build();

    String asJson = tp.toJson();
    String asYaml = tp.toYaml();

    TransformProcess fromJson = TransformProcess.fromJson(asJson);
    TransformProcess fromYaml = TransformProcess.fromYaml(asYaml);

    assertEquals(tp, fromJson);
    assertEquals(tp, fromYaml);
}
 
Example 3
Source File: TestYamlJsonSerde.java    From deeplearning4j with Apache License 2.0
@Test
public void testTransformProcessAndSchema() {

    Schema schema = new Schema.Builder().addColumnInteger("firstCol").addColumnNDArray("nd1a", new long[] {1, 10})
                    .addColumnNDArray("nd1b", new long[] {1, 10}).addColumnNDArray("nd2", new long[] {1, 100})
                    .addColumnNDArray("nd3", new long[] {-1, -1}).build();

    TransformProcess tp = new TransformProcess.Builder(schema).integerMathOp("firstCol", MathOp.Add, 1)
                    .ndArrayColumnsMathOpTransform("added", MathOp.Add, "nd1a", "nd1b")
                    .ndArrayMathFunctionTransform("nd2", MathFunction.SQRT)
                    .ndArrayScalarOpTransform("nd3", MathOp.Multiply, 2.0).build();

    String asJson = tp.toJson();
    String asYaml = tp.toYaml();

    TransformProcess fromJson = TransformProcess.fromJson(asJson);
    TransformProcess fromYaml = TransformProcess.fromYaml(asYaml);

    assertEquals(tp, fromJson);
    assertEquals(tp, fromYaml);
}
 
Example 4
Source File: TestCustomTransformJsonYaml.java    From deeplearning4j with Apache License 2.0
@Test
public void testCustomTransform() {

    Schema schema = new Schema.Builder().addColumnInteger("firstCol").addColumnDouble("secondCol").build();

    TransformProcess tp = new TransformProcess.Builder(schema).integerMathOp("firstCol", MathOp.Add, 1)
                    .transform(new CustomTransform("secondCol", 3.14159))
                    .doubleMathOp("secondCol", MathOp.Multiply, 2.0).filter(new CustomFilter(123))
                    .filter(new CustomCondition("someArg")).build();

    String asJson = tp.toJson();
    String asYaml = tp.toYaml();

    TransformProcess fromJson = TransformProcess.fromJson(asJson);
    TransformProcess fromYaml = TransformProcess.fromYaml(asYaml);

    assertEquals(tp, fromJson);
    assertEquals(tp, fromYaml);
}
 
Example 5
Source File: DataVecTransformClient.java    From DataVec with Apache License 2.0
/**
 * Fetch the current transform process from the server.
 *
 * @return the TransformProcess parsed from the server's JSON response, or null if the request fails
 */
@Override
public TransformProcess getCSVTransformProcess() {
    try {
        String s = Unirest.get(url + "/transformprocess").header("accept", "application/json")
                .header("Content-Type", "application/json").asString().getBody();
        return TransformProcess.fromJson(s);
    } catch (UnirestException e) {
        log.error("Error in getCSVTransformProcess()", e);
    }

    return null;
}
 
Example 6
Source File: DataVecTransformClient.java    From deeplearning4j with Apache License 2.0
/**
 * Fetch the current transform process from the server.
 *
 * @return the TransformProcess parsed from the server's JSON response, or null if the request fails
 */
@Override
public TransformProcess getCSVTransformProcess() {
    try {
        String s = Unirest.get(url + "/transformprocess").header("accept", "application/json")
                .header("Content-Type", "application/json").asString().getBody();
        return TransformProcess.fromJson(s);
    } catch (UnirestException e) {
        log.error("Error in getCSVTransformProcess()", e);
    }

    return null;
}
 
Example 7
Source File: RegressionTestJson.java    From DataVec with Apache License 2.0
@Test
public void regressionTestJson100a() throws Exception {
    //JSON saved in 1.0.0-alpha, before JSON format change

    File f = new ClassPathResource("regression_test/100a/transformprocess_regression_100a.json").getFile();
    String s = FileUtils.readFileToString(f);

    TransformProcess fromJson = TransformProcess.fromJson(s);



    Schema schema = new Schema.Builder().addColumnCategorical("Cat", "State1", "State2")
            .addColumnCategorical("Cat2", "State1", "State2").addColumnDouble("Dbl")
            .addColumnDouble("Dbl2", null, 100.0, true, false).addColumnInteger("Int")
            .addColumnInteger("Int2", 0, 10).addColumnLong("Long").addColumnLong("Long2", -100L, null)
            .addColumnString("Str").addColumnString("Str2", "someregexhere", 1, null)
            .addColumnString("Str3")
            .addColumnTime("TimeCol", DateTimeZone.UTC)
            .addColumnTime("TimeCol2", DateTimeZone.UTC, null, 1000L).build();

    Map<String, String> map = new HashMap<>();
    map.put("from", "to");
    map.put("anotherFrom", "anotherTo");

    TransformProcess expected =
            new TransformProcess.Builder(schema).categoricalToInteger("Cat").categoricalToOneHot("Cat2")
                    .appendStringColumnTransform("Str3", "ToAppend")
                    .integerToCategorical("Cat", Arrays.asList("State1", "State2"))
                    .stringToCategorical("Str", Arrays.asList("State1", "State2"))
                    .duplicateColumn("Str", "Str2a").removeColumns("Str2a")
                    .renameColumn("Str2", "Str2a").reorderColumns("Cat", "Dbl")
                    .conditionalCopyValueTransform("Dbl", "Dbl2",
                            new DoubleColumnCondition("Dbl", ConditionOp.Equal, 0.0))
                    .conditionalReplaceValueTransform("Dbl", new DoubleWritable(1.0),
                            new DoubleColumnCondition("Dbl", ConditionOp.Equal, 1.0))
                    .doubleColumnsMathOp("NewDouble", MathOp.Add, "Dbl", "Dbl2")
                    .doubleMathOp("Dbl", MathOp.Add, 1.0)
                    .integerColumnsMathOp("NewInt", MathOp.Subtract, "Int", "Int2")
                    .integerMathOp("Int", MathOp.Multiply, 2)
                    .transform(new ReplaceEmptyIntegerWithValueTransform("Int", 1))
                    .transform(new ReplaceInvalidWithIntegerTransform("Int", 1))
                    .longColumnsMathOp("Long", MathOp.Multiply, "Long", "Long2")
                    .longMathOp("Long", MathOp.ScalarMax, 0)
                    .transform(new MapAllStringsExceptListTransform("Str", "Other",
                            Arrays.asList("Ok", "SomeVal")))
                    .stringRemoveWhitespaceTransform("Str")
                    .transform(new ReplaceEmptyStringTransform("Str", "WasEmpty"))
                    .replaceStringTransform("Str", map)
                    .transform(new StringListToCategoricalSetTransform("Str",
                            Arrays.asList("StrA", "StrB"), Arrays.asList("StrA", "StrB"),
                            ","))
                    .stringMapTransform("Str2a", map)
                    .transform(new DeriveColumnsFromTimeTransform.Builder("TimeCol")
                            .addIntegerDerivedColumn("Hour", DateTimeFieldType.hourOfDay())
                            .addStringDerivedColumn("Date", "YYYY-MM-dd", DateTimeZone.UTC)
                            .build())
                    .stringToTimeTransform("Str2a", "YYYY-MM-dd hh:mm:ss", DateTimeZone.UTC)
                    .timeMathOp("TimeCol2", MathOp.Add, 1, TimeUnit.HOURS)

                    //Filters:
                    .filter(new FilterInvalidValues("Cat", "Str2a"))
                    .filter(new ConditionFilter(new NullWritableColumnCondition("Long")))

                    //Convert to/from sequence
                    .convertToSequence("Int", new NumericalColumnComparator("TimeCol2"))
                    .convertFromSequence()

                    //Sequence split
                    .convertToSequence("Int", new StringComparator("Str2a"))
                    .splitSequence(new SequenceSplitTimeSeparation("TimeCol2", 1, TimeUnit.HOURS))

                    //Reducers and reduce by window:
                    .reduce(new Reducer.Builder(ReduceOp.TakeFirst).keyColumns("TimeCol2")
                            .countColumns("Cat").sumColumns("Dbl").build())
                    .reduceSequenceByWindow(
                            new Reducer.Builder(ReduceOp.TakeFirst).countColumns("Cat2")
                                    .stdevColumns("Dbl2").build(),
                            new OverlappingTimeWindowFunction.Builder()
                                    .timeColumn("TimeCol2")
                                    .addWindowStartTimeColumn(true)
                                    .addWindowEndTimeColumn(true)
                                    .windowSize(1, TimeUnit.HOURS)
                                    .offset(5, TimeUnit.MINUTES)
                                    .windowSeparation(15, TimeUnit.MINUTES)
                                    .excludeEmptyWindows(true).build())

                    //Calculate sorted rank
                    .convertFromSequence()
                    .calculateSortedRank("rankColName", "TimeCol2", new LongWritableComparator())
                    .sequenceMovingWindowReduce("rankColName", 20, ReduceOp.Mean)
                    .addConstantColumn("someIntColumn", ColumnType.Integer, new IntWritable(0))
                    .integerToOneHot("someIntColumn", 0, 3)
                    .filter(new SequenceLengthCondition(ConditionOp.LessThan, 1))
                    .addConstantColumn("testColSeq", ColumnType.Integer, new DoubleWritable(0))
                    .offsetSequence(Collections.singletonList("testColSeq"), 1, SequenceOffsetTransform.OperationType.InPlace)
                    .addConstantColumn("someTextCol", ColumnType.String, new Text("some values"))
                    .build();


    assertEquals(expected, fromJson);
}
 
Example 8
Source File: TestGazeteerTransform.java    From deeplearning4j with Apache License 2.0
@Test
public void testGazeteerTransform(){

    String[] corpus = {
            "hello I like apple".toLowerCase(),
            "cherry date eggplant potato".toLowerCase()
    };

    //Gazeteer transform: basically 0/1 if word is present. Assumes already tokenized input
    List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant");

    GazeteerTransform t = new GazeteerTransform("words", "out", words);

    SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder()
            .addColumnString("words").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
            .transform(t)
            .build();

    List<List<List<Writable>>> input = new ArrayList<>();
    for(String s : corpus){
        String[] split = s.split(" ");
        List<List<Writable>> seq = new ArrayList<>();
        for(String s2 : split){
            seq.add(Collections.<Writable>singletonList(new Text(s2)));
        }
        input.add(seq);
    }

    List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp);

    INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get();
    INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get();

    INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0});
    INDArray exp1 = Nd4j.create(new float[]{0, 0, 1, 1, 1});

    assertEquals(exp0, arr0);
    assertEquals(exp1, arr1);


    String json = tp.toJson();
    TransformProcess tp2 = TransformProcess.fromJson(json);
    assertEquals(tp, tp2);

    //Execute the deserialized process and check that it produces the same output
    List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp2);
    INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get();
    INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get();

    assertEquals(exp0, arr0a);
    assertEquals(exp1, arr1a);
}
 
Example 9
Source File: TestMultiNLPTransform.java    From deeplearning4j with Apache License 2.0
@Test
public void test(){

    List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant");
    GazeteerTransform t1 = new GazeteerTransform("words", "out", words);
    GazeteerTransform t2 = new GazeteerTransform("out", "out", words);


    MultiNlpTransform multi = new MultiNlpTransform("text", new BagOfWordsTransform[]{t1, t2}, "out");

    String[] corpus = {
            "hello I like apple".toLowerCase(),
            "date eggplant potato".toLowerCase()
    };

    List<List<List<Writable>>> input = new ArrayList<>();
    for(String s : corpus){
        String[] split = s.split(" ");
        List<List<Writable>> seq = new ArrayList<>();
        for(String s2 : split){
            seq.add(Collections.<Writable>singletonList(new Text(s2)));
        }
        input.add(seq);
    }

    SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder()
            .addColumnString("text").build();

    TransformProcess tp = new TransformProcess.Builder(schema)
            .transform(multi)
            .build();

    List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp);

    INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get();
    INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get();

    INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0, 1, 0, 0, 0, 0});
    INDArray exp1 = Nd4j.create(new float[]{0, 0, 0, 1, 1, 0, 0, 0, 1, 1});

    assertEquals(exp0, arr0);
    assertEquals(exp1, arr1);


    String json = tp.toJson();
    TransformProcess tp2 = TransformProcess.fromJson(json);
    assertEquals(tp, tp2);

    //Execute the deserialized process and check that it produces the same output
    List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp2);
    INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get();
    INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get();

    assertEquals(exp0, arr0a);
    assertEquals(exp1, arr1a);

}
 
Example 10
Source File: CSVSparkTransformServer.java    From deeplearning4j with Apache License 2.0
public void runMain(String[] args) throws Exception {
    JCommander jcmdr = new JCommander(this);

    try {
        jcmdr.parse(args);
    } catch (ParameterException e) {
        //User provided invalid input -> print the usage info
        jcmdr.usage();
        if (jsonPath == null)
            System.err.println("Json path parameter is missing.");
        try {
            Thread.sleep(500);
        } catch (Exception e2) {
            //Ignore interruption - we are exiting anyway
        }
        System.exit(1);
    }

    if (jsonPath != null) {
        String json = FileUtils.readFileToString(new File(jsonPath));
        TransformProcess transformProcess = TransformProcess.fromJson(json);
        transform = new CSVSparkTransform(transformProcess);
    } else {
        log.warn("Server started with no JSON for the transform process. Please specify a transform process by sending a POST request with raw JSON "
                + "to /transformprocess");
    }

    //Set play secret key, if required
    //http://www.playframework.com/documentation/latest/ApplicationSecret
    String crypto = System.getProperty("play.crypto.secret");
    if (crypto == null || "changeme".equals(crypto) || "".equals(crypto)) {
        byte[] newCrypto = new byte[1024];

        new Random().nextBytes(newCrypto);

        String base64 = Base64.getEncoder().encodeToString(newCrypto);
        System.setProperty("play.crypto.secret", base64);
    }


    server = Server.forRouter(Mode.PROD, port, this::createRouter);
}
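
For completeness, here is a sketch (not from the project above) of how the JSON file read by runMain() might be produced beforehand: build a TransformProcess, serialize it with toJson(), and write it to the path the server is later started with. The schema, column name, and output path are illustrative assumptions.

import java.io.File;

import org.apache.commons.io.FileUtils;
import org.datavec.api.transform.MathOp;
import org.datavec.api.transform.TransformProcess;
import org.datavec.api.transform.schema.Schema;

public class WriteTransformProcessJson {
    public static void main(String[] args) throws Exception {
        // Build an arbitrary transform process (schema and column name are illustrative)
        Schema schema = new Schema.Builder().addColumnDouble("feature").build();
        TransformProcess tp = new TransformProcess.Builder(schema)
                .doubleMathOp("feature", MathOp.Multiply, 2.0)
                .build();

        // Write the JSON to a file; the server can then be pointed at this path on startup
        FileUtils.writeStringToFile(new File("transformprocess.json"), tp.toJson());
    }
}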
 
Example 11
Source File: RegressionTestJson.java    From deeplearning4j with Apache License 2.0
@Test
public void regressionTestJson100a() throws Exception {
    //JSON saved in 1.0.0-alpha, before JSON format change

    File f = new ClassPathResource("datavec-api/regression_test/100a/transformprocess_regression_100a.json").getFile();
    String s = FileUtils.readFileToString(f);

    TransformProcess fromJson = TransformProcess.fromJson(s);



    Schema schema = new Schema.Builder().addColumnCategorical("Cat", "State1", "State2")
            .addColumnCategorical("Cat2", "State1", "State2").addColumnDouble("Dbl")
            .addColumnDouble("Dbl2", null, 100.0, true, false).addColumnInteger("Int")
            .addColumnInteger("Int2", 0, 10).addColumnLong("Long").addColumnLong("Long2", -100L, null)
            .addColumnString("Str").addColumnString("Str2", "someregexhere", 1, null)
            .addColumnString("Str3")
            .addColumnTime("TimeCol", DateTimeZone.UTC)
            .addColumnTime("TimeCol2", DateTimeZone.UTC, null, 1000L).build();

    Map<String, String> map = new HashMap<>();
    map.put("from", "to");
    map.put("anotherFrom", "anotherTo");

    TransformProcess expected =
            new TransformProcess.Builder(schema).categoricalToInteger("Cat").categoricalToOneHot("Cat2")
                    .appendStringColumnTransform("Str3", "ToAppend")
                    .integerToCategorical("Cat", Arrays.asList("State1", "State2"))
                    .stringToCategorical("Str", Arrays.asList("State1", "State2"))
                    .duplicateColumn("Str", "Str2a").removeColumns("Str2a")
                    .renameColumn("Str2", "Str2a").reorderColumns("Cat", "Dbl")
                    .conditionalCopyValueTransform("Dbl", "Dbl2",
                            new DoubleColumnCondition("Dbl", ConditionOp.Equal, 0.0))
                    .conditionalReplaceValueTransform("Dbl", new DoubleWritable(1.0),
                            new DoubleColumnCondition("Dbl", ConditionOp.Equal, 1.0))
                    .doubleColumnsMathOp("NewDouble", MathOp.Add, "Dbl", "Dbl2")
                    .doubleMathOp("Dbl", MathOp.Add, 1.0)
                    .integerColumnsMathOp("NewInt", MathOp.Subtract, "Int", "Int2")
                    .integerMathOp("Int", MathOp.Multiply, 2)
                    .transform(new ReplaceEmptyIntegerWithValueTransform("Int", 1))
                    .transform(new ReplaceInvalidWithIntegerTransform("Int", 1))
                    .longColumnsMathOp("Long", MathOp.Multiply, "Long", "Long2")
                    .longMathOp("Long", MathOp.ScalarMax, 0)
                    .transform(new MapAllStringsExceptListTransform("Str", "Other",
                            Arrays.asList("Ok", "SomeVal")))
                    .stringRemoveWhitespaceTransform("Str")
                    .transform(new ReplaceEmptyStringTransform("Str", "WasEmpty"))
                    .replaceStringTransform("Str", map)
                    .transform(new StringListToCategoricalSetTransform("Str",
                            Arrays.asList("StrA", "StrB"), Arrays.asList("StrA", "StrB"),
                            ","))
                    .stringMapTransform("Str2a", map)
                    .transform(new DeriveColumnsFromTimeTransform.Builder("TimeCol")
                            .addIntegerDerivedColumn("Hour", DateTimeFieldType.hourOfDay())
                            .addStringDerivedColumn("Date", "YYYY-MM-dd", DateTimeZone.UTC)
                            .build())
                    .stringToTimeTransform("Str2a", "YYYY-MM-dd hh:mm:ss", DateTimeZone.UTC)
                    .timeMathOp("TimeCol2", MathOp.Add, 1, TimeUnit.HOURS)

                    //Filters:
                    .filter(new FilterInvalidValues("Cat", "Str2a"))
                    .filter(new ConditionFilter(new NullWritableColumnCondition("Long")))

                    //Convert to/from sequence
                    .convertToSequence("Int", new NumericalColumnComparator("TimeCol2"))
                    .convertFromSequence()

                    //Sequence split
                    .convertToSequence("Int", new StringComparator("Str2a"))
                    .splitSequence(new SequenceSplitTimeSeparation("TimeCol2", 1, TimeUnit.HOURS))

                    //Reducers and reduce by window:
                    .reduce(new Reducer.Builder(ReduceOp.TakeFirst).keyColumns("TimeCol2")
                            .countColumns("Cat").sumColumns("Dbl").build())
                    .reduceSequenceByWindow(
                            new Reducer.Builder(ReduceOp.TakeFirst).countColumns("Cat2")
                                    .stdevColumns("Dbl2").build(),
                            new OverlappingTimeWindowFunction.Builder()
                                    .timeColumn("TimeCol2")
                                    .addWindowStartTimeColumn(true)
                                    .addWindowEndTimeColumn(true)
                                    .windowSize(1, TimeUnit.HOURS)
                                    .offset(5, TimeUnit.MINUTES)
                                    .windowSeparation(15, TimeUnit.MINUTES)
                                    .excludeEmptyWindows(true).build())

                    //Calculate sorted rank
                    .convertFromSequence()
                    .calculateSortedRank("rankColName", "TimeCol2", new LongWritableComparator())
                    .sequenceMovingWindowReduce("rankColName", 20, ReduceOp.Mean)
                    .addConstantColumn("someIntColumn", ColumnType.Integer, new IntWritable(0))
                    .integerToOneHot("someIntColumn", 0, 3)
                    .filter(new SequenceLengthCondition(ConditionOp.LessThan, 1))
                    .addConstantColumn("testColSeq", ColumnType.Integer, new DoubleWritable(0))
                    .offsetSequence(Collections.singletonList("testColSeq"), 1, SequenceOffsetTransform.OperationType.InPlace)
                    .addConstantColumn("someTextCol", ColumnType.String, new Text("some values"))
                    .build();


    assertEquals(expected, fromJson);
}