Java Code Examples for org.datavec.api.transform.TransformProcess#fromJson()
The following examples show how to use
org.datavec.api.transform.TransformProcess#fromJson().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: TestYamlJsonSerde.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testTransformProcessAndSchema() {
    // Schema with an integer column plus NDArray columns of fixed and variable ({-1, -1}) shape.
    Schema inputSchema = new Schema.Builder()
            .addColumnInteger("firstCol")
            .addColumnNDArray("nd1a", new long[] {1, 10})
            .addColumnNDArray("nd1b", new long[] {1, 10})
            .addColumnNDArray("nd2", new long[] {1, 100})
            .addColumnNDArray("nd3", new long[] {-1, -1})
            .build();

    // Pipeline covering scalar math, column-wise NDArray math, an elementwise
    // math function, and a scalar NDArray op — all of which must serialize.
    TransformProcess process = new TransformProcess.Builder(inputSchema)
            .integerMathOp("firstCol", MathOp.Add, 1)
            .ndArrayColumnsMathOpTransform("added", MathOp.Add, "nd1a", "nd1b")
            .ndArrayMathFunctionTransform("nd2", MathFunction.SQRT)
            .ndArrayScalarOpTransform("nd3", MathOp.Multiply, 2.0)
            .build();

    // Round-trip through both serialization formats; each must reproduce an equal process.
    String json = process.toJson();
    String yaml = process.toYaml();
    TransformProcess restoredFromJson = TransformProcess.fromJson(json);
    TransformProcess restoredFromYaml = TransformProcess.fromYaml(yaml);
    assertEquals(process, restoredFromJson);
    assertEquals(process, restoredFromYaml);
}
Example 2
Source File: TestCustomTransformJsonYaml.java From DataVec with Apache License 2.0 | 6 votes |
@Test
public void testCustomTransform() {
    // Two-column schema: an integer and a double.
    Schema schema = new Schema.Builder()
            .addColumnInteger("firstCol")
            .addColumnDouble("secondCol")
            .build();

    // Pipeline mixing built-in ops with a user-defined transform, filter and
    // condition; custom types must survive JSON and YAML serde.
    TransformProcess tp = new TransformProcess.Builder(schema)
            .integerMathOp("firstCol", MathOp.Add, 1)
            .transform(new CustomTransform("secondCol", 3.14159))
            .doubleMathOp("secondCol", MathOp.Multiply, 2.0)
            .filter(new CustomFilter(123))
            .filter(new CustomCondition("someArg"))
            .build();

    String asJson = tp.toJson();
    String asYaml = tp.toYaml();

    // Both formats must round-trip to an equal TransformProcess.
    assertEquals(tp, TransformProcess.fromJson(asJson));
    assertEquals(tp, TransformProcess.fromYaml(asYaml));
}
Example 3
Source File: TestYamlJsonSerde.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testTransformProcessAndSchema() {
    // One integer column and four NDArray columns; nd3 uses {-1, -1} (variable shape).
    Schema schema = new Schema.Builder()
            .addColumnInteger("firstCol")
            .addColumnNDArray("nd1a", new long[] {1, 10})
            .addColumnNDArray("nd1b", new long[] {1, 10})
            .addColumnNDArray("nd2", new long[] {1, 100})
            .addColumnNDArray("nd3", new long[] {-1, -1})
            .build();

    // Exercise each flavor of NDArray transform plus a plain integer op.
    TransformProcess original = new TransformProcess.Builder(schema)
            .integerMathOp("firstCol", MathOp.Add, 1)
            .ndArrayColumnsMathOpTransform("added", MathOp.Add, "nd1a", "nd1b")
            .ndArrayMathFunctionTransform("nd2", MathFunction.SQRT)
            .ndArrayScalarOpTransform("nd3", MathOp.Multiply, 2.0)
            .build();

    // Serialize to both supported formats.
    String serializedJson = original.toJson();
    String serializedYaml = original.toYaml();

    // Deserializing either representation must yield an equal process.
    assertEquals(original, TransformProcess.fromJson(serializedJson));
    assertEquals(original, TransformProcess.fromYaml(serializedYaml));
}
Example 4
Source File: TestCustomTransformJsonYaml.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Test
public void testCustomTransform() {
    // Minimal schema: integer + double column.
    Schema inputSchema = new Schema.Builder()
            .addColumnInteger("firstCol")
            .addColumnDouble("secondCol")
            .build();

    // Custom transform/filter/condition implementations interleaved with
    // built-in math ops; serde must handle the user-defined classes.
    TransformProcess original = new TransformProcess.Builder(inputSchema)
            .integerMathOp("firstCol", MathOp.Add, 1)
            .transform(new CustomTransform("secondCol", 3.14159))
            .doubleMathOp("secondCol", MathOp.Multiply, 2.0)
            .filter(new CustomFilter(123))
            .filter(new CustomCondition("someArg"))
            .build();

    String json = original.toJson();
    String yaml = original.toYaml();

    TransformProcess deserializedJson = TransformProcess.fromJson(json);
    TransformProcess deserializedYaml = TransformProcess.fromYaml(yaml);

    // Round trips through both formats must preserve equality.
    assertEquals(original, deserializedJson);
    assertEquals(original, deserializedYaml);
}
Example 5
Source File: DataVecTransformClient.java From DataVec with Apache License 2.0 | 5 votes |
/**
 * Fetch the transform process currently configured on the remote server
 * (GET {@code url + "/transformprocess"}) and deserialize it from JSON.
 *
 * @return the server's {@link TransformProcess}, or {@code null} if the HTTP
 *         request fails (the error is logged)
 */
@Override
public TransformProcess getCSVTransformProcess() {
    try {
        String s = Unirest.get(url + "/transformprocess")
                .header("accept", "application/json")
                .header("Content-Type", "application/json")
                .asString()
                .getBody();
        return TransformProcess.fromJson(s);
    } catch (UnirestException e) {
        // Log with the cause attached; the previous e.printStackTrace() was
        // redundant with the logger call and bypassed the logging framework.
        log.error("Error in getCSVTransformProcess()", e);
    }
    return null;
}
Example 6
Source File: DataVecTransformClient.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Retrieves the transform process hosted at {@code url + "/transformprocess"}.
 *
 * @return the deserialized {@link TransformProcess}, or {@code null} when the
 *         request fails (failure is logged)
 */
@Override
public TransformProcess getCSVTransformProcess() {
    try {
        String body = Unirest.get(url + "/transformprocess")
                .header("accept", "application/json")
                .header("Content-Type", "application/json")
                .asString()
                .getBody();
        return TransformProcess.fromJson(body);
    } catch (UnirestException e) {
        log.error("Error in getCSVTransformProcess()", e);
        return null;
    }
}
Example 7
Source File: RegressionTestJson.java From DataVec with Apache License 2.0 | 4 votes |
// Regression test: deserializes a TransformProcess JSON fixture saved by
// 1.0.0-alpha (before the JSON format change) and asserts it equals a
// programmatically built pipeline covering transforms, filters, sequence
// conversion/splitting, reducers, windowing, sorted rank and constant columns.
// NOTE(review): the exact builder-call order below mirrors the saved fixture —
// do not reorder any step, or equality with the deserialized process breaks.
@Test public void regressionTestJson100a() throws Exception { //JSON saved in 1.0.0-alpha, before JSON format change File f = new ClassPathResource("regression_test/100a/transformprocess_regression_100a.json").getFile(); String s = FileUtils.readFileToString(f); TransformProcess fromJson = TransformProcess.fromJson(s); Schema schema = new Schema.Builder().addColumnCategorical("Cat", "State1", "State2") .addColumnCategorical("Cat2", "State1", "State2").addColumnDouble("Dbl") .addColumnDouble("Dbl2", null, 100.0, true, false).addColumnInteger("Int") .addColumnInteger("Int2", 0, 10).addColumnLong("Long").addColumnLong("Long2", -100L, null) .addColumnString("Str").addColumnString("Str2", "someregexhere", 1, null) .addColumnString("Str3") .addColumnTime("TimeCol", DateTimeZone.UTC) .addColumnTime("TimeCol2", DateTimeZone.UTC, null, 1000L).build(); Map<String, String> map = new HashMap<>(); map.put("from", "to"); map.put("anotherFrom", "anotherTo"); TransformProcess expected = new TransformProcess.Builder(schema).categoricalToInteger("Cat").categoricalToOneHot("Cat2") .appendStringColumnTransform("Str3", "ToAppend") .integerToCategorical("Cat", Arrays.asList("State1", "State2")) .stringToCategorical("Str", Arrays.asList("State1", "State2")) .duplicateColumn("Str", "Str2a").removeColumns("Str2a") .renameColumn("Str2", "Str2a").reorderColumns("Cat", "Dbl") .conditionalCopyValueTransform("Dbl", "Dbl2", new DoubleColumnCondition("Dbl", ConditionOp.Equal, 0.0)) .conditionalReplaceValueTransform("Dbl", new DoubleWritable(1.0), new DoubleColumnCondition("Dbl", ConditionOp.Equal, 1.0)) .doubleColumnsMathOp("NewDouble", MathOp.Add, "Dbl", "Dbl2") .doubleMathOp("Dbl", MathOp.Add, 1.0) .integerColumnsMathOp("NewInt", MathOp.Subtract, "Int", "Int2") .integerMathOp("Int", MathOp.Multiply, 2) .transform(new ReplaceEmptyIntegerWithValueTransform("Int", 1)) .transform(new ReplaceInvalidWithIntegerTransform("Int", 1)) .longColumnsMathOp("Long", MathOp.Multiply, "Long", "Long2") 
.longMathOp("Long", MathOp.ScalarMax, 0) .transform(new MapAllStringsExceptListTransform("Str", "Other", Arrays.asList("Ok", "SomeVal"))) .stringRemoveWhitespaceTransform("Str") .transform(new ReplaceEmptyStringTransform("Str", "WasEmpty")) .replaceStringTransform("Str", map) .transform(new StringListToCategoricalSetTransform("Str", Arrays.asList("StrA", "StrB"), Arrays.asList("StrA", "StrB"), ",")) .stringMapTransform("Str2a", map) .transform(new DeriveColumnsFromTimeTransform.Builder("TimeCol") .addIntegerDerivedColumn("Hour", DateTimeFieldType.hourOfDay()) .addStringDerivedColumn("Date", "YYYY-MM-dd", DateTimeZone.UTC) .build()) .stringToTimeTransform("Str2a", "YYYY-MM-dd hh:mm:ss", DateTimeZone.UTC) .timeMathOp("TimeCol2", MathOp.Add, 1, TimeUnit.HOURS) //Filters: .filter(new FilterInvalidValues("Cat", "Str2a")) .filter(new ConditionFilter(new NullWritableColumnCondition("Long"))) //Convert to/from sequence .convertToSequence("Int", new NumericalColumnComparator("TimeCol2")) .convertFromSequence() //Sequence split .convertToSequence("Int", new StringComparator("Str2a")) .splitSequence(new SequenceSplitTimeSeparation("TimeCol2", 1, TimeUnit.HOURS)) //Reducers and reduce by window: .reduce(new Reducer.Builder(ReduceOp.TakeFirst).keyColumns("TimeCol2") .countColumns("Cat").sumColumns("Dbl").build()) .reduceSequenceByWindow( new Reducer.Builder(ReduceOp.TakeFirst).countColumns("Cat2") .stdevColumns("Dbl2").build(), new OverlappingTimeWindowFunction.Builder() .timeColumn("TimeCol2") .addWindowStartTimeColumn(true) .addWindowEndTimeColumn(true) .windowSize(1, TimeUnit.HOURS) .offset(5, TimeUnit.MINUTES) .windowSeparation(15, TimeUnit.MINUTES) .excludeEmptyWindows(true).build()) //Calculate sorted rank .convertFromSequence() .calculateSortedRank("rankColName", "TimeCol2", new LongWritableComparator()) .sequenceMovingWindowReduce("rankColName", 20, ReduceOp.Mean) .addConstantColumn("someIntColumn", ColumnType.Integer, new IntWritable(0)) 
.integerToOneHot("someIntColumn", 0, 3) .filter(new SequenceLengthCondition(ConditionOp.LessThan, 1)) .addConstantColumn("testColSeq", ColumnType.Integer, new DoubleWritable(0)) .offsetSequence(Collections.singletonList("testColSeq"), 1, SequenceOffsetTransform.OperationType.InPlace) .addConstantColumn("someTextCol", ColumnType.String, new Text("some values")) .build(); assertEquals(expected, fromJson); }
Example 8
Source File: TestGazeteerTransform.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGazeteerTransform(){ String[] corpus = { "hello I like apple".toLowerCase(), "cherry date eggplant potato".toLowerCase() }; //Gazeteer transform: basically 0/1 if word is present. Assumes already tokenized input List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant"); GazeteerTransform t = new GazeteerTransform("words", "out", words); SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder() .addColumnString("words").build(); TransformProcess tp = new TransformProcess.Builder(schema) .transform(t) .build(); List<List<List<Writable>>> input = new ArrayList<>(); for(String s : corpus){ String[] split = s.split(" "); List<List<Writable>> seq = new ArrayList<>(); for(String s2 : split){ seq.add(Collections.<Writable>singletonList(new Text(s2))); } input.add(seq); } List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp); INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get(); INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get(); INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0}); INDArray exp1 = Nd4j.create(new float[]{0, 0, 1, 1, 1}); assertEquals(exp0, arr0); assertEquals(exp1, arr1); String json = tp.toJson(); TransformProcess tp2 = TransformProcess.fromJson(json); assertEquals(tp, tp2); List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp); INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get(); INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get(); assertEquals(exp0, arr0a); assertEquals(exp1, arr1a); }
Example 9
Source File: TestMultiNLPTransform.java From deeplearning4j with Apache License 2.0 | 4 votes |
/**
 * MultiNlpTransform composed of two gazeteer transforms: output per step is the
 * concatenation of both presence vectors. The process must also round-trip
 * through JSON and produce identical results afterwards.
 */
@Test
public void test(){
    List<String> words = Arrays.asList("apple", "banana", "cherry", "date", "eggplant");
    GazeteerTransform t1 = new GazeteerTransform("words", "out", words);
    GazeteerTransform t2 = new GazeteerTransform("out", "out", words);
    MultiNlpTransform multi = new MultiNlpTransform("text", new BagOfWordsTransform[]{t1, t2}, "out");

    String[] corpus = {
            "hello I like apple".toLowerCase(),
            "date eggplant potato".toLowerCase()
    };

    // Tokenize each sentence into a sequence of single-Writable (Text) steps.
    List<List<List<Writable>>> input = new ArrayList<>();
    for(String s : corpus){
        String[] split = s.split(" ");
        List<List<Writable>> seq = new ArrayList<>();
        for(String s2 : split){
            seq.add(Collections.<Writable>singletonList(new Text(s2)));
        }
        input.add(seq);
    }

    SequenceSchema schema = (SequenceSchema) new SequenceSchema.Builder()
            .addColumnString("text").build();
    TransformProcess tp = new TransformProcess.Builder(schema)
            .transform(multi)
            .build();

    List<List<List<Writable>>> execute = LocalTransformExecutor.executeSequenceToSequence(input, tp);
    INDArray arr0 = ((NDArrayWritable)execute.get(0).get(0).get(0)).get();
    INDArray arr1 = ((NDArrayWritable)execute.get(0).get(1).get(0)).get();

    // Expected vectors are the two 5-element gazeteer outputs concatenated.
    INDArray exp0 = Nd4j.create(new float[]{1, 0, 0, 0, 0, 1, 0, 0, 0, 0});
    INDArray exp1 = Nd4j.create(new float[]{0, 0, 0, 1, 1, 0, 0, 0, 1, 1});
    assertEquals(exp0, arr0);
    assertEquals(exp1, arr1);

    String json = tp.toJson();
    TransformProcess tp2 = TransformProcess.fromJson(json);
    assertEquals(tp, tp2);

    // BUG FIX: previously executed the original 'tp' again, leaving the
    // deserialized process unexercised; run tp2 to validate the round trip.
    List<List<List<Writable>>> execute2 = LocalTransformExecutor.executeSequenceToSequence(input, tp2);
    INDArray arr0a = ((NDArrayWritable)execute2.get(0).get(0).get(0)).get();
    INDArray arr1a = ((NDArrayWritable)execute2.get(0).get(1).get(0)).get();
    assertEquals(exp0, arr0a);
    assertEquals(exp1, arr1a);
}
Example 10
Source File: CSVSparkTransformServer.java From deeplearning4j with Apache License 2.0 | 4 votes |
public void runMain(String[] args) throws Exception { JCommander jcmdr = new JCommander(this); try { jcmdr.parse(args); } catch (ParameterException e) { //User provides invalid input -> print the usage info jcmdr.usage(); if (jsonPath == null) System.err.println("Json path parameter is missing."); try { Thread.sleep(500); } catch (Exception e2) { } System.exit(1); } if (jsonPath != null) { String json = FileUtils.readFileToString(new File(jsonPath)); TransformProcess transformProcess = TransformProcess.fromJson(json); transform = new CSVSparkTransform(transformProcess); } else { log.warn("Server started with no json for transform process. Please ensure you specify a transform process via sending a post request with raw json" + "to /transformprocess"); } //Set play secret key, if required //http://www.playframework.com/documentation/latest/ApplicationSecret String crypto = System.getProperty("play.crypto.secret"); if (crypto == null || "changeme".equals(crypto) || "".equals(crypto) ) { byte[] newCrypto = new byte[1024]; new Random().nextBytes(newCrypto); String base64 = Base64.getEncoder().encodeToString(newCrypto); System.setProperty("play.crypto.secret", base64); } server = Server.forRouter(Mode.PROD, port, this::createRouter); }
Example 11
Source File: RegressionTestJson.java From deeplearning4j with Apache License 2.0 | 4 votes |
// Regression test: deserializes a TransformProcess JSON fixture saved by
// 1.0.0-alpha (before the JSON format change) and asserts it equals a
// programmatically built pipeline covering transforms, filters, sequence
// conversion/splitting, reducers, windowing, sorted rank and constant columns.
// NOTE(review): the exact builder-call order below mirrors the saved fixture —
// do not reorder any step, or equality with the deserialized process breaks.
@Test public void regressionTestJson100a() throws Exception { //JSON saved in 1.0.0-alpha, before JSON format change File f = new ClassPathResource("datavec-api/regression_test/100a/transformprocess_regression_100a.json").getFile(); String s = FileUtils.readFileToString(f); TransformProcess fromJson = TransformProcess.fromJson(s); Schema schema = new Schema.Builder().addColumnCategorical("Cat", "State1", "State2") .addColumnCategorical("Cat2", "State1", "State2").addColumnDouble("Dbl") .addColumnDouble("Dbl2", null, 100.0, true, false).addColumnInteger("Int") .addColumnInteger("Int2", 0, 10).addColumnLong("Long").addColumnLong("Long2", -100L, null) .addColumnString("Str").addColumnString("Str2", "someregexhere", 1, null) .addColumnString("Str3") .addColumnTime("TimeCol", DateTimeZone.UTC) .addColumnTime("TimeCol2", DateTimeZone.UTC, null, 1000L).build(); Map<String, String> map = new HashMap<>(); map.put("from", "to"); map.put("anotherFrom", "anotherTo"); TransformProcess expected = new TransformProcess.Builder(schema).categoricalToInteger("Cat").categoricalToOneHot("Cat2") .appendStringColumnTransform("Str3", "ToAppend") .integerToCategorical("Cat", Arrays.asList("State1", "State2")) .stringToCategorical("Str", Arrays.asList("State1", "State2")) .duplicateColumn("Str", "Str2a").removeColumns("Str2a") .renameColumn("Str2", "Str2a").reorderColumns("Cat", "Dbl") .conditionalCopyValueTransform("Dbl", "Dbl2", new DoubleColumnCondition("Dbl", ConditionOp.Equal, 0.0)) .conditionalReplaceValueTransform("Dbl", new DoubleWritable(1.0), new DoubleColumnCondition("Dbl", ConditionOp.Equal, 1.0)) .doubleColumnsMathOp("NewDouble", MathOp.Add, "Dbl", "Dbl2") .doubleMathOp("Dbl", MathOp.Add, 1.0) .integerColumnsMathOp("NewInt", MathOp.Subtract, "Int", "Int2") .integerMathOp("Int", MathOp.Multiply, 2) .transform(new ReplaceEmptyIntegerWithValueTransform("Int", 1)) .transform(new ReplaceInvalidWithIntegerTransform("Int", 1)) .longColumnsMathOp("Long", MathOp.Multiply, "Long", 
"Long2") .longMathOp("Long", MathOp.ScalarMax, 0) .transform(new MapAllStringsExceptListTransform("Str", "Other", Arrays.asList("Ok", "SomeVal"))) .stringRemoveWhitespaceTransform("Str") .transform(new ReplaceEmptyStringTransform("Str", "WasEmpty")) .replaceStringTransform("Str", map) .transform(new StringListToCategoricalSetTransform("Str", Arrays.asList("StrA", "StrB"), Arrays.asList("StrA", "StrB"), ",")) .stringMapTransform("Str2a", map) .transform(new DeriveColumnsFromTimeTransform.Builder("TimeCol") .addIntegerDerivedColumn("Hour", DateTimeFieldType.hourOfDay()) .addStringDerivedColumn("Date", "YYYY-MM-dd", DateTimeZone.UTC) .build()) .stringToTimeTransform("Str2a", "YYYY-MM-dd hh:mm:ss", DateTimeZone.UTC) .timeMathOp("TimeCol2", MathOp.Add, 1, TimeUnit.HOURS) //Filters: .filter(new FilterInvalidValues("Cat", "Str2a")) .filter(new ConditionFilter(new NullWritableColumnCondition("Long"))) //Convert to/from sequence .convertToSequence("Int", new NumericalColumnComparator("TimeCol2")) .convertFromSequence() //Sequence split .convertToSequence("Int", new StringComparator("Str2a")) .splitSequence(new SequenceSplitTimeSeparation("TimeCol2", 1, TimeUnit.HOURS)) //Reducers and reduce by window: .reduce(new Reducer.Builder(ReduceOp.TakeFirst).keyColumns("TimeCol2") .countColumns("Cat").sumColumns("Dbl").build()) .reduceSequenceByWindow( new Reducer.Builder(ReduceOp.TakeFirst).countColumns("Cat2") .stdevColumns("Dbl2").build(), new OverlappingTimeWindowFunction.Builder() .timeColumn("TimeCol2") .addWindowStartTimeColumn(true) .addWindowEndTimeColumn(true) .windowSize(1, TimeUnit.HOURS) .offset(5, TimeUnit.MINUTES) .windowSeparation(15, TimeUnit.MINUTES) .excludeEmptyWindows(true).build()) //Calculate sorted rank .convertFromSequence() .calculateSortedRank("rankColName", "TimeCol2", new LongWritableComparator()) .sequenceMovingWindowReduce("rankColName", 20, ReduceOp.Mean) .addConstantColumn("someIntColumn", ColumnType.Integer, new IntWritable(0)) 
.integerToOneHot("someIntColumn", 0, 3) .filter(new SequenceLengthCondition(ConditionOp.LessThan, 1)) .addConstantColumn("testColSeq", ColumnType.Integer, new DoubleWritable(0)) .offsetSequence(Collections.singletonList("testColSeq"), 1, SequenceOffsetTransform.OperationType.InPlace) .addConstantColumn("someTextCol", ColumnType.String, new Text("some values")) .build(); assertEquals(expected, fromJson); }