Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#computeGradientAndScore()
The following examples show how to use
org.deeplearning4j.nn.multilayer.MultiLayerNetwork#computeGradientAndScore().
Each example is drawn from an open-source project; the source file and license are noted above each snippet.
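Before the individual examples, here is a minimal, self-contained sketch of the pattern most of these tests share: attach features and labels to the network, call computeGradientAndScore(), and then read the score and gradients back from the network afterwards. The class name, layer sizes, and data below are illustrative assumptions only and are not taken from any of the projects listed.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class ComputeGradientAndScoreSketch {
    public static void main(String[] args) {
        // Small two-layer network; sizes are arbitrary and chosen only for illustration
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .list()
                .layer(new DenseLayer.Builder().nIn(4).nOut(8).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(8).nOut(3).activation(Activation.SOFTMAX).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Random features and simple one-hot labels for 5 examples
        INDArray features = Nd4j.rand(5, 4);
        INDArray labels = Nd4j.zeros(5, 3);
        for (int i = 0; i < 5; i++) {
            labels.putScalar(i, i % 3, 1.0);
        }

        // Attach the data, then compute gradient and score in a single pass
        net.setInput(features);
        net.setLabels(labels);
        net.computeGradientAndScore();

        // computeGradientAndScore() returns nothing; the results are read back afterwards
        double score = net.score();
        INDArray flatGradients = net.getFlattenedGradients();
        System.out.println("Score: " + score + ", gradient elements: " + flatGradients.length());
    }
}

As the examples below also show, per-variable gradients can be retrieved as a map via net.gradient().gradientForVariable() in addition to the flattened view used here.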
Example 1
Source File: WorkspaceTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testWithPreprocessorsMLN() { for (WorkspaceMode wm : WorkspaceMode.values()) { System.out.println(wm); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .trainingWorkspaceMode(wm) .inferenceWorkspaceMode(wm) .list() .layer(new GravesLSTM.Builder().nIn(10).nOut(5).build()) .layer(new GravesLSTM.Builder().nIn(5).nOut(8).build()) .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build()) .inputPreProcessor(0, new DupPreProcessor()) .setInputType(InputType.recurrent(10)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray input = Nd4j.zeros(1, 10, 5); for (boolean train : new boolean[]{false, true}) { net.clear(); net.feedForward(input, train); } net.setInput(input); net.setLabels(Nd4j.rand(new int[]{1, 3, 5})); net.computeGradientAndScore(); } }
Example 2
Source File: RegressionTest100a.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testUpsampling2d() throws Exception { File f = Resources.asFile("regression_testing/100a/upsampling/net.bin"); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); INDArray in; File fIn = Resources.asFile("regression_testing/100a/upsampling/in.bin"); try(DataInputStream dis = new DataInputStream(new FileInputStream(fIn))){ in = Nd4j.read(dis); } INDArray label; File fLabels = Resources.asFile("regression_testing/100a/upsampling/labels.bin"); try(DataInputStream dis = new DataInputStream(new FileInputStream(fLabels))){ label = Nd4j.read(dis); } INDArray outExp; File fOutExp = Resources.asFile("regression_testing/100a/upsampling/out.bin"); try(DataInputStream dis = new DataInputStream(new FileInputStream(fOutExp))){ outExp = Nd4j.read(dis); } INDArray gradExp; File fGradExp = Resources.asFile("regression_testing/100a/upsampling/gradient.bin"); try(DataInputStream dis = new DataInputStream(new FileInputStream(fGradExp))){ gradExp = Nd4j.read(dis); } INDArray out = net.output(in, false); assertEquals(outExp, out); net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); INDArray grad = net.getFlattenedGradients(); assertEquals(gradExp, grad); }
Example 3
Source File: TestSameDiffDense.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testSameDiffDenseBackward() { int nIn = 3; int nOut = 4; for (boolean workspaces : new boolean[]{false, true}) { for (int minibatch : new int[]{5, 1}) { Activation[] afns = new Activation[]{ Activation.TANH, Activation.SIGMOID, Activation.ELU, Activation.IDENTITY, Activation.SOFTPLUS, Activation.SOFTSIGN, Activation.HARDTANH, Activation.CUBE, Activation.RELU }; for (Activation a : afns) { log.info("Starting test - " + a + " - minibatch " + minibatch + ", workspaces: " + workspaces); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .trainingWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .inferenceWorkspaceMode(workspaces ? WorkspaceMode.ENABLED : WorkspaceMode.NONE) .list() .layer(new SameDiffDense.Builder().nIn(nIn).nOut(nOut) .activation(a) .build()) .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(conf); netSD.init(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .list() .layer(new DenseLayer.Builder().activation(a).nIn(nIn).nOut(nOut).build()) .layer(new OutputLayer.Builder().nIn(nOut).nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork netStandard = new MultiLayerNetwork(conf2); netStandard.init(); netSD.params().assign(netStandard.params()); //Check params: assertEquals(netStandard.params(), netSD.params()); assertEquals(netStandard.paramTable(), netSD.paramTable()); INDArray in = Nd4j.rand(minibatch, nIn); INDArray l = TestUtils.randomOneHot(minibatch, nOut, 12345); netSD.setInput(in); netStandard.setInput(in); netSD.setLabels(l); netStandard.setLabels(l); netSD.computeGradientAndScore(); netStandard.computeGradientAndScore(); Gradient gSD = netSD.gradient(); Gradient gStd = netStandard.gradient(); Map<String, INDArray> m1 = gSD.gradientForVariable(); Map<String, INDArray> m2 = gStd.gradientForVariable(); assertEquals(m2.keySet(), m1.keySet()); for (String s : m1.keySet()) { INDArray i1 = m1.get(s); INDArray i2 = m2.get(s); assertEquals(s, i2, i1); } assertEquals(gStd.gradient(), gSD.gradient()); //Sanity check: different minibatch size in = Nd4j.rand(2 * minibatch, nIn); l = TestUtils.randomOneHot(2 * minibatch, nOut, 12345); netSD.setInput(in); netStandard.setInput(in); netSD.setLabels(l); netStandard.setLabels(l); netSD.computeGradientAndScore(); // netStandard.computeGradientAndScore(); // assertEquals(netStandard.gradient().gradient(), netSD.gradient().gradient()); //Sanity check on different minibatch sizes: INDArray newIn = Nd4j.vstack(in, in); INDArray outMbsd = netSD.output(newIn); INDArray outMb = netStandard.output(newIn); assertEquals(outMb, outMbsd); } } } }
Example 4
Source File: DTypeTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testDtypesModelVsGlobalDtypeMisc() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build()) .layer(new SpaceToDepthLayer.Builder().blocks(2).build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .setInputType(InputType.convolutional(28, 28, 5)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 28, 28); INDArray label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } }
Example 5
Source File: ValidateCudnnLSTM.java, from the deeplearning4j project (Apache License 2.0)
@Test public void validateImplSimple() throws Exception { Nd4j.getRandom().setSeed(12345); int minibatch = 10; int inputSize = 3; int lstmLayerSize = 4; int timeSeriesLength = 3; int nOut = 2; INDArray input = Nd4j.rand(new int[] {minibatch, inputSize, timeSeriesLength}); INDArray labels = Nd4j.zeros(minibatch, nOut, timeSeriesLength); Random r = new Random(12345); for (int i = 0; i < minibatch; i++) { for (int j = 0; j < timeSeriesLength; j++) { labels.putScalar(i, r.nextInt(nOut), j, 1.0); } } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().inferenceWorkspaceMode(WorkspaceMode.NONE) .trainingWorkspaceMode(WorkspaceMode.NONE).updater(new NoOp()) .seed(12345L) .dist(new NormalDistribution(0, 2)).list() .layer(0, new LSTM.Builder().nIn(input.size(1)).nOut(lstmLayerSize) .gateActivationFunction(Activation.SIGMOID).activation(Activation.TANH).build()) .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(lstmLayerSize).nOut(nOut).build()) .build(); MultiLayerNetwork mln1 = new MultiLayerNetwork(conf.clone()); mln1.init(); MultiLayerNetwork mln2 = new MultiLayerNetwork(conf.clone()); mln2.init(); assertEquals(mln1.params(), mln2.params()); Field f = org.deeplearning4j.nn.layers.recurrent.LSTM.class.getDeclaredField("helper"); f.setAccessible(true); Layer l0 = mln1.getLayer(0); f.set(l0, null); assertNull(f.get(l0)); l0 = mln2.getLayer(0); assertTrue(f.get(l0) instanceof CudnnLSTMHelper); INDArray out1 = mln1.output(input); INDArray out2 = mln2.output(input); assertEquals(out1, out2); mln1.setInput(input); mln1.setLabels(labels); mln2.setInput(input); mln2.setLabels(labels); mln1.computeGradientAndScore(); mln2.computeGradientAndScore(); assertEquals(mln1.score(), mln2.score(), 1e-5); Gradient g1 = mln1.gradient(); Gradient g2 = mln2.gradient(); for (Map.Entry<String, INDArray> entry : g1.gradientForVariable().entrySet()) { INDArray exp = entry.getValue(); INDArray act = g2.gradientForVariable().get(entry.getKey()); //System.out.println(entry.getKey() + "\t" + exp.equals(act)); } assertEquals(mln1.getFlattenedGradients(), mln2.getFlattenedGradients()); }
Example 6
Source File: EmbeddingLayerTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testEmbeddingLayerWithMasking() { //Idea: have masking on the input with an embedding and dense layers on input //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked int[] miniBatchSizes = {1, 2, 5}; int nIn = 2; Random r = new Random(12345); int numInputClasses = 10; int timeSeriesLength = 5; for (DataType maskDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); net2.setParams(net.params().dup()); INDArray inEmbedding = Nd4j.zeros(nExamples, 1, timeSeriesLength); INDArray inDense = Nd4j.zeros(nExamples, numInputClasses, timeSeriesLength); INDArray labels = Nd4j.zeros(nExamples, 4, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { int inIdx = r.nextInt(numInputClasses); inEmbedding.putScalar(new int[]{i, 0, j}, inIdx); inDense.putScalar(new int[]{i, inIdx, j}, 1.0); int outIdx = r.nextInt(4); labels.putScalar(new int[]{i, outIdx, j}, 1.0); } } INDArray inputMask = Nd4j.zeros(maskDtype, nExamples, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { inputMask.putScalar(new int[]{i, j}, (r.nextBoolean() ? 
1.0 : 0.0)); } } net.setLayerMaskArrays(inputMask, null); net2.setLayerMaskArrays(inputMask, null); List<INDArray> actEmbedding = net.feedForward(inEmbedding, false); List<INDArray> actDense = net2.feedForward(inDense, false); for (int i = 1; i < actEmbedding.size(); i++) { assertEquals(actDense.get(i), actEmbedding.get(i)); } net.setLabels(labels); net2.setLabels(labels); net.computeGradientAndScore(); net2.computeGradientAndScore(); // System.out.println(net.score() + "\t" + net2.score()); assertEquals(net2.score(), net.score(), 1e-5); Map<String, INDArray> gradients = net.gradient().gradientForVariable(); Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable(); assertEquals(gradients.keySet(), gradients2.keySet()); for (String s : gradients.keySet()) { assertEquals(gradients2.get(s), gradients.get(s)); } } } }
Example 7
Source File: EmbeddingLayerTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testEmbeddingLayerRNN() { int nClassesIn = 10; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH) .dataType(DataType.DOUBLE) .list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()) .layer(1, new GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) .build(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH) .weightInit(WeightInit.XAVIER) .dataType(DataType.DOUBLE) .list() .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) .layer(1, new GravesLSTM.Builder().nIn(5).nOut(7).activation(Activation.SOFTSIGN).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(7).nOut(4) .activation(Activation.SOFTMAX).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(1, new FeedForwardToRnnPreProcessor()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net.init(); net2.init(); net2.setParams(net.params().dup()); int batchSize = 3; int timeSeriesLength = 8; INDArray inEmbedding = Nd4j.create(batchSize, 1, timeSeriesLength); INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, timeSeriesLength); INDArray outLabels = Nd4j.create(batchSize, 4, timeSeriesLength); Random r = new Random(12345); for (int i = 0; i < batchSize; i++) { for (int j = 0; j < timeSeriesLength; j++) { int classIdx = r.nextInt(nClassesIn); inEmbedding.putScalar(new int[]{i, 0, j}, classIdx); inOneHot.putScalar(new int[]{i, classIdx, j}, 1.0); int labelIdx = r.nextInt(4); outLabels.putScalar(new int[]{i, labelIdx, j}, 1.0); } } net.setInput(inEmbedding); net2.setInput(inOneHot); net.setLabels(outLabels); net2.setLabels(outLabels); net.computeGradientAndScore(); net2.computeGradientAndScore(); // System.out.println(net.score() + "\t" + net2.score()); assertEquals(net2.score(), net.score(), 1e-5); Map<String, INDArray> gradient = net.gradient().gradientForVariable(); Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable(); assertEquals(gradient.size(), gradient2.size()); for (String s : gradient.keySet()) { assertEquals(gradient2.get(s), gradient.get(s)); } }
Example 8
Source File: CuDNNGradientChecks.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testConvolutional() throws Exception { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first int[] minibatchSizes = {1, 4}; int width = 6; int height = 6; int inputDepth = 2; int nOut = 3; Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper"); f.setAccessible(true); Random r = new Random(12345); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int minibatchSize : minibatchSizes) { INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width}); INDArray labels = Nd4j.zeros(minibatchSize, nOut); for (int i = 0; i < minibatchSize; i++) { labels.putScalar(i, r.nextInt(nOut), 1.0); } MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .dist(new UniformDistribution(-1, 1)) .updater(new NoOp()).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3) .activation(afn).build()) .layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3) .activation(afn).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) .setInputType(InputType.convolutional(height, width, inputDepth)) ; MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 = (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0); ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0); assertTrue(ch0 instanceof CudnnConvolutionHelper); org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 = (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1); ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1); assertTrue(ch1 instanceof CudnnConvolutionHelper); //------------------------------- //For debugging/comparison to no-cudnn case: set helper field to null // f.set(c0, null); // f.set(c1, null); // assertNull(f.get(c0)); // assertNull(f.get(c1)); //------------------------------- String name = new Object() {}.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(input); mln.setLabels(labels); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(input, labels); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst); for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, 
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); } } } }
Example 9
Source File: EmbeddingLayerTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testEmbeddingBackwardPass() { //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation // input with a DenseLayer int nClassesIn = 10; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build()).layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) .activation(Activation.SOFTMAX).build()) .build(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH) .weightInit(WeightInit.XAVIER).list() .layer(new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build()) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5).nOut(4) .activation(Activation.SOFTMAX).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net.init(); net2.init(); net2.setParams(net.params().dup()); int batchSize = 3; INDArray inEmbedding = Nd4j.create(batchSize, 1); INDArray inOneHot = Nd4j.create(batchSize, nClassesIn); INDArray outLabels = Nd4j.create(batchSize, 4); Random r = new Random(12345); for (int i = 0; i < batchSize; i++) { int classIdx = r.nextInt(nClassesIn); inEmbedding.putScalar(i, classIdx); inOneHot.putScalar(new int[]{i, classIdx}, 1.0); int labelIdx = r.nextInt(4); outLabels.putScalar(new int[]{i, labelIdx}, 1.0); } net.setInput(inEmbedding); net2.setInput(inOneHot); net.setLabels(outLabels); net2.setLabels(outLabels); net.computeGradientAndScore(); net2.computeGradientAndScore(); assertEquals(net2.score(), net.score(), 1e-6); Map<String, INDArray> gradient = net.gradient().gradientForVariable(); Map<String, INDArray> gradient2 = net2.gradient().gradientForVariable(); assertEquals(gradient.size(), gradient2.size()); for (String s : gradient.keySet()) { assertEquals(gradient2.get(s), gradient.get(s)); } }
Example 10
Source File: OutputLayerTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testCnnLossLayer(){ for(WorkspaceMode ws : WorkspaceMode.values()) { log.info("*** Testing workspace: " + ws); for (Activation a : new Activation[]{Activation.TANH, Activation.SELU}) { //Check that (A+identity) is equal to (identity+A), for activation A //i.e., should get same output and weight gradients for both MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list() .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(Activation.IDENTITY) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) .layer(new CnnLossLayer.Builder(LossFunction.MSE) .activation(a) .build()) .build(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345L) .updater(new NoOp()) .convolutionMode(ConvolutionMode.Same) .inferenceWorkspaceMode(ws) .trainingWorkspaceMode(ws) .list() .layer(new ConvolutionLayer.Builder().nIn(3).nOut(4).activation(a) .kernelSize(2, 2).stride(1, 1) .dist(new NormalDistribution(0, 1.0)) .updater(new NoOp()).build()) .layer(new CnnLossLayer.Builder(LossFunction.MSE) .activation(Activation.IDENTITY) .build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf1); mln.init(); MultiLayerNetwork mln2 = new MultiLayerNetwork(conf2); mln2.init(); mln2.setParams(mln.params()); INDArray in = Nd4j.rand(new int[]{3, 3, 5, 5}); INDArray out1 = mln.output(in); INDArray out2 = mln2.output(in); assertEquals(out1, out2); INDArray labels = Nd4j.rand(out1.shape()); mln.setInput(in); mln.setLabels(labels); mln2.setInput(in); mln2.setLabels(labels); mln.computeGradientAndScore(); mln2.computeGradientAndScore(); assertEquals(mln.score(), mln2.score(), 1e-6); assertEquals(mln.gradient().gradient(), mln2.gradient().gradient()); //Also check computeScoreForExamples INDArray in2a = Nd4j.rand(new int[]{1, 3, 5, 5}); INDArray labels2a = Nd4j.rand(new int[]{1, 4, 5, 5}); INDArray in2 = Nd4j.concat(0, in2a, in2a); INDArray labels2 = Nd4j.concat(0, labels2a, labels2a); INDArray s = mln.scoreExamples(new DataSet(in2, labels2), false); assertArrayEquals(new long[]{2, 1}, s.shape()); assertEquals(s.getDouble(0), s.getDouble(1), 1e-6); TestUtils.testModelSerialization(mln); } } }
Example 11
Source File: DTypeTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testDtypesModelVsGlobalDtypeCnn1d() { //Nd4jCpu.Environment.getInstance().setUseMKLDNN(false); for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { for (int outputLayer = 0; outputLayer < 3; outputLayer++) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; Layer ol; Layer secondLast; switch (outputLayer) { case 0: ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new GlobalPoolingLayer(PoolingType.MAX); break; case 1: ol = new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build(); secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); break; case 2: ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); break; default: throw new RuntimeException(); } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .trainingWorkspaceMode(WorkspaceMode.NONE) .inferenceWorkspaceMode(WorkspaceMode.NONE) .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new Convolution1D.Builder().kernelSize(2).stride(1).nOut(3).activation(Activation.TANH).build()) .layer(new Subsampling1DLayer.Builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build()) .layer(new Cropping1D.Builder(1).build()) .layer(new ZeroPadding1DLayer(1)) .layer(new Upsampling1D.Builder(2).build()) .layer(secondLast) .layer(ol) .setInputType(InputType.recurrent(5, 10)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 10); INDArray label; if (outputLayer == 0) { //OutputLayer label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); } else { //RnnOutputLayer, RnnLossLayer label = Nd4j.rand(networkDtype, 2, 5, 20); //Longer sequence due to upsampling } INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { System.out.println(msg + " - " + inputLabelDtype); INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } } }
Example 12
Source File: TestSameDiffOutput.java, from the deeplearning4j project (Apache License 2.0)
@Test
public void testMSEOutputLayer() {
    //Failing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560
    Nd4j.getRandom().setSeed(12345);

    for (Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) {
        log.info("Starting test: " + a);

        MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER))
                .build();

        MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Adam(0.01))
                .list()
                .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build())
                .build();

        MultiLayerNetwork netSD = new MultiLayerNetwork(confSD);
        netSD.init();
        MultiLayerNetwork netStd = new MultiLayerNetwork(confStd);
        netStd.init();

        netSD.params().assign(netStd.params());
        assertEquals(netStd.paramTable(), netSD.paramTable());

        int minibatch = 2;
        INDArray in = Nd4j.rand(minibatch, 5);
        INDArray label = Nd4j.rand(minibatch, 5);

        INDArray outSD = netSD.output(in);
        INDArray outStd = netStd.output(in);
        assertEquals(outStd, outSD);

        DataSet ds = new DataSet(in, label);
        double scoreSD = netSD.score(ds);
        double scoreStd = netStd.score(ds);
        assertEquals(scoreStd, scoreSD, 1e-6);

        netSD.setInput(in);
        netSD.setLabels(label);
        netStd.setInput(in);
        netStd.setLabels(label);

        //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary());

        netSD.computeGradientAndScore();
        netStd.computeGradientAndScore();
        assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients());

        for (int i = 0; i < 3; i++) {
            netSD.fit(ds);
            netStd.fit(ds);
            String s = String.valueOf(i);
            assertEquals(s, netStd.params(), netSD.params());
            assertEquals(s, netStd.getFlattenedGradients(), netSD.getFlattenedGradients());
        }

        //Test fit before output:
        MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone());
        net.init();
        net.fit(ds);

        //Sanity check on different minibatch sizes:
        INDArray newIn = Nd4j.vstack(in, in);
        INDArray outMbsd = netSD.output(newIn);
        INDArray outMb = netStd.output(newIn);
        assertEquals(outMb, outMbsd);
    }
}
Example 13
Source File: DTypeTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testDtypesModelVsGlobalDtypeRnn() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { for (int outputLayer = 0; outputLayer < 3; outputLayer++) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; Layer ol; Layer secondLast; switch (outputLayer) { case 0: ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); break; case 1: ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); break; case 2: ol = new OutputLayer.Builder().nOut(5).build(); secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build()); break; default: throw new RuntimeException(); } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new GravesLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new DenseLayer.Builder().nOut(5).build()) .layer(new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new Bidirectional(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())) .layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build())) .layer(new SimpleRnn.Builder().nIn(5).nOut(5).build()) .layer(new MaskZeroLayer.Builder().underlying(new SimpleRnn.Builder().nIn(5).nOut(5).build()).maskValue(0.0).build()) .layer(secondLast) .layer(ol) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 2); INDArray label; if (outputLayer == 2) { label = TestUtils.randomOneHot(2, 5).castTo(networkDtype); } else { label = TestUtils.randomOneHotTimeSeries(2, 5, 2).castTo(networkDtype); } INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { assertEquals(msg, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 2), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 2))); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } } }
Example 14
Source File: DTypeTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testMultiLayerNetworkTypeConversion() { for (DataType dt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(dt, dt); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.01)) .dataType(DataType.DOUBLE) .list() .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray inD = Nd4j.rand(DataType.DOUBLE, 1, 10); INDArray lD = Nd4j.create(DataType.DOUBLE, 1, 10); net.fit(inD, lD); INDArray outDouble = net.output(inD); net.setInput(inD); net.setLabels(lD); net.computeGradientAndScore(); double scoreDouble = net.score(); INDArray grads = net.getFlattenedGradients(); INDArray u = net.getUpdater().getStateViewArray(); assertEquals(DataType.DOUBLE, net.params().dataType()); assertEquals(DataType.DOUBLE, grads.dataType()); assertEquals(DataType.DOUBLE, u.dataType()); MultiLayerNetwork netFloat = net.convertDataType(DataType.FLOAT); netFloat.initGradientsView(); assertEquals(DataType.FLOAT, netFloat.params().dataType()); assertEquals(DataType.FLOAT, netFloat.getFlattenedGradients().dataType()); assertEquals(DataType.FLOAT, netFloat.getUpdater(true).getStateViewArray().dataType()); INDArray inF = inD.castTo(DataType.FLOAT); INDArray lF = lD.castTo(DataType.FLOAT); INDArray outFloat = netFloat.output(inF); netFloat.setInput(inF); netFloat.setLabels(lF); netFloat.computeGradientAndScore(); double scoreFloat = netFloat.score(); INDArray gradsFloat = netFloat.getFlattenedGradients(); INDArray uFloat = netFloat.getUpdater().getStateViewArray(); assertEquals(scoreDouble, scoreFloat, 1e-6); assertEquals(outDouble.castTo(DataType.FLOAT), outFloat); assertEquals(grads.castTo(DataType.FLOAT), gradsFloat); INDArray uCast = u.castTo(DataType.FLOAT); assertTrue(uCast.equalsWithEps(uFloat, 1e-4)); MultiLayerNetwork netFP16 = net.convertDataType(DataType.HALF); netFP16.initGradientsView(); assertEquals(DataType.HALF, netFP16.params().dataType()); assertEquals(DataType.HALF, netFP16.getFlattenedGradients().dataType()); assertEquals(DataType.HALF, netFP16.getUpdater(true).getStateViewArray().dataType()); INDArray inH = inD.castTo(DataType.HALF); INDArray lH = lD.castTo(DataType.HALF); INDArray outHalf = netFP16.output(inH); netFP16.setInput(inH); netFP16.setLabels(lH); netFP16.computeGradientAndScore(); double scoreHalf = netFP16.score(); INDArray gradsHalf = netFP16.getFlattenedGradients(); INDArray uHalf = netFP16.getUpdater().getStateViewArray(); assertEquals(scoreDouble, scoreHalf, 1e-4); boolean outHalfEq = outDouble.castTo(DataType.HALF).equalsWithEps(outHalf, 1e-3); assertTrue(outHalfEq); boolean gradsHalfEq = grads.castTo(DataType.HALF).equalsWithEps(gradsHalf, 1e-3); assertTrue(gradsHalfEq); INDArray uHalfCast = u.castTo(DataType.HALF); assertTrue(uHalfCast.equalsWithEps(uHalf, 1e-4)); } }
Example 15
Source File: TestCustomUpdater.java, from the deeplearning4j project (Apache License 2.0)
@Test
public void testCustomUpdater() {
    //Create a simple custom updater, equivalent to SGD updater
    double lr = 0.03;

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder().seed(12345)
            .activation(Activation.TANH).updater(new CustomIUpdater(lr)) //Specify custom IUpdater
            .list().layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(1, new OutputLayer.Builder().nIn(10).nOut(10)
                    .lossFunction(LossFunctions.LossFunction.MSE).build())
            .build();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().seed(12345)
            .activation(Activation.TANH).updater(new Sgd(lr)).list()
            .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(1, new OutputLayer.Builder().nIn(10).nOut(10).lossFunction(LossFunctions.LossFunction.MSE).build())
            .build();

    //First: Check updater config
    assertTrue(((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater() instanceof CustomIUpdater);
    assertTrue(((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater() instanceof Sgd);
    assertTrue(((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater() instanceof Sgd);

    CustomIUpdater u0_0 = (CustomIUpdater) ((BaseLayer) conf1.getConf(0).getLayer()).getIUpdater();
    CustomIUpdater u0_1 = (CustomIUpdater) ((BaseLayer) conf1.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u0_0.getLearningRate(), 1e-6);
    assertEquals(lr, u0_1.getLearningRate(), 1e-6);

    Sgd u1_0 = (Sgd) ((BaseLayer) conf2.getConf(0).getLayer()).getIUpdater();
    Sgd u1_1 = (Sgd) ((BaseLayer) conf2.getConf(1).getLayer()).getIUpdater();
    assertEquals(lr, u1_0.getLearningRate(), 1e-6);
    assertEquals(lr, u1_1.getLearningRate(), 1e-6);

    //Second: check JSON
    String asJson = conf1.toJson();
    MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(asJson);
    assertEquals(conf1, fromJson);

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net1 = new MultiLayerNetwork(conf1);
    net1.init();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();

    //Third: check gradients are equal
    INDArray in = Nd4j.rand(5, 10);
    INDArray labels = Nd4j.rand(5, 10);

    net1.setInput(in);
    net2.setInput(in);
    net1.setLabels(labels);
    net2.setLabels(labels);

    net1.computeGradientAndScore();
    net2.computeGradientAndScore();
    assertEquals(net1.getFlattenedGradients(), net2.getFlattenedGradients());
}
Example 16
Source File: CNNGradientCheckTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testGradientCNNMLN() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn) .cudnnAllowFallback(false) .build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 17
Source File: CNN1DGradientCheckTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testCnn1dWithMasking(){ int length = 12; int convNIn = 2; int convNOut1 = 3; int convNOut2 = 4; int finalNOut = 3; int pnorm = 2; SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG}; for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Same, ConvolutionMode.Truncate}){ for( int stride : new int[]{1, 2}){ String s = cm + ", stride=" + stride + ", pooling=" + poolingType; log.info("Starting test: " + s); Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH) .dist(new NormalDistribution(0, 1)).convolutionMode(cm) .seed(12345) .list() .layer(new Convolution1DLayer.Builder().kernelSize(2) .stride(stride).nIn(convNIn).nOut(convNOut1) .build()) .layer(new Subsampling1DLayer.Builder(poolingType).kernelSize(2) .stride(stride).pnorm(pnorm).build()) .layer(new Convolution1DLayer.Builder().kernelSize(2) .stride(stride).nIn(convNOut1).nOut(convNOut2) .build()) .layer(new GlobalPoolingLayer(PoolingType.AVG)) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .setInputType(InputType.recurrent(convNIn, length)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray f = Nd4j.rand(new int[]{2, convNIn, length}); INDArray fm = Nd4j.create(2, length); fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0,6)).assign(1); INDArray label = TestUtils.randomOneHot(2, finalNOut); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f) .labels(label).inputMask(fm)); assertTrue(s, gradOK); TestUtils.testModelSerialization(net); //TODO also check that masked step values don't impact forward pass, score or gradients DataSet ds = new DataSet(f,label,fm,null); double scoreBefore = net.score(ds); net.setInput(f); net.setLabels(label); net.setLayerMaskArrays(fm, null); net.computeGradientAndScore(); INDArray gradBefore = net.getFlattenedGradients().dup(); f.putScalar(1, 0, 10, 10.0); f.putScalar(1, 1, 11, 20.0); double scoreAfter = net.score(ds); net.setInput(f); net.setLabels(label); net.setLayerMaskArrays(fm, null); net.computeGradientAndScore(); INDArray gradAfter = net.getFlattenedGradients().dup(); assertEquals(scoreBefore, scoreAfter, 1e-6); assertEquals(gradBefore, gradAfter); } } } }
Example 18
Source File: CNNGradientCheckTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testGradientCNNL1L2MLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here for( int i=0; i<l2vals.length; i++ ){ Activation afn = activFns[i]; boolean doLearningFirst = characteristic[i]; LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String testName = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); 
TestUtils.testModelSerialization(mln); } }
Example 19
Source File: CNNGradientCheckTest.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testGradientCNNMLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.9 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 20
Source File: GradientCheckTests.java, from the deeplearning4j project (Apache License 2.0)
@Test public void testAutoEncoder() { //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied //Need to run gradient through updater, so that L2 can be applied Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); scaler.fit(iter); iter.setPreProcessor(scaler); DataSet ds = iter.next(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); NormalizerStandardize norm = new NormalizerStandardize(); norm.fit(ds); norm.transform(ds); double[] l2vals = {0.2, 0.0, 0.2}; double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i] for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2(l2).l1(l1) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L) .dist(new NormalDistribution(0, 1)) .list().layer(0, new AutoEncoder.Builder().nIn(4).nOut(3) .activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) .activation(outputActivation).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String msg; if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < scoreBefore); } msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1; if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } } } } }