Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#feedForward()
The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#feedForward(). Each example notes its source file, the project it comes from, and that project's license.
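All of the examples share the same call pattern: feedForward(INDArray) (or feedForward(boolean train) after setInput()) runs a forward pass and returns a List<INDArray> holding the network input at index 0 followed by one activation array per layer. The short sketch below illustrates that pattern in isolation; the architecture, layer sizes, and class name (FeedForwardSketch) are illustrative choices, not taken from any of the projects quoted here.

import java.util.Arrays;
import java.util.List;

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class FeedForwardSketch {
    public static void main(String[] args) {
        // Illustrative two-layer network: 10 inputs -> 5 hidden units -> 3 outputs
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build())
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(5).nOut(3).activation(Activation.SOFTMAX).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // One forward pass in inference mode; the returned list holds the input
        // (index 0) plus the activations of each of the two layers
        INDArray input = Nd4j.rand(1, 10);
        List<INDArray> activations = net.feedForward(input);
        System.out.println("Arrays in list: " + activations.size());                                    // 3
        System.out.println("Input shape:    " + Arrays.toString(activations.get(0).shape()));           // [1, 10]
        System.out.println("Output shape:   " + Arrays.toString(activations.get(activations.size() - 1).shape())); // [1, 3]
    }
}

The examples that compare training-time behaviour use the feedForward(input, true) and feedForward(true) overloads instead, which run the same pass with training-mode behaviour (for example, dropout active).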
Example 1
Source File: PLNetDyadRanker.java, from AILibs (GNU Affero General Public License v3.0), 6 votes
private INDArray computeScaledGradient(final INDArray dyadMatrix) {
    int dyadRankingLength = dyadMatrix.rows();
    List<INDArray> activations = this.plNet.feedForward(dyadMatrix);
    INDArray output = activations.get(activations.size() - 1);
    output = output.transpose();
    INDArray deltaW = Nd4j.zeros(this.plNet.params().length());
    Gradient deltaWk = null;
    MultiLayerNetwork plNetClone = this.plNet.clone();

    for (int k = 0; k < dyadRankingLength; k++) {
        // compute derivative of loss w.r.t. k
        plNetClone.setInput(dyadMatrix.getRow(k));
        plNetClone.feedForward(true, false);
        INDArray lossGradient = PLNetLoss.computeLossGradient(output, k);
        // compute backprop gradient for weight updates w.r.t. k
        Pair<Gradient, INDArray> p = plNetClone.backpropGradient(lossGradient, null);
        deltaWk = p.getFirst();
        this.plNet.getUpdater().update(this.plNet, deltaWk, this.iteration, this.epoch, 1, LayerWorkspaceMgr.noWorkspaces());
        deltaW.addi(deltaWk.gradient());
    }
    return deltaW;
}
Example 2
Source File: TestVAE.java, from deeplearning4j (Apache License 2.0), 6 votes
@Test
public void testForwardPass() {
    int[][] encLayerSizes = new int[][] {{12}, {12, 13}, {12, 13, 14}};
    for (int i = 0; i < encLayerSizes.length; i++) {

        MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder().list()
                .layer(0, new org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder.Builder().nIn(10)
                        .nOut(5).encoderLayerSizes(encLayerSizes[i]).decoderLayerSizes(13).build())
                .build();

        NeuralNetConfiguration c = mlc.getConf(0);
        org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder vae =
                (org.deeplearning4j.nn.conf.layers.variational.VariationalAutoencoder) c.getLayer();

        MultiLayerNetwork net = new MultiLayerNetwork(mlc);
        net.init();

        INDArray in = Nd4j.rand(1, 10);
        // net.output(in);
        List<INDArray> out = net.feedForward(in);
        assertArrayEquals(new long[] {1, 10}, out.get(0).shape());
        assertArrayEquals(new long[] {1, 5}, out.get(1).shape());
    }
}
Example 3
Source File: PLNetInputOptimizer.java, from AILibs (GNU Affero General Public License v3.0), 5 votes
private static INDArray computeInputDerivative(PLNetDyadRanker plNet, INDArray input, InputOptimizerLoss loss) {
    MultiLayerNetwork net = plNet.getPlNet();

    INDArray output = net.output(input);
    INDArray lossGradient = Nd4j.create(new double[] { loss.lossGradient(output) });
    net.setInput(input);
    net.feedForward(false, false);
    Pair<Gradient, INDArray> p = net.backpropGradient(lossGradient, null);

    return p.getSecond();
}
Example 4
Source File: WorkspaceTests.java, from deeplearning4j (Apache License 2.0), 5 votes
@Test
public void testWithPreprocessorsMLN() {
    for (WorkspaceMode wm : WorkspaceMode.values()) {
        System.out.println(wm);
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .trainingWorkspaceMode(wm)
                .inferenceWorkspaceMode(wm)
                .list()
                .layer(new GravesLSTM.Builder().nIn(10).nOut(5).build())
                .layer(new GravesLSTM.Builder().nIn(5).nOut(8).build())
                .layer(new RnnOutputLayer.Builder(LossFunctions.LossFunction.MSE).activation(Activation.SIGMOID).nOut(3).build())
                .inputPreProcessor(0, new DupPreProcessor())
                .setInputType(InputType.recurrent(10))
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray input = Nd4j.zeros(1, 10, 5);

        for (boolean train : new boolean[]{false, true}) {
            net.clear();
            net.feedForward(input, train);
        }

        net.setInput(input);
        net.setLabels(Nd4j.rand(new int[]{1, 3, 5}));
        net.computeGradientAndScore();
    }
}
Example 5
Source File: TestConvolutionModes.java, from deeplearning4j (Apache License 2.0), 5 votes
@Test
public void testSameModeActivationSizes() {
    int inH = 3;
    int inW = 4;
    int inDepth = 3;
    int minibatch = 5;

    int sH = 2;
    int sW = 2;
    int kH = 3;
    int kW = 3;

    Layer[] l = new Layer[2];
    l[0] = new ConvolutionLayer.Builder().nOut(4).kernelSize(kH, kW).stride(sH, sW).build();
    l[1] = new SubsamplingLayer.Builder().kernelSize(kH, kW).stride(sH, sW).build();

    for (int i = 0; i < l.length; i++) {

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().convolutionMode(ConvolutionMode.Same)
                .list().layer(0, l[i])
                .layer(1, new OutputLayer.Builder().nOut(3).activation(Activation.SOFTMAX).build())
                .setInputType(InputType.convolutional(inH, inW, inDepth)).build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        INDArray inData = Nd4j.create(minibatch, inDepth, inH, inW);
        List<INDArray> activations = net.feedForward(inData);
        INDArray actL0 = activations.get(1);

        int outH = (int) Math.ceil(inH / ((double) sH));
        int outW = (int) Math.ceil(inW / ((double) sW));

        System.out.println(Arrays.toString(actL0.shape()));
        assertArrayEquals(new long[] {minibatch, (i == 0 ? 4 : inDepth), outH, outW}, actL0.shape());
    }
}
Example 6
Source File: DropoutLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testDropoutLayerWithConvMnist() throws Exception { Nd4j.setDefaultDataTypes(DataType.DOUBLE, DataType.DOUBLE); //Set to double datatype - MKL-DNN not used for CPU (otherwise different strides due to Dl4J impl permutes) DataSetIterator iter = new MnistDataSetIterator(2, 2); DataSet next = iter.next(); // Run without separate activation layer Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration confIntegrated = new NeuralNetConfiguration.Builder().seed(123) .list().layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.TANH).weightInit(WeightInit.XAVIER) .build()) .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).dropOut(0.5) .nOut(10).build()) .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); // Run with separate activation layer Nd4j.getRandom().setSeed(12345); //Manually configure preprocessors //This is necessary, otherwise CnnToFeedForwardPreprocessor will be in different locatinos //i.e., dropout on 4d activations in latter, and dropout on 2d activations in former Map<Integer, InputPreProcessor> preProcessorMap = new HashMap<>(); preProcessorMap.put(1, new CnnToFeedForwardPreProcessor(13, 13, 20)); MultiLayerConfiguration confSeparate = new NeuralNetConfiguration.Builder().seed(123).list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()) .layer(1, new DropoutLayer.Builder(0.5).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nOut(10).build()) .inputPreProcessors(preProcessorMap) .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork netIntegrated = new MultiLayerNetwork(confIntegrated); netIntegrated.init(); Nd4j.getRandom().setSeed(12345); MultiLayerNetwork netSeparate = new MultiLayerNetwork(confSeparate); netSeparate.init(); assertEquals(netIntegrated.params(), netSeparate.params()); Nd4j.getRandom().setSeed(12345); netIntegrated.fit(next); Nd4j.getRandom().setSeed(12345); netSeparate.fit(next); assertEquals(netIntegrated.params(), netSeparate.params()); // check parameters assertEquals(netIntegrated.getLayer(0).getParam("W"), netSeparate.getLayer(0).getParam("W")); assertEquals(netIntegrated.getLayer(0).getParam("b"), netSeparate.getLayer(0).getParam("b")); assertEquals(netIntegrated.getLayer(1).getParam("W"), netSeparate.getLayer(2).getParam("W")); assertEquals(netIntegrated.getLayer(1).getParam("b"), netSeparate.getLayer(2).getParam("b")); // check activations netIntegrated.setInput(next.getFeatures().dup()); netSeparate.setInput(next.getFeatures().dup()); Nd4j.getRandom().setSeed(12345); List<INDArray> actTrainIntegrated = netIntegrated.feedForward(true); Nd4j.getRandom().setSeed(12345); List<INDArray> actTrainSeparate = netSeparate.feedForward(true); assertEquals(actTrainIntegrated.get(1), actTrainSeparate.get(1)); assertEquals(actTrainIntegrated.get(2), actTrainSeparate.get(3)); netIntegrated.setInput(next.getFeatures().dup()); netSeparate.setInput(next.getFeatures().dup()); Nd4j.getRandom().setSeed(12345); List<INDArray> actTestIntegrated = netIntegrated.feedForward(false); Nd4j.getRandom().setSeed(12345); List<INDArray> actTestSeparate = netSeparate.feedForward(false); assertEquals(actTestIntegrated.get(1), actTestSeparate.get(1)); assertEquals(actTestIntegrated.get(2), 
actTestSeparate.get(3)); }
Example 7
Source File: DTypeTests.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testDtypesModelVsGlobalDtypeRnn() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { for (int outputLayer = 0; outputLayer < 3; outputLayer++) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; Layer ol; Layer secondLast; switch (outputLayer) { case 0: ol = new RnnOutputLayer.Builder().nOut(5).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); break; case 1: ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build(); break; case 2: ol = new OutputLayer.Builder().nOut(5).build(); secondLast = new LastTimeStep(new SimpleRnn.Builder().nOut(5).activation(Activation.TANH).build()); break; default: throw new RuntimeException(); } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new GravesLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new DenseLayer.Builder().nOut(5).build()) .layer(new GravesBidirectionalLSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new Bidirectional(new LSTM.Builder().nIn(5).nOut(5).activation(Activation.TANH).build())) .layer(new TimeDistributed(new DenseLayer.Builder().nIn(10).nOut(5).activation(Activation.TANH).build())) .layer(new SimpleRnn.Builder().nIn(5).nOut(5).build()) .layer(new MaskZeroLayer.Builder().underlying(new SimpleRnn.Builder().nIn(5).nOut(5).build()).maskValue(0.0).build()) .layer(secondLast) .layer(ol) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 2); INDArray label; if (outputLayer == 2) { label = TestUtils.randomOneHot(2, 5).castTo(networkDtype); } else { label = TestUtils.randomOneHotTimeSeries(2, 5, 2).castTo(networkDtype); } INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { assertEquals(msg, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label, Nd4j.ones(networkDtype, 2, 2), outputLayer == 2 ? null : Nd4j.ones(networkDtype, 2, 2))); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } } }
Example 8
Source File: DTypeTests.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testDtypesModelVsGlobalDtypeMisc() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new SpaceToBatchLayer.Builder().blocks(1, 1).build()) .layer(new SpaceToDepthLayer.Builder().blocks(2).build()) .layer(new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .setInputType(InputType.convolutional(28, 28, 5)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 28, 28); INDArray label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } }
Example 9
Source File: DTypeTests.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testDtypesModelVsGlobalDtypeCnn1d() { //Nd4jCpu.Environment.getInstance().setUseMKLDNN(false); for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { for (int outputLayer = 0; outputLayer < 3; outputLayer++) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype + ", outputLayer=" + outputLayer; Layer ol; Layer secondLast; switch (outputLayer) { case 0: ol = new OutputLayer.Builder().nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new GlobalPoolingLayer(PoolingType.MAX); break; case 1: ol = new RnnOutputLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).nOut(5).build(); secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); break; case 2: ol = new RnnLossLayer.Builder().activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build(); secondLast = new Convolution1D.Builder().kernelSize(2).nOut(5).build(); break; default: throw new RuntimeException(); } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .trainingWorkspaceMode(WorkspaceMode.NONE) .inferenceWorkspaceMode(WorkspaceMode.NONE) .dataType(networkDtype) .convolutionMode(ConvolutionMode.Same) .updater(new Adam(1e-2)) .list() .layer(new Convolution1D.Builder().kernelSize(2).stride(1).nOut(3).activation(Activation.TANH).build()) .layer(new Subsampling1DLayer.Builder().poolingType(PoolingType.MAX).kernelSize(5).stride(1).build()) .layer(new Cropping1D.Builder(1).build()) .layer(new ZeroPadding1DLayer(1)) .layer(new Upsampling1D.Builder(2).build()) .layer(secondLast) .layer(ol) .setInputType(InputType.recurrent(5, 10)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.initGradientsView(); assertEquals(msg, networkDtype, net.params().dataType()); assertEquals(msg, networkDtype, net.getFlattenedGradients().dataType()); assertEquals(msg, networkDtype, net.getUpdater(true).getStateViewArray().dataType()); INDArray in = Nd4j.rand(networkDtype, 2, 5, 10); INDArray label; if (outputLayer == 0) { //OutputLayer label = TestUtils.randomOneHot(2, 10).castTo(networkDtype); } else { //RnnOutputLayer, RnnLossLayer label = Nd4j.rand(networkDtype, 2, 5, 20); //Longer sequence due to upsampling } INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { System.out.println(msg + " - " + inputLabelDtype); INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } } }
Example 10
Source File: EmbeddingLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testEmbeddingSequenceLayerWithMasking() { //Idea: have masking on the input with an embedding and dense layers on input //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked int[] miniBatchSizes = {1, 3}; int nIn = 2; Random r = new Random(12345); int numInputClasses = 10; int timeSeriesLength = 5; for (DataType maskDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) { for (DataType inLabelDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) { for(int inputRank : new int[]{2, 3}) { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingSequenceLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .setInputType(InputType.recurrent(1)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new LSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .setInputType(InputType.recurrent(1)).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); net2.setParams(net.params().dup()); INDArray inEmbedding = Nd4j.zeros(inLabelDtype, inputRank == 2 ? new long[]{nExamples, timeSeriesLength} : new long[]{nExamples, 1, timeSeriesLength}); INDArray inDense = Nd4j.zeros(inLabelDtype, nExamples, numInputClasses, timeSeriesLength); INDArray labels = Nd4j.zeros(inLabelDtype, nExamples, 4, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { int inIdx = r.nextInt(numInputClasses); inEmbedding.putScalar(inputRank == 2 ? new int[]{i, j} : new int[]{i, 0, j}, inIdx); inDense.putScalar(new int[]{i, inIdx, j}, 1.0); int outIdx = r.nextInt(4); labels.putScalar(new int[]{i, outIdx, j}, 1.0); } } INDArray inputMask = Nd4j.zeros(maskDtype, nExamples, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { inputMask.putScalar(new int[]{i, j}, (r.nextBoolean() ? 
1.0 : 0.0)); } } net.setLayerMaskArrays(inputMask, null); net2.setLayerMaskArrays(inputMask, null); List<INDArray> actEmbedding = net.feedForward(inEmbedding, false); List<INDArray> actDense = net2.feedForward(inDense, false); for (int i = 2; i < actEmbedding.size(); i++) { //Start from layer 2: EmbeddingSequence is 3d, first dense is 2d (before reshape) assertEquals(actDense.get(i), actEmbedding.get(i)); } net.setLabels(labels); net2.setLabels(labels); net.computeGradientAndScore(); net2.computeGradientAndScore(); assertEquals(net2.score(), net.score(), 1e-5); Map<String, INDArray> gradients = net.gradient().gradientForVariable(); Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable(); assertEquals(gradients.keySet(), gradients2.keySet()); for (String s : gradients.keySet()) { assertEquals(gradients2.get(s), gradients.get(s)); } } } } } }
Example 11
Source File: EmbeddingLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testEmbeddingLayerWithMasking() { //Idea: have masking on the input with an embedding and dense layers on input //Ensure that the parameter gradients for the inputs don't depend on the inputs when inputs are masked int[] miniBatchSizes = {1, 2, 5}; int nIn = 2; Random r = new Random(12345); int numInputClasses = 10; int timeSeriesLength = 5; for (DataType maskDtype : new DataType[]{DataType.FLOAT, DataType.DOUBLE, DataType.INT}) { for (int nExamples : miniBatchSizes) { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new EmbeddingLayer.Builder().hasBias(true).activation(Activation.TANH).nIn(numInputClasses) .nOut(5).build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new DenseLayer.Builder().activation(Activation.TANH).nIn(numInputClasses).nOut(5) .build()) .layer(1, new DenseLayer.Builder().activation(Activation.TANH).nIn(5).nOut(4).build()) .layer(2, new GravesLSTM.Builder().activation(Activation.TANH).nIn(4).nOut(3).build()) .layer(3, new RnnOutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(3) .nOut(4).build()) .inputPreProcessor(0, new RnnToFeedForwardPreProcessor()) .inputPreProcessor(2, new FeedForwardToRnnPreProcessor()).build(); MultiLayerNetwork net2 = new MultiLayerNetwork(conf2); net2.init(); net2.setParams(net.params().dup()); INDArray inEmbedding = Nd4j.zeros(nExamples, 1, timeSeriesLength); INDArray inDense = Nd4j.zeros(nExamples, numInputClasses, timeSeriesLength); INDArray labels = Nd4j.zeros(nExamples, 4, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { int inIdx = r.nextInt(numInputClasses); inEmbedding.putScalar(new int[]{i, 0, j}, inIdx); inDense.putScalar(new int[]{i, inIdx, j}, 1.0); int outIdx = r.nextInt(4); labels.putScalar(new int[]{i, outIdx, j}, 1.0); } } INDArray inputMask = Nd4j.zeros(maskDtype, nExamples, timeSeriesLength); for (int i = 0; i < nExamples; i++) { for (int j = 0; j < timeSeriesLength; j++) { inputMask.putScalar(new int[]{i, j}, (r.nextBoolean() ? 
1.0 : 0.0)); } } net.setLayerMaskArrays(inputMask, null); net2.setLayerMaskArrays(inputMask, null); List<INDArray> actEmbedding = net.feedForward(inEmbedding, false); List<INDArray> actDense = net2.feedForward(inDense, false); for (int i = 1; i < actEmbedding.size(); i++) { assertEquals(actDense.get(i), actEmbedding.get(i)); } net.setLabels(labels); net2.setLabels(labels); net.computeGradientAndScore(); net2.computeGradientAndScore(); // System.out.println(net.score() + "\t" + net2.score()); assertEquals(net2.score(), net.score(), 1e-5); Map<String, INDArray> gradients = net.gradient().gradientForVariable(); Map<String, INDArray> gradients2 = net2.gradient().gradientForVariable(); assertEquals(gradients.keySet(), gradients2.keySet()); for (String s : gradients.keySet()) { assertEquals(gradients2.get(s), gradients.get(s)); } } } }
Example 12
Source File: EmbeddingLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test
public void testEmbeddingForwardPass() {
    //With the same parameters, embedding layer should have same activations as the equivalent one-hot representation
    // input with a DenseLayer
    int nClassesIn = 10;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(0, new EmbeddingLayer.Builder().hasBias(true).nIn(nClassesIn).nOut(5).build())
            .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build())
            .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net.init();
    net2.init();

    net2.setParams(net.params().dup());

    int batchSize = 3;
    INDArray inEmbedding = Nd4j.create(batchSize, 1);
    INDArray inOneHot = Nd4j.create(batchSize, nClassesIn);

    Random r = new Random(12345);
    for (int i = 0; i < batchSize; i++) {
        int classIdx = r.nextInt(nClassesIn);
        inEmbedding.putScalar(i, classIdx);
        inOneHot.putScalar(new int[]{i, classIdx}, 1.0);
    }

    List<INDArray> activationsEmbedding = net.feedForward(inEmbedding, false);
    List<INDArray> activationsDense = net2.feedForward(inOneHot, false);
    for (int i = 1; i < 3; i++) {
        INDArray actE = activationsEmbedding.get(i);
        INDArray actD = activationsDense.get(i);
        assertEquals(actE, actD);
    }
}
Example 13
Source File: EmbeddingLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test
public void testEmbeddingSingleSequenceForwardPass() {
    int nClassesIn = 10;
    int embeddingDim = 5;
    int nOut = 4;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(new EmbeddingSequenceLayer.Builder().inputLength(1)
                    .hasBias(true).nIn(nClassesIn).nOut(embeddingDim).build())
            .layer(new RnnOutputLayer.Builder().nIn(embeddingDim).nOut(nOut).activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder().activation(Activation.TANH).list()
            .layer(0, new DenseLayer.Builder().nIn(nClassesIn).nOut(5).activation(Activation.IDENTITY).build())
            .layer(1, new OutputLayer.Builder().nIn(5).nOut(4).activation(Activation.SOFTMAX).build())
            .inputPreProcessor(0, new RnnToFeedForwardPreProcessor())
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net.init();
    net2.init();

    net2.setParams(net.params().dup());

    int batchSize = 3;
    INDArray inEmbedding = Nd4j.create(batchSize, 1);
    INDArray inOneHot = Nd4j.create(batchSize, nClassesIn, 1);

    Random r = new Random(12345);
    for (int i = 0; i < batchSize; i++) {
        int classIdx = r.nextInt(nClassesIn);
        inEmbedding.putScalar(i, classIdx);
        inOneHot.putScalar(new int[]{i, classIdx, 0}, 1.0);
    }

    List<INDArray> activationsDense = net2.feedForward(inOneHot, false);
    List<INDArray> activationEmbedding = net.feedForward(inEmbedding, false);

    INDArray actD1 = activationsDense.get(1);
    INDArray actE1 = activationEmbedding.get(1).reshape(batchSize, embeddingDim);
    assertEquals(actD1, actE1);

    INDArray actD2 = activationsDense.get(2);
    INDArray actE2 = activationEmbedding.get(2).reshape(batchSize, nOut);
    assertEquals(actD2, actE2);
}
Example 14
Source File: ActivationLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testCNNActivationLayer() throws Exception { DataSetIterator iter = new MnistDataSetIterator(2, 2); DataSet next = iter.next(); // Run without separate activation layer MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.RELU).weightInit(WeightInit.XAVIER).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nOut(10).build()) .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); network.fit(next); // Run with separate activation layer MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .seed(123).list() .layer(0, new ConvolutionLayer.Builder(4, 4).stride(2, 2).nIn(1).nOut(20) .activation(Activation.IDENTITY).weightInit(WeightInit.XAVIER) .build()) .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() .activation(Activation.RELU).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX) .nOut(10).build()) .setInputType(InputType.convolutionalFlat(28, 28, 1)).build(); MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); network2.init(); network2.fit(next); // check parameters assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); // check activations network.init(); network.setInput(next.getFeatures()); List<INDArray> activations = network.feedForward(true); network2.init(); network2.setInput(next.getFeatures()); List<INDArray> activations2 = network2.feedForward(true); assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); assertEquals(activations.get(2), activations2.get(3)); }
Example 15
Source File: ActivationLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testAutoEncoderActivationLayer() throws Exception { int minibatch = 3; int nIn = 5; int layerSize = 5; int nOut = 3; INDArray next = Nd4j.rand(new int[] {minibatch, nIn}); INDArray labels = Nd4j.zeros(minibatch, nOut); for (int i = 0; i < minibatch; i++) { labels.putScalar(i, i % nOut, 1.0); } // Run without separate activation layer Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) .activation(Activation.SIGMOID).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .build()) .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); network.fit(next, labels); //Labels are necessary for this test: layer activation function affect pretraining results, otherwise // Run with separate activation layer Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new AutoEncoder.Builder().nIn(nIn).nOut(layerSize).corruptionLevel(0.0) .activation(Activation.IDENTITY).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() .activation(Activation.SIGMOID).build()) .layer(2, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.RECONSTRUCTION_CROSSENTROPY) .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nOut) .build()) .build(); MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); network2.init(); network2.fit(next, labels); // check parameters assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); // check activations network.init(); network.setInput(next); List<INDArray> activations = network.feedForward(true); network2.init(); network2.setInput(next); List<INDArray> activations2 = network2.feedForward(true); assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); assertEquals(activations.get(2), activations2.get(3)); }
Example 16
Source File: ActivationLayerTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testDenseActivationLayer() throws Exception { DataSetIterator iter = new MnistDataSetIterator(2, 2); DataSet next = iter.next(); // Run without separate activation layer MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28 * 1).nOut(10).activation(Activation.RELU) .weightInit(WeightInit.XAVIER).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).weightInit(WeightInit.XAVIER) .activation(Activation.SOFTMAX).nIn(10).nOut(10).build()) .build(); MultiLayerNetwork network = new MultiLayerNetwork(conf); network.init(); network.fit(next); // Run with separate activation layer MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123) .list() .layer(0, new DenseLayer.Builder().nIn(28 * 28 * 1).nOut(10).activation(Activation.IDENTITY) .weightInit(WeightInit.XAVIER).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.ActivationLayer.Builder() .activation(Activation.RELU).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).nIn(10).nOut(10) .build()) .build(); MultiLayerNetwork network2 = new MultiLayerNetwork(conf2); network2.init(); network2.fit(next); // check parameters assertEquals(network.getLayer(0).getParam("W"), network2.getLayer(0).getParam("W")); assertEquals(network.getLayer(1).getParam("W"), network2.getLayer(2).getParam("W")); assertEquals(network.getLayer(0).getParam("b"), network2.getLayer(0).getParam("b")); assertEquals(network.getLayer(1).getParam("b"), network2.getLayer(2).getParam("b")); // check activations network.init(); network.setInput(next.getFeatures()); List<INDArray> activations = network.feedForward(true); network2.init(); network2.setInput(next.getFeatures()); List<INDArray> activations2 = network2.feedForward(true); assertEquals(activations.get(1).reshape(activations2.get(2).shape()), activations2.get(2)); assertEquals(activations.get(2), activations2.get(3)); }
Example 17
Source File: GravesLSTMTest.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testSingleExample() { Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Sgd(0.1)).seed(12345).list() .layer(0, new org.deeplearning4j.nn.conf.layers.GravesLSTM.Builder().activation(Activation.TANH) .nIn(2).nOut(2).build()) .layer(1, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder() .lossFunction(LossFunctions.LossFunction.MSE).nIn(2).nOut(1) .activation(Activation.TANH).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray in1 = Nd4j.rand(new int[] {1, 2, 4}); INDArray in2 = Nd4j.rand(new int[] {1, 2, 5}); in2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)}, in1); assertEquals(in1, in2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4))); INDArray labels1 = Nd4j.rand(new int[] {1, 1, 4}); INDArray labels2 = Nd4j.create(1, 1, 5); labels2.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4)}, labels1); assertEquals(labels1, labels2.get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.interval(0, 4))); INDArray out1 = net.output(in1); INDArray out2 = net.output(in2); // System.out.println(Arrays.toString(net.output(in1).data().asFloat())); // System.out.println(Arrays.toString(net.output(in2).data().asFloat())); List<INDArray> activations1 = net.feedForward(in1); List<INDArray> activations2 = net.feedForward(in2); // for (int i = 0; i < 3; i++) { // System.out.println("-----\n" + i); // System.out.println(Arrays.toString(activations1.get(i).dup().data().asDouble())); // System.out.println(Arrays.toString(activations2.get(i).dup().data().asDouble())); // // System.out.println(activations1.get(i)); // System.out.println(activations2.get(i)); // } //Expect first 4 time steps to be indentical... for (int i = 0; i < 4; i++) { double d1 = out1.getDouble(i); double d2 = out2.getDouble(i); assertEquals(d1, d2, 0.0); } }
Example 18
Source File: DTypeTests.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testCapsNetDtypes() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype; int primaryCapsDim = 2; int primarpCapsChannel = 8; int capsule = 5; int minibatchSize = 8; int routing = 1; int capsuleDim = 4; int height = 6; int width = 6; int inputDepth = 4; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .seed(123) .updater(new NoOp()) .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6))) .list() .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel) .kernelSize(3, 3) .stride(2, 2) .build()) .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build()) .layer(new CapsuleStrengthLayer.Builder().build()) .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build()) .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build()) .setInputType(InputType.convolutional(height, width, inputDepth)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray in = Nd4j.rand(networkDtype, minibatchSize, inputDepth * height * width).mul(10) .reshape(-1, inputDepth, height, width); INDArray label = Nd4j.zeros(networkDtype, minibatchSize, capsule); for (int i = 0; i < minibatchSize; i++) { label.putScalar(new int[]{i, i % capsule}, 1.0); } INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(label); net.computeGradientAndScore(); net.fit(new DataSet(in, label)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = label.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } }
Example 19
Source File: RegressionTest100b3.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testCustomLayer() throws Exception { for( int i=1; i<2; i++ ) { String dtype = (i == 0 ? "float" : "double"); DataType dt = (i == 0 ? DataType.FLOAT : DataType.DOUBLE); File f = Resources.asFile("regression_testing/100b3/CustomLayerExample_100b3_" + dtype + ".bin"); MultiLayerNetwork.load(f, true); MultiLayerNetwork net = MultiLayerNetwork.load(f, true); // net = net.clone(); DenseLayer l0 = (DenseLayer) net.getLayer(0).conf().getLayer(); assertEquals(new ActivationTanH(), l0.getActivationFn()); assertEquals(new WeightDecay(0.03, false), TestUtils.getWeightDecayReg(l0)); assertEquals(new RmsProp(0.95), l0.getIUpdater()); CustomLayer l1 = (CustomLayer) net.getLayer(1).conf().getLayer(); assertEquals(new ActivationTanH(), l1.getActivationFn()); assertEquals(new ActivationSigmoid(), l1.getSecondActivationFunction()); assertEquals(new RmsProp(0.95), l1.getIUpdater()); INDArray outExp; File f2 = Resources.asFile("regression_testing/100b3/CustomLayerExample_Output_100b3_" + dtype + ".bin"); try (DataInputStream dis = new DataInputStream(new FileInputStream(f2))) { outExp = Nd4j.read(dis); } INDArray in; File f3 = Resources.asFile("regression_testing/100b3/CustomLayerExample_Input_100b3_" + dtype + ".bin"); try (DataInputStream dis = new DataInputStream(new FileInputStream(f3))) { in = Nd4j.read(dis); } assertEquals(dt, in.dataType()); assertEquals(dt, outExp.dataType()); assertEquals(dt, net.params().dataType()); assertEquals(dt, net.getFlattenedGradients().dataType()); assertEquals(dt, net.getUpdater().getStateViewArray().dataType()); //System.out.println(Arrays.toString(net.params().data().asFloat())); INDArray outAct = net.output(in); assertEquals(dt, outAct.dataType()); List<INDArray> activations = net.feedForward(in); assertEquals(dt, net.getLayerWiseConfigurations().getDataType()); assertEquals(dt, net.params().dataType()); assertEquals(dtype, outExp, outAct); } }
Example 20
Source File: DTypeTests.java, from deeplearning4j (Apache License 2.0), 4 votes
@Test public void testAttentionDTypes() { for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(globalDtype, globalDtype); for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { assertEquals(globalDtype, Nd4j.dataType()); assertEquals(globalDtype, Nd4j.defaultFloatingPointType()); String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype; int mb = 3; int nIn = 3; int nOut = 5; int tsLength = 4; int layerSize = 8; int numQueries = 6; INDArray in = Nd4j.rand(networkDtype, new long[]{mb, nIn, tsLength}); INDArray labels = TestUtils.randomOneHot(mb, nOut); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(networkDtype) .activation(Activation.TANH) .updater(new NoOp()) .weightInit(WeightInit.XAVIER) .list() .layer(new LSTM.Builder().nOut(layerSize).build()) .layer(new SelfAttentionLayer.Builder().nOut(8).nHeads(2).projectInput(true).build()) .layer(new LearnedSelfAttentionLayer.Builder().nOut(8).nHeads(2).nQueries(numQueries).projectInput(true).build()) .layer(new RecurrentAttentionLayer.Builder().nIn(layerSize).nOut(layerSize).nHeads(1).projectInput(false).hasBias(false).build()) .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.MAX).build()) .layer(new OutputLayer.Builder().nOut(nOut).activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).build()) .setInputType(InputType.recurrent(nIn)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray out = net.output(in); assertEquals(msg, networkDtype, out.dataType()); List<INDArray> ff = net.feedForward(in); for (int i = 0; i < ff.size(); i++) { String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName()); assertEquals(s, networkDtype, ff.get(i).dataType()); } net.setInput(in); net.setLabels(labels); net.computeGradientAndScore(); net.fit(new DataSet(in, labels)); logUsedClasses(net); //Now, test mismatched dtypes for input/labels: for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { INDArray in2 = in.castTo(inputLabelDtype); INDArray label2 = labels.castTo(inputLabelDtype); net.output(in2); net.setInput(in2); net.setLabels(label2); net.computeGradientAndScore(); net.fit(new DataSet(in2, label2)); } } } }