org.deeplearning4j.nn.gradient.Gradient Java Examples
The following examples show how to use
org.deeplearning4j.nn.gradient.Gradient.
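Before the project examples, here is a minimal, self-contained sketch of the Gradient API itself. This is a hypothetical snippet (class name, variable names "W"/"b", and shapes are illustrative, not taken from any project below); it only uses calls that appear in the examples: DefaultGradient's no-arg constructor, gradientForVariable(), and getGradientFor().

import java.util.Arrays;

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class GradientBasics {
    public static void main(String[] args) {
        // A Gradient maps parameter names (e.g. "W", "b") to their gradient arrays
        Gradient g = new DefaultGradient();

        INDArray dW = Nd4j.rand(3, 4);   // hypothetical weight gradient
        INDArray db = Nd4j.zeros(1, 4);  // hypothetical bias gradient
        g.gradientForVariable().put("W", dW);
        g.gradientForVariable().put("b", db);

        // Look up a single variable's gradient, as the tests below do via getGradientFor("W")
        INDArray retrieved = g.getGradientFor("W");
        System.out.println("Variables: " + g.gradientForVariable().keySet());
        System.out.println("dL/dW shape: " + Arrays.toString(retrieved.shape()));
    }
}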
Example #1
Source File: SpaceToBatch.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray input = this.input.castTo(dataType);   //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())
            .addOutputs(outEpsilonNHWC)
            .callInplace(false)
            .build();
    Nd4j.exec(op);

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
}
Example #2
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut,
    //         layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #3
Source File: ZeroPaddingLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    if (layerConf().getDataFormat() == CNN2DFormat.NCHW) {
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        //NHWC
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),
                NDArrayIndex.all());
    }

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #4
Source File: BaseStatsListener.java From deeplearning4j with Apache License 2.0
@Override
public void onGradientCalculation(Model model) {
    int iterCount = getModelInfo(model).iterCount;
    if (calcFromGradients() && updateConfig.reportingFrequency() > 0
            && (iterCount == 0 || iterCount % updateConfig.reportingFrequency() == 0)) {
        Gradient g = model.gradient();
        if (updateConfig.collectHistograms(StatsType.Gradients)) {
            gradientHistograms = getHistograms(g.gradientForVariable(), updateConfig.numHistogramBins(StatsType.Gradients));
        }
        if (updateConfig.collectMean(StatsType.Gradients)) {
            meanGradients = calculateSummaryStats(g.gradientForVariable(), StatType.Mean);
        }
        if (updateConfig.collectStdev(StatsType.Gradients)) {
            stdevGradient = calculateSummaryStats(g.gradientForVariable(), StatType.Stdev);
        }
        if (updateConfig.collectMeanMagnitudes(StatsType.Gradients)) {
            meanMagGradients = calculateSummaryStats(g.gradientForVariable(), StatType.MeanMagnitude);
        }
    }
}
Example #5
Source File: BasePretrainNetwork.java From deeplearning4j with Apache License 2.0
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);
    // The order of the following statements matters! The gradient is being flattened and applied to
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation
    //TODO: optimize this to avoid the assigns
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);
    wg.assign(wGradient);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);
    hbg.assign(hBiasGradient);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    vbg.assign(vBiasGradient);

    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);
    return ret;
}
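A note on the pattern above, based on the code's own comments: the DefaultGradient is constructed around the network's flattened gradient buffer (gradientsFlattened), and the per-parameter arrays obtained from gradientViews are views into that same buffer. Assigning into wg, hbg, and vbg therefore writes directly into the flattened gradient that the Updater later applies, which is why the order of the assigns must match the flattening order.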
Example #6
Source File: CenterLossOutputLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    // centers
    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray l = labels.castTo(centers.dataType()); //Ensure correct dtype (same as params); no-op if already correct dtype
    INDArray centersForExamples = l.mmul(centers);
    INDArray dLcdai = input.sub(centersForExamples);

    INDArray w = getParamWithNoise(CenterLossParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, w.dataType(), new long[]{w.size(0), delta.size(0)}, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();
    double lambda = layerConf().getLambda();
    epsilonNext.addi(dLcdai.muli(lambda)); // add center loss here

    weightNoiseParams.clear();

    return new Pair<>(pair.getFirst(), epsilonNext);
}
Example #7
Source File: TestGraphNodes.java From deeplearning4j with Apache License 2.0
@Test
public void testStackNode() {
    Nd4j.getRandom().setSeed(12345);
    GraphVertex unstack = new StackVertex(null, "", -1, Nd4j.dataType());

    INDArray in1 = Nd4j.rand(5, 2);
    INDArray in2 = Nd4j.rand(5, 2);
    INDArray in3 = Nd4j.rand(5, 2);
    unstack.setInputs(in1, in2, in3);
    INDArray out = unstack.doForward(false, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(in1, out.get(NDArrayIndex.interval(0, 5), NDArrayIndex.all()));
    assertEquals(in2, out.get(NDArrayIndex.interval(5, 10), NDArrayIndex.all()));
    assertEquals(in3, out.get(NDArrayIndex.interval(10, 15), NDArrayIndex.all()));

    unstack.setEpsilon(out);
    Pair<Gradient, INDArray[]> b = unstack.doBackward(false, LayerWorkspaceMgr.noWorkspaces());

    assertEquals(in1, b.getSecond()[0]);
    assertEquals(in2, b.getSecond()[1]);
    assertEquals(in3, b.getSecond()[2]);
}
Example #8
Source File: SubsetVertex.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: error not set");

    INDArray out = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), forwardShape);
    switch (forwardShape.length) {
        case 2:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true)}, epsilon);
            break;
        case 3:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true), NDArrayIndex.all()}, epsilon);
            break;
        case 4:
            out.put(new INDArrayIndex[] {NDArrayIndex.all(), NDArrayIndex.interval(from, to, true), NDArrayIndex.all(), NDArrayIndex.all()}, epsilon);
            break;
        default:
            throw new RuntimeException("Invalid activation rank"); //Should never happen
    }
    return new Pair<>(null, new INDArray[] {out});
}
Example #9
Source File: FrozenLayerWithBackprop.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray backpropEpsilon = underlying.backpropGradient(epsilon, workspaceMgr).getSecond();
    //backprop might have already changed the gradient view (like BaseLayer and BaseOutputLayer do)
    //so we want to put it back to zeroes
    INDArray gradientView = underlying.getGradientsViewArray();
    if (gradientView != null) {
        gradientView.assign(0);
    }
    return new Pair<>(zeroGradient, backpropEpsilon);
}
Example #10
Source File: SubsamplingLayerTest.java From deeplearning4j with Apache License 2.0
@Test
public void testSubSampleLayerMaxBackprop() throws Exception {
    INDArray expectedContainedEpsilonInput = Nd4j.create(
            new double[] {1., 1., 1., 1., 1., 1., 1., 1.},
            new int[] {1, 2, 2, 2}).castTo(Nd4j.defaultFloatingPointType());

    INDArray expectedContainedEpsilonResult = Nd4j.create(
            new double[] {0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0.,
                          0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.},
            new int[] {1, 2, 4, 4}).castTo(Nd4j.defaultFloatingPointType());

    INDArray input = getContainedData();

    Layer layer = getSubsamplingLayer(SubsamplingLayer.PoolingType.MAX);
    layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());

    Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
    assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
    assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);

    INDArray input2 = getData();
    layer.activate(input2, false, LayerWorkspaceMgr.noWorkspaces());
    long depth = input2.size(1);

    epsilon = Nd4j.ones(5, depth, featureMapHeight, featureMapWidth);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(input.shape().length, out.getSecond().shape().length);
    assertEquals(depth, out.getSecond().size(1)); // channels retained
}
Example #11
Source File: Upsampling1DTest.java From deeplearning4j with Apache License 2.0
@Test
public void testUpsampling1DBackprop() throws Exception {
    INDArray expectedContainedEpsilonInput = Nd4j.create(
            new double[] {1., 3., 2., 6., 7., 2., 5., 5.},
            new int[] {1, 1, 8});

    INDArray expectedContainedEpsilonResult = Nd4j.create(
            new double[] {4., 8., 9., 10.},
            new int[] {1, 1, 4});

    INDArray input = getContainedData();

    Layer layer = getUpsampling1DLayer();
    layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());

    Pair<Gradient, INDArray> containedOutput = layer.backpropGradient(expectedContainedEpsilonInput, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(expectedContainedEpsilonResult, containedOutput.getSecond());
    assertEquals(null, containedOutput.getFirst().getGradientFor("W"));
    assertEquals(expectedContainedEpsilonResult.shape().length, containedOutput.getSecond().shape().length);

    INDArray input2 = getData();
    layer.activate(input2, false, LayerWorkspaceMgr.noWorkspaces());
    val depth = input2.size(1);

    epsilon = Nd4j.ones(5, depth, outputLength);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    assertEquals(input.shape().length, out.getSecond().shape().length);
    assertEquals(depth, out.getSecond().size(1));
}
Example #12
Source File: DropoutLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
}
Example #13
Source File: LayerVertex.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward()) {
        if (inputs == null || inputs[0] == null) {
            throw new IllegalStateException("Cannot do backward pass: inputs not set. Layer: \"" + vertexName
                    + "\" (idx " + vertexIndex + "), numInputs: " + getNumInputArrays());
        } else {
            throw new IllegalStateException("Cannot do backward pass: all epsilons not set. Layer \"" + vertexName
                    + "\" (idx " + vertexIndex + "), numInputs :" + getNumInputArrays() + "; numOutputs: "
                    + getNumOutputConnections());
        }
    }

    //Edge case: output layer - never did forward pass hence layer.setInput was never called...
    if (!setLayerInput) {
        applyPreprocessorAndSetInput(workspaceMgr);
    }

    Pair<Gradient, INDArray> pair;
    if (tbptt && layer instanceof RecurrentLayer) {
        //Truncated BPTT for recurrent layers
        pair = ((RecurrentLayer) layer).tbpttBackpropGradient(epsilon, graph.getConfiguration().getTbpttBackLength(), workspaceMgr);
    } else {
        //Normal backprop
        pair = layer.backpropGradient(epsilon, workspaceMgr); //epsTotal may be null for OutputLayers
    }

    if (layerPreProcessor != null) {
        INDArray eps = pair.getSecond();
        eps = layerPreProcessor.backprop(eps, graph.batchSize(), workspaceMgr);
        pair.setSecond(eps);
    }

    //Layers always have single activations input -> always have single epsilon output during backprop
    return new Pair<>(pair.getFirst(), new INDArray[] {pair.getSecond()});
}
Example #14
Source File: ZeroPadding1DLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();
    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));
    return new Pair<>((Gradient) new DefaultGradient(),
            workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
}
Example #15
Source File: RepeatVectorTest.java From deeplearning4j with Apache License 2.0
@Test
public void testRepeatVector() {
    double[] arr = new double[] {1., 2., 3., 1., 2., 3., 1., 2., 3., 1., 2., 3.};
    INDArray expectedOut = Nd4j.create(arr, new long[] {1, 3, REPEAT}, 'f');
    INDArray input = Nd4j.create(new double[] {1., 2., 3.}, new long[] {1, 3});
    Layer layer = getRepeatVectorLayer();

    INDArray output = layer.activate(input, false, LayerWorkspaceMgr.noWorkspaces());
    assertTrue(Arrays.equals(expectedOut.shape(), output.shape()));
    assertEquals(expectedOut, output);

    INDArray epsilon = Nd4j.ones(1, 3, 4);

    Pair<Gradient, INDArray> out = layer.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());
    INDArray outEpsilon = out.getSecond();
    INDArray expectedEpsilon = Nd4j.create(new double[] {4., 4., 4.}, new long[] {1, 3});
    assertEquals(expectedEpsilon, outEpsilon);
}
Example #16
Source File: LBFGS.java From deeplearning4j with Apache License 2.0
@Override
public void setupSearchState(Pair<Gradient, Double> pair) {
    super.setupSearchState(pair);
    INDArray params = (INDArray) searchState.get(PARAMS_KEY);
    searchState.put("s", new LinkedList<INDArray>());  // holds parameters differences
    searchState.put("y", new LinkedList<INDArray>());  // holds gradients differences
    searchState.put("rho", new LinkedList<Double>());
    searchState.put("oldparams", params.dup());
}
Example #17
Source File: EmbeddingLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        delta.muliColumnVector(maskArray.castTo(dataType));
    }

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    weightGradients.assign(0);

    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);
    }

    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);

    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    if (hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);
    }

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
}
Example #18
Source File: DQN.java From deeplearning4j with Apache License 2.0
public Gradient[] gradient(INDArray input, INDArray labels) {
    mln.setInput(input);
    mln.setLabels(labels);
    mln.computeGradientAndScore();
    Collection<TrainingListener> iterationListeners = mln.getListeners();
    if (iterationListeners != null && iterationListeners.size() > 0) {
        for (TrainingListener l : iterationListeners) {
            l.onGradientCalculation(mln);
        }
    }
    //System.out.println("SCORE: " + mln.score());
    return new Gradient[] {mln.gradient()};
}
Example #19
Source File: FrozenLayerWithBackprop.java From deeplearning4j with Apache License 2.0
@Override
public void update(Gradient gradient) {
    if (!logUpdate) {
        OneTimeLogger.info(log, "Frozen layers will not be updated. Warning will be issued only once per instance");
        logUpdate = true;
    }
    //no op
}
Example #20
Source File: Cropping2DLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #21
Source File: Cnn3DLossLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if (layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC) {
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }

    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
Example #22
Source File: L2Vertex.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: error not set");

    INDArray a = inputs[0];
    INDArray b = inputs[1];
    INDArray out = doForward(tbptt, workspaceMgr);
    Transforms.max(out, eps, false); // in case of 0

    INDArray dLdlambda = epsilon; //dL/dlambda aka 'epsilon' - from layer above

    INDArray sNegHalf = out.rdiv(1.0); //s^(-1/2) = 1.0 / s^(1/2) = 1.0 / out

    INDArray diff;
    try (MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)) {
        diff = a.sub(b);
    }

    INDArray first = dLdlambda.mul(sNegHalf); //Column vector for all cases

    INDArray dLda;
    INDArray dLdb;
    if (a.rank() == 2) {
        //2d case (MLPs etc)
        dLda = diff.muliColumnVector(first);
        try (MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)) {
            dLdb = dLda.neg();
        }
    } else {
        //RNN and CNN case - Broadcast along dimension 0
        dLda = Nd4j.getExecutioner().exec(new BroadcastMulOp(diff, first, diff, 0));
        try (MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)) {
            dLdb = dLda.neg();
        }
    }

    return new Pair<>(null, new INDArray[] {dLda, dLdb});
}
Example #23
Source File: ReshapeVertex.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: errors not set");

    INDArray[] out = new INDArray[1];
    out[0] = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon.reshape(order, inputs[0].shape()));
    return new Pair<>(null, out);
}
Example #24
Source File: Upsampling2D.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0) //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);
    return new Pair<>(gradient, epsOut);
}
Example #25
Source File: RnnDataFormatTests.java From deeplearning4j with Apache License 2.0
private static List<String> differentGrads(Gradient g1, Gradient g2) {
    List<String> differs = new ArrayList<>();
    Map<String, INDArray> m1 = g1.gradientForVariable();
    Map<String, INDArray> m2 = g2.gradientForVariable();
    for (String s : m1.keySet()) {
        INDArray a1 = m1.get(s);
        INDArray a2 = m2.get(s);
        if (!a1.equals(a2)) {
            differs.add(s);
        }
    }
    return differs;
}
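A small hypothetical usage of the helper above: assuming two networks net1 and net2 (illustrative names, not from the test) whose gradients have already been computed via computeGradientAndScore(), the helper reports which parameter gradients disagree.

// Hypothetical usage sketch: compare the gradients of two networks variable by variable
Gradient g1 = net1.gradient();
Gradient g2 = net2.gradient();
List<String> mismatched = differentGrads(g1, g2);
if (!mismatched.isEmpty()) {
    System.out.println("Gradients differ for variables: " + mismatched);
}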
Example #26
Source File: LossLayer.java From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #27
Source File: LSTMHelper.java From deeplearning4j with Apache License 2.0
Pair<Gradient, INDArray> backpropGradient(final NeuralNetConfiguration conf, final IActivation gateActivationFn,
                final INDArray input,
                final INDArray recurrentWeights, //Shape: [hiddenLayerSize,4*hiddenLayerSize+3]; order: [wI,wF,wO,wG,wFF,wOO,wGG]
                final INDArray inputWeights, //Shape: [n^(L-1),4*hiddenLayerSize]; order: [wi,wf,wo,wg]
                final INDArray epsilon, final boolean truncatedBPTT, final int tbpttBackwardLength,
                final FwdPassReturn fwdPass, final boolean forwards, final String inputWeightKey,
                final String recurrentWeightKey, final String biasWeightKey,
                final Map<String, INDArray> gradientViews,
                INDArray maskArray, //Input mask: should only be used with bidirectional RNNs + variable length
                final boolean hasPeepholeConnections, //True for GravesLSTM, false for LSTM
                final LayerWorkspaceMgr workspaceMgr);
Example #28
Source File: FrozenLayer.java From deeplearning4j with Apache License 2.0
@Override
public void update(Gradient gradient) {
    if (!logUpdate) {
        OneTimeLogger.info(log, "Frozen layers will not be updated. Warning will be issued only once per instance");
        logUpdate = true;
    }
    //no op
}
Example #29
Source File: Upsampling1D.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(intArgs)
            .addInputs(input, epsilon)
            .addOutputs(reshapedEpsilon)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    Gradient gradient = new DefaultGradient();
    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
}
Example #30
Source File: Subsampling1DLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    if (epsilon.rank() != 3)
        throw new DL4JInvalidInputException("Got rank " + epsilon.rank()
                + " array as epsilon for Subsampling1DLayer backprop with shape " + Arrays.toString(epsilon.shape())
                + ". Expected rank 3 array with shape [minibatchSize, features, length]. " + layerId());

    if (maskArray != null) {
        INDArray maskOut = feedForwardMaskArray(maskArray, MaskState.Active, (int) epsilon.size(0)).getFirst();
        Preconditions.checkState(epsilon.size(0) == maskOut.size(0) && epsilon.size(2) == maskOut.size(1),
                "Activation gradients dimensions (0,2) and mask dimensions (0,1) don't match: Activation gradients %s, Mask %s",
                epsilon.shape(), maskOut.shape());
        Broadcast.mul(epsilon, maskOut, epsilon, 0, 2);
    }

    // add singleton fourth dimension to input and next layer's epsilon
    INDArray origInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);

    // call 2D SubsamplingLayer's backpropGradient method
    Pair<Gradient, INDArray> gradientEpsNext = super.backpropGradient(epsilon, workspaceMgr);
    INDArray epsNext = gradientEpsNext.getSecond();

    // remove singleton fourth dimension from input and current epsilon
    input = origInput;
    epsNext = epsNext.reshape(epsNext.size(0), epsNext.size(1), epsNext.size(2));

    return new Pair<>(gradientEpsNext.getFirst(), epsNext);
}