org.deeplearning4j.nn.gradient.DefaultGradient Java Examples
The following examples show how to use org.deeplearning4j.nn.gradient.DefaultGradient.
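DefaultGradient is the standard map-backed implementation of the Gradient interface: it keys per-variable gradient arrays by parameter name and can optionally wrap a pre-allocated flattened gradient view. Before the project examples, here is a minimal sketch of that basic usage, based only on the methods that appear in the examples below; the parameter names "W" and "b" are illustrative placeholders, not fixed API constants.

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// Minimal sketch: key per-parameter gradient arrays by name, then read them back.
// "W" and "b" are hypothetical parameter keys chosen for illustration.
Gradient gradient = new DefaultGradient();
gradient.setGradientFor("W", Nd4j.ones(4, 5));              // set via the convenience method
gradient.gradientForVariable().put("b", Nd4j.zeros(1, 5));  // or via the backing map directly
INDArray wGrad = gradient.getGradientFor("W");              // per-variable lookup
INDArray flat = gradient.gradient();                        // all variable gradients flattened into one array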
Example #1
Source File: BaseOutputLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to: weightGradView.assign(input.transpose().mmul(delta)); //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if(hasBias()){
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #2
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #3
Source File: SpaceToBatch.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(dataType); //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())
            .addOutputs(outEpsilonNHWC)
            .callInplace(false)
            .build();
    Nd4j.exec(op);

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
}
Example #4
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    if(context == null){
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int)n);
    } else
        context.purge();

    LocalResponseNormalization op = new LocalResponseNormalization();

    context.setInputArray(0, input);
    context.setInputArray(1, epsilon);      //epsilon is the second op input; index 0 holds the layer input
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);

    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
}
Example #5
Source File: ZeroPaddingLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    if(layerConf().getDataFormat() == CNN2DFormat.NCHW){
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        //NHWC
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),
                NDArrayIndex.all());
    }

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #6
Source File: PReLU.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray layerInput = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());

    INDArray alpha = getParam(PReLUParamInitializer.WEIGHT_KEY);
    IActivation prelu = new ActivationPReLU(alpha, axes);

    Pair<INDArray, INDArray> deltas = prelu.backprop(layerInput, epsilon);
    INDArray delta = deltas.getFirst();
    INDArray weightGrad = deltas.getSecond();
    INDArray weightGradView = gradientViews.get(PReLUParamInitializer.WEIGHT_KEY);
    weightGradView.assign(weightGrad);

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta); //Usually a no-op (except for perhaps identity)
    delta = backpropDropOutIfPresent(delta);

    Gradient ret = new DefaultGradient();
    ret.setGradientFor(PReLUParamInitializer.WEIGHT_KEY, weightGradView, 'c');
    return new Pair<>(ret, delta);
}
Example #7
Source File: RepeatVector.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if(epsilon.dataType() != dataType){
        epsilon = epsilon.castTo(dataType);
    }

    INDArray outEpsilon;
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)){
        if (layerConf().getDataFormat() == RNNFormat.NCW) {
            outEpsilon = epsilon.sum(2);
        } else {
            outEpsilon = epsilon.sum(1);
        }
    }

    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, outEpsilon);
}
Example #8
Source File: BasePretrainNetwork.java From deeplearning4j with Apache License 2.0 | 6 votes |
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);

    // The order of the following statements matters! The gradient is being flattened and applied to
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation
    //TODO: optimize this to avoid the assigns
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);
    wg.assign(wGradient);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);
    hbg.assign(hBiasGradient);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    vbg.assign(vBiasGradient);

    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);

    return ret;
}
Example #9
Source File: ZeroPadding3DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]),
            NDArrayIndex.interval(padding[2], padding[2] + inShape[3]),
            NDArrayIndex.interval(padding[4], padding[4] + inShape[4]));

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #10
Source File: SpaceToDepth.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(epsilon.dataType());

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;

    long miniBatch = input.size(0);
    long inDepth = input.size(nchw ? 1 : 3);
    long inH = input.size(nchw ? 2 : 1);
    long inW = input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray outEpsilon = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    int blockSize = getBlockSize();

    //Workaround for issue: https://github.com/eclipse/deeplearning4j/issues/8859
    if(!Shape.hasDefaultStridesForShape(epsilon))
        epsilon = epsilon.dup('c');

    CustomOp op = DynamicCustomOp.builder("depth_to_space")
            .addInputs(epsilon)
            .addIntegerArguments(blockSize, nchw ? 0 : 1)   //nchw = 0, nhwc = 1
            .addOutputs(outEpsilon)
            .build();
    Nd4j.getExecutioner().exec(op);

    return new Pair<>(gradient, outEpsilon);
}
Example #11
Source File: Cropping1DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, dataType, inShape, 'c');
    INDArray epsNextSubset = epsNext.get(all(), all(), interval(cropping[0], epsNext.size(2) - cropping[1]));
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #12
Source File: ZeroPadding1DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));

    return new Pair<>((Gradient) new DefaultGradient(), workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
}
Example #13
Source File: CnnLossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 4)
        throw new UnsupportedOperationException(
                "Input is not rank 4. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape " + layerConf().getFormat().dimensionNames());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    Preconditions.checkState(input.equalShapes(labels), "Input and label arrays do not have same shape: %ndShape vs. %ndShape", input, labels);

    CNN2DFormat format = layerConf().getFormat();
    INDArray input2d = ConvolutionUtils.reshape4dTo2d(input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape4dTo2d(labels, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeMaskIfRequired(maskArray, input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), format, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta4d);
}
Example #14
Source File: Cropping3DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #15
Source File: ElementWiseMultiplicationLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
    INDArray z = preOutput(true, workspaceMgr); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    INDArray input = this.input.castTo(dataType);

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
    weightGrad.subi(weightGrad);
    weightGrad.addi(input.mul(delta).sum(0));

    INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
    delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

    ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
    ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

    // epsilonNext is a 2d matrix
    INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
    epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example #16
Source File: DropoutLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
}
Example #17
Source File: Upsampling2D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);

    return new Pair<>(gradient, epsOut);
}
Example #18
Source File: Upsampling1D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(intArgs)
            .addInputs(input, epsilon)
            .addOutputs(reshapedEpsilon)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    Gradient gradient = new DefaultGradient();

    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
}
Example #19
Source File: Cnn3DLossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
Example #20
Source File: Cropping2DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #21
Source File: EmbeddingLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        delta.muliColumnVector(maskArray.castTo(dataType));
    }

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    weightGradients.assign(0);

    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);
    }

    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);

    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    if(hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);
    }

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
}
Example #22
Source File: LossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #23
Source File: LinearModel.java From FederatedAndroidTrainer with MIT License | 5 votes |
@Override
public void updateWeights(INDArray remoteGradient) {
    Log.d(TAG, "Remote Gradient " + remoteGradient);
    Gradient gradient = new DefaultGradient(remoteGradient);
    Log.d(TAG, "Updating weights from server with gradient " + gradient.gradient().toString());

    // TODO Transform the remoteGradient flattened array into the map required by the network?
    Map<String, INDArray> netGradients = mNetwork.gradient().gradientForVariable();
    for (Map.Entry<String, INDArray> entry : netGradients.entrySet()) {
        Log.d(TAG, entry.getKey());
        for (int i : entry.getValue().shape()) {
            Log.d(TAG, "Shape " + i);
        }
        for (int i = 0; i < entry.getValue().shape().length; i++) {
            Log.d(TAG, "Size (" + i + ")" + entry.getValue().size(i));
        }
    }

    Log.d(TAG, "Updating weights with INDArray object");
    INDArray params = mNetwork.params(true);
    params.addi(remoteGradient);
    /*
        0_W  Shape 2   Shape 10
        0_b  Shape 1   Shape 10
        1_W  Shape 10  Shape 1
        1_b  Shape 1   Shape 1
        Weights updated
     */
    mNetwork.update(gradient);
    Log.d(TAG, "Weights updated");
}
Example #24
Source File: ActivationLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray temp = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());
    INDArray delta = layerConf().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params
    if(delta == epsilon){
        //Edge case: identity activation + external errors -> no-op
        delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, delta);
    }
    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta); //Usually a no-op (except for perhaps identity)
    Gradient ret = new DefaultGradient();
    return new Pair<>(ret, delta);
}
Example #25
Source File: SubsamplingLayerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
private Gradient createPrevGradient() {
    Gradient gradient = new DefaultGradient();
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);

    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
}
Example #26
Source File: BarnesHutTsne.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Gradient gradient() {
    /*MemoryWorkspace workspace =
            workspaceMode == WorkspaceMode.NONE ? new DummyWorkspace()
                    : Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(
                            workspaceConfigurationExternal, workspaceExternal);
    try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/
    {
        if (yIncs == null)
            yIncs = Y.like();
        if (gains == null)
            gains = Y.ulike().assign(1.0D);

        AtomicDouble sumQ = new AtomicDouble(0);
        /* Calculate gradient based on barnes hut approximation with positive and negative forces */
        INDArray posF = Y.like();
        INDArray negF = Y.like();

        tree = new SpTree(Y);

        tree.computeEdgeForces(rows, cols, vals, N, posF);
        for (int n = 0; n < N; n++) {
            INDArray temp = negF.slice(n);
            tree.computeNonEdgeForces(n, theta, temp, sumQ);
        }
        INDArray dC = posF.subi(negF.divi(sumQ));

        Gradient ret = new DefaultGradient();
        ret.gradientForVariable().put(Y_GRAD, dC);

        return ret;
    }
}
Example #27
Source File: BasePretrainNetwork.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    Pair<Gradient, INDArray> result = super.backpropGradient(epsilon, workspaceMgr);
    ((DefaultGradient) result.getFirst()).setFlattenedGradient(gradientsFlattened);

    //During backprop, visible bias gradients are set to 0 - this is necessary due to the gradient view mechanics
    // that DL4J uses
    INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);
    vBiasGradient.assign(0);

    weightNoiseParams.clear();

    return result;
}
Example #28
Source File: FrozenLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
public FrozenLayer(Layer insideLayer) {
    super(insideLayer);
    if (insideLayer instanceof OutputLayer) {
        throw new IllegalArgumentException("Output Layers are not allowed to be frozen " + layerId());
    }
    this.zeroGradient = new DefaultGradient(insideLayer.params());
    if (insideLayer.paramTable() != null) {
        for (String paramType : insideLayer.paramTable().keySet()) {
            //save memory??
            zeroGradient.setGradientFor(paramType, null);
        }
    }
}
Example #29
Source File: TestComputationGraphNetwork.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testGradientUpdate() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);

    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("first_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("first_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3));

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
            .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
            .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first")
            .setOutputs("output").build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    net.fit(iter.next());

    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    net.update(expectedGradient);

    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // Update params with set
    net.setParam("first_W", Nd4j.ones(4, 5));
    net.setParam("first_b", Nd4j.ones(1, 5));
    net.setParam("output_W", Nd4j.ones(5, 3));
    net.setParam("output_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();

    // Confirm params
    assertEquals(Nd4j.ones(1, 43), actualParams);

    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
Example #30
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) {
    // Gradients: d(x^2)/dx = 2x
    INDArray gradient = parameters.mul(2);
    Gradient g = new DefaultGradient();
    g.gradientForVariable().put("W", this.gradientView);
    this.gradient = g;
    this.score = Nd4j.getBlasWrapper().dot(parameters, parameters); //sum_i x_i^2
    this.gradientView.assign(gradient);
}