Java Code Examples for org.nd4j.linalg.lossfunctions.ILossFunction#computeGradient()
The following examples show how to use
org.nd4j.linalg.lossfunctions.ILossFunction#computeGradient() .
Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
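For orientation, here is a minimal, self-contained sketch of calling computeGradient() directly, outside of any DL4J layer. It is an illustrative assumption rather than code from the projects below: it uses ND4J's built-in LossMSE and ActivationIdentity implementations, and the labels, pre-activation output, and (optional) mask arguments mirror the calls in the examples that follow.

import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationIdentity;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.ILossFunction;
import org.nd4j.linalg.lossfunctions.impl.LossMSE;

public class ComputeGradientSketch {
    public static void main(String[] args) {
        // Hypothetical data: pre-activation output (preOut) and labels, shape [minibatch, nOut]
        INDArray preOut = Nd4j.rand(3, 4);
        INDArray labels = Nd4j.rand(3, 4);

        ILossFunction lossFunction = new LossMSE();
        IActivation activation = new ActivationIdentity();

        // Gradient of the loss w.r.t. the pre-activation output; mask is null (no masking)
        INDArray gradient = lossFunction.computeGradient(labels, preOut, activation, null);
        System.out.println("Gradient shape: " + java.util.Arrays.toString(gradient.shape()));
    }
}

In the layer implementations below, the same call is made with the layer's configured loss function, activation function, and mask array, and the returned gradient ("delta") is then backpropagated.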
Example 1
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut,
    //         layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example 2
Source File: BaseOutputLayer.java From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to: weightGradView.assign(input.transpose().mmul(delta)); //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example 3
Source File: LossLayer.java From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example 4
Source File: CnnLossLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 4)
        throw new UnsupportedOperationException(
                "Input is not rank 4. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape " + layerConf().getFormat().dimensionNames());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");
    Preconditions.checkState(input.equalShapes(labels),
            "Input and label arrays do not have same shape: %ndShape vs. %ndShape", input, labels);

    CNN2DFormat format = layerConf().getFormat();
    INDArray input2d = ConvolutionUtils.reshape4dTo2d(input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape4dTo2d(labels, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeMaskIfRequired(maskArray, input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), format, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta4d);
}
Example 5
Source File: Cnn3DLossLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if (layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC) {
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
Example 6
Source File: RnnLossLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray input = this.input;
    INDArray labels = this.labels;
    if (input.rank() != 3)
        throw new UnsupportedOperationException(
                "Input is not rank 3. Expected rank 3 input of shape [minibatch, size, sequenceLength]. Got input with rank "
                        + input.rank() + " with shape " + Arrays.toString(input.shape()) + " for layer " + layerId());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");
    if (layerConf().getRnnDataFormat() == RNNFormat.NWC) {
        input = input.permute(0, 2, 1);
        labels = labels.permute(0, 2, 1);
    }
    Preconditions.checkState(labels.rank() == 3, "Expected rank 3 labels array, got label array with shape %ndShape", labels);
    Preconditions.checkState(input.size(2) == labels.size(2), "Sequence lengths do not match for RnnOutputLayer input and labels:"
            + "Arrays should be rank 3 with shape [minibatch, size, sequenceLength] - mismatch on dimension 2 (sequence length) - input=%ndShape vs. label=%ndShape",
            input, labels);

    INDArray input2d = TimeSeriesUtils.reshape3dTo2d(input, workspaceMgr, ArrayType.BP_WORKING_MEM);
    INDArray labels2d = TimeSeriesUtils.reshape3dTo2d(labels, workspaceMgr, ArrayType.BP_WORKING_MEM);

    INDArray maskReshaped;
    if (this.maskArray != null) {
        if (this.maskArray.rank() == 3) {
            maskReshaped = TimeSeriesUtils.reshapePerOutputTimeSeriesMaskTo2d(this.maskArray, workspaceMgr, ArrayType.BP_WORKING_MEM);
        } else {
            maskReshaped = TimeSeriesUtils.reshapeTimeSeriesMaskToVector(this.maskArray, workspaceMgr, ArrayType.BP_WORKING_MEM);
        }
    } else {
        maskReshaped = null;
    }

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);

    INDArray delta3d = TimeSeriesUtils.reshape2dTo3d(delta2d, input.size(0), workspaceMgr, ArrayType.ACTIVATION_GRAD);

    if (layerConf().getRnnDataFormat() == RNNFormat.NWC) {
        delta3d = delta3d.permute(0, 2, 1);
    }

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta3d);
}
Example 7
Source File: CenterLossOutputLayer.java From deeplearning4j with Apache License 2.0
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    if (labels2d.size(1) != preOut.size(1)) {
        throw new DL4JInvalidInputException(
                "Labels array numColumns (size(1) = " + labels2d.size(1) + ") does not match output layer"
                        + " number of outputs (nOut = " + preOut.size(1) + ") " + layerId());
    }

    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();
    INDArray weightGradView = gradientViews.get(CenterLossParamInitializer.WEIGHT_KEY);
    INDArray biasGradView = gradientViews.get(CenterLossParamInitializer.BIAS_KEY);
    INDArray centersGradView = gradientViews.get(CenterLossParamInitializer.CENTER_KEY);

    // centers delta
    double alpha = layerConf().getAlpha();

    INDArray centers = params.get(CenterLossParamInitializer.CENTER_KEY);
    INDArray l = labels.castTo(centers.dataType()); //Ensure correct dtype (same as params); no-op if already correct dtype
    INDArray centersForExamples = l.mmul(centers);
    INDArray diff = centersForExamples.sub(input).muli(alpha);
    INDArray numerator = l.transpose().mmul(diff);
    INDArray denominator = l.sum(0).reshape(l.size(1), 1).addi(1.0);

    INDArray deltaC;
    if (layerConf().getGradientCheck()) {
        double lambda = layerConf().getLambda();
        //For gradient checks: need to multiply dLc/dcj by lambda to get dL/dcj
        deltaC = numerator.muli(lambda);
    } else {
        deltaC = numerator.diviColumnVector(denominator);
    }
    centersGradView.assign(deltaC);

    // other standard calculations
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op

    gradient.gradientForVariable().put(CenterLossParamInitializer.WEIGHT_KEY, weightGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.BIAS_KEY, biasGradView);
    gradient.gradientForVariable().put(CenterLossParamInitializer.CENTER_KEY, centersGradView);

    return new Pair<>(gradient, delta);
}