org.deeplearning4j.nn.gradient.DefaultGradient Java Examples
The following examples show how to use org.deeplearning4j.nn.gradient.DefaultGradient.
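DefaultGradient is the standard map-backed implementation of the Gradient interface: it keys per-variable gradient arrays by parameter name and can optionally wrap a pre-allocated flattened gradient view. Before the project examples, here is a minimal sketch of that basic usage, based only on the methods that appear in the examples below; the parameter names "W" and "b" are illustrative placeholders, not fixed API constants.

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

// Minimal sketch: key per-parameter gradient arrays by name, then read them back.
// "W" and "b" are hypothetical parameter keys chosen for illustration.
Gradient gradient = new DefaultGradient();
gradient.setGradientFor("W", Nd4j.ones(4, 5));              // set via the convenience method
gradient.gradientForVariable().put("b", Nd4j.zeros(1, 5));  // or via the backing map directly
INDArray wGrad = gradient.getGradientFor("W");              // per-variable lookup
INDArray flat = gradient.gradient();                        // all variable gradients flattened into one array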
Example #1
Source File: BaseOutputLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to: weightGradView.assign(input.transpose().mmul(delta)); //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if(hasBias()){
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #2
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #3
Source File: SpaceToBatch.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(dataType); //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())
            .addOutputs(outEpsilonNHWC)
            .callInplace(false)
            .build();
    Nd4j.exec(op);

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
}
Example #4
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    if(context == null){
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int)n);
    } else
        context.purge();

    LocalResponseNormalization op = new LocalResponseNormalization();

    context.setInputArray(0, input);
    context.setInputArray(1, epsilon);      //epsilon is the second op input; index 0 holds the layer input
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);

    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
}
Example #5
Source File: ZeroPaddingLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    if(layerConf().getDataFormat() == CNN2DFormat.NCHW){
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        //NHWC
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),
                NDArrayIndex.all());
    }

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #6
Source File: PReLU.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray layerInput = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());

    INDArray alpha = getParam(PReLUParamInitializer.WEIGHT_KEY);
    IActivation prelu = new ActivationPReLU(alpha, axes);

    Pair<INDArray, INDArray> deltas = prelu.backprop(layerInput, epsilon);
    INDArray delta = deltas.getFirst();
    INDArray weightGrad = deltas.getSecond();
    INDArray weightGradView = gradientViews.get(PReLUParamInitializer.WEIGHT_KEY);
    weightGradView.assign(weightGrad);

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta); //Usually a no-op (except for perhaps identity)
    delta = backpropDropOutIfPresent(delta);

    Gradient ret = new DefaultGradient();
    ret.setGradientFor(PReLUParamInitializer.WEIGHT_KEY, weightGradView, 'c');
    return new Pair<>(ret, delta);
}
Example #7
Source File: RepeatVector.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if(epsilon.dataType() != dataType){
        epsilon = epsilon.castTo(dataType);
    }

    INDArray outEpsilon;
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)){
        if (layerConf().getDataFormat() == RNNFormat.NCW) {
            outEpsilon = epsilon.sum(2);
        } else {
            outEpsilon = epsilon.sum(1);
        }
    }

    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, outEpsilon);
}
Example #8
Source File: BasePretrainNetwork.java From deeplearning4j with Apache License 2.0 | 6 votes |
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);

    // The order of the following statements matters! The gradient is being flattened and applied to
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation
    //TODO: optimize this to avoid the assigns
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);
    wg.assign(wGradient);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);
    hbg.assign(hBiasGradient);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    vbg.assign(vBiasGradient);

    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);

    return ret;
}
Example #9
Source File: ZeroPadding3DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]),
            NDArrayIndex.interval(padding[2], padding[2] + inShape[3]),
            NDArrayIndex.interval(padding[4], padding[4] + inShape[4]));

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #10
Source File: SpaceToDepth.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(epsilon.dataType());

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;

    long miniBatch = input.size(0);
    long inDepth = input.size(nchw ? 1 : 3);
    long inH = input.size(nchw ? 2 : 1);
    long inW = input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray outEpsilon = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    int blockSize = getBlockSize();

    //Workaround for issue: https://github.com/eclipse/deeplearning4j/issues/8859
    if(!Shape.hasDefaultStridesForShape(epsilon))
        epsilon = epsilon.dup('c');

    CustomOp op = DynamicCustomOp.builder("depth_to_space")
            .addInputs(epsilon)
            .addIntegerArguments(blockSize, nchw ? 0 : 1)   //nchw = 0, nhwc = 1
            .addOutputs(outEpsilon)
            .build();
    Nd4j.getExecutioner().exec(op);

    return new Pair<>(gradient, outEpsilon);
}
Example #11
Source File: Cropping1DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, dataType, inShape, 'c');
    INDArray epsNextSubset = epsNext.get(all(), all(), interval(cropping[0], epsNext.size(2) - cropping[1]));
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #12
Source File: ZeroPadding1DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));

    return new Pair<>((Gradient) new DefaultGradient(), workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
}
Example #13
Source File: CnnLossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 4)
        throw new UnsupportedOperationException(
                "Input is not rank 4. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape " + layerConf().getFormat().dimensionNames());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    Preconditions.checkState(input.equalShapes(labels), "Input and label arrays do not have same shape: %ndShape vs. %ndShape", input, labels);

    CNN2DFormat format = layerConf().getFormat();
    INDArray input2d = ConvolutionUtils.reshape4dTo2d(input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape4dTo2d(labels, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeMaskIfRequired(maskArray, input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), format, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta4d);
}
Example #14
Source File: Cropping3DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #15
Source File: ElementWiseMultiplicationLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
    INDArray z = preOutput(true, workspaceMgr); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    INDArray input = this.input.castTo(dataType);

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
    weightGrad.subi(weightGrad);
    weightGrad.addi(input.mul(delta).sum(0));

    INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
    delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

    ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
    ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

    // epsilonNext is a 2d matrix
    INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
    epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example #16
Source File: DropoutLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
}
Example #17
Source File: Upsampling2D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);

    return new Pair<>(gradient, epsOut);
}
Example #18
Source File: Upsampling1D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(intArgs)
            .addInputs(input, epsilon)
            .addOutputs(reshapedEpsilon)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    Gradient gradient = new DefaultGradient();

    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
}
Example #19
Source File: Cnn3DLossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
Example #20
Source File: Cropping2DLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
Example #21
Source File: EmbeddingLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        delta.muliColumnVector(maskArray.castTo(dataType));
    }

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    weightGradients.assign(0);

    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);
    }

    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);

    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    if(hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);
    }

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
}
Example #22
Source File: LossLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
/** Returns tuple: {Gradient,Delta,Output} given preOut */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
Example #23
Source File: LinearModel.java From FederatedAndroidTrainer with MIT License | 5 votes |
@Override
public void updateWeights(INDArray remoteGradient) {
    Log.d(TAG, "Remote Gradient " + remoteGradient);
    Gradient gradient = new DefaultGradient(remoteGradient);
    Log.d(TAG, "Updating weights from server with gradient " + gradient.gradient().toString());

    // TODO Transform the remoteGradient flattened array into the map required by the network?
    Map<String, INDArray> netGradients = mNetwork.gradient().gradientForVariable();
    for (Map.Entry<String, INDArray> entry : netGradients.entrySet()) {
        Log.d(TAG, entry.getKey());
        for (int i : entry.getValue().shape()) {
            Log.d(TAG, "Shape " + i);
        }
        for (int i = 0; i < entry.getValue().shape().length; i++) {
            Log.d(TAG, "Size (" + i + ")" + entry.getValue().size(i));
        }
    }

    Log.d(TAG, "Updating weights with INDArray object");
    INDArray params = mNetwork.params(true);
    params.addi(remoteGradient);
    /*
        0_W  Shape 2   Shape 10
        0_b  Shape 1   Shape 10
        1_W  Shape 10  Shape 1
        1_b  Shape 1   Shape 1
        Weights updated
     */
    mNetwork.update(gradient);
    Log.d(TAG, "Weights updated");
}
Example #24
Source File: ActivationLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray temp = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());
    INDArray delta = layerConf().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params
    if(delta == epsilon){
        //Edge case: identity activation + external errors -> no-op
        delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, delta);
    }
    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta); //Usually a no-op (except for perhaps identity)
    Gradient ret = new DefaultGradient();
    return new Pair<>(ret, delta);
}
Example #25
Source File: SubsamplingLayerTest.java From deeplearning4j with Apache License 2.0 | 5 votes |
private Gradient createPrevGradient() {
    Gradient gradient = new DefaultGradient();
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);

    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
}
Example #26
Source File: BarnesHutTsne.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Gradient gradient() {
    /*MemoryWorkspace workspace =
            workspaceMode == WorkspaceMode.NONE ? new DummyWorkspace()
                    : Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(
                            workspaceConfigurationExternal, workspaceExternal);
    try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/
    {
        if (yIncs == null)
            yIncs = Y.like();
        if (gains == null)
            gains = Y.ulike().assign(1.0D);

        AtomicDouble sumQ = new AtomicDouble(0);
        /* Calculate gradient based on barnes hut approximation with positive and negative forces */
        INDArray posF = Y.like();
        INDArray negF = Y.like();

        tree = new SpTree(Y);

        tree.computeEdgeForces(rows, cols, vals, N, posF);
        for (int n = 0; n < N; n++) {
            INDArray temp = negF.slice(n);
            tree.computeNonEdgeForces(n, theta, temp, sumQ);
        }
        INDArray dC = posF.subi(negF.divi(sumQ));

        Gradient ret = new DefaultGradient();
        ret.gradientForVariable().put(Y_GRAD, dC);

        return ret;
    }
}
Example #27
Source File: BasePretrainNetwork.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    Pair<Gradient, INDArray> result = super.backpropGradient(epsilon, workspaceMgr);
    ((DefaultGradient) result.getFirst()).setFlattenedGradient(gradientsFlattened);

    //During backprop, visible bias gradients are set to 0 - this is necessary due to the gradient view mechanics
    // that DL4J uses
    INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);
    vBiasGradient.assign(0);

    weightNoiseParams.clear();

    return result;
}
Example #28
Source File: FrozenLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
public FrozenLayer(Layer insideLayer) {
    super(insideLayer);
    if (insideLayer instanceof OutputLayer) {
        throw new IllegalArgumentException("Output Layers are not allowed to be frozen " + layerId());
    }
    this.zeroGradient = new DefaultGradient(insideLayer.params());
    if (insideLayer.paramTable() != null) {
        for (String paramType : insideLayer.paramTable().keySet()) {
            //save memory??
            zeroGradient.setGradientFor(paramType, null);
        }
    }
}
Example #29
Source File: TestComputationGraphNetwork.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Test
public void testGradientUpdate() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);

    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("first_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("first_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3));

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
            .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
            .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first")
            .setOutputs("output").build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    net.fit(iter.next());

    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    net.update(expectedGradient);

    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // Update params with set
    net.setParam("first_W", Nd4j.ones(4, 5));
    net.setParam("first_b", Nd4j.ones(1, 5));
    net.setParam("output_W", Nd4j.ones(5, 3));
    net.setParam("output_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();

    // Confirm params
    assertEquals(Nd4j.ones(1, 43), actualParams);

    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
Example #30
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) {
    // Gradients: d(x^2)/dx = 2x
    INDArray gradient = parameters.mul(2);
    Gradient g = new DefaultGradient();
    g.gradientForVariable().put("W", this.gradientView);
    this.gradient = g;
    this.score = Nd4j.getBlasWrapper().dot(parameters, parameters); //sum_i x_i^2
    this.gradientView.assign(gradient);
}