org.deeplearning4j.nn.gradient.DefaultGradient Java Examples

The following examples show how to use org.deeplearning4j.nn.gradient.DefaultGradient. Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
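Before the project examples, here is a minimal, self-contained sketch of the basic DefaultGradient API that the snippets below rely on: create the gradient container, register per-parameter gradient arrays under string keys, and read them back. The parameter keys "W" and "b", the array shapes, and the class name DefaultGradientSketch are illustrative assumptions, not taken from any of the projects.

import org.deeplearning4j.nn.gradient.DefaultGradient;
import org.deeplearning4j.nn.gradient.Gradient;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

import java.util.Arrays;
import java.util.Map;

public class DefaultGradientSketch {
    public static void main(String[] args) {
        // An empty gradient container; per-parameter gradients are registered under string keys
        Gradient gradient = new DefaultGradient();
        gradient.setGradientFor("W", Nd4j.zeros(4, 5));    // hypothetical weight gradient
        gradient.setGradientFor("b", Nd4j.zeros(1, 5));    // hypothetical bias gradient

        // Individual lookup by key
        INDArray weightGrad = gradient.getGradientFor("W");
        System.out.println("W gradient shape: " + Arrays.toString(weightGrad.shape()));

        // Or iterate the full variable -> gradient map, as many of the examples below do
        for (Map.Entry<String, INDArray> entry : gradient.gradientForVariable().entrySet()) {
            System.out.println(entry.getKey() + " -> " + Arrays.toString(entry.getValue().shape()));
        }
    }
}

Several of the examples below also use the DefaultGradient(INDArray flattenedGradient) constructor, which wraps an existing flattened gradient view (see Example #8, Example #23 and Example #28).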
Example #1
Source File: BaseOutputLayer.java    From deeplearning4j with Apache License 2.0
/** Returns the pair {Gradient, Delta} for the given preOut activations */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));         //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if(hasBias()){
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example #2
Source File: DeepFMOutputLayer.java    From jstarcraft-rns with Apache License 2.0
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut,
    // layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example #3
Source File: SpaceToBatch.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(dataType);   //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())
            .addOutputs(outEpsilonNHWC)
            .callInplace(false)
            .build();
    Nd4j.exec(op);

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
}
 
Example #4
Source File: MKLDNNLocalResponseNormalizationHelper.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    if(context == null){
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int)n);
    } else
        context.purge();

    LocalResponseNormalization op = new LocalResponseNormalization();

    context.setInputArray(0, input);
    context.setInputArray(1, epsilon);
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);
    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
}
 
Example #5
Source File: ZeroPaddingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    if(layerConf().getDataFormat() == CNN2DFormat.NCHW){
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        //NHWC
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),
                NDArrayIndex.all());
    }

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #6
Source File: PReLU.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray layerInput = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());

    INDArray alpha = getParam(PReLUParamInitializer.WEIGHT_KEY);
    IActivation prelu = new ActivationPReLU(alpha, axes);

    Pair<INDArray, INDArray> deltas = prelu.backprop(layerInput, epsilon);
    INDArray delta = deltas.getFirst();
    INDArray weightGrad = deltas.getSecond();
    INDArray weightGradView = gradientViews.get(PReLUParamInitializer.WEIGHT_KEY);
    weightGradView.assign(weightGrad);


    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);  //Usually a no-op (except for perhaps identity)
    delta = backpropDropOutIfPresent(delta);
    Gradient ret = new DefaultGradient();
    ret.setGradientFor(PReLUParamInitializer.WEIGHT_KEY, weightGradView, 'c');

    return new Pair<>(ret, delta);
}
 
Example #7
Source File: RepeatVector.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    if(epsilon.dataType() != dataType){
        epsilon = epsilon.castTo(dataType);
    }

    INDArray outEpsilon;
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)){
        if (layerConf().getDataFormat() == RNNFormat.NCW) {
            outEpsilon = epsilon.sum(2);
        }else{
            outEpsilon = epsilon.sum(1);
        }
    }

    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, outEpsilon);
}
 
Example #8
Source File: BasePretrainNetwork.java    From deeplearning4j with Apache License 2.0
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);
    // The order of the following statements matters! The gradient is being flattened and applied to
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation

    //TODO: optimize this to avoid the assigns
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);
    wg.assign(wGradient);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);
    hbg.assign(hBiasGradient);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    vbg.assign(vBiasGradient);

    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);

    return ret;
}
 
Example #9
Source File: ZeroPadding3DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]),
            NDArrayIndex.interval(padding[2], padding[2] + inShape[3]),
            NDArrayIndex.interval(padding[4], padding[4] + inShape[4]));

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #10
Source File: SpaceToDepth.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    INDArray input = this.input.castTo(epsilon.dataType());

    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    long miniBatch = input.size(0);
    long inDepth = input.size(nchw ? 1 : 3);
    long inH = input.size(nchw ? 2 : 1);
    long inW = input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ?  new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray outEpsilon = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    int blockSize = getBlockSize();

    //Workaround for issue: https://github.com/eclipse/deeplearning4j/issues/8859
    if(!Shape.hasDefaultStridesForShape(epsilon))
        epsilon = epsilon.dup('c');

    CustomOp op = DynamicCustomOp.builder("depth_to_space")
            .addInputs(epsilon)
            .addIntegerArguments(blockSize, nchw ? 0 : 1)       //nchw = 0, nhwc = 1
            .addOutputs(outEpsilon)
            .build();
    Nd4j.getExecutioner().exec(op);

    return new Pair<>(gradient, outEpsilon);
}
 
Example #11
Source File: Cropping1DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, dataType, inShape, 'c');
    INDArray epsNextSubset = epsNext.get(all(), all(), interval(cropping[0], epsNext.size(2)-cropping[1]));
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #12
Source File: ZeroPadding1DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    val inShape = input.shape();

    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));

    return new Pair<>((Gradient) new DefaultGradient(), workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
}
 
Example #13
Source File: CnnLossLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 4)
        throw new UnsupportedOperationException(
                "Input is not rank 4. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape " + layerConf().getFormat().dimensionNames());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    Preconditions.checkState(input.equalShapes(labels), "Input and label arrays do not have same shape: %ndShape vs. %ndShape",input, labels);

    CNN2DFormat format = layerConf().getFormat();
    INDArray input2d = ConvolutionUtils.reshape4dTo2d(input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape4dTo2d(labels, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeMaskIfRequired(maskArray, input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), format, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta4d);
}
 
Example #14
Source File: Cropping3DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #15
Source File: ElementWiseMultiplicationLayer.java    From deeplearning4j with Apache License 2.0
@Override
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
        //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
        INDArray z = preOutput(true, workspaceMgr); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
        INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

        if (maskArray != null) {
            applyMask(delta);
        }

        INDArray input = this.input.castTo(dataType);

        Gradient ret = new DefaultGradient();

        INDArray weightGrad =  gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
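        // weightGrad is a view into the flattened gradient array; subtracting it from itself zeroes it in place before accumulating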
        weightGrad.subi(weightGrad);

        weightGrad.addi(input.mul(delta).sum(0));

        INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

        ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
        ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

//      epsilonNext is a 2d matrix
        INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
        epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

        epsilonNext = backpropDropOutIfPresent(epsilonNext);
        return new Pair<>(ret, epsilonNext);
    }
 
Example #16
Source File: DropoutLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    if (maskArray != null) {
        delta.muliColumnVector(maskArray);
    }

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
}
 
Example #17
Source File: Upsampling2D.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut =  workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);

    return new Pair<>(gradient, epsOut);
}
 
Example #18
Source File: Upsampling1D.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);


    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(intArgs)
            .addInputs(input, epsilon)
            .addOutputs(reshapedEpsilon)
            .callInplace(false)
            .build();
    Nd4j.getExecutioner().exec(op);

    Gradient gradient = new DefaultGradient();

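    // take index 0 along the trailing size-1 dimension (dim 3), restoring the original 3D [minibatch, channels, length] shape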
    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
}
 
Example #19
Source File: Cnn3DLossLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    long n = input.size(0);
    long d, h, w, c;
    if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    }
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
}
 
Example #20
Source File: Cropping2DLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    INDArray epsNextSubset = inputSubset(epsNext);
    epsNextSubset.assign(epsilon);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
}
 
Example #21
Source File: EmbeddingLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        delta.muliColumnVector(maskArray.castTo(dataType));
    }

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    weightGradients.assign(0);

    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);
    }

    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);


    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    if(hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);
    }

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
}
 
Example #22
Source File: LossLayer.java    From deeplearning4j with Apache License 2.0
/** Returns the pair {Gradient, Delta} for the given preOut activations */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
}
 
Example #23
Source File: LinearModel.java    From FederatedAndroidTrainer with MIT License
@Override
   public void updateWeights(INDArray remoteGradient) {
       Log.d(TAG, "Remote Gradient " + remoteGradient);
       Gradient gradient = new DefaultGradient(remoteGradient);
       Log.d(TAG, "Updating weights from server with gradient " + gradient.gradient().toString());
       // TODO Transform the remoteGradient flattened array into the map required by the network?
       Map<String, INDArray> netGradients = mNetwork.gradient().gradientForVariable();
       for (Map.Entry<String, INDArray> entry : netGradients.entrySet()) {
           Log.d(TAG, entry.getKey());
           for (int i : entry.getValue().shape()) {
               Log.d(TAG, "Shape " + i);
           }
           for (int i = 0; i < entry.getValue().shape().length; i++) {
               Log.d(TAG, "Size (" + i + ")" + entry.getValue().size(i));
           }
       }
       Log.d(TAG, "Updating weights with INDArray object");
       INDArray params = mNetwork.params(true);
       params.addi(remoteGradient);

       /*
0_W
Shape 2
Shape 10
0_b
Shape 1
Shape 10
1_W
Shape 10
Shape 1
1_b
Shape 1
Shape 1
Weights updated
        */

       mNetwork.update(gradient);
       Log.d(TAG, "Weights updated");
   }
 
Example #24
Source File: ActivationLayer.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    INDArray temp = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());
    INDArray delta = layerConf().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params
    if(delta == epsilon ){
        //Edge case: identity activation + external errors -> no-op
        delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, delta);
    }

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);  //Usually a no-op (except for perhaps identity)
    Gradient ret = new DefaultGradient();
    return new Pair<>(ret, delta);
}
 
Example #25
Source File: SubsamplingLayerTest.java    From deeplearning4j with Apache License 2.0
private Gradient createPrevGradient() {
    Gradient gradient = new DefaultGradient();
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);

    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
}
 
Example #26
Source File: BarnesHutTsne.java    From deeplearning4j with Apache License 2.0
@Override
public Gradient gradient() {
    /*MemoryWorkspace workspace =
            workspaceMode == WorkspaceMode.NONE ? new DummyWorkspace()
                    : Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(
                    workspaceConfigurationExternal,
                    workspaceExternal);


    try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ {


        if (yIncs == null)
            yIncs = Y.like();
        if (gains == null)
            gains = Y.ulike().assign(1.0D);

        AtomicDouble sumQ = new AtomicDouble(0);
        /* Calculate gradient based on barnes hut approximation with positive and negative forces */
        INDArray posF = Y.like();
        INDArray negF = Y.like();

        tree = new SpTree(Y);

        tree.computeEdgeForces(rows, cols, vals, N, posF);
        for (int n = 0; n < N; n++) {
            INDArray temp = negF.slice(n);
            tree.computeNonEdgeForces(n, theta, temp, sumQ);
        }
        INDArray dC = posF.subi(negF.divi(sumQ));

        Gradient ret = new DefaultGradient();
        ret.gradientForVariable().put(Y_GRAD, dC);
        return ret;
    }
}
 
Example #27
Source File: BasePretrainNetwork.java    From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    Pair<Gradient, INDArray> result = super.backpropGradient(epsilon, workspaceMgr);
    ((DefaultGradient) result.getFirst()).setFlattenedGradient(gradientsFlattened);

    //During backprop, visible bias gradients are set to 0 - this is necessary due to the gradient view mechanics
    // that DL4J uses
    INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);
    vBiasGradient.assign(0);

    weightNoiseParams.clear();

    return result;
}
 
Example #28
Source File: FrozenLayer.java    From deeplearning4j with Apache License 2.0
public FrozenLayer(Layer insideLayer) {
    super(insideLayer);
    if (insideLayer instanceof OutputLayer) {
        throw new IllegalArgumentException("Output Layers are not allowed to be frozen " + layerId());
    }
    this.zeroGradient = new DefaultGradient(insideLayer.params());
    if (insideLayer.paramTable() != null) {
        for (String paramType : insideLayer.paramTable().keySet()) {
            //save memory??
            zeroGradient.setGradientFor(paramType, null);
        }
    }
}
 
Example #29
Source File: TestComputationGraphNetwork.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientUpdate() {
    DataSetIterator iter = new IrisDataSetIterator(1, 1);

    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("first_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("first_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3));

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).graphBuilder()
            .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
            .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first")
            .setOutputs("output").build();

    ComputationGraph net = new ComputationGraph(conf);
    net.init();
    net.fit(iter.next());
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    net.update(expectedGradient);
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // Update params with set
    net.setParam("first_W", Nd4j.ones(4, 5));
    net.setParam("first_b", Nd4j.ones(1, 5));
    net.setParam("output_W", Nd4j.ones(5, 3));
    net.setParam("output_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();

    // Confirm params
    assertEquals(Nd4j.ones(1, 43), actualParams);

    net.update(expectedGradient);
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
}
 
Example #30
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
@Override
public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) {
    // Gradients: d(x^2)/dx = 2x
    INDArray gradient = parameters.mul(2);
    Gradient g = new DefaultGradient();
    g.gradientForVariable().put("W", this.gradientView);
    this.gradient = g;
    this.score = Nd4j.getBlasWrapper().dot(parameters, parameters); //sum_i x_i^2
    this.gradientView.assign(gradient);
}