Example #1
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the parameter gradients and the loss delta for the given pre-output.
 * Returns a pair: {Gradient, Delta}. (The earlier wording mentioned a third "Output"
 * element, but the method returns a two-element Pair.)
 *
 * @param preOut       array passed to the loss function's computeGradient as the pre-output
 * @param workspaceMgr workspace manager used to obtain the 2d labels and to leverage the delta
 * @return pair of the gradient (weight and bias views) and the delta after leverageTo(ACTIVATION_GRAD)
 */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    //INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    // The weight gradient is written straight into the existing gradient view (gemm output argument).
    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input.castTo(weightGradView.dataType()), delta, weightGradView, true, false, 1.0, 0.0); //Equivalent to:  weightGradView.assign(input.transpose().mmul(delta));         //TODO can we avoid cast?
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    // NOTE(review): the next three lines are indented one level deeper than their neighbours, which
    // suggests an enclosing guard (presumably a bias check) was dropped from this excerpt - confirm.
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); //biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #2
Source File:    From jstarcraft-rns with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the parameter gradients and the loss delta for the given pre-output.
 *
 * @param preOut       array passed to the loss function's computeGradient as the pre-output
 * @param workspaceMgr workspace manager used to obtain the 2d labels and to leverage the delta
 * @return pair of the gradient and the delta after leverageTo(ACTIVATION_GRAD)
 */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray labels2d = getLabels2d(workspaceMgr, ArrayType.BP_WORKING_MEM);
    // INDArray delta = lossFunction.computeGradient(labels2d, preOut,
    // layerConf().getActivationFunction(), maskArray);
    INDArray delta = lossFunction.computeGradient(labels2d, preOut, layerConf().getActivationFn(), maskArray);

    Gradient gradient = new DefaultGradient();

    // The weight gradient is written straight into the existing gradient view (gemm output argument);
    // unlike some sibling implementations, the input is passed without a cast here.
    INDArray weightGradView = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);
    Nd4j.gemm(input, delta, weightGradView, true, false, 1.0, 0.0); // Equivalent to: weightGradView.assign(input.transpose().mmul(delta));
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradView);

    // The bias gradient is only produced when the layer reports having a bias.
    // NOTE(review): the closing brace of this if-block is missing from the excerpt.
    if (hasBias()) {
        INDArray biasGradView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradView, 0); // biasGradView is initialized/zeroed first in sum op
        gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradView);

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #3
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Backward pass built around the "batch_to_space_nd" custom op.
 *
 * @param epsilon      incoming gradient; permuted to (0, 2, 3, 1) when the configured format is NCHW
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the outgoing epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {

    INDArray input = this.input.castTo(dataType);   //Cast to network dtype if required (no-op if already correct type)

    boolean nchw = layerConf().getFormat() == CNN2DFormat.NCHW;

    // Output buffer has the same shape and data type as the (cast) input, in 'c' order.
    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape(), 'c');

    Gradient gradient = new DefaultGradient();

    // For NCHW both arrays are permuted with (0, 2, 3, 1); otherwise they are used as-is.
    INDArray epsilonNHWC = nchw ? epsilon.permute(0, 2, 3, 1) : epsilon;
    INDArray outEpsilonNHWC = nchw ? outEpsilon.permute(0, 2, 3, 1) : outEpsilon;

    // NOTE(review): the builder chain below is cut off in this excerpt - no outputs, build() call or
    // execution of the op are visible, and outEpsilonNHWC is not used in the visible code.
    CustomOp op = DynamicCustomOp.builder("batch_to_space_nd")
            .addInputs(epsilonNHWC, getBlocksArray(), getPaddingArray())

    outEpsilon = backpropDropOutIfPresent(outEpsilon);
    return new Pair<>(gradient, outEpsilon);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #4
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Backward pass that executes a LocalResponseNormalization op through a reusable op context.
 *
 * @param input        layer input; also determines the shape and data type of the returned array
 * @param epsilon      incoming gradient
 * @param k            passed to the context as the first T-argument
 * @param n            not read anywhere in the visible code (NOTE(review): a line using it may be missing)
 * @param alpha        passed to the context as the second T-argument
 * @param beta         passed to the context as the third T-argument
 * @param workspaceMgr workspace manager used to allocate the result
 * @return pair of an empty DefaultGradient and the gradient at the input
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    // The context is created lazily on first use and kept in a field for later calls.
    if(context == null){
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
    } else
    // NOTE(review): the statement belonging to this else is missing from the excerpt.

    LocalResponseNormalization op = new LocalResponseNormalization();

    // NOTE(review): both calls below target input slot 0, so the second replaces the first -
    // confirm whether epsilon is meant to go into slot 1.
    context.setInputArray(0, input);
    context.setInputArray(0, epsilon);
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);
    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #5
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Backward pass that crops the incoming epsilon back to the input's height and width.
 *
 * @param epsilon      incoming gradient, sliced using the configured padding offsets
 * @param workspaceMgr workspace manager used to leverage the result
 * @return pair of an empty DefaultGradient and the cropped epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();

    // Height/width positions depend on the data format: (2, 3) for NCHW, (1, 2) otherwise.
    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    int hIdx = nchw ? 2 : 1;
    int wIdx = nchw ? 3 : 2;

    INDArray epsNext;
    int[] padding = layerConf().getPadding();
    // Same condition as 'nchw' above. The intervals start at padding[0] (height) and padding[2] (width)
    // and span the input's size along the corresponding dimension.
    if(layerConf().getDataFormat() == CNN2DFormat.NCHW){
        epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]));
    } else {
        // NOTE(review): this call is cut off after the trailing comma - the last index argument and
        // the closing of the else-block are missing from the excerpt.
        epsNext = epsilon.get(NDArrayIndex.all(),
                NDArrayIndex.interval(padding[0], padding[0] + inShape[hIdx]),
                NDArrayIndex.interval(padding[2], padding[2] + inShape[wIdx]),

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #6
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Backward pass through a PReLU activation built from this layer's alpha parameter.
 *
 * @param epsilon      incoming gradient passed to the activation's backprop
 * @param workspaceMgr workspace manager used to duplicate the input and leverage the delta
 * @return pair of the gradient (weight key mapped to the weight gradient view) and the delta
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    // Backprop is run on a duplicate of the input that keeps the input's ordering.
    INDArray layerInput = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());

    INDArray alpha = getParam(PReLUParamInitializer.WEIGHT_KEY);
    IActivation prelu = new ActivationPReLU(alpha, axes);

    // backprop returns a pair: first element is used as delta, second as the weight gradient.
    Pair<INDArray, INDArray> deltas = prelu.backprop(layerInput, epsilon);
    INDArray delta = deltas.getFirst();
    INDArray weightGrad = deltas.getSecond();
    INDArray weightGradView = gradientViews.get(PReLUParamInitializer.WEIGHT_KEY);
    // NOTE(review): weightGrad is never copied into weightGradView in the visible code - a line
    // assigning it may have been dropped from this excerpt; confirm against the full source.

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);  //Usually a no-op (except for perhaps identity)
    delta = backpropDropOutIfPresent(delta);
    Gradient ret = new DefaultGradient();
    ret.setGradientFor(PReLUParamInitializer.WEIGHT_KEY, weightGradView, 'c');

    return new Pair<>(ret, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #7
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Backward pass that reduces the incoming epsilon by summing over one dimension.
 *
 * @param epsilon      incoming gradient; cast to this layer's dataType when the types differ
 * @param workspaceMgr workspace manager whose ACTIVATION_GRAD scope is borrowed for the sum
 * @return pair of an empty DefaultGradient and the summed epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {

    // NOTE(review): the closing brace of this if-block is missing from the excerpt.
    if(epsilon.dataType() != dataType){
        epsilon = epsilon.castTo(dataType);

    INDArray outEpsilon;
    try(MemoryWorkspace ws = workspaceMgr.notifyScopeBorrowed(ArrayType.ACTIVATION_GRAD)){
        // NOTE(review): as shown, sum(1) immediately overwrites sum(2); presumably an else branch
        // (sum over dimension 1 for the non-NCW format) and the closing braces were lost - confirm.
        if (layerConf().getDataFormat() == RNNFormat.NCW) {
            outEpsilon = epsilon.sum(2);
            outEpsilon = epsilon.sum(1);

    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, outEpsilon);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #8
Source File:    From deeplearning4j with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a Gradient backed by the flattened gradient array and registers the weight,
 * hidden-bias and visible-bias gradient views in it.
 *
 * NOTE(review): none of the three parameters is read in the visible code; statements assigning
 * them into the views may have been dropped from this excerpt - confirm against the full source.
 *
 * @param wGradient     weight gradient
 * @param vBiasGradient visible bias gradient
 * @param hBiasGradient hidden bias gradient
 * @return gradient holding the three gradient views
 */
protected Gradient createGradient(INDArray wGradient, INDArray vBiasGradient, INDArray hBiasGradient) {
    Gradient ret = new DefaultGradient(gradientsFlattened);
    // The order of the following statements matters! The gradient is being flattened and applied to
    // flattened params in this order.
    // The arrays need to be views, with the current Updater implementation

    //TODO: optimize this, to do it would the assigns
    INDArray wg = gradientViews.get(PretrainParamInitializer.WEIGHT_KEY);

    INDArray hbg = gradientViews.get(PretrainParamInitializer.BIAS_KEY);

    INDArray vbg = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);

    // Insertion order: weights, hidden bias, visible bias.
    ret.gradientForVariable().put(PretrainParamInitializer.WEIGHT_KEY, wg);
    ret.gradientForVariable().put(PretrainParamInitializer.BIAS_KEY, hbg);
    ret.gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vbg);

    return ret;
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #9
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that crops the incoming epsilon back to the input's size along dimensions 2, 3 and 4.
 *
 * @param epsilon      incoming gradient, indexed with five index arguments
 * @param workspaceMgr workspace manager used to leverage the result
 * @return pair of an empty DefaultGradient and the cropped epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();

    // Dimensions 0 and 1 are kept whole; dimensions 2/3/4 start at padding[0]/padding[2]/padding[4]
    // and span the input's size along the same dimension.
    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]),
            NDArrayIndex.interval(padding[2], padding[2] + inShape[3]),
            NDArrayIndex.interval(padding[4], padding[4] + inShape[4]));

    epsNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #10
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass built around the "depth_to_space" custom op.
 *
 * @param epsilon      incoming gradient; its data type is used for the cast input
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the outgoing epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {

    INDArray input = this.input.castTo(epsilon.dataType());

    // Dimension positions depend on the data format (NCHW vs. the alternative handled as NHWC).
    boolean nchw = layerConf().getDataFormat() == CNN2DFormat.NCHW;
    long miniBatch = input.size(0);
    long inDepth = input.size(nchw ? 1 : 3);
    long inH = input.size(nchw ? 2 : 1);
    long inW = input.size(nchw ? 3 : 2);

    // The outgoing epsilon has the input's dimensions, laid out according to the format.
    long[] epsShape = nchw ?  new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray outEpsilon = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    int blockSize = getBlockSize();

    // NOTE(review): the issue reference is missing from the comment below, and the deeper indent of
    // the dup('c') line suggests a guarding condition was dropped from this excerpt.
    //Workaround for issue:
        epsilon = epsilon.dup('c');

    // NOTE(review): the builder chain is cut off - inputs, outputs, build() and the execution of
    // the op are not visible in this excerpt.
    CustomOp op = DynamicCustomOp.builder("depth_to_space")
            .addIntegerArguments(blockSize, nchw ? 0 : 1)       //nchw = 0, nhwc = 1

    return new Pair<>(gradient, outEpsilon);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #11
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that allocates an epsilon with the input's shape and takes the sub-range of
 * dimension 2 between cropping[0] and (size - cropping[1]).
 *
 * @param epsilon      incoming gradient (not read in the visible code)
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the full-size epsilon array
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, dataType, inShape, 'c');
    // NOTE(review): epsNextSubset is never used below; presumably a line copying epsilon into it
    // was dropped from this excerpt - confirm against the full source.
    INDArray epsNextSubset = epsNext.get(all(), all(), interval(cropping[0], epsNext.size(2)-cropping[1]));
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #12
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that crops the incoming epsilon back to the input's size along dimension 2.
 *
 * @param epsilon      incoming gradient, indexed with three index arguments
 * @param workspaceMgr workspace manager used to leverage the result
 * @return pair of an empty DefaultGradient and the cropped, leveraged epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();

    // Dimensions 0 and 1 are kept whole; dimension 2 starts at padding[0] and spans inShape[2].
    INDArray epsNext = epsilon.get(NDArrayIndex.all(), NDArrayIndex.all(),
            NDArrayIndex.interval(padding[0], padding[0] + inShape[2]));

    return new Pair<>((Gradient) new DefaultGradient(), workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsNext));
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #13
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass for a loss layer over rank-4 input: the delta is computed by the loss function
 * from the labels and the input, both reshaped to 2d, and then reshaped back to 4d.
 *
 * @param epsilon      not read in the visible code; the delta comes from the labels instead
 * @param workspaceMgr workspace manager used for the reshapes and for leveraging the delta
 * @return pair of an empty DefaultGradient and the 4d delta
 * @throws UnsupportedOperationException if the input is not rank 4
 * @throws IllegalStateException         if labels are null (the shape check below goes through
 *                                       Preconditions.checkState)
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    if (input.rank() != 4)
        throw new UnsupportedOperationException(
                "Input is not rank 4. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape " + layerConf().getFormat().dimensionNames());
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    Preconditions.checkState(input.equalShapes(labels), "Input and label arrays do not have same shape: %ndShape vs. %ndShape",input, labels);

    // Input, labels and (if required) the mask are brought into 2d form for the loss function.
    CNN2DFormat format = layerConf().getFormat();
    INDArray input2d = ConvolutionUtils.reshape4dTo2d(input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape4dTo2d(labels, format, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeMaskIfRequired(maskArray, input, format, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation; the loss function receives a duplicate of input2d rather than input2d itself
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    // Restore the original 4d shape of the input for the returned delta.
    INDArray delta4d = ConvolutionUtils.reshape2dTo4d(delta2d, input.shape(), format, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta4d);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #14
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that allocates an epsilon with the input's shape and data type and obtains a
 * subset of it through inputSubset.
 *
 * @param epsilon      incoming gradient (not read in the visible code)
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the full-size epsilon array
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    // NOTE(review): epsNextSubset is never used below; presumably a line copying epsilon into it
    // was dropped from this excerpt - confirm against the full source.
    INDArray epsNextSubset = inputSubset(epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #15
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
    /**
     * Backward pass for a layer with element-wise weight and bias parameters.
     *
     * @param epsilon      incoming gradient passed to the activation function's backprop
     * @param workspaceMgr workspace manager used for the pre-output and to leverage the result
     * @return pair of the gradient (weight and bias views) and the epsilon for the layer below
     */
    public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
        //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
        INDArray z = preOutput(true, workspaceMgr); //Note: using preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
        INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

        // NOTE(review): the body and closing brace of this mask check are missing from the excerpt.
        if (maskArray != null) {

        INDArray input = this.input.castTo(dataType);

        Gradient ret = new DefaultGradient();

        INDArray weightGrad =  gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
        // NOTE(review): nothing is written into weightGrad in the visible code, and the cast 'input'
        // above is unused - the weight gradient computation appears to have been dropped here.


        INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

        ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
        ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

        // epsilonNext is a 2d matrix: delta multiplied row-wise by the weight parameter vector
        INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
        epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

        epsilonNext = backpropDropOutIfPresent(epsilonNext);
        return new Pair<>(ret, epsilonNext);
        // NOTE(review): the method's closing brace is not present in this excerpt.
Example #16
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that forwards a duplicate of the incoming epsilon with no parameter gradients.
 *
 * @param epsilon      incoming gradient, duplicated into the ACTIVATION_GRAD array type
 * @param workspaceMgr workspace manager used for the duplication
 * @return pair of an empty DefaultGradient and the delta after backpropDropOutIfPresent
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    INDArray delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, epsilon);

    // NOTE(review): the body and closing brace of this mask check are missing from the excerpt.
    if (maskArray != null) {

    Gradient ret = new DefaultGradient();
    delta = backpropDropOutIfPresent(delta);
    return new Pair<>(ret, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #17
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass built around the "upsampling_bp" custom op for 4d input.
 *
 * @param epsilon      incoming gradient; passed to the op and used for the output data type
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the outgoing epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    // NOTE(review): each size is narrowed to int and then stored in a long; the casts look
    // unnecessary and would truncate sizes above Integer.MAX_VALUE - confirm before removing.
    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    // The outgoing epsilon has the input's dimensions, laid out according to the format.
    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut =  workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    // NOTE(review): the builder chain is cut off - outputs, build() and the execution of the op
    // are not visible in this excerpt.
    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)

    epsOut = backpropDropOutIfPresent(epsOut);

    return new Pair<>(gradient, epsOut);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #18
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that reuses the "upsampling_bp" custom op for 3d data by temporarily adding a
 * trailing dimension of size 1 to both epsilon and the input.
 *
 * @param epsilon      incoming gradient; reshaped to 4d and repeated along dimension 3
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the outgoing epsilon divided in place by size[0]
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {

    int[] size = ((BaseUpsamplingLayer) layerConf()).getSize();
    epsilon = epsilon.reshape(epsilon.size(0), epsilon.size(1), epsilon.size(2), 1);
    // we replicate the error term times "size" so that backprop works properly on it
    epsilon = epsilon.repeat(3, size[0]);

    // The input field is swapped for a 4d, cast version during this call and restored further down.
    INDArray originalInput = input;
    input = input.castTo(dataType).reshape(input.size(0), input.size(1), input.size(2), 1);

    long miniBatch = input.size(0);
    long inDepth = input.size(1);
    long inH = input.size(2);
    long inW = input.size(3);

    // The output is allocated flat and then viewed as a 4d 'c'-ordered array.
    INDArray outEpsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), miniBatch * inDepth * inH * inW);
    INDArray reshapedEpsilon = outEpsilon.reshape('c', miniBatch, inDepth, inH, inW);

    // NOTE(review): intArgs is not used in the visible code; the builder chain below is cut off, so
    // the integer arguments, outputs, build() and the execution of the op are not visible.
    int[] intArgs = new int[] {1}; // 1 is for NCHW

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addInputs(input, epsilon)

    Gradient gradient = new DefaultGradient();

    // Drop the temporary trailing dimension again and put the original input back.
    reshapedEpsilon = reshapedEpsilon.slice(0, 3);
    input = originalInput;

    // Since we aggregate the gradient across "size" slices, we need to normalize afterwards.
    return new Pair<>(gradient, reshapedEpsilon.divi(size[0]));
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #19
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass for a loss layer over rank-5 input: the delta is computed by the loss function
 * from the labels and the input, both reshaped to 2d, and then reshaped back to 5d.
 *
 * @param epsilon      not read in the visible code; the delta comes from the labels instead
 * @param workspaceMgr workspace manager used for the reshapes and for leveraging the delta
 * @return pair of an empty DefaultGradient and the 5d delta
 * @throws UnsupportedOperationException if the input is not rank 5
 * @throws IllegalStateException         if labels are null
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    if (input.rank() != 5)
        throw new UnsupportedOperationException(
                "Input is not rank 5. Got input with rank " + input.rank() + " " + layerId() + " with shape "
                        + Arrays.toString(input.shape()) + " - expected shape [minibatch,channels,depth,height,width]");
    if (labels == null)
        throw new IllegalStateException("Labels are not set (null)");

    // Input, labels and mask are brought into 2d form for the loss function.
    INDArray input2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), input, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray labels2d = ConvolutionUtils.reshape5dTo2d(layerConf().getDataFormat(), labels, workspaceMgr, ArrayType.FF_WORKING_MEM);
    INDArray maskReshaped = ConvolutionUtils.reshapeCnn3dMask(layerConf().getDataFormat(), maskArray, labels, workspaceMgr, ArrayType.FF_WORKING_MEM);

    // delta calculation; the loss function receives a duplicate of input2d rather than input2d itself
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta2d = lossFunction.computeGradient(labels2d, input2d.dup(input2d.ordering()), layerConf().getActivationFn(), maskReshaped);
    delta2d = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta2d);

    // Read depth/height/width/channels from the positions given by the data format:
    // NDHWC -> (1, 2, 3, 4); otherwise channels sit at position 1 and d/h/w at (2, 3, 4).
    long n = input.size(0);
    long d, h, w, c;
    if(layerConf().getDataFormat() == Convolution3D.DataFormat.NDHWC){
        d = input.size(1);
        h = input.size(2);
        w = input.size(3);
        c = input.size(4);
    } else {
        d = input.size(2);
        h = input.size(3);
        w = input.size(4);
        c = input.size(1);
    // NOTE(review): the closing brace of the else-block is missing from the excerpt.
    INDArray delta5d = ConvolutionUtils.reshape2dTo5d(layerConf().getDataFormat(), delta2d, n, d, h, w, c, workspaceMgr, ArrayType.ACTIVATION_GRAD);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();
    return new Pair<>(gradient, delta5d);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #20
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that allocates an epsilon with the input's shape and data type and obtains a
 * subset of it through inputSubset.
 *
 * @param epsilon      incoming gradient (not read in the visible code)
 * @param workspaceMgr workspace manager used to allocate the outgoing epsilon
 * @return pair of an empty DefaultGradient and the full-size epsilon array
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    val inShape = input.shape();
    INDArray epsNext = workspaceMgr.create(ArrayType.ACTIVATION_GRAD, input.dataType(), inShape, 'c');
    // NOTE(review): epsNextSubset is never used below; presumably a line copying epsilon into it
    // was dropped from this excerpt - confirm against the full source.
    INDArray epsNextSubset = inputSubset(epsNext);
    return new Pair<>((Gradient) new DefaultGradient(), epsNext);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #21
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass that scatter-adds the delta into the weight gradient view, using integer
 * indices read from the input, and optionally sums the delta into the bias gradient view.
 *
 * @param epsilon      incoming gradient passed to the activation function's backprop
 * @param workspaceMgr workspace manager used to compute the pre-output
 * @return pair of the gradient and {@code null} in place of an outgoing epsilon
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    //If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    // NOTE(review): the body and closing brace of this mask check are missing from the excerpt.
    if (maskArray != null) {

    INDArray weightGradients = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);

    // One index per input element, read as an int from column 0 of row i.
    // NOTE(review): the loop's closing brace is missing from the excerpt.
    long[] indexes = new long[(int) input.length()];
    for (int i = 0; i < indexes.length; i++) {
        indexes[i] = input.getInt(i, 0);

    // ADD update: delta is accumulated into weightGradients at the given indices.
    INDArray indices = Nd4j.createFromArray(indexes);
    Nd4j.scatterUpdate(org.nd4j.linalg.api.ops.impl.scatter.ScatterUpdate.UpdateOp.ADD, weightGradients, indices, delta, DIM_1);

    Gradient ret = new DefaultGradient();
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGradients);

    // NOTE(review): the closing brace of this if-block is missing from the excerpt.
    if(hasBias()) {
        INDArray biasGradientsView = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGradientsView, 0); //biasGradientView is initialized/zeroed first in sum op
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGradientsView);

    return new Pair<>(ret, null); //Don't bother returning epsilons: no layer below this one...
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #22
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the loss delta for the given pre-output; no parameter gradients are added.
 * Returns a pair: {Gradient, Delta}. (The earlier wording mentioned a third "Output"
 * element, but the method returns a two-element Pair.)
 *
 * @param preOut       array passed to the loss function's computeGradient as the pre-output
 * @param workspaceMgr workspace manager used to leverage the delta
 * @return pair of an empty DefaultGradient and the delta after leverageTo(ACTIVATION_GRAD)
 */
private Pair<Gradient, INDArray> getGradientsAndDelta(INDArray preOut, LayerWorkspaceMgr workspaceMgr) {
    // delta calculation
    ILossFunction lossFunction = layerConf().getLossFn();
    INDArray delta = lossFunction.computeGradient(getLabels2d(), preOut, layerConf().getActivationFn(), maskArray);

    // grab the empty gradient
    Gradient gradient = new DefaultGradient();

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);
    return new Pair<>(gradient, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #23
Source File:    From FederatedAndroidTrainer with MIT License 5 votes vote down vote up
   /**
    * Wraps the remote gradient in a DefaultGradient and logs it together with the key, shape and
    * per-dimension size of every gradient currently held by the network.
    *
    * NOTE(review): despite the log messages, no statement in the visible code applies the gradient
    * or modifies the parameters; lines may be missing from this excerpt.
    *
    * @param remoteGradient flattened gradient received from the server
    */
   public void updateWeights(INDArray remoteGradient) {
       Log.d(TAG, "Remote Gradient " + remoteGradient);
       Gradient gradient = new DefaultGradient(remoteGradient);
       Log.d(TAG, "Updating weights from server with gradient " + gradient.gradient().toString());
       // TODO Transform the remoteGradient flattened array into the map required by the network?
       Map<String, INDArray> netGradients = mNetwork.gradient().gradientForVariable();
       // Debug output only: one log line per key, per shape entry and per dimension size.
       // NOTE(review): the closing braces of the three loops below are missing from the excerpt.
       for (Map.Entry<String, INDArray> entry : netGradients.entrySet()) {
           Log.d(TAG, entry.getKey());
           for (int i : entry.getValue().shape()) {
               Log.d(TAG, "Shape " + i);
           for (int i = 0; i < entry.getValue().shape().length; i++) {
               Log.d(TAG, "Size (" + i + ")" + entry.getValue().size(i));
       Log.d(TAG, "Updating weights with INDArray object");
       // NOTE(review): params is fetched but not used in the visible code.
       INDArray params = mNetwork.params(true);

       Log.d(TAG, "Weights updated");
       // NOTE(review): the method's closing brace is not present in this excerpt.
Example #24
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Backward pass through the configured activation function, with no parameter gradients.
 *
 * @param epsilon      incoming gradient passed to the activation function's backprop
 * @param workspaceMgr workspace manager used to duplicate the input and leverage the delta
 * @return pair of an empty DefaultGradient and the delta
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    // Backprop is run on a duplicate of the input that keeps the input's ordering.
    INDArray temp = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, input, input.ordering());
    INDArray delta = layerConf().getActivationFn().backprop(temp, epsilon).getFirst(); //TODO handle activation function params
    // Reference comparison: if backprop handed back the very same epsilon object, duplicate it
    // so the returned array is a separate instance.
    // NOTE(review): the closing brace of this if-block is missing from the excerpt.
    if(delta == epsilon ){
        //Edge case: identity activation + external errors -> no-op
        delta = workspaceMgr.dup(ArrayType.ACTIVATION_GRAD, delta);

    delta = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, delta);  //Usually a no-op (except for perhaps identity)
    Gradient ret = new DefaultGradient();
    return new Pair<>(ret, delta);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #25
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a Gradient whose bias and weight entries both hold an all-ones array of shape
 * (nExamples, nChannelsIn, inputHeight, inputWidth).
 *
 * @return gradient with the same array instance registered under BIAS_KEY and WEIGHT_KEY
 */
private Gradient createPrevGradient() {
    Gradient gradient = new DefaultGradient();
    INDArray pseudoGradients = Nd4j.ones(nExamples, nChannelsIn, inputHeight, inputWidth);

    // Both keys share one array object, so an in-place change through one key is visible via the other.
    gradient.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, pseudoGradients);
    gradient.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, pseudoGradients);
    return gradient;
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #26
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the gradient as positive (edge) forces minus normalised negative (non-edge) forces,
 * both obtained from an SpTree built over Y, and returns it under the Y_GRAD key.
 *
 * @return gradient containing a single entry, Y_GRAD
 */
public Gradient gradient() {
    /*MemoryWorkspace workspace =
            workspaceMode == WorkspaceMode.NONE ? new DummyWorkspace()
                    : Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(

    try (MemoryWorkspace ws = workspace.notifyScopeEntered())*/ {

        // NOTE(review): the right-hand sides of 'yIncs', 'posF' and 'negF' are missing from this
        // excerpt ("=;"); the initial values cannot be determined from here.
        if (yIncs == null)
            yIncs =;
        if (gains == null)
            gains = Y.ulike().assign(1.0D);

        AtomicDouble sumQ = new AtomicDouble(0);
        /* Calculate gradient based on barnes hut approximation with positive and negative forces */
        INDArray posF =;
        INDArray negF =;

        // The tree is rebuilt from the current Y on every call.
        tree = new SpTree(Y);

        tree.computeEdgeForces(rows, cols, vals, N, posF);
        // Non-edge forces are computed per point into slice n of negF; sumQ is passed along to each call.
        // NOTE(review): the loop's closing brace is missing from the excerpt.
        for (int n = 0; n < N; n++) {
            INDArray temp = negF.slice(n);
            tree.computeNonEdgeForces(n, theta, temp, sumQ);
        // Both operations are in place: negF is divided by sumQ, then subtracted from posF.
        INDArray dC = posF.subi(negF.divi(sumQ));

        Gradient ret = new DefaultGradient();
        ret.gradientForVariable().put(Y_GRAD, dC);
        return ret;
        // NOTE(review): the closing braces of the bare block and the method are not present in this excerpt.
Example #27
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Delegates to the superclass backward pass, then attaches the flattened gradient array and the
 * visible-bias gradient view to the resulting gradient.
 *
 * @param epsilon      incoming gradient, forwarded unchanged to the superclass
 * @param workspaceMgr workspace manager, forwarded unchanged to the superclass
 * @return the superclass result, with its gradient augmented as described
 */
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    Pair<Gradient, INDArray> result = super.backpropGradient(epsilon, workspaceMgr);
    // The cast relies on the superclass returning a DefaultGradient instance.
    ((DefaultGradient) result.getFirst()).setFlattenedGradient(gradientsFlattened);

    //During backprop, visible bias gradients are set to 0 - this is necessary due to the gradient view mechanics
    // that DL4J uses
    // NOTE(review): no statement in the visible code zeroes the view; it is only registered below.
    INDArray vBiasGradient = gradientViews.get(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    result.getFirst().gradientForVariable().put(PretrainParamInitializer.VISIBLE_BIAS_KEY, vBiasGradient);


    return result;
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #28
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a frozen wrapper and prepares its zero gradient: a DefaultGradient over the inner
 * layer's parameters with every per-parameter entry set to {@code null}.
 *
 * @param insideLayer layer to wrap; must not be an OutputLayer
 * @throws IllegalArgumentException if {@code insideLayer} is an OutputLayer
 */
public FrozenLayer(Layer insideLayer) {
    // NOTE(review): the closing brace of this if-block is missing from the excerpt.
    if (insideLayer instanceof OutputLayer) {
        throw new IllegalArgumentException("Output Layers are not allowed to be frozen " + layerId());
    this.zeroGradient = new DefaultGradient(insideLayer.params());
    if (insideLayer.paramTable() != null) {
        for (String paramType : insideLayer.paramTable().keySet()) {
            //save memory??
            zeroGradient.setGradientFor(paramType, null);
    // NOTE(review): the closing braces of the loop, the if-block and the constructor are not
    // present in this excerpt, and any further initialisation is not visible.
Example #29
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a ComputationGraph's gradient and parameters can be replaced, using all-ones
 * arrays for a two-layer graph (dense 4 -> 5, output 5 -> 3).
 *
 * NOTE(review): several steps are missing from this excerpt (end of the configuration builder,
 * graph initialisation, and the updates between the paired assertions).
 */
public void testGradientUpdate() {
    // NOTE(review): iter is not used in the visible code.
    DataSetIterator iter = new IrisDataSetIterator(1, 1);

    // Expected gradient: all-ones arrays matching the weight/bias shapes of both layers.
    Gradient expectedGradient = new DefaultGradient();
    expectedGradient.setGradientFor("first_W", Nd4j.ones(4, 5));
    expectedGradient.setGradientFor("first_b", Nd4j.ones(1, 5));
    expectedGradient.setGradientFor("output_W", Nd4j.ones(5, 3));
    expectedGradient.setGradientFor("output_b", Nd4j.ones(1, 3));

    // NOTE(review): the builder chain is cut off - setOutputs/build are not visible.
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .addInputs("input").addLayer("first", new DenseLayer.Builder().nIn(4).nOut(5).build(), "input")
            .addLayer("output", new OutputLayer.Builder().nIn(5).nOut(3).activation(Activation.SOFTMAX).build(), "first")

    ComputationGraph net = new ComputationGraph(conf);
    Gradient actualGradient = net.gradient;
    assertNotEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // NOTE(review): whatever makes the gradients equal between these two assertions is not visible.
    actualGradient = net.gradient;
    assertEquals(expectedGradient.getGradientFor("first_W"), actualGradient.getGradientFor("first_W"));

    // Update params with set
    net.setParam("first_W", Nd4j.ones(4, 5));
    net.setParam("first_b", Nd4j.ones(1, 5));
    net.setParam("output_W", Nd4j.ones(5, 3));
    net.setParam("output_b", Nd4j.ones(1, 3));
    INDArray actualParams = net.params();

    // Confirm params: 4*5 + 5 + 5*3 + 3 = 43 values in total
    assertEquals(Nd4j.ones(1, 43), actualParams);

    // NOTE(review): the step that raises every parameter by 1 before this check is not visible.
    actualParams = net.params();
    assertEquals(Nd4j.ones(1, 43).addi(1), actualParams);
    // NOTE(review): the method's closing brace is not present in this excerpt.
Example #30
Source File:    From deeplearning4j with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the gradient and score for the objective sum_i x_i^2 over the parameters.
 *
 * @param workspaceMgr workspace manager (not read in the visible code)
 */
public void computeGradientAndScore(LayerWorkspaceMgr workspaceMgr) {
    // Gradients: d(x^2)/dx = 2x
    // NOTE(review): the local 'gradient' is not used afterwards - the Gradient below stores
    // this.gradientView instead; a line copying 2x into the view may be missing from this excerpt.
    INDArray gradient = parameters.mul(2);
    Gradient g = new DefaultGradient();
    g.gradientForVariable().put("W", this.gradientView);
    this.gradient = g;
    this.score = Nd4j.getBlasWrapper().dot(parameters, parameters); //sum_i x_i^2
    // NOTE(review): the method's closing brace is not present in this excerpt.