Java Code Examples for org.nd4j.linalg.api.ndarray.INDArray#mulRowVector()
The following examples show how to use org.nd4j.linalg.api.ndarray.INDArray#mulRowVector().
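For orientation before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the array values are illustrative). It shows the broadcasting behaviour of mulRowVector(): every row of a 2-D array is multiplied element-wise by a row vector of matching length, returning a new array, while the in-place variant muliRowVector() modifies the receiver.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MulRowVectorDemo {
    public static void main(String[] args) {
        // Illustrative values (not from the examples below)
        INDArray matrix = Nd4j.create(new double[][]{{1, 2, 3}, {4, 5, 6}});
        INDArray row = Nd4j.create(new double[]{10, 100, 1000});

        // Out-of-place: returns a new array; rows become [10, 200, 3000] and [40, 500, 6000]
        INDArray scaled = matrix.mulRowVector(row);
        System.out.println(scaled);

        // In-place variant: 'matrix' itself now holds the scaled values
        matrix.muliRowVector(row);
        System.out.println(matrix);
    }
}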
Example 1
Source File: LayerOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testLayerNormNoBias() {
    final INDArray random = Nd4j.rand(DataType.DOUBLE, 10, 4);
    final INDArray standardized = random.ulike();
    Nd4j.getExecutioner().exec(new Standardize(random, standardized, 1));

    final INDArray gain = Nd4j.rand(DataType.DOUBLE, 4);
    final INDArray res = standardized.mulRowVector(gain);
    final INDArray expOut = res.norm1();

    final int[] axis = new int[]{1};
    SameDiff sd = SameDiff.create();
    SDVariable sdInput = sd.var("input", standardized);
    SDVariable sdGain = sd.var("gain", gain);
    SDVariable out = sd.nn.layerNorm(sdInput, sdGain, true, axis);
    out.norm1("out");

    String err = OpValidation.validate(new TestCase(sd)
            .expectedOutput("out", expOut)
            .gradientCheck(true));
    assertNull(err, err);
}
Example 2
Source File: LayerOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testLayerNormOPNoBias() {
    final INDArray random = Nd4j.rand(DataType.DOUBLE, 10, 4);
    final INDArray standardized = random.ulike();
    Nd4j.getExecutioner().exec(new Standardize(random, standardized, 1));

    final INDArray gain = Nd4j.rand(DataType.DOUBLE, 4);
    final INDArray res = standardized.mulRowVector(gain);
    final INDArray output = Nd4j.zerosLike(res);
    Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, output, true, 1));

    assertEquals(res, output);
}
Example 3
Source File: ReductionBpOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testNorm1AlongDimensionBP() {
    //dL/dIn = dL/dOut * dOut/dIn
    //       = dL/dOut * sgn(in)
    for (boolean keepDims : new boolean[]{false, true}) {
        long[] reducedShape_0 = (keepDims ? new long[]{1, 4} : new long[]{4});
        INDArray preReduceInput = Nd4j.linspace(-5, 6, 12).addi(0.1).reshape(3, 4);
        INDArray sgn = Transforms.sign(preReduceInput, true);
        INDArray dLdOut_0 = Nd4j.create(new double[]{1, 2, 3, 4}, reducedShape_0);
        INDArray dLdInExpected_0 = sgn.mulRowVector(dLdOut_0);
        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new Norm1Bp(preReduceInput, dLdOut_0, dLdIn, keepDims, 0))
                .expectedOutput(0, dLdInExpected_0));
        assertNull(err, err);

        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray dLdInExpected_1 = sgn.mulColumnVector(dLdOut_1);
        dLdIn = Nd4j.createUninitialized(3, 4);

        err = OpValidation.validate(new OpTestCase(new Norm1Bp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));
        assertNull(err, err);
    }
}
Example 4
Source File: ElementWiseMultiplicationLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
    INDArray z = preOutput(true, workspaceMgr); //Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    INDArray input = this.input.castTo(dataType);

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
    weightGrad.subi(weightGrad);
    weightGrad.addi(input.mul(delta).sum(0));

    INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
    delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

    ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
    ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

    // epsilonNext is a 2d matrix
    INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
    epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example 5
Source File: LossMCXENT.java From nd4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray grad;
    //INDArray output = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(activationFn, preOutput.dup()));
    INDArray output = activationFn.getActivation(preOutput.dup(), true);

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights);
            INDArray col = temp.sum(1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights);
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
Example 6
Source File: LossMCXENT.java From deeplearning4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (!labels.equalShapes(preOutput)) {
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s",
                labels.shape(), preOutput.shape());
    }
    INDArray grad;
    INDArray output = activationFn.getActivation(preOutput.dup(), true);
    labels = labels.castTo(preOutput.dataType()); //No-op if already correct dtype

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights.castTo(labels.dataType()));
            INDArray col = temp.sum(true, 1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights.castTo(grad.dataType()));
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
Example 7
Source File: LastTimeStepVertex.java From deeplearning4j with Apache License 2.0
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
    //First: get the mask arrays for the given input, if any
    INDArray[] inputMaskArrays = graph.getInputMaskArrays();
    INDArray mask = (inputMaskArrays != null ? inputMaskArrays[inputIdx] : null);

    //Then: work out, from the mask array, which time step of activations we want, extract activations
    //Also: record where they came from (so we can do errors later)
    fwdPassShape = inputs[0].shape();

    INDArray out;
    if (mask == null) {
        //No mask array -> extract same (last) column for all
        long lastTS = inputs[0].size(2) - 1;
        out = inputs[0].get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(lastTS));
        out = workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
        fwdPassTimeSteps = null; //Null -> last time step for all examples
    } else {
        val outShape = new long[] {inputs[0].size(0), inputs[0].size(1)};
        out = workspaceMgr.create(ArrayType.ACTIVATIONS, inputs[0].dataType(), outShape);

        //Want the index of the last non-zero entry in the mask array.
        //Check a little here by using mulRowVector([0,1,2,3,...]) and argmax
        long maxTsLength = fwdPassShape[2];
        INDArray row = Nd4j.linspace(0, maxTsLength - 1, maxTsLength, mask.dataType());
        INDArray temp = mask.mulRowVector(row);
        INDArray lastElementIdx = Nd4j.argMax(temp, 1);

        fwdPassTimeSteps = new int[(int) fwdPassShape[0]];
        for (int i = 0; i < fwdPassTimeSteps.length; i++) {
            fwdPassTimeSteps[i] = (int) lastElementIdx.getDouble(i);
        }

        //Now, get and assign the corresponding subsets of 3d activations:
        for (int i = 0; i < fwdPassTimeSteps.length; i++) {
            out.putRow(i, inputs[0].get(NDArrayIndex.point(i), NDArrayIndex.all(),
                    NDArrayIndex.point(fwdPassTimeSteps[i])));
        }
    }

    return out;
}
Example 8
Source File: BatchNormalizationTest.java From deeplearning4j with Apache License 2.0
@Test
public void testDnnForwardBackward() {
    double eps = 1e-5;
    int nIn = 4;
    int minibatch = 2;
    Nd4j.getRandom().setSeed(12345);
    INDArray input = Nd4j.rand('c', new int[]{minibatch, nIn});

    //TODO: other values for gamma/beta
    INDArray gamma = Nd4j.ones(1, nIn);
    INDArray beta = Nd4j.zeros(1, nIn);

    Layer l = getLayer(nIn, eps, false, -1, -1);

    INDArray mean = input.mean(0);
    INDArray var = input.var(false, 0);
    INDArray xHat = input.subRowVector(mean).divRowVector(Transforms.sqrt(var.add(eps), true));
    INDArray outExpected = xHat.mulRowVector(gamma).addRowVector(beta);

    INDArray out = l.activate(input, true, LayerWorkspaceMgr.noWorkspaces());

    // System.out.println(Arrays.toString(outExpected.data().asDouble()));
    // System.out.println(Arrays.toString(out.data().asDouble()));

    assertEquals(outExpected, out);

    //-------------------------------------------------------------
    //Check backprop
    INDArray epsilon = Nd4j.rand(minibatch, nIn); //dL/dy

    INDArray dldgammaExp = epsilon.mul(xHat).sum(true, 0);
    INDArray dldbetaExp = epsilon.sum(true, 0);

    INDArray dldxhat = epsilon.mulRowVector(gamma);
    INDArray dldvar = dldxhat.mul(input.subRowVector(mean)).mul(-0.5)
            .mulRowVector(Transforms.pow(var.add(eps), -3.0 / 2.0, true)).sum(0);
    INDArray dldmu = dldxhat.mulRowVector(Transforms.pow(var.add(eps), -1.0 / 2.0, true)).neg().sum(0)
            .add(dldvar.mul(input.subRowVector(mean).mul(-2.0).sum(0).div(minibatch)));
    INDArray dldinExp = dldxhat.mulRowVector(Transforms.pow(var.add(eps), -1.0 / 2.0, true))
            .add(input.subRowVector(mean).mul(2.0 / minibatch).mulRowVector(dldvar))
            .addRowVector(dldmu.mul(1.0 / minibatch));

    Pair<Gradient, INDArray> p = l.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());

    INDArray dldgamma = p.getFirst().getGradientFor("gamma");
    INDArray dldbeta = p.getFirst().getGradientFor("beta");

    assertEquals(dldgammaExp, dldgamma);
    assertEquals(dldbetaExp, dldbeta);

    // System.out.println("EPSILONS");
    // System.out.println(Arrays.toString(dldinExp.data().asDouble()));
    // System.out.println(Arrays.toString(p.getSecond().dup().data().asDouble()));
    assertEquals(dldinExp, p.getSecond());
}