Java Code Examples for org.nd4j.linalg.api.ndarray.INDArray#mulRowVector()
The following examples show how to use org.nd4j.linalg.api.ndarray.INDArray#mulRowVector().
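For orientation before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the array values are illustrative). It shows the broadcasting behaviour of mulRowVector(): every row of a 2-D array is multiplied element-wise by a row vector of matching length, returning a new array, while the in-place variant muliRowVector() modifies the receiver.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class MulRowVectorDemo {
    public static void main(String[] args) {
        // Illustrative values (not from the examples below)
        INDArray matrix = Nd4j.create(new double[][]{{1, 2, 3}, {4, 5, 6}});
        INDArray row = Nd4j.create(new double[]{10, 100, 1000});

        // Out-of-place: returns a new array; rows become [10, 200, 3000] and [40, 500, 6000]
        INDArray scaled = matrix.mulRowVector(row);
        System.out.println(scaled);

        // In-place variant: 'matrix' itself now holds the scaled values
        matrix.muliRowVector(row);
        System.out.println(matrix);
    }
}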
Example 1
Source File: LayerOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testLayerNormNoBias() {
    final INDArray random = Nd4j.rand(DataType.DOUBLE, 10, 4);
    final INDArray standardized = random.ulike();
    Nd4j.getExecutioner().exec(new Standardize(random, standardized, 1));

    final INDArray gain = Nd4j.rand(DataType.DOUBLE, 4);
    final INDArray res = standardized.mulRowVector(gain);
    final INDArray expOut = res.norm1();

    final int[] axis = new int[]{1};
    SameDiff sd = SameDiff.create();
    SDVariable sdInput = sd.var("input", standardized);
    SDVariable sdGain = sd.var("gain", gain);
    SDVariable out = sd.nn.layerNorm(sdInput, sdGain, true, axis);
    out.norm1("out");

    String err = OpValidation.validate(new TestCase(sd)
            .expectedOutput("out", expOut)
            .gradientCheck(true));
    assertNull(err, err);
}
Example 2
Source File: LayerOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testLayerNormOPNoBias() {
    final INDArray random = Nd4j.rand(DataType.DOUBLE, 10, 4);
    final INDArray standardized = random.ulike();
    Nd4j.getExecutioner().exec(new Standardize(random, standardized, 1));

    final INDArray gain = Nd4j.rand(DataType.DOUBLE, 4);
    final INDArray res = standardized.mulRowVector(gain);
    final INDArray output = Nd4j.zerosLike(res);
    Nd4j.getExecutioner().exec(new LayerNorm(standardized, gain, output, true, 1));

    assertEquals(res, output);
}
Example 3
Source File: ReductionBpOpValidation.java From deeplearning4j with Apache License 2.0
@Test
public void testNorm1AlongDimensionBP() {
    //dL/dIn = dL/dOut * dOut/dIn
    //       = dL/dOut * sgn(in)
    for (boolean keepDims : new boolean[]{false, true}) {
        long[] reducedShape_0 = (keepDims ? new long[]{1, 4} : new long[]{4});
        INDArray preReduceInput = Nd4j.linspace(-5, 6, 12).addi(0.1).reshape(3, 4);
        INDArray sgn = Transforms.sign(preReduceInput, true);
        INDArray dLdOut_0 = Nd4j.create(new double[]{1, 2, 3, 4}, reducedShape_0);
        INDArray dLdInExpected_0 = sgn.mulRowVector(dLdOut_0);
        INDArray dLdIn = Nd4j.createUninitialized(3, 4);

        String err = OpValidation.validate(new OpTestCase(new Norm1Bp(preReduceInput, dLdOut_0, dLdIn, keepDims, 0))
                .expectedOutput(0, dLdInExpected_0));
        assertNull(err, err);

        long[] reducedShape_1 = (keepDims ? new long[]{3, 1} : new long[]{3});
        INDArray dLdOut_1 = Nd4j.create(new double[]{1, 2, 3}, reducedShape_1);
        INDArray dLdInExpected_1 = sgn.mulColumnVector(dLdOut_1);
        dLdIn = Nd4j.createUninitialized(3, 4);

        err = OpValidation.validate(new OpTestCase(new Norm1Bp(preReduceInput, dLdOut_1, dLdIn, keepDims, 1))
                .expectedOutput(0, dLdInExpected_1));
        assertNull(err, err);
    }
}
Example 4
Source File: ElementWiseMultiplicationLayer.java From deeplearning4j with Apache License 2.0
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    //If this layer is layer L, then epsilon for this layer is ((w^(L+1)*(delta^(L+1))^T))^T (or equivalent)
    INDArray z = preOutput(true, workspaceMgr); //Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst(); //TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    INDArray input = this.input.castTo(dataType);

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(ElementWiseParamInitializer.WEIGHT_KEY);
    weightGrad.subi(weightGrad);
    weightGrad.addi(input.mul(delta).sum(0));

    INDArray biasGrad = gradientViews.get(ElementWiseParamInitializer.BIAS_KEY);
    delta.sum(biasGrad, 0); //biasGrad is initialized/zeroed first

    ret.gradientForVariable().put(ElementWiseParamInitializer.WEIGHT_KEY, weightGrad);
    ret.gradientForVariable().put(ElementWiseParamInitializer.BIAS_KEY, biasGrad);

    // epsilonNext is a 2d matrix
    INDArray epsilonNext = delta.mulRowVector(params.get(ElementWiseParamInitializer.WEIGHT_KEY));
    epsilonNext = workspaceMgr.leverageTo(ArrayType.ACTIVATION_GRAD, epsilonNext);

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example 5
Source File: LossMCXENT.java From nd4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray grad;
    //INDArray output = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(activationFn, preOutput.dup()));
    INDArray output = activationFn.getActivation(preOutput.dup(), true);

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights);
            INDArray col = temp.sum(1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights);
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
Example 6
Source File: LossMCXENT.java From deeplearning4j with Apache License 2.0
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (!labels.equalShapes(preOutput)) {
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s",
                labels.shape(), preOutput.shape());
    }
    INDArray grad;
    INDArray output = activationFn.getActivation(preOutput.dup(), true);
    labels = labels.castTo(preOutput.dataType()); //No-op if already correct dtype

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights.castTo(labels.dataType()));
            INDArray col = temp.sum(true, 1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                        + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights.castTo(grad.dataType()));
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
Example 7
Source File: LastTimeStepVertex.java From deeplearning4j with Apache License 2.0
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
    //First: get the mask arrays for the given input, if any
    INDArray[] inputMaskArrays = graph.getInputMaskArrays();
    INDArray mask = (inputMaskArrays != null ? inputMaskArrays[inputIdx] : null);

    //Then: work out, from the mask array, which time step of activations we want, extract activations
    //Also: record where they came from (so we can do errors later)
    fwdPassShape = inputs[0].shape();

    INDArray out;
    if (mask == null) {
        //No mask array -> extract same (last) column for all
        long lastTS = inputs[0].size(2) - 1;
        out = inputs[0].get(NDArrayIndex.all(), NDArrayIndex.all(), NDArrayIndex.point(lastTS));
        out = workspaceMgr.dup(ArrayType.ACTIVATIONS, out);
        fwdPassTimeSteps = null; //Null -> last time step for all examples
    } else {
        val outShape = new long[] {inputs[0].size(0), inputs[0].size(1)};
        out = workspaceMgr.create(ArrayType.ACTIVATIONS, inputs[0].dataType(), outShape);

        //Want the index of the last non-zero entry in the mask array.
        //Check a little here by using mulRowVector([0,1,2,3,...]) and argmax
        long maxTsLength = fwdPassShape[2];
        INDArray row = Nd4j.linspace(0, maxTsLength - 1, maxTsLength, mask.dataType());
        INDArray temp = mask.mulRowVector(row);
        INDArray lastElementIdx = Nd4j.argMax(temp, 1);

        fwdPassTimeSteps = new int[(int) fwdPassShape[0]];
        for (int i = 0; i < fwdPassTimeSteps.length; i++) {
            fwdPassTimeSteps[i] = (int) lastElementIdx.getDouble(i);
        }

        //Now, get and assign the corresponding subsets of 3d activations:
        for (int i = 0; i < fwdPassTimeSteps.length; i++) {
            out.putRow(i, inputs[0].get(NDArrayIndex.point(i), NDArrayIndex.all(),
                    NDArrayIndex.point(fwdPassTimeSteps[i])));
        }
    }

    return out;
}
Example 8
Source File: BatchNormalizationTest.java From deeplearning4j with Apache License 2.0
@Test
public void testDnnForwardBackward() {
    double eps = 1e-5;
    int nIn = 4;
    int minibatch = 2;
    Nd4j.getRandom().setSeed(12345);
    INDArray input = Nd4j.rand('c', new int[]{minibatch, nIn});

    //TODO: other values for gamma/beta
    INDArray gamma = Nd4j.ones(1, nIn);
    INDArray beta = Nd4j.zeros(1, nIn);

    Layer l = getLayer(nIn, eps, false, -1, -1);

    INDArray mean = input.mean(0);
    INDArray var = input.var(false, 0);
    INDArray xHat = input.subRowVector(mean).divRowVector(Transforms.sqrt(var.add(eps), true));
    INDArray outExpected = xHat.mulRowVector(gamma).addRowVector(beta);

    INDArray out = l.activate(input, true, LayerWorkspaceMgr.noWorkspaces());

    // System.out.println(Arrays.toString(outExpected.data().asDouble()));
    // System.out.println(Arrays.toString(out.data().asDouble()));

    assertEquals(outExpected, out);

    //-------------------------------------------------------------
    //Check backprop
    INDArray epsilon = Nd4j.rand(minibatch, nIn); //dL/dy

    INDArray dldgammaExp = epsilon.mul(xHat).sum(true, 0);
    INDArray dldbetaExp = epsilon.sum(true, 0);

    INDArray dldxhat = epsilon.mulRowVector(gamma);
    INDArray dldvar = dldxhat.mul(input.subRowVector(mean)).mul(-0.5)
            .mulRowVector(Transforms.pow(var.add(eps), -3.0 / 2.0, true)).sum(0);
    INDArray dldmu = dldxhat.mulRowVector(Transforms.pow(var.add(eps), -1.0 / 2.0, true)).neg().sum(0)
            .add(dldvar.mul(input.subRowVector(mean).mul(-2.0).sum(0).div(minibatch)));
    INDArray dldinExp = dldxhat.mulRowVector(Transforms.pow(var.add(eps), -1.0 / 2.0, true))
            .add(input.subRowVector(mean).mul(2.0 / minibatch).mulRowVector(dldvar))
            .addRowVector(dldmu.mul(1.0 / minibatch));

    Pair<Gradient, INDArray> p = l.backpropGradient(epsilon, LayerWorkspaceMgr.noWorkspaces());

    INDArray dldgamma = p.getFirst().getGradientFor("gamma");
    INDArray dldbeta = p.getFirst().getGradientFor("beta");

    assertEquals(dldgammaExp, dldgamma);
    assertEquals(dldbetaExp, dldbeta);

    // System.out.println("EPSILONS");
    // System.out.println(Arrays.toString(dldinExp.data().asDouble()));
    // System.out.println(Arrays.toString(p.getSecond().dup().data().asDouble()));
    assertEquals(dldinExp, p.getSecond());
}