Java Code Examples for org.deeplearning4j.nn.workspace.LayerWorkspaceMgr#createUninitialized()
The following examples show how to use org.deeplearning4j.nn.workspace.LayerWorkspaceMgr#createUninitialized(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
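Before the individual examples, a minimal standalone sketch of the shared pattern may help: ask the workspace manager for an uninitialized buffer of a given ArrayType, data type, shape, and ordering, then overwrite every element before reading it (here with an in-place matrix multiply), so the skipped zero-fill is never observable. The miniBatch/nIn/nOut sizes, the variable names, and the use of LayerWorkspaceMgr.noWorkspaces() as a stand-in for the manager a network would normally pass to its layers are illustrative assumptions, not taken from the examples below.

    import org.deeplearning4j.nn.workspace.ArrayType;
    import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class CreateUninitializedSketch {
        public static void main(String[] args) {
            // Hypothetical dense-layer-style inputs (shapes and names are made up for illustration)
            INDArray input = Nd4j.rand(32, 20);             // [miniBatch, nIn]
            INDArray weights = Nd4j.rand(20, 10).dup('f');  // [nIn, nOut], column-major like DL4J params

            // Simplified setup: a manager with no workspaces; real layers receive a configured one
            LayerWorkspaceMgr workspaceMgr = LayerWorkspaceMgr.noWorkspaces();

            // Request an uninitialized ACTIVATIONS buffer with the desired dtype, shape, and ordering...
            INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS,
                    weights.dataType(), new long[]{input.size(0), weights.size(1)}, 'f');

            // ...and overwrite every element before it is read, here via an in-place gemm into 'out'
            input.castTo(out.dataType()).mmuli(weights, out);

            System.out.println("activations shape: " + java.util.Arrays.toString(out.shape()));
        }
    }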
Example 1
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public INDArray activate(INDArray x, boolean training, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, x.dataType(), x.shape());

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int) n);
    } else {
        context.purge();
    }

    context.setInputArray(0, x);
    context.setOutputArray(0, out);

    LocalResponseNormalization op = new LocalResponseNormalization();
    Nd4j.exec(op, context);
    return out;
}
Example 2
Source File: DropConnect.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train, LayerWorkspaceMgr workspaceMgr) {
    ParamInitializer init = layer.conf().getLayer().initializer();
    INDArray param = layer.getParam(paramKey);

    double p;
    if (weightRetainProbSchedule == null) {
        p = weightRetainProb;
    } else {
        p = weightRetainProbSchedule.valueAt(iteration, epoch);
    }

    if (train && init.isWeightParam(layer.conf().getLayer(), paramKey)
            || (applyToBiases && init.isBiasParam(layer.conf().getLayer(), paramKey))) {
        INDArray out = workspaceMgr.createUninitialized(ArrayType.INPUT, param.dataType(), param.shape(), param.ordering());
        Nd4j.getExecutioner().exec(new DropOut(param, out, p));
        return out;
    }
    return param;
}
Example 3
Source File: BaseOutputLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    INDArray w = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, delta.dataType(), new long[]{w.size(0), delta.size(0)}, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();

    //Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
    //So this is instead done in MultiLayerNetwork/CompGraph backprop methods
    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(pair.getFirst(), epsilonNext);
}
Example 4
Source File: ConvolutionUtils.java From deeplearning4j with Apache License 2.0 | 6 votes |
public static INDArray reshapeCnn3dMask(@NonNull Convolution3D.DataFormat format, INDArray mask, INDArray label, LayerWorkspaceMgr workspaceMgr, ArrayType type) {
    if (mask == null)
        return null;
    Preconditions.checkState(mask.rank() == 5, "Expected rank 5 mask for Cnn3DLossLayer in a shape broadcastable to labels shape:" +
            " got mask shape %ndShape with label shape %ndShape", mask, label);

    if (mask.equalShapes(label) ||
            (format == Convolution3D.DataFormat.NDHWC && mask.size(0) == label.size(0) && mask.size(1) == label.size(1) && mask.size(2) == label.size(2) && mask.size(3) == label.size(3)) ||
            (format == Convolution3D.DataFormat.NDHWC && mask.size(0) == label.size(0) && mask.size(2) == label.size(2) && mask.size(3) == label.size(3) && mask.size(4) == label.size(4))) {
        //Already OK shape for reshaping
        return reshape5dTo2d(format, mask, workspaceMgr, type);
    } else {
        //Need to broadcast first
        long[] lShape = label.shape().clone();
        int channelIdx = format == Convolution3D.DataFormat.NCDHW ? 1 : 4;
        lShape[channelIdx] = mask.size(channelIdx);     //Keep existing channel size

        INDArray bMask = workspaceMgr.createUninitialized(type, mask.dataType(), lShape, 'c');
        Nd4j.exec(new Assign(new INDArray[]{bMask, mask}, new INDArray[]{bMask}));
        return reshape5dTo2d(format, bMask, workspaceMgr, type);
    }
}
Example 5
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int) n);
    } else {
        context.purge();
    }

    LocalResponseNormalization op = new LocalResponseNormalization();

    context.setInputArray(0, input);
    context.setInputArray(1, epsilon);      //Epsilon is the second op input (the original snippet reused index 0 here, which would overwrite the input)
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);

    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
}
Example 6
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray previous, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); // Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    INDArray w = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { w.size(0), delta.size(0) }, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();

    // Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
    // So this is instead done in MultiLayerNetwork/CompGraph backprop methods
    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(pair.getFirst(), epsilonNext);
}
Example 7
Source File: DeepFMProductVertex.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoForward()) {
        throw new IllegalStateException("Cannot do forward pass: inputs not set");
    }
    // inputs[index] => {batchSize, numberOfEmbeds}
    INDArray left = inputs[0];
    INDArray right = inputs[1];
    long size = inputs[0].shape()[0];
    INDArray value = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, size);
    // Compute the dot product of the two row vectors
    for (int index = 0; index < size; index++) {
        INDArray product = left.getRow(index).mmul(right.getRow(index).transpose());
        value.put(index, product);
    }
    // outputs[index] => {batchSize, 1}
    return Shape.newShapeNoCopy(value, new long[] { value.length(), 1L }, value.ordering() == 'f');
}
Example 8
Source File: AbstractLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (training && !dropoutApplied && layerConf().getIDropout() != null) {
        INDArray result;
        if (inputModificationAllowed) {
            result = input;
        } else {
            result = workspaceMgr.createUninitialized(ArrayType.INPUT, input.dataType(), input.shape(), input.ordering());
        }

        input = layerConf().getIDropout().applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr);
        dropoutApplied = true;
    }
}
Example 9
Source File: VariationalAutoencoder.java From deeplearning4j with Apache License 2.0 | 5 votes |
private VAEFwdHelper doForward(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);

    //TODO input validation

    int nEncoderLayers = encoderLayerSizes.length;

    INDArray[] encoderPreOuts = new INDArray[encoderLayerSizes.length];
    INDArray[] encoderActivations = new INDArray[encoderLayerSizes.length];
    INDArray current = input.castTo(getParam("e0" + WEIGHT_KEY_SUFFIX).dataType());
    for (int i = 0; i < nEncoderLayers; i++) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;

        INDArray weights = getParamWithNoise(wKey, training, workspaceMgr);
        INDArray bias = getParamWithNoise(bKey, training, workspaceMgr);

        current = current.mmul(weights).addiRowVector(bias);
        if (forBackprop) {
            encoderPreOuts[i] = current.dup();
        }
        layerConf().getActivationFn().getActivation(current, training);
        encoderActivations[i] = current;
    }

    //Finally, calculate mean value:
    INDArray mW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, training, workspaceMgr);
    INDArray mB = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_B, training, workspaceMgr);

    INDArray pzxMean = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, mW.dataType(), new long[]{current.size(0), mW.size(1)}, 'f');
    pzxMean = current.mmuli(mW, pzxMean).addiRowVector(mB);

    return new VAEFwdHelper(encoderPreOuts, pzxMean, encoderActivations);
}
Example 10
Source File: Upsampling2D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();

    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);
    return new Pair<>(gradient, epsOut);
}
Example 11
Source File: CDAELayer.java From jstarcraft-rns with Apache License 2.0 | 5 votes |
@Override
public INDArray preOutput(boolean training, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);
    applyDropOutIfNecessary(training, workspaceMgr);
    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
    INDArray U = getParamWithNoise(CDAEParameter.USER_KEY, training, workspaceMgr);
    INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);

    // Input validation:
    if (input.rank() != 2 || input.columns() != W.rows()) {
        if (input.rank() != 2) {
            throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank()
                    + " array with shape " + Arrays.toString(input.shape())
                    + ". Missing preprocessor or wrong input type? " + layerId());
        }
        throw new DL4JInvalidInputException("Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape())
                + ") is invalid: does not match layer input size (layer # inputs = " + W.size(0) + ") " + layerId());
    }

    INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.size(0), W.size(1));
    input.mmuli(W, ret);
    ret.addi(U);
    if (hasBias()) {
        ret.addiRowVector(b);
    }

    if (maskArray != null) {
        applyMask(ret);
    }

    return ret;
}
Example 12
Source File: MKLDNNSubsamplingHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public INDArray activate(INDArray input, boolean training, int[] kernel, int[] strides, int[] pad,
                         PoolingType poolingType, ConvolutionMode convolutionMode, int[] dilation,
                         CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
    int hIdx = 2;
    int wIdx = 3;
    if (format == CNN2DFormat.NHWC) {
        hIdx = 1;
        wIdx = 2;
    }

    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format);     //Also performs validation
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(hIdx), (int) input.size(wIdx)}, kernel, strides, dilation);
    } else {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format);      //Also performs validation
    }

    long[] outShape = format == CNN2DFormat.NCHW ? new long[]{input.size(0), input.size(1), outSize[0], outSize[1]} :
            new long[]{input.size(0), outSize[0], outSize[1], input.size(3)};
    INDArray output = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), outShape);

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setIArguments(kernel[0], kernel[1],
                strides[0], strides[1],
                pad[0], pad[1],
                dilation[0], dilation[1],
                ArrayUtil.fromBoolean(convolutionMode == ConvolutionMode.Same),
                0,                                      //Extra - not used?
                format == CNN2DFormat.NCHW ? 0 : 1);    //0 = NCHW, 1=NHWC
    }

    DynamicCustomOp op;
    switch (poolingType) {
        case MAX:
            op = new MaxPooling2D();
            break;
        case AVG:
            op = new AvgPooling2D();
            break;
        case SUM:
        case PNORM:
        default:
            return null;
    }

    context.purge();
    context.setInputArray(0, input);
    context.setOutputArray(0, output);

    Nd4j.exec(op, context);

    return output;
}
Example 13
Source File: CDAELayer.java From jstarcraft-rns with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    // Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    // INDArray activationDerivative = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf().getLayer().getActivationFunction(), z).derivative());
    // INDArray activationDerivative = conf().getLayer().getActivationFn().getGradient(z);
    // INDArray delta = epsilon.muli(activationDerivative);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst();     // TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);    // f order
    Nd4j.gemm(input, delta, weightGrad, true, false, 1.0, 0.0);
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    INDArray userWeightGrad = gradientViews.get(CDAEParameter.USER_KEY);
    userWeightGrad.assign(delta);
    ret.gradientForVariable().put(CDAEParameter.USER_KEY, userWeightGrad);

    if (hasBias()) {
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0);     // biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { W.size(0), delta.size(0) }, 'f');
    epsilonNext = W.mmuli(delta.transpose(), epsilonNext).transpose();      // W.mmul(delta.transpose()).transpose();

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example 14
Source File: MKLDNNLSTMHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public FwdPassReturn activate(Layer layer, NeuralNetConfiguration conf, IActivation gateActivationFn, INDArray input,
                              INDArray recurrentWeights, INDArray inputWeights, INDArray biases, boolean training,
                              INDArray prevOutputActivations, INDArray prevMemCellState, boolean forBackprop, boolean forwards,
                              String inputWeightKey, INDArray maskArray, boolean hasPeepholeConnections, LayerWorkspaceMgr workspaceMgr) {

    /*
    DL4J data format: [bS, nIn, sL] - dataFormat == 2, directionMode == 0 (forward)
    Inputs:
    x = [bS, nIn, sL]
    Wx = [nIn, 4*nOut]
    Wr = [nOut, 4*nOut]
    Wp = [3*nOut]               Optional peephole weights
    b = [4*nOut]
    seqLen = [bS]
    initialOut = [bs, nOut]
    initialCell = [bs, nOut]

    Outputs:
    out = [bS, nOut, sL]
    outLast = [bs, nOut]
    cellLast = [bs, nOut]

    Gates order: input, forget, input modulation, output

    const auto hasBiases  = B_ARG(0);   // indicates whether biases array is provided
    const auto hasSeqLen  = B_ARG(1);   // indicates whether seqLen array is provided
    const auto hasInitH   = B_ARG(2);   // indicates whether initial output is provided
    const auto hasInitC   = B_ARG(3);   // indicates whether initial cell state is provided
    const auto hasPH      = B_ARG(4);   // indicates whether peephole connections are present
    const auto retFullSeq = B_ARG(5);   // indicates whether to return whole time sequence h {h_0, h_1, ... , h_sL-1}
    const auto retLastH   = B_ARG(6);   // indicates whether to return output at last time step only, in this case shape would be [bS, nOut] (exact shape depends on dataFormat argument)
    const auto retLastC   = B_ARG(7);   // indicates whether to return cells state at last time step only, in this case shape would be [bS, nOut] (exact shape depends on dataFormat argument)
     */

    INDArray b1d = biases.reshape(biases.length());
    INDArray seqLen = null;
    if (maskArray != null) {
        seqLen = BooleanIndexing.firstIndex(maskArray, Conditions.equals(0), 1);    //First 0 along dimension 1 (for [mb, seqLen])
    }

    List<INDArray> args = new ArrayList<>();
    args.add(input);
    args.add(inputWeights);
    args.add(recurrentWeights);
    if (hasPeepholeConnections) {
        throw new IllegalStateException("Not yet implemented");
    }
    args.add(b1d);
    if (seqLen != null)
        args.add(seqLen);
    if (prevOutputActivations != null)
        args.add(prevOutputActivations);
    if (prevMemCellState != null)
        args.add(prevMemCellState);

    IActivation a = ((LSTM) conf.getLayer()).getActivationFn();

    DynamicCustomOp op = DynamicCustomOp.builder("lstmLayer")
            .addInputs(args.toArray(new INDArray[0]))
            .addBooleanArguments(
                    true,                               //hasBiases
                    seqLen != null,                     //hasSeqLen
                    prevOutputActivations != null,      //hasInitH
                    prevMemCellState != null,           //hasInitC
                    hasPeepholeConnections,             //hasPh
                    true,                               //retFullSeq
                    true,                               //retLastH
                    true                                //retLastC
            )
            .addIntegerArguments(
                    2,                                  //data format: 2 = [bS, nIn, sL]
                    0,                                  //direction: 0 = forward
                    activationToArg(gateActivationFn),  //Gate activation
                    activationToArg(a),                 //Cell state activation
                    activationToArg(a)                  //Output activation (same as cell in DL4J)
            )
            .build();

    List<LongShapeDescriptor> outShapes = op.calculateOutputShape();

    for (LongShapeDescriptor lsd : outShapes) {
        INDArray arr = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, lsd.dataType(), lsd.getShape(), lsd.getOrder());
        op.addOutputArgument(arr);
    }

    FwdPassReturn f = new FwdPassReturn();
    f.fwdPassOutput = op.getOutputArgument(0);
    f.lastAct = op.getOutputArgument(1);
    f.lastMemCell = op.getOutputArgument(2);

    return f;
}
Example 15
Source File: BaseLayer.java From deeplearning4j with Apache License 2.0 | 4 votes |
protected Pair<INDArray, INDArray> preOutputWithPreNorm(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(forBackprop);
    applyDropOutIfNecessary(training, workspaceMgr);
    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
    INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);
    INDArray g = (hasLayerNorm() ? getParam(DefaultParamInitializer.GAIN_KEY) : null);

    INDArray input = this.input.castTo(dataType);

    //Input validation:
    if (input.rank() != 2 || input.columns() != W.rows()) {
        if (input.rank() != 2) {
            throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank()
                    + " array with shape " + Arrays.toString(input.shape())
                    + ". Missing preprocessor or wrong input type? " + layerId());
        }
        throw new DL4JInvalidInputException(
                "Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape())
                        + ") is invalid: does not match layer input size (layer # inputs = " + W.size(0) + ") " + layerId());
    }

    INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, W.dataType(), input.size(0), W.size(1));
    input.castTo(ret.dataType()).mmuli(W, ret);     //TODO Can we avoid this cast? (It should be a no-op if not required, however)

    INDArray preNorm = ret;
    if (hasLayerNorm()) {
        preNorm = (forBackprop ? ret.dup(ret.ordering()) : ret);
        Nd4j.getExecutioner().exec(new LayerNorm(preNorm, g, ret, true, 1));
    }

    if (hasBias()) {
        ret.addiRowVector(b);
    }

    if (maskArray != null) {
        applyMask(ret);
    }

    return new Pair<>(ret, preNorm);
}
Example 16
Source File: MKLDNNConvHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] kernel, int[] strides, int[] pad,
                          ConvolutionLayer.AlgoMode mode, ConvolutionLayer.FwdAlgo fwdAlgo, ConvolutionMode convolutionMode,
                          int[] dilation, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
    if (input.dataType() != DataType.FLOAT || weights.dataType() != DataType.FLOAT)
        return null;    //MKL-DNN only supports floating point dtype

    int hDim = 2;
    int wDim = 3;
    if (format == CNN2DFormat.NHWC) {
        hDim = 1;
        wDim = 2;
    }

    int inH = (int) input.size(hDim);
    int inW = (int) input.size(wDim);
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format);     //Also performs validation
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation);
    } else {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format);      //Also performs validation
    }

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setIArguments(kernel[0], kernel[1],
                strides[0], strides[1],
                pad[0], pad[1],
                dilation[0], dilation[1],
                ArrayUtil.fromBoolean(convolutionMode == ConvolutionMode.Same),
                format == CNN2DFormat.NCHW ? 0 : 1,     //0=NCHW, 1=NHWC
                1                                       //Weight format: 1 - [oC, iC, kH, kW]
        );
    }

    int outDepth = (int) weights.size(0);
    long[] outShape = (format == CNN2DFormat.NCHW) ? new long[]{input.size(0), outDepth, outSize[0], outSize[1]} :
            new long[]{input.size(0), outSize[0], outSize[1], outDepth};
    INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), outShape);

    INDArray[] inputsArr = bias == null ? new INDArray[]{input, weights} : new INDArray[]{input, weights, bias};
    context.purge();
    for (int i = 0; i < inputsArr.length; i++) {
        context.setInputArray(i, inputsArr[i]);
    }

    context.setOutputArray(0, out);
    Conv2D op = new Conv2D();
    Nd4j.exec(op, context);

    return out;
}
Example 17
Source File: MKLDNNBatchNormHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma, INDArray beta,
                                                 INDArray dGammaView, INDArray dBetaView, double eps, CNN2DFormat format,
                                                 LayerWorkspaceMgr workspaceMgr) {
    //Workaround for: https://github.com/eclipse/deeplearning4j/issues/8860
    if (!Shape.hasDefaultStridesForShape(epsilon))
        epsilon = epsilon.dup('c');

    if (input.dataType() != DataType.FLOAT)
        return null;    //MKL-DNN only supports float

    int axis = (input.rank() != 4 || format == CNN2DFormat.NCHW) ? 1 : 3;

    List<INDArray> args = new ArrayList<>();
    args.add(input);
    args.add(meanCache);
    args.add(varCache);
    if (gamma != null)
        args.add(gamma.reshape(gamma.length()));
    if (beta != null)
        args.add(beta.reshape(beta.length()));
    args.add(epsilon);

    DynamicCustomOp op = DynamicCustomOp.builder("batchnorm_bp")
            .addInputs(args.toArray(new INDArray[0]))
            .addIntegerArguments(
                    gamma == null ? 0 : 1,  //Apply scale
                    beta == null ? 0 : 1,   //Apply beta
                    axis)                   //Axis (NCHW) - 1=NCHW, 3=NHWC
            .addFloatingPointArguments(eps)
            .build();

    INDArray epsAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
    INDArray dLdm = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, meanCache.dataType(), meanCache.shape());
    INDArray dLdv = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, meanCache.dataType(), meanCache.shape());

    op.setOutputArgument(0, epsAtInput);
    op.setOutputArgument(1, dLdm);
    op.setOutputArgument(2, dLdv);
    if (dGammaView != null) {
        //Both are always null/not null simultaneously
        op.setOutputArgument(3, dGammaView.reshape(dGammaView.length()));
        op.setOutputArgument(4, dBetaView.reshape(dBetaView.length()));
    }

    Nd4j.exec(op);

    Gradient g = new DefaultGradient();
    g.setGradientFor(BatchNormalizationParamInitializer.GAMMA, dGammaView);
    g.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView);

    return new Pair<>(g, epsAtInput);
}
Example 18
Source File: VariationalAutoencoder.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (!zeroedPretrainParamGradients) {
        for (Map.Entry<String, INDArray> entry : gradientViews.entrySet()) {
            if (isPretrainParam(entry.getKey())) {
                entry.getValue().assign(0);
            }
        }
        zeroedPretrainParamGradients = true;
    }

    INDArray input = this.input.castTo(dataType);

    Gradient gradient = new DefaultGradient();

    VAEFwdHelper fwd = doForward(true, true, workspaceMgr);
    INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst();

    //Finally, calculate mean value:
    INDArray meanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true, workspaceMgr);
    INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);   //f order
    INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1];
    Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0);
    INDArray dLdMeanB = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
    currentDelta.sum(dLdMeanB, 0);      //dLdMeanB is initialized/zeroed first in sum op

    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_W, dLdMeanW);
    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_B, dLdMeanB);

    epsilon = meanW.mmul(currentDelta.transpose()).transpose();

    int nEncoderLayers = encoderLayerSizes.length;

    IActivation afn = layerConf().getActivationFn();
    for (int i = nEncoderLayers - 1; i >= 0; i--) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;

        INDArray weights = getParamWithNoise(wKey, true, workspaceMgr);

        INDArray dLdW = gradientViews.get(wKey);
        INDArray dLdB = gradientViews.get(bKey);

        INDArray preOut = fwd.encoderPreOuts[i];

        currentDelta = afn.backprop(preOut, epsilon).getFirst();

        INDArray actInput;
        if (i == 0) {
            actInput = input;
        } else {
            actInput = fwd.encoderActivations[i - 1];
        }
        Nd4j.gemm(actInput, currentDelta, dLdW, true, false, 1.0, 0.0);
        currentDelta.sum(dLdB, 0);      //dLdB is initialized/zeroed first in sum op

        gradient.gradientForVariable().put(wKey, dLdW);
        gradient.gradientForVariable().put(bKey, dLdB);

        if (i == 0) {
            epsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, currentDelta.dataType(), new long[]{weights.size(0), currentDelta.size(0)}, 'f');
            weights.mmuli(currentDelta.transpose(), epsilon);
            epsilon = epsilon.transpose();
        } else {
            epsilon = weights.mmul(currentDelta.transpose()).transpose();
        }
    }

    return new Pair<>(gradient, epsilon);
}
Example 19
Source File: MaskLayer.java From deeplearning4j with Apache License 2.0 | 4 votes |
private static INDArray applyMask(INDArray input, INDArray maskArray, LayerWorkspaceMgr workspaceMgr, ArrayType type) {
    if (maskArray == null) {
        return workspaceMgr.leverageTo(type, input);
    }
    switch (input.rank()) {
        case 2:
            if (!maskArray.isColumnVectorOrScalar() || maskArray.size(0) != input.size(0)) {
                throw new IllegalStateException("Expected column vector for mask with 2d input, with same size(0)" +
                        " as input. Got mask with shape: " + Arrays.toString(maskArray.shape()) +
                        ", input shape = " + Arrays.toString(input.shape()));
            }
            return workspaceMgr.leverageTo(type, input.mulColumnVector(maskArray));
        case 3:
            //Time series input, shape [Minibatch, size, tsLength], Expect rank 2 mask
            if (maskArray.rank() != 2 || input.size(0) != maskArray.size(0) || input.size(2) != maskArray.size(1)) {
                throw new IllegalStateException("With 3d (time series) input with shape [minibatch, size, sequenceLength]=" +
                        Arrays.toString(input.shape()) + ", expected 2d mask array with shape [minibatch, sequenceLength]." +
                        " Got mask with shape: " + Arrays.toString(maskArray.shape()));
            }
            INDArray fwd = workspaceMgr.createUninitialized(type, input.dataType(), input.shape(), 'f');
            Broadcast.mul(input, maskArray, fwd, 0, 2);
            return fwd;
        case 4:
            //CNN input. Expect column vector to be shape [mb,1,h,1], [mb,1,1,w], or [mb,1,h,w]
            int[] dimensions = new int[4];
            int count = 0;
            for (int i = 0; i < 4; i++) {
                if (input.size(i) == maskArray.size(i)) {
                    dimensions[count++] = i;
                }
            }
            if (count < 4) {
                dimensions = Arrays.copyOfRange(dimensions, 0, count);
            }

            INDArray fwd2 = workspaceMgr.createUninitialized(type, input.dataType(), input.shape(), 'c');
            Broadcast.mul(input, maskArray, fwd2, dimensions);
            return fwd2;
        default:
            throw new RuntimeException("Expected rank 2 to 4 input. Got rank " + input.rank() + " with shape " +
                    Arrays.toString(input.shape()));
    }
}
Example 20
Source File: DeepFMInputLayer.java From jstarcraft-rns with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    // Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    // INDArray activationDerivative = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf().getLayer().getActivationFunction(), z).derivative());
    // INDArray activationDerivative = conf().getLayer().getActivationFn().getGradient(z);
    // INDArray delta = epsilon.muli(activationDerivative);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst();     // TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);    // f order
    weightGrad.assign(0F);
    for (int index = 0; index < input.rows(); index++) {
        for (int column = 0; column < delta.columns(); column++) {
            int cursor = 0;
            for (int dimension = 0; dimension < dimensionSizes.length; dimension++) {
                int point = cursor + input.getInt(index, dimension);
                float value = weightGrad.getFloat(point, column);
                value += delta.getFloat(index, column);
                weightGrad.put(point, column, value);
                cursor += dimensionSizes[dimension];
            }
        }
    }
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    if (hasBias()) {
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0);     // biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { W.size(0), delta.size(0) }, 'f');
    epsilonNext = W.mmuli(delta.transpose(), epsilonNext).transpose();      // W.mmul(delta.transpose()).transpose();

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}