Java Code Examples for org.deeplearning4j.nn.workspace.LayerWorkspaceMgr#createUninitialized()
The following examples show how to use org.deeplearning4j.nn.workspace.LayerWorkspaceMgr#createUninitialized(). You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
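Before the individual examples, a minimal standalone sketch of the shared pattern may help: ask the workspace manager for an uninitialized buffer of a given ArrayType, data type, shape, and ordering, then overwrite every element before reading it (here with an in-place matrix multiply), so the skipped zero-fill is never observable. The miniBatch/nIn/nOut sizes, the variable names, and the use of LayerWorkspaceMgr.noWorkspaces() as a stand-in for the manager a network would normally pass to its layers are illustrative assumptions, not taken from the examples below.

    import org.deeplearning4j.nn.workspace.ArrayType;
    import org.deeplearning4j.nn.workspace.LayerWorkspaceMgr;
    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;

    public class CreateUninitializedSketch {
        public static void main(String[] args) {
            // Hypothetical dense-layer-style inputs (shapes and names are made up for illustration)
            INDArray input = Nd4j.rand(32, 20);             // [miniBatch, nIn]
            INDArray weights = Nd4j.rand(20, 10).dup('f');  // [nIn, nOut], column-major like DL4J params

            // Simplified setup: a manager with no workspaces; real layers receive a configured one
            LayerWorkspaceMgr workspaceMgr = LayerWorkspaceMgr.noWorkspaces();

            // Request an uninitialized ACTIVATIONS buffer with the desired dtype, shape, and ordering...
            INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS,
                    weights.dataType(), new long[]{input.size(0), weights.size(1)}, 'f');

            // ...and overwrite every element before it is read, here via an in-place gemm into 'out'
            input.castTo(out.dataType()).mmuli(weights, out);

            System.out.println("activations shape: " + java.util.Arrays.toString(out.shape()));
        }
    }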
Example 1
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public INDArray activate(INDArray x, boolean training, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, x.dataType(), x.shape());

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int) n);
    } else {
        context.purge();
    }

    context.setInputArray(0, x);
    context.setOutputArray(0, out);

    LocalResponseNormalization op = new LocalResponseNormalization();
    Nd4j.exec(op, context);
    return out;
}
Example 2
Source File: DropConnect.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public INDArray getParameter(Layer layer, String paramKey, int iteration, int epoch, boolean train, LayerWorkspaceMgr workspaceMgr) {
    ParamInitializer init = layer.conf().getLayer().initializer();
    INDArray param = layer.getParam(paramKey);

    double p;
    if (weightRetainProbSchedule == null) {
        p = weightRetainProb;
    } else {
        p = weightRetainProbSchedule.valueAt(iteration, epoch);
    }

    if (train && init.isWeightParam(layer.conf().getLayer(), paramKey)
            || (applyToBiases && init.isBiasParam(layer.conf().getLayer(), paramKey))) {
        INDArray out = workspaceMgr.createUninitialized(ArrayType.INPUT, param.dataType(), param.shape(), param.ordering());
        Nd4j.getExecutioner().exec(new DropOut(param, out, p));
        return out;
    }
    return param;
}
Example 3
Source File: BaseOutputLayer.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); //Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    INDArray w = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, delta.dataType(), new long[]{w.size(0), delta.size(0)}, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();

    //Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
    //So this is instead done in MultiLayerNetwork/CompGraph backprop methods
    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(pair.getFirst(), epsilonNext);
}
Example 4
Source File: ConvolutionUtils.java From deeplearning4j with Apache License 2.0 | 6 votes |
public static INDArray reshapeCnn3dMask(@NonNull Convolution3D.DataFormat format, INDArray mask, INDArray label, LayerWorkspaceMgr workspaceMgr, ArrayType type) {
    if (mask == null)
        return null;
    Preconditions.checkState(mask.rank() == 5, "Expected rank 5 mask for Cnn3DLossLayer in a shape broadcastable to labels shape:" +
            " got mask shape %ndShape with label shape %ndShape", mask, label);

    if (mask.equalShapes(label) ||
            (format == Convolution3D.DataFormat.NDHWC && mask.size(0) == label.size(0) && mask.size(1) == label.size(1) && mask.size(2) == label.size(2) && mask.size(3) == label.size(3)) ||
            (format == Convolution3D.DataFormat.NDHWC && mask.size(0) == label.size(0) && mask.size(2) == label.size(2) && mask.size(3) == label.size(3) && mask.size(4) == label.size(4))) {
        //Already OK shape for reshaping
        return reshape5dTo2d(format, mask, workspaceMgr, type);
    } else {
        //Need to broadcast first
        long[] lShape = label.shape().clone();
        int channelIdx = format == Convolution3D.DataFormat.NCDHW ? 1 : 4;
        lShape[channelIdx] = mask.size(channelIdx);     //Keep existing channel size

        INDArray bMask = workspaceMgr.createUninitialized(type, mask.dataType(), lShape, 'c');
        Nd4j.exec(new Assign(new INDArray[]{bMask, mask}, new INDArray[]{bMask}));
        return reshape5dTo2d(format, bMask, workspaceMgr, type);
    }
}
Example 5
Source File: MKLDNNLocalResponseNormalizationHelper.java From deeplearning4j with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, double k, double n, double alpha, double beta, LayerWorkspaceMgr workspaceMgr) {
    INDArray gradAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setTArguments(k, alpha, beta);
        context.setIArguments((int) n);
    } else {
        context.purge();
    }

    LocalResponseNormalization op = new LocalResponseNormalization();

    context.setInputArray(0, input);
    context.setInputArray(1, epsilon);      //Epsilon is the second op input (the original snippet reused index 0 here, which would overwrite the input)
    context.setOutputArray(0, gradAtInput);

    Nd4j.exec(op, context);

    Gradient g = new DefaultGradient();
    return new Pair<>(g, gradAtInput);
}
Example 6
Source File: DeepFMOutputLayer.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray previous, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    Pair<Gradient, INDArray> pair = getGradientsAndDelta(preOutput2d(true, workspaceMgr), workspaceMgr); // Returns Gradient and delta^(this), not Gradient and epsilon^(this-1)
    INDArray delta = pair.getSecond();

    INDArray w = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);
    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { w.size(0), delta.size(0) }, 'f');
    epsilonNext = w.mmuli(delta.transpose(), epsilonNext).transpose();

    // Normally we would clear weightNoiseParams here - but we want to reuse them for forward + backward + score
    // So this is instead done in MultiLayerNetwork/CompGraph backprop methods
    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(pair.getFirst(), epsilonNext);
}
Example 7
Source File: DeepFMProductVertex.java From jstarcraft-rns with Apache License 2.0 | 6 votes |
@Override
public INDArray doForward(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (!canDoForward()) {
        throw new IllegalStateException("Cannot do forward pass: inputs not set");
    }
    // inputs[index] => {batchSize, numberOfEmbeds}
    INDArray left = inputs[0];
    INDArray right = inputs[1];
    long size = inputs[0].shape()[0];
    INDArray value = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, size);
    // Compute the dot product of the two row vectors
    for (int index = 0; index < size; index++) {
        INDArray product = left.getRow(index).mmul(right.getRow(index).transpose());
        value.put(index, product);
    }
    // outputs[index] => {batchSize, 1}
    return Shape.newShapeNoCopy(value, new long[] { value.length(), 1L }, value.ordering() == 'f');
}
Example 8
Source File: AbstractLayer.java From deeplearning4j with Apache License 2.0 | 5 votes |
protected void applyDropOutIfNecessary(boolean training, LayerWorkspaceMgr workspaceMgr) {
    if (training && !dropoutApplied && layerConf().getIDropout() != null) {
        INDArray result;
        if (inputModificationAllowed) {
            result = input;
        } else {
            result = workspaceMgr.createUninitialized(ArrayType.INPUT, input.dataType(), input.shape(), input.ordering());
        }

        input = layerConf().getIDropout().applyDropout(input, result, getIterationCount(), getEpochCount(), workspaceMgr);
        dropoutApplied = true;
    }
}
Example 9
Source File: VariationalAutoencoder.java From deeplearning4j with Apache License 2.0 | 5 votes |
private VAEFwdHelper doForward(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);

    //TODO input validation

    int nEncoderLayers = encoderLayerSizes.length;

    INDArray[] encoderPreOuts = new INDArray[encoderLayerSizes.length];
    INDArray[] encoderActivations = new INDArray[encoderLayerSizes.length];
    INDArray current = input.castTo(getParam("e0" + WEIGHT_KEY_SUFFIX).dataType());
    for (int i = 0; i < nEncoderLayers; i++) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;

        INDArray weights = getParamWithNoise(wKey, training, workspaceMgr);
        INDArray bias = getParamWithNoise(bKey, training, workspaceMgr);

        current = current.mmul(weights).addiRowVector(bias);
        if (forBackprop) {
            encoderPreOuts[i] = current.dup();
        }
        layerConf().getActivationFn().getActivation(current, training);
        encoderActivations[i] = current;
    }

    //Finally, calculate mean value:
    INDArray mW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, training, workspaceMgr);
    INDArray mB = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_B, training, workspaceMgr);

    INDArray pzxMean = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, mW.dataType(), new long[]{current.size(0), mW.size(1)}, 'f');
    pzxMean = current.mmuli(mW, pzxMean).addiRowVector(mB);

    return new VAEFwdHelper(encoderPreOuts, pzxMean, encoderActivations);
}
Example 10
Source File: Upsampling2D.java From deeplearning4j with Apache License 2.0 | 5 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);

    CNN2DFormat format = getFormat();
    boolean nchw = format == CNN2DFormat.NCHW;

    long miniBatch = (int) input.size(0);
    long inDepth = (int) input.size(nchw ? 1 : 3);
    long inH = (int) input.size(nchw ? 2 : 1);
    long inW = (int) input.size(nchw ? 3 : 2);

    long[] epsShape = nchw ? new long[]{miniBatch, inDepth, inH, inW} : new long[]{miniBatch, inH, inW, inDepth};
    INDArray epsOut = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, epsilon.dataType(), epsShape, 'c');

    Gradient gradient = new DefaultGradient();

    CustomOp op = DynamicCustomOp.builder("upsampling_bp")
            .addIntegerArguments(nchw ? 1 : 0)      //1=NCHW, 0=NHWC
            .addInputs(input, epsilon)
            .addOutputs(epsOut)
            .callInplace(false)
            .build();

    Nd4j.getExecutioner().exec(op);

    epsOut = backpropDropOutIfPresent(epsOut);
    return new Pair<>(gradient, epsOut);
}
Example 11
Source File: CDAELayer.java From jstarcraft-rns with Apache License 2.0 | 5 votes |
@Override
public INDArray preOutput(boolean training, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(false);
    applyDropOutIfNecessary(training, workspaceMgr);
    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
    INDArray U = getParamWithNoise(CDAEParameter.USER_KEY, training, workspaceMgr);
    INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);

    // Input validation:
    if (input.rank() != 2 || input.columns() != W.rows()) {
        if (input.rank() != 2) {
            throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank()
                    + " array with shape " + Arrays.toString(input.shape())
                    + ". Missing preprocessor or wrong input type? " + layerId());
        }
        throw new DL4JInvalidInputException("Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape())
                + ") is invalid: does not match layer input size (layer # inputs = " + W.size(0) + ") " + layerId());
    }

    INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.size(0), W.size(1));
    input.mmuli(W, ret);
    ret.addi(U);
    if (hasBias()) {
        ret.addiRowVector(b);
    }

    if (maskArray != null) {
        applyMask(ret);
    }

    return ret;
}
Example 12
Source File: MKLDNNSubsamplingHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public INDArray activate(INDArray input, boolean training, int[] kernel, int[] strides, int[] pad,
                         PoolingType poolingType, ConvolutionMode convolutionMode, int[] dilation,
                         CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
    int hIdx = 2;
    int wIdx = 3;
    if (format == CNN2DFormat.NHWC) {
        hIdx = 1;
        wIdx = 2;
    }

    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format);     //Also performs validation
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {(int) input.size(hIdx), (int) input.size(wIdx)}, kernel, strides, dilation);
    } else {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format);      //Also performs validation
    }

    long[] outShape = format == CNN2DFormat.NCHW ? new long[]{input.size(0), input.size(1), outSize[0], outSize[1]} :
            new long[]{input.size(0), outSize[0], outSize[1], input.size(3)};
    INDArray output = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), outShape);

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setIArguments(kernel[0], kernel[1],
                strides[0], strides[1],
                pad[0], pad[1],
                dilation[0], dilation[1],
                ArrayUtil.fromBoolean(convolutionMode == ConvolutionMode.Same),
                0,                                      //Extra - not used?
                format == CNN2DFormat.NCHW ? 0 : 1);    //0 = NCHW, 1=NHWC
    }

    DynamicCustomOp op;
    switch (poolingType) {
        case MAX:
            op = new MaxPooling2D();
            break;
        case AVG:
            op = new AvgPooling2D();
            break;
        case SUM:
        case PNORM:
        default:
            return null;
    }

    context.purge();
    context.setInputArray(0, input);
    context.setOutputArray(0, output);

    Nd4j.exec(op, context);

    return output;
}
Example 13
Source File: CDAELayer.java From jstarcraft-rns with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    // Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    // INDArray activationDerivative = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf().getLayer().getActivationFunction(), z).derivative());
    // INDArray activationDerivative = conf().getLayer().getActivationFn().getGradient(z);
    // INDArray delta = epsilon.muli(activationDerivative);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst();     // TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);    // f order
    Nd4j.gemm(input, delta, weightGrad, true, false, 1.0, 0.0);
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    INDArray userWeightGrad = gradientViews.get(CDAEParameter.USER_KEY);
    userWeightGrad.assign(delta);
    ret.gradientForVariable().put(CDAEParameter.USER_KEY, userWeightGrad);

    if (hasBias()) {
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0);     // biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { W.size(0), delta.size(0) }, 'f');
    epsilonNext = W.mmuli(delta.transpose(), epsilonNext).transpose();      // W.mmul(delta.transpose()).transpose();

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}
Example 14
Source File: MKLDNNLSTMHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public FwdPassReturn activate(Layer layer, NeuralNetConfiguration conf, IActivation gateActivationFn, INDArray input,
                              INDArray recurrentWeights, INDArray inputWeights, INDArray biases, boolean training,
                              INDArray prevOutputActivations, INDArray prevMemCellState, boolean forBackprop, boolean forwards,
                              String inputWeightKey, INDArray maskArray, boolean hasPeepholeConnections, LayerWorkspaceMgr workspaceMgr) {

    /*
    DL4J data format: [bS, nIn, sL] - dataFormat == 2, directionMode == 0 (forward)
    Inputs:
    x = [bS, nIn, sL]
    Wx = [nIn, 4*nOut]
    Wr = [nOut, 4*nOut]
    Wp = [3*nOut]               Optional peephole weights
    b = [4*nOut]
    seqLen = [bS]
    initialOut = [bs, nOut]
    initialCell = [bs, nOut]

    Outputs:
    out = [bS, nOut, sL]
    outLast = [bs, nOut]
    cellLast = [bs, nOut]

    Gates order: input, forget, input modulation, output

    const auto hasBiases  = B_ARG(0);   // indicates whether biases array is provided
    const auto hasSeqLen  = B_ARG(1);   // indicates whether seqLen array is provided
    const auto hasInitH   = B_ARG(2);   // indicates whether initial output is provided
    const auto hasInitC   = B_ARG(3);   // indicates whether initial cell state is provided
    const auto hasPH      = B_ARG(4);   // indicates whether peephole connections are present
    const auto retFullSeq = B_ARG(5);   // indicates whether to return whole time sequence h {h_0, h_1, ... , h_sL-1}
    const auto retLastH   = B_ARG(6);   // indicates whether to return output at last time step only, in this case shape would be [bS, nOut] (exact shape depends on dataFormat argument)
    const auto retLastC   = B_ARG(7);   // indicates whether to return cells state at last time step only, in this case shape would be [bS, nOut] (exact shape depends on dataFormat argument)
     */

    INDArray b1d = biases.reshape(biases.length());
    INDArray seqLen = null;
    if (maskArray != null) {
        seqLen = BooleanIndexing.firstIndex(maskArray, Conditions.equals(0), 1);    //First 0 along dimension 1 (for [mb, seqLen])
    }

    List<INDArray> args = new ArrayList<>();
    args.add(input);
    args.add(inputWeights);
    args.add(recurrentWeights);
    if (hasPeepholeConnections) {
        throw new IllegalStateException("Not yet implemented");
    }
    args.add(b1d);
    if (seqLen != null)
        args.add(seqLen);
    if (prevOutputActivations != null)
        args.add(prevOutputActivations);
    if (prevMemCellState != null)
        args.add(prevMemCellState);

    IActivation a = ((LSTM) conf.getLayer()).getActivationFn();

    DynamicCustomOp op = DynamicCustomOp.builder("lstmLayer")
            .addInputs(args.toArray(new INDArray[0]))
            .addBooleanArguments(
                    true,                               //hasBiases
                    seqLen != null,                     //hasSeqLen
                    prevOutputActivations != null,      //hasInitH
                    prevMemCellState != null,           //hasInitC
                    hasPeepholeConnections,             //hasPh
                    true,                               //retFullSeq
                    true,                               //retLastH
                    true                                //retLastC
            )
            .addIntegerArguments(
                    2,                                  //data format: 2 = [bS, nIn, sL]
                    0,                                  //direction: 0 = forward
                    activationToArg(gateActivationFn),  //Gate activation
                    activationToArg(a),                 //Cell state activation
                    activationToArg(a)                  //Output activation (same as cell in DL4J)
            )
            .build();

    List<LongShapeDescriptor> outShapes = op.calculateOutputShape();

    for (LongShapeDescriptor lsd : outShapes) {
        INDArray arr = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, lsd.dataType(), lsd.getShape(), lsd.getOrder());
        op.addOutputArgument(arr);
    }

    FwdPassReturn f = new FwdPassReturn();
    f.fwdPassOutput = op.getOutputArgument(0);
    f.lastAct = op.getOutputArgument(1);
    f.lastMemCell = op.getOutputArgument(2);

    return f;
}
Example 15
Source File: BaseLayer.java From deeplearning4j with Apache License 2.0 | 4 votes |
protected Pair<INDArray, INDArray> preOutputWithPreNorm(boolean training, boolean forBackprop, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(forBackprop);
    applyDropOutIfNecessary(training, workspaceMgr);
    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, training, workspaceMgr);
    INDArray b = getParamWithNoise(DefaultParamInitializer.BIAS_KEY, training, workspaceMgr);
    INDArray g = (hasLayerNorm() ? getParam(DefaultParamInitializer.GAIN_KEY) : null);

    INDArray input = this.input.castTo(dataType);

    //Input validation:
    if (input.rank() != 2 || input.columns() != W.rows()) {
        if (input.rank() != 2) {
            throw new DL4JInvalidInputException("Input that is not a matrix; expected matrix (rank 2), got rank " + input.rank()
                    + " array with shape " + Arrays.toString(input.shape())
                    + ". Missing preprocessor or wrong input type? " + layerId());
        }
        throw new DL4JInvalidInputException(
                "Input size (" + input.columns() + " columns; shape = " + Arrays.toString(input.shape())
                        + ") is invalid: does not match layer input size (layer # inputs = " + W.size(0) + ") " + layerId());
    }

    INDArray ret = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, W.dataType(), input.size(0), W.size(1));
    input.castTo(ret.dataType()).mmuli(W, ret);     //TODO Can we avoid this cast? (It should be a no-op if not required, however)

    INDArray preNorm = ret;
    if (hasLayerNorm()) {
        preNorm = (forBackprop ? ret.dup(ret.ordering()) : ret);
        Nd4j.getExecutioner().exec(new LayerNorm(preNorm, g, ret, true, 1));
    }

    if (hasBias()) {
        ret.addiRowVector(b);
    }

    if (maskArray != null) {
        applyMask(ret);
    }

    return new Pair<>(ret, preNorm);
}
Example 16
Source File: MKLDNNConvHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public INDArray preOutput(INDArray input, INDArray weights, INDArray bias, int[] kernel, int[] strides, int[] pad,
                          ConvolutionLayer.AlgoMode mode, ConvolutionLayer.FwdAlgo fwdAlgo, ConvolutionMode convolutionMode,
                          int[] dilation, CNN2DFormat format, LayerWorkspaceMgr workspaceMgr) {
    if (input.dataType() != DataType.FLOAT || weights.dataType() != DataType.FLOAT)
        return null;    //MKL-DNN only supports floating point dtype

    int hDim = 2;
    int wDim = 3;
    if (format == CNN2DFormat.NHWC) {
        hDim = 1;
        wDim = 2;
    }

    int inH = (int) input.size(hDim);
    int inW = (int) input.size(wDim);
    int[] outSize;
    if (convolutionMode == ConvolutionMode.Same) {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, null, convolutionMode, dilation, format);     //Also performs validation
        pad = ConvolutionUtils.getSameModeTopLeftPadding(outSize, new int[] {inH, inW}, kernel, strides, dilation);
    } else {
        outSize = ConvolutionUtils.getOutputSize(input, kernel, strides, pad, convolutionMode, dilation, format);      //Also performs validation
    }

    if (context == null) {
        context = Nd4j.getExecutioner().buildContext();
        context.setIArguments(kernel[0], kernel[1],
                strides[0], strides[1],
                pad[0], pad[1],
                dilation[0], dilation[1],
                ArrayUtil.fromBoolean(convolutionMode == ConvolutionMode.Same),
                format == CNN2DFormat.NCHW ? 0 : 1,     //0=NCHW, 1=NHWC
                1                                       //Weight format: 1 - [oC, iC, kH, kW]
        );
    }

    int outDepth = (int) weights.size(0);
    long[] outShape = (format == CNN2DFormat.NCHW) ? new long[]{input.size(0), outDepth, outSize[0], outSize[1]} :
            new long[]{input.size(0), outSize[0], outSize[1], outDepth};
    INDArray out = workspaceMgr.createUninitialized(ArrayType.ACTIVATIONS, input.dataType(), outShape);

    INDArray[] inputsArr = bias == null ? new INDArray[]{input, weights} : new INDArray[]{input, weights, bias};
    context.purge();
    for (int i = 0; i < inputsArr.length; i++) {
        context.setInputArray(i, inputsArr[i]);
    }

    context.setOutputArray(0, out);
    Conv2D op = new Conv2D();
    Nd4j.exec(op, context);

    return out;
}
Example 17
Source File: MKLDNNBatchNormHelper.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray input, INDArray epsilon, long[] shape, INDArray gamma, INDArray beta,
                                                 INDArray dGammaView, INDArray dBetaView, double eps, CNN2DFormat format,
                                                 LayerWorkspaceMgr workspaceMgr) {
    //Workaround for: https://github.com/eclipse/deeplearning4j/issues/8860
    if (!Shape.hasDefaultStridesForShape(epsilon))
        epsilon = epsilon.dup('c');

    if (input.dataType() != DataType.FLOAT)
        return null;    //MKL-DNN only supports float

    int axis = (input.rank() != 4 || format == CNN2DFormat.NCHW) ? 1 : 3;

    List<INDArray> args = new ArrayList<>();
    args.add(input);
    args.add(meanCache);
    args.add(varCache);
    if (gamma != null)
        args.add(gamma.reshape(gamma.length()));
    if (beta != null)
        args.add(beta.reshape(beta.length()));
    args.add(epsilon);

    DynamicCustomOp op = DynamicCustomOp.builder("batchnorm_bp")
            .addInputs(args.toArray(new INDArray[0]))
            .addIntegerArguments(
                    gamma == null ? 0 : 1,  //Apply scale
                    beta == null ? 0 : 1,   //Apply beta
                    axis)                   //Axis (NCHW) - 1=NCHW, 3=NHWC
            .addFloatingPointArguments(eps)
            .build();

    INDArray epsAtInput = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, input.dataType(), input.shape());
    INDArray dLdm = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, meanCache.dataType(), meanCache.shape());
    INDArray dLdv = workspaceMgr.createUninitialized(ArrayType.BP_WORKING_MEM, meanCache.dataType(), meanCache.shape());

    op.setOutputArgument(0, epsAtInput);
    op.setOutputArgument(1, dLdm);
    op.setOutputArgument(2, dLdv);
    if (dGammaView != null) {
        //Both are always null/not null simultaneously
        op.setOutputArgument(3, dGammaView.reshape(dGammaView.length()));
        op.setOutputArgument(4, dBetaView.reshape(dBetaView.length()));
    }

    Nd4j.exec(op);

    Gradient g = new DefaultGradient();
    g.setGradientFor(BatchNormalizationParamInitializer.GAMMA, dGammaView);
    g.setGradientFor(BatchNormalizationParamInitializer.BETA, dBetaView);

    return new Pair<>(g, epsAtInput);
}
Example 18
Source File: VariationalAutoencoder.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    if (!zeroedPretrainParamGradients) {
        for (Map.Entry<String, INDArray> entry : gradientViews.entrySet()) {
            if (isPretrainParam(entry.getKey())) {
                entry.getValue().assign(0);
            }
        }
        zeroedPretrainParamGradients = true;
    }

    INDArray input = this.input.castTo(dataType);

    Gradient gradient = new DefaultGradient();

    VAEFwdHelper fwd = doForward(true, true, workspaceMgr);
    INDArray currentDelta = pzxActivationFn.backprop(fwd.pzxMeanPreOut, epsilon).getFirst();

    //Finally, calculate mean value:
    INDArray meanW = getParamWithNoise(VariationalAutoencoderParamInitializer.PZX_MEAN_W, true, workspaceMgr);
    INDArray dLdMeanW = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_W);   //f order
    INDArray lastEncoderActivation = fwd.encoderActivations[fwd.encoderActivations.length - 1];
    Nd4j.gemm(lastEncoderActivation, currentDelta, dLdMeanW, true, false, 1.0, 0.0);
    INDArray dLdMeanB = gradientViews.get(VariationalAutoencoderParamInitializer.PZX_MEAN_B);
    currentDelta.sum(dLdMeanB, 0);      //dLdMeanB is initialized/zeroed first in sum op

    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_W, dLdMeanW);
    gradient.gradientForVariable().put(VariationalAutoencoderParamInitializer.PZX_MEAN_B, dLdMeanB);

    epsilon = meanW.mmul(currentDelta.transpose()).transpose();

    int nEncoderLayers = encoderLayerSizes.length;

    IActivation afn = layerConf().getActivationFn();
    for (int i = nEncoderLayers - 1; i >= 0; i--) {
        String wKey = "e" + i + WEIGHT_KEY_SUFFIX;
        String bKey = "e" + i + BIAS_KEY_SUFFIX;

        INDArray weights = getParamWithNoise(wKey, true, workspaceMgr);

        INDArray dLdW = gradientViews.get(wKey);
        INDArray dLdB = gradientViews.get(bKey);

        INDArray preOut = fwd.encoderPreOuts[i];

        currentDelta = afn.backprop(preOut, epsilon).getFirst();

        INDArray actInput;
        if (i == 0) {
            actInput = input;
        } else {
            actInput = fwd.encoderActivations[i - 1];
        }
        Nd4j.gemm(actInput, currentDelta, dLdW, true, false, 1.0, 0.0);
        currentDelta.sum(dLdB, 0);      //dLdB is initialized/zeroed first in sum op

        gradient.gradientForVariable().put(wKey, dLdW);
        gradient.gradientForVariable().put(bKey, dLdB);

        if (i == 0) {
            epsilon = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, currentDelta.dataType(), new long[]{weights.size(0), currentDelta.size(0)}, 'f');
            weights.mmuli(currentDelta.transpose(), epsilon);
            epsilon = epsilon.transpose();
        } else {
            epsilon = weights.mmul(currentDelta.transpose()).transpose();
        }
    }

    return new Pair<>(gradient, epsilon);
}
Example 19
Source File: MaskLayer.java From deeplearning4j with Apache License 2.0 | 4 votes |
private static INDArray applyMask(INDArray input, INDArray maskArray, LayerWorkspaceMgr workspaceMgr, ArrayType type) {
    if (maskArray == null) {
        return workspaceMgr.leverageTo(type, input);
    }
    switch (input.rank()) {
        case 2:
            if (!maskArray.isColumnVectorOrScalar() || maskArray.size(0) != input.size(0)) {
                throw new IllegalStateException("Expected column vector for mask with 2d input, with same size(0)" +
                        " as input. Got mask with shape: " + Arrays.toString(maskArray.shape()) +
                        ", input shape = " + Arrays.toString(input.shape()));
            }
            return workspaceMgr.leverageTo(type, input.mulColumnVector(maskArray));
        case 3:
            //Time series input, shape [Minibatch, size, tsLength], Expect rank 2 mask
            if (maskArray.rank() != 2 || input.size(0) != maskArray.size(0) || input.size(2) != maskArray.size(1)) {
                throw new IllegalStateException("With 3d (time series) input with shape [minibatch, size, sequenceLength]=" +
                        Arrays.toString(input.shape()) + ", expected 2d mask array with shape [minibatch, sequenceLength]." +
                        " Got mask with shape: " + Arrays.toString(maskArray.shape()));
            }
            INDArray fwd = workspaceMgr.createUninitialized(type, input.dataType(), input.shape(), 'f');
            Broadcast.mul(input, maskArray, fwd, 0, 2);
            return fwd;
        case 4:
            //CNN input. Expect column vector to be shape [mb,1,h,1], [mb,1,1,w], or [mb,1,h,w]
            int[] dimensions = new int[4];
            int count = 0;
            for (int i = 0; i < 4; i++) {
                if (input.size(i) == maskArray.size(i)) {
                    dimensions[count++] = i;
                }
            }
            if (count < 4) {
                dimensions = Arrays.copyOfRange(dimensions, 0, count);
            }

            INDArray fwd2 = workspaceMgr.createUninitialized(type, input.dataType(), input.shape(), 'c');
            Broadcast.mul(input, maskArray, fwd2, dimensions);
            return fwd2;
        default:
            throw new RuntimeException("Expected rank 2 to 4 input. Got rank " + input.rank() + " with shape " +
                    Arrays.toString(input.shape()));
    }
}
Example 20
Source File: DeepFMInputLayer.java From jstarcraft-rns with Apache License 2.0 | 4 votes |
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon, LayerWorkspaceMgr workspaceMgr) {
    assertInputSet(true);
    // If this layer is layer L, then epsilon is (w^(L+1)*(d^(L+1))^T) (or equivalent)
    INDArray z = preOutput(true, workspaceMgr);
    // Note: preOutput(INDArray) can't be used as this does a setInput(input) and resets the 'appliedDropout' flag
    // INDArray activationDerivative = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf().getLayer().getActivationFunction(), z).derivative());
    // INDArray activationDerivative = conf().getLayer().getActivationFn().getGradient(z);
    // INDArray delta = epsilon.muli(activationDerivative);
    INDArray delta = layerConf().getActivationFn().backprop(z, epsilon).getFirst();     // TODO handle activation function params

    if (maskArray != null) {
        applyMask(delta);
    }

    Gradient ret = new DefaultGradient();

    INDArray weightGrad = gradientViews.get(DefaultParamInitializer.WEIGHT_KEY);    // f order
    weightGrad.assign(0F);
    for (int index = 0; index < input.rows(); index++) {
        for (int column = 0; column < delta.columns(); column++) {
            int cursor = 0;
            for (int dimension = 0; dimension < dimensionSizes.length; dimension++) {
                int point = cursor + input.getInt(index, dimension);
                float value = weightGrad.getFloat(point, column);
                value += delta.getFloat(index, column);
                weightGrad.put(point, column, value);
                cursor += dimensionSizes[dimension];
            }
        }
    }
    ret.gradientForVariable().put(DefaultParamInitializer.WEIGHT_KEY, weightGrad);

    if (hasBias()) {
        INDArray biasGrad = gradientViews.get(DefaultParamInitializer.BIAS_KEY);
        delta.sum(biasGrad, 0);     // biasGrad is initialized/zeroed first
        ret.gradientForVariable().put(DefaultParamInitializer.BIAS_KEY, biasGrad);
    }

    INDArray W = getParamWithNoise(DefaultParamInitializer.WEIGHT_KEY, true, workspaceMgr);

    INDArray epsilonNext = workspaceMgr.createUninitialized(ArrayType.ACTIVATION_GRAD, new long[] { W.size(0), delta.size(0) }, 'f');
    epsilonNext = W.mmuli(delta.transpose(), epsilonNext).transpose();      // W.mmul(delta.transpose()).transpose();

    weightNoiseParams.clear();

    epsilonNext = backpropDropOutIfPresent(epsilonNext);
    return new Pair<>(ret, epsilonNext);
}