Java Code Examples for org.deeplearning4j.nn.conf.inputs.InputType#arrayElementsPerExample()
The following examples show how to use org.deeplearning4j.nn.conf.inputs.InputType#arrayElementsPerExample().
Each example is taken from the deeplearning4j project; the original source file is noted above each snippet.
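For orientation before the examples: arrayElementsPerExample() returns how many array elements a single example occupies for a given InputType, and every memory report below builds on that count. The following is a minimal, hypothetical sketch (the class name and sizes are illustrative, not from any example below), assuming the standard InputType factory methods behave as in recent deeplearning4j releases:

import org.deeplearning4j.nn.conf.inputs.InputType;

public class InputTypeSizeDemo {
    public static void main(String[] args) {
        //Feed-forward input: elements per example = layer size
        InputType ff = InputType.feedForward(256);
        System.out.println(ff.arrayElementsPerExample());   //256

        //Recurrent input: size * time series length
        InputType rnn = InputType.recurrent(128, 50);
        System.out.println(rnn.arrayElementsPerExample());  //128 * 50 = 6400

        //Convolutional input: height * width * channels
        InputType cnn = InputType.convolutional(32, 32, 3);
        System.out.println(cnn.arrayElementsPerExample());  //32 * 32 * 3 = 3072
    }
}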
Example 1
Source File: Subsampling3DLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType;
    InputType.InputTypeConvolutional3D outputType = (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType);
    val actElementsPerEx = outputType.arrayElementsPerExample();

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * outputType.getDepth()
            * kernelSize[0] * kernelSize[1];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Subsampling3DLayer.class, inputType, outputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 2
Source File: SubsamplingLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);
    val actElementsPerEx = outputType.arrayElementsPerExample();

    //TODO Subsampling helper memory use... (CuDNN etc)

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0] * kernelSize[1];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, SubsamplingLayer.class, inputType, outputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 3
Source File: Upsampling1D.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeRecurrent recurrent = (InputType.InputTypeRecurrent) inputType;
    InputType.InputTypeRecurrent outputType = (InputType.InputTypeRecurrent) getOutputType(-1, inputType);

    long im2colSizePerEx = recurrent.getSize() * outputType.getTimeSeriesLength() * size[0];
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling1D.class, inputType, outputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 4
Source File: EmbeddingLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer, but no dropout is possible here, and no epsilons
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    //Embedding layer does not use caching.
    //Inference: no working memory - just activations (pullRows)
    //Training: preout op, the only in-place ops on epsilon (from layer above) + assign ops
    return new LayerMemoryReport.Builder(layerName, EmbeddingLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, 0, actElementsPerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 5
Source File: Upsampling3D.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional3D c = (InputType.InputTypeConvolutional3D) inputType;
    InputType.InputTypeConvolutional3D outputType = (InputType.InputTypeConvolutional3D) getOutputType(-1, inputType);

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getDepth() * outputType.getHeight() * outputType.getWidth()
            * size[0] * size[1] * size[2];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling3D.class, inputType, outputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 6
Source File: Upsampling2D.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);

    //During forward pass: im2col array + reduce. Reduce is counted as activations, so only im2col is working mem
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * size[0] * size[1];

    //Current implementation does NOT cache im2col etc... which means: it's recalculated on each backward pass
    long trainingWorkingSizePerEx = im2colSizePerEx;
    if (getIDropout() != null) {
        //Dup on the input before dropout, but only for training
        trainingWorkingSizePerEx += inputType.arrayElementsPerExample();
    }

    return new LayerMemoryReport.Builder(layerName, Upsampling2D.class, inputType, outputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, im2colSizePerEx, 0, trainingWorkingSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 7
Source File: DropoutLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();

    //During inference: not applied. During backprop: dup the input, in case it's used elsewhere
    //But: this will be counted in the activations
    //(technically inference memory is over-estimated as a result)

    return new LayerMemoryReport.Builder(layerName, DropoutLayer.class, inputType, inputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, 0, 0, 0) //No working mem, other than activations etc
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 8
Source File: DenseLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
            .build();
}
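As a usage sketch, a report like the one above can be requested directly from a layer configuration. The snippet below is illustrative rather than canonical: the class name, the layer sizes, and the Adam updater are arbitrary choices, and the updater is set explicitly because getMemoryReport() consults getIUpdater() when the layer is used outside a full NeuralNetConfiguration.

import org.deeplearning4j.nn.conf.inputs.InputType;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.memory.LayerMemoryReport;
import org.nd4j.linalg.learning.config.Adam;

public class MemoryReportDemo {
    public static void main(String[] args) {
        //Hypothetical sizes; nIn/nOut are needed for the parameter count
        DenseLayer layer = new DenseLayer.Builder()
                .nIn(100).nOut(10)
                .updater(new Adam())
                .build();

        //The input type's arrayElementsPerExample() (here 100) feeds the
        //variable training working-memory estimate in the report
        LayerMemoryReport report = layer.getMemoryReport(InputType.feedForward(100));
        System.out.println(report);
    }
}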
Example 9
Source File: CustomLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Memory report is used to estimate how much memory is required for the layer, for different configurations
    //If you don't need this functionality for your custom layer, you can return a LayerMemoryReport
    // with all 0s, or

    //This implementation: based on DenseLayer implementation
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    int updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        //Assume we dup the input for dropout
        trainSizeVariable += inputType.arrayElementsPerExample();
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, CustomLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
            .build();
}
Example 10
Source File: BaseOutputLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer...
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, OutputLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 11
Source File: EmbeddingSequenceLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    return new LayerMemoryReport.Builder(layerName, EmbeddingSequenceLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, 0, actElementsPerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 12
Source File: CenterLossOutputLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Basically a dense layer, with some extra params...
    InputType outputType = getOutputType(-1, inputType);

    val nParamsW = nIn * nOut;
    val nParamsB = nOut;
    val nParamsCenter = nIn * nOut;
    val numParams = nParamsW + nParamsB + nParamsCenter;

    int updaterStateSize = (int) (getUpdaterByParam(CenterLossParamInitializer.WEIGHT_KEY).stateSize(nParamsW)
            + getUpdaterByParam(CenterLossParamInitializer.BIAS_KEY).stateSize(nParamsB)
            + getUpdaterByParam(CenterLossParamInitializer.CENTER_KEY).stateSize(nParamsCenter));

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, CenterLossOutputLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 13
Source File: Yolo2OutputLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    long numValues = inputType.arrayElementsPerExample();

    //This is a VERY rough estimate...
    return new LayerMemoryReport.Builder(layerName, Yolo2OutputLayer.class, inputType, inputType)
            .standardMemory(0, 0) //No params
            .workingMemory(0, numValues, 0, 6 * numValues)
            .cacheMemory(0, 0) //No cache
            .build();
}
Example 14
Source File: LocalResponseNormalization.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();

    //Forward pass: 3x input size as working memory, in addition to output activations
    //Backward pass: 2x input size as working memory, in addition to epsilons

    return new LayerMemoryReport.Builder(layerName, DenseLayer.class, inputType, inputType)
            .standardMemory(0, 0)
            .workingMemory(0, 2 * actElementsPerEx, 0, 3 * actElementsPerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
            .build();
}
Example 15
Source File: ActivationLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val actElementsPerEx = inputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, ActivationLayer.class, inputType, inputType)
            .standardMemory(0, 0) //No params
            //During inference: modify input activation in-place
            //During backprop: dup the input for later re-use
            .workingMemory(0, 0, 0, actElementsPerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 16
Source File: ElementWiseMultiplicationLayer.java, from deeplearning4j (Apache License 2.0)
/**
 * This is a report of the estimated memory consumption for the given layer
 *
 * @param inputType Input type to the layer. Memory consumption is often a function of the input type
 * @return Memory report for the layer
 */
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizeFixed = 0;
    int trainSizeVariable = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
            trainSizeVariable += 0; //TODO
        } else {
            //Assume we dup the input
            trainSizeVariable += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by activation function backprop
    // then we have 'epsilonNext' which is equivalent to input size
    trainSizeVariable += outputType.arrayElementsPerExample();

    return new LayerMemoryReport.Builder(layerName, ElementWiseMultiplicationLayer.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainSizeFixed, trainSizeVariable) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching in DenseLayer
            .build();
}
Example 17
Source File: BatchNormalization.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    InputType outputType = getOutputType(-1, inputType);

    //TODO CuDNN helper etc

    val numParams = initializer().numParams(this);
    int updaterStateSize = 0;
    for (String s : BatchNormalizationParamInitializer.getInstance().paramKeys(this)) {
        updaterStateSize += getUpdaterByParam(s).stateSize(nOut);
    }

    //During forward pass: working memory size approx. equal to 2x input size (copy ops, etc)
    val inferenceWorkingSize = 2 * inputType.arrayElementsPerExample();

    //During training: we calculate mean and variance... result is equal to nOut, and INDEPENDENT of minibatch size
    val trainWorkFixed = 2 * nOut;
    //During backprop: multiple working arrays... output size, 2 * output size (indep. of example size),
    val trainWorkingSizePerExample = inferenceWorkingSize //Inference during backprop
            + (outputType.arrayElementsPerExample() + 2 * nOut); //Backprop gradient calculation

    return new LayerMemoryReport.Builder(layerName, BatchNormalization.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, trainWorkFixed, trainWorkingSizePerExample) //No additional memory (beyond activations) for inference
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 18
Source File: AutoEncoder.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //Because of supervised + unsupervised modes: we'll assume unsupervised, which has the larger memory requirements
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample() + inputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int trainSizePerEx = 0;
    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
        } else {
            //Assume we dup the input
            trainSizePerEx += inputType.arrayElementsPerExample();
        }
    }

    //Also, during backprop: we do a preOut call -> gives us activations size equal to the output size
    // which is modified in-place by loss function
    trainSizePerEx += actElementsPerEx;

    return new LayerMemoryReport.Builder(layerName, AutoEncoder.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, 0, 0, trainSizePerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 19
Source File: VariationalAutoencoder.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    //For training: we'll assume unsupervised pretraining, as this has higher memory requirements
    InputType outputType = getOutputType(-1, inputType);

    val actElementsPerEx = outputType.arrayElementsPerExample();
    val numParams = initializer().numParams(this);
    int updaterStateSize = (int) getIUpdater().stateSize(numParams);

    int inferenceWorkingMemSizePerEx = 0;
    //Forward pass size through the encoder:
    for (int i = 1; i < encoderLayerSizes.length; i++) {
        inferenceWorkingMemSizePerEx += encoderLayerSizes[i];
    }

    //Forward pass size through the decoder, during training
    //p(Z|X) mean and stdev; pzxSigmaSquared, pzxSigma -> all size equal to nOut
    long decoderFwdSizeWorking = 4 * nOut;
    //plus, nSamples * decoder size
    //For each decoding: random sample (nOut), z (nOut), activations for each decoder layer
    decoderFwdSizeWorking += numSamples * (2 * nOut + ArrayUtil.sum(getDecoderLayerSizes()));
    //Plus, component of score
    decoderFwdSizeWorking += nOut;

    //Backprop size through the encoder and decoder: approx. 2x forward pass size
    long trainWorkingMemSize = 2 * (inferenceWorkingMemSizePerEx + decoderFwdSizeWorking);

    if (getIDropout() != null) {
        if (false) {
            //TODO drop connect
            //Dup the weights... note that this does NOT depend on the minibatch size...
        } else {
            //Assume we dup the input
            trainWorkingMemSize += inputType.arrayElementsPerExample();
        }
    }

    return new LayerMemoryReport.Builder(layerName, VariationalAutoencoder.class, inputType, outputType)
            .standardMemory(numParams, updaterStateSize)
            .workingMemory(0, inferenceWorkingMemSizePerEx, 0, trainWorkingMemSize)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, MemoryReport.CACHE_MODE_ALL_ZEROS) //No caching
            .build();
}
Example 20
Source File: ConvolutionLayer.java, from deeplearning4j (Apache License 2.0)
@Override
public LayerMemoryReport getMemoryReport(InputType inputType) {
    val paramSize = initializer().numParams(this);
    val updaterStateSize = (int) getIUpdater().stateSize(paramSize);

    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    InputType.InputTypeConvolutional outputType = (InputType.InputTypeConvolutional) getOutputType(-1, inputType);

    //TODO convolution helper memory use... (CuDNN etc)

    //During forward pass: im2col array, mmul (result activations), in-place broadcast add
    val im2colSizePerEx = c.getChannels() * outputType.getHeight() * outputType.getWidth() * kernelSize[0] * kernelSize[1];

    //During training: have im2col array, in-place gradient calculation, then epsilons...
    //But: im2col array may be cached...
    Map<CacheMode, Long> trainWorkingMemoryPerEx = new HashMap<>();
    Map<CacheMode, Long> cachedPerEx = new HashMap<>();

    //During backprop: im2col array for forward pass (possibly cached) + the epsilon6d array required to calculate
    // the 4d epsilons (equal size to input)
    //Note that the eps6d array is same size as im2col
    for (CacheMode cm : CacheMode.values()) {
        long trainWorkingSizePerEx;
        long cacheMemSizePerEx = 0;
        if (cm == CacheMode.NONE) {
            trainWorkingSizePerEx = 2 * im2colSizePerEx;
        } else {
            //im2col is cached, but epsNext2d/eps6d is not
            cacheMemSizePerEx = im2colSizePerEx;
            trainWorkingSizePerEx = im2colSizePerEx;
        }

        if (getIDropout() != null) {
            //Dup on the input before dropout, but only for training
            trainWorkingSizePerEx += inputType.arrayElementsPerExample();
        }

        trainWorkingMemoryPerEx.put(cm, trainWorkingSizePerEx);
        cachedPerEx.put(cm, cacheMemSizePerEx);
    }

    return new LayerMemoryReport.Builder(layerName, ConvolutionLayer.class, inputType, outputType)
            .standardMemory(paramSize, updaterStateSize)
            //im2col caching -> only variable size caching
            .workingMemory(0, im2colSizePerEx, MemoryReport.CACHE_MODE_ALL_ZEROS, trainWorkingMemoryPerEx)
            .cacheMemory(MemoryReport.CACHE_MODE_ALL_ZEROS, cachedPerEx)
            .build();
}