Java Code Examples for org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable()
The following examples show how to use org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable().
They are drawn from open-source projects; the source file, originating project, and license are listed above each example.
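The examples below share a common pattern: a parameter initializer's init(...) method slices sub-views out of the flattened paramsView array, stores each view in the returned parameter map under a key, and registers that same key on the configuration with conf.addVariable(key). The following sketch condenses that pattern into a minimal, hypothetical initializer; the MY_WEIGHT_KEY/MY_BIAS_KEY constants, the standalone class, and the hard-coded initial values are illustrative assumptions only, and a real initializer would implement the full ParamInitializer interface and delegate weight initialization to the layer's configured IWeightInit rather than assigning constants.

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class MyParamInitializerSketch {

    // Hypothetical parameter keys used only in this sketch
    public static final String MY_WEIGHT_KEY = "W";
    public static final String MY_BIAS_KEY = "b";

    public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
        FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
        long nIn = layerConf.getNIn();
        long nOut = layerConf.getNOut();
        long nWeightParams = nIn * nOut;

        Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

        // Slice the flat parameter view into per-parameter sub-views (row-vector layout assumed)
        INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, nWeightParams));
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));

        if (initializeParams) {
            // Placeholder initialization; real initializers use the layer's IWeightInit
            weightView.assign(0.01);
            biasView.assign(0.0);
        }

        // Register each parameter key on the configuration alongside the map entry
        params.put(MY_WEIGHT_KEY, weightView.reshape('f', nIn, nOut));
        conf.addVariable(MY_WEIGHT_KEY);
        params.put(MY_BIAS_KEY, biasView);
        conf.addVariable(MY_BIAS_KEY);

        return params;
    }
}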
Example 1
Source File: PReLUParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof BaseLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, length));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    return params;
}
Example 2
Source File: DepthwiseConvolutionParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    DepthwiseConvolution2D layer = (DepthwiseConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2)
        throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));

    params.put(WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    if (layer.hasBias()) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 3
Source File: PretrainParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);

    org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
            (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();
    val nWeightParams = nIn * nOut;

    INDArray visibleBiasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
    params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
    conf.addVariable(VISIBLE_BIAS_KEY);

    return params;
}
Example 4
Source File: DeepFMParameter.java From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration configuration, INDArray view, boolean initialize) {
    Map<String, INDArray> parameters = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    FeedForwardLayer layerConfiguration = (FeedForwardLayer) configuration.getLayer();
    long numberOfOut = layerConfiguration.getNOut();
    long numberOfWeights = numberOfFeatures * numberOfOut;
    INDArray weight = view.get(new INDArrayIndex[] { NDArrayIndex.point(0),
            NDArrayIndex.interval(0, numberOfWeights) });
    INDArray bias = view.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(numberOfWeights, numberOfWeights + numberOfOut));
    parameters.put(WEIGHT_KEY, this.createWeightMatrix(configuration, weight, initialize));
    parameters.put(BIAS_KEY, createBias(configuration, bias, initialize));
    configuration.addVariable(WEIGHT_KEY);
    configuration.addVariable(BIAS_KEY);
    return parameters;
}
Example 5
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                .updater(new AdaGrad(1e-2))
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RastriginFunctionModel(10, conf);
        int nParams = (int) m.numParams();
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] <= scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
Example 6
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1))
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new SphereFunctionModel(100, dist, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            for (int j = 0; j < 100; j++) {
                opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            }
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
    assertTrue(scores[scores.length - 1] < 1.0); //Very easy function, expect score ~= 0 with any reasonable number of steps/numLineSearchIter
}
Example 7
Source File: OCNNParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer =
            (org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer();
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    val nIn = ocnnOutputLayer.getNIn();
    int hiddenLayer = ocnnOutputLayer.getHiddenSize();
    Preconditions.checkState(hiddenLayer > 0, "OCNNOutputLayer hidden layer state: must be non-zero.");

    val firstLayerWeightLength = hiddenLayer;
    val secondLayerLength = nIn * hiddenLayer;
    int rLength = 1;
    INDArray weightView = paramsView.get(point(0), interval(0, firstLayerWeightLength))
            .reshape(1, hiddenLayer);
    INDArray weightsTwoView = paramsView.get(point(0),
            NDArrayIndex.interval(firstLayerWeightLength, firstLayerWeightLength + secondLayerLength))
            .reshape('f', nIn, hiddenLayer);
    INDArray rView = paramsView.get(point(0), point(paramsView.length() - rLength));

    INDArray paramViewPut = createWeightMatrix(conf, weightView, initializeParams);
    params.put(W_KEY, paramViewPut);
    conf.addVariable(W_KEY);
    INDArray paramIvewPutTwo = createWeightMatrix(conf, weightsTwoView, initializeParams);
    params.put(V_KEY, paramIvewPutTwo);
    conf.addVariable(V_KEY);
    INDArray rViewPut = createWeightMatrix(conf, rView, initializeParams);
    params.put(R_KEY, rViewPut);
    conf.addVariable(R_KEY);

    return params;
}
Example 8
Source File: ElementWiseParamInitializer.java From deeplearning4j with Apache License 2.0
/**
 * Initialize the parameters
 *
 * @param conf             the configuration
 * @param paramsView       a view of the full network (backprop) parameters
 * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the
 *                         values in the paramsView array (but do select out the appropriate subset, reshape etc as required)
 * @return Map of parameters keyed by type (view of the 'paramsView' array)
 */
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();

    val nWeightParams = nIn;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nWeightParams, nWeightParams + nIn));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    return params;
}
Example 9
Source File: CenterLossParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer();

    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut(); // also equal to numClasses

    val wEndOffset = nIn * nOut;
    val bEndOffset = wEndOffset + nOut;
    val cEndOffset = bEndOffset + nIn * nOut;

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, wEndOffset));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(wEndOffset, bEndOffset));
    INDArray centerLossView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(bEndOffset, cEndOffset)).reshape('c', nOut, nIn);

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(CENTER_KEY, createCenterLossMatrix(conf, centerLossView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    conf.addVariable(CENTER_KEY);

    return params;
}
Example 10
Source File: SeparableConvolutionParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2)
        throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));
    INDArray pointWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams + depthWiseParams, numParams(conf)));

    params.put(DEPTH_WISE_WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(DEPTH_WISE_WEIGHT_KEY);
    params.put(POINT_WISE_WEIGHT_KEY, createPointWiseWeightMatrix(conf, pointWiseWeightView, initializeParams));
    conf.addVariable(POINT_WISE_WEIGHT_KEY);

    if (layer.hasBias()) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 11
Source File: SameDiffParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer();
    Map<String, INDArray> out = subsetAndReshape(sd.getLayerParams().getParameterKeys(),
            sd.getLayerParams().getParamShapes(), paramsView, sd);
    if (initializeParams) {
        sd.initializeParameters(out);
    }

    for (String s : sd.getLayerParams().getParameterKeys()) {
        conf.addVariable(s);
    }

    return out;
}
Example 12
Source File: CDAEParameter.java From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    long nIn = layerConf.getNIn();
    long nOut = layerConf.getNOut();
    long nWeightParams = nIn * nOut;
    long nUserWeightParams = numberOfUsers * nOut;
    INDArray userWeightView = paramsView.get(new INDArrayIndex[] { NDArrayIndex.point(0),
            NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams) });
    params.put(USER_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    conf.addVariable(USER_KEY);
    return params;
}
Example 13
Source File: SimpleRnnParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SimpleRnn c = (SimpleRnn) conf.getLayer();
    val nIn = c.getNIn();
    val nOut = c.getNOut();

    Map<String, INDArray> m;

    if (initializeParams) {
        m = getSubsets(paramsView, nIn, nOut, false, hasLayerNorm(c));
        INDArray w = c.getWeightInitFn().init(nIn, nOut, new long[]{nIn, nOut}, 'f', m.get(WEIGHT_KEY));
        m.put(WEIGHT_KEY, w);

        IWeightInit rwInit;
        if (c.getWeightInitFnRecurrent() != null) {
            rwInit = c.getWeightInitFnRecurrent();
        } else {
            rwInit = c.getWeightInitFn();
        }

        INDArray rw = rwInit.init(nOut, nOut, new long[]{nOut, nOut}, 'f', m.get(RECURRENT_WEIGHT_KEY));
        m.put(RECURRENT_WEIGHT_KEY, rw);

        m.get(BIAS_KEY).assign(c.getBiasInit());

        if (hasLayerNorm(c)) {
            m.get(GAIN_KEY).assign(c.getGainInit());
        }
    } else {
        m = getSubsets(paramsView, nIn, nOut, true, hasLayerNorm(c));
    }

    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    if (hasLayerNorm(c)) {
        conf.addVariable(GAIN_KEY);
    }

    return m;
}
Example 14
Source File: LSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL};

        IWeightInit rwInit;
        if (layerConf.getWeightInitFnRecurrent() != null) {
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
Example 15
Source File: GravesLSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf =
            (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL + 3};

        IWeightInit rwInit;
        if (layerConf.getWeightInitFnRecurrent() != null) {
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL + 3}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
Example 16
Source File: DefaultParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();

    val nWeightParams = nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nWeightParams));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    long offset = nWeightParams;
    if (hasBias(layerConf)) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
        offset += nOut;
    }

    if (hasLayerNorm(layerConf)) {
        INDArray gainView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
        conf.addVariable(GAIN_KEY);
    }

    return params;
}
Example 17
Source File: BatchNormalizationParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    // TODO setup for RNN
    BatchNormalization layer = (BatchNormalization) conf.getLayer();
    val nOut = layer.getNOut();

    long meanOffset = 0;
    if (!layer.isLockGammaBeta()) { //No gamma/beta parameters when gamma/beta are locked
        INDArray gammaView = paramView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nOut));
        INDArray betaView = paramView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nOut, 2 * nOut));

        params.put(GAMMA, createGamma(conf, gammaView, initializeParams));
        conf.addVariable(GAMMA);
        params.put(BETA, createBeta(conf, betaView, initializeParams));
        conf.addVariable(BETA);

        meanOffset = 2 * nOut;
    }

    INDArray globalMeanView = paramView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(meanOffset, meanOffset + nOut));
    INDArray globalVarView = paramView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(meanOffset + nOut, meanOffset + 2 * nOut));

    if (initializeParams) {
        globalMeanView.assign(0);
        if (layer.isUseLogStd()) {
            //Global log stdev: assign 0.0 as initial value (s=sqrt(v), and log10(s) = log10(sqrt(v)) -> log10(1) = 0
            globalVarView.assign(0);
        } else {
            //Global variance view: assign 1.0 as initial value
            globalVarView.assign(1);
        }
    }

    params.put(GLOBAL_MEAN, globalMeanView);
    conf.addVariable(GLOBAL_MEAN);
    if (layer.isUseLogStd()) {
        params.put(GLOBAL_LOG_STD, globalVarView);
        conf.addVariable(GLOBAL_LOG_STD);
    } else {
        params.put(GLOBAL_VAR, globalVarView);
        conf.addVariable(GLOBAL_VAR);
    }

    return params;
}
Example 18
Source File: GravesBidirectionalLSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
            (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);

    val nParamsInput = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;

    val rwFOffset = nParamsInput;
    val bFOffset = rwFOffset + nParamsRecurrent;
    val iwROffset = bFOffset + nBias;
    val rwROffset = iwROffset + nParamsInput;
    val bROffset = rwROffset + nParamsRecurrent;

    INDArray iwF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(bROffset, bROffset + nBias));

    if (initializeParams) {
        bF.put(new INDArrayIndex[]{NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        bR.put(new INDArrayIndex[]{NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    /*The above line initializes the forget gate biases to specified value.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     * not done, it will be harder to learn long range dependencies because the smaller values of the forget
     * gates will create a vanishing gradients problem."
     * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */

    if (initializeParams) {
        //As per standard LSTM
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[]{nLast, 4 * nL};
        val recurrentWShape = new long[]{nL, 4 * nL + 3};

        params.put(INPUT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }

    return params;
}
Example 19
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {

    if (PRINT_OPT_RESULTS)
        System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= "
                + nDimensions);

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
            .updater(new Sgd(1e-2))
            .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
    conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

    Random rng = new DefaultRandom(12345L);
    org.nd4j.linalg.api.rng.distribution.Distribution dist =
            new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
    Model m = new SphereFunctionModel(nDimensions, dist, conf);

    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreBefore = m.score();
    assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore));
    if (PRINT_OPT_RESULTS) {
        System.out.println("Before:");
        System.out.println(scoreBefore);
        System.out.println(m.params());
    }

    ConvexOptimizer opt = getOptimizer(oa, conf, m);

    opt.setupSearchState(m.gradientAndScore());
    for (int i = 0; i < 100; i++) {
        opt.optimize(LayerWorkspaceMgr.noWorkspaces());
    }
    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreAfter = m.score();

    assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter));
    if (PRINT_OPT_RESULTS) {
        System.out.println("After:");
        System.out.println(scoreAfter);
        System.out.println(m.params());
    }

    //Expected behaviour after optimization:
    //(a) score is better (lower) after optimization.
    //(b) Parameters are closer to minimum after optimization (TODO)
    assertTrue("Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")",
            scoreAfter < scoreBefore);
}
Example 20
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter)
                .updater(new Sgd(1e-1))
                .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                .build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RosenbrockFunctionModel(100, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue("NaN or infinite score: " + scores[i],
                    !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rosenbrock: Multiple optimization iterations ( " + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter= " + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] < scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}