Java Code Examples for org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable()
The following examples show how to use org.deeplearning4j.nn.conf.NeuralNetConfiguration#addVariable().
They are drawn from open-source projects; the source file, originating project, and license are listed above each example.
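The examples below share a common pattern: a parameter initializer's init(...) method slices sub-views out of the flattened paramsView array, stores each view in the returned parameter map under a key, and registers that same key on the configuration with conf.addVariable(key). The following sketch condenses that pattern into a minimal, hypothetical initializer; the MY_WEIGHT_KEY/MY_BIAS_KEY constants, the standalone class, and the hard-coded initial values are illustrative assumptions only, and a real initializer would implement the full ParamInitializer interface and delegate weight initialization to the layer's configured IWeightInit rather than assigning constants.

import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;

import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.FeedForwardLayer;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.indexing.NDArrayIndex;

public class MyParamInitializerSketch {

    // Hypothetical parameter keys used only in this sketch
    public static final String MY_WEIGHT_KEY = "W";
    public static final String MY_BIAS_KEY = "b";

    public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
        FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
        long nIn = layerConf.getNIn();
        long nOut = layerConf.getNOut();
        long nWeightParams = nIn * nOut;

        Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

        // Slice the flat parameter view into per-parameter sub-views (row-vector layout assumed)
        INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, nWeightParams));
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(nWeightParams, nWeightParams + nOut));

        if (initializeParams) {
            // Placeholder initialization; real initializers use the layer's IWeightInit
            weightView.assign(0.01);
            biasView.assign(0.0);
        }

        // Register each parameter key on the configuration alongside the map entry
        params.put(MY_WEIGHT_KEY, weightView.reshape('f', nIn, nOut));
        conf.addVariable(MY_WEIGHT_KEY);
        params.put(MY_BIAS_KEY, biasView);
        conf.addVariable(MY_BIAS_KEY);

        return params;
    }
}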
Example 1
Source File: PReLUParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof BaseLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, length));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    return params;
}
Example 2
Source File: DepthwiseConvolutionParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    DepthwiseConvolution2D layer = (DepthwiseConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2)
        throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    DepthwiseConvolution2D layerConf = (DepthwiseConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));

    params.put(WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    if (layer.hasBias()) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 3
Source File: PretrainParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);

    org.deeplearning4j.nn.conf.layers.BasePretrainNetwork layerConf =
            (org.deeplearning4j.nn.conf.layers.BasePretrainNetwork) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();
    val nWeightParams = nIn * nOut;

    INDArray visibleBiasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nIn));
    params.put(VISIBLE_BIAS_KEY, createVisibleBias(conf, visibleBiasView, initializeParams));
    conf.addVariable(VISIBLE_BIAS_KEY);

    return params;
}
Example 4
Source File: DeepFMParameter.java From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration configuration, INDArray view, boolean initialize) {
    Map<String, INDArray> parameters = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    FeedForwardLayer layerConfiguration = (FeedForwardLayer) configuration.getLayer();
    long numberOfOut = layerConfiguration.getNOut();
    long numberOfWeights = numberOfFeatures * numberOfOut;
    INDArray weight = view.get(new INDArrayIndex[] { NDArrayIndex.point(0),
            NDArrayIndex.interval(0, numberOfWeights) });
    INDArray bias = view.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(numberOfWeights, numberOfWeights + numberOfOut));
    parameters.put(WEIGHT_KEY, this.createWeightMatrix(configuration, weight, initialize));
    parameters.put(BIAS_KEY, createBias(configuration, bias, initialize));
    configuration.addVariable(WEIGHT_KEY);
    configuration.addVariable(BIAS_KEY);
    return parameters;
}
Example 5
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                .updater(new AdaGrad(1e-2))
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RastriginFunctionModel(10, conf);
        int nParams = (int) m.numParams();
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] <= scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}
Example 6
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testSphereFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        Random rng = new DefaultRandom(12345L);
        org.nd4j.linalg.api.rng.distribution.Distribution dist =
                new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter).updater(new Sgd(0.1))
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new SphereFunctionModel(100, dist, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            for (int j = 0; j < 100; j++) {
                opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            }
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Multiple optimization iterations (" + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
    assertTrue(scores[scores.length - 1] < 1.0); //Very easy function, expect score ~= 0 with any reasonable number of steps/numLineSearchIter
}
Example 7
Source File: OCNNParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer ocnnOutputLayer =
            (org.deeplearning4j.nn.conf.ocnn.OCNNOutputLayer) conf.getLayer();
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    val nIn = ocnnOutputLayer.getNIn();
    int hiddenLayer = ocnnOutputLayer.getHiddenSize();
    Preconditions.checkState(hiddenLayer > 0, "OCNNOutputLayer hidden layer state: must be non-zero.");

    val firstLayerWeightLength = hiddenLayer;
    val secondLayerLength = nIn * hiddenLayer;
    int rLength = 1;
    INDArray weightView = paramsView.get(point(0), interval(0, firstLayerWeightLength))
            .reshape(1, hiddenLayer);
    INDArray weightsTwoView = paramsView.get(point(0),
            NDArrayIndex.interval(firstLayerWeightLength, firstLayerWeightLength + secondLayerLength))
            .reshape('f', nIn, hiddenLayer);
    INDArray rView = paramsView.get(point(0), point(paramsView.length() - rLength));

    INDArray paramViewPut = createWeightMatrix(conf, weightView, initializeParams);
    params.put(W_KEY, paramViewPut);
    conf.addVariable(W_KEY);
    INDArray paramIvewPutTwo = createWeightMatrix(conf, weightsTwoView, initializeParams);
    params.put(V_KEY, paramIvewPutTwo);
    conf.addVariable(V_KEY);
    INDArray rViewPut = createWeightMatrix(conf, rView, initializeParams);
    params.put(R_KEY, rViewPut);
    conf.addVariable(R_KEY);

    return params;
}
Example 8
Source File: ElementWiseParamInitializer.java From deeplearning4j with Apache License 2.0
/**
 * Initialize the parameters
 *
 * @param conf             the configuration
 * @param paramsView       a view of the full network (backprop) parameters
 * @param initializeParams if true: initialize the parameters according to the configuration. If false: don't modify the
 *                         values in the paramsView array (but do select out the appropriate subset, reshape etc as required)
 * @return Map of parameters keyed by type (view of the 'paramsView' array)
 */
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();

    val nWeightParams = nIn;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nWeightParams));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nWeightParams, nWeightParams + nIn));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    return params;
}
Example 9
Source File: CenterLossParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer) conf.getLayer();

    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut(); // also equal to numClasses

    val wEndOffset = nIn * nOut;
    val bEndOffset = wEndOffset + nOut;
    val cEndOffset = bEndOffset + nIn * nOut;

    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, wEndOffset));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(wEndOffset, bEndOffset));
    INDArray centerLossView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(bEndOffset, cEndOffset)).reshape('c', nOut, nIn);

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
    params.put(CENTER_KEY, createCenterLossMatrix(conf, centerLossView, initializeParams));
    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    conf.addVariable(CENTER_KEY);

    return params;
}
Example 10
Source File: SeparableConvolutionParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SeparableConvolution2D layer = (SeparableConvolution2D) conf.getLayer();
    if (layer.getKernelSize().length != 2)
        throw new IllegalArgumentException("Filter size must be == 2");

    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    SeparableConvolution2D layerConf = (SeparableConvolution2D) conf.getLayer();

    val depthWiseParams = numDepthWiseParams(layerConf);
    val biasParams = numBiasParams(layerConf);

    INDArray depthWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams, biasParams + depthWiseParams));
    INDArray pointWiseWeightView = paramsView.get(
            NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(biasParams + depthWiseParams, numParams(conf)));

    params.put(DEPTH_WISE_WEIGHT_KEY, createDepthWiseWeightMatrix(conf, depthWiseWeightView, initializeParams));
    conf.addVariable(DEPTH_WISE_WEIGHT_KEY);
    params.put(POINT_WISE_WEIGHT_KEY, createPointWiseWeightMatrix(conf, pointWiseWeightView, initializeParams));
    conf.addVariable(POINT_WISE_WEIGHT_KEY);

    if (layer.hasBias()) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(0, biasParams));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
    }
    return params;
}
Example 11
Source File: SameDiffParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    AbstractSameDiffLayer sd = (AbstractSameDiffLayer) conf.getLayer();
    Map<String, INDArray> out = subsetAndReshape(sd.getLayerParams().getParameterKeys(),
            sd.getLayerParams().getParamShapes(), paramsView, sd);
    if (initializeParams) {
        sd.initializeParameters(out);
    }

    for (String s : sd.getLayerParams().getParameterKeys()) {
        conf.addVariable(s);
    }

    return out;
}
Example 12
Source File: CDAEParameter.java From jstarcraft-rns with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    long nIn = layerConf.getNIn();
    long nOut = layerConf.getNOut();
    long nWeightParams = nIn * nOut;
    long nUserWeightParams = numberOfUsers * nOut;
    INDArray userWeightView = paramsView.get(new INDArrayIndex[] { NDArrayIndex.point(0),
            NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams) });
    params.put(USER_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    conf.addVariable(USER_KEY);
    return params;
}
Example 13
Source File: SimpleRnnParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    SimpleRnn c = (SimpleRnn) conf.getLayer();
    val nIn = c.getNIn();
    val nOut = c.getNOut();

    Map<String, INDArray> m;

    if (initializeParams) {
        m = getSubsets(paramsView, nIn, nOut, false, hasLayerNorm(c));
        INDArray w = c.getWeightInitFn().init(nIn, nOut, new long[]{nIn, nOut}, 'f', m.get(WEIGHT_KEY));
        m.put(WEIGHT_KEY, w);

        IWeightInit rwInit;
        if (c.getWeightInitFnRecurrent() != null) {
            rwInit = c.getWeightInitFnRecurrent();
        } else {
            rwInit = c.getWeightInitFn();
        }

        INDArray rw = rwInit.init(nOut, nOut, new long[]{nOut, nOut}, 'f', m.get(RECURRENT_WEIGHT_KEY));
        m.put(RECURRENT_WEIGHT_KEY, rw);

        m.get(BIAS_KEY).assign(c.getBiasInit());

        if (hasLayerNorm(c)) {
            m.get(GAIN_KEY).assign(c.getGainInit());
        }
    } else {
        m = getSubsets(paramsView, nIn, nOut, true, hasLayerNorm(c));
    }

    conf.addVariable(WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);
    if (hasLayerNorm(c)) {
        conf.addVariable(GAIN_KEY);
    }

    return m;
}
Example 14
Source File: LSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.LSTM layerConf = (org.deeplearning4j.nn.conf.layers.LSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL};

        IWeightInit rwInit;
        if (layerConf.getWeightInitFnRecurrent() != null) {
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
Example 15
Source File: GravesLSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesLSTM layerConf =
            (org.deeplearning4j.nn.conf.layers.GravesLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY);
    conf.addVariable(RECURRENT_WEIGHT_KEY);
    conf.addVariable(BIAS_KEY);

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    val nParamsIn = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;
    INDArray inputWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nParamsIn));
    INDArray recurrentWeightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn, nParamsIn + nParamsRecurrent));
    INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(nParamsIn + nParamsRecurrent, nParamsIn + nParamsRecurrent + nBias));

    if (initializeParams) {
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[] {nLast, 4 * nL};
        val recurrentWShape = new long[] {nL, 4 * nL + 3};

        IWeightInit rwInit;
        if (layerConf.getWeightInitFnRecurrent() != null) {
            rwInit = layerConf.getWeightInitFnRecurrent();
        } else {
            rwInit = layerConf.getWeightInitFn();
        }

        params.put(INPUT_WEIGHT_KEY, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY, rwInit.init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, recurrentWeightView));
        biasView.put(new INDArrayIndex[] {NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.valueArrayOf(new long[]{1, nL}, forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        /*The above line initializes the forget gate biases to specified value.
         * See Sutskever PhD thesis, pg19:
         * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
         * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
         * not done, it will be harder to learn long range dependencies because the smaller values of the forget
         * gates will create a vanishing gradients problem."
         * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
         */
        params.put(BIAS_KEY, biasView);
    } else {
        params.put(INPUT_WEIGHT_KEY, WeightInitUtil.reshapeWeights(new long[] {nLast, 4 * nL}, inputWeightView));
        params.put(RECURRENT_WEIGHT_KEY,
                WeightInitUtil.reshapeWeights(new long[] {nL, 4 * nL + 3}, recurrentWeightView));
        params.put(BIAS_KEY, biasView);
    }

    return params;
}
Example 16
Source File: DefaultParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    if (!(conf.getLayer() instanceof org.deeplearning4j.nn.conf.layers.FeedForwardLayer))
        throw new IllegalArgumentException("unsupported layer type: " + conf.getLayer().getClass().getName());
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());

    val length = numParams(conf);
    if (paramsView.length() != length)
        throw new IllegalStateException(
                "Expected params view of length " + length + ", got length " + paramsView.length());

    org.deeplearning4j.nn.conf.layers.FeedForwardLayer layerConf =
            (org.deeplearning4j.nn.conf.layers.FeedForwardLayer) conf.getLayer();
    val nIn = layerConf.getNIn();
    val nOut = layerConf.getNOut();

    val nWeightParams = nIn * nOut;
    INDArray weightView = paramsView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(0, nWeightParams));

    params.put(WEIGHT_KEY, createWeightMatrix(conf, weightView, initializeParams));
    conf.addVariable(WEIGHT_KEY);

    long offset = nWeightParams;
    if (hasBias(layerConf)) {
        INDArray biasView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(BIAS_KEY, createBias(conf, biasView, initializeParams));
        conf.addVariable(BIAS_KEY);
        offset += nOut;
    }

    if (hasLayerNorm(layerConf)) {
        INDArray gainView = paramsView.get(NDArrayIndex.interval(0, 0, true),
                NDArrayIndex.interval(offset, offset + nOut));
        params.put(GAIN_KEY, createGain(conf, gainView, initializeParams));
        conf.addVariable(GAIN_KEY);
    }

    return params;
}
Example 17
Source File: BatchNormalizationParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    // TODO setup for RNN
    BatchNormalization layer = (BatchNormalization) conf.getLayer();
    val nOut = layer.getNOut();

    long meanOffset = 0;
    if (!layer.isLockGammaBeta()) { //No gamma/beta parameters when gamma/beta are locked
        INDArray gammaView = paramView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, nOut));
        INDArray betaView = paramView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nOut, 2 * nOut));

        params.put(GAMMA, createGamma(conf, gammaView, initializeParams));
        conf.addVariable(GAMMA);
        params.put(BETA, createBeta(conf, betaView, initializeParams));
        conf.addVariable(BETA);

        meanOffset = 2 * nOut;
    }

    INDArray globalMeanView = paramView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(meanOffset, meanOffset + nOut));
    INDArray globalVarView = paramView.get(NDArrayIndex.interval(0, 0, true),
            NDArrayIndex.interval(meanOffset + nOut, meanOffset + 2 * nOut));

    if (initializeParams) {
        globalMeanView.assign(0);
        if (layer.isUseLogStd()) {
            //Global log stdev: assign 0.0 as initial value (s=sqrt(v), and log10(s) = log10(sqrt(v)) -> log10(1) = 0
            globalVarView.assign(0);
        } else {
            //Global variance view: assign 1.0 as initial value
            globalVarView.assign(1);
        }
    }

    params.put(GLOBAL_MEAN, globalMeanView);
    conf.addVariable(GLOBAL_MEAN);
    if (layer.isUseLogStd()) {
        params.put(GLOBAL_LOG_STD, globalVarView);
        conf.addVariable(GLOBAL_LOG_STD);
    } else {
        params.put(GLOBAL_VAR, globalVarView);
        conf.addVariable(GLOBAL_VAR);
    }

    return params;
}
Example 18
Source File: GravesBidirectionalLSTMParamInitializer.java From deeplearning4j with Apache License 2.0
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = Collections.synchronizedMap(new LinkedHashMap<String, INDArray>());
    org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM layerConf =
            (org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM) conf.getLayer();
    double forgetGateInit = layerConf.getForgetGateBiasInit();

    val nL = layerConf.getNOut(); //i.e., n neurons in this layer
    val nLast = layerConf.getNIn(); //i.e., n neurons in previous layer

    conf.addVariable(INPUT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_FORWARDS);
    conf.addVariable(BIAS_KEY_FORWARDS);
    conf.addVariable(INPUT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(RECURRENT_WEIGHT_KEY_BACKWARDS);
    conf.addVariable(BIAS_KEY_BACKWARDS);

    val nParamsInput = nLast * (4 * nL);
    val nParamsRecurrent = nL * (4 * nL + 3);
    val nBias = 4 * nL;

    val rwFOffset = nParamsInput;
    val bFOffset = rwFOffset + nParamsRecurrent;
    val iwROffset = bFOffset + nBias;
    val rwROffset = iwROffset + nParamsInput;
    val bROffset = rwROffset + nParamsRecurrent;

    INDArray iwF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(0, rwFOffset));
    INDArray rwF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(rwFOffset, bFOffset));
    INDArray bF = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(bFOffset, iwROffset));
    INDArray iwR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(iwROffset, rwROffset));
    INDArray rwR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(rwROffset, bROffset));
    INDArray bR = paramsView.get(NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(bROffset, bROffset + nBias));

    if (initializeParams) {
        bF.put(new INDArrayIndex[]{NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit)); //Order: input, forget, output, input modulation, i.e., IFOG
        bR.put(new INDArrayIndex[]{NDArrayIndex.interval(0, 0, true), NDArrayIndex.interval(nL, 2 * nL)},
                Nd4j.ones(1, nL).muli(forgetGateInit));
    }
    /*The above line initializes the forget gate biases to specified value.
     * See Sutskever PhD thesis, pg19:
     * "it is important for [the forget gate activations] to be approximately 1 at the early stages of learning,
     * which is accomplished by initializing [the forget gate biases] to a large value (such as 5). If it is
     * not done, it will be harder to learn long range dependencies because the smaller values of the forget
     * gates will create a vanishing gradients problem."
     * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
     */

    if (initializeParams) {
        //As per standard LSTM
        val fanIn = nL;
        val fanOut = nLast + nL;
        val inputWShape = new long[]{nLast, 4 * nL};
        val recurrentWShape = new long[]{nL, 4 * nL + 3};

        params.put(INPUT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, inputWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, layerConf.getWeightInitFn().init(fanIn, fanOut, recurrentWShape,
                IWeightInit.DEFAULT_WEIGHT_INIT_ORDER, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    } else {
        params.put(INPUT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwF));
        params.put(RECURRENT_WEIGHT_KEY_FORWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwF));
        params.put(BIAS_KEY_FORWARDS, bF);
        params.put(INPUT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nLast, 4 * nL}, iwR));
        params.put(RECURRENT_WEIGHT_KEY_BACKWARDS, WeightInitUtil.reshapeWeights(new long[]{nL, 4 * nL + 3}, rwR));
        params.put(BIAS_KEY_BACKWARDS, bR);
    }

    return params;
}
Example 19
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
public void testSphereFnOptHelper(OptimizationAlgorithm oa, int numLineSearchIter, int nDimensions) {

    if (PRINT_OPT_RESULTS)
        System.out.println("---------\n Alg= " + oa + ", nIter= " + numLineSearchIter + ", nDimensions= "
                + nDimensions);

    NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder().maxNumLineSearchIterations(numLineSearchIter)
            .updater(new Sgd(1e-2))
            .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
    conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

    Random rng = new DefaultRandom(12345L);
    org.nd4j.linalg.api.rng.distribution.Distribution dist =
            new org.nd4j.linalg.api.rng.distribution.impl.UniformDistribution(rng, -10, 10);
    Model m = new SphereFunctionModel(nDimensions, dist, conf);

    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreBefore = m.score();
    assertTrue(!Double.isNaN(scoreBefore) && !Double.isInfinite(scoreBefore));
    if (PRINT_OPT_RESULTS) {
        System.out.println("Before:");
        System.out.println(scoreBefore);
        System.out.println(m.params());
    }

    ConvexOptimizer opt = getOptimizer(oa, conf, m);

    opt.setupSearchState(m.gradientAndScore());
    for (int i = 0; i < 100; i++) {
        opt.optimize(LayerWorkspaceMgr.noWorkspaces());
    }
    m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
    double scoreAfter = m.score();

    assertTrue(!Double.isNaN(scoreAfter) && !Double.isInfinite(scoreAfter));
    if (PRINT_OPT_RESULTS) {
        System.out.println("After:");
        System.out.println(scoreAfter);
        System.out.println(m.params());
    }

    //Expected behaviour after optimization:
    //(a) score is better (lower) after optimization.
    //(b) Parameters are closer to minimum after optimization (TODO)
    assertTrue("Score did not improve after optimization (b= " + scoreBefore + " ,a= " + scoreAfter + ")",
            scoreAfter < scoreBefore);
}
Example 20
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
private static void testRosenbrockFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
        int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                .maxNumLineSearchIterations(maxNumLineSearchIter)
                .updater(new Sgd(1e-1))
                .stepFunction(new org.deeplearning4j.nn.conf.stepfunctions.NegativeDefaultStepFunction())
                .layer(new DenseLayer.Builder().nIn(1).nOut(1).build())
                .build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RosenbrockFunctionModel(100, conf);
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue("NaN or infinite score: " + scores[i],
                    !Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rosenbrock: Multiple optimization iterations ( " + nOptIter
                + " opt. iter.) score vs iteration, maxNumLineSearchIter= " + maxNumLineSearchIter + ": " + oa);
        System.out.println(Arrays.toString(scores));
    }

    for (int i = 1; i < scores.length; i++) {
        if (i == 1) {
            assertTrue(scores[i] < scores[i - 1]); //Require at least one step of improvement
        } else {
            assertTrue(scores[i] <= scores[i - 1]);
        }
    }
}