org.nd4j.linalg.learning.config.AdaGrad Java Examples

The following examples show how to use org.nd4j.linalg.learning.config.AdaGrad. Each example notes the project it was taken from and the license of its source file.
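Before the project-specific examples, here is a minimal sketch of the most common pattern: AdaGrad is an IUpdater configuration that is passed to a network builder, which creates and manages the corresponding updater internally (see Examples #5, #6, #11 and #12 below). The class name, layer sizes and the 0.05 learning rate in this sketch are illustrative choices only, not taken from any of the projects below.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.AdaGrad;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class AdaGradUsageSketch {
    public static void main(String[] args) {
        // AdaGrad(learningRate) is applied as the updater for all layers of this illustrative network
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new AdaGrad(0.05)) // 0.05 is an arbitrary example learning rate
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(10).activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(10).nOut(3).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();
        System.out.println(net.summary());
    }
}

The remaining examples show the same configuration class used directly with AdaGradUpdater, where the caller is responsible for allocating the flattened state view array.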
Example #1
Source File: UpdaterTest.java    From nd4j with Apache License 2.0
@Test
public void testAdaGrad() {
    int rows = 10;
    int cols = 2;

    // Create an AdaGradUpdater from an AdaGrad config and back it with a flattened state view array
    AdaGradUpdater grad = new AdaGradUpdater(new AdaGrad(0.1, AdaGrad.DEFAULT_ADAGRAD_EPSILON));
    grad.setStateViewArray(Nd4j.zeros(1, rows * cols), new long[] {rows, cols}, 'c', true);
    INDArray W = Nd4j.zeros(rows, cols);
    Distribution dist = Nd4j.getDistributions().createNormal(1, 1);
    for (int i = 0; i < W.rows(); i++)
        W.putRow(i, Nd4j.create(dist.sample(W.columns())));

    for (int i = 0; i < 5; i++) {
        //            String learningRates = String.valueOf("\nAdagrad\n " + grad.applyUpdater(W, i)).replaceAll(";", "\n");
        //            System.out.println(learningRates);
        W.addi(Nd4j.randn(rows, cols));
    }

}
 
Example #2
Source File: TestTransferLearningJson.java    From deeplearning4j with Apache License 2.0
@Test
public void testJsonYaml() {

    FineTuneConfiguration c = new FineTuneConfiguration.Builder().activation(Activation.ELU)
                    .updater(new AdaGrad(1.0)).biasUpdater(new AdaGrad(10.0)).build();

    String asJson = c.toJson();
    String asYaml = c.toYaml();

    FineTuneConfiguration fromJson = FineTuneConfiguration.fromJson(asJson);
    FineTuneConfiguration fromYaml = FineTuneConfiguration.fromYaml(asYaml);

    //        System.out.println(asJson);

    assertEquals(c, fromJson);
    assertEquals(c, fromYaml);
    assertEquals(asJson, fromJson.toJson());
    assertEquals(asYaml, fromYaml.toYaml());
}
 
Example #3
Source File: AdaGradLearnerTestCase.java    From jstarcraft-ai with Apache License 2.0
@Override
protected GradientUpdater<?> getOldFunction(long[] shape) {
    AdaGrad configuration = new AdaGrad();
    GradientUpdater<?> oldFunction = new AdaGradUpdater(configuration);
    // AdaGrad keeps one accumulator per parameter, so the flattened state length equals the parameter count
    int length = (int) (shape[0] * configuration.stateSize(shape[1]));
    INDArray view = Nd4j.zeros(length);
    oldFunction.setStateViewArray(view, shape, 'c', true);
    return oldFunction;
}
 
Example #4
Source File: UpdaterTest.java    From nd4j with Apache License 2.0
@Test
public void testAdaGradLegacy() {
    int rows = 1;
    int cols = 1;


    org.nd4j.linalg.learning.legacy.AdaGrad grad = new org.nd4j.linalg.learning.legacy.AdaGrad(rows, cols, 1e-3);
    grad.setStateViewArray(Nd4j.zeros(1, rows * cols), new int[] {rows, cols}, 'c', true);
    INDArray w = Nd4j.ones(rows, cols);
    grad.getGradient(w, 0);
    assertEquals(1e-1, w.getDouble(0), 1e-1);
}
 
Example #5
Source File: GravesBidirectionalLSTMTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testSerialization() {

    final MultiLayerConfiguration conf1 = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(new AdaGrad(0.1))
                    .l2(0.001)
                    .seed(12345).list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                    .activation(Activation.TANH).nIn(2).nOut(2)
                                    .dist(new UniformDistribution(-0.05, 0.05)).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.GravesBidirectionalLSTM.Builder()
                                    .activation(Activation.TANH).nIn(2).nOut(2)
                                    .dist(new UniformDistribution(-0.05, 0.05)).build())
                    .layer(2, new org.deeplearning4j.nn.conf.layers.RnnOutputLayer.Builder()
                                    .activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT)
                                    .nIn(2).nOut(2).build())
                    .build();


    final String json1 = conf1.toJson();

    // Round trip: deserialize, then re-serialize; the JSON should be unchanged
    final MultiLayerConfiguration conf2 = MultiLayerConfiguration.fromJson(json1);

    final String json2 = conf2.toJson();


    TestCase.assertEquals(json1, json2);
}
 
Example #6
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
private static MultiLayerConfiguration getMLPConfigIris(OptimizationAlgorithm oa) {
    MultiLayerConfiguration c = new NeuralNetConfiguration.Builder().optimizationAlgo(oa)
                    .updater(new AdaGrad(1e-1)).seed(12345L)
                    .list().layer(0,
                                    new DenseLayer.Builder().nIn(4).nOut(3).weightInit(WeightInit.XAVIER)
                                                    .activation(Activation.RELU)
                                                    .build())
                    .layer(1, new OutputLayer.Builder(LossFunction.MCXENT).nIn(3).nOut(3)
                                    .weightInit(WeightInit.XAVIER).activation(Activation.SOFTMAX).build())
                    .build();

    return c;
}
 
Example #7
Source File: TestOptimizers.java    From deeplearning4j with Apache License 2.0
private static void testRastriginFnMultipleStepsHelper(OptimizationAlgorithm oa, int nOptIter,
                int maxNumLineSearchIter) {
    double[] scores = new double[nOptIter + 1];

    for (int i = 0; i <= nOptIter; i++) {
        NeuralNetConfiguration conf = new NeuralNetConfiguration.Builder()
                        .maxNumLineSearchIterations(maxNumLineSearchIter).miniBatch(false)
                        .updater(new AdaGrad(1e-2))
                        .layer(new DenseLayer.Builder().nIn(1).nOut(1).build()).build();
        conf.addVariable("W"); //Normally done by ParamInitializers, but obviously that isn't done here

        Model m = new RastriginFunctionModel(10, conf);
        int nParams = (int)m.numParams();
        if (i == 0) {
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[0] = m.score(); //Before optimization
        } else {
            ConvexOptimizer opt = getOptimizer(oa, conf, m);
            opt.getUpdater().setStateViewArray((Layer) m, Nd4j.create(new int[] {1, nParams}, 'c'), true);
            opt.optimize(LayerWorkspaceMgr.noWorkspaces());
            m.computeGradientAndScore(LayerWorkspaceMgr.noWorkspaces());
            scores[i] = m.score();
            assertTrue(!Double.isNaN(scores[i]) && !Double.isInfinite(scores[i]));
        }
    }

    if (PRINT_OPT_RESULTS) {
        System.out.println("Rastrigin: Multiple optimization iterations (" + nOptIter
                        + " opt. iter.) score vs iteration, maxNumLineSearchIter=" + maxNumLineSearchIter + ": "
                        + oa);
        System.out.println(Arrays.toString(scores));
    }
    //Scores should not increase from one optimization iteration to the next
    for (int i = 1; i < scores.length; i++) {
        assertTrue(scores[i] <= scores[i - 1]);
    }
}
 
Example #8
Source File: AdaGradSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    if(lrSchedule != null){
        return new AdaGrad(lrSchedule.getValue(parameterValues));
    } else {
        return new AdaGrad(learningRate.getValue(parameterValues));
    }
}
 
Example #9
Source File: AdaGradUpdater.java    From nd4j with Apache License 2.0
public AdaGradUpdater(AdaGrad config) {
    this.config = config;
}
 
Example #10
Source File: AdaGradUpdater.java    From deeplearning4j with Apache License 2.0
public AdaGradUpdater(AdaGrad config) {
    this.config = config;
}
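
Examples #9 and #10 only store the configuration object; before the updater can be applied, a flattened state view array must still be attached, as Examples #1 and #3 do. The sketch below ties these pieces together. The class name and learning rate are illustrative, and the three-argument applyUpdater(gradient, iteration, epoch) form assumes a recent nd4j release (older versions used a different signature, as the commented-out call in Example #1 suggests).

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.learning.AdaGradUpdater;
import org.nd4j.linalg.learning.config.AdaGrad;

public class AdaGradUpdaterSketch {
    public static void main(String[] args) {
        int rows = 3;
        int cols = 4;

        // AdaGrad keeps one squared-gradient accumulator per parameter,
        // stored in a single flattened state view array
        AdaGrad config = new AdaGrad(0.1, AdaGrad.DEFAULT_ADAGRAD_EPSILON);
        AdaGradUpdater updater = new AdaGradUpdater(config);
        updater.setStateViewArray(Nd4j.zeros(1, rows * cols), new long[] {rows, cols}, 'c', true);

        // applyUpdater adjusts the gradient in place, turning it into the update to subtract from the parameters
        INDArray gradient = Nd4j.rand(rows, cols);
        updater.applyUpdater(gradient, 0, 0);
        System.out.println(gradient);
    }
}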
 
Example #11
Source File: TestLastTimeStepLayer.java    From deeplearning4j with Apache License 2.0
@Test
public void testMaskingAndAllMasked(){
    ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(XAVIER_UNIFORM)
            .activation(TANH)
            .updater(new AdaGrad(0.01))
            .l2(0.0001)
            .seed(1234)
            .graphBuilder()
            .addInputs("in")
            .setInputTypes(InputType.recurrent(1, rnnDataFormat))
            .addLayer("RNN", new LastTimeStep(new LSTM.Builder()
                    .nOut(10).dataFormat(rnnDataFormat)
                    .build()), "in")
            .addLayer("dense", new DenseLayer.Builder()
                    .nOut(10)
                    .build(), "RNN")
            .addLayer("out", new OutputLayer.Builder()
                    .activation(IDENTITY)
                    .lossFunction(MSE)
                    .nOut(10)
                    .build(), "dense")
            .setOutputs("out");

    ComputationGraphConfiguration conf = builder.build();
    ComputationGraph cg = new ComputationGraph(conf);
    cg.init();

    INDArray f = Nd4j.rand(new long[]{1,1,24});
    INDArray fm1 = Nd4j.ones(1,24);
    INDArray fm2 = Nd4j.zeros(1,24);
    INDArray fm3 = Nd4j.zeros(1,24);
    fm3.get(NDArrayIndex.point(0), NDArrayIndex.interval(0,5)).assign(1);
    if (rnnDataFormat == RNNFormat.NWC){
        f = f.permute(0, 2, 1);
    }
    INDArray[] out1 = cg.output(false, new INDArray[]{f}, new INDArray[]{fm1});
    try {
        cg.output(false, new INDArray[]{f}, new INDArray[]{fm2});
        fail("Expected exception");
    } catch (Exception e){
        assertTrue(e.getMessage().contains("mask is all 0s"));
    }

    INDArray[] out3 = cg.output(false, new INDArray[]{f}, new INDArray[]{fm3});

    System.out.println(out1[0]);
    System.out.println(out3[0]);

    assertNotEquals(out1[0], out3[0]);
}
 
Example #12
Source File: TestComputationGraphNetwork.java    From deeplearning4j with Apache License 2.0
@Test
public void testSummary() {
    int V_WIDTH = 130;
    int V_HEIGHT = 130;
    int V_NFRAMES = 150;
    ComputationGraphConfiguration confForArchitecture =
            new NeuralNetConfiguration.Builder().seed(12345).l2(0.001) //l2 regularization on all layers
                    .updater(new AdaGrad(0.4)).graphBuilder()
                    .addInputs("in")
                    .addLayer("layer0", new ConvolutionLayer.Builder(10, 10).nIn(3) //3 channels: RGB
                            .nOut(30).stride(4, 4).activation(Activation.RELU).weightInit(
                                    WeightInit.RELU).build(), "in") //Output: (130-10+0)/4+1 = 31 -> 31*31*30
                    .addLayer("layer1", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2).build(), "layer0") //(31-3+0)/2+1 = 15
                    .addLayer("layer2", new ConvolutionLayer.Builder(3, 3).nIn(30).nOut(10).stride(2, 2)
                            .activation(Activation.RELU).weightInit(WeightInit.RELU)
                            .updater(Updater.ADAGRAD).build(), "layer1") //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
                    .addLayer("layer3", new DenseLayer.Builder().activation(Activation.RELU).nIn(490).nOut(50)
                            .weightInit(WeightInit.RELU).gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                            .gradientNormalizationThreshold(10).build(), "layer2")
                    .addLayer("layer4", new GravesLSTM.Builder().activation(Activation.SOFTSIGN).nIn(50)
                            .nOut(50).weightInit(WeightInit.XAVIER).updater(Updater.ADAGRAD)
                            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                            .gradientNormalizationThreshold(10)
                            .build(), "layer3")
                    .addLayer("layer5", new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                            .activation(Activation.SOFTMAX).nIn(50).nOut(4) //4 possible shapes: circle, square, arc, line
                            .weightInit(WeightInit.XAVIER)
                            .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                            .gradientNormalizationThreshold(10).build(), "layer4")
                    .setOutputs("layer5")
                    .inputPreProcessor("layer0", new RnnToCnnPreProcessor(V_HEIGHT, V_WIDTH, 3))
                    .inputPreProcessor("layer3", new CnnToFeedForwardPreProcessor(7, 7, 10))
                    .inputPreProcessor("layer4", new FeedForwardToRnnPreProcessor())
                    .backpropType(BackpropType.TruncatedBPTT)
                    .tBPTTForwardLength(V_NFRAMES / 5).tBPTTBackwardLength(V_NFRAMES / 5).build();
    ComputationGraph modelExpectedArch = new ComputationGraph(confForArchitecture);
    modelExpectedArch.init();
    ComputationGraph modelMow = new TransferLearning.GraphBuilder(modelExpectedArch).setFeatureExtractor("layer2").build();
    //        System.out.println(modelExpectedArch.summary());
    //        System.out.println(modelMow.summary());
    //        System.out.println(modelExpectedArch.summary(InputType.recurrent(V_HEIGHT* V_WIDTH* 3)));
    modelExpectedArch.summary();
    modelMow.summary();
    modelExpectedArch.summary(InputType.recurrent(V_HEIGHT* V_WIDTH* 3));
}