org.deeplearning4j.nn.conf.Updater Java Examples

The following examples show how to use org.deeplearning4j.nn.conf.Updater. Each example lists its source file, the open-source project it is taken from, and that project's license.
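Before the examples, a brief orientation: Updater is an enum of DL4J's built-in gradient updaters (SGD, NESTEROVS, ADAM, RMSPROP, and so on). Older DL4J versions accepted the enum directly in updater(...), which is what most of the examples below do; newer versions deprecate the enum in favour of IUpdater instances such as new Adam(...), and Updater.X.getIUpdaterWithDefaultConfig() bridges the two (see Examples #15 and #22). The minimal sketch below shows both styles side by side, assuming a recent DL4J release where both overloads of updater(...) exist; the layer sizes and learning rates are illustrative values, not taken from any of the projects listed here.

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.Updater;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class UpdaterUsageSketch {
    public static void main(String[] args) {
        // Legacy style: pass the Updater enum straight to updater(...).
        // Deprecated in recent releases, but common in older code such as the examples below.
        MultiLayerConfiguration legacy = new NeuralNetConfiguration.Builder()
                .updater(Updater.NESTEROVS)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(8).activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(8).nOut(3).build())
                .build();

        // Current style: pass an IUpdater instance, or convert the enum to one.
        MultiLayerConfiguration current = new NeuralNetConfiguration.Builder()
                .updater(new Adam(1e-3)) // explicit IUpdater with a chosen learning rate
                //.updater(Updater.ADAM.getIUpdaterWithDefaultConfig()) // equivalent bridge from the enum
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(8).activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(8).nOut(3).build())
                .build();

        System.out.println(legacy.toJson());
        System.out.println(current.toJson());
    }
}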
Example #1
Source File: LayerConfigValidationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void testCompGraphNullLayer() {
    ComputationGraphConfiguration.GraphBuilder gb = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).updater(new Sgd(0.01))
            .seed(42).miniBatch(false).l1(0.2).l2(0.2)
            /* Graph Builder */
            .updater(Updater.RMSPROP).graphBuilder().addInputs("in")
            .addLayer("L" + 1,
                    new GravesLSTM.Builder().nIn(20).updater(Updater.RMSPROP).nOut(10)
                            .weightInit(WeightInit.XAVIER)
                            .dropOut(0.4).l1(0.3).activation(Activation.SIGMOID).build(),
                    "in")
            .addLayer("output",
                    new RnnOutputLayer.Builder().nIn(20).nOut(10).activation(Activation.SOFTMAX)
                            .weightInit(WeightInit.RELU_UNIFORM).build(),
                    "L" + 1)
            .setOutputs("output");
    ComputationGraphConfiguration conf = gb.build();
    ComputationGraph cg = new ComputationGraph(conf);
    cg.init();
}
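One detail worth noting in the example above: the builder chain sets an updater three times, once as new Sgd(0.01), once as Updater.RMSPROP on the global builder, and once as Updater.RMSPROP on the GravesLSTM layer. In DL4J, a later updater(...) call on the same builder simply replaces the earlier one, and values set on an individual layer take precedence over the global builder defaults for that layer, so the effective global updater here is RMSPROP.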
 
Example #2
Source File: LinearModel.java    From FederatedAndroidTrainer with MIT License
public void buildModel() {
    //Create the network
    int numInput = 2;
    int numOutputs = 1;
    int nHidden = 10;
    mNetwork = new MultiLayerNetwork(new NeuralNetConfiguration.Builder()
            .seed(mSeed)
            .iterations(ITERATIONS)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .learningRate(LEARNING_RATE)
            .weightInit(WeightInit.XAVIER)
            .updater(Updater.NESTEROVS)
            .list()
            .layer(0, new DenseLayer.Builder().nIn(numInput).nOut(nHidden)
                    .activation(Activation.TANH)
                    .name("input")
                    .build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                    .activation(Activation.IDENTITY)
                    .name("output")
                    .nIn(nHidden).nOut(numOutputs).build())
            .pretrain(false)
            .backprop(true)
            .build()
    );
    mNetwork.init();
    mNetwork.setListeners(mIterationListener);
}
 
Example #3
Source File: LSTMGradientCheckTests.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientGravesBidirectionalLSTMEdgeCases() {
    //Edge cases: T=1, miniBatchSize=1, both
    int[] timeSeriesLength = {1, 5, 1};
    int[] miniBatchSize = {7, 1, 1};

    int nIn = 3;
    int layerSize = 4;
    int nOut = 2;

    for (int i = 0; i < timeSeriesLength.length; i++) {

        Random r = new Random(12345L);
        INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize[i], nIn, timeSeriesLength[i]).subi(0.5);

        INDArray labels = Nd4j.zeros(miniBatchSize[i], nOut, timeSeriesLength[i]);
        for (int m = 0; m < miniBatchSize[i]; m++) {
            for (int j = 0; j < timeSeriesLength[i]; j++) {
                int idx = r.nextInt(nOut);
                labels.putScalar(new int[] {m, idx, j}, 1.0f);
            }
        }

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L)
                        .dataType(DataType.DOUBLE)
                        .list()
                        .layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize)

                                        .dist(new NormalDistribution(0, 1)).updater(
                                                        Updater.NONE)
                                        .build())
                        .layer(1, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX)
                                        .nIn(layerSize).nOut(nOut)
                                        .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build())
                        .build();
        MultiLayerNetwork mln = new MultiLayerNetwork(conf);
        mln.init();

        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input)
                .labels(labels).subset(true).maxPerParam(128));

        String msg = "testGradientGravesBidirectionalLSTMEdgeCases() - timeSeriesLength=" + timeSeriesLength[i]
                        + ", miniBatchSize=" + miniBatchSize[i];
        assertTrue(msg, gradOK);
        TestUtils.testModelSerialization(mln);
    }
}
 
Example #4
Source File: LearnIrisBackprop.java    From aifh with Apache License 2.0
/**
 * The main method.
 * @param args Not used.
 */
public static void main(String[] args) {
    try {
        int seed = 43;
        double learningRate = 0.1;
        int splitTrainNum = (int) (150 * .75);

        int numInputs = 4;
        int numOutputs = 3;
        int numHiddenNodes = 50;

        // Setup training data.
        final InputStream istream = LearnIrisBackprop.class.getResourceAsStream("/iris.csv");
        if( istream==null ) {
            System.out.println("Cannot access data set, make sure the resources are available.");
            System.exit(1);
        }
        final NormalizeDataSet ds = NormalizeDataSet.load(istream);
        final CategoryMap species = ds.encodeOneOfN(4); // species is column 4
        istream.close();

        DataSet next = ds.extractSupervised(0, 4, 4, 3);
        next.shuffle();

        // Training and validation data split
        SplitTestAndTrain testAndTrain = next.splitTestAndTrain(splitTrainNum, new Random(seed));
        DataSet trainSet = testAndTrain.getTrain();
        DataSet validationSet = testAndTrain.getTest();

        DataSetIterator trainSetIterator = new ListDataSetIterator(trainSet.asList(), trainSet.numExamples());

        DataSetIterator validationSetIterator = new ListDataSetIterator(validationSet.asList(), validationSet.numExamples());

        // Create neural network.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(1)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(learningRate)
                .updater(Updater.NESTEROVS).momentum(0.9)
                .list(2)
                .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
                        .weightInit(WeightInit.XAVIER)
                        .activation("relu")
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD)
                        .weightInit(WeightInit.XAVIER)
                        .activation("softmax")
                        .nIn(numHiddenNodes).nOut(numOutputs).build())
                .pretrain(false).backprop(true).build();


        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        model.setListeners(new ScoreIterationListener(1));

        // Define when we want to stop training.
        EarlyStoppingModelSaver saver = new InMemoryModelSaver();
        EarlyStoppingConfiguration esConf = new EarlyStoppingConfiguration.Builder()
                .epochTerminationConditions(new MaxEpochsTerminationCondition(500)) //Max of 500 epochs
                .epochTerminationConditions(new ScoreImprovementEpochTerminationCondition(25))
                .evaluateEveryNEpochs(1)
                .scoreCalculator(new DataSetLossCalculator(validationSetIterator, true))     //Calculate test set score
                .modelSaver(saver)
                .build();
        EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf, conf, trainSetIterator);

        // Train and display result.
        EarlyStoppingResult result = trainer.fit();
        System.out.println("Termination reason: " + result.getTerminationReason());
        System.out.println("Termination details: " + result.getTerminationDetails());
        System.out.println("Total epochs: " + result.getTotalEpochs());
        System.out.println("Best epoch number: " + result.getBestModelEpoch());
        System.out.println("Score at best epoch: " + result.getBestModelScore());

        model = saver.getBestModel();

        // Evaluate
        Evaluation eval = new Evaluation(numOutputs);
        validationSetIterator.reset();

        for (int i = 0; i < validationSet.numExamples(); i++) {
            DataSet t = validationSet.get(i);
            INDArray features = t.getFeatureMatrix();
            INDArray labels = t.getLabels();
            INDArray predicted = model.output(features, false);
            System.out.println(features + ":Actual("+findSpecies(labels,species)
                    +"):Prediction("+findSpecies(predicted,species)+")" + predicted );
            eval.eval(labels, predicted);
        }

        //Print the evaluation statistics
        System.out.println(eval.stats());
    } catch(Exception ex) {
        ex.printStackTrace();
    }
}
 
Example #5
Source File: LearnXORBackprop.java    From aifh with Apache License 2.0
/**
 * The main method.
 * @param args Not used.
 */
public static void main(String[] args) {
    int seed = 43;
    double learningRate = 0.4;
    int nEpochs = 100;

    int numInputs = XOR_INPUT[0].length;
    int numOutputs = XOR_IDEAL[0].length;
    int numHiddenNodes = 4;

    // Setup training data.
    INDArray xorInput = Nd4j.create(XOR_INPUT);
    INDArray xorIdeal = Nd4j.create(XOR_IDEAL);
    DataSet xorDataSet = new DataSet(xorInput,xorIdeal);

    // Create neural network.
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(seed)
            .iterations(1)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .learningRate(learningRate)
            .updater(Updater.NESTEROVS).momentum(0.9)
            .list(2)
            .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
                    .weightInit(WeightInit.XAVIER)
                    .activation("relu")
                    .build())
            .layer(1, new OutputLayer.Builder(LossFunction.MSE)
                    .weightInit(WeightInit.XAVIER)
                    .activation("identity")
                    .nIn(numHiddenNodes).nOut(numOutputs).build())
            .pretrain(false).backprop(true).build();


    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(new ScoreIterationListener(1));


    // Train
    for ( int n = 0; n < nEpochs; n++) {
        model.fit( xorDataSet );
    }


    // Evaluate
    System.out.println("Evaluating neural network.");
    for(int i=0;i<4;i++) {
        INDArray input = xorInput.getRow(i);
        INDArray output = model.output(input);
        System.out.println( input + " : " + output);
    }
}
 
Example #6
Source File: LearnDigitsDropout.java    From aifh with Apache License 2.0
/**
 * The main method.
 * @param args Not used.
 */
public static void main(String[] args) {
    try {
        int seed = 43;
        double learningRate = 1e-2;
        int nEpochs = 50;
        int batchSize = 500;

        // Setup training data.
        System.out.println("Please wait, reading MNIST training data.");
        String dir = System.getProperty("user.dir");
        MNISTReader trainingReader = MNIST.loadMNIST(dir, true);
        MNISTReader validationReader = MNIST.loadMNIST(dir, false);

        DataSet trainingSet = trainingReader.getData();
        DataSet validationSet = validationReader.getData();

        DataSetIterator trainSetIterator = new ListDataSetIterator(trainingSet.asList(), batchSize);
        DataSetIterator validationSetIterator = new ListDataSetIterator(validationSet.asList(), validationReader.getNumRows());

        System.out.println("Training set size: " + trainingReader.getNumImages());
        System.out.println("Validation set size: " + validationReader.getNumImages());

        System.out.println(trainingSet.get(0).getFeatures().size(1));
        System.out.println(validationSet.get(0).getFeatures().size(1));

        int numInputs = trainingReader.getNumCols()*trainingReader.getNumRows();
        int numOutputs = 10;
        int numHiddenNodes = 100;

        // Create neural network.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(1)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(learningRate)
                .updater(Updater.NESTEROVS).momentum(0.9)
                .list(2)
                .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
                        .weightInit(WeightInit.XAVIER)
                        .activation("relu")
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD)
                        .weightInit(WeightInit.XAVIER)
                        .activation("softmax")
                        .nIn(numHiddenNodes).nOut(numOutputs).build())
                .pretrain(false).backprop(true).build();


        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        model.setListeners(new ScoreIterationListener(1));

        // Define when we want to stop training.
        EarlyStoppingModelSaver saver = new InMemoryModelSaver();
        EarlyStoppingConfiguration esConf = new EarlyStoppingConfiguration.Builder()
                //.epochTerminationConditions(new MaxEpochsTerminationCondition(10))
                .epochTerminationConditions(new ScoreImprovementEpochTerminationCondition(5))
                .evaluateEveryNEpochs(1)
                .scoreCalculator(new DataSetLossCalculator(validationSetIterator, true))     //Calculate test set score
                .modelSaver(saver)
                .build();
        EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf, conf, trainSetIterator);

        // Train and display result.
        EarlyStoppingResult result = trainer.fit();
        System.out.println("Termination reason: " + result.getTerminationReason());
        System.out.println("Termination details: " + result.getTerminationDetails());
        System.out.println("Total epochs: " + result.getTotalEpochs());
        System.out.println("Best epoch number: " + result.getBestModelEpoch());
        System.out.println("Score at best epoch: " + result.getBestModelScore());

        model = saver.getBestModel();

        // Evaluate
        Evaluation eval = new Evaluation(numOutputs);
        validationSetIterator.reset();

        for (int i = 0; i < validationSet.numExamples(); i++) {
            DataSet t = validationSet.get(i);
            INDArray features = t.getFeatureMatrix();
            INDArray labels = t.getLabels();
            INDArray predicted = model.output(features, false);
            eval.eval(labels, predicted);
        }

        //Print the evaluation statistics
        System.out.println(eval.stats());
    } catch(Exception ex) {
        ex.printStackTrace();
    }

}
 
Example #7
Source File: LearnDigitsBackprop.java    From aifh with Apache License 2.0
/**
 * The main method.
 * @param args Not used.
 */
public static void main(String[] args) {
    try {
        int seed = 43;
        double learningRate = 1e-2;
        int nEpochs = 50;
        int batchSize = 500;

        // Setup training data.
        System.out.println("Please wait, reading MNIST training data.");
        String dir = System.getProperty("user.dir");
        MNISTReader trainingReader = MNIST.loadMNIST(dir, true);
        MNISTReader validationReader = MNIST.loadMNIST(dir, false);

        DataSet trainingSet = trainingReader.getData();
        DataSet validationSet = validationReader.getData();

        DataSetIterator trainSetIterator = new ListDataSetIterator(trainingSet.asList(), batchSize);
        DataSetIterator validationSetIterator = new ListDataSetIterator(validationSet.asList(), validationReader.getNumRows());

        System.out.println("Training set size: " + trainingReader.getNumImages());
        System.out.println("Validation set size: " + validationReader.getNumImages());

        System.out.println(trainingSet.get(0).getFeatures().size(1));
        System.out.println(validationSet.get(0).getFeatures().size(1));

        int numInputs = trainingReader.getNumCols()*trainingReader.getNumRows();
        int numOutputs = 10;
        int numHiddenNodes = 200;

        // Create neural network.
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(1)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(learningRate)
                .updater(Updater.NESTEROVS).momentum(0.9)
                .regularization(true).dropOut(0.50)
                .list(2)
                .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
                        .weightInit(WeightInit.XAVIER)
                        .activation("relu")
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunction.NEGATIVELOGLIKELIHOOD)
                        .weightInit(WeightInit.XAVIER)
                        .activation("softmax")
                        .nIn(numHiddenNodes).nOut(numOutputs).build())
                .pretrain(false).backprop(true).build();


        MultiLayerNetwork model = new MultiLayerNetwork(conf);
        model.init();
        model.setListeners(new ScoreIterationListener(1));

        // Define when we want to stop training.
        EarlyStoppingModelSaver saver = new InMemoryModelSaver();
        EarlyStoppingConfiguration esConf = new EarlyStoppingConfiguration.Builder()
                //.epochTerminationConditions(new MaxEpochsTerminationCondition(10))
                .epochTerminationConditions(new ScoreImprovementEpochTerminationCondition(5))
                .evaluateEveryNEpochs(1)
                .scoreCalculator(new DataSetLossCalculator(validationSetIterator, true))     //Calculate test set score
                .modelSaver(saver)
                .build();
        EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(esConf, conf, trainSetIterator);

        // Train and display result.
        EarlyStoppingResult result = trainer.fit();
        System.out.println("Termination reason: " + result.getTerminationReason());
        System.out.println("Termination details: " + result.getTerminationDetails());
        System.out.println("Total epochs: " + result.getTotalEpochs());
        System.out.println("Best epoch number: " + result.getBestModelEpoch());
        System.out.println("Score at best epoch: " + result.getBestModelScore());

        model = saver.getBestModel();

        // Evaluate
        Evaluation eval = new Evaluation(numOutputs);
        validationSetIterator.reset();

        for (int i = 0; i < validationSet.numExamples(); i++) {
            DataSet t = validationSet.get(i);
            INDArray features = t.getFeatureMatrix();
            INDArray labels = t.getLabels();
            INDArray predicted = model.output(features, false);
            eval.eval(labels, predicted);
        }

        //Print the evaluation statistics
        System.out.println(eval.stats());
    } catch(Exception ex) {
        ex.printStackTrace();
    }

}
 
Example #8
Source File: TestSparkComputationGraph.java    From deeplearning4j with Apache License 2.0
@Ignore("AB 2019/05/23 - Failing on CI only - passing locally. Possible precision or threading issue")
public void testSeedRepeatability() throws Exception {

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .weightInit(WeightInit.XAVIER).graphBuilder().addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(4)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(4).nOut(3).activation(Activation.SOFTMAX)
                                                    .build(),
                                    "0")
                    .setOutputs("1").build();

    Nd4j.getRandom().setSeed(12345);
    ComputationGraph n1 = new ComputationGraph(conf.clone());
    n1.init();

    Nd4j.getRandom().setSeed(12345);
    ComputationGraph n2 = new ComputationGraph(conf.clone());
    n2.init();

    Nd4j.getRandom().setSeed(12345);
    ComputationGraph n3 = new ComputationGraph(conf.clone());
    n3.init();

    SparkComputationGraph sparkNet1 = new SparkComputationGraph(sc, n1,
                    new ParameterAveragingTrainingMaster.Builder(1).workerPrefetchNumBatches(5)
                                    .batchSizePerWorker(5).averagingFrequency(1).repartionData(Repartition.Always)
                                    .rngSeed(12345).build());

    Thread.sleep(100); //Training master IDs are only unique if they are created at least 1 ms apart...

    SparkComputationGraph sparkNet2 = new SparkComputationGraph(sc, n2,
                    new ParameterAveragingTrainingMaster.Builder(1).workerPrefetchNumBatches(5)
                                    .batchSizePerWorker(5).averagingFrequency(1).repartionData(Repartition.Always)
                                    .rngSeed(12345).build());

    Thread.sleep(100);

    SparkComputationGraph sparkNet3 = new SparkComputationGraph(sc, n3,
                    new ParameterAveragingTrainingMaster.Builder(1).workerPrefetchNumBatches(5)
                                    .batchSizePerWorker(5).averagingFrequency(1).repartionData(Repartition.Always)
                                    .rngSeed(98765).build());

    List<DataSet> data = new ArrayList<>();
    DataSetIterator iter = new IrisDataSetIterator(1, 150);
    while (iter.hasNext())
        data.add(iter.next());

    JavaRDD<DataSet> rdd = sc.parallelize(data);


    sparkNet1.fit(rdd);
    sparkNet2.fit(rdd);
    sparkNet3.fit(rdd);


    INDArray p1 = sparkNet1.getNetwork().params();
    INDArray p2 = sparkNet2.getNetwork().params();
    INDArray p3 = sparkNet3.getNetwork().params();

    sparkNet1.getTrainingMaster().deleteTempFiles(sc);
    sparkNet2.getTrainingMaster().deleteTempFiles(sc);
    sparkNet3.getTrainingMaster().deleteTempFiles(sc);

    boolean eq1 = p1.equalsWithEps(p2, 0.01);
    boolean eq2 = p1.equalsWithEps(p3, 0.01);
    assertTrue("Model 1 and 2 params should be equal", eq1);
    assertFalse("Model 1 and 3 params shoud be different", eq2);
}
 
Example #9
Source File: TestPreProcessedData.java    From deeplearning4j with Apache License 2.0
@Test
public void testPreprocessedDataCompGraphMultiDataSet() throws IOException {
    //Test _loading_ of preprocessed MultiDataSet data
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 10;

    String path = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_testpreprocdata3");
    File f = new File(path);
    if (f.exists())
        f.delete();
    f.mkdir();

    DataSetIterator iter = new IrisDataSetIterator(5, 150);
    int i = 0;
    while (iter.hasNext()) {
        File f2 = new File(FilenameUtils.concat(path, "data" + (i++) + ".bin"));
        DataSet ds = iter.next();
        MultiDataSet mds = new MultiDataSet(ds.getFeatures(), ds.getLabels());
        mds.save(f2);
    }

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .graphBuilder().addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX)
                                                    .build(),
                                    "0")
                    .setOutputs("1").build();

    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);

    sparkNet.fitMultiDataSet("file:///" + path.replaceAll("\\\\", "/"));

    SparkTrainingStats sts = sparkNet.getSparkTrainingStats();
    int expNumFits = 12; //4 'fits' per averaging (4 executors, 1 averaging freq); 10 examples each -> 40 examples per fit. 150/40 = 3 averagings (round down); 3*4 = 12

    //Unfortunately: perfect partitioning isn't guaranteed by SparkUtils.balancedRandomSplit (esp. if original partitions are all size 1
    // which appears to be occurring at least some of the time), but we should get close to what we expect...
    assertTrue(Math.abs(expNumFits - sts.getValue("ParameterAveragingWorkerFitTimesMs").size()) < 3);

    assertEquals(3, sts.getValue("ParameterAveragingMasterMapPartitionsTimesMs").size());
}
 
Example #10
Source File: TestPreProcessedData.java    From deeplearning4j with Apache License 2.0
@Test
public void testPreprocessedDataCompGraphDataSet() {
    //Test _loading_ of preprocessed DataSet data
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 10;

    String path = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_testpreprocdata2");
    File f = new File(path);
    if (f.exists())
        f.delete();
    f.mkdir();

    DataSetIterator iter = new IrisDataSetIterator(5, 150);
    int i = 0;
    while (iter.hasNext()) {
        File f2 = new File(FilenameUtils.concat(path, "data" + (i++) + ".bin"));
        iter.next().save(f2);
    }

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .graphBuilder().addInputs("in")
                    .addLayer("0", new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.TANH).build(), "in")
                    .addLayer("1", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX)
                                                    .build(),
                                    "0")
                    .setOutputs("1").build();

    SparkComputationGraph sparkNet = new SparkComputationGraph(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);

    sparkNet.fit("file:///" + path.replaceAll("\\\\", "/"));

    SparkTrainingStats sts = sparkNet.getSparkTrainingStats();
    int expNumFits = 12; //4 'fits' per averaging (4 executors, 1 averaging freq); 10 examples each -> 40 examples per fit. 150/40 = 3 averagings (round down); 3*4 = 12

    //Unfortunately: perfect partitioning isn't guaranteed by SparkUtils.balancedRandomSplit (esp. if original partitions are all size 1
    // which appears to be occurring at least some of the time), but we should get close to what we expect...
    assertTrue(Math.abs(expNumFits - sts.getValue("ParameterAveragingWorkerFitTimesMs").size()) < 3);

    assertEquals(3, sts.getValue("ParameterAveragingMasterMapPartitionsTimesMs").size());
}
 
Example #11
Source File: TestPreProcessedData.java    From deeplearning4j with Apache License 2.0
@Test
public void testPreprocessedData() {
    //Test _loading_ of preprocessed data
    int dataSetObjSize = 5;
    int batchSizePerExecutor = 10;

    String path = FilenameUtils.concat(System.getProperty("java.io.tmpdir"), "dl4j_testpreprocdata");
    File f = new File(path);
    if (f.exists())
        f.delete();
    f.mkdir();

    DataSetIterator iter = new IrisDataSetIterator(5, 150);
    int i = 0;
    while (iter.hasNext()) {
        File f2 = new File(FilenameUtils.concat(path, "data" + (i++) + ".bin"));
        iter.next().save(f2);
    }

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(Updater.RMSPROP)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).list()
                    .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder().nIn(4).nOut(3)
                                    .activation(Activation.TANH).build())
                    .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(
                                    LossFunctions.LossFunction.MCXENT).nIn(3).nOut(3).activation(Activation.SOFTMAX)
                                                    .build())
                    .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, conf,
                    new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                                    .batchSizePerWorker(batchSizePerExecutor).averagingFrequency(1)
                                    .repartionData(Repartition.Always).build());
    sparkNet.setCollectTrainingStats(true);

    sparkNet.fit("file:///" + path.replaceAll("\\\\", "/"));

    SparkTrainingStats sts = sparkNet.getSparkTrainingStats();
    int expNumFits = 12; //4 'fits' per averaging (4 executors, 1 averaging freq); 10 examples each -> 40 examples per fit. 150/40 = 3 averagings (round down); 3*4 = 12

    //Unfortunately: perfect partitioning isn't guaranteed by SparkUtils.balancedRandomSplit (esp. if original partitions are all size 1
    // which appears to be occurring at least some of the time), but we should get close to what we expect...
    assertTrue(Math.abs(expNumFits - sts.getValue("ParameterAveragingWorkerFitTimesMs").size()) < 3);

    assertEquals(3, sts.getValue("ParameterAveragingMasterMapPartitionsTimesMs").size());
}
 
Example #12
Source File: BatchNormalizationTest.java    From deeplearning4j with Apache License 2.0
@Test
public void checkMeanVarianceEstimateCNNCompareModes() throws Exception {

    Nd4j.getRandom().setSeed(12345);
    //Check that the internal global mean/variance estimate is approximately correct

    //First, Mnist data as 2d input (NOT taking into account convolution property)
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.RMSPROP).seed(12345).list()
            .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(false).build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER)
                    .activation(Activation.IDENTITY).nOut(10).build())
            .setInputType(InputType.convolutional(5, 5, 3)).build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Nd4j.getRandom().setSeed(12345);
    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.RMSPROP).seed(12345).list()
            .layer(0, new BatchNormalization.Builder().nIn(3).nOut(3).eps(1e-5).decay(0.95).useLogStd(true).build())
            .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE).weightInit(WeightInit.XAVIER)
                    .activation(Activation.IDENTITY).nOut(10).build())
            .setInputType(InputType.convolutional(5, 5, 3)).build();
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();

    int minibatch = 32;
    for (int i = 0; i < 10; i++) {
        DataSet ds = new DataSet(Nd4j.rand(new int[]{minibatch, 3, 5, 5}), Nd4j.rand(minibatch, 10));
        net.fit(ds);
        net2.fit(ds);

        INDArray globalVar = net.getParam("0_" + BatchNormalizationParamInitializer.GLOBAL_VAR);

        INDArray log10std = net2.getParam("0_" + BatchNormalizationParamInitializer.GLOBAL_LOG_STD);
        INDArray globalVar2 = Nd4j.valueArrayOf(log10std.shape(), 10.0).castTo(log10std.dataType());
        Transforms.pow(globalVar2, log10std, false);    // stdev = 10^(log10(stdev))
        globalVar2.muli(globalVar2);

        assertEquals(globalVar, globalVar2);
    }
}
 
Example #13
Source File: UtilLayerGradientChecks.java    From deeplearning4j with Apache License 2.0
@Test
public void testFrozenWithBackprop(){

    for( int minibatch : new int[]{1,5}) {

        MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
                .dataType(DataType.DOUBLE)
                .seed(12345)
                .updater(Updater.NONE)
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10)
                        .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build())
                .layer(new FrozenLayerWithBackprop(new DenseLayer.Builder().nIn(10).nOut(10)
                        .activation(Activation.TANH).weightInit(WeightInit.XAVIER).build()))
                .layer(new FrozenLayerWithBackprop(
                        new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH)
                                .weightInit(WeightInit.XAVIER).build()))
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf2);
        net.init();

        INDArray in = Nd4j.rand(minibatch, 10);
        INDArray labels = TestUtils.randomOneHot(minibatch, 10);

        Set<String> excludeParams = new HashSet<>();
        excludeParams.addAll(Arrays.asList("1_W", "1_b", "2_W", "2_b"));

        boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in)
                .labels(labels).excludeParams(excludeParams));
        assertTrue(gradOK);

        TestUtils.testModelSerialization(net);


        //Test ComputationGraph equivalent:
        ComputationGraph g = net.toComputationGraph();

        boolean gradOKCG = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(g)
                .minAbsoluteError(1e-6)
                .inputs(new INDArray[]{in}).labels(new INDArray[]{labels}).excludeParams(excludeParams));
        assertTrue(gradOKCG);

        TestUtils.testModelSerialization(g);
    }

}
 
Example #14
Source File: DL4JSequenceRecommenderTraits.java    From inception with Apache License 2.0
public Updater getUpdater()
{
    return updater;
}
 
Example #15
Source File: BaseNetConfigDeserializer.java    From deeplearning4j with Apache License 2.0
protected void handleUpdaterBackwardCompatibility(BaseLayer layer, ObjectNode on){
    if(on != null && on.has("updater")){
        String updaterName = on.get("updater").asText();
        if(updaterName != null){
            Updater u = Updater.valueOf(updaterName);
            IUpdater iu = u.getIUpdaterWithDefaultConfig();
            double lr = on.get("learningRate").asDouble();
            double eps;
            if(on.has("epsilon")){
                eps = on.get("epsilon").asDouble();
            } else {
                eps = Double.NaN;
            }
            double rho = on.get("rho").asDouble();
            switch (u){
                case SGD:
                    ((Sgd)iu).setLearningRate(lr);
                    break;
                case ADAM:
                    if(Double.isNaN(eps)){
                        eps = Adam.DEFAULT_ADAM_EPSILON;
                    }
                    ((Adam)iu).setLearningRate(lr);
                    ((Adam)iu).setBeta1(on.get("adamMeanDecay").asDouble());
                    ((Adam)iu).setBeta2(on.get("adamVarDecay").asDouble());
                    ((Adam)iu).setEpsilon(eps);
                    break;
                case ADAMAX:
                    if(Double.isNaN(eps)){
                        eps = AdaMax.DEFAULT_ADAMAX_EPSILON;
                    }
                    ((AdaMax)iu).setLearningRate(lr);
                    ((AdaMax)iu).setBeta1(on.get("adamMeanDecay").asDouble());
                    ((AdaMax)iu).setBeta2(on.get("adamVarDecay").asDouble());
                    ((AdaMax)iu).setEpsilon(eps);
                    break;
                case ADADELTA:
                    if(Double.isNaN(eps)){
                        eps = AdaDelta.DEFAULT_ADADELTA_EPSILON;
                    }
                    ((AdaDelta)iu).setRho(rho);
                    ((AdaDelta)iu).setEpsilon(eps);
                    break;
                case NESTEROVS:
                    ((Nesterovs)iu).setLearningRate(lr);
                    ((Nesterovs)iu).setMomentum(on.get("momentum").asDouble());
                    break;
                case NADAM:
                    if(Double.isNaN(eps)){
                        eps = Nadam.DEFAULT_NADAM_EPSILON;
                    }
                    ((Nadam)iu).setLearningRate(lr);
                    ((Nadam)iu).setBeta1(on.get("adamMeanDecay").asDouble());
                    ((Nadam)iu).setBeta2(on.get("adamVarDecay").asDouble());
                    ((Nadam)iu).setEpsilon(eps);
                    break;
                case ADAGRAD:
                    if(Double.isNaN(eps)){
                        eps = AdaGrad.DEFAULT_ADAGRAD_EPSILON;
                    }
                    ((AdaGrad)iu).setLearningRate(lr);
                    ((AdaGrad)iu).setEpsilon(eps);
                    break;
                case RMSPROP:
                    if(Double.isNaN(eps)){
                        eps = RmsProp.DEFAULT_RMSPROP_EPSILON;
                    }
                    ((RmsProp)iu).setLearningRate(lr);
                    ((RmsProp)iu).setEpsilon(eps);
                    ((RmsProp)iu).setRmsDecay(on.get("rmsDecay").asDouble());
                    break;
                default:
                    //No op
                    break;
            }

            layer.setIUpdater(iu);
        }
    }
}
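As a side note, the same enum-to-IUpdater bridge that this deserializer relies on can be used directly when migrating an old configuration by hand. The helper below is only a rough sketch of that idea, not part of BaseNetConfigDeserializer; it assumes the same Sgd/Nesterovs/RmsProp classes imported above and handles just a few updater types.

    // Hypothetical helper: map a legacy updater name plus a learning rate onto an IUpdater,
    // mirroring a small slice of the switch logic above.
    static IUpdater legacyToIUpdater(String updaterName, double lr) {
        Updater u = Updater.valueOf(updaterName);         // e.g. "RMSPROP", "NESTEROVS", "SGD"
        IUpdater iu = u.getIUpdaterWithDefaultConfig();   // defaults for epsilon, decay, momentum, ...
        if (iu instanceof Sgd) {
            ((Sgd) iu).setLearningRate(lr);
        } else if (iu instanceof Nesterovs) {
            ((Nesterovs) iu).setLearningRate(lr);
        } else if (iu instanceof RmsProp) {
            ((RmsProp) iu).setLearningRate(lr);
        }
        return iu;
    }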
 
Example #16
Source File: RecurrentNets.java    From StockPrediction with MIT License
public static MultiLayerNetwork buildLstmNetworks(int nIn, int nOut) {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(seed)
            .iterations(iterations)
            .learningRate(learningRate)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .weightInit(WeightInit.XAVIER)
            .updater(Updater.RMSPROP)
            .regularization(true)
            .l2(1e-4)
            .list()
            .layer(0, new GravesLSTM.Builder()
                    .nIn(nIn)
                    .nOut(lstmLayer1Size)
                    .activation(Activation.TANH)
                    .gateActivationFunction(Activation.HARDSIGMOID)
                    .dropOut(dropoutRatio)
                    .build())
            .layer(1, new GravesLSTM.Builder()
                    .nIn(lstmLayer1Size)
                    .nOut(lstmLayer2Size)
                    .activation(Activation.TANH)
                    .gateActivationFunction(Activation.HARDSIGMOID)
                    .dropOut(dropoutRatio)
                    .build())
            .layer(2, new DenseLayer.Builder()
                    .nIn(lstmLayer2Size)
                    .nOut(denseLayerSize)
                    .activation(Activation.RELU)
                    .build())
            .layer(3, new RnnOutputLayer.Builder()
                    .nIn(denseLayerSize)
                    .nOut(nOut)
                    .activation(Activation.IDENTITY)
                    .lossFunction(LossFunctions.LossFunction.MSE)
                    .build())
            .backpropType(BackpropType.TruncatedBPTT)
            .tBPTTForwardLength(truncatedBPTTLength)
            .tBPTTBackwardLength(truncatedBPTTLength)
            .pretrain(false)
            .backprop(true)
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    net.setListeners(new ScoreIterationListener(100));
    return net;
}
 
Example #17
Source File: MNISTModel.java    From FederatedAndroidTrainer with MIT License
@Override
public void buildModel() {
    //number of rows and columns in the input pictures
    final int numRows = 28;
    final int numColumns = 28;
    int rngSeed = 123; // random number seed for reproducibility
    int numEpochs = 15; // number of epochs to perform
    double rate = 0.0015; // learning rate

    Log.d(TAG, "Build model....");
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(rngSeed) //include a random seed for reproducibility
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) // use stochastic gradient descent as an optimization algorithm
            .iterations(1)
            .activation(Activation.RELU)
            .weightInit(WeightInit.XAVIER)
            .learningRate(rate) //specify the learning rate
            .updater(Updater.NESTEROVS)
            .regularization(true).l2(rate * 0.005) // regularize learning model
            .list()
            .layer(0, new DenseLayer.Builder() //create the first input layer.
                    .nIn(numRows * numColumns)
                    .nOut(500)
                    .build())
            .layer(1, new DenseLayer.Builder() //create the second input layer
                    .nIn(500)
                    .nOut(100)
                    .build())
            .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) //create output layer
                    .activation(Activation.SOFTMAX)
                    .nIn(100)
                    .nOut(OUTPUT_NUM)
                    .build())
            .pretrain(false).backprop(true) //use backpropagation to adjust weights
            .build();

    model = new MultiLayerNetwork(conf);
    model.init();
    model.setListeners(mIterationListener);  //print the score with every iteration

    Log.d(TAG, "****************Example finished********************");
}
 
Example #18
Source File: DL4JSentimentAnalysisExample.java    From Java-for-Data-Science with MIT License
public static void main(String[] args) throws Exception {

        getModelData();
        
        System.out.println("Total memory = " + Runtime.getRuntime().totalMemory());

        int batchSize = 50;
        int vectorSize = 300;
        int nEpochs = 5;
        int truncateReviewsToLength = 300;

        MultiLayerConfiguration sentimentNN = new NeuralNetConfiguration.Builder()
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
                .updater(Updater.RMSPROP)
                .regularization(true).l2(1e-5)
                .weightInit(WeightInit.XAVIER)
                .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
                .learningRate(0.0018)
                .list()
                .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(200)
                        .activation("softsign").build())
                .layer(1, new RnnOutputLayer.Builder().activation("softmax")
                        .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(200).nOut(2).build())
                .pretrain(false).backprop(true).build();

        MultiLayerNetwork net = new MultiLayerNetwork(sentimentNN);
        net.init();
        net.setListeners(new ScoreIterationListener(1));

        WordVectors wordVectors = WordVectorSerializer.loadGoogleModel(new File(GNEWS_VECTORS_PATH), true, false);
        DataSetIterator trainData = new AsyncDataSetIterator(new SentimentExampleIterator(EXTRACT_DATA_PATH, wordVectors, batchSize, truncateReviewsToLength, true), 1);
        DataSetIterator testData = new AsyncDataSetIterator(new SentimentExampleIterator(EXTRACT_DATA_PATH, wordVectors, 100, truncateReviewsToLength, false), 1);

        for (int i = 0; i < nEpochs; i++) {
            net.fit(trainData);
            trainData.reset();

            Evaluation evaluation = new Evaluation();
            while (testData.hasNext()) {
                DataSet t = testData.next();
                INDArray dataFeatures = t.getFeatureMatrix();
                INDArray dataLabels = t.getLabels();
                INDArray inMask = t.getFeaturesMaskArray();
                INDArray outMask = t.getLabelsMaskArray();
                INDArray predicted = net.output(dataFeatures, false, inMask, outMask);

                evaluation.evalTimeSeries(dataLabels, predicted, outMask);
            }
            testData.reset();

            System.out.println(evaluation.stats());
        }
    }
 
Example #19
Source File: NeuralNetAttribute.java    From greycat with Apache License 2.0
public void reconf() {

        int seed = 123;
        double learningRate = 0.01;
        int numInputs = 2;
        int numOutputs = 2;
        int numHiddenNodes = 5;

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(seed)
                .iterations(1)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .learningRate(learningRate)
                .updater(Updater.NESTEROVS).momentum(0.9)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes)
                        .weightInit(WeightInit.XAVIER)
                        .activation("relu")
                        .build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                        .weightInit(WeightInit.XAVIER)
                        .activation("softmax").weightInit(WeightInit.XAVIER)
                        .nIn(numHiddenNodes).nOut(numOutputs).build())
                .pretrain(false).backprop(true).build();

        model = new MultiLayerNetwork(conf);

        System.out.println("Ready :-)");

        if (dirty != null) {
            dirty.run();
        }

    }
 
Example #20
Source File: DigitRecognizerConvolutionalNeuralNetwork.java    From Java-Machine-Learning-for-Computer-Vision with MIT License
public void train() throws IOException {

        MnistDataSetIterator mnistTrain = new MnistDataSetIterator(MINI_BATCH_SIZE, true, 12345);

        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(SEED)
                .learningRate(LEARNING_RATE)
                .weightInit(WeightInit.XAVIER)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .updater(Updater.NESTEROVS)
                .list()
                .layer(0, new ConvolutionLayer.Builder(5, 5)
                        .nIn(CHANNELS)
                        .stride(1, 1)
                        .nOut(20)
                        .activation(Activation.IDENTITY)
                        .build())
                .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                        .kernelSize(2, 2)
                        .stride(2, 2)
                        .build())
                .layer(2, new ConvolutionLayer.Builder(5, 5)
                        .nIn(20)
                        .stride(1, 1)
                        .nOut(50)
                        .activation(Activation.IDENTITY)
                        .build())
                .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
                        .kernelSize(2, 2)
                        .stride(2, 2)
                        .build())
                .layer(4, new DenseLayer.Builder().activation(Activation.RELU)
                        .nIn(800)
                        .nOut(128).build())
                .layer(5, new DenseLayer.Builder().activation(Activation.RELU)
                        .nIn(128)
                        .nOut(64).build())
                .layer(6, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                        .nOut(OUTPUT)
                        .activation(Activation.SOFTMAX)
                        .build())
                .setInputType(InputType.convolutionalFlat(28, 28, 1))
                .backprop(true).pretrain(false).build();

        EarlyStoppingConfiguration earlyStoppingConfiguration = new EarlyStoppingConfiguration.Builder()
                .epochTerminationConditions(new MaxEpochsTerminationCondition(MAX_EPOCHS))
                .scoreCalculator(new AccuracyCalculator(new MnistDataSetIterator(MINI_BATCH_SIZE, false, 12345)))
                .evaluateEveryNEpochs(1)
                .modelSaver(new LocalFileModelSaver(OUT_DIR))
                .build();

        EarlyStoppingTrainer trainer = new EarlyStoppingTrainer(earlyStoppingConfiguration, conf, mnistTrain);

        EarlyStoppingResult<MultiLayerNetwork> result = trainer.fit();

        log.info("Termination reason: " + result.getTerminationReason());
        log.info("Termination details: " + result.getTerminationDetails());
        log.info("Total epochs: " + result.getTotalEpochs());
        log.info("Best epoch number: " + result.getBestModelEpoch());
        log.info("Score at best epoch: " + result.getBestModelScore());
    }
 
Example #21
Source File: TransferLearningVGG16.java    From Java-Machine-Learning-for-Computer-Vision with MIT License
public void train() throws IOException {
    ComputationGraph preTrainedNet = loadVGG16PreTrainedWeights();
    log.info("VGG 16 Architecture");
    log.info(preTrainedNet.summary());

    log.info("Start Downloading NeuralNetworkTrainingData...");

    downloadAndUnzipDataForTheFirstTime();

    log.info("NeuralNetworkTrainingData Downloaded and unzipped");

    neuralNetworkTrainingData = new DataStorage() {
    }.loadData();

    FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder()
            .learningRate(LEARNING_RATE)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
            .updater(Updater.NESTEROVS)
            .seed(1234)
            .build();

    ComputationGraph vgg16Transfer = new TransferLearning.GraphBuilder(preTrainedNet)
            .fineTuneConfiguration(fineTuneConf)
            .setFeatureExtractor(FREEZE_UNTIL_LAYER)
            .removeVertexKeepConnections("predictions")
            .addLayer("predictions",
                    new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                            .nIn(4096)
                            .nOut(NUM_POSSIBLE_LABELS)
                            .weightInit(WeightInit.XAVIER)
                            .activation(Activation.SOFTMAX)
                            .build(),
                    FREEZE_UNTIL_LAYER)
            .build();
    vgg16Transfer.setListeners(new ScoreIterationListener(5));

    log.info("Modified VGG 16 Architecture for transfer learning");
    log.info(vgg16Transfer.summary());

    int iEpoch = 0;
    int iIteration = 0;
    while (iEpoch < EPOCH) {
        while (neuralNetworkTrainingData.getTrainIterator().hasNext()) {
            DataSet trainMiniBatchData = neuralNetworkTrainingData.getTrainIterator().next();
            vgg16Transfer.fit(trainMiniBatchData);
            saveProgressEveryConfiguredInterval(vgg16Transfer, iEpoch, iIteration);
            iIteration++;
        }
        neuralNetworkTrainingData.getTrainIterator().reset();
        iEpoch++;

        evalOn(vgg16Transfer, neuralNetworkTrainingData.getTestIterator(), iEpoch);
    }
}
 
Example #22
Source File: TrainCifar10Model.java    From Java-Machine-Learning-for-Computer-Vision with MIT License
private void train() throws IOException {

        ZooModel zooModel = VGG16.builder().build();
        ComputationGraph vgg16 = (ComputationGraph) zooModel.initPretrained(PretrainedType.CIFAR10);
        log.info(vgg16.summary());

        IUpdater iUpdaterWithDefaultConfig = Updater.ADAM.getIUpdaterWithDefaultConfig();
        iUpdaterWithDefaultConfig.setLrAndSchedule(0.1, null);
        FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder()
                .seed(1234)
//                .weightInit(WeightInit.XAVIER)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .activation(Activation.RELU)
                .updater(iUpdaterWithDefaultConfig)
                .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE)
                .miniBatch(true)
                .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
                .trainingWorkspaceMode(WorkspaceMode.ENABLED)
                .pretrain(true)
                .backprop(true)
                .build();

        ComputationGraph cifar10 = new TransferLearning.GraphBuilder(vgg16)
                .setWorkspaceMode(WorkspaceMode.ENABLED)
                .fineTuneConfiguration(fineTuneConf)
                .setInputTypes(InputType.convolutionalFlat(ImageUtils.HEIGHT,
                        ImageUtils.WIDTH, 3))
                .removeVertexAndConnections("dense_2_loss")
                .removeVertexAndConnections("dense_2")
                .removeVertexAndConnections("dense_1")
                .removeVertexAndConnections("dropout_1")
                .removeVertexAndConnections("embeddings")
                .removeVertexAndConnections("flatten_1")
                .addLayer("dense_1", new DenseLayer.Builder()
                        .nIn(4096)
                        .nOut(EMBEDDINGS)
                        .activation(Activation.RELU).build(), "block3_pool")
                .addVertex("embeddings", new L2NormalizeVertex(new int[]{}, 1e-12), "dense_1")
                .addLayer("lossLayer", new CenterLossOutputLayer.Builder()
                                .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
                                .activation(Activation.SOFTMAX).nIn(EMBEDDINGS).nOut(NUM_POSSIBLE_LABELS)
                                .lambda(LAMBDA).alpha(0.9)
                                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build(),
                        "embeddings")
                .setOutputs("lossLayer")
                .build();

        log.info(cifar10.summary());
        File rootDir = new File("CarTracking/train_from_video_" + NUM_POSSIBLE_LABELS);
        DataSetIterator dataSetIterator = ImageUtils.createDataSetIterator(rootDir, NUM_POSSIBLE_LABELS, BATCH_SIZE);
        DataSetIterator testIterator = ImageUtils.createDataSetIterator(rootDir, NUM_POSSIBLE_LABELS, BATCH_SIZE);
        cifar10.setListeners(new ScoreIterationListener(2));
        int iEpoch = I_EPOCH;
        while (iEpoch < EPOCH_TRAINING) {
            while (dataSetIterator.hasNext()) {
                DataSet trainMiniBatchData = null;
                try {
                    trainMiniBatchData = dataSetIterator.next();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                cifar10.fit(trainMiniBatchData);
            }
            iEpoch++;

            String modelName = PREFIX + NUM_POSSIBLE_LABELS + "_epoch_data_e" + EMBEDDINGS + "_b" + BATCH_SIZE + "_" + iEpoch + ".zip";
            saveProgress(cifar10, iEpoch, modelName);
            testResults(cifar10, testIterator, iEpoch, modelName);
            dataSetIterator.reset();
            log.info("iEpoch = " + iEpoch);
        }
    }
 
Example #23
Source File: DL4JSequenceRecommenderTraits.java    From inception with Apache License 2.0
public void setUpdater(Updater updater)
{
    this.updater = updater;
}