Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setUpdater()
The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#setUpdater(). Each example is taken from an open source project; the source file and license are noted above the code.
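As a quick orientation before the examples, here is a minimal, self-contained sketch of the basic call pattern. It is an assumption-laden illustration, not taken from any of the examples below: the SetUpdaterSketch class name and the tiny two-layer configuration are made up, and the imports assume a recent DL4J/ND4J release.

import org.deeplearning4j.nn.api.Updater;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.nn.updater.UpdaterCreator;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class SetUpdaterSketch {
    public static void main(String[] args) {
        // Hypothetical minimal configuration, only so there is a network to call setUpdater() on
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Nesterovs(0.01, 0.9))
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(3).nOut(2).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Build a fresh Updater for this network and install it explicitly
        Updater updater = UpdaterCreator.getUpdater(net);
        net.setUpdater(updater);

        // Passing null discards the current updater and its state; a fresh updater is
        // created the next time one is needed (this is what Examples 2 and 4-6 rely on)
        net.setUpdater(null);
    }
}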
Example 1
Source File: TestUpdaters.java From deeplearning4j with Apache License 2.0
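This test verifies that setUpdater() installs the exact Updater instance passed to it: the assertion at the end checks reference equality between the instance that was set and the one returned by getUpdater().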
@Test
public void testSetGetUpdater2() {
    //Same as above test, except that we are doing setUpdater on a new network
    Nd4j.getRandom().setSeed(12345L);
    double lr = 0.03;

    int nIn = 4;
    int nOut = 8;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new Nesterovs(lr, 0.6)).list()
                    .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(5)
                                    .updater(org.deeplearning4j.nn.conf.Updater.SGD).build())
                    .layer(1, new DenseLayer.Builder().nIn(5).nOut(6)
                                    .updater(new NoOp()).build())
                    .layer(2, new DenseLayer.Builder().nIn(6).nOut(7)
                                    .updater(org.deeplearning4j.nn.conf.Updater.ADAGRAD).build())
                    .layer(3, new OutputLayer.Builder().nIn(7).nOut(nOut).activation(Activation.SOFTMAX)
                                    .updater(org.deeplearning4j.nn.conf.Updater.NESTEROVS).build())
                    .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    Updater newUpdater = UpdaterCreator.getUpdater(net);
    net.setUpdater(newUpdater);
    assertTrue(newUpdater == net.getUpdater()); //Should be identical object
}
Example 2
Source File: NetworkUtils.java From deeplearning4j with Apache License 2.0
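This utility swaps in a freshly created updater while preserving the existing updater state: setUpdater(null) clears the current updater, the next getUpdater() call builds a new MultiLayerUpdater, and the original state array is remapped onto the new updater's blocks before being restored via setStateViewArray().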
private static void refreshUpdater(MultiLayerNetwork net) {
    INDArray origUpdaterState = net.getUpdater().getStateViewArray();
    MultiLayerUpdater origUpdater = (MultiLayerUpdater) net.getUpdater();
    net.setUpdater(null);
    MultiLayerUpdater newUpdater = (MultiLayerUpdater) net.getUpdater();
    INDArray newUpdaterState = rebuildUpdaterStateArray(origUpdaterState, origUpdater.getUpdaterBlocks(), newUpdater.getUpdaterBlocks());
    newUpdater.setStateViewArray(newUpdaterState);
}
Example 3
Source File: ParameterAveragingTrainingWorker.java From deeplearning4j with Apache License 2.0
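In this Spark training worker, the broadcast updater state is duplicated and wrapped in a new MultiLayerUpdater before being installed with setUpdater(), so executors never share updater state (or parameter arrays) with one another.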
@Override
public MultiLayerNetwork getInitialModel() {
    if (configuration.isCollectTrainingStats())
        stats = new ParameterAveragingTrainingWorkerStats.ParameterAveragingTrainingWorkerStatsHelper();

    if (configuration.isCollectTrainingStats())
        stats.logBroadcastGetValueStart();
    NetBroadcastTuple tuple = broadcast.getValue();
    if (configuration.isCollectTrainingStats())
        stats.logBroadcastGetValueEnd();

    //Don't want to have shared configuration object: each may update its iteration count (for LR schedule etc) individually
    MultiLayerNetwork net = new MultiLayerNetwork(tuple.getConfiguration().clone());
    //Can't have shared parameter array across executors for parameter averaging, hence the 'true' for clone parameters array arg
    net.init(tuple.getParameters().unsafeDuplication(), false);

    if (tuple.getUpdaterState() != null) {
        net.setUpdater(new MultiLayerUpdater(net, tuple.getUpdaterState().unsafeDuplication())); //Can't have shared updater state
    }

    Nd4j.getExecutioner().commit();

    configureListeners(net, tuple.getCounter().getAndIncrement());

    if (configuration.isCollectTrainingStats())
        stats.logInitEnd();

    return net;
}
Example 4
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java From deeplearning4j with Apache License 2.0
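In this and the following two examples, setUpdater(null) is called before each local fit() when saveUpdater is false, discarding the updater state between minibatches to mirror a Spark training master configured with saveUpdater(false).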
@Test
public void testOneExecutor() {
    //Idea: single worker/executor on Spark should give identical results to a single machine

    int miniBatchSize = 10;
    int nWorkers = 1;

    for (boolean saveUpdater : new boolean[] {true, false}) {
        JavaSparkContext sc = getContext(nWorkers);

        try {
            //Do training locally, for 3 minibatches
            int[] seeds = {1, 2, 3};

            MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new RmsProp(0.5)));
            net.init();
            INDArray initialParams = net.params().dup();

            for (int i = 0; i < seeds.length; i++) {
                DataSet ds = getOneDataSet(miniBatchSize, seeds[i]);
                if (!saveUpdater)
                    net.setUpdater(null);
                net.fit(ds);
            }
            INDArray finalParams = net.params().dup();

            //Do training on Spark with one executor, for 3 separate minibatches
            TrainingMaster tm = getTrainingMaster(1, miniBatchSize, saveUpdater);
            SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new RmsProp(0.5)), tm);
            sparkNet.setCollectTrainingStats(true);
            INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

            for (int i = 0; i < seeds.length; i++) {
                List<DataSet> list = getOneDataSetAsIndividalExamples(miniBatchSize, seeds[i]);
                JavaRDD<DataSet> rdd = sc.parallelize(list);

                sparkNet.fit(rdd);
            }

            INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

            assertEquals(initialParams, initialSparkParams);
            assertNotEquals(initialParams, finalParams);
            assertEquals(finalParams, finalSparkParams);
        } finally {
            sc.stop();
        }
    }
}
Example 5
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java From deeplearning4j with Apache License 2.0
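Same comparison as the previous example, but with four workers and parameter averaging after every minibatch, which (with the SGD updater, as the test comments note) should be mathematically identical to single-machine training.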
@Test
public void testAverageEveryStep() {
    //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
    // on a single machine for synchronous distributed training
    //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
    // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet> on a data set that needs splitting),
    // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
    //This is also ONLY the case using SGD updater

    int miniBatchSizePerWorker = 10;
    int nWorkers = 4;

    for (boolean saveUpdater : new boolean[] {true, false}) {
        JavaSparkContext sc = getContext(nWorkers);

        try {
            //Do training locally, for 3 minibatches
            int[] seeds = {1, 2, 3};

            // CudaGridExecutioner executioner = (CudaGridExecutioner) Nd4j.getExecutioner();

            MultiLayerNetwork net = new MultiLayerNetwork(getConf(12345, new Sgd(0.5)));
            net.init();
            INDArray initialParams = net.params().dup();
            // executioner.addToWatchdog(initialParams, "initialParams");

            for (int i = 0; i < seeds.length; i++) {
                DataSet ds = getOneDataSet(miniBatchSizePerWorker * nWorkers, seeds[i]);
                if (!saveUpdater)
                    net.setUpdater(null);
                net.fit(ds);
            }
            INDArray finalParams = net.params().dup();

            //Do training on Spark with one executor, for 3 separate minibatches
            // TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
            ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1)
                            .averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker)
                            .saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
                            // .rddTrainingApproach(RDDTrainingApproach.Direct)
                            .rddTrainingApproach(RDDTrainingApproach.Export).build();
            SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConf(12345, new Sgd(0.5)), tm);
            sparkNet.setCollectTrainingStats(true);
            INDArray initialSparkParams = sparkNet.getNetwork().params().dup();
            // executioner.addToWatchdog(initialSparkParams, "initialSparkParams");

            for (int i = 0; i < seeds.length; i++) {
                List<DataSet> list = getOneDataSetAsIndividalExamples(miniBatchSizePerWorker * nWorkers, seeds[i]);
                JavaRDD<DataSet> rdd = sc.parallelize(list);

                sparkNet.fit(rdd);
            }

            // System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
            sparkNet.getSparkTrainingStats().statsAsString();

            INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

            // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat()));
            // System.out.println("Initial (Spark) params: " + Arrays.toString(initialSparkParams.data().asFloat()));
            // System.out.println("Final (Local) params: " + Arrays.toString(finalParams.data().asFloat()));
            // System.out.println("Final (Spark) params: " + Arrays.toString(finalSparkParams.data().asFloat()));
            assertEquals(initialParams, initialSparkParams);
            assertNotEquals(initialParams, finalParams);
            assertEquals(finalParams, finalSparkParams);

            double sparkScore = sparkNet.getScore();
            assertTrue(sparkScore > 0.0);

            assertEquals(net.score(), sparkScore, 1e-3);
        } finally {
            sc.stop();
        }
    }
}
Example 6
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java From deeplearning4j with Apache License 2.0
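The same averaging-every-step comparison, repeated for a convolutional network configuration (getConfCNN / getOneDataSetCNN), with the local and Spark parameters compared element-wise within floating-point tolerances.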
@Test
public void testAverageEveryStepCNN() {
    //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
    // on a single machine for synchronous distributed training
    //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
    // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet> on a data set that needs splitting),
    // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
    //This is also ONLY the case using SGD updater

    int miniBatchSizePerWorker = 10;
    int nWorkers = 4;

    for (boolean saveUpdater : new boolean[] {true, false}) {
        JavaSparkContext sc = getContext(nWorkers);

        try {
            //Do training locally, for 3 minibatches
            int[] seeds = {1, 2, 3};

            MultiLayerNetwork net = new MultiLayerNetwork(getConfCNN(12345, new Sgd(0.5)));
            net.init();
            INDArray initialParams = net.params().dup();

            for (int i = 0; i < seeds.length; i++) {
                DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                if (!saveUpdater)
                    net.setUpdater(null);
                net.fit(ds);
            }
            INDArray finalParams = net.params().dup();

            //Do training on Spark with one executor, for 3 separate minibatches
            ParameterAveragingTrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1)
                            .averagingFrequency(1).batchSizePerWorker(miniBatchSizePerWorker)
                            .saveUpdater(saveUpdater).workerPrefetchNumBatches(0)
                            .rddTrainingApproach(RDDTrainingApproach.Export).build();
            SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc, getConfCNN(12345, new Sgd(0.5)), tm);
            sparkNet.setCollectTrainingStats(true);
            INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

            for (int i = 0; i < seeds.length; i++) {
                List<DataSet> list = getOneDataSetAsIndividalExamplesCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                JavaRDD<DataSet> rdd = sc.parallelize(list);

                sparkNet.fit(rdd);
            }

            // System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
            sparkNet.getSparkTrainingStats().statsAsString();

            INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

            // System.out.println("Initial (Local) params: " + Arrays.toString(initialParams.data().asFloat()));
            // System.out.println("Initial (Spark) params: " + Arrays.toString(initialSparkParams.data().asFloat()));
            // System.out.println("Final (Local) params: " + Arrays.toString(finalParams.data().asFloat()));
            // System.out.println("Final (Spark) params: " + Arrays.toString(finalSparkParams.data().asFloat()));
            assertArrayEquals(initialParams.data().asFloat(), initialSparkParams.data().asFloat(), 1e-8f);
            assertArrayEquals(finalParams.data().asFloat(), finalSparkParams.data().asFloat(), 1e-6f);

            double sparkScore = sparkNet.getScore();
            assertTrue(sparkScore > 0.0);

            assertEquals(net.score(), sparkScore, 1e-3);
        } finally {
            sc.stop();
        }
    }
}