org.nd4j.linalg.schedule.ScheduleType Java Examples
The following examples show how to use org.nd4j.linalg.schedule.ScheduleType.
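ScheduleType has two values, ITERATION and EPOCH, and tells an ISchedule implementation which training counter drives its decay. Before the examples, here is a minimal sketch of the distinction, assuming ND4J's ISchedule.valueAt(int iteration, int epoch) signature and a StepSchedule decay of initialValue * decayRate^floor(count / step):

import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.ScheduleType;
import org.nd4j.linalg.schedule.StepSchedule;

public class ScheduleTypeDemo {
    public static void main(String[] args) {
        // Same decay parameters; only the counter they respond to differs
        ISchedule perIteration = new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.1, 2);
        ISchedule perEpoch = new StepSchedule(ScheduleType.EPOCH, 1e-2, 0.1, 2);

        // At iteration 4 of epoch 0:
        System.out.println(perIteration.valueAt(4, 0)); // 1e-2 * 0.1^2 = 1e-4 (iteration counter drives decay)
        System.out.println(perEpoch.valueAt(4, 0));     // 1e-2 (epoch counter is still 0)
    }
}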
Example #1
Source File: SigmoidScheduleSpace.java From deeplearning4j with Apache License 2.0
public SigmoidScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                            @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                            @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma,
                            @NonNull @JsonProperty("stepSize") ParameterSpace<Integer> stepSize) {
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
    this.stepSize = stepSize;
}
Example #2
Source File: StepScheduleSpace.java From deeplearning4j with Apache License 2.0
public StepScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                         @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                         @NonNull @JsonProperty("decayRate") ParameterSpace<Double> decayRate,
                         @NonNull @JsonProperty("step") ParameterSpace<Double> step) {
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.decayRate = decayRate;
    this.step = step;
}
Example #3
Source File: InverseScheduleSpace.java From deeplearning4j with Apache License 2.0
public InverseScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                            @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                            @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma,
                            @NonNull @JsonProperty("power") ParameterSpace<Double> power) {
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
    this.power = power;
}
Example #4
Source File: LayerConfigValidationTest.java From deeplearning4j with Apache License 2.0
@Test
public void testNesterovsNotSetGlobal() {
    // Warnings only thrown
    Map<Integer, Double> testMomentumAfter = new HashMap<>();
    testMomentumAfter.put(0, 0.1);

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(new Nesterovs(1.0, new MapSchedule(ScheduleType.ITERATION, testMomentumAfter)))
            .list()
            .layer(0, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .layer(1, new DenseLayer.Builder().nIn(2).nOut(2).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
}
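The MapSchedule above maps iteration 0 to momentum 0.1 and holds that value thereafter. MapSchedule also has a builder form (it appears verbatim in Example #17 below); a minimal sketch of the same idea with a second breakpoint:

import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.MapSchedule;
import org.nd4j.linalg.schedule.ScheduleType;

public class MapScheduleDemo {
    public static void main(String[] args) {
        // The value at key k applies from iteration k until the next key
        ISchedule momentum = new MapSchedule.Builder(ScheduleType.ITERATION)
                .add(0, 0.1)   // iterations 0..99: momentum 0.1
                .add(100, 0.5) // iteration 100 onward: momentum 0.5
                .build();
        System.out.println(momentum.valueAt(50, 0));  // 0.1
        System.out.println(momentum.valueAt(150, 0)); // 0.5
    }
}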
Example #5
Source File: ExponentialScheduleSpace.java From deeplearning4j with Apache License 2.0
public ExponentialScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                                @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                                @NonNull @JsonProperty("gamma") ParameterSpace<Double> gamma) {
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.gamma = gamma;
}
Example #6
Source File: PolyScheduleSpace.java From deeplearning4j with Apache License 2.0
public PolyScheduleSpace(@NonNull @JsonProperty("scheduleType") ScheduleType scheduleType,
                         @NonNull @JsonProperty("initialValue") ParameterSpace<Double> initialValue,
                         @NonNull @JsonProperty("power") ParameterSpace<Double> power,
                         @NonNull @JsonProperty("maxIter") ParameterSpace<Integer> maxIter) {
    this.scheduleType = scheduleType;
    this.initialValue = initialValue;
    this.power = power;
    this.maxIter = maxIter;
}
Example #7
Source File: PolyScheduleSpace.java From deeplearning4j with Apache License 2.0
public PolyScheduleSpace(@NonNull ScheduleType scheduleType,
                         @NonNull ParameterSpace<Double> initialValue,
                         double power, int maxIter) {
    this(scheduleType, initialValue, new FixedValue<>(power), new FixedValue<>(maxIter));
}
Example #8
Source File: SigmoidScheduleSpace.java From deeplearning4j with Apache License 2.0
public SigmoidScheduleSpace(@NonNull ScheduleType scheduleType,
                            @NonNull ParameterSpace<Double> initialValue,
                            double gamma, int stepSize) {
    this(scheduleType, initialValue, new FixedValue<>(gamma), new FixedValue<>(stepSize));
}
Example #9
Source File: StepScheduleSpace.java From deeplearning4j with Apache License 2.0
public StepScheduleSpace(@NonNull ScheduleType scheduleType,
                         @NonNull ParameterSpace<Double> initialValue,
                         double decayRate, double step) {
    this(scheduleType, initialValue, new FixedValue<>(decayRate), new FixedValue<>(step));
}
Example #10
Source File: InverseScheduleSpace.java From deeplearning4j with Apache License 2.0
public InverseScheduleSpace(@NonNull ScheduleType scheduleType,
                            @NonNull ParameterSpace<Double> initialValue,
                            double gamma, double power) {
    this(scheduleType, initialValue, new FixedValue<>(gamma), new FixedValue<>(power));
}
Example #11
Source File: ExponentialScheduleSpace.java From deeplearning4j with Apache License 2.0
public ExponentialScheduleSpace(@NonNull ScheduleType scheduleType,
                                @NonNull ParameterSpace<Double> initialValue,
                                double gamma) {
    this(scheduleType, initialValue, new FixedValue<>(gamma));
}
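Examples #7 through #11 all follow the same convenience pattern: scalar hyperparameters that should not be searched over are wrapped in FixedValue, so the full ParameterSpace-based constructor can be reused. A hedged sketch of calling one of these overloads; ContinuousParameterSpace is assumed here to be Arbiter's continuous-range parameter space, and the exact package may differ:

// Assumed import; verify the Arbiter package in your version:
// import org.deeplearning4j.arbiter.optimize.parameter.continuous.ContinuousParameterSpace;

// Search the initial rate over [1e-3, 1e-1] while gamma stays fixed at 0.9;
// the overload wraps 0.9 as new FixedValue<>(0.9) internally (see Example #11 above).
ExponentialScheduleSpace space = new ExponentialScheduleSpace(
        ScheduleType.ITERATION,
        new ContinuousParameterSpace(1e-3, 1e-1),
        0.9);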
Example #12
Source File: DL4JSequenceRecommender.java From inception with Apache License 2.0
private MultiLayerNetwork createConfiguredNetwork(DL4JSequenceRecommenderTraits aTraits, int aEmbeddingsDim)
{
    long start = System.currentTimeMillis();

    // Set up network configuration
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(aTraits.getOptimizationAlgorithm())
            .updater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 1e-2, 0.1, 100000), 0.9))
            .biasUpdater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 2e-2, 0.1, 100000), 0.9))
            .l2(aTraits.getL2())
            .weightInit(aTraits.getWeightInit())
            .gradientNormalization(aTraits.getGradientNormalization())
            .gradientNormalizationThreshold(aTraits.getGradientNormalizationThreshold())
            .list()
            .layer(0, new Bidirectional(Bidirectional.Mode.ADD, new LSTM.Builder()
                    .nIn(aEmbeddingsDim)
                    .nOut(200)
                    .activation(aTraits.getActivationL0())
                    .build()))
            .layer(1, new RnnOutputLayer.Builder()
                    .nIn(200)
                    .nOut(aTraits.getMaxTagsetSize())
                    .activation(aTraits.getActivationL1())
                    .lossFunction(aTraits.getLossFunction())
                    .build())
            .build();

    // log.info("Network configuration: {}", conf.toYaml());

    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();
    // net.setListeners(new ScoreIterationListener(1));

    log.trace("Setting up the model took {}ms", System.currentTimeMillis() - start);
    return net;
}
Example #13
Source File: TestKryo.java From deeplearning4j with Apache License 2.0
@Test
public void testSerializationConfigurations() {
    SerializerInstance si = sc.env().serializer().newInstance();

    //Check network configurations:
    Map<Integer, Double> m = new HashMap<>();
    m.put(0, 0.5);
    m.put(10, 0.1);
    MultiLayerConfiguration mlc = new NeuralNetConfiguration.Builder()
            .updater(new Nadam(new MapSchedule(ScheduleType.ITERATION, m))).list()
            .layer(0, new OutputLayer.Builder().nIn(10).nOut(10).build())
            .build();

    testSerialization(mlc, si);

    ComputationGraphConfiguration cgc = new NeuralNetConfiguration.Builder()
            .dist(new UniformDistribution(-1, 1))
            .updater(new Adam(new MapSchedule(ScheduleType.ITERATION, m)))
            .graphBuilder()
            .addInputs("in").addLayer("out", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in")
            .setOutputs("out").build();

    testSerialization(cgc, si);

    //Check main layers:
    Layer[] layers = new Layer[] {new OutputLayer.Builder().nIn(10).nOut(10).build(),
            new RnnOutputLayer.Builder().nIn(10).nOut(10).build(), new LossLayer.Builder().build(),
            new CenterLossOutputLayer.Builder().nIn(10).nOut(10).build(),
            new DenseLayer.Builder().nIn(10).nOut(10).build(),
            new ConvolutionLayer.Builder().nIn(10).nOut(10).build(), new SubsamplingLayer.Builder().build(),
            new Convolution1DLayer.Builder(2, 2).nIn(10).nOut(10).build(),
            new ActivationLayer.Builder().activation(Activation.TANH).build(),
            new GlobalPoolingLayer.Builder().build(),
            new GravesLSTM.Builder().nIn(10).nOut(10).build(),
            new LSTM.Builder().nIn(10).nOut(10).build(),
            new DropoutLayer.Builder(0.5).build(),
            new BatchNormalization.Builder().build(),
            new LocalResponseNormalization.Builder().build()};

    for (Layer l : layers) {
        testSerialization(l, si);
    }

    //Check graph vertices
    GraphVertex[] vertices = new GraphVertex[] {new ElementWiseVertex(ElementWiseVertex.Op.Add),
            new L2NormalizeVertex(), new LayerVertex(null, null), new MergeVertex(), new PoolHelperVertex(),
            new PreprocessorVertex(new CnnToFeedForwardPreProcessor(28, 28, 1)),
            new ReshapeVertex(new int[] {1, 1}), new ScaleVertex(1.0), new ShiftVertex(1.0),
            new SubsetVertex(1, 1), new UnstackVertex(0, 2), new DuplicateToTimeSeriesVertex("in1"),
            new LastTimeStepVertex("in1")};

    for (GraphVertex gv : vertices) {
        testSerialization(gv, si);
    }
}
Example #14
Source File: ValidateCuDNN.java From deeplearning4j with Apache License 2.0
@Test
@Ignore //AB 2019/05/20 - https://github.com/deeplearning4j/deeplearning4j/issues/5088 - ignored to get to "all passing" state for CI, and revisit later
public void validateConvLayersLRN() {
    //Test ONLY LRN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(
                            ScheduleType.EPOCH, 1e-2, 0.1, 20)).build()).list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new LocalResponseNormalization.Builder()
                            .alpha(1e-3).beta(0.75).k(2)
                            .n(5).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new Convolution2D.Builder().nOut(256)
                            .kernelSize(5, 5).padding(2, 2)
                            .biasInit(0.0)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, 1e-2, 1e-2);
}
Example #15
Source File: ValidateCuDNN.java From deeplearning4j with Apache License 2.0
@Test
public void validateConvLayersSimpleBN() {
    //Test ONLY BN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(
                            ScheduleType.EPOCH, 1e-2, 0.1, 20)).build()).list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new BatchNormalization.Builder().build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new DenseLayer.Builder()
                            .nOut(128)
                            .biasInit(0.0)
                            .build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.BatchNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, CuDNNValidationUtil.MAX_REL_ERROR, CuDNNValidationUtil.MIN_ABS_ERROR);
}
Example #16
Source File: TestWeightNoise.java From deeplearning4j with Apache License 2.0
@Test
public void testWeightNoiseConfigJson() {
    IWeightNoise[] weightNoises = new IWeightNoise[]{
            new DropConnect(0.5),
            new DropConnect(new SigmoidSchedule(ScheduleType.ITERATION, 0.5, 0.5, 100)),
            new WeightNoise(new NormalDistribution(0, 0.1))
    };

    for (IWeightNoise wn : weightNoises) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .weightNoise(wn)
                .list()
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
                .layer(new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build())
                .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        assertEquals(wn, ((BaseLayer) net.getLayer(0).conf().getLayer()).getWeightNoise());
        assertEquals(new DropConnect(0.25), ((BaseLayer) net.getLayer(1).conf().getLayer()).getWeightNoise());
        assertEquals(wn, ((BaseLayer) net.getLayer(2).conf().getLayer()).getWeightNoise());

        TestUtils.testModelSerialization(net);

        ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
                .weightNoise(wn)
                .graphBuilder()
                .addInputs("in")
                .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
                .layer("1", new DenseLayer.Builder().nIn(10).nOut(10).weightNoise(new DropConnect(0.25)).build(), "0")
                .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
                .setOutputs("2")
                .build();

        ComputationGraph graph = new ComputationGraph(conf2);
        graph.init();

        assertEquals(wn, ((BaseLayer) graph.getLayer(0).conf().getLayer()).getWeightNoise());
        assertEquals(new DropConnect(0.25), ((BaseLayer) graph.getLayer(1).conf().getLayer()).getWeightNoise());
        assertEquals(wn, ((BaseLayer) graph.getLayer(2).conf().getLayer()).getWeightNoise());

        TestUtils.testModelSerialization(graph);

        graph.fit(new DataSet(Nd4j.create(1, 10), Nd4j.create(1, 10)));
    }
}
Example #17
Source File: TestDropout.java From deeplearning4j with Apache License 2.0
@Test
public void testDropoutValues() {
    Nd4j.getRandom().setSeed(12345);

    Dropout d = new Dropout(0.5);

    INDArray in = Nd4j.ones(10, 10);
    INDArray out = d.applyDropout(in, Nd4j.create(10, 10), 0, 0, LayerWorkspaceMgr.noWorkspacesImmutable());

    assertEquals(in, Nd4j.ones(10, 10));

    int countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).getInt(0);
    int countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).getInt(0);

    assertEquals(100, countZeros + countTwos); //Should only be 0 or 2
    //Stochastic, but this should hold for most cases
    assertTrue(countZeros >= 25 && countZeros <= 75);
    assertTrue(countTwos >= 25 && countTwos <= 75);

    //Test schedule:
    d = new Dropout(new MapSchedule.Builder(ScheduleType.ITERATION).add(0, 0.5).add(5, 0.1).build());
    for (int i = 0; i < 10; i++) {
        out = d.applyDropout(in, Nd4j.create(in.shape()), i, 0, LayerWorkspaceMgr.noWorkspacesImmutable());
        assertEquals(in, Nd4j.ones(10, 10));
        countZeros = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(0))).getInt(0);

        if (i < 5) {
            countTwos = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(2))).getInt(0);
            assertEquals(String.valueOf(i), 100, countZeros + countTwos); //Should only be 0 or 2
            //Stochastic, but this should hold for most cases
            assertTrue(countZeros >= 25 && countZeros <= 75);
            assertTrue(countTwos >= 25 && countTwos <= 75);
        } else {
            int countInverse = Nd4j.getExecutioner().exec(new MatchCondition(out, Conditions.equals(1.0 / 0.1))).getInt(0);
            assertEquals(100, countZeros + countInverse); //Should only be 0 or 10
            //Stochastic, but this should hold for most cases
            assertTrue(countZeros >= 80);
            assertTrue(countInverse <= 20);
        }
    }
}
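The expected values in this test follow from inverted dropout: with keep-probability p, surviving activations are scaled by 1/p so the layer's expected output is unchanged. A quick check of the arithmetic behind the assertions:

// Inverted dropout scaling: survivors of an all-ones input become 1/p
double keepHalf = 0.5, keepTenth = 0.1;
System.out.println(1.0 / keepHalf);  // 2.0  -> the Conditions.equals(2) checks above
System.out.println(1.0 / keepTenth); // 10.0 -> the Conditions.equals(1.0/0.1) check above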
Example #18
Source File: TestLrChanges.java From deeplearning4j with Apache License 2.0
@Test
public void testChangeLrCompGraphSchedule() {
    //First: Set LR for a *single* layer and compare vs. equivalent net config
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(0.1))
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
            .addLayer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
            .setOutputs("2")
            .build();
    ComputationGraph net = new ComputationGraph(conf);
    net.init();

    for (int i = 0; i < 10; i++) {
        net.fit(new DataSet(Nd4j.rand(10, 10), Nd4j.rand(10, 10)));
    }

    ComputationGraphConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)))
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
            .layer("2", new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build(), "1")
            .setOutputs("2")
            .build();
    ComputationGraph net2 = new ComputationGraph(conf2);
    net2.init();
    net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray());
    conf2.setIterationCount(conf.getIterationCount());
    net2.setParams(net.params().dup());

    net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)); //Set LR for layer 0 to 0.5

    assertEquals(conf, conf2);
    assertEquals(conf.toJson(), conf2.toJson());

    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());

    //Perform some parameter updates - check things are actually in sync...
    for (int i = 0; i < 3; i++) {
        INDArray in = Nd4j.rand(10, 10);
        INDArray l = Nd4j.rand(10, 10);

        net.fit(new DataSet(in, l));
        net2.fit(new DataSet(in, l));
    }

    assertEquals(net.params(), net2.params());
    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());
}
Example #19
Source File: TestLrChanges.java From deeplearning4j with Apache License 2.0
@Test
public void testChangeLrMLNSchedule() {
    //First: Set LR for a *single* layer and compare vs. equivalent net config
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(0.1))
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerNetwork net = new MultiLayerNetwork(conf);
    net.init();

    for (int i = 0; i < 10; i++) {
        net.fit(Nd4j.rand(10, 10), Nd4j.rand(10, 10));
    }

    MultiLayerConfiguration conf2 = new NeuralNetConfiguration.Builder()
            .activation(Activation.TANH)
            .seed(12345)
            .updater(new Adam(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)))
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).build())
            .build();
    MultiLayerNetwork net2 = new MultiLayerNetwork(conf2);
    net2.init();
    net2.getUpdater().getStateViewArray().assign(net.getUpdater().getStateViewArray());
    conf2.setIterationCount(conf.getIterationCount());
    net2.setParams(net.params().dup());

    net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8)); //Set LR for layer 0 to 0.5

    assertEquals(conf, conf2);
    assertEquals(conf.toJson(), conf2.toJson());

    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());

    //Perform some parameter updates - check things are actually in sync...
    for (int i = 0; i < 3; i++) {
        INDArray in = Nd4j.rand(10, 10);
        INDArray l = Nd4j.rand(10, 10);

        net.fit(in, l);
        net2.fit(in, l);
    }

    assertEquals(net.params(), net2.params());
    assertEquals(net.getUpdater().getStateViewArray(), net2.getUpdater().getStateViewArray());
}
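Examples #18 and #19 distill to the same recipe: a schedule passed to setLearningRate on an already-initialized network behaves identically to a network built with that schedule in its updater from the start, once iteration counts and updater state are aligned. The core call, assuming net is an initialized MultiLayerNetwork as in Example #19:

// Replace the fixed LR with an iteration-driven exponential decay at runtime
net.setLearningRate(new ExponentialSchedule(ScheduleType.ITERATION, 0.5, 0.8));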
Example #20
Source File: AlexNetTrain.java From dl4j-tutorials with MIT License
public static MultiLayerNetwork alexnetModel() {
    /**
     * AlexNet model interpretation based on the original paper ImageNet Classification with Deep Convolutional Neural Networks
     * and the imagenetExample code referenced.
     * http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf
     **/
    double nonZeroBias = 1;
    double dropOut = 0.8;

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(seed)
            .weightInit(WeightInit.DISTRIBUTION)
            .dist(new NormalDistribution(0.0, 0.01))
            .activation(Activation.RELU)
            .updater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 0.1, 0.1, 100000), 0.9))
            .biasUpdater(new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 0.2, 0.1, 100000), 0.9))
            .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer) // normalize to prevent vanishing or exploding gradients
            //.l2(5 * 1e-4)
            .list()
            .layer(0, convInit("cnn1", channels, 96, new int[]{11, 11}, new int[]{4, 4}, new int[]{3, 3}, 0))
            .layer(1, new LocalResponseNormalization.Builder().name("lrn1").build())
            .layer(2, maxPool("maxpool1", new int[]{3, 3}))
            .layer(3, conv5x5("cnn2", 256, new int[]{1, 1}, new int[]{2, 2}, nonZeroBias))
            .layer(4, new LocalResponseNormalization.Builder().name("lrn2").build())
            .layer(5, maxPool("maxpool2", new int[]{3, 3}))
            .layer(6, conv3x3("cnn3", 384, 0))
            .layer(7, conv3x3("cnn4", 384, nonZeroBias))
            .layer(8, conv3x3("cnn5", 256, nonZeroBias))
            .layer(9, maxPool("maxpool3", new int[]{3, 3}))
            .layer(10, fullyConnected("ffn1", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(11, fullyConnected("ffn2", 4096, nonZeroBias, dropOut, new GaussianDistribution(0, 0.005)))
            .layer(12, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
                    .name("output")
                    .nOut(numLabels)
                    .activation(Activation.SOFTMAX)
                    .build())
            .backprop(true)
            .pretrain(false)
            .setInputType(InputType.convolutional(height, width, channels))
            .build();

    return new MultiLayerNetwork(conf);
}