Java Code Examples for org.deeplearning4j.nn.multilayer.MultiLayerNetwork#score()
The following examples show how to use org.deeplearning4j.nn.multilayer.MultiLayerNetwork#score(). Each example is drawn from an open-source project; the originating source file, project, and license are listed above each snippet.
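Before the examples, here is a minimal, self-contained sketch of the two call forms that recur throughout them: the no-argument score(), which returns the loss from the most recent fit() or computeGradientAndScore() call (as in Examples 4 and 7 below), and score(DataSet), whose boolean variant switches between training and test-time behaviour (Example 13 passes false so dropout is not applied when scoring). The tiny Iris-shaped configuration used here is an illustrative assumption and is not taken from any of the projects below.

import org.deeplearning4j.datasets.iterator.impl.IrisDataSetIterator;
import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.dataset.DataSet;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;

// Minimal sketch only; the 4-in/3-out single-output-layer network is an assumption for illustration.
public class ScoreBasicsSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .seed(12345)
                .updater(new Sgd(0.1))
                .list()
                .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .nIn(4).nOut(3).activation(Activation.SOFTMAX).build())
                .build();

        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        DataSet ds = new IrisDataSetIterator(150, 150).next();
        net.fit(ds);

        // No-argument form: loss computed during the most recent fit()/computeGradientAndScore() call.
        double lastFitScore = net.score();

        // DataSet forms: compute the loss for the given data; 'false' requests test-time scoring
        // (e.g. no dropout), mirroring the usage in Example 13 below.
        double dsScore = net.score(ds);
        double dsScoreTestMode = net.score(ds, false);

        System.out.println(lastFitScore + " " + dsScore + " " + dsScoreTestMode);
    }
}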
Example 1
Source File: ScoreUtil.java From deeplearning4j with Apache License 2.0
/**
 * Score the given test data
 * with the given multi layer network
 * @param model model to use
 * @param testData the test data to test with
 * @param average whether to average the score or not
 * @return the score for the given test data given the model
 */
public static double score(MultiLayerNetwork model, DataSetIterator testData, boolean average) {
    //TODO: do this properly taking into account division by N, L1/L2 etc
    double sumScore = 0.0;
    int totalExamples = 0;
    while (testData.hasNext()) {
        DataSet ds = testData.next();
        int numExamples = ds.numExamples();
        sumScore += numExamples * model.score(ds);
        totalExamples += numExamples;
    }
    if (!average)
        return sumScore;
    return sumScore / totalExamples;
}
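For orientation, a possible call site for the utility above might look like the following. The names trainedNet and testIter are hypothetical (a trained MultiLayerNetwork and a test-set DataSetIterator assumed to exist elsewhere); only the ScoreUtil.score signature comes from the example itself.

// Hypothetical usage of ScoreUtil.score(...) above.
double meanScore = ScoreUtil.score(trainedNet, testIter, true);    // average=true: sum of per-batch
                                                                    // (numExamples * score) divided by total examples
testIter.reset();                                                   // the iterator was consumed by the first call
double summedScore = ScoreUtil.score(trainedNet, testIter, false);  // average=false: the example-weighted sum only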
Example 2
Source File: ParameterAveragingTrainingWorker.java From deeplearning4j with Apache License 2.0
@Override
public ParameterAveragingTrainingResult getFinalResult(MultiLayerNetwork network) {
    INDArray updaterState = null;
    if (saveUpdater) {
        Updater u = network.getUpdater();
        if (u != null)
            updaterState = u.getStateViewArray();
    }

    Nd4j.getExecutioner().commit();

    Collection<StorageMetaData> storageMetaData = null;
    Collection<Persistable> listenerStaticInfo = null;
    Collection<Persistable> listenerUpdates = null;
    if (listenerRouterProvider != null) {
        StatsStorageRouter r = listenerRouterProvider.getRouter();
        if (r instanceof VanillaStatsStorageRouter) { //TODO this is ugly... need to find a better solution
            VanillaStatsStorageRouter ssr = (VanillaStatsStorageRouter) r;
            storageMetaData = ssr.getStorageMetaData();
            listenerStaticInfo = ssr.getStaticInfo();
            listenerUpdates = ssr.getUpdates();
        }
    }

    return new ParameterAveragingTrainingResult(network.params(), updaterState, network.score(),
                    storageMetaData, listenerStaticInfo, listenerUpdates);
}
Example 3
Source File: TestComputationGraphNetwork.java From deeplearning4j with Apache License 2.0
@Test
public void testScoringDataSet() {
    ComputationGraphConfiguration configuration = getIrisGraphConfiguration();
    ComputationGraph graph = new ComputationGraph(configuration);
    graph.init();

    MultiLayerConfiguration mlc = getIrisMLNConfiguration();
    MultiLayerNetwork net = new MultiLayerNetwork(mlc);
    net.init();

    DataSetIterator iris = new IrisDataSetIterator(150, 150);
    DataSet ds = iris.next();

    //Now: set parameters of both networks to be identical. Then feedforward, and check we get the same score
    Nd4j.getRandom().setSeed(12345);
    int nParams = getNumParams();
    INDArray params = Nd4j.rand(1, nParams);
    graph.setParams(params.dup());
    net.setParams(params.dup());

    double scoreMLN = net.score(ds, false);
    double scoreCG = graph.score(ds, false);

    assertEquals(scoreMLN, scoreCG, 1e-4);
}
Example 4
Source File: NeuralNetworkModel.java From jstarcraft-rns with Apache License 2.0
@Override
protected void doPractice() {
    MultiLayerConfiguration configuration = getNetworkConfiguration();
    network = new MultiLayerNetwork(configuration);
    network.init();

    for (int epocheIndex = 0; epocheIndex < epocheSize; epocheIndex++) {
        totalError = 0F;
        network.fit(inputData, inputData);
        totalError = (float) network.score();
        if (isConverged(epocheIndex) && isConverged) {
            break;
        }
        currentError = totalError;
    }

    outputData = network.output(inputData);
}
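Example 4 uses the no-argument score() as the per-epoch reconstruction loss for its convergence check. A stripped-down sketch of the same pattern follows; the epoch count, tolerance, and the network/trainData variables are illustrative assumptions, not taken from the source.

// Hedged sketch: monitor score() after each fit() call and stop once the loss plateaus.
// 'network' is an initialized MultiLayerNetwork and 'trainData' a DataSet; both are assumed to exist.
double previousLoss = Double.MAX_VALUE;
for (int epoch = 0; epoch < 100; epoch++) {
    network.fit(trainData);
    double currentLoss = network.score();              // loss from the fit() call above
    if (Math.abs(previousLoss - currentLoss) < 1e-6) { // simple convergence criterion (assumption)
        break;
    }
    previousLoss = currentLoss;
}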
Example 5
Source File: ScoreFlatMapFunction.java From deeplearning4j with Apache License 2.0
@Override
public Iterator<Tuple2<Integer, Double>> call(Iterator<DataSet> dataSetIterator) throws Exception {
    if (!dataSetIterator.hasNext()) {
        return Collections.singletonList(new Tuple2<>(0, 0.0)).iterator();
    }

    DataSetIterator iter = new IteratorDataSetIterator(dataSetIterator, minibatchSize); //Does batching where appropriate

    MultiLayerNetwork network = new MultiLayerNetwork(MultiLayerConfiguration.fromJson(json));
    network.init();
    INDArray val = params.value().unsafeDuplication(); //.value() object will be shared by all executors on each machine -> OK, as params are not modified by score function
    if (val.length() != network.numParams(false))
        throw new IllegalStateException(
                        "Network did not have same number of parameters as the broadcast set parameters");
    network.setParameters(val);

    List<Tuple2<Integer, Double>> out = new ArrayList<>();
    while (iter.hasNext()) {
        DataSet ds = iter.next();
        double score = network.score(ds, false);

        val numExamples = (int) ds.getFeatures().size(0);
        out.add(new Tuple2<>(numExamples, score * numExamples));
    }

    Nd4j.getExecutioner().commit();

    return out.iterator();
}
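The tuples returned above pair each minibatch's example count with its example-weighted score. A typical aggregation that recovers the overall average score is sketched below as plain Java over a collected list; the 'results' variable is an assumption here, not the project's actual Spark reduction.

// Hedged sketch: combine the (numExamples, numExamples * score) tuples into an average score.
// 'results' is an assumed List<Tuple2<Integer, Double>> gathered from all partitions.
long totalExamples = 0;
double weightedScoreSum = 0.0;
for (Tuple2<Integer, Double> t : results) {
    totalExamples += t._1();
    weightedScoreSum += t._2();
}
double averageScore = weightedScoreSum / totalExamples;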
Example 6
Source File: TestSameDiffOutput.java From deeplearning4j with Apache License 2.0
@Test public void testMSEOutputLayer(){ //Faliing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560 Nd4j.getRandom().setSeed(12345); for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) { log.info("Starting test: " + a); MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER)) .build(); MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); netSD.init(); MultiLayerNetwork netStd = new MultiLayerNetwork(confStd); netStd.init(); netSD.params().assign(netStd.params()); assertEquals(netStd.paramTable(), netSD.paramTable()); int minibatch = 2; INDArray in = Nd4j.rand(minibatch, 5); INDArray label = Nd4j.rand(minibatch, 5); INDArray outSD = netSD.output(in); INDArray outStd = netStd.output(in); assertEquals(outStd, outSD); DataSet ds = new DataSet(in, label); double scoreSD = netSD.score(ds); double scoreStd = netStd.score(ds); assertEquals(scoreStd, scoreSD, 1e-6); netSD.setInput(in); netSD.setLabels(label); netStd.setInput(in); netStd.setLabels(label); //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary()); netSD.computeGradientAndScore(); netStd.computeGradientAndScore(); assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients()); for (int i = 0; i < 3; i++) { netSD.fit(ds); netStd.fit(ds); String s = String.valueOf(i); assertEquals(s, netStd.params(), netSD.params()); assertEquals(s, netStd.getFlattenedGradients(), netSD.getFlattenedGradients()); } //Test fit before output: MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone()); net.init(); net.fit(ds); //Sanity check on different minibatch sizes: INDArray newIn = Nd4j.vstack(in, in); INDArray outMbsd = netSD.output(newIn); INDArray outMb = netStd.output(newIn); assertEquals(outMb, outMbsd); } }
Example 7
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0
@Test public void testGradientCNNL1L2MLN() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here for( int i=0; i<l2vals.length; i++ ){ Activation afn = activFns[i]; boolean doLearningFirst = characteristic[i]; LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String testName = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } }
Example 8
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0
@Test public void testGradientCNNMLN() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn) .cudnnAllowFallback(false) .build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 9
Source File: CuDNNGradientChecks.java From deeplearning4j with Apache License 2.0
@Test public void testConvolutional() throws Exception { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first int[] minibatchSizes = {1, 4}; int width = 6; int height = 6; int inputDepth = 2; int nOut = 3; Field f = org.deeplearning4j.nn.layers.convolution.ConvolutionLayer.class.getDeclaredField("helper"); f.setAccessible(true); Random r = new Random(12345); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int minibatchSize : minibatchSizes) { INDArray input = Nd4j.rand(new int[] {minibatchSize, inputDepth, height, width}); INDArray labels = Nd4j.zeros(minibatchSize, nOut); for (int i = 0; i < minibatchSize; i++) { labels.putScalar(i, r.nextInt(nOut), 1.0); } MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .dist(new UniformDistribution(-1, 1)) .updater(new NoOp()).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(1, 1).nOut(3) .activation(afn).build()) .layer(1, new ConvolutionLayer.Builder(2, 2).stride(2, 2).padding(0, 0).nOut(3) .activation(afn).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(nOut).build()) .setInputType(InputType.convolutional(height, width, inputDepth)) ; MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c0 = (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(0); ConvolutionHelper ch0 = (ConvolutionHelper) f.get(c0); assertTrue(ch0 instanceof CudnnConvolutionHelper); org.deeplearning4j.nn.layers.convolution.ConvolutionLayer c1 = (org.deeplearning4j.nn.layers.convolution.ConvolutionLayer) mln.getLayer(1); ConvolutionHelper ch1 = (ConvolutionHelper) f.get(c1); assertTrue(ch1 instanceof CudnnConvolutionHelper); //------------------------------- //For debugging/comparison to no-cudnn case: set helper field to null // f.set(c0, null); // f.set(c1, null); // assertNull(f.get(c0)); // assertNull(f.get(c1)); //------------------------------- String name = new Object() {}.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(input); mln.setLabels(labels); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(input, labels); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", doLearningFirst=" + doLearningFirst); for (int j = 0; j < mln.getnLayers(); j++) System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, 
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); } } } }
Example 10
Source File: TestOptimizers.java From deeplearning4j with Apache License 2.0
@Test
public void testOptimizersMLP() {
    //Check that the score actually decreases over time

    DataSetIterator iter = new IrisDataSetIterator(150, 150);

    OptimizationAlgorithm[] toTest = {OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT,
                    OptimizationAlgorithm.LINE_GRADIENT_DESCENT, OptimizationAlgorithm.CONJUGATE_GRADIENT,
                    OptimizationAlgorithm.LBFGS};

    DataSet ds = iter.next();
    ds.normalizeZeroMeanZeroUnitVariance();

    for (OptimizationAlgorithm oa : toTest) {
        int nIter = 5;
        MultiLayerNetwork network = new MultiLayerNetwork(getMLPConfigIris(oa));
        network.init();
        double score = network.score(ds);
        assertTrue(score != 0.0 && !Double.isNaN(score));

        if (PRINT_OPT_RESULTS)
            System.out.println("testOptimizersMLP() - " + oa);

        int nCallsToOptimizer = 10;
        double[] scores = new double[nCallsToOptimizer + 1];
        scores[0] = score;
        for (int i = 0; i < nCallsToOptimizer; i++) {
            for( int j=0; j<nIter; j++ ) {
                network.fit(ds);
            }
            double scoreAfter = network.score(ds);
            scores[i + 1] = scoreAfter;
            assertTrue("Score is NaN after optimization", !Double.isNaN(scoreAfter));
            assertTrue("OA= " + oa + ", before= " + score + ", after= " + scoreAfter, scoreAfter <= score);
            score = scoreAfter;
        }

        if (PRINT_OPT_RESULTS)
            System.out.println(oa + " - " + Arrays.toString(scores));
    }
}
Example 11
Source File: DTypeTests.java From deeplearning4j with Apache License 2.0
@Test public void testMultiLayerNetworkTypeConversion() { for (DataType dt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) { Nd4j.setDefaultDataTypes(dt, dt); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(12345) .weightInit(WeightInit.XAVIER) .updater(new Adam(0.01)) .dataType(DataType.DOUBLE) .list() .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) .layer(new DenseLayer.Builder().activation(Activation.TANH).nIn(10).nOut(10).build()) .layer(new OutputLayer.Builder().nIn(10).nOut(10).activation(Activation.SOFTMAX).lossFunction(LossFunctions.LossFunction.MCXENT).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray inD = Nd4j.rand(DataType.DOUBLE, 1, 10); INDArray lD = Nd4j.create(DataType.DOUBLE, 1, 10); net.fit(inD, lD); INDArray outDouble = net.output(inD); net.setInput(inD); net.setLabels(lD); net.computeGradientAndScore(); double scoreDouble = net.score(); INDArray grads = net.getFlattenedGradients(); INDArray u = net.getUpdater().getStateViewArray(); assertEquals(DataType.DOUBLE, net.params().dataType()); assertEquals(DataType.DOUBLE, grads.dataType()); assertEquals(DataType.DOUBLE, u.dataType()); MultiLayerNetwork netFloat = net.convertDataType(DataType.FLOAT); netFloat.initGradientsView(); assertEquals(DataType.FLOAT, netFloat.params().dataType()); assertEquals(DataType.FLOAT, netFloat.getFlattenedGradients().dataType()); assertEquals(DataType.FLOAT, netFloat.getUpdater(true).getStateViewArray().dataType()); INDArray inF = inD.castTo(DataType.FLOAT); INDArray lF = lD.castTo(DataType.FLOAT); INDArray outFloat = netFloat.output(inF); netFloat.setInput(inF); netFloat.setLabels(lF); netFloat.computeGradientAndScore(); double scoreFloat = netFloat.score(); INDArray gradsFloat = netFloat.getFlattenedGradients(); INDArray uFloat = netFloat.getUpdater().getStateViewArray(); assertEquals(scoreDouble, scoreFloat, 1e-6); assertEquals(outDouble.castTo(DataType.FLOAT), outFloat); assertEquals(grads.castTo(DataType.FLOAT), gradsFloat); INDArray uCast = u.castTo(DataType.FLOAT); assertTrue(uCast.equalsWithEps(uFloat, 1e-4)); MultiLayerNetwork netFP16 = net.convertDataType(DataType.HALF); netFP16.initGradientsView(); assertEquals(DataType.HALF, netFP16.params().dataType()); assertEquals(DataType.HALF, netFP16.getFlattenedGradients().dataType()); assertEquals(DataType.HALF, netFP16.getUpdater(true).getStateViewArray().dataType()); INDArray inH = inD.castTo(DataType.HALF); INDArray lH = lD.castTo(DataType.HALF); INDArray outHalf = netFP16.output(inH); netFP16.setInput(inH); netFP16.setLabels(lH); netFP16.computeGradientAndScore(); double scoreHalf = netFP16.score(); INDArray gradsHalf = netFP16.getFlattenedGradients(); INDArray uHalf = netFP16.getUpdater().getStateViewArray(); assertEquals(scoreDouble, scoreHalf, 1e-4); boolean outHalfEq = outDouble.castTo(DataType.HALF).equalsWithEps(outHalf, 1e-3); assertTrue(outHalfEq); boolean gradsHalfEq = grads.castTo(DataType.HALF).equalsWithEps(gradsHalf, 1e-3); assertTrue(gradsHalfEq); INDArray uHalfCast = u.castTo(DataType.HALF); assertTrue(uHalfCast.equalsWithEps(uHalf, 1e-4)); } }
Example 12
Source File: TestSameDiffOutput.java From deeplearning4j with Apache License 2.0
@Test public void testOutputMSELossLayer(){ Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new SameDiffMSELossLayer()) .build(); MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new LossLayer.Builder().activation(Activation.IDENTITY).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); netSD.init(); MultiLayerNetwork netStd = new MultiLayerNetwork(confStd); netStd.init(); INDArray in = Nd4j.rand(3, 5); INDArray label = Nd4j.rand(3,5); INDArray outSD = netSD.output(in); INDArray outStd = netStd.output(in); assertEquals(outStd, outSD); DataSet ds = new DataSet(in, label); double scoreSD = netSD.score(ds); double scoreStd = netStd.score(ds); assertEquals(scoreStd, scoreSD, 1e-6); for( int i=0; i<3; i++ ){ netSD.fit(ds); netStd.fit(ds); assertEquals(netStd.params(), netSD.params()); assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients()); } //Test fit before output: MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone()); net.init(); net.fit(ds); //Sanity check on different minibatch sizes: INDArray newIn = Nd4j.vstack(in, in); INDArray outMbsd = netSD.output(newIn); INDArray outMb = netStd.output(newIn); assertEquals(outMb, outMbsd); }
Example 13
Source File: TestDropout.java From deeplearning4j with Apache License 2.0
@Test public void testDropoutSimple() throws Exception { //Testing dropout with a single layer //Layer input: values should be set to either 0.0 or 2.0x original value int nIn = 8; int nOut = 8; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Sgd()) .dropOut(0.5).list() .layer(0, new OutputLayer.Builder().activation(Activation.IDENTITY) .lossFunction(LossFunctions.LossFunction.MSE).nIn(nIn).nOut(nOut) .weightInit(WeightInit.XAVIER).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); net.getLayer(0).getParam("W").assign(Nd4j.eye(nIn)); int nTests = 15; Nd4j.getRandom().setSeed(12345); int noDropoutCount = 0; for (int i = 0; i < nTests; i++) { INDArray in = Nd4j.rand(1, nIn); INDArray out = Nd4j.rand(1, nOut); INDArray inCopy = in.dup(); List<INDArray> l = net.feedForward(in, true); INDArray postDropout = l.get(l.size() - 1); //Dropout occurred. Expect inputs to be either scaled 2x original, or set to 0.0 (with dropout = 0.5) for (int j = 0; j < inCopy.length(); j++) { double origValue = inCopy.getDouble(j); double doValue = postDropout.getDouble(j); if (doValue > 0.0) { //Input was kept -> should be scaled by factor of (1.0/0.5 = 2) assertEquals(origValue * 2.0, doValue, 0.0001); } } //Do forward pass //(1) ensure dropout ISN'T being applied for forward pass at test time //(2) ensure dropout ISN'T being applied for test time scoring //If dropout is applied at test time: outputs + score will differ between passes INDArray in2 = Nd4j.rand(1, nIn); INDArray out2 = Nd4j.rand(1, nOut); INDArray outTest1 = net.output(in2, false); INDArray outTest2 = net.output(in2, false); INDArray outTest3 = net.output(in2, false); assertEquals(outTest1, outTest2); assertEquals(outTest1, outTest3); double score1 = net.score(new DataSet(in2, out2), false); double score2 = net.score(new DataSet(in2, out2), false); double score3 = net.score(new DataSet(in2, out2), false); assertEquals(score1, score2, 0.0); assertEquals(score1, score3, 0.0); } if (noDropoutCount >= nTests / 3) { //at 0.5 dropout ratio and more than a few inputs, expect only a very small number of instances where //no dropout occurs, just due to random chance fail("Too many instances of dropout not being applied"); } }
Example 14
Source File: TransferLearningMLNTest.java From deeplearning4j with Apache License 2.0
@Test public void testRemoveAndAdd() { Nd4j.getRandom().setSeed(12345); DataSet randomData = new DataSet(Nd4j.rand(DataType.FLOAT,10, 4), TestUtils.randomOneHot(DataType.FLOAT, 10, 3)); NeuralNetConfiguration.Builder equivalentConf = new NeuralNetConfiguration.Builder().updater(new Sgd(0.1)); FineTuneConfiguration overallConf = new FineTuneConfiguration.Builder().updater(new Sgd(0.1)).build(); MultiLayerNetwork modelToFineTune = new MultiLayerNetwork(//overallConf.list() equivalentConf.list().layer(0, new DenseLayer.Builder().nIn(4).nOut(5).build()) .layer(1, new DenseLayer.Builder().nIn(5).nOut(2).build()) .layer(2, new DenseLayer.Builder().nIn(2).nOut(3).build()) .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nIn(3).nOut(3) .build()) .build()); modelToFineTune.init(); MultiLayerNetwork modelNow = new TransferLearning.Builder(modelToFineTune).fineTuneConfiguration(overallConf) .nOutReplace(0, 7, WeightInit.XAVIER, WeightInit.XAVIER) .nOutReplace(2, 5, WeightInit.XAVIER).removeOutputLayer() .addLayer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).nIn(5) .nOut(3).updater(new Sgd(0.5)).activation(Activation.SOFTMAX) .build()) .build(); MultiLayerNetwork modelExpectedArch = new MultiLayerNetwork(equivalentConf.list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(7).build()) .layer(1, new DenseLayer.Builder().nIn(7).nOut(2).build()) .layer(2, new DenseLayer.Builder().nIn(2).nOut(5).build()) .layer(3, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder( LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) .updater(new Sgd(0.5)).nIn(5).nOut(3).build()) .build()); modelExpectedArch.init(); //modelNow should have the same architecture as modelExpectedArch assertArrayEquals(modelExpectedArch.params().shape(), modelNow.params().shape()); assertArrayEquals(modelExpectedArch.getLayer(0).params().shape(), modelNow.getLayer(0).params().shape()); assertArrayEquals(modelExpectedArch.getLayer(1).params().shape(), modelNow.getLayer(1).params().shape()); assertArrayEquals(modelExpectedArch.getLayer(2).params().shape(), modelNow.getLayer(2).params().shape()); assertArrayEquals(modelExpectedArch.getLayer(3).params().shape(), modelNow.getLayer(3).params().shape()); modelNow.setParams(modelExpectedArch.params()); //fit should give the same results modelExpectedArch.fit(randomData); modelNow.fit(randomData); double scoreExpected = modelExpectedArch.score(); double scoreActual = modelNow.score(); assertEquals(scoreExpected, scoreActual, 1e-4); assertEquals(modelExpectedArch.params(), modelNow.params()); }
Example 15
Source File: CNN1DGradientCheckTest.java From deeplearning4j with Apache License 2.0
@Test public void testCnn1dWithMasking(){ int length = 12; int convNIn = 2; int convNOut1 = 3; int convNOut2 = 4; int finalNOut = 3; int pnorm = 2; SubsamplingLayer.PoolingType[] poolingTypes = new SubsamplingLayer.PoolingType[] {SubsamplingLayer.PoolingType.MAX, SubsamplingLayer.PoolingType.AVG}; for (SubsamplingLayer.PoolingType poolingType : poolingTypes) { for(ConvolutionMode cm : new ConvolutionMode[]{ConvolutionMode.Same, ConvolutionMode.Truncate}){ for( int stride : new int[]{1, 2}){ String s = cm + ", stride=" + stride + ", pooling=" + poolingType; log.info("Starting test: " + s); Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .activation(Activation.TANH) .dist(new NormalDistribution(0, 1)).convolutionMode(cm) .seed(12345) .list() .layer(new Convolution1DLayer.Builder().kernelSize(2) .stride(stride).nIn(convNIn).nOut(convNOut1) .build()) .layer(new Subsampling1DLayer.Builder(poolingType).kernelSize(2) .stride(stride).pnorm(pnorm).build()) .layer(new Convolution1DLayer.Builder().kernelSize(2) .stride(stride).nIn(convNOut1).nOut(convNOut2) .build()) .layer(new GlobalPoolingLayer(PoolingType.AVG)) .layer(new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX).nOut(finalNOut).build()) .setInputType(InputType.recurrent(convNIn, length)).build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray f = Nd4j.rand(new int[]{2, convNIn, length}); INDArray fm = Nd4j.create(2, length); fm.get(NDArrayIndex.point(0), NDArrayIndex.all()).assign(1); fm.get(NDArrayIndex.point(1), NDArrayIndex.interval(0,6)).assign(1); INDArray label = TestUtils.randomOneHot(2, finalNOut); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f) .labels(label).inputMask(fm)); assertTrue(s, gradOK); TestUtils.testModelSerialization(net); //TODO also check that masked step values don't impact forward pass, score or gradients DataSet ds = new DataSet(f,label,fm,null); double scoreBefore = net.score(ds); net.setInput(f); net.setLabels(label); net.setLayerMaskArrays(fm, null); net.computeGradientAndScore(); INDArray gradBefore = net.getFlattenedGradients().dup(); f.putScalar(1, 0, 10, 10.0); f.putScalar(1, 1, 11, 20.0); double scoreAfter = net.score(ds); net.setInput(f); net.setLabels(label); net.setLayerMaskArrays(fm, null); net.computeGradientAndScore(); INDArray gradAfter = net.getFlattenedGradients().dup(); assertEquals(scoreBefore, scoreAfter, 1e-6); assertEquals(gradBefore, gradAfter); } } } }
Example 16
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0
@Test public void testGradientCNNL1L2MLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here for( int i=0; i<l2vals.length; i++ ){ Activation afn = activFns[i]; boolean doLearningFirst = characteristic[i]; LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String testName = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); 
TestUtils.testModelSerialization(mln); } }
Example 17
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0
@Test public void testGradientCNNMLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.9 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 18
Source File: GradientCheckTestsMasking.java From deeplearning4j with Apache License 2.0
@Test public void testOutputLayerMasking(){ Nd4j.getRandom().setSeed(12345); //Idea: RNN input, global pooling, OutputLayer - with "per example" mask arrays int mb = 4; int tsLength = 5; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .weightInit(new NormalDistribution(0,2)) .updater(new NoOp()) .list() .layer(new LSTM.Builder().nIn(3).nOut(3).build()) .layer(new GlobalPoolingLayer.Builder().poolingType(PoolingType.AVG).build()) .layer(new OutputLayer.Builder().nIn(3).nOut(3).activation(Activation.SOFTMAX).build()) .setInputType(InputType.recurrent(3)) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray f = Nd4j.rand(new int[]{mb, 3, tsLength}); INDArray l = TestUtils.randomOneHot(mb, 3); INDArray lm = TestUtils.randomBernoulli(mb, 1); int attempts = 0; while(attempts++ < 1000 && lm.sumNumber().intValue() == 0){ lm = TestUtils.randomBernoulli(mb, 1); } assertTrue("Could not generate non-zero mask after " + attempts + " attempts", lm.sumNumber().intValue() > 0); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(f) .labels(l).labelMask(lm)); assertTrue(gradOK); //Also ensure score doesn't depend on masked feature or label values double score = net.score(new DataSet(f,l,null,lm)); for( int i=0; i<mb; i++ ){ if(lm.getDouble(i) != 0.0){ continue; } INDArray fView = f.get(interval(i,i,true), all(),all()); fView.assign(Nd4j.rand(fView.shape())); INDArray lView = l.get(interval(i,i,true), all()); lView.assign(TestUtils.randomOneHot(1, lView.size(1))); double score2 = net.score(new DataSet(f,l,null,lm)); assertEquals(String.valueOf(i), score, score2, 1e-8); } }
Example 19
Source File: GradientCheckTests.java From deeplearning4j with Apache License 2.0
@Test public void testEmbeddingSequenceLayer(){ Nd4j.getRandom().setSeed(12345); for(RNNFormat seqOutputFormat : RNNFormat.values()) { for (boolean maskArray : new boolean[]{false, true}) { for (int inputRank : new int[]{2, 3}) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .seed(12345) .updater(new NoOp()) .weightInit(new NormalDistribution(0, 1)) .list() .layer(new EmbeddingSequenceLayer.Builder() .nIn(8) .nOut(4) .outputDataFormat(seqOutputFormat) .build()) .layer(new RnnOutputLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH) .dataFormat(seqOutputFormat) .lossFunction(LossFunction.MSE).build()) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); boolean ncw = seqOutputFormat == RNNFormat.NCW; INDArray in = Transforms.floor(Nd4j.rand(3, 6).muli(8)); //Integers 0 to 7 inclusive INDArray label = Nd4j.rand(DataType.FLOAT, ncw ? new int[]{3, 3, 6} : new int[]{3,6,3}); if (inputRank == 3) { //Reshape from [3,6] to [3,1,6] in = in.reshape('c', 3, 1, 6); } INDArray fMask = null; if (maskArray) { fMask = Nd4j.create(new double[][]{{1, 1, 1, 1, 1, 1}, {1, 1, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0}}); } String msg = "mask=" + maskArray + ", inputRank=" + inputRank; boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(in) .labels(label).inputMask(fMask)); assertTrue(msg, gradOK); TestUtils.testModelSerialization(net); //Also: if mask is present, double check that the masked steps don't impact score if (maskArray) { DataSet ds = new DataSet(in, label, fMask, null); double score = net.score(ds); if (inputRank == 2) { in.putScalar(1, 2, 0); in.putScalar(2, 1, 0); in.putScalar(2, 2, 0); } else { in.putScalar(1, 0, 2, 0); in.putScalar(2, 0, 1, 0); in.putScalar(2, 0, 2, 0); } double score2 = net.score(ds); assertEquals(score, score2, 1e-6); if (inputRank == 2) { in.putScalar(1, 2, 1); in.putScalar(2, 1, 1); in.putScalar(2, 2, 1); } else { in.putScalar(1, 0, 2, 1); in.putScalar(2, 0, 1, 1); in.putScalar(2, 0, 2, 1); } double score3 = net.score(ds); assertEquals(score, score3, 1e-6); } } } } }
Example 20
Source File: GradientCheckTests.java From deeplearning4j with Apache License 2.0
@Test public void testAutoEncoder() { //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied //Need to run gradient through updater, so that L2 can be applied Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); scaler.fit(iter); iter.setPreProcessor(scaler); DataSet ds = iter.next(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); NormalizerStandardize norm = new NormalizerStandardize(); norm.fit(ds); norm.transform(ds); double[] l2vals = {0.2, 0.0, 0.2}; double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i] for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2(l2).l1(l1) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L) .dist(new NormalDistribution(0, 1)) .list().layer(0, new AutoEncoder.Builder().nIn(4).nOut(3) .activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) .activation(outputActivation).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String msg; if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < scoreBefore); } msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1; if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } } } } }