Java Code Examples for org.nd4j.linalg.activations.Activation#SOFTMAX
The following examples show how to use
org.nd4j.linalg.activations.Activation#SOFTMAX.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: KerasActivationUtils.java From deeplearning4j with Apache License 2.0 | 5 votes |
/**
 * Translates a Keras activation function name into the matching DL4J activation.
 *
 * @param kerasActivation String containing the Keras activation function name
 * @param conf            Keras layer configuration supplying the Keras activation names to compare against
 * @return Activation enum value for the matching DL4J activation function
 * @throws UnsupportedKerasConfigurationException if the name matches none of the activations checked here
 */
public static Activation mapToActivation(String kerasActivation, KerasLayerConfiguration conf)
        throws UnsupportedKerasConfigurationException {
    // Names are compared in a fixed order; the first match wins.
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTMAX())) {
        return Activation.SOFTMAX;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTPLUS())) {
        return Activation.SOFTPLUS;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SOFTSIGN())) {
        return Activation.SOFTSIGN;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU())) {
        return Activation.RELU;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_RELU6())) {
        return Activation.RELU6;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_ELU())) {
        return Activation.ELU;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SELU())) {
        return Activation.SELU;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_TANH())) {
        return Activation.TANH;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SIGMOID())) {
        return Activation.SIGMOID;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_HARD_SIGMOID())) {
        return Activation.HARDSIGMOID;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_LINEAR())) {
        // Keras "linear" corresponds to the DL4J identity activation.
        return Activation.IDENTITY;
    }
    if (kerasActivation.equals(conf.getKERAS_ACTIVATION_SWISH())) {
        return Activation.SWISH;
    }
    throw new UnsupportedKerasConfigurationException(
            "Unknown Keras activation function " + kerasActivation);
}
Example 2
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientCNNMLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = 
mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.9 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 3
Source File: TestScoreFunctions.java From deeplearning4j with Apache License 2.0 | 4 votes |
/**
 * Checks that the score reported by {@code ROCScoreFunction} for each optimization result
 * equals the AUC / AUPRC obtained by evaluating the resulting network directly.
 * Runs for both metrics (AUC, AUPRC) and every {@code ROCScoreFunction.ROCType}.
 *
 * @throws Exception if the data set iterator or the optimization run fails
 */
@Test
public void testROCScoreFunctions() throws Exception {
    for (boolean auc : new boolean[]{true, false}) {
        for (ROCScoreFunction.ROCType rocType : ROCScoreFunction.ROCType.values()) {
            String msg = (auc ? "AUC" : "AUPRC") + " - " + rocType;
            log.info("Starting: " + msg);

            ParameterSpace<Double> lr = new ContinuousParameterSpace(1e-5, 1e-3);

            // Output size, loss and activation are chosen per ROC type.
            int nOut = (rocType == ROCScoreFunction.ROCType.ROC ? 2 : 10);
            LossFunctions.LossFunction lf = (rocType == ROCScoreFunction.ROCType.BINARY
                    ? LossFunctions.LossFunction.XENT
                    : LossFunctions.LossFunction.MCXENT);
            Activation a = (rocType == ROCScoreFunction.ROCType.BINARY
                    ? Activation.SIGMOID
                    : Activation.SOFTMAX);

            MultiLayerSpace mls = new MultiLayerSpace.Builder()
                    .trainingWorkspaceMode(WorkspaceMode.NONE)
                    .inferenceWorkspaceMode(WorkspaceMode.NONE)
                    .updater(new AdamSpace(lr))
                    .weightInit(WeightInit.XAVIER)
                    .layer(new OutputLayerSpace.Builder().nIn(784).nOut(nOut)
                            .activation(a)
                            .lossFunction(lf).build())
                    .build();

            CandidateGenerator cg = new RandomSearchGenerator(mls);
            ResultSaver rs = new InMemoryResultSaver();
            ScoreFunction sf = new ROCScoreFunction(rocType,
                    (auc ? ROCScoreFunction.Metric.AUC : ROCScoreFunction.Metric.AUPRC));

            OptimizationConfiguration oc = new OptimizationConfiguration.Builder()
                    .candidateGenerator(cg)
                    .dataProvider(new DP(rocType))
                    .modelSaver(rs)
                    .scoreFunction(sf)
                    .terminationConditions(new MaxCandidatesCondition(3))
                    .rngSeed(12345)
                    .build();

            IOptimizationRunner runner = new LocalOptimizationRunner(oc, new MultiLayerNetworkTaskCreator());
            runner.execute();

            List<ResultReference> list = runner.getResults();

            for (ResultReference rr : list) {
                DataSetIterator testIter = new MnistDataSetIterator(4, 16, false, false, false, 12345);
                testIter.setPreProcessor(new PreProc(rocType));

                OptimizationResult or = rr.getResult();
                MultiLayerNetwork net = (MultiLayerNetwork) or.getResultReference().getResultModel();

                // Expected score: evaluate the saved network directly with the matching ROC evaluator.
                double expScore;
                switch (rocType) {
                    case ROC:
                        if (auc) {
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROC())[0].calculateAUCPR();
                        }
                        break;
                    case BINARY:
                        if (auc) {
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAuc();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCBinary())[0].calculateAverageAUCPR();
                        }
                        break;
                    case MULTICLASS:
                        if (auc) {
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUC();
                        } else {
                            expScore = net.doEvaluation(testIter, new ROCMultiClass())[0].calculateAverageAUCPR();
                        }
                        break;
                    default:
                        throw new IllegalStateException("Unexpected ROC type: " + rocType);
                }

                // The second, never-read MnistDataSetIterator that used to be built here was removed.
                assertEquals(msg, expScore, or.getScore(), 1e-4);
            }
        }
    }
}
Example 4
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientCNNL1L2MLN() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here for( int i=0; i<l2vals.length; i++ ){ Activation afn = activFns[i]; boolean doLearningFirst = characteristic[i]; LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new 
NoOp()).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String testName = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } }
Example 5
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientCNNMLN() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT).updater(new NoOp()) .weightInit(WeightInit.XAVIER).seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(1, 1).nOut(6).activation(afn) .cudnnAllowFallback(false) .build()) .layer(1, new OutputLayer.Builder(lf).activation(outputActivation).nOut(3).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); 
mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } } } }
Example 6
Source File: ComputationGraphConfigurationTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testInvalidOutputLayer(){ /* Test case (invalid configs) 1. nOut=1 + softmax 2. mcxent + tanh 3. xent + softmax 4. xent + relu 5. mcxent + sigmoid */ LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{ LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT}; int[] nOut = new int[]{1, 3, 3, 3, 3}; Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID}; for( int i=0; i<lf.length; i++ ){ for(boolean lossLayer : new boolean[]{false, true}) { for (boolean validate : new boolean[]{true, false}) { String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate; if(nOut[i] == 1 && lossLayer) continue; //nOuts are not availabel in loss layer, can't expect it to detect this case try { new NeuralNetConfiguration.Builder() .graphBuilder() .addInputs("in") .layer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in") .layer("1", !lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build() : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build(), "0") .setOutputs("1") .validateOutputLayerConfig(validate) .build(); if (validate) { fail("Expected exception: " + s); } } catch (DL4JInvalidConfigException e) { if (validate) { assertTrue(s, e.getMessage().toLowerCase().contains("invalid output")); } else { fail("Validation should not be enabled"); } } } } } }
Example 7
Source File: MultiLayerNeuralNetConfigurationTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testInvalidOutputLayer(){ /* Test case (invalid configs) 1. nOut=1 + softmax 2. mcxent + tanh 3. xent + softmax 4. xent + relu 5. mcxent + sigmoid */ LossFunctions.LossFunction[] lf = new LossFunctions.LossFunction[]{ LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.MCXENT, LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.XENT, LossFunctions.LossFunction.MCXENT}; int[] nOut = new int[]{1, 3, 3, 3, 3}; Activation[] activations = new Activation[]{Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.RELU, Activation.SIGMOID}; for( int i=0; i<lf.length; i++ ){ for(boolean lossLayer : new boolean[]{false, true}) { for (boolean validate : new boolean[]{true, false}) { String s = "nOut=" + nOut[i] + ",lossFn=" + lf[i] + ",lossLayer=" + lossLayer + ",validate=" + validate; if(nOut[i] == 1 && lossLayer) continue; //nOuts are not availabel in loss layer, can't expect it to detect this case try { new NeuralNetConfiguration.Builder() .list() .layer(new DenseLayer.Builder().nIn(10).nOut(10).build()) .layer(!lossLayer ? new OutputLayer.Builder().nIn(10).nOut(nOut[i]).activation(activations[i]).lossFunction(lf[i]).build() : new LossLayer.Builder().activation(activations[i]).lossFunction(lf[i]).build()) .validateOutputLayerConfig(validate) .build(); if (validate) { fail("Expected exception: " + s); } } catch (DL4JInvalidConfigException e) { if (validate) { assertTrue(s, e.getMessage().toLowerCase().contains("invalid output")); } else { fail("Validation should not be enabled"); } } } } } }
Example 8
Source File: JsonTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testJsonLossFunctions() { ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(), new LossBinaryXENT(), new LossCosineProximity(), new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL1(), new LossL2(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge(), new LossFMeasure(), new LossFMeasure(2.0)}; Activation[] outputActivationFn = new Activation[] {Activation.SIGMOID, //xent Activation.SIGMOID, //xent Activation.TANH, //cosine Activation.TANH, //hinge -> trying to predict 1 or -1 Activation.SIGMOID, //kld -> probab so should be between 0 and 1 Activation.SOFTMAX, //kld + softmax Activation.TANH, //l1 Activation.SOFTMAX, //l1 + softmax Activation.TANH, //l2 Activation.SOFTMAX, //l2 + softmax Activation.IDENTITY, //mae Activation.SOFTMAX, //mae + softmax Activation.IDENTITY, //mape Activation.SOFTMAX, //mape + softmax Activation.SOFTMAX, //mcxent Activation.IDENTITY, //mse Activation.SOFTMAX, //mse + softmax Activation.SIGMOID, //msle - requires positive labels/activations due to log Activation.SOFTMAX, //msle + softmax Activation.SIGMOID, //nll Activation.SOFTMAX, //nll + softmax Activation.SIGMOID, //poisson - requires positive predictions due to log... 
not sure if this is the best option Activation.TANH, //squared hinge Activation.SIGMOID, //f-measure (binary, single sigmoid output) Activation.SOFTMAX //f-measure (binary, 2-label softmax output) }; int[] nOut = new int[] {1, //xent 3, //xent 5, //cosine 3, //hinge 3, //kld 3, //kld + softmax 3, //l1 3, //l1 + softmax 3, //l2 3, //l2 + softmax 3, //mae 3, //mae + softmax 3, //mape 3, //mape + softmax 3, //mcxent 3, //mse 3, //mse + softmax 3, //msle 3, //msle + softmax 3, //nll 3, //nll + softmax 3, //poisson 3, //squared hinge 1, //f-measure (binary, single sigmoid output) 2, //f-measure (binary, 2-label softmax output) }; for (int i = 0; i < lossFunctions.length; i++) { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345).updater(Updater.ADAM).list() .layer(0, new DenseLayer.Builder().nIn(4).nOut(nOut[i]).activation(Activation.TANH).build()) .layer(1, new LossLayer.Builder().lossFunction(lossFunctions[i]) .activation(outputActivationFn[i]).build()) .validateOutputLayerConfig(false).build(); String json = conf.toJson(); String yaml = conf.toYaml(); MultiLayerConfiguration fromJson = MultiLayerConfiguration.fromJson(json); MultiLayerConfiguration fromYaml = MultiLayerConfiguration.fromYaml(yaml); assertEquals(conf, fromJson); assertEquals(conf, fromYaml); } }
Example 9
Source File: TestSameDiffOutput.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testMSEOutputLayer(){ //Faliing 2019/04/17 - https://github.com/deeplearning4j/deeplearning4j/issues/7560 Nd4j.getRandom().setSeed(12345); for(Activation a : new Activation[]{Activation.IDENTITY, Activation.TANH, Activation.SOFTMAX}) { log.info("Starting test: " + a); MultiLayerConfiguration confSD = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new SameDiffMSEOutputLayer(5, 5, a, WeightInit.XAVIER)) .build(); MultiLayerConfiguration confStd = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .list() .layer(new DenseLayer.Builder().nIn(5).nOut(5).activation(Activation.TANH).build()) .layer(new OutputLayer.Builder().nIn(5).nOut(5).activation(a).lossFunction(LossFunctions.LossFunction.MSE).build()) .build(); MultiLayerNetwork netSD = new MultiLayerNetwork(confSD); netSD.init(); MultiLayerNetwork netStd = new MultiLayerNetwork(confStd); netStd.init(); netSD.params().assign(netStd.params()); assertEquals(netStd.paramTable(), netSD.paramTable()); int minibatch = 2; INDArray in = Nd4j.rand(minibatch, 5); INDArray label = Nd4j.rand(minibatch, 5); INDArray outSD = netSD.output(in); INDArray outStd = netStd.output(in); assertEquals(outStd, outSD); DataSet ds = new DataSet(in, label); double scoreSD = netSD.score(ds); double scoreStd = netStd.score(ds); assertEquals(scoreStd, scoreSD, 1e-6); netSD.setInput(in); netSD.setLabels(label); netStd.setInput(in); netStd.setLabels(label); //System.out.println(((SameDiffOutputLayer) netSD.getLayer(1)).sameDiff.summary()); netSD.computeGradientAndScore(); netStd.computeGradientAndScore(); assertEquals(netStd.getFlattenedGradients(), netSD.getFlattenedGradients()); for (int i = 0; i < 3; i++) { netSD.fit(ds); netStd.fit(ds); String s = String.valueOf(i); assertEquals(s, netStd.params(), netSD.params()); assertEquals(s, netStd.getFlattenedGradients(), 
netSD.getFlattenedGradients()); } //Test fit before output: MultiLayerNetwork net = new MultiLayerNetwork(confSD.clone()); net.init(); net.fit(ds); //Sanity check on different minibatch sizes: INDArray newIn = Nd4j.vstack(in, in); INDArray outMbsd = netSD.output(newIn); INDArray outMb = netStd.output(newIn); assertEquals(outMb, outMbsd); } }
Example 10
Source File: VaeGradientCheckTests.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testVaeAsMLP() { //Post pre-training: a VAE can be used as a MLP, by taking the mean value from p(z|x) as the output //This gradient check tests this part Activation[] activFns = {Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH, Activation.IDENTITY, Activation.TANH}; LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MCXENT, LossFunction.MSE, LossFunction.MSE, LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.SOFTMAX, Activation.TANH, Activation.TANH, Activation.SOFTMAX, Activation.TANH}; //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0}; double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.0, 0.5}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.4}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.0}; int[][] encoderLayerSizes = new int[][] {{5}, {5}, {5, 6}, {5, 6}, {5}, {5, 6}}; int[][] decoderLayerSizes = new int[][] {{6}, {7, 8}, {6}, {7, 8}, {6}, {7, 8}}; int[] minibatches = new int[]{1,5,4,3,1,4}; Nd4j.getRandom().setSeed(12345); for( int i=0; i<activFns.length; i++ ){ LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; int[] encoderSizes = encoderLayerSizes[i]; int[] decoderSizes = decoderLayerSizes[i]; int minibatch = minibatches[i]; INDArray input = Nd4j.rand(minibatch, 4); INDArray labels = Nd4j.create(minibatch, 3); for (int j = 0; j < minibatch; j++) { labels.putScalar(j, j % 3, 1.0); } Activation afn = activFns[i]; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2).l1(l1) .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2Bias(biasL2[i]).l1Bias(biasL1[i]) .updater(new NoOp()).seed(12345L).list() .layer(0, new VariationalAutoencoder.Builder().nIn(4) .nOut(3).encoderLayerSizes(encoderSizes) .decoderLayerSizes(decoderSizes) .dist(new NormalDistribution(0, 1)) .activation(afn) .build()) .layer(1, new OutputLayer.Builder(lf) 
.activation(outputActivation).nIn(3).nOut(3) .dist(new NormalDistribution(0, 1)) .build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String msg = "testVaeAsMLP() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", encLayerSizes = " + Arrays.toString(encoderSizes) + ", decLayerSizes = " + Arrays.toString(decoderSizes) + ", l2=" + l2 + ", l1=" + l1; if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } }
Example 11
Source File: CNNGradientCheckTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientCNNL1L2MLN() { if(this.format != CNN2DFormat.NCHW) //Only test NCHW due to flat input format... return; //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) DataSet ds = new IrisDataSetIterator(150, 150).next(); ds.normalizeZeroMeanZeroUnitVariance(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4}; double[] l1vals = {0.0, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0}; Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.ELU, Activation.SOFTPLUS}; boolean[] characteristic = {false, true, false, true}; //If true: run some backprop steps first LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE, LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD, LossFunctions.LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH, Activation.SOFTMAX, Activation.IDENTITY}; //i.e., lossFunctions[i] used with outputActivations[i] here for( int i=0; i<l2vals.length; i++ ){ Activation afn = activFns[i]; boolean doLearningFirst = characteristic[i]; LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; MultiLayerConfiguration.Builder builder = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .l2(l2).l1(l1).l2Bias(biasL2[i]).l1Bias(biasL1[i]) .optimizationAlgo( OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L).list() .layer(0, new ConvolutionLayer.Builder(new int[]{1, 1}).nIn(1).nOut(6) .weightInit(WeightInit.XAVIER).activation(afn) .updater(new NoOp()).build()) .layer(1, new 
OutputLayer.Builder(lf).activation(outputActivation).nOut(3) .weightInit(WeightInit.XAVIER).updater(new NoOp()).build()) .setInputType(InputType.convolutionalFlat(1, 4, 1)); MultiLayerConfiguration conf = builder.build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String testName = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = testName + "- score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.8 * scoreBefore); } if (PRINT_RESULTS) { System.out.println(testName + "- activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(gradOK); TestUtils.testModelSerialization(mln); } }
Example 12
Source File: Vasttext.java From scava with Eclipse Public License 2.0 | 4 votes |
private ComputationGraph VasttextTextualAndNumeric() { Activation activation = null; LossFunction loss = null; //If multilabel, it is considered according to the book "Deep Learning with Python" to use the following parameters if(multiLabel) { activation = Activation.SIGMOID; loss = LossFunction.XENT; //Binary Crossentropy } else { //We're using a softmax/cross entropy for the binary classification, as the number of neurons is two. If the number of neurons would be one, then //the activation would be sigmoid and the loss binary crossentropy activation = Activation.SOFTMAX; loss = LossFunction.MCXENT; //CATEGORICAL_CROSSENTROPY } System.err.println("LR:"+lr); System.err.println("Dense:"+denseDimension); ComputationGraphConfiguration nnConf = new NeuralNetConfiguration.Builder() .updater(new Adam(lr)) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(WorkspaceMode.ENABLED) .inferenceWorkspaceMode(WorkspaceMode.ENABLED) .graphBuilder() .addInputs("Text", "Extra") //Embeddings Parts .addLayer("Embeddings", new EmbeddingSequenceLayer.Builder() .nIn(textFeaturesSize) .nOut(denseDimension) .activation(Activation.IDENTITY) //.activation(Activation.TANH) //.dropOut(0.0) .build(), "Text") .addLayer("GlobalPooling", new GlobalPoolingLayer.Builder() .poolingType(PoolingType.AVG) .poolingDimensions(2) .collapseDimensions(true) //.dropOut(0.0) .build(), "Embeddings") //We're merging directly the values from the extra .addVertex("Merge", new MergeVertex(), "GlobalPooling","Extra") .addLayer("DenseAll", new DenseLayer.Builder() .nIn(denseDimension+numericFeaturesSize) .nOut(denseDimension/2) //.dropOut(0.5) //.l2(0.001) .build(), "Merge") .addLayer("Output", new OutputLayer.Builder() //.dropOut(0.5) .nIn(denseDimension/2) .nOut(labelsSize) .activation(activation) .lossFunction(loss) .build(), "DenseAll") .setOutputs("Output") .pretrain(false) .backprop(true) .build(); return new ComputationGraph(nnConf); }
Example 13
Source File: BNGradientCheckTest.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientBNWithCNNandSubsamplingCompGraph() { //Parameterized test, testing combinations of: // (a) activation function // (b) Whether to test at random initialization, or after some learning (i.e., 'characteristic mode of operation') // (c) Loss function (with specified output activations) // (d) l1 and l2 values Activation[] activFns = {Activation.TANH, Activation.IDENTITY}; boolean doLearningFirst = true; LossFunctions.LossFunction[] lossFunctions = {LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD}; Activation[] outputActivations = {Activation.SOFTMAX}; //i.e., lossFunctions[i] used with outputActivations[i] here double[] l2vals = {0.0, 0.1}; double[] l1vals = {0.0, 0.2}; //i.e., use l2vals[j] with l1vals[j] Nd4j.getRandom().setSeed(12345); int minibatch = 10; int depth = 2; int hw = 5; int nOut = 3; INDArray input = Nd4j.rand(new int[]{minibatch, depth, hw, hw}); INDArray labels = Nd4j.zeros(minibatch, nOut); Random r = new Random(12345); for (int i = 0; i < minibatch; i++) { labels.putScalar(i, r.nextInt(nOut), 1.0); } DataSet ds = new DataSet(input, labels); for (boolean useLogStd : new boolean[]{true, false}) { for (Activation afn : activFns) { for (int i = 0; i < lossFunctions.length; i++) { for (int j = 0; j < l2vals.length; j++) { LossFunctions.LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345) .dataType(DataType.DOUBLE) .optimizationAlgo(OptimizationAlgorithm.LINE_GRADIENT_DESCENT) .updater(new NoOp()) .dist(new UniformDistribution(-2, 2)).seed(12345L).graphBuilder() .addInputs("in") .addLayer("0", new ConvolutionLayer.Builder(2, 2).stride(1, 1).nOut(3) .activation(afn).build(), "in") .addLayer("1", new BatchNormalization.Builder().useLogStd(useLogStd).build(), "0") .addLayer("2", new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX) .kernelSize(2, 2).stride(1, 1).build(), "1") .addLayer("3", new 
BatchNormalization.Builder().useLogStd(useLogStd).build(), "2") .addLayer("4", new ActivationLayer.Builder().activation(afn).build(), "3") .addLayer("5", new OutputLayer.Builder(lf).activation(outputActivation) .nOut(nOut).build(), "4") .setOutputs("5").setInputTypes(InputType.convolutional(hw, hw, depth)) .build(); ComputationGraph net = new ComputationGraph(conf); net.init(); String name = new Object() { }.getClass().getEnclosingMethod().getName(); if (doLearningFirst) { //Run a number of iterations of learning net.setInput(0, ds.getFeatures()); net.setLabels(ds.getLabels()); net.computeGradientAndScore(); double scoreBefore = net.score(); for (int k = 0; k < 20; k++) net.fit(ds); net.computeGradientAndScore(); double scoreAfter = net.score(); //Can't test in 'characteristic mode of operation' if not learning String msg = name + " - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst= " + doLearningFirst + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < 0.9 * scoreBefore); } System.out.println(name + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l1=" + l1vals[j] + ", l2=" + l2vals[j]); // for (int k = 0; k < net.getNumLayers(); k++) // System.out.println("Layer " + k + " # params: " + net.getLayer(k).numParams()); //Mean and variance vars are not gradient checkable; mean/variance "gradient" is used to implement running mean/variance calc //i.e., runningMean = decay * runningMean + (1-decay) * batchMean //However, numerical gradient will be 0 as forward pass doesn't depend on this "parameter" Set<String> excludeParams = new HashSet<>(Arrays.asList("1_mean", "1_var", "3_mean", "3_var", "1_log10stdev", "3_log10stdev")); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.GraphConfig().net(net).inputs(new 
INDArray[]{input}) .labels(new INDArray[]{labels}).excludeParams(excludeParams)); assertTrue(gradOK); TestUtils.testModelSerialization(net); } } } } }
Example 14
Source File: LSTMGradientCheckTests.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientGravesBidirectionalLSTMFull() { Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN}; LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here int timeSeriesLength = 3; int nIn = 2; int layerSize = 2; int nOut = 2; int miniBatchSize = 3; Random r = new Random(12345L); INDArray input = Nd4j.rand(DataType.DOUBLE, miniBatchSize, nIn, timeSeriesLength).subi(0.5); INDArray labels = TestUtils.randomOneHotTimeSeries(miniBatchSize, nOut, timeSeriesLength); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0}; double[] l1vals = {0.5, 0.0}; double[] biasL2 = {0.0, 0.2}; double[] biasL1 = {0.0, 0.6}; for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { Activation afn = activFns[i]; LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder(); if (l1 > 0.0) conf.l1(l1); if (l2 > 0.0) conf.l2(l2); if (biasL2[k] > 0) conf.l2Bias(biasL2[k]); if (biasL1[k] > 0) conf.l1Bias(biasL1[k]); MultiLayerConfiguration mlc = conf.seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .list().layer(0, new GravesBidirectionalLSTM.Builder().nIn(nIn).nOut(layerSize) .weightInit(new NormalDistribution(0, 1)) .activation(afn) .build()) .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation).nIn(layerSize) .nOut(nOut) .dist(new NormalDistribution(0, 1)).updater(new NoOp()).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(mlc); mln.init(); if (PRINT_RESULTS) { System.out.println("testGradientGravesBidirectionalLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1); // for (int j = 0; j < mln.getnLayers(); j++) // 
System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); String msg = "testGradientGravesLSTMFull() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1; assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } } }
Example 15
Source File: LSTMGradientCheckTests.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientLSTMFull() { int timeSeriesLength = 4; int nIn = 3; int layerSize = 4; int nOut = 2; int miniBatchSize = 2; boolean[] gravesLSTM = new boolean[] {true, false}; for (boolean graves : gravesLSTM) { Random r = new Random(12345L); INDArray input = Nd4j.rand(new int[]{miniBatchSize, nIn, timeSeriesLength}, 'f').subi(0.5); INDArray labels = Nd4j.zeros(miniBatchSize, nOut, timeSeriesLength); for (int i = 0; i < miniBatchSize; i++) { for (int j = 0; j < timeSeriesLength; j++) { int idx = r.nextInt(nOut); labels.putScalar(new int[] {i, idx, j}, 1.0f); } } //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0}; double[] l1vals = {0.0, 0.5}; double[] biasL2 = {0.3, 0.0}; double[] biasL1 = {0.0, 0.6}; Activation[] activFns = {Activation.TANH, Activation.SOFTSIGN}; LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; for (int i = 0; i < l2vals.length; i++) { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[i]; double l1 = l1vals[i]; Activation afn = activFns[i]; NeuralNetConfiguration.Builder conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .seed(12345L) .dist(new NormalDistribution(0, 1)).updater(new NoOp()); if (l1 > 0.0) conf.l1(l1); if (l2 > 0.0) conf.l2(l2); if (biasL2[i] > 0) conf.l2Bias(biasL2[i]); if (biasL1[i] > 0) conf.l1Bias(biasL1[i]); Layer layer; if (graves) { layer = new GravesLSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build(); } else { layer = new LSTM.Builder().nIn(nIn).nOut(layerSize).activation(afn).build(); } NeuralNetConfiguration.ListBuilder conf2 = conf.list().layer(0, layer) .layer(1, new RnnOutputLayer.Builder(lf).activation(outputActivation) .nIn(layerSize).nOut(nOut).build()) ; MultiLayerNetwork mln = new MultiLayerNetwork(conf2.build()); mln.init(); String testName = "testGradientLSTMFull(" + (graves ? 
"GravesLSTM" : "LSTM") + " - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1; if (PRINT_RESULTS) { System.out.println(testName); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input) .labels(labels).subset(true).maxPerParam(128)); assertTrue(testName, gradOK); TestUtils.testModelSerialization(mln); } } }
Example 16
Source File: GradientCheckTestsMasking.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testPerOutputMaskingMLP() { int nIn = 6; int layerSize = 4; INDArray mask1 = Nd4j.create(new double[] {1, 0, 0, 1, 0}).reshape(1, -1); INDArray mask3 = Nd4j.create(new double[][] {{1, 1, 1, 1, 1}, {0, 1, 0, 1, 0}, {1, 0, 0, 1, 1}}); INDArray[] labelMasks = new INDArray[] {mask1, mask3}; ILossFunction[] lossFunctions = new ILossFunction[] {new LossBinaryXENT(), // new LossCosineProximity(), //Doesn't support per-output masking, as it doesn't make sense for cosine proximity new LossHinge(), new LossKLD(), new LossKLD(), new LossL1(), new LossL2(), new LossMAE(), new LossMAE(), new LossMAPE(), new LossMAPE(), // new LossMCXENT(), //Per output masking on MCXENT+Softmax: not yet supported new LossMCXENT(), new LossMSE(), new LossMSE(), new LossMSLE(), new LossMSLE(), new LossNegativeLogLikelihood(), new LossPoisson(), new LossSquaredHinge()}; Activation[] act = new Activation[] {Activation.SIGMOID, //XENT // Activation.TANH, Activation.TANH, //Hinge Activation.SIGMOID, //KLD Activation.SOFTMAX, //KLD + softmax Activation.TANH, //L1 Activation.TANH, //L2 Activation.TANH, //MAE Activation.SOFTMAX, //MAE + softmax Activation.TANH, //MAPE Activation.SOFTMAX, //MAPE + softmax // Activation.SOFTMAX, //MCXENT + softmax: see comment above Activation.SIGMOID, //MCXENT + sigmoid Activation.TANH, //MSE Activation.SOFTMAX, //MSE + softmax Activation.SIGMOID, //MSLE - needs positive labels/activations (due to log) Activation.SOFTMAX, //MSLE + softmax Activation.SIGMOID, //NLL Activation.SIGMOID, //Poisson Activation.TANH //Squared hinge }; for (INDArray labelMask : labelMasks) { val minibatch = labelMask.size(0); val nOut = labelMask.size(1); for (int i = 0; i < lossFunctions.length; i++) { ILossFunction lf = lossFunctions[i]; Activation a = act[i]; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().updater(new NoOp()) .dataType(DataType.DOUBLE) .dist(new NormalDistribution(0, 1)).seed(12345) .list() .layer(0, new 
DenseLayer.Builder().nIn(nIn).nOut(layerSize).activation(Activation.TANH) .build()) .layer(1, new OutputLayer.Builder().nIn(layerSize).nOut(nOut).lossFunction(lf) .activation(a).build()) .validateOutputLayerConfig(false) .build(); MultiLayerNetwork net = new MultiLayerNetwork(conf); net.init(); INDArray[] fl = LossFunctionGradientCheck.getFeaturesAndLabels(lf, minibatch, nIn, nOut, 12345); INDArray features = fl[0]; INDArray labels = fl[1]; String msg = "testPerOutputMaskingMLP(): maskShape = " + Arrays.toString(labelMask.shape()) + ", loss function = " + lf + ", activation = " + a; System.out.println(msg); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(features) .labels(labels).labelMask(labelMask)); assertTrue(msg, gradOK); TestUtils.testModelSerialization(net); } } }
Example 17
Source File: GradientCheckTestsMasking.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void gradientCheckMaskingOutputSimple() { int timeSeriesLength = 5; boolean[][] mask = new boolean[5][0]; mask[0] = new boolean[] {true, true, true, true, true}; //No masking mask[1] = new boolean[] {false, true, true, true, true}; //mask first output time step mask[2] = new boolean[] {false, false, false, false, true}; //time series classification: mask all but last mask[3] = new boolean[] {false, false, true, false, true}; //time series classification w/ variable length TS mask[4] = new boolean[] {true, true, true, false, true}; //variable length TS int nIn = 3; int layerSize = 3; GradientCheckSimpleScenario[] scenarios = new GradientCheckSimpleScenario[] { new GradientCheckSimpleScenario(LossFunctions.LossFunction.MCXENT.getILossFunction(), Activation.SOFTMAX, 2, 2), new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(3).build(), Activation.TANH, 10, 3), new GradientCheckSimpleScenario(LossMixtureDensity.builder().gaussians(2).labelWidth(4).build(), Activation.IDENTITY, 12, 4)}; for (GradientCheckSimpleScenario s : scenarios) { Random r = new Random(12345L); INDArray input = Nd4j.rand(DataType.DOUBLE, 1, nIn, timeSeriesLength).subi(0.5); INDArray labels = Nd4j.zeros(DataType.DOUBLE, 1, s.labelWidth, timeSeriesLength); for (int m = 0; m < 1; m++) { for (int j = 0; j < timeSeriesLength; j++) { int idx = r.nextInt(s.labelWidth); labels.putScalar(new int[] {m, idx, j}, 1.0f); } } for (int i = 0; i < mask.length; i++) { //Create mask array: INDArray maskArr = Nd4j.create(1, timeSeriesLength); for (int j = 0; j < mask[i].length; j++) { maskArr.putScalar(new int[] {0, j}, mask[i][j] ? 
1.0 : 0.0); } MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().seed(12345L) .dataType(DataType.DOUBLE) .updater(new NoOp()) .list() .layer(0, new SimpleRnn.Builder().nIn(nIn).nOut(layerSize) .weightInit(new NormalDistribution(0, 1)).build()) .layer(1, new RnnOutputLayer.Builder(s.lf).activation(s.act).nIn(layerSize).nOut(s.nOut) .weightInit(new NormalDistribution(0, 1)).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(mln).input(input) .labels(labels).labelMask(maskArr)); String msg = "gradientCheckMaskingOutputSimple() - timeSeriesLength=" + timeSeriesLength + ", miniBatchSize=" + 1; assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } } }
Example 18
Source File: GradientCheckTests.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testGradientWeightDecay() { Activation[] activFns = {Activation.SIGMOID, Activation.TANH, Activation.THRESHOLDEDRELU}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; //i.e., lossFunctions[i] used with outputActivations[i] here DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); scaler.fit(iter); iter.setPreProcessor(scaler); DataSet ds = iter.next(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); //use l2vals[i] with l1vals[i] double[] l2vals = {0.4, 0.0, 0.4, 0.4, 0.0, 0.0}; double[] l1vals = {0.0, 0.0, 0.5, 0.0, 0.5, 0.0}; double[] biasL2 = {0.0, 0.0, 0.0, 0.2, 0.0, 0.0}; double[] biasL1 = {0.0, 0.0, 0.6, 0.0, 0.0, 0.5}; double[] wdVals = {0.0, 0.0, 0.0, 0.0, 0.4, 0.0}; double[] wdBias = {0.0, 0.0, 0.0, 0.0, 0.0, 0.4}; for (Activation afn : activFns) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder().l2(l2).l1(l1) .dataType(DataType.DOUBLE) .l2Bias(biasL2[k]).l1Bias(biasL1[k]) .weightDecay(wdVals[k]).weightDecayBias(wdBias[k]) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L) .list().layer(0, new DenseLayer.Builder().nIn(4).nOut(3) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()) .activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) .dist(new NormalDistribution(0, 1)) .updater(new NoOp()) .activation(outputActivation).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); boolean gradOK1 = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, 
DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); String msg = "testGradientWeightDecay() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", l2=" + l2 + ", l1=" + l1; assertTrue(msg, gradOK1); TestUtils.testModelSerialization(mln); } } } }
Example 19
Source File: GradientCheckTests.java From deeplearning4j with Apache License 2.0 | 4 votes |
@Test public void testAutoEncoder() { //As above (testGradientMLP2LayerIrisSimple()) but with L2, L1, and both L2/L1 applied //Need to run gradient through updater, so that L2 can be applied Activation[] activFns = {Activation.SIGMOID, Activation.TANH}; boolean[] characteristic = {false, true}; //If true: run some backprop steps first LossFunction[] lossFunctions = {LossFunction.MCXENT, LossFunction.MSE}; Activation[] outputActivations = {Activation.SOFTMAX, Activation.TANH}; DataNormalization scaler = new NormalizerMinMaxScaler(); DataSetIterator iter = new IrisDataSetIterator(150, 150); scaler.fit(iter); iter.setPreProcessor(scaler); DataSet ds = iter.next(); INDArray input = ds.getFeatures(); INDArray labels = ds.getLabels(); NormalizerStandardize norm = new NormalizerStandardize(); norm.fit(ds); norm.transform(ds); double[] l2vals = {0.2, 0.0, 0.2}; double[] l1vals = {0.0, 0.3, 0.3}; //i.e., use l2vals[i] with l1vals[i] for (Activation afn : activFns) { for (boolean doLearningFirst : characteristic) { for (int i = 0; i < lossFunctions.length; i++) { for (int k = 0; k < l2vals.length; k++) { LossFunction lf = lossFunctions[i]; Activation outputActivation = outputActivations[i]; double l2 = l2vals[k]; double l1 = l1vals[k]; Nd4j.getRandom().setSeed(12345); MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .dataType(DataType.DOUBLE) .updater(new NoOp()) .l2(l2).l1(l1) .optimizationAlgo(OptimizationAlgorithm.CONJUGATE_GRADIENT) .seed(12345L) .dist(new NormalDistribution(0, 1)) .list().layer(0, new AutoEncoder.Builder().nIn(4).nOut(3) .activation(afn).build()) .layer(1, new OutputLayer.Builder(lf).nIn(3).nOut(3) .activation(outputActivation).build()) .build(); MultiLayerNetwork mln = new MultiLayerNetwork(conf); mln.init(); String msg; if (doLearningFirst) { //Run a number of iterations of learning mln.setInput(ds.getFeatures()); mln.setLabels(ds.getLabels()); mln.computeGradientAndScore(); double scoreBefore = mln.score(); for (int j = 0; j < 10; 
j++) mln.fit(ds); mln.computeGradientAndScore(); double scoreAfter = mln.score(); //Can't test in 'characteristic mode of operation' if not learning msg = "testGradMLP2LayerIrisSimple() - score did not (sufficiently) decrease during learning - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1 + " (before=" + scoreBefore + ", scoreAfter=" + scoreAfter + ")"; assertTrue(msg, scoreAfter < scoreBefore); } msg = "testGradMLP2LayerIrisSimple() - activationFn=" + afn + ", lossFn=" + lf + ", outputActivation=" + outputActivation + ", doLearningFirst=" + doLearningFirst + ", l2=" + l2 + ", l1=" + l1; if (PRINT_RESULTS) { System.out.println(msg); // for (int j = 0; j < mln.getnLayers(); j++) // System.out.println("Layer " + j + " # params: " + mln.getLayer(j).numParams()); } boolean gradOK = GradientCheckUtil.checkGradients(mln, DEFAULT_EPS, DEFAULT_MAX_REL_ERROR, DEFAULT_MIN_ABS_ERROR, PRINT_RESULTS, RETURN_ON_FIRST_FAILURE, input, labels); assertTrue(msg, gradOK); TestUtils.testModelSerialization(mln); } } } } }
Example 20
Source File: Vasttext.java From scava with Eclipse Public License 2.0 | 4 votes |
private MultiLayerNetwork VasttextTextual() { Activation activation = null; LossFunction loss = null; //If multilabel, it is considered according to the book "Deep Learning with Python" to use the following parameters if(multiLabel) { activation = Activation.SIGMOID; loss = LossFunction.XENT; //Binary Crossentropy } else { //We're using a softmax/cross entropy for the binary classification, as the number of neurons is two. If the number of neurons would be one, then //the activation would be sigmoid and the loss binary crossentropy activation = Activation.SOFTMAX; loss = LossFunction.MCXENT; //CATEGORICAL_CROSSENTROPY } MultiLayerConfiguration nnConf = new NeuralNetConfiguration.Builder() .updater(new Adam(lr)) .weightInit(WeightInit.XAVIER) .trainingWorkspaceMode(WorkspaceMode.ENABLED) .inferenceWorkspaceMode(WorkspaceMode.ENABLED) .list() .layer(0, new EmbeddingSequenceLayer.Builder() .nIn(textFeaturesSize) .nOut(denseDimension) .activation(Activation.IDENTITY) .build()) .layer(1, new GlobalPoolingLayer.Builder() .poolingType(PoolingType.AVG) .poolingDimensions(2) .collapseDimensions(true) .build()) .layer(2, new OutputLayer.Builder() .nIn(denseDimension) .nOut(labelsSize) .activation(activation) .lossFunction(loss) .build()) .pretrain(false).backprop(true).build(); return new MultiLayerNetwork(nnConf); }