org.nd4j.linalg.activations.impl.ActivationSoftmax Java Examples
The following examples show how to use org.nd4j.linalg.activations.impl.ActivationSoftmax. Each snippet is taken from an open-source project; its source file, originating project, and license are listed above the code.
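Before the project snippets, a minimal standalone sketch may help as orientation. It is not taken from any of the projects below; the class name ActivationSoftmaxQuickStart is hypothetical, and it only exercises getActivation(INDArray, boolean), the same IActivation call the loss-function examples further down rely on.

import org.nd4j.linalg.activations.impl.ActivationSoftmax;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class ActivationSoftmaxQuickStart {
    public static void main(String[] args) {
        // A single row of three pre-activation values (logits).
        INDArray preOutput = Nd4j.create(new double[]{1.0, 2.0, 3.0}, new long[]{1, 3});

        // getActivation applies softmax along the last dimension;
        // the boolean flag indicates whether this is a training-time pass.
        INDArray probabilities = new ActivationSoftmax().getActivation(preOutput.dup(), true);

        System.out.println(probabilities);            // each row sums to 1.0
        System.out.println(probabilities.sumNumber()); // ~1.0
    }
}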
Example #1
Source File: MixtureDensityLossFunctionTestCase.java From jstarcraft-ai with Apache License 2.0

@Test
@Override
public void testGradient() throws Exception {
    EnvironmentContext context = EnvironmentFactory.getContext();
    Future<?> task = context.doTask(() -> {
        LinkedList<KeyValue<IActivation, ActivationFunction>> activetionList = new LinkedList<>();
        activetionList.add(new KeyValue<>(new ActivationSigmoid(), new SigmoidActivationFunction()));
        activetionList.add(new KeyValue<>(new ActivationSoftmax(), new SoftMaxActivationFunction()));
        for (KeyValue<IActivation, ActivationFunction> keyValue : activetionList) {
            INDArray array = Nd4j.linspace(-2.5D, 2.0D, 20).reshape(5, 4);
            INDArray marks = Nd4j.create(new double[] { 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D }).reshape(5, 2);
            ILossFunction oldFunction = getOldFunction();
            INDArray value = oldFunction.computeGradient(marks, array.dup(), keyValue.getKey(), null);

            MathMatrix input = getMatrix(array.rows(), array.columns()).copyMatrix(getMatrix(array), false);
            MathMatrix output = getMatrix(input.getRowSize(), input.getColumnSize());
            ActivationFunction function = keyValue.getValue();
            function.forward(input, output);
            MathMatrix gradient = getMatrix(input.getRowSize(), input.getColumnSize());
            LossFunction newFunction = getNewFunction(function);
            newFunction.doCache(getMatrix(marks.rows(), marks.columns()).copyMatrix(getMatrix(marks), false), output);
            newFunction.computeGradient(getMatrix(marks.rows(), marks.columns()).copyMatrix(getMatrix(marks), false), output, null, gradient);
            function.backward(input, gradient, output);

            System.out.println(value);
            System.out.println(output);
            Assert.assertTrue(equalMatrix(output, value));
        }
    });
    task.get();
}
Example #2
Source File: LossFunctionTestCase.java From jstarcraft-ai with Apache License 2.0

@Test
public void testScore() throws Exception {
    EnvironmentContext context = EnvironmentFactory.getContext();
    Future<?> task = context.doTask(() -> {
        LinkedList<KeyValue<IActivation, ActivationFunction>> activetionList = new LinkedList<>();
        activetionList.add(new KeyValue<>(new ActivationSigmoid(), new SigmoidActivationFunction()));
        activetionList.add(new KeyValue<>(new ActivationSoftmax(), new SoftMaxActivationFunction()));
        for (KeyValue<IActivation, ActivationFunction> keyValue : activetionList) {
            INDArray array = Nd4j.linspace(-2.5D, 2.0D, 10).reshape(5, 2);
            INDArray marks = Nd4j.create(new double[] { 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D }).reshape(5, 2);
            ILossFunction oldFunction = getOldFunction();
            double value = oldFunction.computeScore(marks, array.dup(), keyValue.getKey(), null, false);

            DenseMatrix input = getMatrix(array);
            DenseMatrix output = DenseMatrix.valueOf(input.getRowSize(), input.getColumnSize());
            ActivationFunction function = keyValue.getValue();
            function.forward(input, output);
            LossFunction newFunction = getNewFunction(function);
            newFunction.doCache(getMatrix(marks), output);
            double score = newFunction.computeScore(getMatrix(marks), output, null);

            System.out.println(value);
            System.out.println(score);
            if (Math.abs(value - score) > MathUtility.EPSILON) {
                Assert.fail();
            }
        }
    });
    task.get();
}
Example #3
Source File: LossFunctionTestCase.java From jstarcraft-ai with Apache License 2.0

@Test
public void testGradient() throws Exception {
    EnvironmentContext context = EnvironmentFactory.getContext();
    Future<?> task = context.doTask(() -> {
        LinkedList<KeyValue<IActivation, ActivationFunction>> activetionList = new LinkedList<>();
        activetionList.add(new KeyValue<>(new ActivationSigmoid(), new SigmoidActivationFunction()));
        activetionList.add(new KeyValue<>(new ActivationSoftmax(), new SoftMaxActivationFunction()));
        for (KeyValue<IActivation, ActivationFunction> keyValue : activetionList) {
            INDArray array = Nd4j.linspace(-2.5D, 2.0D, 10).reshape(5, 2);
            INDArray marks = Nd4j.create(new double[] { 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D }).reshape(5, 2);
            ILossFunction oldFunction = getOldFunction();
            INDArray value = oldFunction.computeGradient(marks, array.dup(), keyValue.getKey(), null);

            DenseMatrix input = getMatrix(array);
            DenseMatrix output = DenseMatrix.valueOf(input.getRowSize(), input.getColumnSize());
            ActivationFunction function = keyValue.getValue();
            function.forward(input, output);
            DenseMatrix gradient = DenseMatrix.valueOf(input.getRowSize(), input.getColumnSize());
            LossFunction newFunction = getNewFunction(function);
            newFunction.doCache(getMatrix(marks), output);
            newFunction.computeGradient(getMatrix(marks), output, null, gradient);
            function.backward(input, gradient, output);

            System.out.println(value);
            System.out.println(output);
            Assert.assertTrue(equalMatrix(output, value));
        }
    });
    task.get();
}
Example #4
Source File: MixtureDensityLossFunctionTestCase.java From jstarcraft-ai with Apache License 2.0

@Test
@Override
public void testScore() throws Exception {
    EnvironmentContext context = EnvironmentFactory.getContext();
    Future<?> task = context.doTask(() -> {
        LinkedList<KeyValue<IActivation, ActivationFunction>> activetionList = new LinkedList<>();
        activetionList.add(new KeyValue<>(new ActivationSigmoid(), new SigmoidActivationFunction()));
        activetionList.add(new KeyValue<>(new ActivationSoftmax(), new SoftMaxActivationFunction()));
        for (KeyValue<IActivation, ActivationFunction> keyValue : activetionList) {
            INDArray array = Nd4j.linspace(-2.5D, 2.0D, 20).reshape(5, 4);
            INDArray marks = Nd4j.create(new double[] { 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D, 0D, 1D }).reshape(5, 2);
            ILossFunction oldFunction = getOldFunction();
            float value = (float) oldFunction.computeScore(marks, array.dup(), keyValue.getKey(), null, false);

            MathMatrix input = getMatrix(array.rows(), array.columns()).copyMatrix(getMatrix(array), false);
            MathMatrix output = getMatrix(input.getRowSize(), input.getColumnSize());
            ActivationFunction function = keyValue.getValue();
            function.forward(input, output);
            LossFunction newFunction = getNewFunction(function);
            newFunction.doCache(getMatrix(marks.rows(), marks.columns()).copyMatrix(getMatrix(marks), false), output);
            float score = newFunction.computeScore(getMatrix(marks.rows(), marks.columns()).copyMatrix(getMatrix(marks), false), output, null);

            System.out.println(value);
            System.out.println(score);
            if (!MathUtility.equal(value, score)) {
                Assert.fail();
            }
        }
    });
    task.get();
}
Example #5
Source File: LossMCXENT.java From nd4j with Apache License 2.0

private INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                        "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray output = activationFn.getActivation(preOutput.dup(), true);
    if (activationFn instanceof ActivationSoftmax && softmaxClipEps > 0.0) {
        BooleanIndexing.replaceWhere(output, softmaxClipEps, Conditions.lessThan(softmaxClipEps));
        BooleanIndexing.replaceWhere(output, 1.0 - softmaxClipEps, Conditions.greaterThan(1.0 - softmaxClipEps));
    }
    INDArray scoreArr = Transforms.log(output, false).muli(labels);

    //Weighted loss function
    if (weights != null) {
        if (weights.length() != scoreArr.size(1)) {
            throw new IllegalStateException("Weights vector (length " + weights.length()
                            + ") does not match output.size(1)=" + preOutput.size(1));
        }
        scoreArr.muliRowVector(weights);
    }

    if (mask != null) {
        LossUtil.applyMask(scoreArr, mask);
    }
    return scoreArr;
}
Example #6
Source File: ActorCriticTest.java From deeplearning4j with Apache License 2.0

@Test
public void testLoss() {
    ActivationSoftmax activation = new ActivationSoftmax();
    ActorCriticLoss loss = new ActorCriticLoss();
    double n = 10;
    double eps = 1e-5;
    double maxRelError = 1e-3;

    for (double i = eps; i < n; i++) {
        for (double j = eps; j < n; j++) {
            INDArray labels = Nd4j.create(new double[]{i / n, 1 - i / n}, new long[]{1, 2});
            INDArray output = Nd4j.create(new double[]{j / n, 1 - j / n}, new long[]{1, 2});
            INDArray gradient = loss.computeGradient(labels, output, activation, null);

            output = Nd4j.create(new double[]{j / n, 1 - j / n}, new long[]{1, 2});
            double score = loss.computeScore(labels, output, activation, null, false);
            INDArray output1 = Nd4j.create(new double[]{j / n + eps, 1 - j / n}, new long[]{1, 2});
            double score1 = loss.computeScore(labels, output1, activation, null, false);
            INDArray output2 = Nd4j.create(new double[]{j / n, 1 - j / n + eps}, new long[]{1, 2});
            double score2 = loss.computeScore(labels, output2, activation, null, false);

            double gradient1 = (score1 - score) / eps;
            double gradient2 = (score2 - score) / eps;
            double error1 = gradient1 - gradient.getDouble(0);
            double error2 = gradient2 - gradient.getDouble(1);
            double relError1 = error1 / gradient.getDouble(0);
            double relError2 = error2 / gradient.getDouble(1);
//            System.out.println(gradient.getDouble(0) + " " + gradient1 + " " + relError1);
//            System.out.println(gradient.getDouble(1) + " " + gradient2 + " " + relError2);
            assertTrue(gradient.getDouble(0) < maxRelError || Math.abs(relError1) < maxRelError);
            assertTrue(gradient.getDouble(1) < maxRelError || Math.abs(relError2) < maxRelError);
        }
    }
}
Example #7
Source File: LossMCXENT.java From deeplearning4j with Apache License 2.0

protected INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (!labels.equalShapes(preOutput)) {
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s", labels.shape(), preOutput.shape());
    }
    labels = labels.castTo(preOutput.dataType());   //No-op if already correct dtype
    INDArray output = activationFn.getActivation(preOutput.dup(), true);
    if (activationFn instanceof ActivationSoftmax && softmaxClipEps > 0.0) {
        BooleanIndexing.replaceWhere(output, softmaxClipEps, Conditions.lessThan(softmaxClipEps));
        BooleanIndexing.replaceWhere(output, 1.0 - softmaxClipEps, Conditions.greaterThan(1.0 - softmaxClipEps));
    }
    INDArray scoreArr = Transforms.log(output, false).muli(labels);

    //Weighted loss function
    if (weights != null) {
        if (weights.length() != scoreArr.size(1)) {
            throw new IllegalStateException("Weights vector (length " + weights.length()
                            + ") does not match output.size(1)=" + preOutput.size(1));
        }
        scoreArr.muliRowVector(weights.castTo(scoreArr.dataType()));
    }

    if (mask != null) {
        LossUtil.applyMask(scoreArr, mask);
    }
    return scoreArr;
}
Example #8
Source File: ActivationLayerTest.java From deeplearning4j with Apache License 2.0

@Test
public void testActivationInheritanceCG() {

    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123)
            .weightInit(WeightInit.XAVIER)
            .activation(Activation.RATIONALTANH)
            .graphBuilder()
            .addInputs("in")
            .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in")
            .addLayer("1", new ActivationLayer(), "0")
            .addLayer("2", new ActivationLayer.Builder().build(), "1")
            .addLayer("3", new ActivationLayer.Builder().activation(Activation.ELU).build(), "2")
            .addLayer("4", new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(10).nOut(10).build(), "3")
            .setOutputs("4")
            .build();

    ComputationGraph network = new ComputationGraph(conf);
    network.init();

    assertNotNull(((ActivationLayer) network.getLayer("1").conf().getLayer()).getActivationFn());

    assertTrue(((DenseLayer) network.getLayer("0").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer("1").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer("2").conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer("3").conf().getLayer()).getActivationFn() instanceof ActivationELU);
    assertTrue(((OutputLayer) network.getLayer("4").conf().getLayer()).getActivationFn() instanceof ActivationSoftmax);
}
Example #9
Source File: ActivationLayerTest.java From deeplearning4j with Apache License 2.0

@Test
public void testActivationInheritance() {

    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).seed(123)
            .weightInit(WeightInit.XAVIER)
            .activation(Activation.RATIONALTANH)
            .list()
            .layer(new DenseLayer.Builder().nIn(10).nOut(10).build())
            .layer(new ActivationLayer())
            .layer(new ActivationLayer.Builder().build())
            .layer(new ActivationLayer.Builder().activation(Activation.ELU).build())
            .layer(new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .activation(Activation.SOFTMAX).nIn(10).nOut(10).build())
            .build();

    MultiLayerNetwork network = new MultiLayerNetwork(conf);
    network.init();

    assertNotNull(((ActivationLayer) network.getLayer(1).conf().getLayer()).getActivationFn());

    assertTrue(((DenseLayer) network.getLayer(0).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer(1).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer(2).conf().getLayer()).getActivationFn() instanceof ActivationRationalTanh);
    assertTrue(((ActivationLayer) network.getLayer(3).conf().getLayer()).getActivationFn() instanceof ActivationELU);
    assertTrue(((OutputLayer) network.getLayer(4).conf().getLayer()).getActivationFn() instanceof ActivationSoftmax);
}
Example #10
Source File: RnnOutputLayer.java From deeplearning4j with Apache License 2.0

/**
 * @param lossFunction Loss function for the output layer
 */
public Builder(ILossFunction lossFunction) {
    this.setLossFn(lossFunction);
    //Set default activation function to softmax (for consistent behaviour with no-arg constructor)
    this.setActivationFn(new ActivationSoftmax());
}
Example #11
Source File: ValidateCuDNN.java From deeplearning4j with Apache License 2.0

@Test @Ignore //AB 2019/05/20 - https://github.com/deeplearning4j/deeplearning4j/issues/5088 - ignored to get to "all passing" state for CI, and revisit later
public void validateConvLayersLRN() {
    //Test ONLY LRN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(ScheduleType.EPOCH, 1e-2, 0.1, 20)).build())
            .list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new LocalResponseNormalization.Builder()
                            .alpha(1e-3).beta(0.75).k(2)
                            .n(5).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new Convolution2D.Builder().nOut(256)
                            .kernelSize(5, 5).padding(2, 2)
                            .biasInit(0.0)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.LocalResponseNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, 1e-2, 1e-2);
}
Example #12
Source File: CenterLossOutputLayer.java From deeplearning4j with Apache License 2.0

public Builder() {
    this.setActivationFn(new ActivationSoftmax());
}
Example #13
Source File: RegressionTest100b4.java From deeplearning4j with Apache License 2.0

@Test
public void testLSTM() throws Exception {

    File f = Resources.asFile("regression_testing/100b4/GravesLSTMCharModelingExample_100b4.bin");
    MultiLayerNetwork net = MultiLayerNetwork.load(f, true);

    LSTM l0 = (LSTM) net.getLayer(0).conf().getLayer();
    assertEquals(new ActivationTanH(), l0.getActivationFn());
    assertEquals(200, l0.getNOut());
    assertEquals(new WeightInitXavier(), l0.getWeightInitFn());
    assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l0));
    assertEquals(new Adam(0.005), l0.getIUpdater());

    LSTM l1 = (LSTM) net.getLayer(1).conf().getLayer();
    assertEquals(new ActivationTanH(), l1.getActivationFn());
    assertEquals(200, l1.getNOut());
    assertEquals(new WeightInitXavier(), l1.getWeightInitFn());
    assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l1));
    assertEquals(new Adam(0.005), l1.getIUpdater());

    RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer();
    assertEquals(new ActivationSoftmax(), l2.getActivationFn());
    assertEquals(77, l2.getNOut());
    assertEquals(new WeightInitXavier(), l2.getWeightInitFn());
    assertEquals(new L2Regularization(0.0001), TestUtils.getL2Reg(l2));
    assertEquals(new Adam(0.005), l2.getIUpdater());

    assertEquals(BackpropType.TruncatedBPTT, net.getLayerWiseConfigurations().getBackpropType());
    assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength());
    assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength());

    INDArray outExp;
    File f2 = Resources.asFile("regression_testing/100b4/GravesLSTMCharModelingExample_Output_100b4.bin");
    try (DataInputStream dis = new DataInputStream(new FileInputStream(f2))) {
        outExp = Nd4j.read(dis);
    }

    INDArray in;
    File f3 = Resources.asFile("regression_testing/100b4/GravesLSTMCharModelingExample_Input_100b4.bin");
    try (DataInputStream dis = new DataInputStream(new FileInputStream(f3))) {
        in = Nd4j.read(dis);
    }

    INDArray outAct = net.output(in);

    assertEquals(outExp, outAct);
}
Example #14
Source File: RegressionTest100a.java From deeplearning4j with Apache License 2.0

@Test
public void testGravesLSTM() throws Exception {

    File f = Resources.asFile("regression_testing/100a/GravesLSTMCharModelingExample_100a.bin");
    MultiLayerNetwork net = MultiLayerNetwork.load(f, true);

    GravesLSTM l0 = (GravesLSTM) net.getLayer(0).conf().getLayer();
    assertEquals(new ActivationTanH(), l0.getActivationFn());
    assertEquals(200, l0.getNOut());
    assertEquals(new WeightInitXavier(), l0.getWeightInitFn());
    assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0));
    assertEquals(new RmsProp(0.1), l0.getIUpdater());

    GravesLSTM l1 = (GravesLSTM) net.getLayer(1).conf().getLayer();
    assertEquals(new ActivationTanH(), l1.getActivationFn());
    assertEquals(200, l1.getNOut());
    assertEquals(new WeightInitXavier(), l1.getWeightInitFn());
    assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l1));
    assertEquals(new RmsProp(0.1), l1.getIUpdater());

    RnnOutputLayer l2 = (RnnOutputLayer) net.getLayer(2).conf().getLayer();
    assertEquals(new ActivationSoftmax(), l2.getActivationFn());
    assertEquals(77, l2.getNOut());
    assertEquals(new WeightInitXavier(), l2.getWeightInitFn());
    assertEquals(new WeightDecay(0.001, false), TestUtils.getWeightDecayReg(l0));
    assertEquals(new RmsProp(0.1), l0.getIUpdater());

    assertEquals(BackpropType.TruncatedBPTT, net.getLayerWiseConfigurations().getBackpropType());
    assertEquals(50, net.getLayerWiseConfigurations().getTbpttBackLength());
    assertEquals(50, net.getLayerWiseConfigurations().getTbpttFwdLength());

    INDArray outExp;
    File f2 = Resources.asFile("regression_testing/100a/GravesLSTMCharModelingExample_Output_100a.bin");
    try (DataInputStream dis = new DataInputStream(new FileInputStream(f2))) {
        outExp = Nd4j.read(dis);
    }

    INDArray in;
    File f3 = Resources.asFile("regression_testing/100a/GravesLSTMCharModelingExample_Input_100a.bin");
    try (DataInputStream dis = new DataInputStream(new FileInputStream(f3))) {
        in = Nd4j.read(dis);
    }

    INDArray outAct = net.output(in);

    assertEquals(outExp, outAct);
}
Example #15
Source File: CapsnetGradientCheckTest.java From deeplearning4j with Apache License 2.0

@Test
public void testCapsNet() {

    int[] minibatchSizes = {8, 16};

    int width = 6;
    int height = 6;
    int inputDepth = 4;

    int[] primaryCapsDims = {2, 4};
    int[] primaryCapsChannels = {8};
    int[] capsules = {5};
    int[] capsuleDims = {4, 8};
    int[] routings = {1};

    Nd4j.getRandom().setSeed(12345);

    for (int routing : routings) {
        for (int primaryCapsDim : primaryCapsDims) {
            for (int primarpCapsChannel : primaryCapsChannels) {
                for (int capsule : capsules) {
                    for (int capsuleDim : capsuleDims) {
                        for (int minibatchSize : minibatchSizes) {

                            INDArray input = Nd4j.rand(minibatchSize, inputDepth * height * width).mul(10)
                                    .reshape(-1, inputDepth, height, width);
                            INDArray labels = Nd4j.zeros(minibatchSize, capsule);
                            for (int i = 0; i < minibatchSize; i++) {
                                labels.putScalar(new int[]{i, i % capsule}, 1.0);
                            }

                            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                                    .dataType(DataType.DOUBLE)
                                    .seed(123)
                                    .updater(new NoOp())
                                    .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6)))
                                    .list()
                                    .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel)
                                            .kernelSize(3, 3)
                                            .stride(2, 2)
                                            .build())
                                    .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build())
                                    .layer(new CapsuleStrengthLayer.Builder().build())
                                    .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build())
                                    .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build())
                                    .setInputType(InputType.convolutional(height, width, inputDepth))
                                    .build();

                            MultiLayerNetwork net = new MultiLayerNetwork(conf);
                            net.init();

                            for (int i = 0; i < 4; i++) {
                                System.out.println("nParams, layer " + i + ": " + net.getLayer(i).numParams());
                            }

                            String msg = "minibatch=" + minibatchSize +
                                    ", PrimaryCaps: " + primarpCapsChannel +
                                    " channels, " + primaryCapsDim + " dimensions, Capsules: " + capsule +
                                    " capsules with " + capsuleDim + " dimensions and " + routing + " routings";
                            System.out.println(msg);

                            boolean gradOK = GradientCheckUtil.checkGradients(new GradientCheckUtil.MLNConfig().net(net).input(input)
                                    .labels(labels).subset(true).maxPerParam(100));

                            assertTrue(msg, gradOK);

                            TestUtils.testModelSerialization(net);
                        }
                    }
                }
            }
        }
    }
}
Example #16
Source File: CapsNetMNISTTest.java From deeplearning4j with Apache License 2.0

@Test
public void testCapsNetOnMNIST() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .seed(123)
            .updater(new Adam())
            .list()
            .layer(new ConvolutionLayer.Builder()
                    .nOut(16)
                    .kernelSize(9, 9)
                    .stride(3, 3)
                    .build())
            .layer(new PrimaryCapsules.Builder(8, 8)
                    .kernelSize(7, 7)
                    .stride(2, 2)
                    .build())
            .layer(new CapsuleLayer.Builder(10, 16, 3).build())
            .layer(new CapsuleStrengthLayer.Builder().build())
            .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build())
            .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build())
            .setInputType(InputType.convolutionalFlat(28, 28, 1))
            .build();

    MultiLayerNetwork model = new MultiLayerNetwork(conf);
    model.init();

    int rngSeed = 12345;
    try {
        MnistDataSetIterator mnistTrain = new MnistDataSetIterator(64, true, rngSeed);
        MnistDataSetIterator mnistTest = new MnistDataSetIterator(64, false, rngSeed);

        for (int i = 0; i < 2; i++) {
            model.fit(mnistTrain);
        }

        Evaluation eval = model.evaluate(mnistTest);

        assertTrue("Accuracy not over 95%", eval.accuracy() > 0.95);
        assertTrue("Precision not over 95%", eval.precision() > 0.95);
        assertTrue("Recall not over 95%", eval.recall() > 0.95);
        assertTrue("F1-score not over 95%", eval.f1() > 0.95);

    } catch (IOException e) {
        System.out.println("Could not load MNIST.");
    }
}
Example #17
Source File: DTypeTests.java From deeplearning4j with Apache License 2.0

@Test
public void testCapsNetDtypes() {
    for (DataType globalDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        Nd4j.setDefaultDataTypes(globalDtype, globalDtype);
        for (DataType networkDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            assertEquals(globalDtype, Nd4j.dataType());
            assertEquals(globalDtype, Nd4j.defaultFloatingPointType());

            String msg = "Global dtype: " + globalDtype + ", network dtype: " + networkDtype;

            int primaryCapsDim = 2;
            int primarpCapsChannel = 8;
            int capsule = 5;
            int minibatchSize = 8;
            int routing = 1;
            int capsuleDim = 4;
            int height = 6;
            int width = 6;
            int inputDepth = 4;

            MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .dataType(networkDtype)
                    .seed(123)
                    .updater(new NoOp())
                    .weightInit(new WeightInitDistribution(new UniformDistribution(-6, 6)))
                    .list()
                    .layer(new PrimaryCapsules.Builder(primaryCapsDim, primarpCapsChannel)
                            .kernelSize(3, 3)
                            .stride(2, 2)
                            .build())
                    .layer(new CapsuleLayer.Builder(capsule, capsuleDim, routing).build())
                    .layer(new CapsuleStrengthLayer.Builder().build())
                    .layer(new ActivationLayer.Builder(new ActivationSoftmax()).build())
                    .layer(new LossLayer.Builder(new LossNegativeLogLikelihood()).build())
                    .setInputType(InputType.convolutional(height, width, inputDepth))
                    .build();

            MultiLayerNetwork net = new MultiLayerNetwork(conf);
            net.init();

            INDArray in = Nd4j.rand(networkDtype, minibatchSize, inputDepth * height * width).mul(10)
                    .reshape(-1, inputDepth, height, width);
            INDArray label = Nd4j.zeros(networkDtype, minibatchSize, capsule);
            for (int i = 0; i < minibatchSize; i++) {
                label.putScalar(new int[]{i, i % capsule}, 1.0);
            }

            INDArray out = net.output(in);
            assertEquals(msg, networkDtype, out.dataType());
            List<INDArray> ff = net.feedForward(in);
            for (int i = 0; i < ff.size(); i++) {
                String s = msg + " - layer " + (i - 1) + " - " + (i == 0 ? "input" : net.getLayer(i - 1).conf().getLayer().getClass().getSimpleName());
                assertEquals(s, networkDtype, ff.get(i).dataType());
            }

            net.setInput(in);
            net.setLabels(label);
            net.computeGradientAndScore();

            net.fit(new DataSet(in, label));

            logUsedClasses(net);

            //Now, test mismatched dtypes for input/labels:
            for (DataType inputLabelDtype : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
                INDArray in2 = in.castTo(inputLabelDtype);
                INDArray label2 = label.castTo(inputLabelDtype);
                net.output(in2);
                net.setInput(in2);
                net.setLabels(label2);
                net.computeGradientAndScore();

                net.fit(new DataSet(in2, label2));
            }
        }
    }
}
Example #18
Source File: ValidateCuDNN.java From deeplearning4j with Apache License 2.0

@Test
public void validateConvLayers() {
    Nd4j.getRandom().setSeed(12345);

    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 64;
    int imageWidth = 64;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(new Nesterovs(1e-3, 0.9))
            .list(
                    new Convolution2D.Builder().nOut(16)
                            .kernelSize(4, 4).biasInit(0.0)
                            .stride(2, 2).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new Convolution2D.Builder().nOut(256)
                            .kernelSize(5, 5).padding(2, 2)
                            .biasInit(0.0)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new Convolution2D.Builder().nOut(16)
                            .kernelSize(3, 3).padding(1, 1)
                            .biasInit(0.0)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new Convolution2D.Builder().nOut(16)
                            .kernelSize(3, 3).padding(1, 1)
                            .stride(1, 1).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new DenseLayer.Builder()
                            .nOut(64)
                            .biasInit(0.0)
                            .build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{8, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{8, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(ConvolutionLayer.class);
    classesToTest.add(org.deeplearning4j.nn.layers.convolution.subsampling.SubsamplingLayer.class);

    validateLayers(net, classesToTest, true, fShape, lShape, CuDNNValidationUtil.MAX_REL_ERROR, CuDNNValidationUtil.MIN_ABS_ERROR);
}
Example #19
Source File: ValidateCuDNN.java From deeplearning4j with Apache License 2.0

@Test
public void validateConvLayersSimpleBN() {
    //Test ONLY BN - no other CuDNN functionality (i.e., DL4J impls for everything else)
    Nd4j.getRandom().setSeed(12345);

    int minibatch = 8;
    int numClasses = 10;
    //imageHeight,imageWidth,channels
    int imageHeight = 48;
    int imageWidth = 48;
    int channels = 3;
    IActivation activation = new ActivationIdentity();
    MultiLayerConfiguration multiLayerConfiguration = new NeuralNetConfiguration.Builder()
            .dataType(DataType.DOUBLE)
            .weightInit(WeightInit.XAVIER).seed(42)
            .activation(new ActivationELU())
            .updater(Nesterovs.builder()
                    .momentum(0.9)
                    .learningRateSchedule(new StepSchedule(ScheduleType.EPOCH, 1e-2, 0.1, 20)).build())
            .list(
                    new Convolution2D.Builder().nOut(96)
                            .kernelSize(11, 11).biasInit(0.0)
                            .stride(4, 4).build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new BatchNormalization.Builder().build(),
                    new Pooling2D.Builder()
                            .poolingType(SubsamplingLayer.PoolingType.MAX)
                            .kernelSize(3, 3).stride(2, 2)
                            .build(),
                    new DenseLayer.Builder()
                            .nOut(128)
                            .biasInit(0.0)
                            .build(),
                    new ActivationLayer.Builder().activation(activation).build(),
                    new OutputLayer.Builder().activation(new ActivationSoftmax())
                            .lossFunction(new LossNegativeLogLikelihood())
                            .nOut(numClasses)
                            .biasInit(0.0)
                            .build())
            .setInputType(InputType.convolutionalFlat(imageHeight, imageWidth, channels))
            .build();

    MultiLayerNetwork net = new MultiLayerNetwork(multiLayerConfiguration);
    net.init();

    int[] fShape = new int[]{minibatch, channels, imageHeight, imageWidth};
    int[] lShape = new int[]{minibatch, numClasses};

    List<Class<?>> classesToTest = new ArrayList<>();
    classesToTest.add(org.deeplearning4j.nn.layers.normalization.BatchNormalization.class);

    validateLayers(net, classesToTest, false, fShape, lShape, CuDNNValidationUtil.MAX_REL_ERROR, CuDNNValidationUtil.MIN_ABS_ERROR);
}
Example #20
Source File: RnnOutputLayer.java From deeplearning4j with Apache License 2.0

/**
 * @param lossFunction Loss function for the output layer
 */
public Builder(LossFunction lossFunction) {
    lossFunction(lossFunction);
    //Set default activation function to softmax (for consistent behaviour with no-arg constructor)
    this.setActivationFn(new ActivationSoftmax());
}
Example #21
Source File: RnnOutputLayer.java From deeplearning4j with Apache License 2.0

public Builder() {
    //Set default activation function to softmax (to match default loss function MCXENT)
    this.setActivationFn(new ActivationSoftmax());
}
Example #22
Source File: OutputLayer.java From deeplearning4j with Apache License 2.0

/**
 * @param lossFunction Loss function for the output layer
 */
public Builder(ILossFunction lossFunction) {
    this.setLossFn(lossFunction);
    //Set default activation function to softmax (for consistent behaviour with no-arg constructor)
    this.setActivationFn(new ActivationSoftmax());
}
Example #23
Source File: OutputLayer.java From deeplearning4j with Apache License 2.0

/**
 * @param lossFunction Loss function for the output layer
 */
public Builder(LossFunction lossFunction) {
    super.lossFunction(lossFunction);
    //Set default activation function to softmax (for consistent behaviour with no-arg constructor)
    this.setActivationFn(new ActivationSoftmax());
}
Example #24
Source File: OutputLayer.java From deeplearning4j with Apache License 2.0

public Builder() {
    //Set default activation function to softmax (to match default loss function MCXENT)
    this.setActivationFn(new ActivationSoftmax());
}
Example #25
Source File: LossBinaryXENT.java From deeplearning4j with Apache License 2.0

private INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (!labels.equalShapes(preOutput)) {
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s", labels.shape(), preOutput.shape());
    }
    labels = labels.castTo(preOutput.dataType());   //No-op if already correct dtype

    INDArray scoreArr;
    if (activationFn instanceof ActivationSoftmax) {
        //TODO Post GPU support for custom ops: Use LogSoftMax op to avoid numerical issues when calculating score
        INDArray logsoftmax = Nd4j.exec((CustomOp) new SoftMax(preOutput, preOutput.ulike(), -1))[0];
        Transforms.log(logsoftmax, false);
        scoreArr = logsoftmax.muli(labels);

    } else {
        INDArray output = activationFn.getActivation(preOutput.dup(), true);
        if (clipEps > 0.0) {
            CustomOp op = DynamicCustomOp.builder("clipbyvalue")
                    .addInputs(output)
                    .callInplace(true)
                    .addFloatingPointArguments(clipEps, 1.0 - clipEps)
                    .build();
            Nd4j.getExecutioner().execAndReturn(op);
        }
        scoreArr = Transforms.log(output, true).muli(labels);
        INDArray secondTerm = output.rsubi(1);
        Transforms.log(secondTerm, false);
        secondTerm.muli(labels.rsub(1));
        scoreArr.addi(secondTerm);
    }

    //Weighted loss function
    if (weights != null) {
        if (weights.length() != preOutput.size(1)) {
            throw new IllegalStateException("Weights vector (length " + weights.length()
                            + ") does not match output.size(1)=" + preOutput.size(1));
        }
        scoreArr.muliRowVector(weights.castTo(scoreArr.dataType()));
    }

    if (mask != null) {
        LossUtil.applyMask(scoreArr, mask);
    }
    return scoreArr;
}
Example #26
Source File: LossMCXENT.java From deeplearning4j with Apache License 2.0

@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (!labels.equalShapes(preOutput)) {
        Preconditions.throwEx("Labels and preOutput must have equal shapes: got shapes %s vs %s", labels.shape(), preOutput.shape());
    }
    INDArray grad;
    INDArray output = activationFn.getActivation(preOutput.dup(), true);
    labels = labels.castTo(preOutput.dataType());   //No-op if already correct dtype

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                                + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights.castTo(labels.dataType()));
            INDArray col = temp.sum(true, 1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                                + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights.castTo(grad.dataType()));
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
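A side note on the softmax branch above: when the activation is ActivationSoftmax and no weights are set, the gradient with respect to the pre-activations collapses to output.subi(labels), i.e. softmax(preOutput) minus the labels, rather than backpropagating through the full softmax Jacobian. The following minimal sketch (variable values and names are illustrative, not from the project) shows how that identity could be checked numerically with the same classes used in this example.

// Illustrative check of the softmax + MCXENT shortcut: dL/dpreOutput == softmax(preOutput) - labels
// (unweighted case, matching dtypes assumed).
INDArray preOutput = Nd4j.create(new double[]{0.5, 1.5, -0.5}, new long[]{1, 3});
INDArray labels = Nd4j.create(new double[]{0.0, 1.0, 0.0}, new long[]{1, 3});
IActivation softmax = new ActivationSoftmax();

INDArray grad = new LossMCXENT().computeGradient(labels, preOutput, softmax, null);
INDArray expected = softmax.getActivation(preOutput.dup(), true).sub(labels);

System.out.println(grad);       // gradient returned by the loss function
System.out.println(expected);   // softmax(preOutput) - labels; should match grad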
Example #27
Source File: LossFunctionTest.java From deeplearning4j with Apache License 2.0

@Test
public void testWeightedLossFunctionDTypes() {
    for (DataType activationsDt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
        for (DataType weightsDt : new DataType[]{DataType.DOUBLE, DataType.FLOAT, DataType.HALF}) {
            for (boolean rank1W : new boolean[]{false, true}) {

                INDArray preOut = Nd4j.rand(activationsDt, 2, 3);
                INDArray l = Nd4j.rand(activationsDt, 2, 3);

                INDArray w = Nd4j.createFromArray(1.0f, 2.0f, 3.0f).castTo(weightsDt);
                if (!rank1W) {
                    w = w.reshape(1, 3);
                }

                ILossFunction lf = null;
                for (int i = 0; i < 10; i++) {
                    switch (i) {
                        case 0:
                            lf = new LossBinaryXENT(w);
                            break;
                        case 1:
                            lf = new LossL1(w);
                            break;
                        case 2:
                            lf = new LossL2(w);
                            break;
                        case 3:
                            lf = new LossMAE(w);
                            break;
                        case 4:
                            lf = new LossMAPE(w);
                            break;
                        case 5:
                            lf = new LossMCXENT(w);
                            break;
                        case 6:
                            lf = new LossMSE(w);
                            break;
                        case 7:
                            lf = new LossMSLE(w);
                            break;
                        case 8:
                            lf = new LossNegativeLogLikelihood(w);
                            break;
                        case 9:
                            lf = new LossSparseMCXENT(w);
                            l = Nd4j.createFromArray(1, 2).reshape(2, 1).castTo(activationsDt);
                            break;
                        default:
                            throw new RuntimeException();
                    }
                }

                //Check score
                lf.computeScore(l, preOut, new ActivationSoftmax(), null, true);

                //Check backward
                lf.computeGradient(l, preOut, new ActivationSoftmax(), null);
            }
        }
    }
}
Example #28
Source File: LossBinaryXENT.java From nd4j with Apache License 2.0

private INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                        "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray scoreArr;
    if (activationFn instanceof ActivationSoftmax) {
        //Use LogSoftMax op to avoid numerical issues when calculating score
        INDArray logsoftmax = Nd4j.getExecutioner().execAndReturn(new LogSoftMax(preOutput.dup()));
        scoreArr = logsoftmax.muli(labels);

    } else {
        //INDArray output = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(activationFn, preOutput.dup()));
        INDArray output = activationFn.getActivation(preOutput.dup(), true);
        if (clipEps > 0.0) {
            CustomOp op = DynamicCustomOp.builder("clipbyvalue")
                    .addInputs(output)
                    .callInplace(true)
                    .addFloatingPointArguments(clipEps, 1.0 - clipEps)
                    .build();
            Nd4j.getExecutioner().exec(op);
        }
        scoreArr = Transforms.log(output, true).muli(labels);
        INDArray secondTerm = output.rsubi(1);
        Transforms.log(secondTerm, false);
        secondTerm.muli(labels.rsub(1));
        scoreArr.addi(secondTerm);
    }

    //Weighted loss function
    if (weights != null) {
        if (weights.length() != preOutput.size(1)) {
            throw new IllegalStateException("Weights vector (length " + weights.length()
                            + ") does not match output.size(1)=" + preOutput.size(1));
        }
        scoreArr.muliRowVector(weights);
    }

    if (mask != null) {
        LossUtil.applyMask(scoreArr, mask);
    }
    return scoreArr;
}
Example #29
Source File: LossMCXENT.java From nd4j with Apache License 2.0

@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                        "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray grad;
    //INDArray output = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(activationFn, preOutput.dup()));
    INDArray output = activationFn.getActivation(preOutput.dup(), true);

    if (activationFn instanceof ActivationSoftmax) {

        if (mask != null && LossUtil.isPerOutputMasking(output, mask)) {
            throw new UnsupportedOperationException("Per output masking for MCXENT + softmax: not supported");
        }

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                                + ") does not match output.size(1)=" + output.size(1));
            }
            INDArray temp = labels.mulRowVector(weights);
            INDArray col = temp.sum(1);
            grad = output.mulColumnVector(col).sub(temp);
        } else {
            grad = output.subi(labels);
        }
    } else {
        INDArray dLda = output.rdivi(labels).negi();

        grad = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation function with weights

        //Weighted loss function
        if (weights != null) {
            if (weights.length() != output.size(1)) {
                throw new IllegalStateException("Weights vector (length " + weights.length()
                                + ") does not match output.size(1)=" + output.size(1));
            }
            grad.muliRowVector(weights);
        }
    }

    //Loss function with masking
    if (mask != null) {
        LossUtil.applyMask(grad, mask);
    }

    return grad;
}
Example #30
Source File: SoftMaxActivationFunctionTestCase.java From jstarcraft-ai with Apache License 2.0

@Override
protected IActivation getOldFunction() {
    return new ActivationSoftmax();
}