org.nd4j.linalg.learning.config.IUpdater Java Examples

The following examples show how to use org.nd4j.linalg.learning.config.IUpdater. The examples are taken from open-source projects; the source file and originating project are listed above each example's code.
Example #1
Source File: AdaMaxSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? AdaMax.DEFAULT_ADAMAX_LEARNING_RATE : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    double b1 = beta1 == null ? AdaMax.DEFAULT_ADAMAX_BETA1_MEAN_DECAY : beta1.getValue(parameterValues);
    double b2 = beta2 == null ? AdaMax.DEFAULT_ADAMAX_BETA2_VAR_DECAY : beta2.getValue(parameterValues);
    double eps = epsilon == null ? AdaMax.DEFAULT_ADAMAX_EPSILON : epsilon.getValue(parameterValues);
    if(lrS == null){
        return new AdaMax(lr, b1, b2, eps);
    } else {
        AdaMax a = new AdaMax(lrS);
        a.setBeta1(b1);
        a.setBeta2(b2);
        a.setEpsilon(eps);
        return a;
    }
}
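Usage note: the IUpdater built by a hyperparameter space like the one above is normally passed to a network configuration via the updater(...) builder method. A minimal sketch (layer sizes and hyperparameter values are illustrative, not taken from the example):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.AdaMax;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class AdaMaxUsageSketch {
    public static void main(String[] args) {
        // AdaMax with explicit hyperparameters: learning rate, beta1, beta2, epsilon
        AdaMax adaMax = new AdaMax(1e-3, 0.9, 0.999, 1e-8);

        // Apply the updater network-wide through the configuration builder
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(adaMax)
                .list()
                .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX).nIn(10).nOut(3).build())
                .build();
    }
}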
 
Example #2
Source File: NetworkUtils.java    From deeplearning4j with Apache License 2.0
private static void setLearningRate(MultiLayerNetwork net, int layerNumber, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) {

    Layer l = net.getLayer(layerNumber).conf().getLayer();
    if (l instanceof BaseLayer) {
        BaseLayer bl = (BaseLayer) l;
        IUpdater u = bl.getIUpdater();
        if (u != null && u.hasLearningRate()) {
            if (newLrSchedule != null) {
                u.setLrAndSchedule(Double.NaN, newLrSchedule);
            } else {
                u.setLrAndSchedule(newLr, null);
            }
        }

        //Need to refresh the updater - if we change the LR (or schedule) we may need to rebuild the updater blocks,
        // which are built by creating blocks of params with the same configuration
        if (refreshUpdater) {
            refreshUpdater(net);
        }
    }
}
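Note the Double.NaN convention above: when a schedule is supplied, the fixed learning rate is deliberately set to NaN so the schedule takes precedence. A minimal sketch of the same IUpdater.setLrAndSchedule(...) calls in isolation (the ExponentialSchedule values are illustrative assumptions):

import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.schedule.ExponentialSchedule;
import org.nd4j.linalg.schedule.ISchedule;
import org.nd4j.linalg.schedule.ScheduleType;

public class SetLrAndScheduleSketch {
    public static void main(String[] args) {
        IUpdater u = new Adam(1e-3);

        // Fixed learning rate: pass the new value and a null schedule
        u.setLrAndSchedule(0.01, null);

        // Scheduled learning rate: pass NaN for the fixed value so the schedule takes precedence
        ISchedule schedule = new ExponentialSchedule(ScheduleType.EPOCH, 0.01, 0.95);
        u.setLrAndSchedule(Double.NaN, schedule);
    }
}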
 
Example #3
Source File: NetworkUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Get the current learning rate, for the specified layer, from the network.
 * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned
 *
 * @param net         Network
 * @param layerNumber Layer number to get the learning rate for
 * @return Learning rate for the specified layer, or null
 */
public static Double getLearningRate(MultiLayerNetwork net, int layerNumber) {
    Layer l = net.getLayer(layerNumber).conf().getLayer();
    int iter = net.getIterationCount();
    int epoch = net.getEpochCount();
    if (l instanceof BaseLayer) {
        BaseLayer bl = (BaseLayer) l;
        IUpdater u = bl.getIUpdater();
        if (u != null && u.hasLearningRate()) {
            double d = u.getLearningRate(iter, epoch);
            if (Double.isNaN(d)) {
                return null;
            }
            return d;
        }
        return null;
    }
    return null;
}
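A hypothetical caller for the helper above, assuming it is the public NetworkUtils.getLearningRate / setLearningRate API in org.deeplearning4j.util (layer sizes and learning rates are illustrative):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork;
import org.deeplearning4j.util.NetworkUtils;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class LearningRateLookupSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Adam(5e-3))
                .list()
                .layer(0, new DenseLayer.Builder().nIn(4).nOut(4).activation(Activation.TANH).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MSE)
                        .activation(Activation.IDENTITY).nIn(4).nOut(2).build())
                .build();
        MultiLayerNetwork net = new MultiLayerNetwork(conf);
        net.init();

        // Read the configured learning rate for layer 0 (null if the layer has no learning rate)
        Double lr = NetworkUtils.getLearningRate(net, 0);   // 0.005 with the config above

        // Change it for layer 0 only; this reaches the private setLearningRate helper shown in Example #2
        NetworkUtils.setLearningRate(net, 0, 1e-3);
    }
}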
 
Example #4
Source File: NetworkUtils.java    From deeplearning4j with Apache License 2.0
private static void setLearningRate(ComputationGraph net, String layerName, double newLr, ISchedule newLrSchedule, boolean refreshUpdater) {

    Layer l = net.getLayer(layerName).conf().getLayer();
    if (l instanceof BaseLayer) {
        BaseLayer bl = (BaseLayer) l;
        IUpdater u = bl.getIUpdater();
        if (u != null && u.hasLearningRate()) {
            if (newLrSchedule != null) {
                u.setLrAndSchedule(Double.NaN, newLrSchedule);
            } else {
                u.setLrAndSchedule(newLr, null);
            }
        }

        //Need to refresh the updater - if we change the LR (or schedule) we may need to rebuild the updater blocks,
        // which are built by creating blocks of params with the same configuration
        if (refreshUpdater) {
            refreshUpdater(net);
        }
    }
}
 
Example #5
Source File: NetworkUtils.java    From deeplearning4j with Apache License 2.0
/**
 * Get the current learning rate, for the specified layer, from the network.
 * Note: If the layer has no learning rate (no parameters, or an updater without a learning rate) then null is returned
 *
 * @param net       Network
 * @param layerName Layer name to get the learning rate for
 * @return Learning rate for the specified layer, or null
 */
public static Double getLearningRate(ComputationGraph net, String layerName) {
    Layer l = net.getLayer(layerName).conf().getLayer();
    int iter = net.getConfiguration().getIterationCount();
    int epoch = net.getConfiguration().getEpochCount();
    if (l instanceof BaseLayer) {
        BaseLayer bl = (BaseLayer) l;
        IUpdater u = bl.getIUpdater();
        if (u != null && u.hasLearningRate()) {
            double d = u.getLearningRate(iter, epoch);
            if (Double.isNaN(d)) {
                return null;
            }
            return d;
        }
        return null;
    }
    return null;
}
 
Example #6
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
private static ComputationGraphConfiguration getGraphConfCNN(int seed, IUpdater updater) {
    Nd4j.getRandom().setSeed(seed);
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1)
                                    .padding(0, 0).activation(Activation.TANH).build(), "in")
                    .addLayer("1", new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1)
                                    .padding(0, 0).activation(Activation.TANH).build(), "0")
                    .addLayer("2", new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10)
                                    .build(), "1")
                    .setOutputs("2").setInputTypes(InputType.convolutional(10, 10, 3))
                    .build();
    return conf;
}
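Because the updater is a parameter of this factory, the same CNN topology can be instantiated with different update rules for comparison. A hypothetical caller from within the same test class (seeds and updaters are illustrative):

// Assumes this code lives next to the private getGraphConfCNN(...) factory above
ComputationGraph sgdGraph = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.1)));
sgdGraph.init();

ComputationGraph adamGraph = new ComputationGraph(getGraphConfCNN(12345, new Adam(1e-3)));
adamGraph.init();

// Same seed, same topology: both start from identical parameters, only the update rule differs
boolean sameInit = sgdGraph.params().equals(adamGraph.params());   // true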
 
Example #7
Source File: TestSparkMultiLayerParameterAveraging.java    From deeplearning4j with Apache License 2.0
@Test
public void testUpdaters() {
    SparkDl4jMultiLayer sparkNet = getBasicNetwork();
    MultiLayerNetwork netCopy = sparkNet.getNetwork().clone();

    netCopy.fit(data);
    IUpdater expectedUpdater = ((BaseLayer) netCopy.conf().getLayer()).getIUpdater();
    double expectedLR = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getLearningRate();
    double expectedMomentum = ((Nesterovs)((BaseLayer) netCopy.conf().getLayer()).getIUpdater()).getMomentum();

    IUpdater actualUpdater = ((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater();
    sparkNet.fit(sparkData);
    double actualLR = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getLearningRate();
    double actualMomentum = ((Nesterovs)((BaseLayer) sparkNet.getNetwork().conf().getLayer()).getIUpdater()).getMomentum();

    assertEquals(expectedUpdater, actualUpdater);
    assertEquals(expectedLR, actualLR, 0.01);
    assertEquals(expectedMomentum, actualMomentum, 0.01);

}
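The getters used in the assertions above come straight from the Nesterovs configuration class. A minimal sketch (the values are illustrative):

import org.nd4j.linalg.learning.config.Nesterovs;

public class NesterovsGettersSketch {
    public static void main(String[] args) {
        Nesterovs nesterovs = new Nesterovs(0.1, 0.9);   // learning rate, momentum
        double lr = nesterovs.getLearningRate();          // 0.1
        double momentum = nesterovs.getMomentum();        // 0.9
        System.out.println(lr + " / " + momentum);
    }
}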
 
Example #8
Source File: NesterovsSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? Nesterovs.DEFAULT_NESTEROV_LEARNING_RATE : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    double m = momentum == null ? Nesterovs.DEFAULT_NESTEROV_MOMENTUM : momentum.getValue(parameterValues);
    ISchedule mS = momentumSchedule == null ? null : momentumSchedule.getValue(parameterValues);
    if(lrS == null){
        if(momentumSchedule == null){
            return new Nesterovs(lr, m);
        } else {
            return new Nesterovs(lr, mS);
        }
    } else {
        if(momentumSchedule == null){
            return new Nesterovs(lrS, m);
        } else {
            return new Nesterovs(lrS, mS);
        }
    }
}
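The four branches above map to the four Nesterovs constructors (fixed or scheduled learning rate, fixed or scheduled momentum). A minimal sketch of the scheduled-learning-rate case, assuming StepSchedule's (type, initialValue, decayRate, step) constructor:

import org.nd4j.linalg.learning.config.IUpdater;
import org.nd4j.linalg.learning.config.Nesterovs;
import org.nd4j.linalg.schedule.ScheduleType;
import org.nd4j.linalg.schedule.StepSchedule;

public class ScheduledNesterovsSketch {
    public static void main(String[] args) {
        // Learning rate starts at 0.1 and is halved every 100 iterations; momentum stays fixed at 0.9
        IUpdater updater = new Nesterovs(new StepSchedule(ScheduleType.ITERATION, 0.1, 0.5, 100), 0.9);

        System.out.println(updater.getLearningRate(0, 0));     // 0.1
        System.out.println(updater.getLearningRate(250, 0));   // 0.025
    }
}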
 
Example #9
Source File: AdamSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? Adam.DEFAULT_ADAM_LEARNING_RATE : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    double b1 = beta1 == null ? Adam.DEFAULT_ADAM_BETA1_MEAN_DECAY : beta1.getValue(parameterValues);
    double b2 = beta2 == null ? Adam.DEFAULT_ADAM_BETA2_VAR_DECAY : beta2.getValue(parameterValues);
    double eps = epsilon == null ? Adam.DEFAULT_ADAM_EPSILON : epsilon.getValue(parameterValues);
    if(lrS == null){
        return new Adam(lr, b1, b2, eps);
    } else {
        Adam a = new Adam(lrS);
        a.setBeta1(b1);
        a.setBeta2(b2);
        a.setEpsilon(eps);
        return a;
    }
}
 
Example #10
Source File: NadamSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? Nadam.DEFAULT_NADAM_LEARNING_RATE : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    double b1 = beta1 == null ? Nadam.DEFAULT_NADAM_BETA1_MEAN_DECAY : beta1.getValue(parameterValues);
    double b2 = beta2 == null ? Nadam.DEFAULT_NADAM_BETA2_VAR_DECAY : beta2.getValue(parameterValues);
    double eps = epsilon == null ? Nadam.DEFAULT_NADAM_EPSILON : epsilon.getValue(parameterValues);
    if(lrS == null){
        return new Nadam(lr, b1, b2, eps);
    } else {
        Nadam a = new Nadam(lrS);
        a.setBeta1(b1);
        a.setBeta2(b2);
        a.setEpsilon(eps);
        return a;
    }
}
 
Example #11
Source File: RmsPropSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? RmsProp.DEFAULT_RMSPROP_LEARNING_RATE : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    if(lrS == null){
        return new RmsProp(lr);
    } else {
        return new RmsProp(lrS);
    }
}
 
Example #12
Source File: SgdSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    double lr = learningRate == null ? Sgd.DEFAULT_SGD_LR : learningRate.getValue(parameterValues);
    ISchedule lrS = learningRateSchedule == null ? null : learningRateSchedule.getValue(parameterValues);
    if(lrS == null){
        return new Sgd(lr);
    } else {
        return new Sgd(lrS);
    }
}
 
Example #13
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
private static ComputationGraphConfiguration getGraphConf(int seed, IUpdater updater) {
    Nd4j.getRandom().setSeed(seed);
    ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).graphBuilder()
                    .addInputs("in")
                    .addLayer("0", new DenseLayer.Builder().nIn(10).nOut(10).build(), "in").addLayer("1",
                                    new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nIn(10)
                                                    .nOut(10).build(),
                                    "0")
                    .setOutputs("1").build();
    return conf;
}
 
Example #14
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
private static MultiLayerConfiguration getConfCNN(int seed, IUpdater updater) {
    Nd4j.getRandom().setSeed(seed);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list()
                    .layer(0, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0)
                                    .activation(Activation.TANH).build())
                    .layer(1, new ConvolutionLayer.Builder().nOut(3).kernelSize(2, 2).stride(1, 1).padding(0, 0)
                                    .activation(Activation.TANH).build())
                    .layer(2, new OutputLayer.Builder().lossFunction(LossFunctions.LossFunction.MSE).nOut(10)
                                    .build())
                    .setInputType(InputType.convolutional(10, 10, 3)).build();
    return conf;
}
 
Example #15
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
private static MultiLayerConfiguration getConf(int seed, IUpdater updater) {
    Nd4j.getRandom().setSeed(seed);
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .weightInit(WeightInit.XAVIER).updater(updater).seed(seed).list()
                    .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(1, new OutputLayer.Builder()
                                    .lossFunction(LossFunctions.LossFunction.MSE).nIn(10).nOut(10).build())
                    .build();
    return conf;
}
 
Example #16
Source File: AdaGradSpace.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getValue(double[] parameterValues) {
    if(lrSchedule != null){
        return new AdaGrad(lrSchedule.getValue(parameterValues));
    } else {
        return new AdaGrad(learningRate.getValue(parameterValues));
    }
}
 
Example #17
Source File: CenterLossOutputLayer.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    // the centers are updated directly via alpha, so they get a NoOp updater; the configured updater applies to all other parameters
    switch (paramName) {
        case CenterLossParamInitializer.CENTER_KEY:
            return new NoOp();
        default:
            return iUpdater;
    }
}
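Since the class centers are maintained as moving averages controlled by alpha rather than trained by gradient descent, they receive a NoOp updater while every other parameter uses the configured one. A minimal sketch, assuming the standard builder and parameter-key constants (the values are illustrative):

import org.deeplearning4j.nn.conf.layers.CenterLossOutputLayer;
import org.deeplearning4j.nn.params.CenterLossParamInitializer;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class CenterLossUpdaterSketch {
    public static void main(String[] args) {
        CenterLossOutputLayer layer = new CenterLossOutputLayer.Builder()
                .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
                .activation(Activation.SOFTMAX)
                .nIn(128).nOut(10)
                .alpha(0.9).lambda(1e-4)
                .updater(new Adam(1e-3))
                .build();

        // Weights fall through to the configured updater; the centers get NoOp
        System.out.println(layer.getUpdaterByParam(DefaultParamInitializer.WEIGHT_KEY));      // Adam
        System.out.println(layer.getUpdaterByParam(CenterLossParamInitializer.CENTER_KEY));   // NoOp
    }
}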
 
Example #18
Source File: BaseLayer.java    From deeplearning4j with Apache License 2.0
/**
 * Get the updater for the given parameter. Typically the same updater will be used for all parameters, but this is
 * not necessarily the case
 *
 * @param paramName Parameter name
 * @return IUpdater for the parameter
 */
@Override
public IUpdater getUpdaterByParam(String paramName) {
    if (biasUpdater != null && initializer().isBiasParam(this, paramName)) {
        return biasUpdater;
    }
    return iUpdater;
}
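A minimal sketch of how the weight/bias split above plays out, assuming the standard updater(...)/biasUpdater(...) builder methods and the "W"/"b" keys from DefaultParamInitializer:

import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.params.DefaultParamInitializer;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.Sgd;

public class BiasUpdaterSketch {
    public static void main(String[] args) {
        // Separate updaters for weights and biases; biasUpdater is optional and falls back to the main updater
        DenseLayer layer = new DenseLayer.Builder()
                .nIn(8).nOut(4)
                .updater(new Adam(1e-3))
                .biasUpdater(new Sgd(1e-2))
                .build();

        System.out.println(layer.getUpdaterByParam(DefaultParamInitializer.WEIGHT_KEY));   // Adam(lr=0.001)
        System.out.println(layer.getUpdaterByParam(DefaultParamInitializer.BIAS_KEY));     // Sgd(lr=0.01)
    }
}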
 
Example #19
Source File: AbstractSameDiffLayer.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    if (biasUpdater != null && initializer().isBiasParam(this, paramName)) {
        return biasUpdater;
    } else if (initializer().isBiasParam(this, paramName) || initializer().isWeightParam(this, paramName)) {
        return updater;
    }
    throw new IllegalStateException("Unknown parameter key: " + paramName);
}
 
Example #20
Source File: SameDiffVertex.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    if (getVertexParams().isWeightParam(paramName)) {
        return updater;
    }
    if (getVertexParams().isBiasParam(paramName)) {
        if (biasUpdater == null) {
            return updater;
        }
        return biasUpdater;
    }
    throw new IllegalStateException("Unknown parameter name: " + paramName + " - not in weights ("
                    + getVertexParams().getWeightParameterKeys() + ") or biases ("
                    + getVertexParams().getBiasParameterKeys() + ")");
}
 
Example #21
Source File: TrainingConfig.java    From deeplearning4j with Apache License 2.0
protected TrainingConfig(IUpdater updater, List<Regularization> regularization, boolean minimize, List<String> dataSetFeatureMapping, List<String> dataSetLabelMapping,
        List<String> dataSetFeatureMaskMapping, List<String> dataSetLabelMaskMapping, List<String> lossVariables,
        Map<String, List<IEvaluation>> trainEvaluations, Map<String, Integer> trainEvaluationLabels,
        Map<String, List<IEvaluation>> validationEvaluations, Map<String, Integer> validationEvaluationLabels){
    this(updater, regularization, minimize, dataSetFeatureMapping, dataSetLabelMapping, dataSetFeatureMaskMapping, dataSetLabelMaskMapping, lossVariables);
    this.trainEvaluations = trainEvaluations;
    this.trainEvaluationLabels = trainEvaluationLabels;
    this.validationEvaluations = validationEvaluations;
    this.validationEvaluationLabels = validationEvaluationLabels;
}
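This constructor backs SameDiff training; in user code the configuration is usually assembled through TrainingConfig.Builder. A minimal sketch, assuming the builder's updater/dataSetFeatureMapping/dataSetLabelMapping methods ("input" and "label" are illustrative placeholder names):

import org.nd4j.autodiff.samediff.TrainingConfig;
import org.nd4j.linalg.learning.config.Adam;

public class TrainingConfigSketch {
    public static void main(String[] args) {
        TrainingConfig config = new TrainingConfig.Builder()
                .updater(new Adam(1e-3))            // IUpdater used for all trainable parameters
                .dataSetFeatureMapping("input")     // DataSet features feed this placeholder
                .dataSetLabelMapping("label")       // DataSet labels feed this placeholder
                .build();
    }
}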
 
Example #22
Source File: UpdaterUtils.java    From deeplearning4j with Apache License 2.0
public static boolean updaterConfigurationsEquals(Trainable layer1, String param1, Trainable layer2, String param2) {
    TrainingConfig l1 = layer1.getConfig();
    TrainingConfig l2 = layer2.getConfig();
    IUpdater u1 = l1.getUpdaterByParam(param1);
    IUpdater u2 = l2.getUpdaterByParam(param2);

    //For updaters to be equal (and hence combinable), we require that:
    //(a) The updater-specific configurations are equal (inc. LR, LR/momentum schedules etc)
    //(b) If one or more of the params are pretrainable params, they are in the same layer
    //    This last point is necessary as we don't want to modify the pretrain gradient/updater state during
    //    backprop, or modify the pretrain gradient/updater state of one layer while training another
    if (!u1.equals(u2)) {
        //Different updaters or different config
        return false;
    }

    boolean isPretrainParam1 = l1.isPretrainParam(param1);
    boolean isPretrainParam2 = l2.isPretrainParam(param2);
    if (isPretrainParam1 || isPretrainParam2) {
        //One or both of params are pretrainable.
        //Either layers differ -> don't want to combine pretrain updaters across layers
        //Or one is pretrain and the other isn't -> don't want to combine pretrain updaters within a layer
        return layer1 == layer2 && isPretrainParam1 && isPretrainParam2;
    }

    return true;
}
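The combinability check above hinges on IUpdater implementations using value-based equality (same class, same hyperparameters). A minimal sketch:

import org.nd4j.linalg.learning.config.Adam;

public class UpdaterEqualitySketch {
    public static void main(String[] args) {
        System.out.println(new Adam(1e-3).equals(new Adam(1e-3)));   // true  -> parameters can share an updater block
        System.out.println(new Adam(1e-3).equals(new Adam(1e-2)));   // false -> different config, not combinable
    }
}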
 
Example #23
Source File: BatchNormalization.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    switch (paramName) {
        case BatchNormalizationParamInitializer.BETA:
        case BatchNormalizationParamInitializer.GAMMA:
            return iUpdater;
        case BatchNormalizationParamInitializer.GLOBAL_MEAN:
        case BatchNormalizationParamInitializer.GLOBAL_VAR:
        case BatchNormalizationParamInitializer.GLOBAL_LOG_STD:
            return new NoOp();
        default:
            throw new IllegalArgumentException("Unknown parameter: \"" + paramName + "\"");
    }
}
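Gamma and beta are trained with the configured updater, while the global mean/variance (and log-std) are running statistics rather than gradient-trained parameters, hence NoOp. A minimal sketch, assuming the standard builder and the BatchNormalizationParamInitializer keys:

import org.deeplearning4j.nn.conf.layers.BatchNormalization;
import org.deeplearning4j.nn.params.BatchNormalizationParamInitializer;
import org.nd4j.linalg.learning.config.Adam;

public class BatchNormUpdaterSketch {
    public static void main(String[] args) {
        BatchNormalization bn = new BatchNormalization.Builder()
                .nOut(16)
                .updater(new Adam(1e-3))
                .build();

        // Learned scale parameter uses the configured updater
        System.out.println(bn.getUpdaterByParam(BatchNormalizationParamInitializer.GAMMA));        // Adam
        // Running statistics are not updated by gradient descent
        System.out.println(bn.getUpdaterByParam(BatchNormalizationParamInitializer.GLOBAL_MEAN));  // NoOp
    }
}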
 
Example #24
Source File: TrainCifar10Model.java    From Java-Machine-Learning-for-Computer-Vision with MIT License
private void train() throws IOException {

        ZooModel zooModel = VGG16.builder().build();
        ComputationGraph vgg16 = (ComputationGraph) zooModel.initPretrained(PretrainedType.CIFAR10);
        log.info(vgg16.summary());

        IUpdater iUpdaterWithDefaultConfig = Updater.ADAM.getIUpdaterWithDefaultConfig();
        iUpdaterWithDefaultConfig.setLrAndSchedule(0.1, null);
        FineTuneConfiguration fineTuneConf = new FineTuneConfiguration.Builder()
                .seed(1234)
//                .weightInit(WeightInit.XAVIER)
                .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                .activation(Activation.RELU)
                .updater(iUpdaterWithDefaultConfig)
                .cudnnAlgoMode(ConvolutionLayer.AlgoMode.NO_WORKSPACE)
                .miniBatch(true)
                .inferenceWorkspaceMode(WorkspaceMode.ENABLED)
                .trainingWorkspaceMode(WorkspaceMode.ENABLED)
                .pretrain(true)
                .backprop(true)
                .build();

        ComputationGraph cifar10 = new TransferLearning.GraphBuilder(vgg16)
                .setWorkspaceMode(WorkspaceMode.ENABLED)
                .fineTuneConfiguration(fineTuneConf)
                .setInputTypes(InputType.convolutionalFlat(ImageUtils.HEIGHT,
                        ImageUtils.WIDTH, 3))
                .removeVertexAndConnections("dense_2_loss")
                .removeVertexAndConnections("dense_2")
                .removeVertexAndConnections("dense_1")
                .removeVertexAndConnections("dropout_1")
                .removeVertexAndConnections("embeddings")
                .removeVertexAndConnections("flatten_1")
                .addLayer("dense_1", new DenseLayer.Builder()
                        .nIn(4096)
                        .nOut(EMBEDDINGS)
                        .activation(Activation.RELU).build(), "block3_pool")
                .addVertex("embeddings", new L2NormalizeVertex(new int[]{}, 1e-12), "dense_1")
                .addLayer("lossLayer", new CenterLossOutputLayer.Builder()
                                .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
                                .activation(Activation.SOFTMAX).nIn(EMBEDDINGS).nOut(NUM_POSSIBLE_LABELS)
                                .lambda(LAMBDA).alpha(0.9)
                                .gradientNormalization(GradientNormalization.RenormalizeL2PerLayer).build(),
                        "embeddings")
                .setOutputs("lossLayer")
                .build();

        log.info(cifar10.summary());
        File rootDir = new File("CarTracking/train_from_video_" + NUM_POSSIBLE_LABELS);
        DataSetIterator dataSetIterator = ImageUtils.createDataSetIterator(rootDir, NUM_POSSIBLE_LABELS, BATCH_SIZE);
        DataSetIterator testIterator = ImageUtils.createDataSetIterator(rootDir, NUM_POSSIBLE_LABELS, BATCH_SIZE);
        cifar10.setListeners(new ScoreIterationListener(2));
        int iEpoch = I_EPOCH;
        while (iEpoch < EPOCH_TRAINING) {
            while (dataSetIterator.hasNext()) {
                DataSet trainMiniBatchData = null;
                try {
                    trainMiniBatchData = dataSetIterator.next();
                } catch (Exception e) {
                    e.printStackTrace();
                }
                cifar10.fit(trainMiniBatchData);
            }
            iEpoch++;

            String modelName = PREFIX + NUM_POSSIBLE_LABELS + "_epoch_data_e" + EMBEDDINGS + "_b" + BATCH_SIZE + "_" + iEpoch + ".zip";
            saveProgress(cifar10, iEpoch, modelName);
            testResults(cifar10, testIterator, iEpoch, modelName);
            dataSetIterator.reset();
            log.info("iEpoch = " + iEpoch);
        }
    }
 
Example #25
Source File: DummyConfig.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    return new NoOp();
}
 
Example #26
Source File: NeuralNetConfiguration.java    From deeplearning4j with Apache License 2.0
private void copyConfigToLayer(String layerName, Layer layer) {

            if (layer.getIDropout() == null) {
                //Dropout is stateful usually - don't want to have the same instance shared by multiple layers
                layer.setIDropout(idropOut == null ? null : idropOut.clone());
            }

            if (layer instanceof BaseLayer) {
                BaseLayer bLayer = (BaseLayer) layer;
                if (bLayer.getRegularization() == null || bLayer.getRegularization().isEmpty())
                    bLayer.setRegularization(regularization);
                if (bLayer.getRegularizationBias() == null || bLayer.getRegularizationBias().isEmpty())
                    bLayer.setRegularizationBias(regularizationBias);
                if (bLayer.getActivationFn() == null)
                    bLayer.setActivationFn(activationFn);
                if (bLayer.getWeightInitFn() == null)
                    bLayer.setWeightInitFn(weightInitFn);
                if (Double.isNaN(bLayer.getBiasInit()))
                    bLayer.setBiasInit(biasInit);
                if (Double.isNaN(bLayer.getGainInit()))
                    bLayer.setGainInit(gainInit);

                //Configure weight noise:
                if(weightNoise != null && ((BaseLayer) layer).getWeightNoise() == null){
                    ((BaseLayer) layer).setWeightNoise(weightNoise.clone());
                }

                //Configure updaters:
                if(iUpdater != null && bLayer.getIUpdater() == null){
                    bLayer.setIUpdater(iUpdater.clone());   //Clone the updater to avoid shared instances - in case of setLearningRate calls later
                }
                if(biasUpdater != null && bLayer.getBiasUpdater() == null){
                    bLayer.setBiasUpdater(biasUpdater.clone());     //Clone the updater to avoid shared instances - in case of setLearningRate calls later
                }

                if(bLayer.getIUpdater() == null && iUpdater == null && bLayer.initializer().numParams(bLayer) > 0){
                    //No updater set anywhere
                    IUpdater u = new Sgd();
                    bLayer.setIUpdater(u);
                    log.warn("*** No updater configuration is set for layer {} - defaulting to {} ***", layerName, u);
                }

                if (bLayer.getGradientNormalization() == null)
                    bLayer.setGradientNormalization(gradientNormalization);
                if (Double.isNaN(bLayer.getGradientNormalizationThreshold()))
                    bLayer.setGradientNormalizationThreshold(gradientNormalizationThreshold);
            }

            if (layer instanceof ActivationLayer){
                ActivationLayer al = (ActivationLayer)layer;
                if(al.getActivationFn() == null)
                    al.setActivationFn(activationFn);
            }
        }
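In practice this means a layer without its own updater receives a clone of the global one, while a per-layer updater is left untouched. A minimal sketch (sizes and learning rates are illustrative):

import org.deeplearning4j.nn.conf.MultiLayerConfiguration;
import org.deeplearning4j.nn.conf.NeuralNetConfiguration;
import org.deeplearning4j.nn.conf.layers.BaseLayer;
import org.deeplearning4j.nn.conf.layers.DenseLayer;
import org.deeplearning4j.nn.conf.layers.OutputLayer;
import org.nd4j.linalg.activations.Activation;
import org.nd4j.linalg.learning.config.Adam;
import org.nd4j.linalg.learning.config.Sgd;
import org.nd4j.linalg.lossfunctions.LossFunctions;

public class UpdaterInheritanceSketch {
    public static void main(String[] args) {
        MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new Adam(1e-3))                                        // global default
                .list()
                .layer(0, new DenseLayer.Builder().nIn(10).nOut(10).build())   // inherits a clone of Adam(1e-3)
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                        .activation(Activation.SOFTMAX)
                        .updater(new Sgd(1e-2))                                 // per-layer override is kept as-is
                        .nIn(10).nOut(2).build())
                .build();

        // Each layer ends up with its own IUpdater instance
        BaseLayer l0 = (BaseLayer) conf.getConf(0).getLayer();
        BaseLayer l1 = (BaseLayer) conf.getConf(1).getLayer();
        System.out.println(l0.getIUpdater());   // Adam(lr=0.001)
        System.out.println(l1.getIUpdater());   // Sgd(lr=0.01)
    }
}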
 
Example #27
Source File: TestMultiModelGradientApplication.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientApplyComputationGraph() {
    int minibatch = 7;
    int nIn = 10;
    int nOut = 10;

    for (boolean regularization : new boolean[] {false, true}) {
        for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Adam(0.1)}) {

            ComputationGraphConfiguration conf =
                            new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
                                            .weightInit(WeightInit.XAVIER).updater(u)
                                            .l1(regularization ? 0.2 : 0.0)
                                            .l2(regularization ? 0.3 : 0.0).graphBuilder().addInputs("in")
                                            .addLayer("0", new DenseLayer.Builder().nIn(nIn).nOut(10).build(), "in")
                                            .addLayer("1", new DenseLayer.Builder().nIn(10).nOut(10).build(), "0")
                                            .addLayer("2", new OutputLayer.Builder(
                                                            LossFunctions.LossFunction.MCXENT)
                                                                            .activation(Activation.SOFTMAX).nIn(10)
                                                                            .nOut(nOut).build(),
                                                            "1")
                                            .setOutputs("2").build();


            Nd4j.getRandom().setSeed(12345);
            ComputationGraph net1GradCalc = new ComputationGraph(conf);
            net1GradCalc.init();

            Nd4j.getRandom().setSeed(12345);
            ComputationGraph net2GradUpd = new ComputationGraph(conf.clone());
            net2GradUpd.init();

            assertEquals(net1GradCalc.params(), net2GradUpd.params());

            INDArray f = Nd4j.rand(minibatch, nIn);
            INDArray l = Nd4j.create(minibatch, nOut);
            for (int i = 0; i < minibatch; i++) {
                l.putScalar(i, i % nOut, 1.0);
            }
            net1GradCalc.setInputs(f);
            net1GradCalc.setLabels(l);

            net2GradUpd.setInputs(f);
            net2GradUpd.setLabels(l);

            //Calculate gradient in first net, update and apply it in the second
            //Also: calculate gradient in the second net, just to be sure it isn't modified while doing updating on
            // the other net's gradient
            net1GradCalc.computeGradientAndScore();
            net2GradUpd.computeGradientAndScore();

            Gradient g = net1GradCalc.gradient();
            INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
            INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
            net2GradUpd.getUpdater().update(g, 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces());
            INDArray gAfter = g.gradient().dup();
            INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();

            assertNotEquals(gBefore, gAfter); //Net 1 gradient should be modified
            assertEquals(net2GradBefore, net2GradAfter); //But net 2 gradient should not be


            //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op
            // on the original network
            net2GradUpd.params().subi(g.gradient());

            net1GradCalc.fit(new INDArray[] {f}, new INDArray[] {l});
            assertEquals(net1GradCalc.params(), net2GradUpd.params());

            //=============================
            if (!(u instanceof Sgd)) {
                net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
            }
            assertEquals(net1GradCalc.params(), net2GradUpd.params());
            assertEquals(net1GradCalc.getUpdater().getStateViewArray(),
                            net2GradUpd.getUpdater().getStateViewArray());

            //Reset the iteration counts: without the next 2 lines the loop below fails, as net 1 is 1 iteration ahead
            net1GradCalc.getConfiguration().setIterationCount(0);
            net2GradUpd.getConfiguration().setIterationCount(0);


            for (int i = 0; i < 100; i++) {
                net1GradCalc.fit(new INDArray[] {f}, new INDArray[] {l});
                net2GradUpd.fit(new INDArray[] {f}, new INDArray[] {l});
                assertEquals(net1GradCalc.params(), net2GradUpd.params());
            }
        }
    }
}
 
Example #28
Source File: TestMultiModelGradientApplication.java    From deeplearning4j with Apache License 2.0
@Test
public void testGradientApplyMultiLayerNetwork() {
    int minibatch = 7;
    int nIn = 10;
    int nOut = 10;

    for (boolean regularization : new boolean[] {false, true}) {
        for (IUpdater u : new IUpdater[] {new Sgd(0.1), new Nesterovs(0.1), new Adam(0.1)}) {

            MultiLayerConfiguration conf =
                            new NeuralNetConfiguration.Builder().seed(12345).activation(Activation.TANH)
                                            .weightInit(WeightInit.XAVIER).updater(u)
                                            .l1(regularization ? 0.2 : 0.0)
                                            .l2(regularization ? 0.3 : 0.0).list()
                                            .layer(0, new DenseLayer.Builder().nIn(nIn).nOut(10).build())
                                            .layer(1, new DenseLayer.Builder().nIn(10).nOut(10).build()).layer(2,
                                                            new OutputLayer.Builder(
                                                                            LossFunctions.LossFunction.MCXENT)
                                                                                            .activation(Activation.SOFTMAX)
                                                                                            .nIn(10).nOut(nOut)
                                                                                            .build())
                                            .build();


            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net1GradCalc = new MultiLayerNetwork(conf);
            net1GradCalc.init();

            Nd4j.getRandom().setSeed(12345);
            MultiLayerNetwork net2GradUpd = new MultiLayerNetwork(conf.clone());
            net2GradUpd.init();

            assertEquals(net1GradCalc.params(), net2GradUpd.params());

            INDArray f = Nd4j.rand(minibatch, nIn);
            INDArray l = Nd4j.create(minibatch, nOut);
            for (int i = 0; i < minibatch; i++) {
                l.putScalar(i, i % nOut, 1.0);
            }
            net1GradCalc.setInput(f);
            net1GradCalc.setLabels(l);

            net2GradUpd.setInput(f);
            net2GradUpd.setLabels(l);

            //Calculate gradient in first net, update and apply it in the second
            //Also: calculate gradient in the second net, just to be sure it isn't modified while doing updating on
            // the other net's gradient
            net1GradCalc.computeGradientAndScore();
            net2GradUpd.computeGradientAndScore();

            Gradient g = net1GradCalc.gradient();
            INDArray gBefore = g.gradient().dup(); //Net 1 gradient should be modified
            INDArray net2GradBefore = net2GradUpd.gradient().gradient().dup(); //But net 2 gradient should not be
            net2GradUpd.getUpdater().update(net2GradUpd, g, 0, 0, minibatch, LayerWorkspaceMgr.noWorkspaces());
            INDArray gAfter = g.gradient().dup();
            INDArray net2GradAfter = net2GradUpd.gradient().gradient().dup();

            assertNotEquals(gBefore, gAfter); //Net 1 gradient should be modified
            assertEquals(net2GradBefore, net2GradAfter); //But net 2 gradient should not be


            //Also: if we apply the gradient using a subi op, we should get the same final params as if we did a fit op
            // on the original network
            net2GradUpd.params().subi(g.gradient());

            net1GradCalc.fit(f, l);
            assertEquals(net1GradCalc.params(), net2GradUpd.params());


            //=============================
            if (!(u instanceof Sgd)) {
                net2GradUpd.getUpdater().getStateViewArray().assign(net1GradCalc.getUpdater().getStateViewArray());
            }
            assertEquals(net1GradCalc.params(), net2GradUpd.params());
            assertEquals(net1GradCalc.getUpdater().getStateViewArray(),
                            net2GradUpd.getUpdater().getStateViewArray());

            //Reset the iteration counts: without the next 2 lines the loop below fails, as net 1 is 1 iteration ahead
            net1GradCalc.getLayerWiseConfigurations().setIterationCount(0);
            net2GradUpd.getLayerWiseConfigurations().setIterationCount(0);

            for (int i = 0; i < 100; i++) {
                net1GradCalc.fit(f, l);
                net2GradUpd.fit(f, l);
                assertEquals(net1GradCalc.params(), net2GradUpd.params());
            }
        }
    }
}
 
Example #29
Source File: FrozenLayerWithBackprop.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    return null;
}
 
Example #30
Source File: FrozenLayer.java    From deeplearning4j with Apache License 2.0
@Override
public IUpdater getUpdaterByParam(String paramName) {
    return null;
}