Python lasagne.updates.nesterov_momentum() Examples

The following are 12 code examples of lasagne.updates.nesterov_momentum(), drawn from open-source projects. You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module lasagne.updates.
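
Before the project examples, here is a minimal, self-contained sketch of the call itself. nesterov_momentum() takes a scalar loss expression (or a list of gradients), the list of shared parameter variables, a learning rate and a momentum coefficient, and returns an OrderedDict of update expressions that can be passed straight to theano.function(). All layer sizes and variable names below are illustrative, not taken from any of the projects:

import theano
import theano.tensor as T
import lasagne

# A toy classifier: 100 inputs, 10 softmax outputs
X = T.matrix('X')
y = T.ivector('y')
l_in = lasagne.layers.InputLayer((None, 100), input_var=X)
l_out = lasagne.layers.DenseLayer(l_in, num_units=10,
                                  nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(l_out)
loss = lasagne.objectives.categorical_crossentropy(prediction, y).mean()
params = lasagne.layers.get_all_params(l_out, trainable=True)

# OrderedDict mapping each parameter (and its velocity) to its new value
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=0.01, momentum=0.9)

train_fn = theano.function([X, y], loss, updates=updates)
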
Example #1
Source File: lasagne_net.py    From BirdCLEF-Baseline with MIT License
def net_updates(net, loss, lr):

    # Get all trainable parameters (weights) of our net
    params = l.get_all_params(net, trainable=True)

    # Choose the update rule based on the config; Adam, Nesterov momentum
    # and plain SGD are supported
    if cfg.OPTIMIZER == 'adam':
        param_updates = updates.adam(loss, params, learning_rate=lr, beta1=0.9)
    elif cfg.OPTIMIZER == 'nesterov':
        param_updates = updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9)
    elif cfg.OPTIMIZER == 'sgd':
        param_updates = updates.sgd(loss, params, learning_rate=lr)
    else:
        raise ValueError('Unknown optimizer: {}'.format(cfg.OPTIMIZER))

    return param_updates

#################### TRAIN FUNCTION ##################### 
Example #2
Source File: theano_funcs.py    From adversarial-autoencoder with MIT License
def create_encoder_decoder_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    X_batch = T.fmatrix('X_batch')

    X_hat = get_output(layers['l_decoder_out'], X, deterministic=False)

    # reconstruction loss
    encoder_decoder_loss = T.mean(
        T.mean(T.sqr(X - X_hat), axis=1)
    )

    if apply_updates:
        # all layers that participate in the forward pass should be updated
        encoder_decoder_params = get_all_params(
            layers['l_decoder_out'], trainable=True)

        encoder_decoder_updates = nesterov_momentum(
            encoder_decoder_loss, encoder_decoder_params, 0.01, 0.9)
    else:
        encoder_decoder_updates = None

    encoder_decoder_func = theano.function(
        inputs=[theano.In(X_batch)],
        outputs=encoder_decoder_loss,
        updates=encoder_decoder_updates,
        givens={
            X: X_batch,
        },
    )

    return encoder_decoder_func


# forward/backward (optional) pass for discriminator 
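
A hypothetical usage of create_encoder_decoder_func(); the layers dict is assumed to have been built by the project's model-construction code, and X_train is a placeholder name for the training data:

# Compile one function that applies the Nesterov updates and one that doesn't
train_step = create_encoder_decoder_func(layers, apply_updates=True)
eval_step = create_encoder_decoder_func(layers, apply_updates=False)

batch = X_train[:128].astype('float32')
train_loss = train_step(batch)  # forward pass + parameter update
valid_loss = eval_step(batch)   # forward pass only
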
Example #3
Source File: resnet.py    From luna16 with BSD 2-Clause "Simplified" License
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # Set up the loss we aim to minimize; with categorical cross-entropy,
    # Y should be class indices (ints), not one-hot vectors. The clip keeps
    # the predictions away from 0 and 1 to avoid log(0).
    loss = lasagne.objectives.categorical_crossentropy(T.clip(output_train,0.000001,0.999999), Y)
    loss = loss.mean()

    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y), dtype=theano.config.floatX)

    # if using ResNet use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty

    # set up loss functions for validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(T.clip(output_test,0.000001,0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty

    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX)

    # Get the network parameters and set up SGD with Nesterov momentum;
    # l_r is a shared variable so it can be changed during training
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    #updates = adam(loss, params, learning_rate=l_r)

    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)

    # set up training and prediction functions
    train_fn = theano.function(inputs=[X,Y], outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:,1]], updates=updates)
    valid_fn = theano.function(inputs=[X,Y], outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary, output_test[:,1]])

    return train_fn, valid_fn, l_r 
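
Because l_r is a Theano shared variable, the learning rate can be changed between epochs without recompiling the training function. A minimal sketch, assuming LR_SCHEDULE maps epoch numbers to learning rates as in the source file and num_epochs is defined elsewhere:

train_fn, valid_fn, l_r = define_updates(output_layer, X, Y)

for epoch in range(num_epochs):
    if epoch in LR_SCHEDULE:
        l_r.set_value(np.float32(LR_SCHEDULE[epoch]))
    # ... iterate over mini-batches and call train_fn(x_batch, y_batch) ...
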
Example #4
Source File: parameter_updates.py    From dcase_task2 with MIT License
def get_update_nesterov_momentum(m=0.9):
    """
    Compute updates with Nesterov momentum
    """

    def update(all_grads, all_params, learning_rate):
        return nesterov_momentum(all_grads, all_params, learning_rate,
                                 momentum=m)

    return update 
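
The factory returns a closure with the (all_grads, all_params, learning_rate) signature that this project's training loop expects; note that nesterov_momentum() accepts either a loss expression or a precomputed list of gradients as its first argument. A hypothetical usage, with loss, all_params, X and y standing in for values built elsewhere:

update_fn = get_update_nesterov_momentum(m=0.9)

all_grads = theano.grad(loss, all_params)
updates = update_fn(all_grads, all_params, learning_rate=0.01)
train_fn = theano.function([X, y], loss, updates=updates)
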
Example #5
Source File: nn.py    From kaggle_diabetic with MIT License
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True, 
            batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file, 
                            loss='kappa', greater_is_better=True,),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 1000,
        'verbose': 2,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('kappa', util.kappa),

    }
    args.update(kwargs)
    net = Net(**args)
    return net 
Example #6
Source File: ClassificationNN.py    From AirTicketPredicting with MIT License
def __init__(self, isTrain, isOutlierRemoval, isNN=1):
        super(ClassificationNN, self).__init__(isTrain, isOutlierRemoval, isNN=1)
        # data preprocessing
        self.dataPreprocessing()

        self.net1 = NeuralNet(
                        layers=[  # three layers: input, one hidden, output
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 12),  # input dimension is 12
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=3,  # number of units in hidden layer
                        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
                        output_num_units=1,  # output dimension is 1

                        # optimization method:
                        update=nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.9,

                        regression=True,  # flag to indicate we're dealing with a regression problem
                        max_epochs=25,  # we want to train this many epochs
                        verbose=0,
                        ) 
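
With nolearn's NeuralNet, the update callable and its hyperparameters are plain constructor arguments (update=nesterov_momentum, update_learning_rate, update_momentum), and training is a single call. A hypothetical usage, with X_train, y_train and X_test as placeholder names:

net1.fit(X_train, y_train)    # runs up to max_epochs of Nesterov-momentum SGD
y_hat = net1.predict(X_test)  # sigmoid outputs in [0, 1], since regression=True
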
Example #7
Source File: theano_funcs.py    From adversarial-autoencoder with MIT License
def create_discriminator_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    pz = T.fmatrix('pz')

    X_batch = T.fmatrix('X_batch')
    pz_batch = T.fmatrix('pz_batch')

    # the discriminator receives samples from q(z|x) and p(z)
    # and should predict to which distribution each sample belongs
    discriminator_outputs = get_output(
        layers['l_discriminator_out'],
        inputs={
            layers['l_prior_in']: pz,
            layers['l_encoder_in']: X,
        },
        deterministic=False,
    )

    # label samples from q(z|x) as 1 and samples from p(z) as 0
    discriminator_targets = T.vertical_stack(
        T.ones((X_batch.shape[0], 1)),
        T.zeros((pz_batch.shape[0], 1))
    )

    discriminator_loss = T.mean(
        T.nnet.binary_crossentropy(
            discriminator_outputs,
            discriminator_targets,
        )
    )

    if apply_updates:
        # only layers that are part of the discriminator should be updated
        discriminator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, discriminator=True)

        discriminator_updates = nesterov_momentum(
            discriminator_loss, discriminator_params, 0.1, 0.0)
    else:
        discriminator_updates = None

    discriminator_func = theano.function(
        inputs=[
            theano.In(X_batch),
            theano.In(pz_batch),
        ],
        outputs=discriminator_loss,
        updates=discriminator_updates,
        givens={
            X: X_batch,
            pz: pz_batch,
        },
    )

    return discriminator_func


# forward/backward (optional) pass for the generator
# note that the generator is the same network as the encoder,
# but updated separately 
Example #8
Source File: theano_funcs.py    From adversarial-autoencoder with MIT License
def create_generator_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    X_batch = T.fmatrix('X_batch')

    # no need to pass an input to l_prior_in here
    generator_outputs = get_output(
        layers['l_encoder_out'], X, deterministic=False)

    # so pass the output of the generator as the output of the concat layer
    discriminator_outputs = get_output(
        layers['l_discriminator_out'],
        inputs={
            layers['l_prior_encoder_concat']: generator_outputs,
        },
        deterministic=False
    )

    # the discriminator learns to predict 1 for q(z|x),
    # so the generator should fool it into predicting 0;
    # build one zero target per sample in the batch
    generator_targets = T.zeros((X_batch.shape[0], 1))

    # so the generator needs to push the discriminator's output to 0
    generator_loss = T.mean(
        T.nnet.binary_crossentropy(
            discriminator_outputs,
            generator_targets,
        )
    )

    if apply_updates:
        # only layers that are part of the generator (i.e., encoder)
        # should be updated
        generator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, generator=True)

        generator_updates = nesterov_momentum(
            generator_loss, generator_params, 0.1, 0.0)
    else:
        generator_updates = None

    generator_func = theano.function(
        inputs=[
            theano.In(X_batch),
        ],
        outputs=generator_loss,
        updates=generator_updates,
        givens={
            X: X_batch,
        },
    )

    return generator_func 
Example #9
Source File: updates.py    From Deep-SVDD with MIT License
def get_updates(nnet,
                train_obj,
                trainable_params,
                solver=None):

    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad", "rmsprop", "adadelta", "adam", "adamax")

    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj,
                                trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj,
                                              trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj,
                                    trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj,
                                     trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj,
                                 trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj,
                                   trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates 
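
A hedged usage sketch for get_updates(); nnet, Cfg and train_obj come from the Deep-SVDD codebase, and the output_layer attribute as well as the inputs/targets variables are assumptions for illustration:

trainable_params = lasagne.layers.get_all_params(nnet.output_layer,
                                                 trainable=True)
updates = get_updates(nnet, train_obj, trainable_params, solver="nesterov")
train_fn = theano.function([inputs, targets], train_obj, updates=updates)
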
Example #10
Source File: ClassificationUniformBlending.py    From AirTicketPredicting with MIT License
def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationUniformBlending, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()

        # create logistic regression object
        self.logreg = linear_model.LogisticRegression(tol=1e-6, penalty='l1', C=0.0010985411419875584)

        # create adaboost object
        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=5,
            algorithm="SAMME.R")

        # create knn object
        self.knn = neighbors.KNeighborsClassifier(2, weights='uniform')

        # create decision tree object
        self.decisiontree = DecisionTreeClassifier(max_depth=45, max_features='log2')

        # create neural network object
        self.net1 = NeuralNet(
                        layers=[  # three layers: input, one hidden, output
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 12),  # input dimension is 12
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=3,  # number of units in hidden layer
                        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
                        output_num_units=1,  # output dimension is 1

                        # optimization method:
                        update=nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.9,

                        regression=True,  # flag to indicate we're dealing with a regression problem
                        max_epochs=25,  # we want to train this many epochs
                        verbose=0,
                        )

        # create PLA object
        self.pla = Perceptron()

        # create random forest object
        self.rf = RandomForestClassifier(max_features='log2', n_estimators=20, max_depth=30) 
Example #11
Source File: ClassificationLinearBlending.py    From AirTicketPredicting with MIT License
def __init__(self, isTrain, isOutlierRemoval=0):
        super(ClassificationLinearBlending, self).__init__(isTrain, isOutlierRemoval)
        # data preprocessing
        self.dataPreprocessing()

        # create logistic regression object
        self.logreg = linear_model.LogisticRegression(tol=1e-6, penalty='l1', C=0.0010985411419875584)

        # create adaboost object
        self.dt_stump = DecisionTreeClassifier(max_depth=10)
        self.ada = AdaBoostClassifier(
            base_estimator=self.dt_stump,
            learning_rate=1,
            n_estimators=5,
            algorithm="SAMME.R")

        # create knn object
        self.knn = neighbors.KNeighborsClassifier(6, weights='uniform')

        # create decision tree object
        self.decisiontree = DecisionTreeClassifier(max_depth=50)

        # create neural network object
        self.net1 = NeuralNet(
                        layers=[  # three layers: input, one hidden, output
                            ('input', layers.InputLayer),
                            ('hidden', layers.DenseLayer),
                            #('hidden2', layers.DenseLayer),
                            ('output', layers.DenseLayer),
                            ],
                        # layer parameters:
                        input_shape=(None, 12),  # input dimension is 12
                        hidden_num_units=6,  # number of units in hidden layer
                        #hidden2_num_units=3,  # number of units in hidden layer
                        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
                        output_num_units=1,  # output dimension is 1

                        # optimization method:
                        update=nesterov_momentum,
                        update_learning_rate=0.002,
                        update_momentum=0.9,

                        regression=True,  # flag to indicate we're dealing with a regression problem
                        max_epochs=25,  # we want to train this many epochs
                        verbose=0,
                        ) 
Example #12
Source File: bidnn.py    From BiDNN with GNU Affero General Public License v3.0
def __init__(self, conf):
        self.conf = conf

        if self.conf.act == "linear":
            self.conf.act = linear
        elif self.conf.act == "sigmoid":
            self.conf.act = sigmoid
        elif self.conf.act == "relu":
            self.conf.act = rectify
        elif self.conf.act == "tanh":
            self.conf.act = tanh
        else:
            raise ValueError("Unknown activation function", self.conf.act)

        input_var_first   = T.matrix('inputs1')
        input_var_second  = T.matrix('inputs2')
        target_var        = T.matrix('targets')

        # create network        
        self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)
        
        self.out = get_output(self.autoencoder)
        
        loss = squared_error(self.out, target_var)
        loss = loss.mean()
        
        params = get_all_params(self.autoencoder, trainable=True)
        updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)
        
        # training function
        self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)
        
        # function to reconstruct the inputs
        test_reconstruction = get_output(self.autoencoder, deterministic=True)
        self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)
        
        # encoding function
        test_encode = get_output([encoder_first, encoder_second], deterministic=True)
        self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

        # utils
        blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
        self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
        self.blas_scal = blas('scal', np.array([], dtype=float))

        # load weights if necessary
        if self.conf.load_model is not None:
            self.load_model()