Python lasagne.updates.nesterov_momentum() Examples
The following are 12 code examples of lasagne.updates.nesterov_momentum(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the lasagne.updates module.
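Before the examples, a minimal self-contained sketch of the call itself: nesterov_momentum(loss_or_grads, params, learning_rate, momentum=0.9) returns an OrderedDict of update expressions that you pass to theano.function. The toy network below is illustrative only and is not taken from any example on this page.

import theano
import theano.tensor as T
import lasagne

# illustrative toy setup: one dense softmax layer
input_var = T.matrix('inputs')
target_var = T.ivector('targets')

network = lasagne.layers.InputLayer((None, 100), input_var=input_var)
network = lasagne.layers.DenseLayer(
    network, num_units=10, nonlinearity=lasagne.nonlinearities.softmax)

prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var).mean()

# the call this page documents: an OrderedDict mapping each parameter
# (and its momentum velocity) to its updated expression
params = lasagne.layers.get_all_params(network, trainable=True)
updates = lasagne.updates.nesterov_momentum(
    loss, params, learning_rate=0.01, momentum=0.9)

train_fn = theano.function([input_var, target_var], loss, updates=updates)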
Example #1
Source File: lasagne_net.py From BirdCLEF-Baseline with MIT License | 6 votes |
def net_updates(net, loss, lr):

    # Get all trainable parameters (weights) of our net
    params = l.get_all_params(net, trainable=True)

    # We use the adam update; other options are available
    if cfg.OPTIMIZER == 'adam':
        param_updates = updates.adam(loss, params, learning_rate=lr, beta1=0.9)
    elif cfg.OPTIMIZER == 'nesterov':
        param_updates = updates.nesterov_momentum(loss, params, learning_rate=lr, momentum=0.9)
    elif cfg.OPTIMIZER == 'sgd':
        param_updates = updates.sgd(loss, params, learning_rate=lr)

    return param_updates

#################### TRAIN FUNCTION ####################
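The updates dictionary returned by net_updates() would then be wired into the train function announced by the banner above. A sketch of that wiring, assuming net, loss, and the input/target tensors were built earlier (the names below are hypothetical):

import theano

# hypothetical wiring: `net`, `loss`, `input_tensor`, and `target_tensor`
# are assumed to have been defined when the model was built
param_updates = net_updates(net, loss, lr=0.01)
train_net = theano.function([input_tensor, target_tensor], loss,
                            updates=param_updates)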
Example #2
Source File: theano_funcs.py From adversarial-autoencoder with MIT License | 5 votes |
def create_encoder_decoder_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    X_batch = T.fmatrix('X_batch')

    X_hat = get_output(layers['l_decoder_out'], X, deterministic=False)

    # reconstruction loss
    encoder_decoder_loss = T.mean(
        T.mean(T.sqr(X - X_hat), axis=1)
    )

    if apply_updates:
        # all layers that participate in the forward pass should be updated
        encoder_decoder_params = get_all_params(
            layers['l_decoder_out'], trainable=True)

        encoder_decoder_updates = nesterov_momentum(
            encoder_decoder_loss, encoder_decoder_params, 0.01, 0.9)
    else:
        encoder_decoder_updates = None

    encoder_decoder_func = theano.function(
        inputs=[theano.In(X_batch)],
        outputs=encoder_decoder_loss,
        updates=encoder_decoder_updates,
        givens={
            X: X_batch,
        },
    )

    return encoder_decoder_func

# forward/backward (optional) pass for discriminator
Example #3
Source File: resnet.py From luna16 with BSD 2-Clause "Simplified" License | 5 votes |
def define_updates(output_layer, X, Y):
    output_train = lasagne.layers.get_output(output_layer)
    output_test = lasagne.layers.get_output(output_layer, deterministic=True)

    # set up the loss that we aim to minimize; with categorical cross entropy, our Y should be ints, not one-hot
    loss = lasagne.objectives.categorical_crossentropy(T.clip(output_train, 0.000001, 0.999999), Y)
    loss = loss.mean()

    acc = T.mean(T.eq(T.argmax(output_train, axis=1), Y), dtype=theano.config.floatX)

    # if using ResNet, use L2 regularization
    all_layers = lasagne.layers.get_all_layers(output_layer)
    l2_penalty = lasagne.regularization.regularize_layer_params(all_layers, lasagne.regularization.l2) * P.L2_LAMBDA
    loss = loss + l2_penalty

    # set up loss functions for validation dataset
    test_loss = lasagne.objectives.categorical_crossentropy(T.clip(output_test, 0.000001, 0.999999), Y)
    test_loss = test_loss.mean()
    test_loss = test_loss + l2_penalty

    test_acc = T.mean(T.eq(T.argmax(output_test, axis=1), Y), dtype=theano.config.floatX)

    # get parameters from network and set up SGD with Nesterov momentum to update them;
    # l_r is a shared variable so it can be changed during training
    l_r = theano.shared(np.array(LR_SCHEDULE[0], dtype=theano.config.floatX))
    params = lasagne.layers.get_all_params(output_layer, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=l_r, momentum=P.MOMENTUM)
    #updates = adam(loss, params, learning_rate=l_r)

    prediction_binary = T.argmax(output_train, axis=1)
    test_prediction_binary = T.argmax(output_test, axis=1)

    # set up training and prediction functions
    train_fn = theano.function(inputs=[X, Y],
                               outputs=[loss, l2_penalty, acc, prediction_binary, output_train[:, 1]],
                               updates=updates)
    valid_fn = theano.function(inputs=[X, Y],
                               outputs=[test_loss, l2_penalty, test_acc, test_prediction_binary, output_test[:, 1]])

    return train_fn, valid_fn, l_r
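Because l_r is a Theano shared variable, the Nesterov update's learning rate can be lowered between epochs without recompiling train_fn. A sketch, assuming LR_SCHEDULE is a dict keyed by epoch number as the code above suggests:

# hypothetical epoch loop: anneal the learning rate per LR_SCHEDULE
for epoch in range(num_epochs):
    if epoch in LR_SCHEDULE:
        l_r.set_value(np.array(LR_SCHEDULE[epoch], dtype=theano.config.floatX))
    # ... iterate minibatches and call train_fn(X_batch, Y_batch) ...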
Example #4
Source File: parameter_updates.py From dcase_task2 with MIT License | 5 votes |
def get_update_nesterov_momentum(m=0.9):
    """ Compute update with nesterov momentum """

    def update(all_grads, all_params, learning_rate):
        return nesterov_momentum(all_grads, all_params, learning_rate, momentum=m)

    return update
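This factory works because nesterov_momentum() accepts either a loss expression or a list of precomputed gradient expressions as its first argument. A sketch of calling the returned closure, with network and loss assumed from an earlier step (the names below are hypothetical):

# hypothetical usage: compute gradients once, then apply the factory's update
all_params = lasagne.layers.get_all_params(network, trainable=True)
all_grads = theano.grad(loss, all_params)

update_fn = get_update_nesterov_momentum(m=0.9)
updates = update_fn(all_grads, all_params, learning_rate=0.01)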
Example #5
Source File: nn.py From kaggle_diabetic with MIT License | 5 votes |
def create_net(config, **kwargs):
    args = {
        'layers': config.layers,
        'batch_iterator_train': iterator.ResampleIterator(
            config, batch_size=config.get('batch_size_train')),
        'batch_iterator_test': iterator.SharedIterator(
            config, deterministic=True, batch_size=config.get('batch_size_test')),
        'on_epoch_finished': [
            Schedule('update_learning_rate', config.get('schedule'),
                     weights_file=config.final_weights_file),
            SaveBestWeights(weights_file=config.weights_file,
                            loss='kappa', greater_is_better=True),
            SaveWeights(config.weights_epoch, every_n_epochs=5),
            SaveWeights(config.weights_best, every_n_epochs=1, only_best=True),
        ],
        'objective': get_objective(),
        'use_label_encoder': False,
        'eval_size': 0.1,
        'regression': True,
        'max_epochs': 1000,
        'verbose': 2,
        'update_learning_rate': theano.shared(
            util.float32(config.get('schedule')[0])),
        'update': nesterov_momentum,
        'update_momentum': 0.9,
        'custom_score': ('kappa', util.kappa),
    }
    args.update(kwargs)
    net = Net(**args)
    return net
Example #6
Source File: ClassificationNN.py From AirTicketPredicting with MIT License | 5 votes |
def __init__(self, isTrain, isOutlierRemoval, isNN=1):
    super(ClassificationNN, self).__init__(isTrain, isOutlierRemoval, isNN=1)
    # data preprocessing
    self.dataPreprocessing()

    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 12),  # input dimension is 12
        hidden_num_units=6,  # number of units in hidden layer
        #hidden2_num_units=3,  # number of units in hidden layer
        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
        output_num_units=1,  # output dimension is 1

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.9,

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=25,  # we want to train this many epochs
        verbose=0,
    )
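With nolearn's NeuralNet, the Nesterov update and its hyperparameters are plain constructor keywords, so training reduces to one fit call. A sketch with made-up data shaped for the regression=True setup above (the constructor arguments below are hypothetical):

import numpy as np

# hypothetical data: 12 features per row, 0/1 targets as float32
X = np.random.rand(100, 12).astype(np.float32)
y = np.random.randint(0, 2, size=(100, 1)).astype(np.float32)

model = ClassificationNN(isTrain=1, isOutlierRemoval=0)
model.net1.fit(X, y)              # runs up to max_epochs of Nesterov-momentum SGD
predictions = model.net1.predict(X)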
Example #7
Source File: theano_funcs.py From adversarial-autoencoder with MIT License | 4 votes |
def create_discriminator_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    pz = T.fmatrix('pz')

    X_batch = T.fmatrix('X_batch')
    pz_batch = T.fmatrix('pz_batch')

    # the discriminator receives samples from q(z|x) and p(z)
    # and should predict to which distribution each sample belongs
    discriminator_outputs = get_output(
        layers['l_discriminator_out'],
        inputs={
            layers['l_prior_in']: pz,
            layers['l_encoder_in']: X,
        },
        deterministic=False,
    )

    # label samples from q(z|x) as 1 and samples from p(z) as 0
    discriminator_targets = T.vertical_stack(
        T.ones((X_batch.shape[0], 1)),
        T.zeros((pz_batch.shape[0], 1))
    )

    discriminator_loss = T.mean(
        T.nnet.binary_crossentropy(
            discriminator_outputs,
            discriminator_targets,
        )
    )

    if apply_updates:
        # only layers that are part of the discriminator should be updated
        discriminator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, discriminator=True)

        discriminator_updates = nesterov_momentum(
            discriminator_loss, discriminator_params, 0.1, 0.0)
    else:
        discriminator_updates = None

    discriminator_func = theano.function(
        inputs=[
            theano.In(X_batch),
            theano.In(pz_batch),
        ],
        outputs=discriminator_loss,
        updates=discriminator_updates,
        givens={
            X: X_batch,
            pz: pz_batch,
        },
    )

    return discriminator_func

# forward/backward (optional) pass for the generator
# note that the generator is the same network as the encoder,
# but updated separately
Example #8
Source File: theano_funcs.py From adversarial-autoencoder with MIT License | 4 votes |
def create_generator_func(layers, apply_updates=False):
    X = T.fmatrix('X')
    X_batch = T.fmatrix('X_batch')

    # no need to pass an input to l_prior_in here
    generator_outputs = get_output(
        layers['l_encoder_out'], X, deterministic=False)

    # so pass the output of the generator as the output of the concat layer
    discriminator_outputs = get_output(
        layers['l_discriminator_out'],
        inputs={
            layers['l_prior_encoder_concat']: generator_outputs,
        },
        deterministic=False
    )

    # the discriminator learns to predict 1 for q(z|x),
    # so the generator should fool it into predicting 0
    generator_targets = T.zeros_like(X_batch.shape[0])

    # so the generator needs to push the discriminator's output to 0
    generator_loss = T.mean(
        T.nnet.binary_crossentropy(
            discriminator_outputs,
            generator_targets,
        )
    )

    if apply_updates:
        # only layers that are part of the generator (i.e., encoder)
        # should be updated
        generator_params = get_all_params(
            layers['l_discriminator_out'], trainable=True, generator=True)

        generator_updates = nesterov_momentum(
            generator_loss, generator_params, 0.1, 0.0)
    else:
        generator_updates = None

    generator_func = theano.function(
        inputs=[
            theano.In(X_batch),
        ],
        outputs=generator_loss,
        updates=generator_updates,
        givens={
            X: X_batch,
        },
    )

    return generator_func
Example #9
Source File: updates.py From Deep-SVDD with MIT License | 4 votes |
def get_updates(nnet, train_obj, trainable_params, solver=None):

    implemented_solvers = ("sgd", "momentum", "nesterov", "adagrad",
                           "rmsprop", "adadelta", "adam", "adamax")

    if solver not in implemented_solvers:
        nnet.sgd_solver = "adam"
    else:
        nnet.sgd_solver = solver

    if nnet.sgd_solver == "sgd":
        updates = l_updates.sgd(train_obj, trainable_params,
                                learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "momentum":
        updates = l_updates.momentum(train_obj, trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     momentum=Cfg.momentum)
    elif nnet.sgd_solver == "nesterov":
        updates = l_updates.nesterov_momentum(train_obj, trainable_params,
                                              learning_rate=Cfg.learning_rate,
                                              momentum=Cfg.momentum)
    elif nnet.sgd_solver == "adagrad":
        updates = l_updates.adagrad(train_obj, trainable_params,
                                    learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "rmsprop":
        updates = l_updates.rmsprop(train_obj, trainable_params,
                                    learning_rate=Cfg.learning_rate,
                                    rho=Cfg.rho)
    elif nnet.sgd_solver == "adadelta":
        updates = l_updates.adadelta(train_obj, trainable_params,
                                     learning_rate=Cfg.learning_rate,
                                     rho=Cfg.rho)
    elif nnet.sgd_solver == "adam":
        updates = l_updates.adam(train_obj, trainable_params,
                                 learning_rate=Cfg.learning_rate)
    elif nnet.sgd_solver == "adamax":
        updates = l_updates.adamax(train_obj, trainable_params,
                                   learning_rate=Cfg.learning_rate)

    return updates
Example #10
Source File: ClassificationUniformBlending.py From AirTicketPredicting with MIT License | 4 votes |
def __init__(self, isTrain, isOutlierRemoval=0):
    super(ClassificationUniformBlending, self).__init__(isTrain, isOutlierRemoval)
    # data preprocessing
    self.dataPreprocessing()

    # create logistic regression object
    self.logreg = linear_model.LogisticRegression(tol=1e-6, penalty='l1', C=0.0010985411419875584)

    # create adaboost object
    self.dt_stump = DecisionTreeClassifier(max_depth=10)
    self.ada = AdaBoostClassifier(
        base_estimator=self.dt_stump,
        learning_rate=1,
        n_estimators=5,
        algorithm="SAMME.R")

    # create knn object
    self.knn = neighbors.KNeighborsClassifier(2, weights='uniform')

    # create decision tree object
    self.decisiontree = DecisionTreeClassifier(max_depth=45, max_features='log2')

    # create neural network object
    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 12),  # input dimension is 12
        hidden_num_units=6,  # number of units in hidden layer
        #hidden2_num_units=3,  # number of units in hidden layer
        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
        output_num_units=1,  # output dimension is 1

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.9,

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=25,  # we want to train this many epochs
        verbose=0,
    )

    # create PLA object
    self.pla = Perceptron()

    # create random forest object
    self.rf = RandomForestClassifier(max_features='log2', n_estimators=20, max_depth=30)
Example #11
Source File: ClassificationLinearBlending.py From AirTicketPredicting with MIT License | 4 votes |
def __init__(self, isTrain, isOutlierRemoval=0):
    super(ClassificationLinearBlending, self).__init__(isTrain, isOutlierRemoval)
    # data preprocessing
    self.dataPreprocessing()

    # create logistic regression object
    self.logreg = linear_model.LogisticRegression(tol=1e-6, penalty='l1', C=0.0010985411419875584)

    # create adaboost object
    self.dt_stump = DecisionTreeClassifier(max_depth=10)
    self.ada = AdaBoostClassifier(
        base_estimator=self.dt_stump,
        learning_rate=1,
        n_estimators=5,
        algorithm="SAMME.R")

    # create knn object
    self.knn = neighbors.KNeighborsClassifier(6, weights='uniform')

    # create decision tree object
    self.decisiontree = DecisionTreeClassifier(max_depth=50)

    # create neural network object
    self.net1 = NeuralNet(
        layers=[  # three layers: one hidden layer
            ('input', layers.InputLayer),
            ('hidden', layers.DenseLayer),
            #('hidden2', layers.DenseLayer),
            ('output', layers.DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, 12),  # input dimension is 12
        hidden_num_units=6,  # number of units in hidden layer
        #hidden2_num_units=3,  # number of units in hidden layer
        output_nonlinearity=lasagne.nonlinearities.sigmoid,  # output layer uses sigmoid function
        output_num_units=1,  # output dimension is 1

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=0.002,
        update_momentum=0.9,

        regression=True,  # flag to indicate we're dealing with a regression problem
        max_epochs=25,  # we want to train this many epochs
        verbose=0,
    )
Example #12
Source File: bidnn.py From BiDNN with GNU Affero General Public License v3.0 | 4 votes |
def __init__(self, conf):
    self.conf = conf

    if self.conf.act == "linear":
        self.conf.act = linear
    elif self.conf.act == "sigmoid":
        self.conf.act = sigmoid
    elif self.conf.act == "relu":
        self.conf.act = rectify
    elif self.conf.act == "tanh":
        self.conf.act = tanh
    else:
        raise ValueError("Unknown activation function", self.conf.act)

    input_var_first = T.matrix('inputs1')
    input_var_second = T.matrix('inputs2')
    target_var = T.matrix('targets')

    # create network
    self.autoencoder, encoder_first, encoder_second = self.__create_toplogy__(input_var_first, input_var_second)

    self.out = get_output(self.autoencoder)

    loss = squared_error(self.out, target_var)
    loss = loss.mean()

    params = get_all_params(self.autoencoder, trainable=True)
    updates = nesterov_momentum(loss, params, learning_rate=self.conf.lr, momentum=self.conf.momentum)

    # training function
    self.train_fn = theano.function([input_var_first, input_var_second, target_var], loss, updates=updates)

    # function to reconstruct
    test_reconstruction = get_output(self.autoencoder, deterministic=True)
    self.reconstruction_fn = theano.function([input_var_first, input_var_second], test_reconstruction)

    # encoding function
    test_encode = get_output([encoder_first, encoder_second], deterministic=True)
    self.encoding_fn = theano.function([input_var_first, input_var_second], test_encode)

    # utils
    blas = lambda name, ndarray: scipy.linalg.get_blas_funcs((name,), (ndarray,))[0]
    self.blas_nrm2 = blas('nrm2', np.array([], dtype=float))
    self.blas_scal = blas('scal', np.array([], dtype=float))

    # load weights if necessary
    if self.conf.load_model is not None:
        self.load_model()