Python theano.tensor.sqr() Examples
The following are 30 code examples of theano.tensor.sqr(), collected from open-source projects.
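As a quick orientation before the project examples: theano.tensor.sqr(x) squares a tensor element-wise (it is equivalent to x ** 2). A minimal, self-contained sketch:

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], T.sqr(x))   # element-wise square
print(f(np.array([[1., -2.], [3., 4.]], dtype=theano.config.floatX)))
# [[ 1.  4.]
#  [ 9. 16.]]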
Example #1
Source File: toolbox.py From Theano-Lights with MIT License

def adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    # Adam optimizer: T.sqr(g) accumulates the (biased) second-moment estimate of each gradient.
    updates = []
    grads = T.grad(cost, params)
    i = shared(floatX(0.))
    i_t = i + 1.
    # bias-correction factors for the first and second moments
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        #g = g + srnd.normal(g.shape, avg = 0.0, std = 0.01, dtype=theano.config.floatX)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        #m_t += srnd.normal(m_t.shape, std = 0.01, dtype=theano.config.floatX)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates, norm_gs(params, grads)
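The updates list returned by helpers like this one is typically compiled into a training function. A minimal sketch of that wiring, assuming symbolic variables cost, params, X and y are already defined elsewhere (they are not part of this example):

import theano
# hypothetical wiring: cost, params, X and y are assumed to exist
updates, grad_norm = adam(cost, params, lr=0.0002)
train_fn = theano.function([X, y], cost, updates=updates)
# each call to train_fn(x_batch, y_batch) performs one Adam step on params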
Example #2
Source File: conv_net.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #3
Source File: model1.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    ''' adam gradient descent updates '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#open previous lowest training cost if it exists
Example #4
Source File: convlstm_within_subject.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    ''' adaptive moment estimation gradient descent '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #5
Source File: deconv.py From adversarial with BSD 3-Clause "New" or "Revised" License

def _modify_updates(self, updates):
    """
    Replaces the values in `updates` if needed to enforce the options set in
    the __init__ method, including `max_kernel_norm`.

    Parameters
    ----------
    updates : OrderedDict
        A dictionary mapping parameters (including parameters not belonging
        to this model) to updated values of those parameters. The dictionary
        passed in contains the updates proposed by the learning algorithm.
        This function modifies the dictionary directly. The modified version
        will be compiled and executed by the learning algorithm.
    """
    if self.max_kernel_norm is not None:
        W, = self.transformer.get_params()
        if W in updates:
            updated_W = updates[W]
            row_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=(0, 1, 2)))
            desired_norms = T.clip(row_norms, 0, self.max_kernel_norm)
            scales = desired_norms / (1e-7 + row_norms)
            updates[W] = (updated_W * scales.dimshuffle('x', 'x', 'x', 0))
Example #6
Source File: model2.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    ''' adam gradient descent updates '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load saved lstm if it exists, else initialize new lstm
Example #7
Source File: lstm_between_subject.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    ''' adaptive moment estimation gradient descent '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #8
Source File: convlstm_between_subject.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    ''' adaptive moment estimation gradient descent '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #9
Source File: convnet.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #10
Source File: conv2d_crossvalidation.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #11
Source File: conv2d_predict.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #12
Source File: batch_normalization.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #13
Source File: residual_gradient_descent.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #14
Source File: gradient_descent.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #15
Source File: residual_batch_normalization.py From Projects with MIT License

def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #16
Source File: aa.py From D-VAE with MIT License

def __call__(self, env):
    self.merge(env)
    # eliminate identities
    if 0:
        print('SKIPPING optimizations')
    else:
        for opt in self.ident_opt_list:
            opt(env)
        for opt in self.sqr:
            opt(env)
        self.gemm_opt_1(env)
        self.gemm_opt_2(env)
        self.merge(env)
Example #17
Source File: train_face_128.py From EvolutionaryGAN with MIT License

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    noise = T.matrix('noise')
    generator = models_uncond.build_generator_128(noise, ngf=ngf)

    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    elif loss_type == 'ls':
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise], lasagne.layers.get_output(generator, deterministic=True))

    return train_g, gen_fn, generator
Example #18
Source File: train_bedroom_64.py From EvolutionaryGAN with MIT License

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    noise = T.matrix('noise')
    generator = models_uncond.build_generator_64(noise, ngf=ngf)

    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    elif loss_type == 'ls':
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise], lasagne.layers.get_output(generator, deterministic=True))

    return train_g, gen_fn, generator
Example #19
Source File: train_toy_8G.py From EvolutionaryGAN with MIT License

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)

    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    elif loss_type == 'ls':
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise], lasagne.layers.get_output(generator, deterministic=True))

    return train_g, gen_fn, generator
Example #20
Source File: train_toy_25G.py From EvolutionaryGAN with MIT License

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, DIM=64):
    noise = T.matrix('noise')
    generator = models_uncond.build_generator_toy(noise, nd=DIM)

    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    elif loss_type == 'ls':
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise], lasagne.layers.get_output(generator, deterministic=True))

    return train_g, gen_fn, generator
Example #21
Source File: train_cifar10.py From EvolutionaryGAN with MIT License

def create_G(loss_type=None, discriminator=None, lr=0.0002, b1=0.5, ngf=64):
    noise = T.matrix('noise')
    generator = models_uncond.build_generator_32(noise, ngf=ngf)

    Tgimgs = lasagne.layers.get_output(generator)
    Tfake_out = lasagne.layers.get_output(discriminator, Tgimgs)

    if loss_type == 'trickLogD':
        generator_loss = lasagne.objectives.binary_crossentropy(Tfake_out, 1).mean()
    elif loss_type == 'minimax':
        generator_loss = -lasagne.objectives.binary_crossentropy(Tfake_out, 0).mean()
    elif loss_type == 'ls':
        generator_loss = T.mean(T.sqr((Tfake_out - 1)))

    generator_params = lasagne.layers.get_all_params(generator, trainable=True)
    updates_g = lasagne.updates.adam(generator_loss, generator_params, learning_rate=lr, beta1=b1)
    train_g = theano.function([noise], generator_loss, updates=updates_g)
    gen_fn = theano.function([noise], lasagne.layers.get_output(generator, deterministic=True))

    return train_g, gen_fn, generator
Example #22
Source File: durmodel_elements.py From kaldi-nnet-dur-model with BSD 3-Clause "New" or "Revised" License

def cost(self, Y, Y_hat):
    mean = Y_hat[:, 0]  #+ 1.6091597151048114
    sigma = T.exp(Y_hat[:, 1])  #+ 0.26165911509618789
    y_target = Y[:, 0]
    cost_multiplier = Y[:, 1]
    return (self.logprob(y_target, mean, sigma) * cost_multiplier).sum() / (1.0 * cost_multiplier.sum())

#@wraps(Layer.cost)
#def cost(self, Y, Y_hat):
#
#    return self.cost_from_cost_matrix(self.cost_matrix(Y, Y_hat))
#
#@wraps(Layer.cost_from_cost_matrix)
#def cost_from_cost_matrix(self, cost_matrix):
#
#    return cost_matrix.sum(axis=1).mean()
#
#@wraps(Layer.cost_matrix)
#def cost_matrix(self, Y, Y_hat):
#
#    return T.sqr(Y - Y_hat)
Example #23
Source File: theano_utils.py From seq2seq-keyphrase with MIT License

def dot_2d(k, M, b=None, g=None):
    # k: (nb_samples, memory_width)
    # M: (nb_samples, memory_dim, memory_width)

    # norms of keys and memories
    # k_norm = T.sqrt(T.sum(T.sqr(k), 1)) + 1e-5    # (nb_samples,)
    # M_norm = T.sqrt(T.sum(T.sqr(M), 2)) + 1e-5    # (nb_samples, memory_dim,)

    k = k[:, None, :]  # (nb_samples, 1, memory_width)
    value = k * M
    if b is not None:
        b = b[:, None, :]
        value *= b  # (nb_samples, memory_dim,)
    if g is not None:
        g = g[None, None, :]
        value *= g
    sim = T.sum(value, axis=2)
    return sim
Example #24
Source File: optimizers.py From theano-recurrence with MIT License

def adam(cost, params, lr=0.001, b1=0.9, b2=0.999, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.dtype(theano.config.floatX).type(1))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        # grad_clip is not defined here; it is assumed to be a module-level constant in the source file
        g = T.clip(g, -grad_clip, grad_clip)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Example #25
Source File: utils.py From hred-latent-piecewise with GNU General Public License v3.0

def Adagrad(grads, lr):
    updates = OrderedDict()
    for param in grads.keys():
        # sum_square_grad := \sum g^2
        sum_square_grad = sharedX(param.get_value() * 0.)
        if param.name is not None:
            sum_square_grad.name = 'sum_square_grad_' + param.name

        # Accumulate gradient
        new_sum_squared_grad = sum_square_grad + T.sqr(grads[param])

        # Compute update
        delta_x_t = (- lr / T.sqrt(numpy.float32(1e-5) + new_sum_squared_grad)) * grads[param]

        # Apply update
        updates[sum_square_grad] = new_sum_squared_grad
        updates[param] = param + delta_x_t

    return updates
Example #26
Source File: adam.py From hred-latent-piecewise with GNU General Public License v3.0

def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    varlist = []
    i = sharedX(0.)
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in grads.items():
        m = sharedX(p.get_value() * 0., name=p.name + '_adam_optimizer_m')
        v = sharedX(p.get_value() * 0., name=p.name + '_adam_optimizer_v')
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
        varlist.append(m)
        varlist.append(v)
    updates.append((i, i_t))
    return updates, varlist
Example #27
Source File: utils.py From hred-qs with BSD 3-Clause "New" or "Revised" License

def Adagrad(grads, lr):
    """
    Taken from pylearn2,
    https://github.com/lisa-lab/pylearn2/blob/master/pylearn2/training_algorithms/learning_rule.py
    """
    updates = OrderedDict()
    for param in grads.keys():
        # sum_square_grad := \sum g^2
        sum_square_grad = sharedX(param.get_value() * 0.)
        if param.name is not None:
            sum_square_grad.name = 'sum_square_grad_' + param.name

        # Accumulate gradient
        new_sum_squared_grad = sum_square_grad + T.sqr(grads[param])

        # Compute update
        delta_x_t = (- lr / T.sqrt(numpy.float32(1e-5) + new_sum_squared_grad)) * grads[param]

        # Apply update
        updates[sum_square_grad] = new_sum_squared_grad
        updates[param] = param + delta_x_t

    return updates
Example #28
Source File: adam.py From hred-qs with BSD 3-Clause "New" or "Revised" License

def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    i = sharedX(0.)
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in grads.items():
        m = sharedX(p.get_value() * 0.)
        v = sharedX(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Example #29
Source File: adam.py From gated-graph-transformer-network with MIT License

def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.array(0., theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Example #30
Source File: objectives.py From seq2seq-keyphrase with MIT License

def squared_hinge(y_true, y_pred):
    return T.sqr(T.maximum(1. - y_true * y_pred, 0.)).mean(axis=-1)
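A quick numeric check of the squared hinge loss, assuming labels in {-1, +1}; the sample values below are illustrative only:

import numpy as np

y_true = np.array([[ 1., -1.,  1.]])
y_pred = np.array([[ 0.8,  0.3, -0.2]])
margins = np.maximum(1. - y_true * y_pred, 0.)   # [[0.2, 1.3, 1.2]]
loss = np.square(margins).mean(axis=-1)          # approximately [1.057]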