Python theano.tensor.grad() Examples
The following are 30 code examples of theano.tensor.grad(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
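Before diving into the project examples, here is a minimal sketch of the basic pattern they all share: theano.tensor.grad() takes a scalar cost and returns symbolic gradients with respect to the variables passed as wrt, and those gradients are then wired into the updates of a compiled theano.function. This snippet is illustrative only; the names x, w, cost, and train are not taken from any of the projects below.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')                                                     # symbolic input
w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')   # trainable parameter
cost = T.sum((T.dot(x, w) - 1.) ** 2)                                 # scalar cost
g_w = T.grad(cost, wrt=w)                                             # symbolic gradient d(cost)/d(w)

# One step of plain gradient descent, compiled into a callable.
train = theano.function([x], cost, updates=[(w, w - 0.1 * g_w)])
train(np.arange(3, dtype=theano.config.floatX))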
Example #1
Source File: updates.py From iGAN with MIT License | 6 votes |
def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        acc = theano.shared(p.get_value() * 0.)
        acc_delta = theano.shared(p.get_value() * 0.)
        acc_new = self.rho * acc + (1 - self.rho) * g ** 2
        updates.append((acc, acc_new))

        update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
        updated_p = p - self.lr * update
        updated_p = self.regularizer.weight_regularize(updated_p)
        updates.append((p, updated_p))

        acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
        updates.append((acc_delta, acc_delta_new))
    return updates
Example #2
Source File: test_opt.py From D-VAE with MIT License | 6 votes |
def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix),
                     (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad) for node
                       in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Example #3
Source File: hgru4rec.py From hgru4rec with MIT License | 6 votes |
def adam(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.decay)
    v2 = np.float32(1.0 - self.decay)
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    meang = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    countt = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = v1 * acc + v2 * grad ** 2
        meang_new = v1 * meang + v2 * grad
        countt_new = countt + 1
        updates[acc] = acc_new
        updates[meang] = meang_new
        updates[countt] = countt_new
    else:
        acc_s = acc[sample_idx]
        meang_s = meang[sample_idx]
        countt_s = countt[sample_idx]
        acc_new = v1 * acc_s + v2 * grad ** 2
        meang_new = v1 * meang_s + v2 * grad
        countt_new = countt_s + 1.0
        updates[acc] = T.set_subtensor(acc_s, acc_new)
        updates[meang] = T.set_subtensor(meang_s, meang_new)
        updates[countt] = T.set_subtensor(countt_s, countt_new)
    return (meang_new / (1 - v1 ** countt_new)) / (T.sqrt(acc_new / (1 - v1 ** countt_new)) + epsilon)
Example #4
Source File: blocks.py From spinn with MIT License | 6 votes |
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6, grads=None):
    # From:
    # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
    if grads is None:
        grads = T.grad(cost=cost, wrt=params)
    assert len(grads) == len(params)

    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32),
                            name="%s/rms/acc" % p.name)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling

        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))

    return updates
Example #5
Source File: adam.py From gated-graph-transformer-network with MIT License | 6 votes |
def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.array(0., theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Example #6
Source File: test_basic.py From D-VAE with MIT License | 6 votes |
def test_csm_grad(self):
    for sparsetype in ('csr', 'csc'):
        x = tensor.vector()
        y = tensor.ivector()
        z = tensor.ivector()
        s = tensor.ivector()
        call = getattr(sp, sparsetype + '_matrix')
        spm = call(random_lil((300, 400), config.floatX, 5))
        out = tensor.grad(dense_from_sparse(
            CSM(sparsetype)(x, y, z, s)
        ).sum(), x)
        self._compile_and_check([x, y, z, s],
                                [out],
                                [spm.data, spm.indices, spm.indptr,
                                 spm.shape],
                                (CSMGrad, CSMGradC)
                                )
Example #7
Source File: pooling.py From Depth-Map-Prediction with GNU General Public License v3.0 | 6 votes |
def test_cmrnorm():
    from theano.tests.unittest_tools import verify_grad

    xtest = np.random.rand(2,8,3,4)
    xtest = xtest.astype(theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    x.tag.test_value = xtest

    y = cmrnorm(x, input_shape=xtest.shape[1:])
    f = theano.function([x], y, mode='DEBUG_MODE')
    f(xtest)

    f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)),
                        mode='DEBUG_MODE')
    f(xtest)
    theano.printing.debugprint(f)

    T.verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]),
                  (xtest,),
                  rng=np.random.RandomState(0))

    print 'cmrnorm passed'
Example #8
Source File: hgru4rec.py From hgru4rec with MIT License | 6 votes |
def adadelta(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.decay)
    v2 = np.float32(1.0 - self.decay)
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    upd = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = acc + grad ** 2
        updates[acc] = acc_new
        grad = T.sqrt(upd + epsilon) * grad
        upd_new = v1 * upd + v2 * grad ** 2
        updates[upd] = upd_new
    else:
        acc_s = acc[sample_idx]
        acc_new = acc_s + grad ** 2
        updates[acc] = T.set_subtensor(acc_s, acc_new)
        upd_s = upd[sample_idx]
        upd_new = v1 * upd_s + v2 * grad ** 2
        updates[upd] = T.set_subtensor(upd_s, upd_new)
        grad = T.sqrt(upd_s + epsilon) * grad
    gradient_scaling = T.cast(T.sqrt(acc_new + epsilon), theano.config.floatX)
    return grad / gradient_scaling
Example #9
Source File: test_nnet.py From D-VAE with MIT License | 6 votes |
def test_local_softmax_grad_optimization_and_big_input(self):
    """Test the Logsoftmax's grad substitution.

    Check that Log(Softmax(x))'s grad is substituted with Logsoftmax(x)'s
    grad and that the new operation does not explode for big inputs.
    Note that only the grad is checked.
    """
    m = theano.config.mode
    m = theano.compile.get_mode(m)
    m.check_isfinite = False
    # some inputs that are large to make the gradient explode in the non
    # optimized case
    a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))

    def myfunc(x):
        sm = tensor.nnet.softmax(x)
        logsm = tensor.log(sm)
        return logsm

    # We set step to 0.1 because for big values we need a big epsilon
    utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
    f = theano.function([], myfunc(a))
    self.assertTrue(hasattr(f.maker.fgraph.outputs[0].tag, 'trace'))
Example #10
Source File: test_stack.py From spinn with MIT License | 6 votes |
def test_speed(self):
    top = self.stack.final_stack[-self.batch_size:]
    cost = self._make_cost(top)
    error_signal = T.grad(cost, top)

    # Build automatic backprop function.
    self.stack.make_backprop_scan(error_signal, [self.y],
                                  compute_embedding_gradients=False)

    f = theano.function(
        [self.X, self.transitions, self.y],
        [cost] + self.stack.gradients.values(),
        updates=self.stack.scan_updates + self.stack.bscan_updates)

    theano.printing.debugprint(f.maker.fgraph.outputs[1])

    for t in range(10):
        self._run_batch(f)
Example #11
Source File: aa.py From D-VAE with MIT License | 6 votes |
def __init__(self):
    super(M, self).__init__()

    x = T.matrix('x')  # input, target
    self.w = module.Member(T.matrix('w'))  # weights
    self.a = module.Member(T.vector('a'))  # hid bias
    self.b = module.Member(T.vector('b'))  # output bias

    self.hid = T.tanh(T.dot(x, self.w) + self.a)
    hid = self.hid

    self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
    out = self.out

    self.err = 0.5 * T.sum((out - x)**2)
    err = self.err

    params = [self.w, self.a, self.b]
    gparams = T.grad(err, params)
    updates = [(p, p - 0.01 * gp) for p, gp in zip(params, gparams)]

    self.step = module.Method([x], err, updates=dict(updates))
Example #12
Source File: residual_gradient_descent.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #13
Source File: rbm_pretraining.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #14
Source File: rbm_pretraining.py From Projects with MIT License | 6 votes |
def get_cost_updates(self, lr=0.1, persistent=None, k=1):
    pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
    if persistent is None:
        chain_start = ph_sample
    else:
        chain_start = persistent
    ([pre_sigmoid_nvs, nv_means, nv_samples, pre_sigmoid_nhs, nh_means, nh_samples], updates) = \
        theano.scan(self.gibbs_step,
                    outputs_info=[None, None, None, None, None, chain_start],
                    n_steps=k, name="gibbs_step")
    chain_end = nv_samples[-1]
    cost = T.mean(self.free_energy(self.input)) - T.mean(self.free_energy(chain_end))
    gparams = T.grad(cost, self.params, consider_constant=[chain_end])
    for gparam, param in zip(gparams, self.params):
        updates[param] = param - gparam * T.cast(lr, dtype=theano.config.floatX)
    if persistent:
        updates[persistent] = nh_samples[-1]
        monitoring_cost = self.get_pseudo_likelihood_cost(updates)
    else:
        monitoring_cost = self.get_reconstruction_cost(updates, pre_sigmoid_nvs[-1])
    return monitoring_cost, updates
Example #15
Source File: batch_normalization.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #16
Source File: conv2d_crossvalidation.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #17
Source File: convnet.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #18
Source File: convlstm_between_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #19
Source File: lstm_within_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #20
Source File: lstm_between_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #21
Source File: model2.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adam gradient descent updates
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load saved lstm if it exists, else initialize new lstm
Example #22
Source File: model1.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adam gradient descent updates
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#open previous lowest training cost if it exists
Example #23
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Example #24
Source File: conv_net.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #25
Source File: theano_nn.py From Projects with MIT License | 6 votes |
def __init__(self, classes, hidden_layers, features, nodes_per_hidden_layer, learning_rate, regularization):
    self.hidden_layers = []
    self.hidden_layers.append(layer(features, nodes_per_hidden_layer))
    for i in range(hidden_layers-1):
        self.hidden_layers.append(layer(nodes_per_hidden_layer, nodes_per_hidden_layer))
    self.output_layer = layer(nodes_per_hidden_layer, classes)
    self.params = []
    for l in self.hidden_layers:
        self.params.extend(l.get_params())
    self.params.extend(self.output_layer.get_params())
    self.A = T.matrix()
    self.t = T.matrix()
    self.s = 1/(1+T.exp(-T.dot(self.A, self.params[0])-self.params[1]))
    for i in range(hidden_layers):
        self.s = 1/(1+T.exp(-T.dot(self.s, self.params[2*(i+1)])-self.params[2*(i+1)+1]))
    self.cost = -self.t*T.log(self.s)-(1-self.t)*T.log(1-self.s)
    self.cost = self.cost.mean()
    for i in range(hidden_layers+1):
        self.cost += regularization*(self.params[2*i]**2).mean()
    self.gparams = [T.grad(self.cost, param) for param in self.params]
    self.propogate = theano.function([self.A, self.t], self.cost,
                                     updates=[(param, param-learning_rate*gparam) for param, gparam in zip(self.params, self.gparams)],
                                     allow_input_downcast=True)
    self.classify = theano.function([self.A], self.s, allow_input_downcast=True)
Example #26
Source File: convolutional_nn.py From Projects with MIT License | 6 votes |
def __init__(self, convolutional_layers, feature_maps, filter_shapes, poolsize, feedforward_layers, feedforward_nodes, classes, learning_rate, regularization):
    self.input = T.tensor4()
    self.convolutional_layers = []
    self.convolutional_layers.append(convolutional_layer(self.input, feature_maps[1], feature_maps[0], filter_shapes[0][0], filter_shapes[0][1], poolsize[0]))
    for i in range(1, convolutional_layers):
        self.convolutional_layers.append(convolutional_layer(self.convolutional_layers[i-1].output, feature_maps[i+1], feature_maps[i], filter_shapes[i][0], filter_shapes[i][1], poolsize[i]))
    self.feedforward_layers = []
    self.feedforward_layers.append(feedforward_layer(self.convolutional_layers[-1].output.flatten(2), flattened, feedforward_nodes[0]))
    for i in range(1, feedforward_layers):
        self.feedforward_layers.append(feedforward_layer(self.feedforward_layers[i-1].output, feedforward_nodes[i-1], feedforward_nodes[i]))
    self.output_layer = feedforward_layer(self.feedforward_layers[-1].output, feedforward_nodes[-1], classes)
    self.params = []
    for l in self.convolutional_layers + self.feedforward_layers:
        self.params.extend(l.get_params())
    self.params.extend(self.output_layer.get_params())
    self.target = T.matrix()
    self.output = self.output_layer.output
    self.cost = -self.target*T.log(self.output)-(1-self.target)*T.log(1-self.output)
    self.cost = self.cost.mean()
    for i in range(convolutional_layers+feedforward_layers+1):
        self.cost += regularization*(self.params[2*i]**2).mean()
    self.gparams = [T.grad(self.cost, param) for param in self.params]
    self.propogate = theano.function([self.input, self.target], self.cost,
                                     updates=[(param, param-learning_rate*gparam) for param, gparam in zip(self.params, self.gparams)],
                                     allow_input_downcast=True)
    self.classify = theano.function([self.input], self.output, allow_input_downcast=True)
Example #27
Source File: updates.py From iGAN with MIT License | 6 votes |
def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)

    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)

        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates
Example #28
Source File: gradient_descent.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #29
Source File: optimizers.py From CAPTCHA-breaking with MIT License | 5 votes |
def get_gradients(self, loss, params):
    grads = T.grad(loss, params)
    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        norm = T.sqrt(sum([T.sum(g ** 2) for g in grads]))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
    return grads
Example #30
Source File: test_nnet.py From D-VAE with MIT License | 5 votes |
def test_softmax_grad_optimizations_vector(self):
    x = tensor.vector('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    xe = op(softmax_op(x), one_of_n)
    sum_xe = tensor.sum(xe)
    g_x = tensor.grad(sum_xe, x)
    fgraph = gof.FunctionGraph(
        [x, one_of_n],
        [g_x])

    # print 'BEFORE'
    # for node in fgraph.toposort():
    #     print node.op, node.inputs
    # print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    # print 'AFTER'
    # for node in fgraph.toposort():
    #     print node.op, node.inputs

    has_cx1hot = False
    has_cx1hotdx = False
    has_softmax = False
    has_softmaxdx = False
    for node in fgraph.toposort():
        if node.op == crossentropy_softmax_argmax_1hot_with_bias:
            has_cx1hot = True
        if node.op == crossentropy_softmax_1hot_with_bias_dx:
            has_cx1hotdx = True
        if node.op == softmax_op:
            has_softmax = True
        if node.op == softmax_grad:
            has_softmaxdx = True

    assert not has_cx1hot
    assert has_cx1hotdx
    assert has_softmax
    assert not has_softmaxdx