Python theano.function() Examples
The following are 30 code examples of theano.function(), collected from open-source projects.
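All of the examples revolve around the same compile step, so here is a minimal, self-contained sketch of theano.function() before diving in (this snippet is illustrative only and is not taken from any of the projects below):

import theano
import theano.tensor as T

# Declare symbolic inputs and build a symbolic expression graph.
x = T.dscalar('x')
y = T.dscalar('y')
z = x ** 2 + y

# Compile the graph into a callable; arguments map positionally to inputs.
f = theano.function([x, y], z)

print(f(2.0, 3.0))  # prints 7.0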
Example #1
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    '''Apply batch normalization on x given mean, var, beta and gamma.
    '''
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if mean.ndim == 1:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = range(x.ndim - 1)
    else:
        reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]]

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)

# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated
Example #2
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def rmsprop(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                  name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                   name='%s_rgrad' % k)
                     for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup + rgup + rg2up,
                                    profile=profile)

    updir = [theano.shared(p.get_value() * numpy.float32(0.),
                           name='%s_updir' % k)
             for k, p in tparams.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                            running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(itemlist(tparams), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update
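The split into two compiled functions is the common Theano optimizer pattern in this codebase: f_grad_shared runs the forward/backward pass and accumulates gradient statistics into shared variables, then f_update applies the parameter step. A hedged sketch of the calling order (the minibatch iterator and the exact input signature are assumptions for illustration, not part of nmt.py as shown):

# Assumed setup: `lr = tensor.scalar(name='lr')`, and `inp` is the list of
# input tensors (e.g. source/target batches) that `cost` depends on.
f_grad_shared, f_update = rmsprop(lr, tparams, grads, inp, cost)

for batch in train_iterator:          # hypothetical minibatch iterator
    cost_val = f_grad_shared(*batch)  # accumulate grads into shared vars
    f_update(0.0001)                  # apply the RMSProp step at this lr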
Example #3
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def adadelta(lr, tparams, grads, inp, cost):
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                 name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    rg2_new = [0.95 * rg2 + 0.05 * (g ** 2)
               for rg2, g in zip(running_grads2, grads)]
    rg2up = [(rg2, r_n) for rg2, r_n in zip(running_grads2, rg2_new)]

    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(grads, running_up2, rg2_new)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    inp += [lr]
    f_update = theano.function(inp, cost, updates=rg2up + ru2up + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_update
Example #4
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0
def __call__(self, inputs):
    assert isinstance(inputs, (list, tuple))
    return self.function(*inputs)
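For context, this __call__ belongs to the Keras Theano backend's Function wrapper, which stores a compiled theano.function and unpacks a list of inputs into it. A minimal, self-contained re-creation of the pattern (a simplified, assumed class shape, for illustration only):

import theano
import theano.tensor as T

class Function(object):
    '''Wraps a compiled theano.function; callers always pass a list.'''
    def __init__(self, inputs, outputs):
        self.function = theano.function(inputs, outputs)

    def __call__(self, inputs):
        assert isinstance(inputs, (list, tuple))
        return self.function(*inputs)

a = T.dscalar('a')
double = Function([a], a * 2)
print(double([3.0]))  # prints 6.0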
Example #5
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def debugging_adadelta(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                  name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                 name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup + rg2up,
                                    profile=profile)

    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2,
                                     running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    f_update = theano.function([lr], [], updates=ru2up + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update
Example #6
Source File: pooling.py From Depth-Map-Prediction with GNU General Public License v3.0
def test_cmrnorm():
    from theano.tests.unittest_tools import verify_grad

    xtest = np.random.rand(2, 8, 3, 4)
    xtest = xtest.astype(theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    x.tag.test_value = xtest

    y = cmrnorm(x, input_shape=xtest.shape[1:])
    f = theano.function([x], y, mode='DEBUG_MODE')
    f(xtest)

    f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)),
                        mode='DEBUG_MODE')
    f(xtest)
    theano.printing.debugprint(f)

    T.verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]),
                  (xtest,), rng=np.random.RandomState(0))

    print 'cmrnorm passed'
Example #7
Source File: test_activations.py From CAPTCHA-breaking with MIT License
def test_tanh():
    from keras.activations import tanh as t
    test_values = get_standard_values()

    x = T.vector()
    exp = t(x)
    f = theano.function([x], exp)

    result = f(test_values)
    expected = [math.tanh(v) for v in test_values]
    print(result)
    print(expected)
    list_assert_equal(result, expected)
Example #8
Source File: test_activations.py From CAPTCHA-breaking with MIT License
def test_softmax():
    from keras.activations import softmax as s

    # Test using a reference implementation of softmax
    def softmax(values):
        m = max(values)
        values = numpy.array(values)
        e = numpy.exp(values - m)
        dist = list(e / numpy.sum(e))
        return dist

    x = T.vector()
    exp = s(x)
    f = theano.function([x], exp)
    test_values = get_standard_values()

    result = f(test_values)
    expected = softmax(test_values)
    print(str(result))
    print(str(expected))
    list_assert_equal(result, expected)
Example #9
Source File: classifier.py From spinn with MIT License
def build_cost(logits, targets):
    """
    Build a classification cost function.
    """
    # Clip gradients coming from the cost function.
    logits = theano.gradient.grad_clip(
        logits, -1. * FLAGS.clipping_max_value, FLAGS.clipping_max_value)

    predicted_dist = T.nnet.softmax(logits)
    costs = T.nnet.categorical_crossentropy(predicted_dist, targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    acc = 1. - T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))

    return cost, acc
Example #10
Source File: fat_classifier.py From spinn with MIT License
def build_cost(logits, targets):
    """
    Build a classification cost function.
    """
    # Clip gradients coming from the cost function.
    logits = theano.gradient.grad_clip(
        logits, -1. * FLAGS.clipping_max_value, FLAGS.clipping_max_value)

    predicted_dist = T.nnet.softmax(logits)
    costs = T.nnet.categorical_crossentropy(predicted_dist, targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    acc = 1. - T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))

    return cost, acc
Example #11
Source File: test_stack.py From spinn with MIT License
def test_speed(self):
    top = self.stack.final_stack[-self.batch_size:]
    cost = self._make_cost(top)
    error_signal = T.grad(cost, top)

    # Build automatic backprop function.
    self.stack.make_backprop_scan(error_signal, [self.y],
                                  compute_embedding_gradients=False)

    f = theano.function(
        [self.X, self.transitions, self.y],
        [cost] + self.stack.gradients.values(),
        updates=self.stack.scan_updates + self.stack.bscan_updates)
    theano.printing.debugprint(f.maker.fgraph.outputs[1])

    for t in range(10):
        self._run_batch(f)
Example #12
Source File: dA.py From deeplearn_hsi with BSD 2-Clause "Simplified" License
def get_corrupted_input(self, input, corruption_level):
    """This function keeps ``1 - corruption_level`` entries of the inputs
    the same and zeroes out a randomly selected subset of size
    ``corruption_level``.

    Note : the first argument of theano.rng.binomial is the shape (size)
           of the random numbers it should produce;
           the second argument is the number of trials;
           the third argument is the probability of success of any trial.

           This will produce an array of 0s and 1s, where each entry is 1
           with probability ``1 - corruption_level`` and 0 with
           probability ``corruption_level``.

           The binomial function returns int64 by default. int64
           multiplied by the input type (floatX) always returns float64.
           To keep all data in floatX when floatX is float32, we set the
           dtype of the binomial to floatX. As the value of the binomial
           is always 0 or 1 in our case, this doesn't change the result.
           This is needed to allow the GPU to work correctly, as it only
           supports float32 for now.
    """
    return self.theano_rng.binomial(size=input.shape, n=1,
                                    p=1 - corruption_level,
                                    dtype=theano.config.floatX) * input
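The docstring's masking trick is easy to verify in isolation: a binomial draw with n=1 yields a 0/1 mask which, multiplied elementwise into the input, zeroes out roughly ``corruption_level`` of the entries. A standalone sketch of the same pattern, using RandomStreams directly instead of the class's theano_rng attribute:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

theano_rng = RandomStreams(seed=123)
x = T.matrix('x')
corruption_level = 0.3

# Each mask entry is 1 with probability 1 - corruption_level, else 0.
mask = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                           dtype=theano.config.floatX)
corrupt = theano.function([x], mask * x)

print(corrupt(np.ones((2, 5), dtype=theano.config.floatX)))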
Example #13
Source File: solver.py From 3D-R2N2 with MIT License
def save(self, training_losses, save_dir, step):
    '''Save the current network parameters to the save_dir and make a
    symlink to the latest param so that the training function can easily
    load the latest model'''
    save_path = os.path.join(save_dir, 'weights.%d' % (step))
    self.net.save(save_path)

    # Make a symlink for weights.npy
    symlink_path = os.path.join(save_dir, 'weights.npy')
    if os.path.lexists(symlink_path):
        os.remove(symlink_path)

    # Make a symlink to the latest network params
    os.symlink("%s.npy" % os.path.abspath(save_path), symlink_path)

    # Write the losses
    with open(os.path.join(save_dir, 'loss.%d.txt' % step), 'w') as f:
        f.write('\n'.join([str(l) for l in training_losses]))
Example #14
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_network(self):
    '''This function returns the weight matrix and bias vectors of each
    hidden layer in the final network after training.'''
    weights_all_layer = []
    bias_all_layer = []
    bias_prime_all_layer = []

    for dA_layer in self.dA_layers:
        weight = dA_layer.W.get_value(borrow=True)
        bias = dA_layer.b.get_value(borrow=True)
        bias_prime = dA_layer.b_prime.get_value(borrow=True)
        weights_all_layer.append(weight)
        bias_all_layer.append(bias)
        bias_prime_all_layer.append(bias_prime)

    return weights_all_layer, bias_all_layer, bias_prime_all_layer
Example #15
Source File: layer_utils.py From CAPTCHA-breaking with MIT License
def print_layer_shapes(model, input_shapes):
    """
    Utility function to print the shape of the output at each layer of a
    Model

    Arguments:
        model: instance of Model / Merge
        input_shapes: dict (Graph), list of tuples (Merge) or tuple
            (Sequential)
    """
    if model.__class__.__name__ in ['Sequential', 'Merge']:
        # in this case input_shapes is a tuple, or a list [shape1, shape2]
        if not isinstance(input_shapes[0], tuple):
            input_shapes = [input_shapes]

        inputs = model.get_input(train=False)
        if not isinstance(inputs, list):
            inputs = [inputs]

        input_dummy = [np.zeros(shape, dtype=np.float32)
                       for shape in input_shapes]
        layers = model.layers

    elif model.__class__.__name__ == 'Graph':
        # in this case input_shapes is a dictionary
        inputs = [model.inputs[name].input
                  for name in model.input_order]
        input_dummy = [np.zeros(input_shapes[name], dtype=np.float32)
                       for name in model.input_order]
        layers = [model.nodes[c['name']] for c in model.node_config]

    print("input shapes : ", input_shapes)
    for l in layers:
        shape_f = theano.function(inputs,
                                  l.get_output(train=False).shape,
                                  on_unused_input='ignore')
        out_shape = tuple(shape_f(*input_dummy))
        config = l.get_config()
        print('shape after %s: %s' % (config['name'], out_shape))
Example #16
Source File: skipthoughts.py From text-to-image with MIT License
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print 'Loading model parameters...'
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print 'Compiling encoders...'
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print 'Loading tables...'
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
Example #17
Source File: skipthoughts.py From text-to-image with MIT License
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
Example #18
Source File: check_callbacks.py From CAPTCHA-breaking with MIT License
def on_train_begin(self, logs={}):
    self.imgs = Frames(n_plots=5)

    layers_0_ids = np.random.choice(32, 16, replace=False)
    self.test_layer0 = theano.function(
        [self.model.get_input()],
        self.model.layers[1].get_output(train=False)[0, layers_0_ids])

    layers_1_ids = np.random.choice(64, 36, replace=False)
    self.test_layer1 = theano.function(
        [self.model.get_input()],
        self.model.layers[5].get_output(train=False)[0, layers_1_ids])

    self.test_layer2 = theano.function(
        [self.model.get_input()],
        self.model.layers[10].get_output(train=False)[0])
Example #19
Source File: dcgan_theano.py From iGAN with MIT License
def def_gen(self, gen_params, gen_pl, n_layers, n_f, nc):
    z = T.matrix()
    gx = gen_test(z, gen_params, gen_pl, n_layers=n_layers, n_f=n_f,
                  nc=nc, use_tanh=False)
    print('COMPILING...')
    t = time()
    _gen = theano.function([z], gx)
    print('%.2f seconds to compile _gen function' % (time() - t))
    return _gen
Example #20
Source File: iGAN_predict.py From iGAN with MIT License
def def_predict(model_P):
    print('COMPILING...')
    t = time()
    x = T.tensor4()
    z = model_P(x)
    _predict = theano.function([x], [z])
    print('%.2f seconds to compile _predict function' % (time() - t))
    return _predict
Example #21
Source File: iGAN_predict.py From iGAN with MIT License
def def_bfgs(model_G, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()

    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()

    tanh = activations.Tanh()
    gx = model_G(tanh(z))

    # Note: `is` performs identity (not equality) comparison; this relies
    # on CPython interning short string literals.
    if layer is 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t, layer=layer,
                                     shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)

    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]

    _invert = theano.function(inputs=[z, x, x_f], outputs=output)
    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert, z
Example #22
Source File: iGAN_predict.py From iGAN with MIT License
def def_feature(layer='conv4', up_scale=4):
    print('COMPILING...')
    t = time()
    x = T.tensor4()
    x_t = AlexNet.transform_im(x)
    x_net = AlexNet.build_model(x_t, layer=layer, shape=(None, 3, 64, 64),
                                up_scale=up_scale)
    AlexNet.load_model(x_net, layer=layer)
    x_f = lasagne.layers.get_output(x_net[layer], deterministic=True)
    _ftr = theano.function(inputs=[x], outputs=x_f)
    print('%.2f seconds to compile _feature function' % (time() - t))
    return _ftr
Example #23
Source File: eval_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def load_network(results_file, n_test=None):
    with np.load(results_file) as d:
        args = d['args'][()]
        params = d['params'] if 'params' in d else d['net_params']
        sigma_val = d['sigma'][()]

    def gen_data(n=None, dtype=learn_kernel.floatX):
        return generate.generate_data(args, n or args.n_test, dtype=dtype)

    if getattr(args, 'net_code', False):
        learn_kernel.register_custom_net(args.net_code)

    # make the representation network; don't bother calling make_network since
    # it does a bunch of other things
    dim = gen_data()[0].shape[1]  # could be smarter about this
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')
    in_p = lasagne.layers.InputLayer(shape=(None, dim), input_var=input_p)
    in_q = lasagne.layers.InputLayer(shape=(None, dim), input_var=input_q)
    net_p, net_q, reg = learn_kernel.net_versions[args.net_version](in_p, in_q)
    rep_p = lasagne.layers.get_output(net_p)

    print("Compiling...", file=sys.stderr, end='')
    get_rep = theano.function([input_p], rep_p)
    print("done.", file=sys.stderr)

    if getattr(args, 'opt_sigma', False):
        params = params[:-1]
    lasagne.layers.set_all_param_values(net_p, params)

    return get_rep, gen_data, sigma_val, args.linear_kernel
Example #24
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def setup(dim, criterion='mmd', biased=True, streaming_est=False,
          opt_log=True, linear_kernel=False, opt_sigma=False,
          init_log_sigma=0, net_version='basic', hotelling_reg=0,
          strat='nesterov_momentum', learning_rate=0.01, **opt_args):
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')

    mmd2_pq, obj, rep_p, net_p, net_q, log_sigma = make_network(
        input_p, input_q, dim,
        criterion=criterion, biased=biased, streaming_est=streaming_est,
        opt_log=opt_log, linear_kernel=linear_kernel,
        log_sigma=init_log_sigma, hotelling_reg=hotelling_reg,
        net_version=net_version)

    params = lasagne.layers.get_all_params([net_p, net_q], trainable=True)
    if opt_sigma:
        params.append(log_sigma)

    fn = getattr(lasagne.updates, strat)
    updates = fn(obj, params, learning_rate=learning_rate, **opt_args)

    print("Compiling...", file=sys.stderr, end='')
    train_fn = theano.function(
        [input_p, input_q], [mmd2_pq, obj], updates=updates)
    val_fn = theano.function([input_p, input_q], [mmd2_pq, obj])
    get_rep = theano.function([input_p], rep_p)
    print("done", file=sys.stderr)

    return params, train_fn, val_fn, get_rep, log_sigma
Example #25
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def register_custom_net(code):
    module = types.ModuleType('net_custom', 'Custom network function')
    exec_(code, module.__dict__)
    sys.modules['net_custom'] = module
    net_versions['custom'] = module.net_custom


################################################################################
### Adding loss and so on to the network
Example #26
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def net_scf(net_p, net_q, n_freqs=5):
    '''
    Network equivalent to Wittawat's smoothed characteristic function test.
    '''
    from layers import SmoothedCFLayer
    net_p = SmoothedCFLayer(net_p, n_freqs)
    net_q = SmoothedCFLayer(net_q, n_freqs,
                            freqs=net_p.freqs, log_sigma=net_p.log_sigma)
    return net_p, net_q, 0
Example #27
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_activity(self, train_set_x):
    '''Given an input, this function returns the activity value of all the
    nodes in each hidden layer.'''
    activity_each_layer = []
    index = T.lscalar('index')  # index to a sample

    for dA in self.dA_layers:
        activity_fn = theano.function(
            inputs=[index],
            outputs=dA.output,
            givens={self.x: train_set_x[index:(index + 1)]})
        activity_each_layer.append(activity_fn)

    return activity_each_layer
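The givens mechanism used here is worth isolating: it substitutes a slice of a shared variable for the symbolic input at compile time, so the caller passes only an integer index and no data is copied through the function's arguments. A self-contained sketch of the same pattern (independent of the SdA class; names are for illustration):

import numpy as np
import theano
import theano.tensor as T

# A shared "dataset": 4 samples of dimension 3, in Theano-managed memory.
train_set_x = theano.shared(
    np.arange(12, dtype=theano.config.floatX).reshape(4, 3))

x = T.matrix('x')
index = T.lscalar('index')  # index to a sample

# `givens` replaces x with one row of the shared dataset at compile time.
get_row_doubled = theano.function(
    inputs=[index],
    outputs=x * 2,
    givens={x: train_set_x[index:(index + 1)]})

print(get_row_doubled(2))  # doubles row 2 of train_set_x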
Example #28
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_raw_activity(self, train_set_x):
    '''Given an input, this function returns the raw activity value of all
    the nodes in each layer.'''
    raw_activity_each_layer = []
    index = T.lscalar('index')  # index to a sample

    for dA in self.dA_layers:
        raw_activity_fn = theano.function(
            inputs=[index],
            outputs=dA.raw_output,
            givens={self.x: train_set_x[index:(index + 1)]})
        raw_activity_each_layer.append(raw_activity_fn)

    return raw_activity_each_layer
Example #29
Source File: model.py From Att-ChemdNER with Apache License 2.0
def modelScore(self, tag_ids, scores, s_len):
    #{{{
    """
    ATTENTION: THIS FUNCTION IS SYMBOLIC PROGRAMMING.

    This function returns the score our model assigns to a fixed sentence
    label.

    @param:
        scores: the score matrix, the output of our model
        tag_ids: a numpy array representing one sentence label
        s_len: a scalar, the length of the sentence. Because sentence
            labels are padded to the maximum sentence length, we use this
            to recover the original sentence label.
    @return:
        a scalar, the score
    """
    #{{{
    n_tags = self.output_dim
    transitions = self.transitions

    # score from tags_scores
    real_path_score = scores[T.arange(s_len), tag_ids].sum()

    # Score from transitions
    b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
    e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
    padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
    real_path_score += transitions[
        padded_tags_ids[T.arange(s_len + 1)],
        padded_tags_ids[T.arange(s_len + 1) + 1]
    ].sum()

    # to prevent T.exp(real_path_score) from becoming inf
    # return real_path_score
    return real_path_score / s_len
    #}}}
    #}}}
Example #30
Source File: optimizers.py From DL4MT with BSD 3-Clause "New" or "Revised" License
def adam(lr, tparams, grads, inp, cost, beta1=0.9, beta2=0.999, e=1e-8,
         profile=False):
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(inp, cost, updates=gsup, profile=profile)

    updates = []

    t_prev = theano.shared(numpy.float32(0.))
    t = t_prev + 1.
    lr_t = lr * tensor.sqrt(1. - beta2**t) / (1. - beta1**t)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0., p.name + '_mean')
        v = theano.shared(p.get_value() * 0., p.name + '_variance')
        m_t = beta1 * m + (1. - beta1) * g
        v_t = beta2 * v + (1. - beta2) * g**2
        step = lr_t * m_t / (tensor.sqrt(v_t) + e)
        p_t = p - step
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t_prev, t))

    f_update = theano.function([lr], [], updates=updates,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update