Python theano.function() Examples
The following are 30 code examples of theano.function(), collected from open-source projects.
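All of the examples revolve around the same compile step, so here is a minimal, self-contained sketch of theano.function() before diving in (this snippet is illustrative only and is not taken from any of the projects below):

import theano
import theano.tensor as T

# Declare symbolic inputs and build a symbolic expression graph.
x = T.dscalar('x')
y = T.dscalar('y')
z = x ** 2 + y

# Compile the graph into a callable; arguments map positionally to inputs.
f = theano.function([x, y], z)

print(f(2.0, 3.0))  # prints 7.0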
Example #1
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0
def batch_normalization(x, mean, var, beta, gamma, epsilon=1e-3):
    '''Apply batch normalization on x given mean, var, beta and gamma.
    '''
    # TODO remove this if statement when Theano without
    # T.nnet.bn.batch_normalization_test is deprecated
    if not hasattr(T.nnet.bn, 'batch_normalization_test'):
        return _old_batch_normalization(x, mean, var, beta, gamma, epsilon)

    if mean.ndim == 1:
        # based on TensorFlow's default: normalize along rightmost dimension
        reduction_axes = range(x.ndim - 1)
    else:
        reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]]

    return T.nnet.bn.batch_normalization_test(
        x, gamma, beta, mean, var, reduction_axes, epsilon)

# TODO remove this function when Theano without
# T.nnet.bn.batch_normalization_train is deprecated
Example #2
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def rmsprop(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                  name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                   name='%s_rgrad' % k)
                     for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup + rgup + rg2up,
                                    profile=profile)

    updir = [theano.shared(p.get_value() * numpy.float32(0.),
                           name='%s_updir' % k)
             for k, p in tparams.iteritems()]
    updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
                 for ud, zg, rg, rg2 in zip(updir, zipped_grads,
                                            running_grads, running_grads2)]
    param_up = [(p, p + udn[1]) for p, udn in zip(itemlist(tparams), updir_new)]
    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update
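The split into two compiled functions is the common Theano optimizer pattern in this codebase: f_grad_shared runs the forward/backward pass and accumulates gradient statistics into shared variables, then f_update applies the parameter step. A hedged sketch of the calling order (the minibatch iterator and the exact input signature are assumptions for illustration, not part of nmt.py as shown):

# Assumed setup: `lr = tensor.scalar(name='lr')`, and `inp` is the list of
# input tensors (e.g. source/target batches) that `cost` depends on.
f_grad_shared, f_update = rmsprop(lr, tparams, grads, inp, cost)

for batch in train_iterator:          # hypothetical minibatch iterator
    cost_val = f_grad_shared(*batch)  # accumulate grads into shared vars
    f_update(0.0001)                  # apply the RMSProp step at this lr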
Example #3
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def adadelta(lr, tparams, grads, inp, cost):
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                 name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    rg2_new = [0.95 * rg2 + 0.05 * (g ** 2)
               for rg2, g in zip(running_grads2, grads)]
    rg2up = [(rg2, r_n) for rg2, r_n in zip(running_grads2, rg2_new)]

    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(grads, running_up2, rg2_new)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    inp += [lr]
    f_update = theano.function(inp, cost, updates=rg2up + ru2up + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_update
Example #4
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0
def __call__(self, inputs):
    assert isinstance(inputs, (list, tuple))
    return self.function(*inputs)
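For context, this __call__ belongs to the Keras Theano backend's Function wrapper, which stores a compiled theano.function and unpacks a list of inputs into it. A minimal, self-contained re-creation of the pattern (a simplified, assumed class shape, for illustration only):

import theano
import theano.tensor as T

class Function(object):
    '''Wraps a compiled theano.function; callers always pass a list.'''
    def __init__(self, inputs, outputs):
        self.function = theano.function(inputs, outputs)

    def __call__(self, inputs):
        assert isinstance(inputs, (list, tuple))
        return self.function(*inputs)

a = T.dscalar('a')
double = Function([a], a * 2)
print(double([3.0]))  # prints 6.0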
Example #5
Source File: nmt.py From nmt with BSD 3-Clause "New" or "Revised" License
def debugging_adadelta(lr, tparams, grads, inp, cost):
    zipped_grads = [theano.shared(p.get_value() * numpy.float32(0.),
                                  name='%s_grad' % k)
                    for k, p in tparams.iteritems()]
    running_up2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                 name='%s_rup2' % k)
                   for k, p in tparams.iteritems()]
    running_grads2 = [theano.shared(p.get_value() * numpy.float32(0.),
                                    name='%s_rgrad2' % k)
                      for k, p in tparams.iteritems()]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inp, cost, updates=zgup + rg2up,
                                    profile=profile)

    updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads, running_up2,
                                     running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)]

    f_update = theano.function([lr], [], updates=ru2up + param_up,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update
Example #6
Source File: pooling.py From Depth-Map-Prediction with GNU General Public License v3.0
def test_cmrnorm():
    from theano.tests.unittest_tools import verify_grad

    xtest = np.random.rand(2, 8, 3, 4)
    xtest = xtest.astype(theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    x.tag.test_value = xtest

    y = cmrnorm(x, input_shape=xtest.shape[1:])
    f = theano.function([x], y, mode='DEBUG_MODE')
    f(xtest)

    f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)),
                        mode='DEBUG_MODE')
    f(xtest)
    theano.printing.debugprint(f)

    T.verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]),
                  (xtest,), rng=np.random.RandomState(0))

    print 'cmrnorm passed'
Example #7
Source File: test_activations.py From CAPTCHA-breaking with MIT License
def test_tanh():
    from keras.activations import tanh as t
    test_values = get_standard_values()

    x = T.vector()
    exp = t(x)
    f = theano.function([x], exp)

    result = f(test_values)
    expected = [math.tanh(v) for v in test_values]
    print(result)
    print(expected)
    list_assert_equal(result, expected)
Example #8
Source File: test_activations.py From CAPTCHA-breaking with MIT License
def test_softmax():
    from keras.activations import softmax as s

    # Test using a reference implementation of softmax
    def softmax(values):
        m = max(values)
        values = numpy.array(values)
        e = numpy.exp(values - m)
        dist = list(e / numpy.sum(e))
        return dist

    x = T.vector()
    exp = s(x)
    f = theano.function([x], exp)
    test_values = get_standard_values()

    result = f(test_values)
    expected = softmax(test_values)
    print(str(result))
    print(str(expected))
    list_assert_equal(result, expected)
Example #9
Source File: classifier.py From spinn with MIT License
def build_cost(logits, targets):
    """
    Build a classification cost function.
    """
    # Clip gradients coming from the cost function.
    logits = theano.gradient.grad_clip(
        logits, -1. * FLAGS.clipping_max_value, FLAGS.clipping_max_value)

    predicted_dist = T.nnet.softmax(logits)
    costs = T.nnet.categorical_crossentropy(predicted_dist, targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    acc = 1. - T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))

    return cost, acc
Example #10
Source File: fat_classifier.py From spinn with MIT License
def build_cost(logits, targets):
    """
    Build a classification cost function.
    """
    # Clip gradients coming from the cost function.
    logits = theano.gradient.grad_clip(
        logits, -1. * FLAGS.clipping_max_value, FLAGS.clipping_max_value)

    predicted_dist = T.nnet.softmax(logits)
    costs = T.nnet.categorical_crossentropy(predicted_dist, targets)
    cost = costs.mean()

    pred = T.argmax(logits, axis=1)
    acc = 1. - T.mean(T.cast(T.neq(pred, targets), theano.config.floatX))

    return cost, acc
Example #11
Source File: test_stack.py From spinn with MIT License
def test_speed(self):
    top = self.stack.final_stack[-self.batch_size:]
    cost = self._make_cost(top)
    error_signal = T.grad(cost, top)

    # Build automatic backprop function.
    self.stack.make_backprop_scan(error_signal, [self.y],
                                  compute_embedding_gradients=False)

    f = theano.function(
        [self.X, self.transitions, self.y],
        [cost] + self.stack.gradients.values(),
        updates=self.stack.scan_updates + self.stack.bscan_updates)
    theano.printing.debugprint(f.maker.fgraph.outputs[1])

    for t in range(10):
        self._run_batch(f)
Example #12
Source File: dA.py From deeplearn_hsi with BSD 2-Clause "Simplified" License
def get_corrupted_input(self, input, corruption_level):
    """This function keeps ``1 - corruption_level`` entries of the inputs
    the same and zeroes out a randomly selected subset of size
    ``corruption_level``.

    Note : the first argument of theano.rng.binomial is the shape (size)
           of the random numbers it should produce;
           the second argument is the number of trials;
           the third argument is the probability of success of any trial.

           This will produce an array of 0s and 1s, where each entry is 1
           with probability ``1 - corruption_level`` and 0 with
           probability ``corruption_level``.

           The binomial function returns int64 by default. int64
           multiplied by the input type (floatX) always returns float64.
           To keep all data in floatX when floatX is float32, we set the
           dtype of the binomial to floatX. As the value of the binomial
           is always 0 or 1 in our case, this doesn't change the result.
           This is needed to allow the GPU to work correctly, as it only
           supports float32 for now.
    """
    return self.theano_rng.binomial(size=input.shape, n=1,
                                    p=1 - corruption_level,
                                    dtype=theano.config.floatX) * input
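The docstring's masking trick is easy to verify in isolation: a binomial draw with n=1 yields a 0/1 mask which, multiplied elementwise into the input, zeroes out roughly ``corruption_level`` of the entries. A standalone sketch of the same pattern, using RandomStreams directly instead of the class's theano_rng attribute:

import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

theano_rng = RandomStreams(seed=123)
x = T.matrix('x')
corruption_level = 0.3

# Each mask entry is 1 with probability 1 - corruption_level, else 0.
mask = theano_rng.binomial(size=x.shape, n=1, p=1 - corruption_level,
                           dtype=theano.config.floatX)
corrupt = theano.function([x], mask * x)

print(corrupt(np.ones((2, 5), dtype=theano.config.floatX)))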
Example #13
Source File: solver.py From 3D-R2N2 with MIT License
def save(self, training_losses, save_dir, step):
    '''Save the current network parameters to the save_dir and make a
    symlink to the latest param so that the training function can easily
    load the latest model'''
    save_path = os.path.join(save_dir, 'weights.%d' % (step))
    self.net.save(save_path)

    # Make a symlink for weights.npy
    symlink_path = os.path.join(save_dir, 'weights.npy')
    if os.path.lexists(symlink_path):
        os.remove(symlink_path)

    # Make a symlink to the latest network params
    os.symlink("%s.npy" % os.path.abspath(save_path), symlink_path)

    # Write the losses
    with open(os.path.join(save_dir, 'loss.%d.txt' % step), 'w') as f:
        f.write('\n'.join([str(l) for l in training_losses]))
Example #14
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_network(self):
    '''This function returns the weight matrix and bias vectors of each
    hidden layer in the final network after training.'''
    weights_all_layer = []
    bias_all_layer = []
    bias_prime_all_layer = []

    for dA_layer in self.dA_layers:
        weight = dA_layer.W.get_value(borrow=True)
        bias = dA_layer.b.get_value(borrow=True)
        bias_prime = dA_layer.b_prime.get_value(borrow=True)
        weights_all_layer.append(weight)
        bias_all_layer.append(bias)
        bias_prime_all_layer.append(bias_prime)

    return weights_all_layer, bias_all_layer, bias_prime_all_layer
Example #15
Source File: layer_utils.py From CAPTCHA-breaking with MIT License
def print_layer_shapes(model, input_shapes):
    """
    Utility function to print the shape of the output at each layer of a
    Model

    Arguments:
        model: instance of Model / Merge
        input_shapes: dict (Graph), list of tuples (Merge) or tuple
            (Sequential)
    """
    if model.__class__.__name__ in ['Sequential', 'Merge']:
        # in this case input_shapes is a tuple, or a list [shape1, shape2]
        if not isinstance(input_shapes[0], tuple):
            input_shapes = [input_shapes]

        inputs = model.get_input(train=False)
        if not isinstance(inputs, list):
            inputs = [inputs]

        input_dummy = [np.zeros(shape, dtype=np.float32)
                       for shape in input_shapes]
        layers = model.layers

    elif model.__class__.__name__ == 'Graph':
        # in this case input_shapes is a dictionary
        inputs = [model.inputs[name].input
                  for name in model.input_order]
        input_dummy = [np.zeros(input_shapes[name], dtype=np.float32)
                       for name in model.input_order]
        layers = [model.nodes[c['name']] for c in model.node_config]

    print("input shapes : ", input_shapes)
    for l in layers:
        shape_f = theano.function(inputs,
                                  l.get_output(train=False).shape,
                                  on_unused_input='ignore')
        out_shape = tuple(shape_f(*input_dummy))
        config = l.get_config()
        print('shape after %s: %s' % (config['name'], out_shape))
Example #16
Source File: skipthoughts.py From text-to-image with MIT License
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print 'Loading model parameters...'
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print 'Compiling encoders...'
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print 'Loading tables...'
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print 'Packing up...'
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
Example #17
Source File: skipthoughts.py From text-to-image with MIT License
def load_model():
    """
    Load the model with saved tables
    """
    # Load model options
    print('Loading model parameters...')
    with open('%s.pkl' % path_to_umodel, 'rb') as f:
        uoptions = pkl.load(f)
    with open('%s.pkl' % path_to_bmodel, 'rb') as f:
        boptions = pkl.load(f)

    # Load parameters
    uparams = init_params(uoptions)
    uparams = load_params(path_to_umodel, uparams)
    utparams = init_tparams(uparams)
    bparams = init_params_bi(boptions)
    bparams = load_params(path_to_bmodel, bparams)
    btparams = init_tparams(bparams)

    # Extractor functions
    print('Compiling encoders...')
    embedding, x_mask, ctxw2v = build_encoder(utparams, uoptions)
    f_w2v = theano.function([embedding, x_mask], ctxw2v, name='f_w2v')
    embedding, x_mask, ctxw2v = build_encoder_bi(btparams, boptions)
    f_w2v2 = theano.function([embedding, x_mask], ctxw2v, name='f_w2v2')

    # Tables
    print('Loading tables...')
    utable, btable = load_tables()

    # Store everything we need in a dictionary
    print('Packing up...')
    model = {}
    model['uoptions'] = uoptions
    model['boptions'] = boptions
    model['utable'] = utable
    model['btable'] = btable
    model['f_w2v'] = f_w2v
    model['f_w2v2'] = f_w2v2

    return model
Example #18
Source File: check_callbacks.py From CAPTCHA-breaking with MIT License
def on_train_begin(self, logs={}):
    self.imgs = Frames(n_plots=5)

    layers_0_ids = np.random.choice(32, 16, replace=False)
    self.test_layer0 = theano.function(
        [self.model.get_input()],
        self.model.layers[1].get_output(train=False)[0, layers_0_ids])

    layers_1_ids = np.random.choice(64, 36, replace=False)
    self.test_layer1 = theano.function(
        [self.model.get_input()],
        self.model.layers[5].get_output(train=False)[0, layers_1_ids])

    self.test_layer2 = theano.function(
        [self.model.get_input()],
        self.model.layers[10].get_output(train=False)[0])
Example #19
Source File: dcgan_theano.py From iGAN with MIT License
def def_gen(self, gen_params, gen_pl, n_layers, n_f, nc):
    z = T.matrix()
    gx = gen_test(z, gen_params, gen_pl, n_layers=n_layers, n_f=n_f,
                  nc=nc, use_tanh=False)
    print('COMPILING...')
    t = time()
    _gen = theano.function([z], gx)
    print('%.2f seconds to compile _gen function' % (time() - t))
    return _gen
Example #20
Source File: iGAN_predict.py From iGAN with MIT License
def def_predict(model_P):
    print('COMPILING...')
    t = time()
    x = T.tensor4()
    z = model_P(x)
    _predict = theano.function([x], [z])
    print('%.2f seconds to compile _predict function' % (time() - t))
    return _predict
Example #21
Source File: iGAN_predict.py From iGAN with MIT License
def def_bfgs(model_G, layer='conv4', npx=64, alpha=0.002):
    print('COMPILING...')
    t = time()

    x_f = T.tensor4()
    x = T.tensor4()
    z = T.matrix()

    tanh = activations.Tanh()
    gx = model_G(tanh(z))

    # Note: `is` performs identity (not equality) comparison; this relies
    # on CPython interning short string literals.
    if layer is 'hog':
        gx_f = HOGNet.get_hog(gx, use_bin=True, BS=4)
    else:
        gx_t = AlexNet.transform_im(gx)
        gx_net = AlexNet.build_model(gx_t, layer=layer,
                                     shape=(None, 3, npx, npx))
        AlexNet.load_model(gx_net, layer=layer)
        gx_f = lasagne.layers.get_output(gx_net[layer], deterministic=True)

    f_rec = T.mean(T.sqr(x_f - gx_f), axis=(1, 2, 3)) * sharedX(alpha)
    x_rec = T.mean(T.sqr(x - gx), axis=(1, 2, 3))
    cost = T.sum(f_rec) + T.sum(x_rec)
    grad = T.grad(cost, z)
    output = [cost, grad, gx]

    _invert = theano.function(inputs=[z, x, x_f], outputs=output)
    print('%.2f seconds to compile _bfgs function' % (time() - t))
    return _invert, z
Example #22
Source File: iGAN_predict.py From iGAN with MIT License
def def_feature(layer='conv4', up_scale=4):
    print('COMPILING...')
    t = time()
    x = T.tensor4()
    x_t = AlexNet.transform_im(x)
    x_net = AlexNet.build_model(x_t, layer=layer, shape=(None, 3, 64, 64),
                                up_scale=up_scale)
    AlexNet.load_model(x_net, layer=layer)
    x_f = lasagne.layers.get_output(x_net[layer], deterministic=True)
    _ftr = theano.function(inputs=[x], outputs=x_f)
    print('%.2f seconds to compile _feature function' % (time() - t))
    return _ftr
Example #23
Source File: eval_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def load_network(results_file, n_test=None):
    with np.load(results_file) as d:
        args = d['args'][()]
        params = d['params'] if 'params' in d else d['net_params']
        sigma_val = d['sigma'][()]

    def gen_data(n=None, dtype=learn_kernel.floatX):
        return generate.generate_data(args, n or args.n_test, dtype=dtype)

    if getattr(args, 'net_code', False):
        learn_kernel.register_custom_net(args.net_code)

    # make the representation network; don't bother calling make_network since
    # it does a bunch of other things
    dim = gen_data()[0].shape[1]  # could be smarter about this
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')
    in_p = lasagne.layers.InputLayer(shape=(None, dim), input_var=input_p)
    in_q = lasagne.layers.InputLayer(shape=(None, dim), input_var=input_q)
    net_p, net_q, reg = learn_kernel.net_versions[args.net_version](in_p, in_q)
    rep_p = lasagne.layers.get_output(net_p)

    print("Compiling...", file=sys.stderr, end='')
    get_rep = theano.function([input_p], rep_p)
    print("done.", file=sys.stderr)

    if getattr(args, 'opt_sigma', False):
        params = params[:-1]
    lasagne.layers.set_all_param_values(net_p, params)

    return get_rep, gen_data, sigma_val, args.linear_kernel
Example #24
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def setup(dim, criterion='mmd', biased=True, streaming_est=False,
          opt_log=True, linear_kernel=False, opt_sigma=False,
          init_log_sigma=0, net_version='basic', hotelling_reg=0,
          strat='nesterov_momentum', learning_rate=0.01, **opt_args):
    input_p = T.matrix('input_p')
    input_q = T.matrix('input_q')

    mmd2_pq, obj, rep_p, net_p, net_q, log_sigma = make_network(
        input_p, input_q, dim,
        criterion=criterion, biased=biased, streaming_est=streaming_est,
        opt_log=opt_log, linear_kernel=linear_kernel,
        log_sigma=init_log_sigma, hotelling_reg=hotelling_reg,
        net_version=net_version)

    params = lasagne.layers.get_all_params([net_p, net_q], trainable=True)
    if opt_sigma:
        params.append(log_sigma)

    fn = getattr(lasagne.updates, strat)
    updates = fn(obj, params, learning_rate=learning_rate, **opt_args)

    print("Compiling...", file=sys.stderr, end='')
    train_fn = theano.function(
        [input_p, input_q], [mmd2_pq, obj], updates=updates)
    val_fn = theano.function([input_p, input_q], [mmd2_pq, obj])
    get_rep = theano.function([input_p], rep_p)
    print("done", file=sys.stderr)

    return params, train_fn, val_fn, get_rep, log_sigma
Example #25
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def register_custom_net(code):
    module = types.ModuleType('net_custom', 'Custom network function')
    exec_(code, module.__dict__)
    sys.modules['net_custom'] = module
    net_versions['custom'] = module.net_custom


################################################################################
### Adding loss and so on to the network
Example #26
Source File: learn_kernel.py From opt-mmd with BSD 3-Clause "New" or "Revised" License
def net_scf(net_p, net_q, n_freqs=5):
    '''
    Network equivalent to Wittawat's smoothed characteristic function test.
    '''
    from layers import SmoothedCFLayer
    net_p = SmoothedCFLayer(net_p, n_freqs)
    net_q = SmoothedCFLayer(net_q, n_freqs,
                            freqs=net_p.freqs, log_sigma=net_p.log_sigma)
    return net_p, net_q, 0
Example #27
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_activity(self, train_set_x):
    '''Given an input, this function returns the activity value of all the
    nodes in each hidden layer.'''
    activity_each_layer = []
    index = T.lscalar('index')  # index to a sample

    for dA in self.dA_layers:
        activity_fn = theano.function(
            inputs=[index],
            outputs=dA.output,
            givens={self.x: train_set_x[index:(index + 1)]})
        activity_each_layer.append(activity_fn)

    return activity_each_layer
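The givens mechanism used here is worth isolating: it substitutes a slice of a shared variable for the symbolic input at compile time, so the caller passes only an integer index and no data is copied through the function's arguments. A self-contained sketch of the same pattern (independent of the SdA class; names are for illustration):

import numpy as np
import theano
import theano.tensor as T

# A shared "dataset": 4 samples of dimension 3, in Theano-managed memory.
train_set_x = theano.shared(
    np.arange(12, dtype=theano.config.floatX).reshape(4, 3))

x = T.matrix('x')
index = T.lscalar('index')  # index to a sample

# `givens` replaces x with one row of the shared dataset at compile time.
get_row_doubled = theano.function(
    inputs=[index],
    outputs=x * 2,
    givens={x: train_set_x[index:(index + 1)]})

print(get_row_doubled(2))  # doubles row 2 of train_set_x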
Example #28
Source File: SdA_train.py From adage with BSD 3-Clause "New" or "Revised" License
def return_raw_activity(self, train_set_x):
    '''Given an input, this function returns the raw activity value of all
    the nodes in each layer.'''
    raw_activity_each_layer = []
    index = T.lscalar('index')  # index to a sample

    for dA in self.dA_layers:
        raw_activity_fn = theano.function(
            inputs=[index],
            outputs=dA.raw_output,
            givens={self.x: train_set_x[index:(index + 1)]})
        raw_activity_each_layer.append(raw_activity_fn)

    return raw_activity_each_layer
Example #29
Source File: model.py From Att-ChemdNER with Apache License 2.0
def modelScore(self, tag_ids, scores, s_len):
    #{{{
    """
    ATTENTION: THIS FUNCTION IS SYMBOLIC PROGRAMMING.

    This function returns the score our model assigns to a fixed sentence
    label.

    @param:
        scores: the score matrix, the output of our model
        tag_ids: a numpy array representing one sentence label
        s_len: a scalar, the length of the sentence. Because sentence
            labels are padded to the maximum sentence length, we use this
            to recover the original sentence label.
    @return:
        a scalar, the score
    """
    #{{{
    n_tags = self.output_dim
    transitions = self.transitions

    # score from tags_scores
    real_path_score = scores[T.arange(s_len), tag_ids].sum()

    # Score from transitions
    b_id = theano.shared(value=np.array([n_tags], dtype=np.int32))
    e_id = theano.shared(value=np.array([n_tags + 1], dtype=np.int32))
    padded_tags_ids = T.concatenate([b_id, tag_ids, e_id], axis=0)
    real_path_score += transitions[
        padded_tags_ids[T.arange(s_len + 1)],
        padded_tags_ids[T.arange(s_len + 1) + 1]
    ].sum()

    # to prevent T.exp(real_path_score) from becoming inf
    # return real_path_score
    return real_path_score / s_len
    #}}}
    #}}}
Example #30
Source File: optimizers.py From DL4MT with BSD 3-Clause "New" or "Revised" License
def adam(lr, tparams, grads, inp, cost, beta1=0.9, beta2=0.999, e=1e-8,
         profile=False):
    gshared = [theano.shared(p.get_value() * 0., name='%s_grad' % k)
               for k, p in tparams.iteritems()]
    gsup = [(gs, g) for gs, g in zip(gshared, grads)]

    f_grad_shared = theano.function(inp, cost, updates=gsup, profile=profile)

    updates = []

    t_prev = theano.shared(numpy.float32(0.))
    t = t_prev + 1.
    lr_t = lr * tensor.sqrt(1. - beta2**t) / (1. - beta1**t)

    for p, g in zip(tparams.values(), gshared):
        m = theano.shared(p.get_value() * 0., p.name + '_mean')
        v = theano.shared(p.get_value() * 0., p.name + '_variance')
        m_t = beta1 * m + (1. - beta1) * g
        v_t = beta2 * v + (1. - beta2) * g**2
        step = lr_t * m_t / (tensor.sqrt(v_t) + e)
        p_t = p - step
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t_prev, t))

    f_update = theano.function([lr], [], updates=updates,
                               on_unused_input='ignore', profile=profile)

    return f_grad_shared, f_update