Python theano.tensor.grad() Examples
The following are 30 code examples of theano.tensor.grad(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
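Before diving into the project examples, here is a minimal sketch of the basic pattern they all share: theano.tensor.grad() takes a scalar cost and returns symbolic gradients with respect to the variables passed as wrt, and those gradients are then wired into the updates of a compiled theano.function. This snippet is illustrative only; the names x, w, cost, and train are not taken from any of the projects below.

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')                                                     # symbolic input
w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='w')   # trainable parameter
cost = T.sum((T.dot(x, w) - 1.) ** 2)                                 # scalar cost
g_w = T.grad(cost, wrt=w)                                             # symbolic gradient d(cost)/d(w)

# One step of plain gradient descent, compiled into a callable.
train = theano.function([x], cost, updates=[(w, w - 0.1 * g_w)])
train(np.arange(3, dtype=theano.config.floatX))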
Example #1
Source File: updates.py From iGAN with MIT License | 6 votes |
def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        acc = theano.shared(p.get_value() * 0.)
        acc_delta = theano.shared(p.get_value() * 0.)
        acc_new = self.rho * acc + (1 - self.rho) * g ** 2
        updates.append((acc, acc_new))

        update = g * T.sqrt(acc_delta + self.epsilon) / T.sqrt(acc_new + self.epsilon)
        updated_p = p - self.lr * update
        updated_p = self.regularizer.weight_regularize(updated_p)
        updates.append((p, updated_p))

        acc_delta_new = self.rho * acc_delta + (1 - self.rho) * update ** 2
        updates.append((acc_delta, acc_delta_new))
    return updates
Example #2
Source File: test_opt.py From D-VAE with MIT License | 6 votes |
def test_local_csm_grad_c():
    raise SkipTest("Opt disabled as it don't support unsorted indices")
    if not theano.config.cxx:
        raise SkipTest("G++ not available, so we need to skip this test.")
    data = tensor.vector()
    indices, indptr, shape = (tensor.ivector(), tensor.ivector(),
                              tensor.ivector())
    mode = theano.compile.mode.get_default_mode()

    if theano.config.mode == 'FAST_COMPILE':
        mode = theano.compile.Mode(linker='c|py', optimizer='fast_compile')

    mode = mode.including("specialize", "local_csm_grad_c")
    for CS, cast in [(sparse.CSC, sp.csc_matrix),
                     (sparse.CSR, sp.csr_matrix)]:
        cost = tensor.sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape)))
        f = theano.function(
            [data, indices, indptr, shape],
            tensor.grad(cost, data),
            mode=mode)
        assert not any(isinstance(node.op, sparse.CSMGrad) for node
                       in f.maker.fgraph.toposort())
        v = cast(random_lil((10, 40), config.floatX, 3))
        f(v.data, v.indices, v.indptr, v.shape)
Example #3
Source File: hgru4rec.py From hgru4rec with MIT License | 6 votes |
def adam(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.decay)
    v2 = np.float32(1.0 - self.decay)
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    meang = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    countt = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = v1 * acc + v2 * grad ** 2
        meang_new = v1 * meang + v2 * grad
        countt_new = countt + 1
        updates[acc] = acc_new
        updates[meang] = meang_new
        updates[countt] = countt_new
    else:
        acc_s = acc[sample_idx]
        meang_s = meang[sample_idx]
        countt_s = countt[sample_idx]
        acc_new = v1 * acc_s + v2 * grad ** 2
        meang_new = v1 * meang_s + v2 * grad
        countt_new = countt_s + 1.0
        updates[acc] = T.set_subtensor(acc_s, acc_new)
        updates[meang] = T.set_subtensor(meang_s, meang_new)
        updates[countt] = T.set_subtensor(countt_s, countt_new)
    return (meang_new / (1 - v1 ** countt_new)) / (T.sqrt(acc_new / (1 - v1 ** countt_new)) + epsilon)
Example #4
Source File: blocks.py From spinn with MIT License | 6 votes |
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6, grads=None):
    # From:
    # https://github.com/Newmu/Theano-Tutorials/blob/master/4_modern_net.py
    if grads is None:
        grads = T.grad(cost=cost, wrt=params)
    assert len(grads) == len(params)

    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(np.zeros_like(p.get_value(), dtype=np.float32),
                            name="%s/rms/acc" % p.name)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling

        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))

    return updates
Example #5
Source File: adam.py From gated-graph-transformer-network with MIT License | 6 votes |
def Adam(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    i = theano.shared(np.array(0., theano.config.floatX))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates
Example #6
Source File: test_basic.py From D-VAE with MIT License | 6 votes |
def test_csm_grad(self):
    for sparsetype in ('csr', 'csc'):
        x = tensor.vector()
        y = tensor.ivector()
        z = tensor.ivector()
        s = tensor.ivector()
        call = getattr(sp, sparsetype + '_matrix')
        spm = call(random_lil((300, 400), config.floatX, 5))
        out = tensor.grad(dense_from_sparse(
            CSM(sparsetype)(x, y, z, s)
        ).sum(), x)
        self._compile_and_check([x, y, z, s],
                                [out],
                                [spm.data, spm.indices, spm.indptr,
                                 spm.shape],
                                (CSMGrad, CSMGradC)
                                )
Example #7
Source File: pooling.py From Depth-Map-Prediction with GNU General Public License v3.0 | 6 votes |
def test_cmrnorm():
    from theano.tests.unittest_tools import verify_grad

    xtest = np.random.rand(2,8,3,4)
    xtest = xtest.astype(theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    x.tag.test_value = xtest

    y = cmrnorm(x, input_shape=xtest.shape[1:])
    f = theano.function([x], y, mode='DEBUG_MODE')
    f(xtest)

    f = theano.function([x], gpu_from_host(T.grad(T.sum(y), wrt=x)),
                        mode='DEBUG_MODE')
    f(xtest)
    theano.printing.debugprint(f)

    T.verify_grad(lambda x: cmrnorm(x, input_shape=xtest.shape[1:]),
                  (xtest,),
                  rng=np.random.RandomState(0))

    print 'cmrnorm passed'
Example #8
Source File: hgru4rec.py From hgru4rec with MIT License | 6 votes |
def adadelta(self, param, grad, updates, sample_idx=None, epsilon=1e-6):
    v1 = np.float32(self.decay)
    v2 = np.float32(1.0 - self.decay)
    acc = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    upd = theano.shared(param.get_value(borrow=False) * 0., borrow=True)
    if sample_idx is None:
        acc_new = acc + grad ** 2
        updates[acc] = acc_new
        grad = T.sqrt(upd + epsilon) * grad
        upd_new = v1 * upd + v2 * grad ** 2
        updates[upd] = upd_new
    else:
        acc_s = acc[sample_idx]
        acc_new = acc_s + grad ** 2
        updates[acc] = T.set_subtensor(acc_s, acc_new)
        upd_s = upd[sample_idx]
        upd_new = v1 * upd_s + v2 * grad ** 2
        updates[upd] = T.set_subtensor(upd_s, upd_new)
        grad = T.sqrt(upd_s + epsilon) * grad
    gradient_scaling = T.cast(T.sqrt(acc_new + epsilon), theano.config.floatX)
    return grad / gradient_scaling
Example #9
Source File: test_nnet.py From D-VAE with MIT License | 6 votes |
def test_local_softmax_grad_optimization_and_big_input(self):
    """Test the Logsoftmax's grad substitution.

    Check that Log(Softmax(x))'s grad is substituted with Logsoftmax(x)'s
    grad and that the new operation does not explode for big inputs.
    Note that only the grad is checked.
    """
    m = theano.config.mode
    m = theano.compile.get_mode(m)
    m.check_isfinite = False
    # some inputs that are large to make the gradient explode in the non
    # optimized case
    a = numpy.exp(10 * numpy.random.rand(5, 10).astype(theano.config.floatX))

    def myfunc(x):
        sm = tensor.nnet.softmax(x)
        logsm = tensor.log(sm)
        return logsm

    # We set step to 0.1 because for big values we need a big epsilon
    utt.verify_grad(myfunc, [a], eps=0.1, mode=m)
    f = theano.function([], myfunc(a))
    self.assertTrue(hasattr(f.maker.fgraph.outputs[0].tag, 'trace'))
Example #10
Source File: test_stack.py From spinn with MIT License | 6 votes |
def test_speed(self):
    top = self.stack.final_stack[-self.batch_size:]
    cost = self._make_cost(top)
    error_signal = T.grad(cost, top)

    # Build automatic backprop function.
    self.stack.make_backprop_scan(error_signal, [self.y],
                                  compute_embedding_gradients=False)

    f = theano.function(
        [self.X, self.transitions, self.y],
        [cost] + self.stack.gradients.values(),
        updates=self.stack.scan_updates + self.stack.bscan_updates)

    theano.printing.debugprint(f.maker.fgraph.outputs[1])

    for t in range(10):
        self._run_batch(f)
Example #11
Source File: aa.py From D-VAE with MIT License | 6 votes |
def __init__(self):
    super(M, self).__init__()

    x = T.matrix('x')  # input, target
    self.w = module.Member(T.matrix('w'))  # weights
    self.a = module.Member(T.vector('a'))  # hid bias
    self.b = module.Member(T.vector('b'))  # output bias

    self.hid = T.tanh(T.dot(x, self.w) + self.a)
    hid = self.hid

    self.out = T.tanh(T.dot(hid, self.w.T) + self.b)
    out = self.out

    self.err = 0.5 * T.sum((out - x)**2)
    err = self.err

    params = [self.w, self.a, self.b]
    gparams = T.grad(err, params)
    updates = [(p, p - 0.01 * gp) for p, gp in zip(params, gparams)]

    self.step = module.Method([x], err, updates=dict(updates))
Example #12
Source File: residual_gradient_descent.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #13
Source File: rbm_pretraining.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #14
Source File: rbm_pretraining.py From Projects with MIT License | 6 votes |
def get_cost_updates(self, lr=0.1, persistent=None, k=1):
    pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(self.input)
    if persistent is None:
        chain_start = ph_sample
    else:
        chain_start = persistent
    ([pre_sigmoid_nvs, nv_means, nv_samples, pre_sigmoid_nhs, nh_means, nh_samples], updates) = \
        theano.scan(self.gibbs_step,
                    outputs_info=[None, None, None, None, None, chain_start],
                    n_steps=k, name="gibbs_step")
    chain_end = nv_samples[-1]
    cost = T.mean(self.free_energy(self.input)) - T.mean(self.free_energy(chain_end))
    gparams = T.grad(cost, self.params, consider_constant=[chain_end])
    for gparam, param in zip(gparams, self.params):
        updates[param] = param - gparam * T.cast(lr, dtype=theano.config.floatX)
    if persistent:
        updates[persistent] = nh_samples[-1]
        monitoring_cost = self.get_pseudo_likelihood_cost(updates)
    else:
        monitoring_cost = self.get_reconstruction_cost(updates, pre_sigmoid_nvs[-1])
    return monitoring_cost, updates
Example #15
Source File: batch_normalization.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #16
Source File: conv2d_crossvalidation.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #17
Source File: convnet.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #18
Source File: convlstm_between_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #19
Source File: lstm_within_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #20
Source File: lstm_between_subject.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adaptive moment estimation gradient descent
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load data
Example #21
Source File: model2.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adam gradient descent updates
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#load saved lstm if it exists, else initialize new lstm
Example #22
Source File: model1.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    '''
    adam gradient descent updates
    '''
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates

#open previous lowest training cost if it exists
Example #23
Source File: nn.py From opt-mmd with BSD 3-Clause "New" or "Revised" License | 6 votes |
def adam_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    t = th.shared(np.cast[th.config.floatX](1.))
    for p, g in zip(params, grads):
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v_t = mom1*v + (1. - mom1)*g
        mg_t = mom2*mg + (1. - mom2)*T.square(g)
        v_hat = v_t / (1. - mom1 ** t)
        mg_hat = mg_t / (1. - mom2 ** t)
        g_t = v_hat / T.sqrt(mg_hat + 1e-8)
        p_t = p - lr * g_t
        updates.append((v, v_t))
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    updates.append((t, t+1))
    return updates
Example #24
Source File: conv_net.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #25
Source File: theano_nn.py From Projects with MIT License | 6 votes |
def __init__(self, classes, hidden_layers, features, nodes_per_hidden_layer, learning_rate, regularization):
    self.hidden_layers = []
    self.hidden_layers.append(layer(features, nodes_per_hidden_layer))
    for i in range(hidden_layers-1):
        self.hidden_layers.append(layer(nodes_per_hidden_layer, nodes_per_hidden_layer))
    self.output_layer = layer(nodes_per_hidden_layer, classes)
    self.params = []
    for l in self.hidden_layers:
        self.params.extend(l.get_params())
    self.params.extend(self.output_layer.get_params())
    self.A = T.matrix()
    self.t = T.matrix()
    self.s = 1/(1+T.exp(-T.dot(self.A, self.params[0])-self.params[1]))
    for i in range(hidden_layers):
        self.s = 1/(1+T.exp(-T.dot(self.s, self.params[2*(i+1)])-self.params[2*(i+1)+1]))
    self.cost = -self.t*T.log(self.s)-(1-self.t)*T.log(1-self.s)
    self.cost = self.cost.mean()
    for i in range(hidden_layers+1):
        self.cost += regularization*(self.params[2*i]**2).mean()
    self.gparams = [T.grad(self.cost, param) for param in self.params]
    self.propogate = theano.function([self.A, self.t], self.cost,
                                     updates=[(param, param-learning_rate*gparam) for param, gparam in zip(self.params, self.gparams)],
                                     allow_input_downcast=True)
    self.classify = theano.function([self.A], self.s, allow_input_downcast=True)
Example #26
Source File: convolutional_nn.py From Projects with MIT License | 6 votes |
def __init__(self, convolutional_layers, feature_maps, filter_shapes, poolsize, feedforward_layers, feedforward_nodes, classes, learning_rate, regularization):
    self.input = T.tensor4()
    self.convolutional_layers = []
    self.convolutional_layers.append(convolutional_layer(self.input, feature_maps[1], feature_maps[0], filter_shapes[0][0], filter_shapes[0][1], poolsize[0]))
    for i in range(1, convolutional_layers):
        self.convolutional_layers.append(convolutional_layer(self.convolutional_layers[i-1].output, feature_maps[i+1], feature_maps[i], filter_shapes[i][0], filter_shapes[i][1], poolsize[i]))
    self.feedforward_layers = []
    self.feedforward_layers.append(feedforward_layer(self.convolutional_layers[-1].output.flatten(2), flattened, feedforward_nodes[0]))
    for i in range(1, feedforward_layers):
        self.feedforward_layers.append(feedforward_layer(self.feedforward_layers[i-1].output, feedforward_nodes[i-1], feedforward_nodes[i]))
    self.output_layer = feedforward_layer(self.feedforward_layers[-1].output, feedforward_nodes[-1], classes)
    self.params = []
    for l in self.convolutional_layers + self.feedforward_layers:
        self.params.extend(l.get_params())
    self.params.extend(self.output_layer.get_params())
    self.target = T.matrix()
    self.output = self.output_layer.output
    self.cost = -self.target*T.log(self.output)-(1-self.target)*T.log(1-self.output)
    self.cost = self.cost.mean()
    for i in range(convolutional_layers+feedforward_layers+1):
        self.cost += regularization*(self.params[2*i]**2).mean()
    self.gparams = [T.grad(self.cost, param) for param in self.params]
    self.propogate = theano.function([self.input, self.target], self.cost,
                                     updates=[(param, param-learning_rate*gparam) for param, gparam in zip(self.params, self.gparams)],
                                     allow_input_downcast=True)
    self.classify = theano.function([self.input], self.output, allow_input_downcast=True)
Example #27
Source File: updates.py From iGAN with MIT License | 6 votes |
def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)

    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)

        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates
Example #28
Source File: gradient_descent.py From Projects with MIT License | 6 votes |
def adam(self, cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8):
    updates = []
    grads = T.grad(cost, params)
    self.i = theano.shared(np.float32(0.))
    i_t = self.i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        self.m = theano.shared(p.get_value() * 0.)
        self.v = theano.shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * self.m)
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * self.v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((self.m, m_t))
        updates.append((self.v, v_t))
        updates.append((p, p_t))
    updates.append((self.i, i_t))
    return updates
Example #29
Source File: optimizers.py From CAPTCHA-breaking with MIT License | 5 votes |
def get_gradients(self, loss, params):
    grads = T.grad(loss, params)
    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        norm = T.sqrt(sum([T.sum(g ** 2) for g in grads]))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
    return grads
Example #30
Source File: test_nnet.py From D-VAE with MIT License | 5 votes |
def test_softmax_grad_optimizations_vector(self):
    x = tensor.vector('x')
    one_of_n = tensor.lvector('one_of_n')
    op = crossentropy_categorical_1hot
    xe = op(softmax_op(x), one_of_n)
    sum_xe = tensor.sum(xe)
    g_x = tensor.grad(sum_xe, x)
    fgraph = gof.FunctionGraph(
        [x, one_of_n],
        [g_x])

    # print 'BEFORE'
    # for node in fgraph.toposort():
    #     print node.op, node.inputs
    # print '----'

    theano.compile.mode.optdb.query(
        theano.compile.mode.OPT_FAST_RUN).optimize(fgraph)

    # print 'AFTER'
    # for node in fgraph.toposort():
    #     print node.op, node.inputs

    has_cx1hot = False
    has_cx1hotdx = False
    has_softmax = False
    has_softmaxdx = False
    for node in fgraph.toposort():
        if node.op == crossentropy_softmax_argmax_1hot_with_bias:
            has_cx1hot = True
        if node.op == crossentropy_softmax_1hot_with_bias_dx:
            has_cx1hotdx = True
        if node.op == softmax_op:
            has_softmax = True
        if node.op == softmax_grad:
            has_softmaxdx = True

    assert not has_cx1hot
    assert has_cx1hotdx
    assert has_softmax
    assert not has_softmaxdx