Python theano.tensor.maximum() Examples
The following are 30 code examples of theano.tensor.maximum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
Example #1
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0 | 6 votes |
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #2
Source File: nn.py From GELUs with MIT License | 6 votes |
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
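As a quick illustration (not part of the original source file), the (shared variable, update expression) pairs returned by adamax_updates() can be passed straight to theano.function. The tiny least-squares model below is hypothetical and only shows the call pattern; it assumes the module's own aliases (import theano as th, import numpy as np) are in scope for adamax_updates itself.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(5, dtype=theano.config.floatX), name='w')
cost = T.sqr(T.dot(x, w) - y).mean()

# One call to train_fn applies a single AdaMax step to w.
updates = adamax_updates([w], cost, lr=0.001)
train_fn = theano.function([x, y], cost, updates=updates)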
Example #3
Source File: NN_ConvLayer_3D.py From Deep_MRI_brain_extraction with MIT License | 6 votes |
def max_pool_along_channel_axis(sym_input, pool_factor):
    """ for 3D conv."""
    s = None
    for i in xrange(pool_factor):
        t = sym_input[:, :, i::pool_factor]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)
    return s

# Ns, Ts, C, Hs, Ws = 1, 70, 1, 70, 70  -> 70^3
# Nf, Tf, C, Hf, Wf = 32, 5 , 1, 5 , 5  -> 32 filters of shape 5^3
# signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
# filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
#
# in 3D
# input:   (1, 70, 3, 70, 70)
# filters: (32, 5 , 3, 5 , 5)
# --> output: (1, 66, 32, 66, 66)
Example #4
Source File: neuagent.py From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License | 6 votes |
def compute_emb(x, W):

    def _step(xi, emb, W):
        if prm.att_doc:
            new_shape = (xi.shape[0], xi.shape[1], xi.shape[2], prm.dim_emb)
        else:
            new_shape = (xi.shape[0], xi.shape[1], prm.dim_emb)

        out = W[xi.flatten()].reshape(new_shape).sum(-2)
        return out / tensor.maximum(1., tensor.neq(xi, -1).astype('float32').sum(-1, keepdims=True))

    if prm.att_doc:
        emb_init = tensor.alloc(0., x.shape[1], x.shape[2], prm.dim_emb)
    else:
        emb_init = tensor.alloc(0., x.shape[1], prm.dim_emb)

    (embs), scan_updates = theano.scan(_step,
                                       sequences=[x],
                                       outputs_info=[emb_init],
                                       non_sequences=[W],
                                       name='emb_scan',
                                       n_steps=x.shape[0])
    return embs
Example #5
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 6 votes |
def my_max_pool_2d(sym_input, pool_shape=(2, 2)):
    """ this one is pure theano. Hence all gradient-related stuff is working! No dimshuffling"""
    s = None
    for i in xrange(pool_shape[1]):
        t = sym_input[:, :, :, i::pool_shape[1]]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)

    temp = s
    s = None
    for i in xrange(pool_shape[0]):
        t = temp[:, :, i::pool_shape[0], :]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)

    sym_ret = s
    return sym_ret
Example #6
Source File: pool.py From D-VAE with MIT License | 6 votes |
def max_pool_2d_same_size(input, patch_size):
    """
    Takes as input a 4-D tensor. It sets all non maximum values
    of non-overlapping patches of size (patch_size[0],patch_size[1]) to zero,
    keeping only the maximum values. The output has the same dimensions as
    the input.

    Parameters
    ----------
    input : 4-D theano tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    patch_size : tuple of length 2
        Size of the patch (patch height, patch width).
        (2,2) will retain only one non-zero value per patch of 4 values.

    """
    output = Pool(patch_size, True)(input)
    outs = MaxPoolGrad(patch_size, True)(input, output, output)
    return outs
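A hedged usage sketch (not from the original project) of the function above, assuming Pool and MaxPoolGrad from theano.tensor.signal.pool are importable in its module as in Theano itself; the concrete input values are only illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')
f = theano.function([x], max_pool_2d_same_size(x, (2, 2)))

img = np.arange(16, dtype=theano.config.floatX).reshape(1, 1, 4, 4)
print(f(img))
# Only the maximum of each non-overlapping 2x2 patch survives; every other
# entry is zero, and the output keeps the input shape (1, 1, 4, 4).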
Example #7
Source File: optim.py From iaf with MIT License | 6 votes |
def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001):
    print 'AdaMax', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    for i in range(len(w)):
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        new[mom1] = (1-beta1) * mom1 + beta1 * g[i]
        new[_max] = T.maximum((1-beta2)*_max, abs(g[i]) + 1e-8)
        new[w[i]] = w[i] + alpha * new[mom1] / new[_max]

    return new

# AdaMax that averages over multiple minibatches
Example #8
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #9
Source File: __init__.py From adversarial with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_noise(self, size):
    # Allow just requesting batch size
    if isinstance(size, int):
        size = (size, self.get_input_space().get_total_dimension())
    if not hasattr(self, 'noise'):
        self.noise = "gaussian"
    if self.noise == "uniform":
        return self.theano_rng.uniform(low=-np.sqrt(3), high=np.sqrt(3),
                                       size=size, dtype='float32')
    elif self.noise == "gaussian":
        return self.theano_rng.normal(size=size, dtype='float32')
    elif self.noise == "spherical":
        noise = self.theano_rng.normal(size=size, dtype='float32')
        noise = noise / T.maximum(1e-7, T.sqrt(T.sqr(noise).sum(axis=1))).dimshuffle(0, 'x')
        return noise
    else:
        raise NotImplementedError(self.noise)
Example #10
Source File: nn.py From weightnorm with MIT License | 6 votes |
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
Example #11
Source File: attention.py From attention-lvcsr with MIT License | 6 votes |
def compute_weights(self, energies, attended_mask):
    if self.energy_normalizer == 'softmax':
        logger.debug("Using softmax attention weights normalization")
        energies = energies - energies.max(axis=0)
        unnormalized_weights = tensor.exp(energies)
    elif self.energy_normalizer == 'logistic':
        logger.debug("Using smoothfocus (logistic sigm) "
                     "attention weights normalization")
        unnormalized_weights = tensor.nnet.sigmoid(energies)
    elif self.energy_normalizer == 'relu':
        logger.debug("Using ReLU attention weights normalization")
        unnormalized_weights = tensor.maximum(energies/1000., 0.0)
    else:
        raise Exception("Unknown energey_normalizer: {}"
                        .format(self.energy_computer))

    if attended_mask:
        unnormalized_weights *= attended_mask

    # If mask consists of all zeros use 1 as the normalization coefficient
    normalization = (unnormalized_weights.sum(axis=0) +
                     tensor.all(1 - attended_mask, axis=0))
    return unnormalized_weights / normalization
Example #12
Source File: depth.py From Depth-Map-Prediction with GNU General Public License v3.0 | 6 votes |
def define_cost(self, pred, y0, m0):
    bsize = self.bsize
    npix = int(np.prod(test_shape(y0)[1:]))
    y0_target = y0.reshape((self.bsize, npix))
    y0_mask = m0.reshape((self.bsize, npix))
    pred = pred.reshape((self.bsize, npix))

    p = pred * y0_mask
    t = y0_target * y0_mask
    d = (p - t)

    nvalid_pix = T.sum(y0_mask, axis=1)
    depth_cost = (T.sum(nvalid_pix * T.sum(d**2, axis=1))
                  - 0.5*T.sum(T.sum(d, axis=1)**2)) \
                 / T.maximum(T.sum(nvalid_pix**2), 1)

    return depth_cost
Example #13
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def l2_normalize(x, axis=None):
    square_sum = T.sum(T.square(x), axis=axis, keepdims=True)
    norm = T.sqrt(T.maximum(square_sum, epsilon()))
    return x / norm
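A minimal sketch of how this backend function behaves (not from the original project); it assumes epsilon() from the Keras backend configuration is in scope, as it is inside theano_backend.py, and the numbers are only illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
fn = theano.function([x], l2_normalize(x, axis=-1))

v = np.array([[3.0, 4.0]], dtype=theano.config.floatX)
print(fn(v))  # roughly [[0.6, 0.8]]; the row's L2 norm is 5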
Example #14
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def maximum(x, y):
    return T.maximum(x, y)
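Since this backend function is a thin wrapper, here is a small sketch (values illustrative, not from the original project) of the underlying op it exposes: T.maximum is an element-wise, broadcasting maximum.

import numpy as np
import theano
import theano.tensor as T

a = T.vector('a')
f = theano.function([a], T.maximum(a, 0.0))  # broadcasts the scalar 0.0
print(f(np.array([-1.0, 0.5, 2.0], dtype=theano.config.floatX)))
# [ 0.   0.5  2. ]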
Example #15
Source File: gru4rec.py From sars_tutorial with MIT License | 5 votes |
def relu(self, X):
    return T.maximum(X, 0)
Example #16
Source File: hgru4rec.py From sars_tutorial with MIT License | 5 votes |
def relu(self, X):
    return T.maximum(X, 0)
Example #17
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 5 votes |
def get_reconstructed_input(self, hidden):
    """ Computes the reconstructed input given the values of the hidden layer """
    repeated_conv = conv.conv2d(input=hidden, filters=self.W_prime, border_mode='full')
    multiple_conv_out = [repeated_conv.flatten()] * np.prod(self.poolsize)
    stacked_conv_neibs = T.stack(*multiple_conv_out).T
    stretch_unpooling_out = theano.sandbox.neighbours.neibs2images(stacked_conv_neibs, self.pl, self.x.shape)
    rectified_linear_activation = lambda x: T.maximum(0.0, x)
    return rectified_linear_activation(stretch_unpooling_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))
Example #18
Source File: nn.py From weightnorm with MIT License | 5 votes |
def relu(x):
    return T.maximum(x, 0)
Example #19
Source File: expressions.py From attention-lvcsr with MIT License | 5 votes |
def monotonicity_penalty(weights, mask_x=None):
    cumsums = tensor.cumsum(weights, axis=2)
    penalties = tensor.maximum(cumsums[1:] - cumsums[:-1], 0).sum(axis=2)
    if mask_x:
        penalties *= mask_x[1:]
    return penalties.sum()
Example #20
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 5 votes |
def max_pool_along_second_axis(sym_input, pool_factor):
    """ for MLP and 2D conv"""
    s = None
    for i in xrange(pool_factor):
        t = sym_input[:, i::pool_factor]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)
    return s
Example #21
Source File: costs.py From dcgan_code with MIT License | 5 votes |
def Hinge(y_true, y_pred):
    return T.maximum(1. - y_true * y_pred, 0.).mean()
Example #22
Source File: costs.py From dcgan_code with MIT License | 5 votes |
def SquaredHinge(y_true, y_pred):
    return T.sqr(T.maximum(1. - y_true * y_pred, 0.)).mean()
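A small hedged numeric check of the two cost functions above (not part of the original project): the 1 - y_true * y_pred margin form assumes labels in {-1, +1}, and the concrete values below are only illustrative.

import numpy as np
import theano
import theano.tensor as T

yt = T.vector('y_true')
yp = T.vector('y_pred')
hinge_fn = theano.function([yt, yp], Hinge(yt, yp))
sq_hinge_fn = theano.function([yt, yp], SquaredHinge(yt, yp))

y_true = np.array([1., -1.], dtype=theano.config.floatX)
y_pred = np.array([0.5, 0.5], dtype=theano.config.floatX)
print(hinge_fn(y_true, y_pred))     # mean(max(1-0.5, 0), max(1+0.5, 0)) = 1.0
print(sq_hinge_fn(y_true, y_pred))  # mean(0.25, 2.25) = 1.25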
Example #23
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it,n_accum), 0)
    update = T.eq(T.mod(it,n_accum), n_accum-1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
Example #24
Source File: optim.py From iaf with MIT License | 5 votes |
def Eve(w, w_avg, f, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, disconnected_inputs='raise'):
    print 'Eve', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3

    mom = {}
    _max = {}
    delta = {}
    w_prime = {}
    for i in w:
        mom[i] = G.sharedf(w[i].get_value() * 0.)
        _max[i] = G.sharedf(w[i].get_value() * 0. + 1e-8)
        delta[i] = G.sharedf(w[i].get_value() * 0.)
        w_prime[i] = w[i] + (1-beta1)/beta1 * delta[i]

    train_cost = f(w_prime).mean()
    g = G.ndict.T_grad(train_cost, w, disconnected_inputs=disconnected_inputs) #warn/raise

    new = OrderedDict()
    for i in w:
        new[mom[i]] = (1-beta1) * mom[i] + beta1 * g[i]
        new[_max[i]] = T.maximum((1-beta2)*_max[i], abs(g[i]) + 1e-8)
        new[delta[i]] = alpha * new[mom[i]] / new[_max[i]]
        new[w[i]] = w[i] + new[delta[i]]

    for i in w:
        new[w_avg[i]] = beta3 * w[i] + (1.-beta3) * w_avg[i]
    return train_cost, new

# AdaMax that keeps running average of parameter
# Accumulates gradient over n_accum minibatches
Example #25
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMaxAvg(ws, ws_avg, objective, alpha=.01, beta1=.1, beta2=.001, update_keys=None, disconnected_inputs='raise'):
    print 'AdaMax_Avg', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs=disconnected_inputs) #warn/raise

    if update_keys is None:
        update_keys = [ws[j].keys() for j in range(len(ws))]

    new = OrderedDict()
    for j in range(len(ws)):
        if ws_avg is not None:
            w_avg = ws_avg[j]
        for i in update_keys[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0. + 1e-8)

            new[mom1] = (1-beta1) * mom1 + beta1 * _g
            new[_max] = T.maximum((1-beta2)*_max, abs(_g) + 1e-8)
            new[_w] = _w + alpha * new[mom1] / new[_max]
            if ws_avg is not None:
                new[w_avg[i]] = beta2 * _w + (1.-beta2) * w_avg[i]
    return new

# Eve that keeps running average of parameter
Example #26
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print 'AdaMax2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2, 'n_accum:', n_accum
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it],n_accum), 0)
    update = T.eq(T.mod(new[it],n_accum), n_accum-1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])

    return new

# AdaMax that keeps running average of parameter
Example #27
Source File: ctc_cost.py From CTC-Connectionist-Temporal-Classification with Apache License 2.0 | 5 votes |
def log_add(a, b):
    max_ = tensor.maximum(a, b)
    return (max_ + tensor.log1p(tensor.exp(a + b - 2 * max_)))
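This is the standard numerically stable form of log(exp(a) + exp(b)): factoring out max(a, b) gives max + log(1 + exp(min - max)), and a + b - 2*max equals min - max. A hedged numeric check (values illustrative, not from the original project):

import theano
import theano.tensor as T

a = T.dscalar('a')
b = T.dscalar('b')
f = theano.function([a, b], log_add(a, b))

print(f(-1000.0, -1001.0))
# about -999.687 = -1000 + log(1 + exp(-1)); the naive
# np.log(np.exp(-1000) + np.exp(-1001)) would underflow to -inf.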
Example #28
Source File: activation.py From OpenDeep with Apache License 2.0 | 5 votes |
def rectifier(x, leaky=0):
    """
    Returns the element-wise rectifier (ReLU) applied to x.

    Parameters
    ----------
    x : tensor
        Symbolic Tensor (or compatible).
    leaky : scalar or tensor
        Slope for negative input, usually between 0 and 1. The default value
        of 0 will lead to the standard rectifier, 1 will lead to a linear
        activation function, and any value in between will give a leaky
        rectifier. A shared variable (broadcastable against x) will result in
        a parameterized rectifier with learnable slope(s).

    Returns
    -------
    tensor
        Element-wise rectifier: rectifier(x) = max(0,x) applied to `x`.
    """
    # return T.maximum(as_floatX(0), x)
    # below fix is taken from Lasagne framework:
    # https://github.com/benanne/Lasagne/blob/master/lasagne/nonlinearities.py
    # The following is faster than lambda x: T.maximum(0, x)
    # Thanks to @SnippyHolloW for pointing this out.
    # See: https://github.com/SnippyHolloW/abnet/blob/807aeb9/layers.py#L15
    # return (x + abs(x)) / as_floatX(2.0)
    return T.nnet.relu(x, alpha=leaky)
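A short usage sketch of rectifier() with and without a fixed leak (values illustrative, not from the original project):

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
relu_fn = theano.function([x], rectifier(x))              # max(0, x)
leaky_fn = theano.function([x], rectifier(x, leaky=0.1))  # 0.1*x for x < 0

v = np.array([-2.0, 3.0], dtype=theano.config.floatX)
print(relu_fn(v))   # [ 0.  3.]
print(leaky_fn(v))  # [-0.2  3.]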
Example #29
Source File: nn.py From weightnorm with MIT License | 5 votes |
def lrelu(x, a=0.1):
    return T.maximum(x, a*x)
Example #30
Source File: basic.py From attention-lvcsr with MIT License | 5 votes |
def structured_maximum(x, y):
    """
    Structured elemwise maximum of sparse matrix x by scalar y.

    """
    # see decorator for function body