Python theano.tensor.switch() Examples

The following are 30 code examples of theano.tensor.switch(), drawn from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
Example #1
Source File: layers.py    From DL4MT with BSD 3-Clause "New" or "Revised" License
def shared_dropout_layer(shape, use_noise, trng, value, scaled=True):
    #re-scale dropout at training time, so we don't need to at test time
    if scaled:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32') / value,
            theano.shared(numpy.float32(1.)))
    else:
        proj = tensor.switch(
            use_noise,
            trng.binomial(shape, p=value, n=1, dtype='float32'),
            theano.shared(numpy.float32(value)))
    return proj


# feedforward layer: affine transformation + point-wise nonlinearity 
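A minimal sketch of how shared_dropout_layer is typically wired up, assuming an MRG random stream and a shared use_noise flag (both set up here for illustration; the names are not from the original file):

import numpy
import theano
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams

trng = MRG_RandomStreams(seed=1234)
use_noise = theano.shared(numpy.float32(1.))  # 1. during training, 0. at test time

x = tensor.matrix('x')
# value is the keep probability; scaled=True divides by value at training
# time (inverted dropout), so no rescaling is needed at test time
mask = shared_dropout_layer(x.shape, use_noise, trng, value=0.5)
f = theano.function([x], x * mask)

use_noise.set_value(0.)  # switch to test mode: the mask becomes a constant 1.
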
Example #2
Source File: util.py    From gated-graph-transformer-network with MIT License
def reduce_log_sum(tensor, axis=None, guaranteed_finite=False):
    """
    Sum probabilities in the log domain, i.e. return
        log(e^vec[0] + e^vec[1] + ...)
        = log(e^x e^(vec[0]-x) + e^x e^(vec[1]-x) + ...)
        = log(e^x [e^(vec[0]-x) + e^(vec[1]-x) + ...])
        = log(e^x) + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)
        = x + log(e^(vec[0]-x) + e^(vec[1]-x) + ...)
    For numerical stability, we choose x = max(vec)
    Note that if x is -inf, that means all values are -inf,
    so the answer should be -inf. In this case, choose x = 0
    """
    maxval = T.max(tensor, axis)
    maxval_full = T.max(tensor, axis, keepdims=True)
    if not guaranteed_finite:
        maxval = T.switch(T.isfinite(maxval), maxval, T.zeros_like(maxval))
        maxval_full = T.switch(T.isfinite(maxval_full), maxval_full, T.zeros_like(maxval_full))
    reduced_sum = T.sum(T.exp(tensor - maxval_full), axis)
    logsum = maxval + T.log(reduced_sum)
    return logsum 
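
A quick numeric check of the stabilization, compiled with plain Theano (the test values here are illustrative):

import numpy as np
import theano
import theano.tensor as T

vec = T.vector('vec')
f = theano.function([vec], reduce_log_sum(vec))

data = np.array([-1000., -1001., -1002.], dtype=theano.config.floatX)
# a naive np.log(np.sum(np.exp(data))) underflows to -inf here;
# the max-shifted version recovers the correct value
print(f(data))  # ~ -999.59
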
Example #3
Source File: optim.py    From iaf with MIT License
def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001):
    print('AdaMax', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2)
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')
    
    new = OrderedDict()
    
    for i in range(len(w)):
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        new[mom1] = (1-beta1) * mom1 + beta1 * g[i]
        new[_max] = T.maximum((1-beta2)*_max, abs(g[i]) + 1e-8)
        new[w[i]] = w[i] + alpha *  new[mom1] / new[_max]
                
    return new

# AdaMax that averages over multiple minibatches 
Example #4
Source File: theano_backend.py    From GraphicDesignPatternByPython with MIT License
def switch(condition, then_expression, else_expression):
    """Switches between two operations depending on a scalar value.

    Note that both `then_expression` and `else_expression`
    should be symbolic tensors of the *same shape*.

    # Arguments
        condition: scalar tensor (`int` or `bool`).
        then_expression: either a tensor, or a callable that returns a tensor.
        else_expression: either a tensor, or a callable that returns a tensor.

    # Returns
        The selected tensor.
    """
    if callable(then_expression):
        then_expression = then_expression()
    if callable(else_expression):
        else_expression = else_expression()
    cond_ndim = ndim(condition)
    expr_ndim = ndim(then_expression)
    if cond_ndim < expr_ndim:
        ndim_diff = expr_ndim - cond_ndim
        for _ in range(ndim_diff):
            condition = expand_dims(condition)
    return T.switch(condition, then_expression, else_expression) 
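
A standalone sketch of what this compiles down to, using plain Theano with a scalar condition (the variable names are illustrative):

import numpy as np
import theano
import theano.tensor as T

cond = T.iscalar('cond')
a = T.vector('a')
b = T.vector('b')
f = theano.function([cond, a, b], T.switch(cond, a, b))

ones = np.ones(3, dtype=theano.config.floatX)
print(f(1, ones, 2 * ones))  # -> [1. 1. 1.]
print(f(0, ones, 2 * ones))  # -> [2. 2. 2.]
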
Example #5
Source File: base.py    From carl with BSD 3-Clause "New" or "Revised" License
def bound(expression, out, *predicates):
    """Bound a theano expression.

    Parameters
    ----------
    * `expression` [theano expression]:
        The expression to bound.

    * `out` [theano expression]:
        The out-of-bounds value.

    * `*predicates` [list of theano expressions]:
        The list of predicates defining the boundaries of `expression`.

    Returns
    -------
    * `value` [theano expression]:
         The bounded expression.
    """
    guard = 1

    for p in predicates:
        guard *= p

    return T.switch(guard, expression, out) 
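
A hypothetical use of bound, guarding a log against non-positive inputs. Note that T.switch evaluates both branches, but only the selected value is returned:

import numpy as np
import theano
import theano.tensor as T

x = T.dscalar('x')
safe_log = bound(T.log(x), -np.inf, x > 0.)
f = theano.function([x], safe_log)

print(f(2.0))   # ~ 0.693
print(f(-1.0))  # -inf (the nan from log(-1) is masked out by the switch)
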
Example #6
Source File: keras_extensions.py    From visual_turing_test-tutorial with MIT License
def time_distributed_nonzero_max_pooling(x):
    """
    Computes the maximum along the first (time) dimension,
    treating entries equal to the mask value as masked out.
    No explicit mask tensor is taken; the mask value is
    hardcoded to 0.0 below.

    In:
        x - input; a 3D tensor
    """

    import theano.tensor as T

    mask_value = 0.0
    x = T.switch(T.eq(x, mask_value), -numpy.inf, x)
    masked_max_x = x.max(axis=1)
    # replace infinities with mask_value
    masked_max_x = T.switch(T.eq(masked_max_x, -numpy.inf), 0, masked_max_x)
    return masked_max_x 
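
A small check of the masking behaviour, assuming numpy is imported at module level as in the original file:

import numpy
import theano
import theano.tensor as T

x3 = T.tensor3('x3')
f = theano.function([x3], time_distributed_nonzero_max_pooling(x3))

data = numpy.zeros((1, 2, 2), dtype=theano.config.floatX)
data[0, 0, 0] = -3.0
# feature 0: the nonzero -3.0 wins over the masked 0.0 entries;
# feature 1: fully masked, so the -inf max is mapped back to 0.0
print(f(data))  # -> [[-3.  0.]]
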
Example #7
Source File: keras_extensions.py    From visual_turing_test-tutorial with MIT License
def time_distributed_masked_max(x, m):
    """
    Computes max along the first (time) dimension.

    In:
        x - input; a 3D tensor
        m - mask
        m_value - value for masking
    """
    # place infinities where mask is off
    m_value = 0.0
    tmp = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0)
    x_with_inf = x + K.expand_dims(tmp)
    x_max = K.max(x_with_inf, axis=1) 
    r = K.switch(K.equal(x_max, -numpy.inf), m_value, x_max)
    return r 


## classes  ##

# Transforms existing layers to masked layers 
Example #8
Source File: toolbox.py    From Theano-Lights with MIT License
def sgdmgc(cost, params, lr=1.0, alpha=0.1, max_magnitude=5.0, infDecay=0.1):
    """SGD with momentum and gradient clipping"""
    grads = T.grad(cost=cost, wrt=params)
    updates = []

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    for p, g in zip(params, grads):
        v = shared(p.get_value() * 0.)
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        v_new = v * (1.0 - alpha) - alpha * lr * g
        updates.append((v, v_new))
        updates.append((p, p + v_new ))
    
    return updates, norm 
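
The switch on adj_norm_gs implements global-norm clipping: gradients are left alone while their joint norm stays under max_magnitude, and are rescaled onto the norm ball otherwise. A plain-numpy sketch of the arithmetic:

import numpy as np

max_magnitude = 5.0
g = np.array([3.0, 4.0, 12.0])               # global norm is 13
sqrtnorm = np.sqrt(np.sum(g ** 2))
adj = max_magnitude / sqrtnorm if sqrtnorm >= max_magnitude else 1.0
print(np.linalg.norm(g * adj))               # -> 5.0
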
Example #9
Source File: __init__.py    From adversarial with BSD 3-Clause "New" or "Revised" License
def __init__(self, scale_grads=1, target_scale=.1,
            discriminator_default_input_include_prob = 1.,
            discriminator_input_include_probs=None,
            discriminator_default_input_scale=1.,
            discriminator_input_scales=None,
            generator_default_input_include_prob = 1.,
            generator_default_input_scale=1.,
            inference_default_input_include_prob=None,
            inference_input_include_probs=None,
            inference_default_input_scale=1.,
            inference_input_scales=None,
            init_now_train_generator=True,
            ever_train_discriminator=True,
            ever_train_generator=True,
            ever_train_inference=True,
            no_drop_in_d_for_g=False,
            alternate_g = False):
        self.__dict__.update(locals())
        del self.self
        # These allow you to dynamically switch off training parts.
        # If the corresponding ever_train_* is False, these have
        # no effect.
        self.now_train_generator = sharedX(init_now_train_generator)
        self.now_train_discriminator = sharedX(numpy.array(1., dtype='float32'))
        self.now_train_inference = sharedX(numpy.array(1., dtype='float32')) 
Example #10
Source File: PoolLayer.py    From NSC with MIT License
def __init__(self, input, rate, istrain):
        rate = numpy.float32(rate)
        self.input = input
        srng = T.shared_randomstreams.RandomStreams()
        mask = srng.binomial(n=1, p=numpy.float32(1-rate), size=input.shape, dtype='float32')
        self.output = T.switch(istrain, mask*self.input, self.input*numpy.float32(1-rate))
        self.params = [] 
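
A hypothetical wiring of the layer above; the enclosing class is assumed to be named Dropout here, since only the __init__ is shown:

import numpy
import theano
import theano.tensor as T

x = T.matrix('x')
istrain = T.iscalar('istrain')
layer = Dropout(x, rate=0.5, istrain=istrain)  # class name assumed
f = theano.function([x, istrain], layer.output)

data = numpy.ones((2, 3), dtype='float32')
print(f(data, 0))  # test mode: deterministic x * (1 - rate)
print(f(data, 1))  # train mode: entries randomly zeroed by the binomial mask
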
Example #11
Source File: ff_layers.py    From LV_groundhog with BSD 3-Clause "New" or "Revised" License
def fprop(self, all_states):
        if self.ntimes:
            stateshape0 = all_states.shape[0]
            shape0 = TT.switch(TT.gt(self.n, 0), self.n, all_states.shape[0])

            single_frame = TT.shape_padleft(all_states[stateshape0-1])
            mask = TT.alloc(numpy.float32(1), shape0, *[1 for k in range(all_states.ndim-1)])
            rval = single_frame * mask
            self.out = rval
            return rval

        single_frame = all_states[all_states.shape[0]-1]
        self.out = single_frame
        return single_frame 
Example #12
Source File: gru4rec.py    From sars_tutorial with MIT License
def execute(self, X):
            return self.lmbd * T.switch(T.ge(X, 0), X, self.alpha * (T.exp(X) - 1)) 
Example #13
Source File: gru4rec.py    From sars_tutorial with MIT License
def execute(self, X):
            return T.switch(T.ge(X, 0), X, self.alpha * (T.exp(X) - 1)) 
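
Examples #12 and #13 are the scaled and unscaled variants of the exponential linear unit. A plain-numpy check of the piecewise rule (alpha is assumed to be 1.0 here):

import numpy as np

alpha = 1.0
X = np.array([-2.0, 0.0, 3.0])
print(np.where(X >= 0, X, alpha * (np.exp(X) - 1)))  # -> [-0.8647  0.  3.]
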
Example #14
Source File: exponential.py    From carl with BSD 3-Clause "New" or "Revised" License
def __init__(self, inverse_scale=1.0):
        """Constructor.

        Parameters
        ----------
        * `inverse_scale` [float]:
            The inverse scale.
        """
        super(Exponential, self).__init__(inverse_scale=inverse_scale)

        # pdf
        self.pdf_ = T.switch(
            T.lt(self.X, 0.),
            0.,
            self.inverse_scale * T.exp(-self.inverse_scale * self.X)).ravel()
        self._make(self.pdf_, "pdf")

        # -log pdf
        self.nll_ = bound(
            -T.log(self.inverse_scale) + self.inverse_scale * self.X,
            np.inf,
            self.inverse_scale > 0.).ravel()
        self._make(self.nll_, "nll")

        # cdf
        self.cdf_ = (1. - T.exp(-self.inverse_scale * self.X)).ravel()
        self._make(self.cdf_, "cdf")

        # ppf
        self.ppf_ = -T.log(1. - self.p) / self.inverse_scale
        self._make(self.ppf_, "ppf", args=[self.p]) 
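
A raw-Theano sketch of just the switch-based pdf above, outside the carl class (names are local to the sketch):

import numpy as np
import theano
import theano.tensor as T

X = T.matrix('X')
inverse_scale = 2.0
# the switch zeroes the density on the negative half-line
pdf = T.switch(T.lt(X, 0.), 0., inverse_scale * T.exp(-inverse_scale * X)).ravel()
f = theano.function([X], pdf)

print(f(np.array([[-1.0], [0.5]], dtype=theano.config.floatX)))
# -> [0.      0.7358]
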
Example #15
Source File: gram.py    From gram with BSD 3-Clause "New" or "Revised" License
def dropout_layer(state_before, use_noise, trng, prob):
    # note: the test-time rescaling below hardcodes a keep probability of 0.5,
    # so this layer assumes prob == 0.5
    proj = T.switch(
        use_noise,
        state_before * trng.binomial(state_before.shape, p=prob, n=1, dtype=state_before.dtype),
        state_before * 0.5)
    return proj 
Example #16
Source File: helpers.py    From deep-prior with GNU General Public License v3.0
def TruncLin(x):
    """
    Truncated linear unit
    :param x: input value
    :return: max(min(x,1),-1)
    """
    import theano.tensor as T
    return T.switch(x < -1, -1, T.switch(x > 1, 1, x)) 
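
The nested switch acts as a clamp to [-1, 1]; a quick check:

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], TruncLin(x))
print(f(np.array([-3., -0.5, 0.5, 3.], dtype=theano.config.floatX)))
# -> [-1.  -0.5  0.5  1. ]
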
Example #17
Source File: PoolLayer.py    From NSC with MIT License
def __init__(self, input, rate, istrain):
        rate = numpy.float32(rate)
        self.input = input
        srng = T.shared_randomstreams.RandomStreams()
        mask = srng.binomial(n=1, p=numpy.float32(1-rate), size=input.shape, dtype='float32')
        self.output = T.switch(istrain, mask*self.input, self.input*numpy.float32(1-rate))
        self.params = [] 
Example #18
Source File: PoolLayer.py    From NSC with MIT License
def __init__(self, input, rate, istrain):
        rate = numpy.float32(rate)
        self.input = input
        srng = T.shared_randomstreams.RandomStreams()
        mask = srng.binomial(n=1, p=numpy.float32(1-rate), size=input.shape, dtype='float32')
        self.output = T.switch(istrain, mask*self.input, self.input*numpy.float32(1-rate))
        self.params = [] 
Example #19
Source File: helpers.py    From deep-prior with GNU General Public License v3.0
def InvReLU(x):
    """
    Inverted rectified linear unit
    :param x: input value
    :return: max(-x, 0)
    """
    import theano.tensor as T
    x *= -1.
    return T.switch(x < 0, 0, x) 
Example #20
Source File: simple.py    From attention-lvcsr with MIT License
def apply(self, input_):
        return tensor.switch(input_ > 0, input_, 0) 
Example #21
Source File: toolbox.py    From Theano-Lights with MIT License
def adamgc(cost, params, lr=0.0002, b1=0.1, b2=0.001, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)
    
    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    i = shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m) 
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates, norm

#-------------------------------------------------------------------------------------------------- 
Example #22
Source File: toolbox.py    From Theano-Lights with MIT License
def adamgc_(cost, params, lr=0.0002, b1=0.1, b2=0.01, e=1e-8, max_magnitude=5.0, infDecay=0.1):
    updates = []
    grads = T.grad(cost, params)
    
    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    i = shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - (1. - b1)**i_t
    fix2 = 1. - (1. - b2)**i_t
    lr_t = lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        m = shared(p.get_value() * 0.)
        v = shared(p.get_value() * 0.)
        m_t = (b1 * g) + ((1. - b1) * m) 
        v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
        g_t = m_t / (T.sqrt(v_t) + e)
        p_t = p - (lr_t * g_t)

        #e_t = shared(p.get_value() * 0.)
        #de_t = (srnd.normal(p.shape, std = 0.05, dtype=theano.config.floatX)*p_t - e_t)*0.05  #*p_t
        #p_t = p_t + de_t
        #updates.append((e_t, e_t + de_t))

        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates, norm 
Example #23
Source File: toolbox.py    From Theano-Lights with MIT License
def sgdgc(cost, params, lr=1.0, max_magnitude=5.0, infDecay=0.1):
    """SGD with gradient clipping"""
    grads = T.grad(cost=cost, wrt=params)
    updates = []

    norm = norm_gs(params, grads)
    sqrtnorm = T.sqrt(norm)
    #not_finite = T.or_(T.isnan(sqrtnorm), T.isinf(sqrtnorm))
    adj_norm_gs = T.switch(T.ge(sqrtnorm, max_magnitude), max_magnitude / sqrtnorm, 1.)

    for p, g in zip(params, grads):
        #g = T.switch(not_finite, infDecay * p, g * adj_norm_gs)
        updates.append((p, p - lr * g * adj_norm_gs))  
    
    return updates, norm 
Example #24
Source File: regression.py    From Diffusion-Probabilistic-Models with MIT License
def apply(self, input_):
        return T.switch(input_ > 0, input_, 0.05*input_) 
Example #25
Source File: session_encdec.py    From hred-qs with BSD 3-Clause "New" or "Revised" License
def compute_updates(self, training_cost, params):
        updates = []
        grads = T.grad(training_cost, params)
        grads = OrderedDict(zip(params, grads))
        # Clip stuff
        c = numpy.float32(self.cutoff)
        clip_grads = []

        norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
        normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, numpy.float32(1.))
        notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
        for p, g in grads.items():
            clip_grads.append((p, T.switch(notfinite, numpy.float32(.1) * p, g * normalization)))

        grads = OrderedDict(clip_grads)

        if self.updater == 'adagrad':
            updates = Adagrad(grads, self.lr)
        elif self.updater == 'sgd':
            raise Exception("Sgd not implemented!")
        elif self.updater == 'adadelta':
            updates = Adadelta(grads)
        elif self.updater == 'rmsprop':
            updates = RMSProp(grads, self.lr)
        elif self.updater == 'adam':
            updates = Adam(grads)
        else:
            raise Exception("Updater not understood!")
        return updates 
Example #26
Source File: updates.py    From dcgan_code with MIT License
def clip_norm(g, c, n):
    if c > 0:
        g = T.switch(T.ge(n, c), g*c/n, g)
    return g 
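
A usage sketch, assuming n is the symbolic L2 norm of g (an assumption; the caller is not shown above):

import numpy as np
import theano
import theano.tensor as T

g = T.vector('g')
n = T.sqrt(T.sum(T.sqr(g)))
f = theano.function([g], T.sqrt(T.sum(T.sqr(clip_norm(g, 5.0, n)))))
print(f(np.array([3., 4., 12.], dtype=theano.config.floatX)))  # -> 5.0
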
Example #27
Source File: optim.py    From iaf with MIT License
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print('AdaMax_Avg2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'beta3:', beta3, 'n_accum:', n_accum)
    
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()
    
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it,n_accum), 0)
    update = T.eq(T.mod(it,n_accum), n_accum-1)
    
    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)
        
            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha *  new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]   
    return new, ws_avg 
Example #28
Source File: optim.py    From iaf with MIT License
def AdaMaxAvg(ws, ws_avg, objective, alpha=.01, beta1=.1, beta2=.001, update_keys=None, disconnected_inputs='raise'):
    print('AdaMax_Avg', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2)
    
    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs=disconnected_inputs) #warn/raise
    
    if update_keys is None:
        update_keys = [ws[j].keys() for j in range(len(ws))]
    
    new = OrderedDict()
    for j in range(len(ws)):
        if ws_avg is not None:
            w_avg = ws_avg[j]
        for i in update_keys[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0. + 1e-8)
            
            new[mom1] = (1-beta1) * mom1 + beta1 * _g
            new[_max] = T.maximum((1-beta2)*_max, abs(_g) + 1e-8)
            new[_w] = _w + alpha *  new[mom1] / new[_max]
            if ws_avg is not None:
                new[w_avg[i]] = beta2 * _w + (1.-beta2) * w_avg[i]
    return new

# Eve that keeps running average of parameter 
Example #29
Source File: optim.py    From iaf with MIT License
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print('AdaMax2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'n_accum:', n_accum)
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')
    
    new = OrderedDict()
    
    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it],n_accum), 0)
    update = T.eq(T.mod(new[it],n_accum), n_accum-1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)
        
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha *  new[mom1] / new[_max], w[i])
                
    return new

# AdaMax that keeps running average of parameter 
Example #30
Source File: activation.py    From OpenDeep with Apache License 2.0
def elu(x, alpha=1):
    """
    (from Lasagne https://github.com/Lasagne/Lasagne/blob/master/lasagne/nonlinearities.py)

    Exponential Linear Unit :math:`\\varphi(x) = (x > 0) ? x : e^x - 1`
    The Exponential Linear Unit (ELU) was introduced in [1]_. Compared to the
    linear rectifier :func:`rectify`, it has a mean activation closer to zero
    and nonzero gradient for negative input, which can help convergence.
    Compared to the leaky rectifier, it saturates for
    highly negative inputs.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).
    Returns
    -------
    float32
        The output of the exponential linear unit for the activation.

    References
    ----------
    .. [1] Djork-Arne Clevert, Thomas Unterthiner, Sepp Hochreiter (2015):
       Fast and Accurate Deep Network Learning by Exponential Linear Units
       (ELUs), http://arxiv.org/abs/1511.07289
    """
    assert alpha > 0, "alpha parameter to ELU has to be > 0."
    return T.switch(x > 0, x, alpha*(T.exp(x) - 1))
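
A quick sanity check of the unit above (the test values are illustrative):

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
f = theano.function([x], elu(x))
print(f(np.array([-2., 0., 3.], dtype=theano.config.floatX)))
# -> [-0.8647  0.      3.    ]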