Python theano.tensor.maximum() Examples
The following are 30 code examples of theano.tensor.maximum(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module theano.tensor, or try the search function.
Example #1
Source File: theano_backend.py From Att-ChemdNER with Apache License 2.0 | 6 votes |
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #2
Source File: nn.py From GELUs with MIT License | 6 votes |
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
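As a quick illustration (not part of the original source file), the (shared variable, update expression) pairs returned by adamax_updates() can be passed straight to theano.function. The tiny least-squares model below is hypothetical and only shows the call pattern; it assumes the module's own aliases (import theano as th, import numpy as np) are in scope for adamax_updates itself.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.vector('y')
w = theano.shared(np.zeros(5, dtype=theano.config.floatX), name='w')
cost = T.sqr(T.dot(x, w) - y).mean()

# One call to train_fn applies a single AdaMax step to w.
updates = adamax_updates([w], cost, lr=0.001)
train_fn = theano.function([x, y], cost, updates=updates)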
Example #3
Source File: NN_ConvLayer_3D.py From Deep_MRI_brain_extraction with MIT License | 6 votes |
def max_pool_along_channel_axis(sym_input, pool_factor):
    """ for 3D conv."""
    s = None
    for i in xrange(pool_factor):
        t = sym_input[:, :, i::pool_factor]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)
    return s

# Ns, Ts, C, Hs, Ws = 1, 70, 1, 70, 70  -> 70^3
# Nf, Tf, C, Hf, Wf = 32, 5 , 1, 5 , 5  -> 32 filters of shape 5^3
# signals = numpy.arange(Ns*Ts*C*Hs*Ws).reshape(Ns, Ts, C, Hs, Ws).astype('float32')
# filters = numpy.arange(Nf*Tf*C*Hf*Wf).reshape(Nf, Tf, C, Hf, Wf).astype('float32')
#
# in 3D
# input:   (1, 70, 3, 70, 70)
# filters: (32, 5 , 3, 5 , 5)
# --> output: (1, 66, 32, 66, 66)
Example #4
Source File: neuagent.py From dl4ir-webnav with BSD 3-Clause "New" or "Revised" License | 6 votes |
def compute_emb(x, W):

    def _step(xi, emb, W):
        if prm.att_doc:
            new_shape = (xi.shape[0], xi.shape[1], xi.shape[2], prm.dim_emb)
        else:
            new_shape = (xi.shape[0], xi.shape[1], prm.dim_emb)

        out = W[xi.flatten()].reshape(new_shape).sum(-2)
        return out / tensor.maximum(1., tensor.neq(xi, -1).astype('float32').sum(-1, keepdims=True))

    if prm.att_doc:
        emb_init = tensor.alloc(0., x.shape[1], x.shape[2], prm.dim_emb)
    else:
        emb_init = tensor.alloc(0., x.shape[1], prm.dim_emb)

    (embs), scan_updates = theano.scan(_step,
                                       sequences=[x],
                                       outputs_info=[emb_init],
                                       non_sequences=[W],
                                       name='emb_scan',
                                       n_steps=x.shape[0])
    return embs
Example #5
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 6 votes |
def my_max_pool_2d(sym_input, pool_shape=(2, 2)):
    """ this one is pure theano. Hence all gradient-related stuff is working! No dimshuffling"""
    s = None
    for i in xrange(pool_shape[1]):
        t = sym_input[:, :, :, i::pool_shape[1]]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)

    temp = s
    s = None
    for i in xrange(pool_shape[0]):
        t = temp[:, :, i::pool_shape[0], :]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)

    sym_ret = s
    return sym_ret
Example #6
Source File: pool.py From D-VAE with MIT License | 6 votes |
def max_pool_2d_same_size(input, patch_size):
    """
    Takes as input a 4-D tensor. It sets all non maximum values
    of non-overlapping patches of size (patch_size[0],patch_size[1]) to zero,
    keeping only the maximum values. The output has the same dimensions as
    the input.

    Parameters
    ----------
    input : 4-D theano tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    patch_size : tuple of length 2
        Size of the patch (patch height, patch width).
        (2,2) will retain only one non-zero value per patch of 4 values.

    """
    output = Pool(patch_size, True)(input)
    outs = MaxPoolGrad(patch_size, True)(input, output, output)
    return outs
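A hedged usage sketch (not from the original project) of the function above, assuming Pool and MaxPoolGrad from theano.tensor.signal.pool are importable in its module as in Theano itself; the concrete input values are only illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')
f = theano.function([x], max_pool_2d_same_size(x, (2, 2)))

img = np.arange(16, dtype=theano.config.floatX).reshape(1, 1, 4, 4)
print(f(img))
# Only the maximum of each non-overlapping 2x2 patch survives; every other
# entry is zero, and the output keeps the input shape (1, 1, 4, 4).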
Example #7
Source File: optim.py From iaf with MIT License | 6 votes |
def AdaMax(w, objective, alpha=.01, beta1=.1, beta2=.001):
    print 'AdaMax', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    for i in range(len(w)):
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        new[mom1] = (1-beta1) * mom1 + beta1 * g[i]
        new[_max] = T.maximum((1-beta2)*_max, abs(g[i]) + 1e-8)
        new[w[i]] = w[i] + alpha * new[mom1] / new[_max]

    return new

# AdaMax that averages over multiple minibatches
Example #8
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 6 votes |
def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev):
    active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()]
    active_next = T.cast(T.minimum(
        T.maximum(
            active + 1,
            T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1
        ), log_p_curr.shape[0]), 'int32')

    common_factor = T.max(log_p_prev[:active])
    p_prev = T.exp(log_p_prev[:active] - common_factor)
    _p_prev = zeros[:active_next]
    # copy over
    _p_prev = T.set_subtensor(_p_prev[:active], p_prev)
    # previous transitions
    _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1])
    # skip transitions
    _p_prev = T.inc_subtensor(_p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs])
    updated_log_p_prev = T.log(_p_prev) + common_factor

    log_p_next = T.set_subtensor(
        zeros[:active_next],
        log_p_curr[:active_next] + updated_log_p_prev
    )
    return active_next, log_p_next
Example #9
Source File: __init__.py From adversarial with BSD 3-Clause "New" or "Revised" License | 6 votes |
def get_noise(self, size):
    # Allow just requesting batch size
    if isinstance(size, int):
        size = (size, self.get_input_space().get_total_dimension())
    if not hasattr(self, 'noise'):
        self.noise = "gaussian"
    if self.noise == "uniform":
        return self.theano_rng.uniform(low=-np.sqrt(3), high=np.sqrt(3),
                                       size=size, dtype='float32')
    elif self.noise == "gaussian":
        return self.theano_rng.normal(size=size, dtype='float32')
    elif self.noise == "spherical":
        noise = self.theano_rng.normal(size=size, dtype='float32')
        noise = noise / T.maximum(1e-7, T.sqrt(T.sqr(noise).sum(axis=1))).dimshuffle(0, 'x')
        return noise
    else:
        raise NotImplementedError(self.noise)
Example #10
Source File: nn.py From weightnorm with MIT License | 6 votes |
def adamax_updates(params, cost, lr=0.001, mom1=0.9, mom2=0.999):
    updates = []
    grads = T.grad(cost, params)
    for p, g in zip(params, grads):
        mg = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        v = th.shared(np.cast[th.config.floatX](p.get_value() * 0.))
        if mom1 > 0:
            v_t = mom1*v + (1. - mom1)*g
            updates.append((v, v_t))
        else:
            v_t = g
        mg_t = T.maximum(mom2*mg, abs(g))
        g_t = v_t / (mg_t + 1e-6)
        p_t = p - lr * g_t
        updates.append((mg, mg_t))
        updates.append((p, p_t))
    return updates
Example #11
Source File: attention.py From attention-lvcsr with MIT License | 6 votes |
def compute_weights(self, energies, attended_mask):
    if self.energy_normalizer == 'softmax':
        logger.debug("Using softmax attention weights normalization")
        energies = energies - energies.max(axis=0)
        unnormalized_weights = tensor.exp(energies)
    elif self.energy_normalizer == 'logistic':
        logger.debug("Using smoothfocus (logistic sigm) "
                     "attention weights normalization")
        unnormalized_weights = tensor.nnet.sigmoid(energies)
    elif self.energy_normalizer == 'relu':
        logger.debug("Using ReLU attention weights normalization")
        unnormalized_weights = tensor.maximum(energies/1000., 0.0)
    else:
        raise Exception("Unknown energey_normalizer: {}"
                        .format(self.energy_computer))

    if attended_mask:
        unnormalized_weights *= attended_mask

    # If mask consists of all zeros use 1 as the normalization coefficient
    normalization = (unnormalized_weights.sum(axis=0) +
                     tensor.all(1 - attended_mask, axis=0))
    return unnormalized_weights / normalization
Example #12
Source File: depth.py From Depth-Map-Prediction with GNU General Public License v3.0 | 6 votes |
def define_cost(self, pred, y0, m0):
    bsize = self.bsize
    npix = int(np.prod(test_shape(y0)[1:]))
    y0_target = y0.reshape((self.bsize, npix))
    y0_mask = m0.reshape((self.bsize, npix))
    pred = pred.reshape((self.bsize, npix))

    p = pred * y0_mask
    t = y0_target * y0_mask
    d = (p - t)

    nvalid_pix = T.sum(y0_mask, axis=1)
    depth_cost = (T.sum(nvalid_pix * T.sum(d**2, axis=1))
                  - 0.5*T.sum(T.sum(d, axis=1)**2)) \
                 / T.maximum(T.sum(nvalid_pix**2), 1)

    return depth_cost
Example #13
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def l2_normalize(x, axis=None):
    square_sum = T.sum(T.square(x), axis=axis, keepdims=True)
    norm = T.sqrt(T.maximum(square_sum, epsilon()))
    return x / norm
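A minimal sketch of how this backend function behaves (not from the original project); it assumes epsilon() from the Keras backend configuration is in scope, as it is inside theano_backend.py, and the numbers are only illustrative.

import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
fn = theano.function([x], l2_normalize(x, axis=-1))

v = np.array([[3.0, 4.0]], dtype=theano.config.floatX)
print(fn(v))  # roughly [[0.6, 0.8]]; the row's L2 norm is 5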
Example #14
Source File: theano_backend.py From GraphicDesignPatternByPython with MIT License | 5 votes |
def maximum(x, y):
    return T.maximum(x, y)
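Since this backend function is a thin wrapper, here is a small sketch (values illustrative, not from the original project) of the underlying op it exposes: T.maximum is an element-wise, broadcasting maximum.

import numpy as np
import theano
import theano.tensor as T

a = T.vector('a')
f = theano.function([a], T.maximum(a, 0.0))  # broadcasts the scalar 0.0
print(f(np.array([-1.0, 0.5, 2.0], dtype=theano.config.floatX)))
# [ 0.   0.5  2. ]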
Example #15
Source File: gru4rec.py From sars_tutorial with MIT License | 5 votes |
def relu(self, X):
    return T.maximum(X, 0)
Example #16
Source File: hgru4rec.py From sars_tutorial with MIT License | 5 votes |
def relu(self, X):
    return T.maximum(X, 0)
Example #17
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 5 votes |
def get_reconstructed_input(self, hidden):
    """ Computes the reconstructed input given the values of the hidden layer """
    repeated_conv = conv.conv2d(input=hidden, filters=self.W_prime, border_mode='full')
    multiple_conv_out = [repeated_conv.flatten()] * np.prod(self.poolsize)
    stacked_conv_neibs = T.stack(*multiple_conv_out).T
    stretch_unpooling_out = theano.sandbox.neighbours.neibs2images(stacked_conv_neibs, self.pl, self.x.shape)
    rectified_linear_activation = lambda x: T.maximum(0.0, x)
    return rectified_linear_activation(stretch_unpooling_out + self.b_prime.dimshuffle('x', 0, 'x', 'x'))
Example #18
Source File: nn.py From weightnorm with MIT License | 5 votes |
def relu(x):
    return T.maximum(x, 0)
Example #19
Source File: expressions.py From attention-lvcsr with MIT License | 5 votes |
def monotonicity_penalty(weights, mask_x=None):
    cumsums = tensor.cumsum(weights, axis=2)
    penalties = tensor.maximum(cumsums[1:] - cumsums[:-1], 0).sum(axis=2)
    if mask_x:
        penalties *= mask_x[1:]
    return penalties.sum()
Example #20
Source File: NN_ConvLayer_2D.py From Deep_MRI_brain_extraction with MIT License | 5 votes |
def max_pool_along_second_axis(sym_input, pool_factor):
    """ for MLP and 2D conv"""
    s = None
    for i in xrange(pool_factor):
        t = sym_input[:, i::pool_factor]
        if s is None:
            s = t
        else:
            s = T.maximum(s, t)
    return s
Example #21
Source File: costs.py From dcgan_code with MIT License | 5 votes |
def Hinge(y_true, y_pred):
    return T.maximum(1. - y_true * y_pred, 0.).mean()
Example #22
Source File: costs.py From dcgan_code with MIT License | 5 votes |
def SquaredHinge(y_true, y_pred):
    return T.sqr(T.maximum(1. - y_true * y_pred, 0.)).mean()
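A small hedged numeric check of the two cost functions above (not part of the original project): the 1 - y_true * y_pred margin form assumes labels in {-1, +1}, and the concrete values below are only illustrative.

import numpy as np
import theano
import theano.tensor as T

yt = T.vector('y_true')
yp = T.vector('y_pred')
hinge_fn = theano.function([yt, yp], Hinge(yt, yp))
sq_hinge_fn = theano.function([yt, yp], SquaredHinge(yt, yp))

y_true = np.array([1., -1.], dtype=theano.config.floatX)
y_pred = np.array([0.5, 0.5], dtype=theano.config.floatX)
print(hinge_fn(y_true, y_pred))     # mean(max(1-0.5, 0), max(1+0.5, 0)) = 1.0
print(sq_hinge_fn(y_true, y_pred))  # mean(0.25, 2.25) = 1.25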
Example #23
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3,'n_accum:',n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it,n_accum), 0)
    update = T.eq(T.mod(it,n_accum), n_accum-1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1.-beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg
Example #24
Source File: optim.py From iaf with MIT License | 5 votes |
def Eve(w, w_avg, f, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, disconnected_inputs='raise'):
    print 'Eve', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2,'beta3:',beta3

    mom = {}
    _max = {}
    delta = {}
    w_prime = {}
    for i in w:
        mom[i] = G.sharedf(w[i].get_value() * 0.)
        _max[i] = G.sharedf(w[i].get_value() * 0. + 1e-8)
        delta[i] = G.sharedf(w[i].get_value() * 0.)
        w_prime[i] = w[i] + (1-beta1)/beta1 * delta[i]

    train_cost = f(w_prime).mean()
    g = G.ndict.T_grad(train_cost, w, disconnected_inputs=disconnected_inputs) #warn/raise

    new = OrderedDict()
    for i in w:
        new[mom[i]] = (1-beta1) * mom[i] + beta1 * g[i]
        new[_max[i]] = T.maximum((1-beta2)*_max[i], abs(g[i]) + 1e-8)
        new[delta[i]] = alpha * new[mom[i]] / new[_max[i]]
        new[w[i]] = w[i] + new[delta[i]]

    for i in w:
        new[w_avg[i]] = beta3 * w[i] + (1.-beta3) * w_avg[i]
    return train_cost, new

# AdaMax that keeps running average of parameter
# Accumulates gradient over n_accum minibatches
Example #25
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMaxAvg(ws, ws_avg, objective, alpha=.01, beta1=.1, beta2=.001, update_keys=None, disconnected_inputs='raise'):
    print 'AdaMax_Avg', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs=disconnected_inputs) #warn/raise

    if update_keys is None:
        update_keys = [ws[j].keys() for j in range(len(ws))]

    new = OrderedDict()
    for j in range(len(ws)):
        if ws_avg is not None:
            w_avg = ws_avg[j]
        for i in update_keys[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0. + 1e-8)

            new[mom1] = (1-beta1) * mom1 + beta1 * _g
            new[_max] = T.maximum((1-beta2)*_max, abs(_g) + 1e-8)
            new[_w] = _w + alpha * new[mom1] / new[_max]
            if ws_avg is not None:
                new[w_avg[i]] = beta2 * _w + (1.-beta2) * w_avg[i]
    return new

# Eve that keeps running average of parameter
Example #26
Source File: optim.py From iaf with MIT License | 5 votes |
def AdaMax2(w, objective, alpha=.01, beta1=.1, beta2=.001, n_accum=2):
    print 'AdaMax2', 'alpha:',alpha,'beta1:',beta1,'beta2:',beta2, 'n_accum:', n_accum
    g = T.grad(objective.sum(), w, disconnected_inputs='warn')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(new[it],n_accum), 0)
    update = T.eq(T.mod(new[it],n_accum), n_accum-1)

    for i in range(len(w)):
        mom1 = G.sharedf(w[i].get_value() * 0.)
        _max = G.sharedf(w[i].get_value() * 0.)
        g_sum = G.sharedf(w[i].get_value() * 0.)
        #gi = T.switch(T.isnan(gi),T.zeros_like(gi),gi) #remove NaN's
        new[g_sum] = ifelse(reset, g[i], g_sum + g[i])
        new[mom1] = ifelse(update, (1-beta1) * mom1 + beta1 * new[g_sum], mom1)
        new[_max] = ifelse(update, T.maximum((1-beta2)*_max, abs(new[g_sum]) + 1e-8), _max)
        new[w[i]] = ifelse(update, w[i] + alpha * new[mom1] / new[_max], w[i])

    return new

# AdaMax that keeps running average of parameter
Example #27
Source File: ctc_cost.py From CTC-Connectionist-Temporal-Classification with Apache License 2.0 | 5 votes |
def log_add(a, b):
    max_ = tensor.maximum(a, b)
    return (max_ + tensor.log1p(tensor.exp(a + b - 2 * max_)))
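This is the standard numerically stable form of log(exp(a) + exp(b)): factoring out max(a, b) gives max + log(1 + exp(min - max)), and a + b - 2*max equals min - max. A hedged numeric check (values illustrative, not from the original project):

import theano
import theano.tensor as T

a = T.dscalar('a')
b = T.dscalar('b')
f = theano.function([a, b], log_add(a, b))

print(f(-1000.0, -1001.0))
# about -999.687 = -1000 + log(1 + exp(-1)); the naive
# np.log(np.exp(-1000) + np.exp(-1001)) would underflow to -inf.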
Example #28
Source File: activation.py From OpenDeep with Apache License 2.0 | 5 votes |
def rectifier(x, leaky=0):
    """
    Returns the element-wise rectifier (ReLU) applied to x.

    Parameters
    ----------
    x : tensor
        Symbolic Tensor (or compatible).
    leaky : scalar or tensor
        Slope for negative input, usually between 0 and 1. The default value
        of 0 will lead to the standard rectifier, 1 will lead to a linear
        activation function, and any value in between will give a leaky
        rectifier. A shared variable (broadcastable against x) will result in
        a parameterized rectifier with learnable slope(s).

    Returns
    -------
    tensor
        Element-wise rectifier: rectifier(x) = max(0,x) applied to `x`.
    """
    # return T.maximum(as_floatX(0), x)
    # below fix is taken from Lasagne framework:
    # https://github.com/benanne/Lasagne/blob/master/lasagne/nonlinearities.py
    # The following is faster than lambda x: T.maximum(0, x)
    # Thanks to @SnippyHolloW for pointing this out.
    # See: https://github.com/SnippyHolloW/abnet/blob/807aeb9/layers.py#L15
    # return (x + abs(x)) / as_floatX(2.0)
    return T.nnet.relu(x, alpha=leaky)
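A short usage sketch of rectifier() with and without a fixed leak (values illustrative, not from the original project):

import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
relu_fn = theano.function([x], rectifier(x))              # max(0, x)
leaky_fn = theano.function([x], rectifier(x, leaky=0.1))  # 0.1*x for x < 0

v = np.array([-2.0, 3.0], dtype=theano.config.floatX)
print(relu_fn(v))   # [ 0.  3.]
print(leaky_fn(v))  # [-0.2  3.]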
Example #29
Source File: nn.py From weightnorm with MIT License | 5 votes |
def lrelu(x, a=0.1):
    return T.maximum(x, a*x)
Example #30
Source File: basic.py From attention-lvcsr with MIT License | 5 votes |
def structured_maximum(x, y):
    """
    Structured elemwise maximum of sparse matrix x by scalar y.

    """
    # see decorator for function body