Python keras.backend.epsilon() Examples
The following are 30 code examples of keras.backend.epsilon().
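Before the examples, here is a minimal illustrative sketch (not taken from any of the projects below) of what keras.backend.epsilon() provides: a small global fuzz factor, 1e-07 by default, typically added to denominators or used as a clip bound to avoid division by zero and log(0). The helper name safe_normalize is hypothetical.

from keras import backend as K

print(K.epsilon())  # global fuzz factor, 1e-07 by default; configurable via K.set_epsilon()

def safe_normalize(x, axis=-1):
    # hypothetical helper: normalize along `axis`, guarding an all-zero sum with the fuzz factor
    return x / (K.sum(x, axis=axis, keepdims=True) + K.epsilon())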
Example #1
Source File: optimizers.py From keras-adamw with MIT License

def get_config(self):
    config = {
        'learning_rate': float(K_eval(self.learning_rate)),
        'beta_1': float(K_eval(self.beta_1)),
        'beta_2': float(K_eval(self.beta_2)),
        'decay': float(K_eval(self.decay)),
        'batch_size': int(self.batch_size),
        'total_iterations': int(self.total_iterations),
        'weight_decays': self.weight_decays,
        'lr_multipliers': self.lr_multipliers,
        'use_cosine_annealing': self.use_cosine_annealing,
        't_cur': int(K_eval(self.t_cur)),
        'eta_t': float(K_eval(self.eta_t)),
        'eta_min': float(K_eval(self.eta_min)),
        'eta_max': float(K_eval(self.eta_max)),
        'init_verbose': self.init_verbose,
        'epsilon': self.epsilon,
        'amsgrad': self.amsgrad
    }
    base_config = super(AdamW, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example #2
Source File: rnn_feature.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License

def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #3
Source File: train.py From async-rl with MIT License

def __init__(self, action_space, batch_size=32, screen=(84, 84), swap_freq=200):
    from keras.optimizers import RMSprop
    # -----
    self.screen = screen
    self.input_depth = 1
    self.past_range = 3
    self.observation_shape = (self.input_depth * self.past_range,) + self.screen
    self.batch_size = batch_size

    _, _, self.train_net, adventage = build_network(self.observation_shape, action_space.n)

    self.train_net.compile(optimizer=RMSprop(epsilon=0.1, rho=0.99),
                           loss=[value_loss(), policy_loss(adventage, args.beta)])

    self.pol_loss = deque(maxlen=25)
    self.val_loss = deque(maxlen=25)
    self.values = deque(maxlen=25)
    self.entropy = deque(maxlen=25)
    self.swap_freq = swap_freq
    self.swap_counter = self.swap_freq
    self.unroll = np.arange(self.batch_size)
    self.targets = np.zeros((self.batch_size, action_space.n))
    self.counter = 0
Example #4
Source File: adabound.py From keras-adabound with MIT License

def __init__(self, lr=0.001, final_lr=0.1, beta_1=0.9, beta_2=0.999, gamma=1e-3,
             epsilon=None, decay=0., amsbound=False, weight_decay=0.0, **kwargs):
    super(AdaBound, self).__init__(**kwargs)

    if not 0. <= gamma <= 1.:
        raise ValueError("Invalid `gamma` parameter. Must lie in [0, 1] range.")

    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')

    self.final_lr = final_lr
    self.gamma = gamma

    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsbound = amsbound

    self.weight_decay = float(weight_decay)
    self.base_lr = float(lr)
Example #5
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License

def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #6
Source File: conv_featuremaps_visualization.py From MCF-3D-CNN with MIT License

def deprocess_image(x):
    # normalize tensor: center on 0., ensure std is 0.1
    x -= x.mean()
    x /= (x.std() + K.epsilon())
    x *= 0.1

    # clip to [0, 1]
    x += 0.5
    x = np.clip(x, 0, 1)

    # convert to RGB array
    x *= 255
    if K.image_data_format() == 'channels_first':
        x = x.transpose((1, 2, 0))
    x = np.clip(x, 0, 255).astype('uint8')
    return x
Example #7
Source File: attention_with_context.py From DeepResearch with MIT License

def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #8
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License

def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #9
Source File: optimizers_225.py From keras-adamw with MIT License

def get_config(self):
    config = {
        'lr': float(K.get_value(self.lr)),
        'beta_1': float(K.get_value(self.beta_1)),
        'beta_2': float(K.get_value(self.beta_2)),
        'epsilon': self.epsilon,
        'schedule_decay': self.schedule_decay,
        'batch_size': int(self.batch_size),
        'total_iterations': int(self.total_iterations),
        'weight_decays': self.weight_decays,
        'lr_multipliers': self.lr_multipliers,
        'use_cosine_annealing': self.use_cosine_annealing,
        't_cur': int(K.get_value(self.t_cur)),
        'eta_t': float(K.eval(self.eta_t)),
        'eta_min': float(K.get_value(self.eta_min)),
        'eta_max': float(K.get_value(self.eta_max)),
        'init_verbose': self.init_verbose
    }
    base_config = super(NadamW, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example #10
Source File: optimizers.py From keras-lookahead with MIT License

def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = self.learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update_sub(p, p_t))
    return self.updates
Example #11
Source File: attlayer.py From DeepMoji with MIT License

def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #12
Source File: optimizers_225.py From keras-adamw with MIT License

def get_config(self):
    config = {
        'lr': float(K.get_value(self.lr)),
        'beta_1': float(K.get_value(self.beta_1)),
        'beta_2': float(K.get_value(self.beta_2)),
        'decay': float(K.get_value(self.decay)),
        'batch_size': int(self.batch_size),
        'total_iterations': int(self.total_iterations),
        'weight_decays': self.weight_decays,
        'lr_multipliers': self.lr_multipliers,
        'use_cosine_annealing': self.use_cosine_annealing,
        't_cur': int(K.get_value(self.t_cur)),
        'eta_t': float(K.eval(self.eta_t)),
        'eta_min': float(K.get_value(self.eta_min)),
        'eta_max': float(K.get_value(self.eta_max)),
        'init_verbose': self.init_verbose,
        'epsilon': self.epsilon,
        'amsgrad': self.amsgrad
    }
    base_config = super(AdamW, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example #13
Source File: optimizers.py From keras-adamw with MIT License

def get_config(self):
    config = {
        'learning_rate': float(K_eval(self.learning_rate)),
        'beta_1': float(K_eval(self.beta_1)),
        'beta_2': float(K_eval(self.beta_2)),
        'epsilon': self.epsilon,
        'schedule_decay': self.schedule_decay,
        'batch_size': int(self.batch_size),
        'total_iterations': int(self.total_iterations),
        'weight_decays': self.weight_decays,
        'lr_multipliers': self.lr_multipliers,
        'use_cosine_annealing': self.use_cosine_annealing,
        't_cur': int(K_eval(self.t_cur)),
        'eta_t': float(K_eval(self.eta_t)),
        'eta_min': float(K_eval(self.eta_min)),
        'eta_max': float(K_eval(self.eta_max)),
        'init_verbose': self.init_verbose
    }
    base_config = super(NadamW, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))
Example #14
Source File: attention_layer.py From text-classifier with Apache License 2.0

def call(self, x, mask=None):
    # size of x: [batch_size, sel_len, attention_dim]
    # size of u: [batch_size, attention_dim]
    # uit = tanh(xW + b)
    uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)

    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)
    return output
Example #15
Source File: feature_vis.py From facies_net with GNU Lesser General Public License v3.0

def smoothing(im, mode=None):
    # utility function to smooth an image
    if mode is None:
        return im
    elif mode == 'L2':
        # L2 norm
        return im / (np.sqrt(np.mean(np.square(im))) + K.epsilon())
    elif mode == 'GaussianBlur':
        # Gaussian Blurring with width of 3
        return filters.gaussian_filter(im, 1/8)
    elif mode == 'Decay':
        # Decay regularization
        decay = 0.98
        return decay * im
    elif mode == 'Clip_weak':
        # Clip weak pixel regularization
        percentile = 1
        threshold = np.percentile(np.abs(im), percentile)
        im[np.where(np.abs(im) < threshold)] = 0
        return im
    else:
        # print error message
        print('Unknown smoothing parameter. No smoothing implemented.')
        return im
Example #16
Source File: contrib.py From steppy-toolkit with MIT License

def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #17
Source File: losses.py From icassp19 with MIT License

def crossentropy_reed_wrap(_beta):
    def crossentropy_reed_core(y_true, y_pred):
        """
        This loss function is proposed in:
        Reed et al. "Training Deep Neural Networks on Noisy Labels with Bootstrapping", 2014

        :param y_true:
        :param y_pred:
        :return:
        """
        # hyper param
        print(_beta)
        y_pred = K.clip(y_pred, K.epsilon(), 1)

        # (1) dynamically update the targets based on the current state of the model:
        # bootstrapped target tensor. use predicted class proba directly to generate regression targets
        y_true_update = _beta * y_true + (1 - _beta) * y_pred

        # (2) compute loss as always
        _loss = -K.sum(y_true_update * K.log(y_pred), axis=-1)

        return _loss
    return crossentropy_reed_core
Example #18
Source File: scale_dot_product_attention.py From Keras-TextClassification with MIT License

def call(self, inputs, mask=None, **kwargs):
    if isinstance(inputs, list):
        query, key, value = inputs
    else:
        query = key = value = inputs
    if isinstance(mask, list):
        mask = mask[1]
    feature_dim = K.shape(query)[-1]
    e = K.batch_dot(query, key, axes=2) / K.sqrt(K.cast(feature_dim, dtype=K.floatx()))
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if self.history_only:
        query_len, key_len = K.shape(query)[1], K.shape(key)[1]
        indices = K.tile(K.expand_dims(K.arange(key_len), axis=0), [query_len, 1])
        upper = K.expand_dims(K.arange(key_len), axis=-1)
        e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
    if mask is not None:
        e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
    a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
    v = K.batch_dot(a, value)
    if self.return_attention:
        return [v, a]
    return v
Example #19
Source File: AdamAccumulate.py From Coloring-greyscale-images with MIT License

def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             decay=0., amsgrad=False, accum_iters=1, **kwargs):
    if accum_iters < 1:
        raise ValueError('accum_iters must be >= 1')
    super(AdamAccumulate, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsgrad = amsgrad
    self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations))
    self.accum_iters_float = K.cast(self.accum_iters, K.floatx())
Example #20
Source File: attention.py From deephlapan with GNU General Public License v2.0

def call(self, x, mask=None):
    eij = dot_product(x, self.W)

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)

    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)

    if self.return_attention:
        return [result, a]
    return result
Example #21
Source File: keras_radam.py From Keras-TextClassification with MIT License

def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0., **kwargs):
    super(RAdam, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
Example #22
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License

def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0., **kwargs):
    super(AMSgrad, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    self.epsilon = epsilon
    self.initial_decay = decay
Example #23
Source File: bert.py From keras-bert-ner with MIT License

def __init__(self, **kwargs):
    super(LayerNormalization, self).__init__(**kwargs)
    self.epsilon = K.epsilon() * K.epsilon()
Example #24
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License

def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True)
    scale = K.sqrt(s_squared_norm + K.epsilon())
    return x / scale
Example #25
Source File: capsulelayers.py From CapsNet-Fashion-MNIST with MIT License

def squash(vectors, axis=-1):
    """
    The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0
    :param vectors: some vectors to be squashed, N-dim tensor
    :param axis: the axis to squash
    :return: a Tensor with same shape as input vectors
    """
    s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors
Example #26
Source File: models.py From voxelmorph with GNU General Public License v3.0

def _log_layer_wrap(reg=K.epsilon()):
    def _log_layer(tens):
        return K.log(tens + reg)
    return _log_layer

# def _global_max_nd(x):
#     return K.exp(x)
Example #27
Source File: metrics.py From voxelmorph with GNU General Public License v3.0

def _hard_max(tens, axis):
    """
    we can't use the argmax function in a loss, as it's not differentiable
    We can use it in a metric, but not in a loss function
    therefore, we replace the 'hard max' operation (i.e. argmax + onehot)
    with this approximation
    """
    tensmax = K.max(tens, axis=axis, keepdims=True)
    eps_hot = K.maximum(tens - tensmax + K.epsilon(), 0)
    one_hot = eps_hot / K.epsilon()
    return one_hot
Example #28
Source File: metrics.py From voxelmorph with GNU General Public License v3.0

def loss(self, y_true, y_pred):
    """ categorical crossentropy loss """

    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)

    if self.use_float16:
        y_true = K.cast(y_true, 'float16')
        y_pred = K.cast(y_pred, 'float16')

    # scale and clip probabilities
    # this should not be necessary for softmax output.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1)

    # compute log probability
    log_post = K.log(y_pred)  # likelihood

    # loss
    loss = - y_true * log_post

    # weighted loss
    if self.weights is not None:
        loss *= self.weights

    if self.vox_weights is not None:
        loss *= self.vox_weights

    # take the total loss
    # loss = K.batch_flatten(loss)
    mloss = K.mean(K.sum(K.cast(loss, 'float32'), -1))
    tf.verify_tensor_all_finite(mloss, 'Loss not finite')
    return mloss
Example #29
Source File: bert.py From keras-bert-ner with MIT License

def call(self, inputs):
    mean = K.mean(inputs, axis=-1, keepdims=True)
    variance = K.mean(K.square(inputs - mean), axis=-1, keepdims=True)
    std = K.sqrt(variance + self.epsilon)
    outputs = (inputs - mean) / std
    outputs *= self.gamma
    outputs += self.beta
    return outputs
Example #30
Source File: layers.py From keras-utilities with MIT License

def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)

    # ait = K.dot(uit, self.u)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a

    result = K.sum(weighted_input, axis=1)

    if self.return_attention:
        return [result, a]
    return result