Python keras.backend.floatx() Examples
The following are 30 code examples of keras.backend.floatx().
You can go to the original project or source file by following the links above each example.
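Before the examples themselves, a quick note on what the function does: keras.backend.floatx() returns the default float dtype as a string (normally 'float32'), and keras.backend.set_floatx() changes that default. The short snippet below is an illustrative sketch, not taken from any of the projects listed here.

import numpy as np
from keras import backend as K

print(K.floatx())  # the default float type, usually 'float32'

# NumPy data prepared for Keras is typically created in this dtype, and
# tensors are cast to it with K.cast(..., K.floatx()) as in the examples below.
x = np.zeros((4, 3), dtype=K.floatx())

# The default can be changed globally, e.g. for double precision.
K.set_floatx('float64')
print(K.floatx())  # now 'float64'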
Example #1
Source File: learn_labelembedding.py From semantic-embeddings with MIT License | 6 votes |
def labelembed_loss(out1, out2, tar, targets, tau = 2., alpha = 0.9, beta = 0.5, num_classes = 100):
    out2_prob = K.softmax(out2)
    tau2_prob = K.stop_gradient(K.softmax(out2 / tau))
    soft_tar = K.stop_gradient(K.softmax(tar))

    L_o1_y = K.sparse_categorical_crossentropy(output = K.softmax(out1), target = targets)

    pred = K.argmax(out2, axis = -1)
    mask = K.stop_gradient(K.cast(K.equal(pred, K.cast(targets, 'int64')), K.floatx()))
    L_o1_emb = -cross_entropy(out1, soft_tar)  # pylint: disable=invalid-unary-operand-type

    L_o2_y = K.sparse_categorical_crossentropy(output = out2_prob, target = targets)
    L_emb_o2 = -cross_entropy(tar, tau2_prob) * mask * (K.cast(K.shape(mask)[0], K.floatx()) / (K.sum(mask) + 1e-8))  # pylint: disable=invalid-unary-operand-type

    L_re = K.relu(K.sum(out2_prob * K.one_hot(K.cast(targets, 'int64'), num_classes), axis = -1) - alpha)

    return beta * L_o1_y + (1 - beta) * L_o1_emb + L_o2_y + L_emb_o2 + L_re
Example #2
Source File: attention_with_context.py From DeepResearch with MIT License | 6 votes |
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #3
Source File: scale_dot_product_attention.py From Keras-TextClassification with MIT License | 6 votes |
def call(self, inputs, mask=None, **kwargs):
    if isinstance(inputs, list):
        query, key, value = inputs
    else:
        query = key = value = inputs
    if isinstance(mask, list):
        mask = mask[1]
    feature_dim = K.shape(query)[-1]
    e = K.batch_dot(query, key, axes=2) / K.sqrt(K.cast(feature_dim, dtype=K.floatx()))
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if self.history_only:
        query_len, key_len = K.shape(query)[1], K.shape(key)[1]
        indices = K.tile(K.expand_dims(K.arange(key_len), axis=0), [query_len, 1])
        upper = K.expand_dims(K.arange(key_len), axis=-1)
        e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
    if mask is not None:
        e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
    a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
    v = K.batch_dot(a, value)
    if self.return_attention:
        return [v, a]
    return v
Example #4
Source File: graph.py From Keras-TextClassification with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #5
Source File: contrib.py From steppy-toolkit with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #6
Source File: ChainCRF.py From naacl18-multitask_argument_mining with Apache License 2.0 | 6 votes |
def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    '''Given the observations x, it adds the start boundary energy b_start (resp. end boundary
    energy b_end on the start (resp. end) elements and multiplies the mask.'''
    if mask is None:
        if b_start is not None:
            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)
        if b_end is not None:
            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
    else:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, 2)
        x *= mask
        if b_start is not None:
            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
            x = x + start_mask * b_start
        if b_end is not None:
            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
            x = x + end_mask * b_end
    return x
Example #7
Source File: transform_rnn.py From View-Adaptive-Neural-Networks-for-Skeleton-based-Human-Action-Recognition with MIT License | 6 votes |
def call(self, x, mask=None):
    conv_input, theta = x
    s = theta.shape
    theta = T.reshape(theta, [-1, s[2]])
    m = K.not_equal(conv_input, 0.)

    #### For translation
    trans = _trans(theta)
    output = _transform_trans(trans, conv_input)
    output = output * K.cast(m, K.floatx())

    ### For rotation
    M = _fusion(theta)
    output = _transform_rot(M, output)

    return output
Example #8
Source File: ChainCRF.py From naacl18-multitask_argument_mining with Apache License 2.0 | 6 votes |
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''

    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #9
Source File: model.py From BERT with Apache License 2.0 | 6 votes |
def create_transformer(embedding_dim: int = 768, embedding_dropout: float = 0.1, vocab_size: int = 30000,
                       max_len: int = 512, trainable_pos_embedding: bool = True, num_heads: int = 12,
                       num_layers: int = 12, attention_dropout: float = 0.1,
                       use_one_embedding_dropout: bool = False, d_hid: int = 768 * 4,
                       residual_dropout: float = 0.1, use_attn_mask: bool = True) -> keras.Model:
    vocab_size += TextEncoder.SPECIAL_COUNT
    tokens = Input(batch_shape=(None, max_len), name='token_input', dtype='int32')
    segment_ids = Input(batch_shape=(None, max_len), name='segment_input', dtype='int32')
    pos_ids = Input(batch_shape=(None, max_len), name='position_input', dtype='int32')
    attn_mask = Input(batch_shape=(None, 1, max_len, max_len), name='attention_mask_input',
                      dtype=K.floatx()) if use_attn_mask else None
    inputs = [tokens, segment_ids, pos_ids]
    embedding_layer = Embedding(embedding_dim, embedding_dropout, vocab_size, max_len,
                                trainable_pos_embedding, use_one_embedding_dropout)
    x = embedding_layer(inputs)
    for i in range(num_layers):
        x = EncoderLayer(embedding_dim, num_heads, d_hid, residual_dropout,
                         attention_dropout, use_attn_mask, i)(x, attn_mask)
    inputs = inputs + ([attn_mask] if use_attn_mask else [])
    return keras.Model(inputs=inputs, outputs=x, name='Transformer')
Example #10
Source File: my_image.py From MachineLearning with Apache License 2.0 | 6 votes |
def next(self):
    # Keeps under lock only the mechanism which advances
    # the indexing of each batch.
    with self.lock:
        index_array, current_index, current_batch_size = next(self.index_generator)

    # The transformation of images is not under thread lock
    # so it can be done in parallel
    batch_x = np.zeros(tuple([current_batch_size] + list(self.image_size)), dtype=K.floatx())
    for i, j in enumerate(index_array):
        x = scipy.misc.imread(self.x[j])
        x = scipy.misc.imresize(x, self.image_size)
        x = self.image_data_generator.random_transform(x.astype(K.floatx()))
        x = self.image_data_generator.standardize(x)
        batch_x[i] = x

    if self.save_to_dir:
        for i in range(current_batch_size):
            img = image.array_to_img(batch_x[i], self.data_format, scale=True)
            fname = '{prefix}_{index}_{hash}.{format}'.format(prefix=self.save_prefix,
                                                              index=current_index + i,
                                                              hash=np.random.randint(1e4),
                                                              format=self.save_format)
            img.save(os.path.join(self.save_to_dir, fname))

    batch_y = self.y[index_array]
    return batch_x, batch_y
Example #11
Source File: test_qrnn.py From keras_bn_library with MIT License | 6 votes |
def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001):
    """Generates an absolute cosine time series with the amplitude
    exponentially decreasing

    Arguments:
        amp: amplitude of the cosine function
        period: period of the cosine function
        x0: initial x of the time series
        xn: final x of the time series
        step: step of the time series discretization
        k: exponential rate
    """
    cos = np.zeros(((xn - x0) * step, 1, 1), dtype=K.floatx())
    for i in range(len(cos)):
        idx = x0 + i * step
        cos[i, 0, 0] = amp * np.cos(2 * np.pi * idx / period)
        cos[i, 0, 0] = cos[i, 0, 0] * np.exp(-k * idx)
    return cos
Example #12
Source File: layers.py From keras_bn_library with MIT License | 6 votes |
def call(self, x, mask=None):
    if self.mode == 'maximum_likelihood':
        # draw maximum likelihood sample from Bernoulli distribution
        #    x* = argmax_x p(x) = 1 if p(x=1) >= 0.5
        #                         0 otherwise
        return K.round(x)
    elif self.mode == 'random':
        # draw random sample from Bernoulli distribution
        #    x* = x ~ p(x) = 1 if p(x=1) > uniform(0, 1)
        #                    0 otherwise
        # return self.srng.binomial(size=x.shape, n=1, p=x, dtype=K.floatx())
        return K.random_binomial(x.shape, p=x, dtype=K.floatx())
    elif self.mode == 'mean_field':
        # draw mean-field approximation sample from Bernoulli distribution
        #    x* = E[p(x)] = E[Bern(x; p)] = p
        return x
    elif self.mode == 'nrlu':
        return nrlu(x)
    else:
        raise NotImplementedError('Unknown sample mode!')
Example #13
Source File: FCD.py From FCD with GNU Lesser General Public License v3.0 | 6 votes |
def build_masked_loss(loss_function, mask_value):
    """Builds a loss function that masks based on targets

    Args:
        loss_function: The loss function to mask
        mask_value: The value to mask in the targets

    Returns:
        function: a loss function that acts like loss_function with masked inputs
    """

    def masked_loss_function(y_true, y_pred):
        mask = K.cast(K.not_equal(y_true, mask_value), K.floatx())
        return loss_function(y_true * mask, y_pred * mask)

    return masked_loss_function
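A hedged usage sketch for the helper above: the masked loss is meant to be passed to model.compile like any other Keras loss. The model architecture, the choice of K.binary_crossentropy, and mask_value=0.5 below are illustrative assumptions, not the FCD project's actual configuration.

# Illustrative only: compile a small model with the masked loss.
# K.binary_crossentropy and mask_value=0.5 are assumptions for this sketch.
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(8, activation='relu', input_shape=(16,)),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam',
              loss=build_masked_loss(K.binary_crossentropy, mask_value=0.5))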
Example #14
Source File: ChainCRF.py From elmo-bilstm-cnn-crf with Apache License 2.0 | 6 votes |
def add_boundary_energy(x, b_start=None, b_end=None, mask=None):
    '''Given the observations x, it adds the start boundary energy b_start (resp. end boundary
    energy b_end on the start (resp. end) elements and multiplies the mask.'''
    if mask is None:
        if b_start is not None:
            x = K.concatenate([x[:, :1, :] + b_start, x[:, 1:, :]], axis=1)
        if b_end is not None:
            x = K.concatenate([x[:, :-1, :], x[:, -1:, :] + b_end], axis=1)
    else:
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask, 2)
        x *= mask
        if b_start is not None:
            mask_r = K.concatenate([K.zeros_like(mask[:, :1]), mask[:, :-1]], axis=1)
            start_mask = K.cast(K.greater(mask, mask_r), K.floatx())
            x = x + start_mask * b_start
        if b_end is not None:
            mask_l = K.concatenate([mask[:, 1:], K.zeros_like(mask[:, -1:])], axis=1)
            end_mask = K.cast(K.greater(mask, mask_l), K.floatx())
            x = x + end_mask * b_end
    return x
Example #15
Source File: utils.py From EfficientUnet with MIT License | 6 votes |
def conv_kernel_initializer(shape, dtype=K.floatx()):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
        shape: shape of variable
        dtype: dtype of variable

    Returns:
        an initialization for the variable
    """
    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random_normal(
        shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)
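Because the initializer above follows the (shape, dtype) callable signature that Keras initializers use, it can be passed directly as a layer's kernel_initializer. A minimal sketch; the specific Conv2D configuration is an assumption for illustration.

# Illustrative only: pass the callable above as a kernel_initializer.
from keras.layers import Conv2D

conv = Conv2D(32, (3, 3), padding='same',
              kernel_initializer=conv_kernel_initializer)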
Example #16
Source File: attention_layer.py From text-classifier with Apache License 2.0 | 6 votes |
def call(self, x, mask=None):
    # size of x :[batch_size, sel_len, attention_dim]
    # size of u :[batch_size, attention_dim]
    # uit = tanh(xW+b)
    uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)

    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)
    return output
Example #17
Source File: optimizers.py From keras-lookahead with MIT License | 6 votes |
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = self.learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                                 (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update_sub(p, p_t))
    return self.updates
Example #18
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #19
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #20
Source File: rnn_feature.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def call(self, x, mask=None):
    uit = dot_product(x, self.W)

    if self.bias:
        uit += self.b

    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)

    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may be almost zero
    # and this results in NaN's. A workaround is to add a very small positive number ε to the sum.
    # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return K.sum(weighted_input, axis=1)
Example #21
Source File: attention.py From deephlapan with GNU General Public License v2.0 | 6 votes |
def call(self, x, mask=None):
    eij = dot_product(x, self.W)

    if self.bias:
        eij += self.b

    eij = K.tanh(eij)
    a = K.exp(eij)

    if mask is not None:
        a *= K.cast(mask, K.floatx())

    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    weighted_input = x * K.expand_dims(a)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, a]
    return result
Example #22
Source File: AdamAccumulate.py From Coloring-greyscale-images with MIT License | 6 votes |
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None,
             decay=0., amsgrad=False, accum_iters=1, **kwargs):
    if accum_iters < 1:
        raise ValueError('accum_iters must be >= 1')
    super(AdamAccumulate, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.lr = K.variable(lr, name='lr')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(decay, name='decay')
    if epsilon is None:
        epsilon = K.epsilon()
    self.epsilon = epsilon
    self.initial_decay = decay
    self.amsgrad = amsgrad
    self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations))
    self.accum_iters_float = K.cast(self.accum_iters, K.floatx())
Example #23
Source File: attlayer.py From DeepMoji with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #24
Source File: ChainCRF.py From elmo-bilstm-cnn-crf with Apache License 2.0 | 6 votes |
def _forward(x, reduce_step, initial_states, U, mask=None):
    '''Forward recurrence of the linear chain crf.'''

    def _forward_step(energy_matrix_t, states):
        alpha_tm1 = states[-1]
        new_states = reduce_step(K.expand_dims(alpha_tm1, 2) + energy_matrix_t)
        return new_states[0], new_states

    U_shared = K.expand_dims(K.expand_dims(U, 0), 0)
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        mask_U = K.expand_dims(K.expand_dims(mask[:, :-1] * mask[:, 1:], 2), 3)
        U_shared = U_shared * mask_U

    inputs = K.expand_dims(x[:, 1:, :], 2) + U_shared
    inputs = K.concatenate([inputs, K.zeros_like(inputs[:, -1:, :, :])], axis=1)

    last, values, _ = K.rnn(_forward_step, inputs, initial_states)
    return last, values
Example #25
Source File: model.py From BERT-keras with GNU General Public License v3.0 | 6 votes |
def create_transformer(embedding_dim: int = 768, embedding_dropout: float = 0.1, vocab_size: int = 30000,
                       max_len: int = 512, trainable_pos_embedding: bool = True, num_heads: int = 12,
                       num_layers: int = 12, attention_dropout: float = 0.1,
                       use_one_embedding_dropout: bool = False, d_hid: int = 768 * 4,
                       residual_dropout: float = 0.1, use_attn_mask: bool = True,
                       embedding_layer_norm: bool = False, neg_inf: float = -1e9,
                       layer_norm_epsilon: float = 1e-5, accurate_gelu: bool = False) -> keras.Model:
    vocab_size += TextEncoder.SPECIAL_COUNT
    tokens = Input(batch_shape=(None, max_len), name='token_input', dtype='int32')
    segment_ids = Input(batch_shape=(None, max_len), name='segment_input', dtype='int32')
    pos_ids = Input(batch_shape=(None, max_len), name='position_input', dtype='int32')
    attn_mask = Input(batch_shape=(None, 1, max_len, max_len), name='attention_mask_input',
                      dtype=K.floatx()) if use_attn_mask else None
    inputs = [tokens, segment_ids, pos_ids]
    embedding_layer = Embedding(embedding_dim, embedding_dropout, vocab_size, max_len,
                                trainable_pos_embedding, use_one_embedding_dropout,
                                embedding_layer_norm, layer_norm_epsilon)
    x = embedding_layer(inputs)
    for i in range(num_layers):
        x = EncoderLayer(embedding_dim, num_heads, d_hid, residual_dropout, attention_dropout,
                         use_attn_mask, i, neg_inf, layer_norm_epsilon, accurate_gelu)(x, attn_mask)
    if use_attn_mask:
        inputs.append(attn_mask)
    return keras.Model(inputs=inputs, outputs=[x], name='Transformer')
Example #26
Source File: image_utils.py From keras-yolo with MIT License | 6 votes |
def load_img(img_path, target_size):
    """
    loads the image the same way darknet does, processes it and returns it (as array).
    uses PIL, like keras.preprocessing.image module.
    This loads image in RGB format.
    """
    from PIL import Image as pil_image
    import keras.backend as K
    import numpy as np

    img = pil_image.open(img_path)
    # TODO: check format and convert to RGB
    # resize
    x = np.asarray(img, dtype=K.floatx()) / 255.0
    # print(x[0,0,0], x[1,0,0], x[0,1,0], x[1,1,0], img.mode)
    x = letterbox_image(x, target_size)
    return x
Example #27
Source File: FCD.py From FCD with GNU Lesser General Public License v3.0 | 5 votes |
def masked_accuracy(y_true, y_pred):
    a = K.sum(K.cast(K.equal(y_true, K.round(y_pred)), K.floatx()))
    c = K.sum(K.cast(K.not_equal(y_true, 0.5), K.floatx()))
    acc = (a) / c
    return acc
Example #28
Source File: retain_train.py From retain-keras with Apache License 2.0 | 5 votes |
def __call__(self, w):
    other_weights = K.cast(K.greater_equal(w, 0)[:-1], K.floatx())
    last_weight = K.cast(K.equal(K.reshape(w[-1, :], (1, K.shape(w)[1])), 0.), K.floatx())
    appended = K.concatenate([other_weights, last_weight], axis=0)
    w *= appended
    return w
Example #29
Source File: model.py From attention-is-all-you-need-keras with MIT License | 5 votes |
def _get_accuracy(args, null_token_value):
    y_pred, y_true = args
    y_true = K.cast(y_true, "int32")
    mask = 1.0 - K.cast(K.equal(y_true, null_token_value), K.floatx())
    y_pred = K.cast(K.argmax(y_pred, axis=-1), "int32")
    correct = K.cast(K.equal(y_pred, y_true), K.floatx())
    correct = K.sum(correct * mask, -1) / K.sum(mask, -1)
    return K.mean(correct)
Example #30
Source File: retain_evaluation.py From retain-keras with Apache License 2.0 | 5 votes |
def __call__(self, w):
    other_weights = K.cast(K.greater_equal(w, 0)[:-1], K.floatx())
    last_weight = K.cast(K.equal(K.reshape(w[-1, :], (1, K.shape(w)[1])), 0.), K.floatx())
    appended = K.concatenate([other_weights, last_weight], axis=0)
    w *= appended
    return w