Python keras.backend.dot() Examples
The following are 30 code examples of keras.backend.dot(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
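As a quick, self-contained illustration of the basic call (a minimal sketch written for this page, not taken from any of the projects below), K.dot behaves like a matrix product on backend tensors:

from keras import backend as K
import numpy as np

x = K.constant(np.random.rand(4, 3))    # shape (batch, features)
w = K.constant(np.random.rand(3, 5))    # shape (features, units)
y = K.dot(x, w)                         # matrix product, shape (batch, units)
print(K.int_shape(y))                   # (4, 5)

The examples below show how the same primitive is used inside attention layers, recurrent cells, and custom losses.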
Example #1
Source File: attention.py From keras-transformer with MIT License | 6 votes |
def call(self, inputs, **kwargs):
    if not K.is_tensor(inputs):
        raise ValueError(
            'The layer can be called only with one tensor as an argument')
    _, seq_len, d_model = K.int_shape(inputs)
    # The first thing we need to do is to perform affine transformations
    # of the inputs to get the Queries, the Keys and the Values.
    qkv = K.dot(K.reshape(inputs, [-1, d_model]), self.qkv_weights)
    # splitting the keys, the values and the queries before further
    # processing
    pre_q, pre_k, pre_v = [
        K.reshape(
            # K.slice(qkv, (0, i * d_model), (-1, d_model)),
            qkv[:, i * d_model:(i + 1) * d_model],
            (-1, seq_len, self.num_heads, d_model // self.num_heads))
        for i in range(3)]
    attention_out = self.attention(pre_q, pre_v, pre_k, seq_len, d_model,
                                   training=kwargs.get('training'))
    return attention_out
Example #2
Source File: contrib.py From steppy-toolkit with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #3
Source File: attention.py From keras-utility-layer-collection with MIT License | 6 votes |
def step(self, x, states):
    h = states[0]
    # states[1] necessary?

    # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
    total_x_prod = states[-1]
    # comes from the constants (equals the input sequence)
    X = states[-2]

    # expand dims to add the vector which is only valid for this time step
    # to total_x_prod which is valid for all time steps
    hw = K.expand_dims(K.dot(h, self._W2), 1)
    additive_atn = total_x_prod + hw
    attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
    x_weighted = K.sum(attention * X, [1])

    x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3

    h, new_states = self.layer.cell.call(x, states[:-2])
    return h, new_states
Example #4
Source File: attention.py From keras-utility-layer-collection with MIT License | 6 votes |
def step(self, x, states):
    h = states[0]
    # states[1] necessary?

    # comes from the constants
    X_static = states[-2]
    # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
    total_x_static_prod = states[-1]

    # expand dims to add the vector which is only valid for this time step
    # to total_x_prod which is valid for all time steps
    hw = K.expand_dims(K.dot(h, self._W2), 1)
    additive_atn = total_x_static_prod + hw
    attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
    static_x_weighted = K.sum(attention * X_static, [1])

    x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

    h, new_states = self.layer.cell.call(x, states[:-2])

    # append attention to the states to "smuggle" it out of the RNN wrapper
    attention = K.squeeze(attention, -1)
    h = K.concatenate([h, attention])

    return h, new_states
Example #5
Source File: rhn.py From deep-models with Apache License 2.0 | 6 votes |
def step(self, x, states):
    h_st, B_U, B_W = states

    if self.consume_less == 'cpu':
        x_t = x[:, :self.output_dim]
        x_h = x[:, self.output_dim: 2 * self.output_dim]
    elif self.consume_less == 'mem':
        x_t = K.dot(x * B_W[0], self.W_t) + self.b_t
        x_h = K.dot(x * B_W[1], self.W_h) + self.b_h
    else:
        raise Exception('Unknown `consume_less` mode.')

    for l in xrange(self.L):
        if l == 0:
            t = self.inner_activation(x_t + K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
            h = self.activation(x_h + K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
        else:
            t = self.inner_activation(K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
            h = self.activation(K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
        h_st = h * t + h_st * (1 - t)

    return h_st, [h_st]
Example #6
Source File: layers.py From DeepLearn with MIT License | 6 votes |
def call(self, x, mask=None):
    e1 = x[0].T
    e2 = x[1].T
    batch_size = K.shape(x[0])[0]
    sim = []
    V_out = K.dot(self.V, K.concatenate([e1, e2], axis=0))

    for i in range(self.k):
        temp = K.batch_dot(K.dot(e1.T, self.W[i, :, :]), e2.T, axes=1)
        sim.append(temp)
    sim = K.reshape(sim, (self.k, batch_size))

    tensor_bi_product = self.activation(V_out + sim)
    tensor_bi_product = K.dot(self.U.T, tensor_bi_product).T

    return tensor_bi_product
Example #7
Source File: rnnrbm.py From keras_bn_library with MIT License | 6 votes |
def step(self, x, states):
    u_tm1 = states[0]
    B_U = states[3]
    B_W = states[4]

    bv_t = self.bv + K.dot(u_tm1, self.Wuv)
    bh_t = self.bh + K.dot(u_tm1, self.Wuh)

    if self.consume_less == 'cpu':
        h = x
    else:
        h = self.b + K.dot(x * B_W, self.W)

    u_t = self.activation(h + K.dot(u_tm1 * B_U, self.U))

    return x, [u_t, bv_t, bh_t]
Example #8
Source File: recurrent.py From keras_bn_library with MIT License | 6 votes |
def step(self, x, states):
    h_tm1 = states[0]
    c_tm1 = states[1]

    x_t = self.activation(K.dot(h_tm1, self.A) + self.ba)

    z = K.dot(x_t, self.W) + K.dot(h_tm1, self.U) + self.b

    z0 = z[:, :self.input_dim]
    z1 = z[:, self.input_dim: 2 * self.input_dim]
    z2 = z[:, 2 * self.input_dim: 3 * self.input_dim]
    z3 = z[:, 3 * self.input_dim:]

    i = self.inner_activation(z0)
    f = self.inner_activation(z1)
    c = f * c_tm1 + i * self.activation(z2)
    o = self.inner_activation(z3)

    h = o * self.activation(c)
    return x_t, [h, c]
Example #9
Source File: rbm.py From keras_bn_library with MIT License | 6 votes |
def sample_h_given_x(self, x):
    h_pre = K.dot(x, self.Wrbm) + self.bh
    h_sigm = self.activation(self.scaling_h_given_x * h_pre)

    # drop out noise
    #if(0.0 < self.p < 1.0):
    #    noise_shape = self._get_noise_shape(h_sigm)
    #    h_sigm = K.in_train_phase(K.dropout(h_sigm, self.p, noise_shape), h_sigm)

    if(self.hidden_unit_type == 'binary'):
        h_samp = K.random_binomial(shape=h_sigm.shape, p=h_sigm)
        # random sample
        #   \hat{h} = 1,  if p(h=1|x) > uniform(0, 1)
        #             0,  otherwise
    elif(self.hidden_unit_type == 'nrlu'):
        h_samp = nrlu(h_pre)
    else:
        h_samp = h_sigm

    if(0.0 < self.p < 1.0):
        noise_shape = self._get_noise_shape(h_samp)
        h_samp = K.in_train_phase(K.dropout(h_samp, self.p, noise_shape), h_samp)

    return h_samp, h_pre, h_sigm
Example #10
Source File: head.py From NTM-Keras with MIT License | 6 votes |
def reading(memory_t, weight_t):
    """
    Reading memory.
    :param memory_t: the $N \times M$ memory matrix at time $t$, where $N$ is
        the number of memory locations, and $M$ is the vector size at each
        location.
    :param weight_t: $w_t$ is a vector of weightings over the $N$ locations
        emitted by a reading head at time $t$. Since all weightings are
        normalized, the $N$ elements $w_t(i)$ of $\textbf{w}_t$ obey the
        following constraints:
            $$\sum_{i=1}^{N} w_t(i) = 1, \quad 0 \le w_t(i) \le 1, \forall i$$
        The length $M$ read vector $r_t$ returned by the head is defined as a
        convex combination of the row-vectors $M_t(i)$ in memory:
            $$\textbf{r}_t \leftarrow \sum_{i=1}^{N} w_t(i) \textbf{M}_t(i)$$
    :return: the content reading from memory.
    """
    r_t = K.dot(memory_t, weight_t)
    return r_t
Example #11
Source File: loupe_keras.py From FSA-Net with Apache License 2.0 | 6 votes |
def call(self, inputs):
    """
    In Keras, there are two ways to do matrix multiplication (dot product):
    1) K.dot:     A x B -> when A has a batch size and B doesn't, use K.dot
    2) tf.matmul: A x B -> when A and B both have a batch size, use tf.matmul

    Error example: using tf.matmul when A has a batch size (3 dims) and B
    doesn't (2 dims):
        ValueError: Shape must be rank 2 but is rank 3 for 'net_vlad_1/MatMul'
        (op: 'MatMul') with input shapes: [?,21,64], [64,3]

    tf.matmul might still work when the dim of A is (?, 64), but this is too
    confusing. Just follow the above rules.
    """
    gates = K.dot(inputs, self.gating_weights)
    gates += self.gating_biases
    gates = tf.sigmoid(gates)

    activation = tf.multiply(inputs, gates)

    return activation
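To make the rule in the docstring concrete, here is a small standalone check (an illustrative sketch assuming a TensorFlow backend; the shapes mirror the error message quoted above):

import numpy as np
from keras import backend as K

a = K.constant(np.random.rand(2, 21, 64))   # batched activations, rank 3
b = K.constant(np.random.rand(64, 3))       # shared weight matrix, rank 2 (no batch dim)

c = K.dot(a, b)          # K.dot contracts the last axis of a with the first axis of b
print(K.int_shape(c))    # (2, 21, 3)

# In graph-mode TF 1.x, tf.matmul(a, b) with these ranks raises the
# "Shape must be rank 2 but is rank 3" error quoted in the docstring,
# because tf.matmul expects both operands to carry the batch dimension.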
Example #12
Source File: attention_layer.py From text-classifier with Apache License 2.0 | 6 votes |
def call(self, x, mask=None):
    # size of x: [batch_size, sel_len, attention_dim]
    # size of u: [batch_size, attention_dim]
    # uit = tanh(xW + b)
    uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
    ait = K.dot(uit, self.u)
    ait = K.squeeze(ait, -1)
    ait = K.exp(ait)

    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        ait *= K.cast(mask, K.floatx())
    ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    ait = K.expand_dims(ait)
    weighted_input = x * ait
    output = K.sum(weighted_input, axis=1)

    return output
Example #13
Source File: utils.py From semantic-embeddings with MIT License | 6 votes |
def devise_ranking_loss(embedding, margin = 0.1):
    """ The ranking loss used by DeViSE.

    # Arguments:

    - embedding: 2-d numpy array whose rows are class embeddings.

    - margin: margin for the ranking loss.

    # Returns:
        a Keras loss function taking y_true and y_pred as inputs and
        returning a loss tensor.
    """
    def _loss(y_true, y_pred):
        embedding_t = K.constant(embedding.T)
        true_sim = K.sum(y_true * y_pred, axis = -1)
        other_sim = K.dot(y_pred, embedding_t)
        return K.sum(K.relu(margin - true_sim[:,None] + other_sim), axis = -1) - margin
    return _loss
Example #14
Source File: attlayer.py From DeepMoji with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #15
Source File: losses.py From style-transfer with MIT License | 6 votes |
def gram_matrix(x):
    """
    Computes the outer-product of the input tensor x.

    Input
    -----
    - x: input tensor of shape (C x H x W)

    Returns
    -------
    - x . x^T

    Note that this can be computed efficiently if x is reshaped
    as a tensor of shape (C x H*W).
    """
    # assert K.ndim(x) == 3
    if K.image_dim_ordering() == 'th':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    return K.dot(features, K.transpose(features))
Example #16
Source File: graph.py From Keras-TextClassification with MIT License | 6 votes |
def call(self, x, mask=None):
    # computes a probability distribution over the timesteps
    # uses 'max trick' for numerical stability
    # reshape is done to avoid issue with Tensorflow
    # and 1-dimensional weights
    logits = K.dot(x, self.W)
    x_shape = K.shape(x)
    logits = K.reshape(logits, (x_shape[0], x_shape[1]))
    ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

    # masked timesteps have zero weight
    if mask is not None:
        mask = K.cast(mask, K.floatx())
        ai = ai * mask
    att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
    weighted_input = x * K.expand_dims(att_weights)
    result = K.sum(weighted_input, axis=1)
    if self.return_attention:
        return [result, att_weights]
    return result
Example #17
Source File: attention.py From keras-transformer with MIT License | 6 votes |
def mask_attention_if_needed(self, dot_product):
    """
    Makes sure that (when enabled) each position
    (of a decoder's self-attention) cannot attend to subsequent positions.
    This is achieved by assigning -inf (or some large negative number)
    to all invalid connections. Later softmax will turn them into zeros.
    We need this to guarantee that decoder's predictions are based
    on what has happened before the position, not after.
    The method does nothing if masking is turned off.
    :param dot_product: scaled dot-product of Q and K after reshaping them
    to 3D tensors (batch * num_heads, rows, cols)
    """
    if not self.use_masking:
        return dot_product
    last_dims = K.int_shape(dot_product)[-2:]
    low_triangle_ones = (
        np.tril(np.ones(last_dims))
        # to ensure proper broadcasting
        .reshape((1,) + last_dims))
    inverse_low_triangle = 1 - low_triangle_ones
    close_to_negative_inf = -1e9
    result = (
        K.constant(low_triangle_ones, dtype=K.floatx()) * dot_product +
        K.constant(close_to_negative_inf * inverse_low_triangle))
    return result
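The lower-triangle trick used above can be seen in isolation with plain numpy (a standalone sketch, not part of the layer):

import numpy as np

seq_len = 4
low_triangle_ones = np.tril(np.ones((seq_len, seq_len)))
inverse_low_triangle = 1 - low_triangle_ones

# position i keeps its scores for positions 0..i and gets a huge negative
# value everywhere else, so a later softmax zeroes out the "future" positions
scores = np.random.rand(seq_len, seq_len)
masked_scores = low_triangle_ones * scores + (-1e9) * inverse_low_triangle
print(np.round(masked_scores, 2))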
Example #18
Source File: attention.py From keras-transformer with MIT License | 6 votes |
def call(self, inputs, **kwargs):
    if not (isinstance(inputs, list) and len(inputs) == 2):
        raise ValueError(
            'You can call this layer only with a list of two tensors '
            '(for keys/values and queries)')
    key_values_input, query_input = inputs
    _, value_seq_len, d_model = K.int_shape(key_values_input)
    query_seq_len = K.int_shape(inputs[1])[-2]
    # The first thing we need to do is to perform affine transformations
    # of the inputs to get the Queries, the Keys and the Values.
    kv = K.dot(K.reshape(key_values_input, [-1, d_model]), self.kv_weights)
    # splitting the keys, the values and the queries before further
    # processing
    pre_k, pre_v = [
        K.reshape(
            # K.slice(kv, (0, i * d_model), (-1, d_model)),
            kv[:, i * d_model: (i + 1) * d_model],
            (-1, value_seq_len, self.num_heads, d_model // self.num_heads))
        for i in range(2)]
    pre_q = K.reshape(
        K.dot(K.reshape(query_input, [-1, d_model]), self.q_weights),
        (-1, query_seq_len, self.num_heads, d_model // self.num_heads))
    return self.attention(pre_q, pre_v, pre_k, query_seq_len, d_model,
                          training=kwargs.get('training'))
Example #19
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 5 votes |
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
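For context, a hypothetical usage of the wrapper above on the TensorFlow backend (assuming dot_product from this example is in scope): a 3-D input against a 1-D kernel yields one score per timestep.

import numpy as np
from keras import backend as K

x = K.constant(np.random.rand(2, 7, 16))   # (batch, timesteps, features)
kernel = K.constant(np.random.rand(16))    # 1-D attention weight vector

scores = dot_product(x, kernel)            # expand_dims -> (16, 1), K.dot -> (2, 7, 1), squeeze -> (2, 7)
print(K.int_shape(scores))                 # (2, 7)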
Example #20
Source File: GraphEmbedding.py From conv_qsar_fast with MIT License | 5 votes |
def attributes_to_fp_contribution(self, attributes, depth):
    '''Given a 2D tensor of attributes where the first dimension corresponds
    to a single node, this method will apply the output sparsifying (often
    softmax) function and return the contribution to the fingerprint'''
    # Apply output activation function
    output_dot = K.dot(attributes[:, :-1], self.W_output[depth, :, :])  # ignore last attribute (bond flag)
    output_dot.name = 'output_dot'
    output_bias = self.b_output[depth, 0, :]
    output_bias.name = 'output_bias'
    output_activated = self.activation_output(output_dot + output_bias)
    output_activated.name = 'output_activated'
    return output_activated
Example #21
Source File: GraphEmbedding_sumAfter.py From conv_qsar_fast with MIT License | 5 votes |
def attributes_to_fp_contribution(self, attributes, depth):
    '''Given a 2D tensor of attributes where the first dimension corresponds
    to a single node, this method will apply the output sparsifying (often
    softmax) function and return the contribution to the fingerprint'''
    # Apply output activation function
    output_dot = K.dot(attributes[:, :-1], self.W_output[depth, :, :])  # ignore last attribute (bond flag)
    output_dot.name = 'output_dot'
    output_bias = self.b_output[depth, 0, :]
    output_bias.name = 'output_bias'
    output_activated = self.activation_output(output_dot + output_bias)
    output_activated.name = 'output_activated'
    return output_activated
Example #22
Source File: attention.py From Document-Classifier-LSTM with MIT License | 5 votes |
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
Example #23
Source File: attention.py From keras-utility-layer-collection with MIT License | 5 votes |
def get_constants(self, x):
    # add constants to speed up calculation
    constants = [x, K.dot(x, self._W1) + self._b2]
    return constants
Example #24
Source File: utils.py From semantic-embeddings with MIT License | 5 votes |
def inv_correlation(y_true, y_pred):
    """ Computes 1 minus the dot product between corresponding pairs of samples in two tensors. """
    return 1. - K.sum(y_true * y_pred, axis = -1)
Example #25
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 5 votes |
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
Example #26
Source File: rnn_feature.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 5 votes |
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)
Example #27
Source File: ind_rnn.py From Keras-IndRNN with MIT License | 5 votes |
def call(self, inputs, states, training=None):
    if 0 < self.dropout < 1 and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs),
            self.dropout,
            training=training,
            count=1)
    if (0 < self.recurrent_dropout < 1 and
            self._recurrent_masks is None):
        _recurrent_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=1)
        self._recurrent_masks = _recurrent_mask

    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_masks = self._recurrent_masks

    h_tm1 = states[0]  # previous state

    if 0. < self.dropout < 1.:
        inputs *= dp_mask[0]

    if 0. < self.recurrent_dropout < 1.:
        h_tm1 *= rec_dp_masks[0]

    h = K.dot(inputs, self.kernel)
    h = h + (h_tm1 * self.recurrent_kernel)
    if self.use_bias:
        h = K.bias_add(h, self.bias)

    h = self.activation(h)

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h]
Example #28
Source File: attention.py From keras-utility-layer-collection with MIT License | 5 votes |
def _multiplicative_similarity(self, source, query):
    qp = K.dot(query, self._weights["w_a"])
    similarity = K.batch_dot(K.permute_dimensions(qp, [0, 2, 1]), source, axes=[1, 2])
    return similarity
Example #29
Source File: detection.py From keras-yolo with MIT License | 5 votes |
def call(self, x, training=None):
    #output_shape = self.compute_output_shape(x.shape)
    #if x.shape[1:] != output_shape[1:]:
    #    return x.reshape((-1,) + output_shape[1:])
    return x  # K.dot(x, self.kernel)
Example #30
Source File: bert.py From keras-bert-ner with MIT License | 5 votes |
def call(self, inputs):
    if not hasattr(self, "kernel"):
        embedding_layer = inputs._keras_history[0]

        if embedding_layer.name != self.embedding_name:

            def recursive_search(layer):
                """Recursively search upwards through the graph to find the
                Embedding layer by name.
                """
                last_layer = layer._inbound_nodes[0].inbound_layers
                if isinstance(last_layer, list):
                    if len(last_layer) == 0:
                        return None
                    else:
                        last_layer = last_layer[0]
                if last_layer.name == self.embedding_name:
                    return last_layer
                else:
                    return recursive_search(last_layer)

            embedding_layer = recursive_search(embedding_layer)
            if embedding_layer is None:
                raise Exception("Embedding layer not found")

        self.kernel = K.transpose(embedding_layer.embeddings)
        self.units = K.int_shape(self.kernel)[1]
        self.bias = self.add_weight(name="bias",
                                    shape=(self.units,),
                                    initializer="zeros")

    outputs = K.dot(inputs, self.kernel)
    outputs = K.bias_add(outputs, self.bias)
    outputs = self.activation(outputs)
    return outputs