Python keras.backend.dot() Examples

The following are 30 code examples of keras.backend.dot(), extracted from open source projects. The original project and source file are noted above each example. You may also want to check out all available functions/classes of the module keras.backend, or try the search function.
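Before diving into the examples, here is a minimal, self-contained sketch of what keras.backend.dot() does. Shapes are illustrative only, and the import assumes the tf.keras backend; with standalone Keras it would be `from keras import backend as K`.

import numpy as np
from tensorflow.keras import backend as K   # assumption: tf.keras backend

a = K.constant(np.random.rand(2, 3, 4))     # e.g. (batch, timesteps, features)
w = K.constant(np.random.rand(4, 5))        # a 2-D kernel
out = K.dot(a, w)                           # contracts the last axis of `a` with the first axis of `w`
print(K.int_shape(out))                     # (2, 3, 5)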
Example #1
Source File: attention.py    From keras-transformer with MIT License
def call(self, inputs, **kwargs):
        if not K.is_tensor(inputs):
            raise ValueError(
                'The layer can be called only with one tensor as an argument')
        _, seq_len, d_model = K.int_shape(inputs)
        # The first thing we need to do is to perform affine transformations
        # of the inputs to get the Queries, the Keys and the Values.
        qkv = K.dot(K.reshape(inputs, [-1, d_model]), self.qkv_weights)
        # splitting the keys, the values and the queries before further
        # processing
        pre_q, pre_k, pre_v = [
            K.reshape(
                # K.slice(qkv, (0, i * d_model), (-1, d_model)),
                qkv[:, i * d_model:(i + 1) * d_model],
                (-1, seq_len, self.num_heads, d_model // self.num_heads))
            for i in range(3)]
        attention_out = self.attention(pre_q, pre_v, pre_k, seq_len, d_model,
                                       training=kwargs.get('training'))
        return attention_out 
Example #2
Source File: contrib.py    From steppy-toolkit with MIT License
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
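The 'max trick' mentioned in the comments above can be illustrated on its own; a small standalone NumPy sketch with made-up logits:

import numpy as np

logits = np.array([1000., 1001., 1002.])
# a naive softmax overflows: np.exp(logits) is inf for every entry
ai = np.exp(logits - logits.max())          # subtracting the max leaves the softmax unchanged
weights = ai / ai.sum()
print(weights)                              # [0.09003057 0.24472847 0.66524096]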
Example #3
Source File: attention.py    From keras-utility-layer-collection with MIT License
def step(self, x, states):   
        h = states[0]
        # states[1] necessary?

        # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
        total_x_prod = states[-1]
        # comes from the constants (equals the input sequence)
        X = states[-2]
        
        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        x_weighted = K.sum(attention * X, [1])

        x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3
        
        h, new_states = self.layer.cell.call(x, states[:-2])
        
        return h, new_states 
Example #4
Source File: attention.py    From keras-utility-layer-collection with MIT License
def step(self, x, states):  
        h = states[0]
        # states[1] necessary?
        
        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])
        
        x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])
        
        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states 
Example #5
Source File: rhn.py    From deep-models with Apache License 2.0
def step(self, x, states):
    h_st, B_U, B_W = states

    if self.consume_less == 'cpu':
      x_t = x[:, :self.output_dim]
      x_h = x[:, self.output_dim: 2 * self.output_dim]
    elif self.consume_less == 'mem':
      x_t = K.dot(x * B_W[0], self.W_t) + self.b_t
      x_h = K.dot(x * B_W[1], self.W_h) + self.b_h
    else:
      raise Exception('Unknown `consume_less` mode.')

    for l in xrange(self.L):
      if l == 0:
        t = self.inner_activation(x_t + K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
        h = self.activation(x_h + K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
      else:
        t = self.inner_activation(K.dot(h_st * B_U[0], self.U_ts[l]) + self.b_ts[l])
        h = self.activation(K.dot(h_st * B_U[1], self.U_hs[l]) + self.b_hs[l])
      h_st = h * t + h_st * (1 - t)

    return h_st, [h_st] 
Example #6
Source File: layers.py    From DeepLearn with MIT License
def call(self , x, mask=None):
        
        e1=x[0].T
        e2=x[1].T
        
        batch_size = K.shape(x[0])[0]
        sim = []
        V_out = K.dot(self.V, K.concatenate([e1,e2],axis=0))     

        for i in range(self.k): 
            temp = K.batch_dot(K.dot(e1.T,self.W[i,:,:]),e2.T,axes=1)
            sim.append(temp)
        sim=K.reshape(sim,(self.k,batch_size))

        tensor_bi_product = self.activation(V_out+sim)
        tensor_bi_product = K.dot(self.U.T, tensor_bi_product).T

        return tensor_bi_product 
Example #7
Source File: rnnrbm.py    From keras_bn_library with MIT License
def step(self, x, states):
		u_tm1 = states[0]
		B_U = states[3]
		B_W = states[4]

		bv_t = self.bv + K.dot(u_tm1, self.Wuv)
		bh_t = self.bh + K.dot(u_tm1, self.Wuh)

		if self.consume_less == 'cpu':
			h = x
		else:
			h = self.b + K.dot(x * B_W, self.W)

		u_t = self.activation(h + K.dot(u_tm1 * B_U, self.U))

		return x, [u_t, bv_t, bh_t] 
Example #8
Source File: recurrent.py    From keras_bn_library with MIT License
def step(self, x, states):
		h_tm1 = states[0]
		c_tm1 = states[1]

		x_t = self.activation(K.dot(h_tm1, self.A) + self.ba)
		z = K.dot(x_t, self.W) + K.dot(h_tm1, self.U) + self.b


		z0 = z[:, :self.input_dim]
		z1 = z[:, self.input_dim: 2 * self.input_dim]
		z2 = z[:, 2 * self.input_dim: 3 * self.input_dim]
		z3 = z[:, 3 * self.input_dim:]

		i = self.inner_activation(z0)
		f = self.inner_activation(z1)
		c = f * c_tm1 + i * self.activation(z2)
		o = self.inner_activation(z3)

		h = o * self.activation(c)

		return x_t, [h, c] 
Example #9
Source File: rbm.py    From keras_bn_library with MIT License
def sample_h_given_x(self, x):
		h_pre = K.dot(x, self.Wrbm) + self.bh	
		h_sigm = self.activation(self.scaling_h_given_x * h_pre)

		# drop out noise
		#if(0.0 < self.p < 1.0):
		#	noise_shape = self._get_noise_shape(h_sigm)
		#	h_sigm = K.in_train_phase(K.dropout(h_sigm, self.p, noise_shape), h_sigm)
		
		if(self.hidden_unit_type == 'binary'):
			h_samp = K.random_binomial(shape=h_sigm.shape, p=h_sigm)
	        # random sample
	        #   \hat{h} = 1,      if p(h=1|x) > uniform(0, 1)
	        #             0,      otherwise
		elif(self.hidden_unit_type == 'nrlu'):
			h_samp = nrlu(h_pre)
		else:
			h_samp = h_sigm

		if(0.0 < self.p < 1.0):
			noise_shape = self._get_noise_shape(h_samp)
			h_samp = K.in_train_phase(K.dropout(h_samp, self.p, noise_shape), h_samp)

		return h_samp, h_pre, h_sigm 
Example #10
Source File: head.py    From NTM-Keras with MIT License
def reading(memory_t, weight_t):
    """
    Reading memory.
    :param memory_t: the $N \times M$ memory matrix at time $t$, where $N$
    is the number of memory locations, and $M$ is the vector size at each
    location.
    :param weight_t: $w_t$ is a vector of weightings over the $N$ locations
    emitted by a reading head at time $t$.

    Since all weightings are normalized, the $N$ elements $w_t(i)$ of
    $\textbf{w}_t$ obey the following constraints:
    $$\sum_{i=1}^{N} w_t(i) = 1, 0 \le w_t(i) \le 1,\forall i$$

    The length $M$ read vector $r_t$ returned by the head is defined as a
    convex combination of the row-vectors $M_t(i)$ in memory:
    $$\textbf{r}_t \leftarrow \sum_{i=1}^{N}w_t(i)\textbf{M}_t(i)$$
    :return: the content reading from memory.
    """
    r_t = K.dot(memory_t, weight_t)
    return r_t 
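A quick NumPy check of the convex-combination formula in the docstring, with hypothetical values for N=4 locations and M=3 entries per location (the operand order here follows the formula, not necessarily the exact tensor layout the head uses):

import numpy as np

memory_t = np.arange(12, dtype=float).reshape(4, 3)   # N=4 rows M_t(i), each of length M=3
weight_t = np.array([0.1, 0.2, 0.3, 0.4])             # normalized weighting over the N locations

r_t = weight_t @ memory_t                             # r_t = sum_i w_t(i) * M_t(i)
print(r_t)                                            # [6. 7. 8.]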
Example #11
Source File: loupe_keras.py    From FSA-Net with Apache License 2.0
def call(self, inputs):
        """
        In Keras, there are two ways to do matrix multiplication (dot product):
        1) K.dot : AxB -> when A has batchsize and B doesn't, use K.dot
        2) tf.matmul: AxB -> when A and B both have batchsize, use tf.matmul
        
        Error example: Use tf.matmul when A has batchsize (3 dim) and B doesn't (2 dim)
        ValueError: Shape must be rank 2 but is rank 3 for 'net_vlad_1/MatMul' (op: 'MatMul') with input shapes: [?,21,64], [64,3]
        
        tf.matmul might still work when the dim of A is (?,64), but this is too confusing.
        Just follow the above rules.
        """
        gates = K.dot(inputs, self.gating_weights)
        gates += self.gating_biases
        gates = tf.sigmoid(gates)

        activation = tf.multiply(inputs,gates)
        return activation 
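The rule of thumb from the docstring can be checked directly; a small sketch with hypothetical shapes matching the error example above (tf.keras backend assumed):

import numpy as np
import tensorflow as tf
from tensorflow.keras import backend as K

a = K.constant(np.random.rand(3, 21, 64))   # batched activations, like [?, 21, 64]
w = K.constant(np.random.rand(64, 5))       # un-batched kernel
print(K.int_shape(K.dot(a, w)))             # (3, 21, 5)  -- rule 1: use K.dot

b = K.constant(np.random.rand(3, 64, 5))    # second operand also has a batch axis
print(K.int_shape(tf.matmul(a, b)))         # (3, 21, 5)  -- rule 2: use tf.matmul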
Example #12
Source File: attention_layer.py    From text-classifier with Apache License 2.0
def call(self, x, mask=None):
        # size of x :[batch_size, sel_len, attention_dim]
        # size of u :[batch_size, attention_dim]
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        ait = K.expand_dims(ait)
        weighted_input = x * ait
        output = K.sum(weighted_input, axis=1)

        return output 
Example #13
Source File: utils.py    From semantic-embeddings with MIT License
def devise_ranking_loss(embedding, margin = 0.1):
    """ The ranking loss used by DeViSE.

    # Arguments:

    - embedding: 2-d numpy array whose rows are class embeddings.

    - margin: margin for the ranking loss.

    # Returns:
        a Keras loss function taking y_true and y_pred as inputs and returning a loss tensor.
    """
    
    def _loss(y_true, y_pred):
        embedding_t = K.constant(embedding.T)
        true_sim = K.sum(y_true * y_pred, axis = -1)
        other_sim = K.dot(y_pred, embedding_t)
        return K.sum(K.relu(margin - true_sim[:,None] + other_sim), axis = -1) - margin
    
    return _loss 
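A possible usage sketch for the loss above, with a made-up identity embedding for four classes; the returned function can also be passed directly to model.compile(loss=...):

import numpy as np
from tensorflow.keras import backend as K

emb = np.eye(4, dtype='float32')              # hypothetical class embeddings (one row per class)
loss_fn = devise_ranking_loss(emb, margin=0.1)

y_true = K.constant([[1., 0., 0., 0.]])       # embedding of the true class
y_pred = K.constant([[0.9, 0.3, 0.1, 0.0]])   # predicted embedding
print(K.eval(loss_fn(y_true, y_pred)))        # [0.] -- every wrong class is outside the margin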
Example #14
Source File: attlayer.py    From DeepMoji with MIT License
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
Example #15
Source File: losses.py    From style-transfer with MIT License
def gram_matrix(x):
	"""
	Computes the outer-product of the input tensor x.

	Input
	-----
	- x: input tensor of shape (C x H x W)

	Returns
	-------
	- x . x^T

	Note that this can be computed efficiently if x is reshaped
	as a tensor of shape (C x H*W).
	"""
	# assert K.ndim(x) == 3
	if K.image_dim_ordering() == 'th':
		features = K.batch_flatten(x)
	else:
		features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
	return K.dot(features, K.transpose(features)) 
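For intuition, the Gram matrix of a channels-first feature map has one entry per pair of channels; a short sketch with hypothetical sizes (tf.keras backend assumed):

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.rand(3, 4, 4))      # C=3 channels, 4x4 spatial ('th' ordering)
features = K.batch_flatten(x)                # (3, 16): one flattened row per channel
g = K.eval(K.dot(features, K.transpose(features)))
print(g.shape)                               # (3, 3): channel-to-channel correlations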
Example #16
Source File: graph.py    From Keras-TextClassification with MIT License
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result 
Example #17
Source File: attention.py    From keras-transformer with MIT License
def mask_attention_if_needed(self, dot_product):
        """
        Makes sure that (when enabled) each position
        (of a decoder's self-attention) cannot attend to subsequent positions.
        This is achieved by assigning -inf (or some large negative number)
        to all invalid connections. Later softmax will turn them into zeros.
        We need this to guarantee that decoder's predictions are based
        on what has happened before the position, not after.
        The method does nothing if masking is turned off.
        :param dot_product: scaled dot-product of Q and K after reshaping them
        to 3D tensors (batch * num_heads, rows, cols)
        """
        if not self.use_masking:
            return dot_product
        last_dims = K.int_shape(dot_product)[-2:]
        low_triangle_ones = (
            np.tril(np.ones(last_dims))
            # to ensure proper broadcasting
            .reshape((1,) + last_dims))
        inverse_low_triangle = 1 - low_triangle_ones
        close_to_negative_inf = -1e9
        result = (
            K.constant(low_triangle_ones, dtype=K.floatx()) * dot_product +
            K.constant(close_to_negative_inf * inverse_low_triangle))
        return result 
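The additive -1e9 mask built above keeps only the lower triangle of the attention matrix; a standalone NumPy sketch for a sequence of length 4 (zeros stand in for the scaled dot-product):

import numpy as np

seq_len = 4
low_triangle_ones = np.tril(np.ones((seq_len, seq_len)))
scores = np.zeros((seq_len, seq_len))                      # stand-in for the scaled dot-product
masked = low_triangle_ones * scores + (-1e9) * (1 - low_triangle_ones)
print(masked)
# row i keeps positions <= i at their original score; the rest become -1e9,
# which softmax then turns into effectively zero attention weight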
Example #18
Source File: attention.py    From keras-transformer with MIT License
def call(self, inputs, **kwargs):
        if not (isinstance(inputs, list) and len(inputs) == 2):
            raise ValueError(
                'You can call this layer only with a list of two tensors '
                '(for keys/values and queries)')
        key_values_input, query_input = inputs
        _, value_seq_len, d_model = K.int_shape(key_values_input)
        query_seq_len = K.int_shape(inputs[1])[-2]
        # The first thing we need to do is to perform affine transformations
        # of the inputs to get the Queries, the Keys and the Values.
        kv = K.dot(K.reshape(key_values_input, [-1, d_model]), self.kv_weights)
        # splitting the keys, the values and the queries before further
        # processing
        pre_k, pre_v = [
            K.reshape(
                # K.slice(kv, (0, i * d_model), (-1, d_model)),
                kv[:, i * d_model: (i + 1) * d_model],
                (-1, value_seq_len,
                 self.num_heads, d_model // self.num_heads))
            for i in range(2)]
        pre_q = K.reshape(
            K.dot(K.reshape(query_input, [-1, d_model]), self.q_weights),
            (-1, query_seq_len, self.num_heads, d_model // self.num_heads))
        return self.attention(pre_q, pre_v, pre_k, query_seq_len, d_model,
                              training=kwargs.get('training')) 
Example #19
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
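A small usage sketch of the wrapper above on the TensorFlow backend, with hypothetical shapes typical of an attention layer:

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.rand(2, 5, 8))    # (batch, timesteps, features)
kernel = K.constant(np.random.rand(8))     # 1-D attention weight vector

scores = dot_product(x, kernel)            # expand_dims + dot + squeeze on the TF branch
print(K.int_shape(scores))                 # (2, 5): one score per timestep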
Example #20
Source File: GraphEmbedding.py    From conv_qsar_fast with MIT License
def attributes_to_fp_contribution(self, attributes, depth):
		'''Given a 2D tensor of attributes where the first dimension corresponds to a single
		node, this method will apply the output sparsifying (often softmax) function and return
		the contribution to the fingerprint'''
		# Apply output activation function
		output_dot = K.dot(attributes[:, :-1], self.W_output[depth, :, :]) # ignore last attribute (bond flag)
		output_dot.name = 'output_dot'
		output_bias = self.b_output[depth, 0, :]
		output_bias.name = 'output_bias'
		output_activated = self.activation_output(output_dot + output_bias)
		output_activated.name = 'output_activated'
		return output_activated 
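A rough NumPy sketch of the step described in the docstring, a softmax over dot-plus-bias with the trailing bond-flag column dropped (all sizes are made up):

import numpy as np

def softmax(z):
    e = np.exp(z - z.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

attributes = np.random.rand(5, 9)          # 5 atoms, 8 attributes + 1 bond flag
W_output = np.random.rand(8, 32)           # projects attributes onto a 32-slot fingerprint
b_output = np.random.rand(32)

contribution = softmax(attributes[:, :-1] @ W_output + b_output)
print(contribution.shape)                  # (5, 32), each row sums to 1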
Example #21
Source File: GraphEmbedding_sumAfter.py    From conv_qsar_fast with MIT License
def attributes_to_fp_contribution(self, attributes, depth):
		'''Given a 2D tensor of attributes where the first dimension corresponds to a single
		node, this method will apply the output sparsifying (often softmax) function and return
		the contribution to the fingerprint'''
		# Apply output activation function
		output_dot = K.dot(attributes[:, :-1], self.W_output[depth, :, :]) # ignore last attribute (bond flag)
		output_dot.name = 'output_dot'
		output_bias = self.b_output[depth, 0, :]
		output_bias.name = 'output_bias'
		output_activated = self.activation_output(output_dot + output_bias)
		output_activated.name = 'output_activated'
		return output_activated 
Example #22
Source File: attention.py    From Document-Classifier-LSTM with MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example #23
Source File: attention.py    From keras-utility-layer-collection with MIT License
def get_constants(self, x):
        # add constants to speed up calculation
        constants = [x, K.dot(x, self._W1) + self._b2]
        
        return constants 
Example #24
Source File: utils.py    From semantic-embeddings with MIT License
def inv_correlation(y_true, y_pred):
    """ Computes 1 minus the dot product between corresponding pairs of samples in two tensors. """
    return 1. - K.sum(y_true * y_pred, axis = -1) 
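A tiny sanity check for the function above with made-up unit-length rows:

import numpy as np
from tensorflow.keras import backend as K

y_true = K.constant([[1., 0., 0.], [0., 1., 0.]])
y_pred = K.constant([[0.8, 0.6, 0.0], [0.0, 1.0, 0.0]])
print(K.eval(inv_correlation(y_true, y_pred)))   # [0.2 0.]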
Example #25
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example #26
Source File: rnn_feature.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License
def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and Tensorflow
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel) 
Example #27
Source File: ind_rnn.py    From Keras-IndRNN with MIT License
def call(self, inputs, states, training=None):
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(
                K.ones_like(inputs),
                self.dropout,
                training=training,
                count=1)
        if (0 < self.recurrent_dropout < 1 and
                self._recurrent_masks is None):
            _recurrent_mask = _generate_dropout_mask(
                K.ones_like(states[0]),
                self.recurrent_dropout,
                training=training,
                count=1)
            self._recurrent_masks = _recurrent_mask

        # dropout matrices for input units
        dp_mask = self._dropout_mask
        # dropout matrices for recurrent units
        rec_dp_masks = self._recurrent_masks

        h_tm1 = states[0]  # previous state

        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]

        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_masks[0]

        h = K.dot(inputs, self.kernel)
        h = h + (h_tm1 * self.recurrent_kernel)

        if self.use_bias:
            h = K.bias_add(h, self.bias)

        h = self.activation(h)

        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                h._uses_learning_phase = True
        return h, [h] 
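The defining trait of the update above is that the recurrent term is element-wise (h_tm1 * recurrent_kernel) rather than a full matrix product; a minimal NumPy sketch with hypothetical sizes:

import numpy as np

units, features = 4, 6
kernel = np.random.rand(features, units)
recurrent_kernel = np.random.rand(units)   # one recurrent weight per unit, not a matrix

x_t = np.random.rand(features)
h_tm1 = np.zeros(units)

h_t = np.tanh(x_t @ kernel + h_tm1 * recurrent_kernel)   # each unit sees only its own past state
print(h_t.shape)                                         # (4,)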
Example #28
Source File: attention.py    From keras-utility-layer-collection with MIT License
def _multiplicative_similarity(self, source, query):
        qp = K.dot(query, self._weights["w_a"])
        similarity = K.batch_dot(K.permute_dimensions(qp, [0, 2, 1]), source, axes=[1, 2])
        
        return similarity 
Example #29
Source File: detection.py    From keras-yolo with MIT License
def call(self, x, training=None):
        #output_shape = self.compute_output_shape(x.shape)
        #if x.shape[1:]!=output_shape[1:]:
        #    return x.reshape((-1,)+ output_shape[1:]) 
        return x #K.dot(x, self.kernel) 
Example #30
Source File: bert.py    From keras-bert-ner with MIT License
def call(self, inputs):
        if not hasattr(self, "kernel"):
            embedding_layer = inputs._keras_history[0]

            if embedding_layer.name != self.embedding_name:

                def recursive_search(layer):
                    """递归向上搜索,根据名字找Embedding层
                    """
                    last_layer = layer._inbound_nodes[0].inbound_layers
                    if isinstance(last_layer, list):
                        if len(last_layer) == 0:
                            return None
                        else:
                            last_layer = last_layer[0]
                    if last_layer.name == self.embedding_name:
                        return last_layer
                    else:
                        return recursive_search(last_layer)

                embedding_layer = recursive_search(embedding_layer)
                if embedding_layer is None:
                    raise Exception("Embedding layer not found")

                self.kernel = K.transpose(embedding_layer.embeddings)
                self.units = K.int_shape(self.kernel)[1]
                self.bias = self.add_weight(name="bias",
                                            shape=(self.units, ),
                                            initializer="zeros")

        outputs = K.dot(inputs, self.kernel)
        outputs = K.bias_add(outputs, self.bias)
        outputs = self.activation(outputs)
        return outputs