Python keras.backend.softmax() Examples

The following are 30 code examples of keras.backend.softmax(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module keras.backend , or try the search function .
Example #1
Source File: capsule.py    From Keras-TextClassification with MIT License 6 votes vote down vote up
def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        outputs = None
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs 
Example #2
Source File: augmented_model.py    From tying-wv-and-wc with MIT License 6 votes vote down vote up
def augmented_loss(self, y_true, y_pred):
        _y_pred = Activation("softmax")(y_pred)
        loss = K.categorical_crossentropy(_y_pred, y_true)

        # y is (batch x seq x vocab)
        y_indexes = K.argmax(y_true, axis=2)  # turn one hot to index. (batch x seq)
        y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

        #v_length = self.setting.vector_length
        #y_vectors = K.reshape(y_vectors, (-1, v_length))
        #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
        #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
        #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

        # vector x embedding dot products (batch x seq x vocab)
        y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
        y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
        y_t = K.softmax(y_t / self.temperature)
        _y_pred_t = Activation("softmax")(y_pred / self.temperature)
        aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
        loss += (self.gamma * self.temperature) * aug_loss
        return loss 
Example #3
Source File: augmented_model.py    From tying-wv-and-wc with MIT License 6 votes vote down vote up
def __init__(self, 
        vocab_size, 
        sequence_size,
        setting=None,
        checkpoint_path="",
        temperature=10,
        tying=False):

        super().__init__(vocab_size, sequence_size, setting, checkpoint_path)
        self.temperature = temperature
        self.tying = tying
        self.gamma = self.setting.gamma

        if tying:
            self.model.pop()  # remove activation
            self.model.pop()  # remove projection (use self embedding)
            self.model.add(Lambda(lambda x: K.dot(x, K.transpose(self.embedding.embeddings))))
            self.model.add(Activation("softmax")) 
Example #4
Source File: attention.py    From keras-utility-layer-collection with MIT License 6 votes vote down vote up
def step(self, x, states):  
        h = states[0]
        # states[1] necessary?
        
        # comes from the constants
        X_static = states[-2]
        # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
        total_x_static_prod = states[-1]

        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_static_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        static_x_weighted = K.sum(attention * X_static, [1])
        
        x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

        h, new_states = self.layer.cell.call(x, states[:-2])
        
        # append attention to the states to "smuggle" it out of the RNN wrapper
        attention = K.squeeze(attention, -1)
        h = K.concatenate([h, attention])

        return h, new_states 
Example #5
Source File: attention.py    From keras-utility-layer-collection with MIT License 6 votes vote down vote up
def step(self, x, states):   
        h = states[0]
        # states[1] necessary?

        # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
        total_x_prod = states[-1]
        # comes from the constants (equals the input sequence)
        X = states[-2]
        
        # expand dims to add the vector which is only valid for this time step
        # to total_x_prod which is valid for all time steps
        hw = K.expand_dims(K.dot(h, self._W2), 1)
        additive_atn = total_x_prod + hw
        attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
        x_weighted = K.sum(attention * X, [1])

        x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3
        
        h, new_states = self.layer.cell.call(x, states[:-2])
        
        return h, new_states 
Example #6
Source File: self_attention.py    From nlp_toolkit with MIT License 6 votes vote down vote up
def Mask(self, inputs, seq_len, mode='mul'):
        """
        # Arguments:
            inputs: input tensor with shape (batch_size, seq_len, input_size)
            seq_len: Each sequence's actual length with shape (batch_size,)
            mode:
                mul: mask the rest dim with zero, used before fully-connected layer
                add: subtract a big constant from the rest, used before softmax layer
        # Reutrns:
            Masked tensors with the same shape of input tensor
        """
        if seq_len is None:
            return inputs
        else:
            mask = K.one_hot(seq_len[:, 0], K.shape(inputs)[1])
            mask = 1 - K.cumsum(mask, 1)
            for _ in range(len(inputs.shape) - 2):
                mask = K.expand_dims(mask, 2)
            if mode == 'mul':
                return inputs * mask
            if mode == 'add':
                return inputs - (1 - mask) * 1e12 
Example #7
Source File: multi_dim_attention.py    From nlp_toolkit with MIT License 6 votes vote down vote up
def call(self, x, mask=None):
        uit = K.tanh(K.dot(x, self.Ws1))
        ait = K.dot(uit, self.Ws2)
        ait = K.permute_dimensions(ait, (0, 2, 1))
        A = K.softmax(ait, axis=1)
        M = K.batch_dot(A, x)
        if self.punish:
            A_T = K.permute_dimensions(A, (0, 2, 1))
            tile_eye = K.tile(K.eye(self.weight_ws2), [self.batch_size, 1])
            tile_eye = K.reshape(
                tile_eye, shape=[-1, self.weight_ws2, self.weight_ws2])
            AA_T = K.batch_dot(A, A_T) - tile_eye
            P = K.l2_normalize(AA_T, axis=(1, 2))
            return M, P
        else:
            return M 
Example #8
Source File: neural_networks.py    From Quora with MIT License 6 votes vote down vote up
def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))    # noqa
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]  # noqa

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]  # noqa
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]    # noqa
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(tf.keras.backend.batch_dot(c, u_hat_vecs, [2, 2]))    # noqa
            if i < self.routings - 1:
                b = tf.keras.backend.batch_dot(outputs, u_hat_vecs, [2, 3])
        return outputs 
Example #9
Source File: submission_v50.py    From Quora with MIT License 6 votes vote down vote up
def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))    # noqa
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]  # noqa

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]  # noqa
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]    # noqa
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(tf.keras.backend.batch_dot(c, u_hat_vecs, [2, 2]))    # noqa
            if i < self.routings - 1:
                b = tf.keras.backend.batch_dot(outputs, u_hat_vecs, [2, 3])
        return outputs 
Example #10
Source File: memory.py    From NTM-Keras with MIT License 6 votes vote down vote up
def content_addressing(memory_t,  key_vector_t, key_strength_t):
    '''
    Focusing by content.
    :param memory_t: external memory.
    :param key_vector_t: key vector.
    :param key_strength_t: the strength of key.
    :return:
    '''
    # print("content addressing:")
    # print(">>memory_t")
    # print(key_vector_t)
    # print(">>key_vector_t")
    # print(key_vector_t)
    # print(">>key_strength_t")
    # print(key_strength_t)
    _weight_content_t = \
        key_strength_t * cosine_similarity_group(key_vector_t, memory_t)
    weight_content_t = softmax(_weight_content_t)
    # print("_weight_content_t")
    # print(_weight_content_t)
    return weight_content_t 
Example #11
Source File: ntm.py    From ntm_keras with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
#        M = tf.Print(M, [M, w_tm1, k], message='get weights beg1: ')
#        M = tf.Print(M, [beta, g, s, gamma], message='get weights beg2: ')
        # Content adressing, see Chapter 3.3.1:
        num = beta * _cosine_distance(M, k)
        w_c  = K.softmax(num) # It turns out that equation (5) is just softmax.
        # Location adressing, see Chapter 3.3.2:
        # Equation 7:
        w_g = (g * w_c) + (1-g)*w_tm1
        # C_s is the circular convolution
        #C_w = K.sum((self.C[None, :, :, :] * w_g[:, None, None, :]),axis=3)
        # Equation 8:
        # TODO: Explain
        C_s = K.sum(K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0) * s[:,:,None,None], axis=1)
        w_tilda = K.batch_dot(C_s, w_g)
        # Equation 9:
        w_out = _renorm(w_tilda ** gamma)

        return w_out 
Example #12
Source File: temporal_mean_rate_theano.py    From snn_toolbox with MIT License 6 votes vote down vote up
def softmax_activation(self, mem):
        """Softmax activation."""

        # spiking_samples = k.less_equal(k.random_uniform([self.config.getint(
        #     'simulation', 'batch_size'), 1]), 300 * self.dt / 1000.)
        # spiking_neurons = k.T.repeat(spiking_samples, 10, axis=1)
        # activ = k.T.nnet.softmax(mem)
        # max_activ = k.max(activ, axis=1, keepdims=True)
        # output_spikes = k.equal(activ, max_activ).astype(k.floatx())
        # output_spikes = k.T.set_subtensor(output_spikes[k.equal(
        #     spiking_neurons, 0).nonzero()], 0.)
        # new_and_reset_mem = k.T.set_subtensor(mem[spiking_neurons.nonzero()],
        #                                       0.)
        # self.add_update([(self.mem, new_and_reset_mem)])
        # return output_spikes

        return k.T.mul(k.less_equal(k.random_uniform(mem.shape),
                                    k.softmax(mem)), self.v_thresh) 
Example #13
Source File: temporal_mean_rate_theano.py    From snn_toolbox with MIT License 6 votes vote down vote up
def set_reset_mem(self, mem, spikes):
        """
        Reset membrane potential ``mem`` array where ``spikes`` array is
        nonzero.
        """

        spike_idxs = k.T.nonzero(spikes)
        if (hasattr(self, 'activation_str') and
                self.activation_str == 'softmax'):
            new = mem.copy()  # k.T.set_subtensor(mem[spike_idxs], 0.)
        elif self.config.get('cell', 'reset') == 'Reset by subtraction':
            if self.payloads:  # Experimental.
                new = k.T.set_subtensor(mem[spike_idxs], 0.)
            else:
                pos_spike_idxs = k.T.nonzero(k.greater(spikes, 0))
                neg_spike_idxs = k.T.nonzero(k.less(spikes, 0))
                new = k.T.inc_subtensor(mem[pos_spike_idxs], -self.v_thresh)
                new = k.T.inc_subtensor(new[neg_spike_idxs], self.v_thresh)
        elif self.config.get('cell', 'reset') == 'Reset by modulo':
            new = k.T.set_subtensor(mem[spike_idxs],
                                    mem[spike_idxs] % self.v_thresh)
        else:  # self.config.get('cell', 'reset') == 'Reset to zero':
            new = k.T.set_subtensor(mem[spike_idxs], 0.)
        self.add_update([(self.mem, new)]) 
Example #14
Source File: models.py    From voxelmorph with GNU General Public License v3.0 6 votes vote down vote up
def _softmax(x, axis=-1, alpha=1):
    """
    building on keras implementation, allow alpha parameter

    Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
        alpha: a value to multiply all x
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    x = alpha * x
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D') 
Example #15
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License 6 votes vote down vote up
def mlp_v2():
    model = Sequential()
    model.add(Dense(2048, input_shape=(21099,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())
#     model.add(Dense(1024))
#     model.add(Activation('relu'))
#     model.add(Dropout(0.5))   
#     model.add(BatchNormalization())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))   
    model.add(BatchNormalization())
    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))    
    model.add(BatchNormalization())
    model.add(Dense(6))
    model.add(Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['accuracy'])
    return model 
Example #16
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License 6 votes vote down vote up
def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs 
Example #17
Source File: models.py    From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License 6 votes vote down vote up
def CapsuleNet(n_capsule = 10, n_routings = 5, capsule_dim = 16,
     n_recurrent=100, dropout_rate=0.2, l2_penalty=0.0001):
    K.clear_session()

    inputs = Input(shape=(170,))
    x = Embedding(21099, 300,  trainable=True)(inputs)        
    x = SpatialDropout1D(dropout_rate)(x)
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x)
    x = PReLU()(x)
    x = Capsule(
        num_capsule=n_capsule, dim_capsule=capsule_dim,
        routings=n_routings, share_weights=True)(x)
    x = Flatten(name = 'concatenate')(x)
    x = Dropout(dropout_rate)(x)
#     fc = Dense(128, activation='sigmoid')(x)
    outputs = Dense(6, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='nadam', metrics=['accuracy'])
    return model 
Example #18
Source File: seq2seq_attention.py    From stock-price-predict with MIT License 6 votes vote down vote up
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D') 
Example #19
Source File: seq2seq_attention_2.py    From stock-price-predict with MIT License 6 votes vote down vote up
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x : Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D') 
Example #20
Source File: attention.py    From keras-transformer with MIT License 6 votes vote down vote up
def mask_attention_if_needed(self, dot_product):
        """
        Makes sure that (when enabled) each position
        (of a decoder's self-attention) cannot attend to subsequent positions.
        This is achieved by assigning -inf (or some large negative number)
        to all invalid connections. Later softmax will turn them into zeros.
        We need this to guarantee that decoder's predictions are based
        on what has happened before the position, not after.
        The method does nothing if masking is turned off.
        :param dot_product: scaled dot-product of Q and K after reshaping them
        to 3D tensors (batch * num_heads, rows, cols)
        """
        if not self.use_masking:
            return dot_product
        last_dims = K.int_shape(dot_product)[-2:]
        low_triangle_ones = (
            np.tril(np.ones(last_dims))
            # to ensure proper broadcasting
            .reshape((1,) + last_dims))
        inverse_low_triangle = 1 - low_triangle_ones
        close_to_negative_inf = -1e9
        result = (
            K.constant(low_triangle_ones, dtype=K.floatx()) * dot_product +
            K.constant(close_to_negative_inf * inverse_low_triangle))
        return result 
Example #21
Source File: learn_labelembedding.py    From semantic-embeddings with MIT License 6 votes vote down vote up
def labelembed_loss(out1, out2, tar, targets, tau = 2., alpha = 0.9, beta = 0.5, num_classes = 100):
    
    out2_prob = K.softmax(out2)
    tau2_prob = K.stop_gradient(K.softmax(out2 / tau))
    soft_tar = K.stop_gradient(K.softmax(tar))
    
    L_o1_y = K.sparse_categorical_crossentropy(output = K.softmax(out1), target = targets)
    
    pred = K.argmax(out2, axis = -1)
    mask = K.stop_gradient(K.cast(K.equal(pred, K.cast(targets, 'int64')), K.floatx()))
    L_o1_emb = -cross_entropy(out1, soft_tar)  # pylint: disable=invalid-unary-operand-type
    
    L_o2_y = K.sparse_categorical_crossentropy(output = out2_prob, target = targets)
    L_emb_o2 = -cross_entropy(tar, tau2_prob) * mask * (K.cast(K.shape(mask)[0], K.floatx())/(K.sum(mask)+1e-8))  # pylint: disable=invalid-unary-operand-type
    L_re = K.relu(K.sum(out2_prob * K.one_hot(K.cast(targets, 'int64'), num_classes), axis = -1) - alpha)
    
    return beta * L_o1_y + (1-beta) * L_o1_emb + L_o2_y + L_emb_o2 + L_re 
Example #22
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis. 
Example #23
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis. 
Example #24
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o 
Example #25
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis. 
Example #26
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss 
Example #27
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis. 
Example #28
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss 
Example #29
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss 
Example #30
Source File: cifar10_cnn_capsule.py    From DeepLearning_Wavelet-LSTM with MIT License 5 votes vote down vote up
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.