Python keras.backend.softmax() Examples
The following are 30 code examples of keras.backend.softmax(). You can go to the original project or source file by following the source reference above each example. You may also want to check out all other available functions and classes of the keras.backend module.
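Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below) of calling keras.backend.softmax() directly; in recent Keras versions the normalization is applied over the last axis by default.

import numpy as np
from keras import backend as K

logits = K.constant(np.array([[1.0, 2.0, 3.0],
                              [1.0, 1.0, 1.0]]))
probs = K.softmax(logits)           # normalizes each row into a probability distribution
print(K.eval(probs).sum(axis=-1))   # each row sums to ~1.0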
Example #1
Source File: capsule.py From Keras-TextClassification with MIT License | 6 votes |
def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    outputs = None
    for i in range(self.routings):
        b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
        if i < self.routings - 1:
            b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

    return outputs
Example #2
Source File: augmented_model.py From tying-wv-and-wc with MIT License | 6 votes |
def augmented_loss(self, y_true, y_pred):
    _y_pred = Activation("softmax")(y_pred)
    loss = K.categorical_crossentropy(_y_pred, y_true)

    # y is (batch x seq x vocab)
    y_indexes = K.argmax(y_true, axis=2)   # turn one hot to index. (batch x seq)
    y_vectors = self.embedding(y_indexes)  # lookup the vector (batch x seq x vector_length)

    #v_length = self.setting.vector_length
    #y_vectors = K.reshape(y_vectors, (-1, v_length))
    #y_t = K.map_fn(lambda v: K.dot(self.embedding.embeddings, K.reshape(v, (-1, 1))), y_vectors)
    #y_t = K.squeeze(y_t, axis=2)  # unknown but necessary operation
    #y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))

    # vector x embedding dot products (batch x seq x vocab)
    y_t = tf.tensordot(y_vectors, K.transpose(self.embedding.embeddings), 1)
    y_t = K.reshape(y_t, (-1, self.sequence_size, self.vocab_size))  # explicitly set shape
    y_t = K.softmax(y_t / self.temperature)
    _y_pred_t = Activation("softmax")(y_pred / self.temperature)

    aug_loss = kullback_leibler_divergence(y_t, _y_pred_t)
    loss += (self.gamma * self.temperature) * aug_loss
    return loss
Example #3
Source File: augmented_model.py From tying-wv-and-wc with MIT License | 6 votes |
def __init__(self, vocab_size, sequence_size, setting=None, checkpoint_path="", temperature=10, tying=False):
    super().__init__(vocab_size, sequence_size, setting, checkpoint_path)
    self.temperature = temperature
    self.tying = tying
    self.gamma = self.setting.gamma

    if tying:
        self.model.pop()  # remove activation
        self.model.pop()  # remove projection (use self embedding)
        self.model.add(Lambda(lambda x: K.dot(x, K.transpose(self.embedding.embeddings))))
        self.model.add(Activation("softmax"))
Example #4
Source File: attention.py From keras-utility-layer-collection with MIT License | 6 votes |
def step(self, x, states):
    h = states[0]
    # states[1] necessary?

    # comes from the constants
    X_static = states[-2]
    # equals K.dot(static_x, self._W1) + self._b2 with X.shape=[bs, L, static_input_dim]
    total_x_static_prod = states[-1]

    # expand dims to add the vector which is only valid for this time step
    # to total_x_prod which is valid for all time steps
    hw = K.expand_dims(K.dot(h, self._W2), 1)
    additive_atn = total_x_static_prod + hw
    attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
    static_x_weighted = K.sum(attention * X_static, [1])

    x = K.dot(K.concatenate([x, static_x_weighted], 1), self._W3) + self._b3

    h, new_states = self.layer.cell.call(x, states[:-2])

    # append attention to the states to "smuggle" it out of the RNN wrapper
    attention = K.squeeze(attention, -1)
    h = K.concatenate([h, attention])

    return h, new_states
Example #5
Source File: attention.py From keras-utility-layer-collection with MIT License | 6 votes |
def step(self, x, states):
    h = states[0]
    # states[1] necessary?

    # equals K.dot(X, self._W1) + self._b2 with X.shape=[bs, T, input_dim]
    total_x_prod = states[-1]
    # comes from the constants (equals the input sequence)
    X = states[-2]

    # expand dims to add the vector which is only valid for this time step
    # to total_x_prod which is valid for all time steps
    hw = K.expand_dims(K.dot(h, self._W2), 1)
    additive_atn = total_x_prod + hw
    attention = K.softmax(K.dot(additive_atn, self._V), axis=1)
    x_weighted = K.sum(attention * X, [1])

    x = K.dot(K.concatenate([x, x_weighted], 1), self._W3) + self._b3

    h, new_states = self.layer.cell.call(x, states[:-2])

    return h, new_states
Example #6
Source File: self_attention.py From nlp_toolkit with MIT License | 6 votes |
def Mask(self, inputs, seq_len, mode='mul'):
    """
    # Arguments:
        inputs: input tensor with shape (batch_size, seq_len, input_size)
        seq_len: each sequence's actual length, with shape (batch_size,)
        mode:
            mul: mask the rest with zero, used before a fully-connected layer
            add: subtract a big constant from the rest, used before a softmax layer
    # Returns:
        Masked tensor with the same shape as the input tensor
    """
    if seq_len is None:
        return inputs
    else:
        mask = K.one_hot(seq_len[:, 0], K.shape(inputs)[1])
        mask = 1 - K.cumsum(mask, 1)
        for _ in range(len(inputs.shape) - 2):
            mask = K.expand_dims(mask, 2)
        if mode == 'mul':
            return inputs * mask
        if mode == 'add':
            return inputs - (1 - mask) * 1e12
Example #7
Source File: multi_dim_attention.py From nlp_toolkit with MIT License | 6 votes |
def call(self, x, mask=None):
    uit = K.tanh(K.dot(x, self.Ws1))
    ait = K.dot(uit, self.Ws2)
    ait = K.permute_dimensions(ait, (0, 2, 1))
    A = K.softmax(ait, axis=1)
    M = K.batch_dot(A, x)
    if self.punish:
        A_T = K.permute_dimensions(A, (0, 2, 1))
        tile_eye = K.tile(K.eye(self.weight_ws2), [self.batch_size, 1])
        tile_eye = K.reshape(
            tile_eye, shape=[-1, self.weight_ws2, self.weight_ws2])
        AA_T = K.batch_dot(A, A_T) - tile_eye
        P = K.l2_normalize(AA_T, axis=(1, 2))
        return M, P
    else:
        return M
Example #8
Source File: neural_networks.py From Quora with MIT License | 6 votes |
def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))  # noqa
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]  # noqa

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]  # noqa
    for i in range(self.routings):
        b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]  # noqa
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        outputs = self.activation(tf.keras.backend.batch_dot(c, u_hat_vecs, [2, 2]))  # noqa
        if i < self.routings - 1:
            b = tf.keras.backend.batch_dot(outputs, u_hat_vecs, [2, 3])

    return outputs
Example #9
Source File: submission_v50.py From Quora with MIT License | 6 votes |
def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))  # noqa
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
    # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]  # noqa

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]  # noqa
    for i in range(self.routings):
        b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]  # noqa
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        outputs = self.activation(tf.keras.backend.batch_dot(c, u_hat_vecs, [2, 2]))  # noqa
        if i < self.routings - 1:
            b = tf.keras.backend.batch_dot(outputs, u_hat_vecs, [2, 3])

    return outputs
Example #10
Source File: memory.py From NTM-Keras with MIT License | 6 votes |
def content_addressing(memory_t, key_vector_t, key_strength_t):
    '''
    Focusing by content.
    :param memory_t: external memory.
    :param key_vector_t: key vector.
    :param key_strength_t: the strength of key.
    :return:
    '''
    # print("content addressing:")
    # print(">>memory_t")
    # print(key_vector_t)
    # print(">>key_vector_t")
    # print(key_vector_t)
    # print(">>key_strength_t")
    # print(key_strength_t)
    _weight_content_t = \
        key_strength_t * cosine_similarity_group(key_vector_t, memory_t)
    weight_content_t = softmax(_weight_content_t)
    # print("_weight_content_t")
    # print(_weight_content_t)
    return weight_content_t
Example #11
Source File: ntm.py From ntm_keras with BSD 3-Clause "New" or "Revised" License | 6 votes |
def _get_weight_vector(self, M, w_tm1, k, beta, g, s, gamma):
    # M = tf.Print(M, [M, w_tm1, k], message='get weights beg1: ')
    # M = tf.Print(M, [beta, g, s, gamma], message='get weights beg2: ')

    # Content addressing, see Chapter 3.3.1:
    num = beta * _cosine_distance(M, k)
    w_c = K.softmax(num)  # It turns out that equation (5) is just softmax.

    # Location addressing, see Chapter 3.3.2:
    # Equation 7:
    w_g = (g * w_c) + (1 - g) * w_tm1
    # C_s is the circular convolution
    #C_w = K.sum((self.C[None, :, :, :] * w_g[:, None, None, :]), axis=3)
    # Equation 8:
    # TODO: Explain
    C_s = K.sum(K.repeat_elements(self.C[None, :, :, :], self.batch_size, axis=0) * s[:, :, None, None], axis=1)
    w_tilda = K.batch_dot(C_s, w_g)
    # Equation 9:
    w_out = _renorm(w_tilda ** gamma)

    return w_out
Example #12
Source File: temporal_mean_rate_theano.py From snn_toolbox with MIT License | 6 votes |
def softmax_activation(self, mem):
    """Softmax activation."""

    # spiking_samples = k.less_equal(k.random_uniform([self.config.getint(
    #     'simulation', 'batch_size'), 1]), 300 * self.dt / 1000.)
    # spiking_neurons = k.T.repeat(spiking_samples, 10, axis=1)
    # activ = k.T.nnet.softmax(mem)
    # max_activ = k.max(activ, axis=1, keepdims=True)
    # output_spikes = k.equal(activ, max_activ).astype(k.floatx())
    # output_spikes = k.T.set_subtensor(output_spikes[k.equal(
    #     spiking_neurons, 0).nonzero()], 0.)
    # new_and_reset_mem = k.T.set_subtensor(mem[spiking_neurons.nonzero()], 0.)
    # self.add_update([(self.mem, new_and_reset_mem)])
    # return output_spikes

    return k.T.mul(k.less_equal(k.random_uniform(mem.shape),
                                k.softmax(mem)), self.v_thresh)
Example #13
Source File: temporal_mean_rate_theano.py From snn_toolbox with MIT License | 6 votes |
def set_reset_mem(self, mem, spikes):
    """
    Reset membrane potential ``mem`` array where ``spikes`` array is nonzero.
    """

    spike_idxs = k.T.nonzero(spikes)
    if (hasattr(self, 'activation_str') and
            self.activation_str == 'softmax'):
        new = mem.copy()  # k.T.set_subtensor(mem[spike_idxs], 0.)
    elif self.config.get('cell', 'reset') == 'Reset by subtraction':
        if self.payloads:  # Experimental.
            new = k.T.set_subtensor(mem[spike_idxs], 0.)
        else:
            pos_spike_idxs = k.T.nonzero(k.greater(spikes, 0))
            neg_spike_idxs = k.T.nonzero(k.less(spikes, 0))
            new = k.T.inc_subtensor(mem[pos_spike_idxs], -self.v_thresh)
            new = k.T.inc_subtensor(new[neg_spike_idxs], self.v_thresh)
    elif self.config.get('cell', 'reset') == 'Reset by modulo':
        new = k.T.set_subtensor(mem[spike_idxs],
                                mem[spike_idxs] % self.v_thresh)
    else:  # self.config.get('cell', 'reset') == 'Reset to zero':
        new = k.T.set_subtensor(mem[spike_idxs], 0.)
    self.add_update([(self.mem, new)])
Example #14
Source File: models.py From voxelmorph with GNU General Public License v3.0 | 6 votes |
def _softmax(x, axis=-1, alpha=1):
    """
    Building on the Keras implementation, allow an alpha parameter.

    Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
        alpha: a value to multiply all x
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    x = alpha * x
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
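A small usage sketch of the helper above (illustrative shapes only, assuming _softmax is importable from the module): alpha acts as an inverse temperature, so values below 1 flatten the output distribution and values above 1 sharpen it.

import numpy as np
from keras import backend as K

logits = K.constant(np.random.randn(2, 4, 5))         # e.g. (batch, locations, labels)
flat = K.eval(_softmax(logits, axis=-1, alpha=0.5))   # flatter label probabilities
sharp = K.eval(_softmax(logits, axis=-1, alpha=2.0))  # more peaked label probabilities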
Example #15
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def mlp_v2():
    model = Sequential()
    model.add(Dense(2048, input_shape=(21099,)))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())

    # model.add(Dense(1024))
    # model.add(Activation('relu'))
    # model.add(Dropout(0.5))
    # model.add(BatchNormalization())

    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())

    model.add(Dense(128))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(BatchNormalization())

    model.add(Dense(6))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['accuracy'])
    return model
Example #16
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def call(self, u_vecs):
    if self.share_weights:
        u_hat_vecs = K.conv1d(u_vecs, self.W)
    else:
        u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

    batch_size = K.shape(u_vecs)[0]
    input_num_capsule = K.shape(u_vecs)[1]
    u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                        self.num_capsule, self.dim_capsule))
    u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))

    b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
    for i in range(self.routings):
        b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
        c = K.softmax(b)
        c = K.permute_dimensions(c, (0, 2, 1))
        b = K.permute_dimensions(b, (0, 2, 1))
        outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
        if i < self.routings - 1:
            b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

    return outputs
Example #17
Source File: models.py From DigiX_HuaWei_Population_Age_Attribution_Predict with MIT License | 6 votes |
def CapsuleNet(n_capsule=10, n_routings=5, capsule_dim=16,
               n_recurrent=100, dropout_rate=0.2, l2_penalty=0.0001):
    K.clear_session()

    inputs = Input(shape=(170,))
    x = Embedding(21099, 300, trainable=True)(inputs)
    x = SpatialDropout1D(dropout_rate)(x)
    x = Bidirectional(
        CuDNNGRU(n_recurrent, return_sequences=True,
                 kernel_regularizer=l2(l2_penalty),
                 recurrent_regularizer=l2(l2_penalty)))(x)
    x = PReLU()(x)
    x = Capsule(
        num_capsule=n_capsule, dim_capsule=capsule_dim,
        routings=n_routings, share_weights=True)(x)
    x = Flatten(name='concatenate')(x)
    x = Dropout(dropout_rate)(x)
    # fc = Dense(128, activation='sigmoid')(x)
    outputs = Dense(6, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy',
                  optimizer='nadam',
                  metrics=['accuracy'])
    return model
Example #18
Source File: seq2seq_attention.py From stock-price-predict with MIT License | 6 votes |
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
Example #19
Source File: seq2seq_attention_2.py From stock-price-predict with MIT License | 6 votes |
def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
Example #20
Source File: attention.py From keras-transformer with MIT License | 6 votes |
def mask_attention_if_needed(self, dot_product):
    """
    Makes sure that (when enabled) each position
    (of a decoder's self-attention) cannot attend to subsequent positions.
    This is achieved by assigning -inf (or some large negative number)
    to all invalid connections. Later softmax will turn them into zeros.
    We need this to guarantee that decoder's predictions are based
    on what has happened before the position, not after.
    The method does nothing if masking is turned off.
    :param dot_product: scaled dot-product of Q and K after reshaping them
        to 3D tensors (batch * num_heads, rows, cols)
    """
    if not self.use_masking:
        return dot_product
    last_dims = K.int_shape(dot_product)[-2:]
    low_triangle_ones = (
        np.tril(np.ones(last_dims))
        # to ensure proper broadcasting
        .reshape((1,) + last_dims))
    inverse_low_triangle = 1 - low_triangle_ones
    close_to_negative_inf = -1e9
    result = (
        K.constant(low_triangle_ones, dtype=K.floatx()) * dot_product +
        K.constant(close_to_negative_inf * inverse_low_triangle))
    return result
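As a quick standalone illustration of the triangular masking described in the docstring above (plain NumPy, not part of the keras-transformer source):

import numpy as np

L = 4                                         # sequence length
low_triangle_ones = np.tril(np.ones((L, L)))  # 1 on and below the diagonal
inverse_low_triangle = 1 - low_triangle_ones  # 1 only at "future" positions
scores = np.random.randn(L, L)                # stand-in for the scaled dot-product
masked = scores * low_triangle_ones - 1e9 * inverse_low_triangle
# row i of softmax(masked) now assigns (near-)zero weight to positions j > i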
Example #21
Source File: learn_labelembedding.py From semantic-embeddings with MIT License | 6 votes |
def labelembed_loss(out1, out2, tar, targets, tau=2., alpha=0.9, beta=0.5, num_classes=100):
    out2_prob = K.softmax(out2)
    tau2_prob = K.stop_gradient(K.softmax(out2 / tau))
    soft_tar = K.stop_gradient(K.softmax(tar))

    L_o1_y = K.sparse_categorical_crossentropy(output=K.softmax(out1), target=targets)

    pred = K.argmax(out2, axis=-1)
    mask = K.stop_gradient(K.cast(K.equal(pred, K.cast(targets, 'int64')), K.floatx()))
    L_o1_emb = -cross_entropy(out1, soft_tar)  # pylint: disable=invalid-unary-operand-type

    L_o2_y = K.sparse_categorical_crossentropy(output=out2_prob, target=targets)
    L_emb_o2 = -cross_entropy(tar, tau2_prob) * mask * \
        (K.cast(K.shape(mask)[0], K.floatx()) / (K.sum(mask) + 1e-8))  # pylint: disable=invalid-unary-operand-type

    L_re = K.relu(K.sum(out2_prob * K.one_hot(K.cast(targets, 'int64'), num_classes), axis=-1) - alpha)

    return beta * L_o1_y + (1 - beta) * L_o1_emb + L_o2_y + L_emb_o2 + L_re
Example #22
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
Example #23
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
Example #24
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def call(self, inputs):
    """Following the routing algorithm from Hinton's paper,
    but replace b = b + <u,v> with b = <u,v>.

    This change can improve the feature representation of Capsule.

    However, you can replace
        b = K.batch_dot(outputs, hat_inputs, [2, 3])
    with
        b += K.batch_dot(outputs, hat_inputs, [2, 3])
    to realize a standard routing.
    """

    if self.share_weights:
        hat_inputs = K.conv1d(inputs, self.kernel)
    else:
        hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

    batch_size = K.shape(inputs)[0]
    input_num_capsule = K.shape(inputs)[1]
    hat_inputs = K.reshape(hat_inputs,
                           (batch_size, input_num_capsule,
                            self.num_capsule, self.dim_capsule))
    hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

    b = K.zeros_like(hat_inputs[:, :, :, 0])
    for i in range(self.routings):
        c = softmax(b, 1)
        if K.backend() == 'theano':
            o = K.sum(o, axis=1)
        o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
        if i < self.routings - 1:
            b = K.batch_dot(o, hat_inputs, [2, 3])
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)

    return o
Example #25
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
Example #26
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss
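A minimal check (illustrative shapes only, assuming the softmax function defined above is in scope) that this version normalizes over the requested axis, which is what the capsule routing in Example #24 relies on:

import numpy as np
from keras import backend as K

b = K.constant(np.random.randn(2, 10, 32))  # (batch, num_capsule, input_num_capsule)
c = softmax(b, axis=1)                      # coupling coefficients normalized over axis 1
print(K.eval(K.sum(c, axis=1)))             # every entry is ~1.0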
Example #27
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
Example #28
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss
Example #29
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss
Example #30
Source File: cifar10_cnn_capsule.py From DeepLearning_Wavelet-LSTM with MIT License | 5 votes |
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.