Python chainer.functions.expand_dims() Examples
The following are 30 code examples of chainer.functions.expand_dims().
The original project, source file, and license for each example are noted above it.
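Before the examples, a quick note on what the function does: chainer.functions.expand_dims(x, axis) inserts a new axis of size 1 at the given position, which is why it so often appears next to broadcast_to or concat in the examples below. A minimal sketch (the shapes and values here are illustrative only):

import numpy as np
import chainer.functions as F

x = np.zeros((3, 4), dtype=np.float32)
y = F.expand_dims(x, axis=1)      # (3, 4) -> (3, 1, 4)
z = F.broadcast_to(y, (3, 5, 4))  # the new size-1 axis can then be broadcast
assert y.shape == (3, 1, 4) and z.shape == (3, 5, 4)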
Example #1
Source File: state_q_functions.py From chainerrl with MIT License
def __call__(self, state):
    h = state
    for layer in self.hidden_layers:
        h = F.relu(layer(h))
    v = self.v(h)
    mu = self.mu(h)

    if self.scale_mu:
        mu = scale_by_tanh(mu, high=self.action_space.high,
                           low=self.action_space.low)

    mat_diag = F.exp(self.mat_diag(h))
    if hasattr(self, 'mat_non_diag'):
        mat_non_diag = self.mat_non_diag(h)
        tril = lower_triangular_matrix(mat_diag, mat_non_diag)
        mat = F.matmul(tril, tril, transb=True)
    else:
        mat = F.expand_dims(mat_diag ** 2, axis=2)
    return QuadraticActionValue(
        mu, mat, v,
        min_action=self.action_space.low,
        max_action=self.action_space.high)
Example #2
Source File: gwm.py From chainer-chemistry with MIT License
def __call__(self, g, n_nodes):
    """main calculation

    Args:
        g: super node feature. shape (bs, hidden_dim_super)
        n_nodes (int): number of nodes

    Returns:
        g_trans: super --> original transmission
    """
    mb = len(g)

    # for local updates
    g_trans = self.F_super(g)
    # intermediate_h_super.shape == (mb, self.hidden_dim)
    g_trans = functions.tanh(g_trans)
    # intermediate_h_super.shape == (mb, 1, self.hidden_dim)
    g_trans = functions.expand_dims(g_trans, 1)
    # intermediate_h_super.shape == (mb, atom, self.hidden_dim)
    g_trans = functions.broadcast_to(g_trans, (mb, n_nodes, self.hidden_dim))
    return g_trans
Example #3
Source File: models.py From wavenet with Apache License 2.0
def __call__(self, v, h, label):
    v_t = self.vertical_conv_t(v)
    v_s = self.vertical_conv_s(v)
    to_vertical_t = self.v_to_h_conv_t(v_t)
    to_vertical_s = self.v_to_h_conv_s(v_s)

    # v_gate = self.vertical_gate_conv(v)
    # label bias is added to both vertical and horizontal conv
    # here we take only shape as it should be the same
    label = F.broadcast_to(
        F.expand_dims(F.expand_dims(self.label(label), -1), -1), v_t.shape)

    v_t, v_s = v_t + label, v_s + label
    v = F.tanh(v_t) * F.sigmoid(v_s)

    h_t = self.horizontal_conv_t(h)
    h_s = self.horizontal_conv_s(h)
    h_t, h_s = h_t + to_vertical_t + label, h_s + to_vertical_s + label
    h = self.horizontal_output(F.tanh(h_t) * F.sigmoid(h_s))
    return v, h
Example #4
Source File: transformer_recognizer.py From kiss with GNU General Public License v3.0
def predict(self, images, return_raw_classification_result=False):
    feature_map = self.extract_features(images)
    memory = self.transformer.encode(feature_map, None)

    target = self.get_bos_token_array(len(images), self.num_words)
    target = self.xp.reshape(target, (-1, 1))
    char = None

    for _ in range(self.num_chars):
        decoded = self.transformer.decode(memory, None, target, self.mask)
        char = self.classifier(decoded, n_batch_axes=2)
        predicted_chars = self.decode_prediction(char)
        target = F.concat([target, predicted_chars[:, -1:]])

    result = F.expand_dims(target[:, 1:], 1)
    if return_raw_classification_result:
        return result, char
    return result
Example #5
Source File: svhn_bbox_plotter.py From see with GNU General Public License v3.0
def decode_predictions(self, predictions):
    # concat all individual predictions and slice for each time step
    predictions = F.concat([F.expand_dims(p, axis=0) for p in predictions], axis=0)

    words = []
    with cuda.get_device_from_array(predictions.data):
        for prediction in F.separate(predictions, axis=0):
            prediction = F.squeeze(prediction, axis=0)
            prediction = F.softmax(prediction, axis=1)
            prediction = self.xp.argmax(prediction.data, axis=1)
            word = self.loss_metrics.strip_prediction(prediction[self.xp.newaxis, ...])[0]
            if len(word) == 1 and word[0] == 0:
                return ''

            word = "".join(map(self.loss_metrics.label_to_char, word))
            word = word.replace(chr(self.loss_metrics.char_map[str(self.loss_metrics.blank_symbol)]), '')
            words.append(word)

    text = " ".join(words)
    return text
Example #6
Source File: fsns.py From see with GNU General Public License v3.0
def attend(self, encoded_features):
    self.out_lstm.reset_state()
    transformed_encoded_features = F.concat(
        [F.expand_dims(self.transform_encoded_features(feature), axis=1)
         for feature in encoded_features], axis=1)
    concat_encoded_features = F.concat(
        [F.expand_dims(e, axis=1) for e in encoded_features], axis=1)

    lstm_output = self.xp.zeros_like(encoded_features[0])
    outputs = []
    for _ in range(self.num_labels):
        transformed_lstm_output = self.transform_out_lstm_feature(lstm_output)
        attended_feats = []
        for transformed_encoded_feature in F.separate(transformed_encoded_features, axis=1):
            attended_feat = transformed_encoded_feature + transformed_lstm_output
            attended_feat = F.tanh(attended_feat)
            attended_feats.append(self.generate_attended_feat(attended_feat))

        attended_feats = F.concat(attended_feats, axis=1)
        alphas = F.softmax(attended_feats, axis=1)

        lstm_input_feature = F.batch_matmul(alphas, concat_encoded_features, transa=True)
        lstm_input_feature = F.squeeze(lstm_input_feature, axis=1)
        lstm_output = self.out_lstm(lstm_input_feature)
        outputs.append(lstm_output)
    return outputs
Example #7
Source File: svhn_softmax_metrics.py From see with GNU General Public License v3.0
def calc_loss(self, x, t):
    batch_predictions, _, _ = x
    # concat all individual predictions and slice for each time step
    batch_predictions = F.concat([F.expand_dims(p, axis=0) for p in batch_predictions], axis=0)

    self.xp = cuda.get_array_module(batch_predictions[0], t)
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    losses = []
    for predictions, labels in zip(F.separate(batch_predictions, axis=0), F.separate(t, axis=1)):
        batch_size, num_chars, num_classes = predictions.shape
        predictions = F.reshape(predictions, (batch_size * num_chars, num_classes))
        labels = F.reshape(labels, (-1,))
        losses.append(F.softmax_cross_entropy(predictions, labels))

    return sum(losses)
Example #8
Source File: losses.py From EPG with MIT License
def process_trajectory(self, l):
    """This is the time-dependent convolution operation, applied to a
    trajectory (in order).
    """
    shp = l.shape[0]
    # First dim is batchsize=1, then either 1 channel for 2d conv or n_feat
    # channels for 1d conv.
    l = F.expand_dims(l, axis=0)
    l = F.transpose(l, (0, 2, 1))
    l = self.traj_c0(l)
    l = F.leaky_relu(l)
    l = self.traj_c1(l)
    l = F.leaky_relu(l)
    l = F.sum(l, axis=(0, 2)) / l.shape[0] / l.shape[2]
    l = F.expand_dims(l, axis=0)
    l = self.traj_d0(l)
    l = F.tile(l, (shp, 1))
    return l
Example #9
Source File: cnn.py From fpl with MIT License
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_pose, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)

    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #10
Source File: cnn.py From fpl with MIT License
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h = F.concat((h_pos, h_pose), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)

    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #11
Source File: cnn.py From fpl with MIT License
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)

    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #12
Source File: common.py From imgclsmob with MIT License
def __call__(self, x):
    heatmap = x
    vector_dim = 2
    batch = heatmap.shape[0]
    channels = heatmap.shape[1]
    in_size = x.shape[2:]
    heatmap_vector = F.reshape(heatmap, shape=(batch, channels, -1))
    indices = F.cast(F.expand_dims(F.argmax(heatmap_vector, axis=vector_dim),
                                   axis=vector_dim), np.float32)
    scores = F.max(heatmap_vector, axis=vector_dim, keepdims=True)
    scores_mask = (scores.array > 0.0).astype(np.float32)
    pts_x = (indices.array % in_size[1]) * scores_mask
    pts_y = (indices.array // in_size[1]) * scores_mask
    pts = F.concat((pts_x, pts_y, scores), axis=vector_dim).array
    for b in range(batch):
        for k in range(channels):
            hm = heatmap[b, k, :, :].array
            px = int(pts_x[b, k])
            py = int(pts_y[b, k])
            if (0 < px < in_size[1] - 1) and (0 < py < in_size[0] - 1):
                pts[b, k, 0] += np.sign(hm[py, px + 1] - hm[py, px - 1]) * 0.25
                pts[b, k, 1] += np.sign(hm[py + 1, px] - hm[py - 1, px]) * 0.25
    return pts
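The coordinate arithmetic in the example above recovers 2-D keypoint positions from the flattened argmax: for a row-major heatmap of width W, idx % W gives the x coordinate and idx // W gives the y coordinate. A tiny standalone check of that identity (toy values, plain NumPy):

import numpy as np

H, W = 4, 6
hm = np.zeros((H, W), dtype=np.float32)
hm[2, 5] = 1.0  # hottest pixel at y=2, x=5
idx = int(np.argmax(hm.reshape(-1)))
assert (idx % W, idx // W) == (5, 2)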
Example #13
Source File: sknet.py From imgclsmob with MIT License
def __call__(self, x):
    y = self.branches(x)

    u = F.sum(y, axis=1)
    s = F.average_pooling_2d(u, ksize=u.shape[2:])
    z = self.fc1(s)
    w = self.fc2(z)

    batch = w.shape[0]
    w = F.reshape(w, shape=(batch, self.num_branches, self.out_channels))
    w = self.softmax(w)
    w = F.expand_dims(F.expand_dims(w, axis=3), axis=4)

    y = y * w
    y = F.sum(y, axis=1)
    return y
Example #14
Source File: state_q_functions.py From chainerrl with MIT License
def __call__(self, state):
    h = self.hidden_layers(state)
    v = self.v(h)
    mu = self.mu(h)

    if self.scale_mu:
        mu = scale_by_tanh(mu, high=self.action_space.high,
                           low=self.action_space.low)

    mat_diag = F.exp(self.mat_diag(h))
    if hasattr(self, 'mat_non_diag'):
        mat_non_diag = self.mat_non_diag(h)
        tril = lower_triangular_matrix(mat_diag, mat_non_diag)
        mat = F.matmul(tril, tril, transb=True)
    else:
        mat = F.expand_dims(mat_diag ** 2, axis=2)
    return QuadraticActionValue(
        mu, mat, v,
        min_action=self.action_space.low,
        max_action=self.action_space.high)
Example #15
Source File: iqn.py From chainerrl with MIT License
def _evaluate_psi_x_with_quantile_thresholds(psi_x, phi, f, taus):
    assert psi_x.ndim == 2
    batch_size, hidden_size = psi_x.shape
    assert taus.ndim == 2
    assert taus.shape[0] == batch_size
    n_taus = taus.shape[1]
    phi_taus = phi(taus)
    assert phi_taus.ndim == 3
    assert phi_taus.shape == (batch_size, n_taus, hidden_size)
    psi_x_b = F.broadcast_to(
        F.expand_dims(psi_x, axis=1), phi_taus.shape)
    h = psi_x_b * phi_taus
    h = F.reshape(h, (-1, hidden_size))
    assert h.shape == (batch_size * n_taus, hidden_size)
    h = f(h)
    assert h.ndim == 2
    assert h.shape[0] == batch_size * n_taus
    n_actions = h.shape[-1]
    h = F.reshape(h, (batch_size, n_taus, n_actions))
    return QuantileDiscreteActionValue(h)
Example #16
Source File: attentions.py From espnet with Apache License 2.0
def __call__(self, enc_hs, dec_z, att_prev):
    """Compute NoAtt forward layer.

    Args:
        enc_hs (chainer.Variable | N-dimensional array): Input variable from encoders.
        dec_z: Dummy.
        att_prev (chainer.Variable | None): Attention weight.

    Returns:
        chainer.Variable: Sum over frames.
        chainer.Variable: Attention weight.
    """
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]

    # initialize attention weight with uniform dist.
    if att_prev is None:
        att_prev = [
            self.xp.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32)
            for hh in enc_hs
        ]
        att_prev = [chainer.Variable(att) for att in att_prev]
        att_prev = F.pad_sequence(att_prev)
        self.c = F.sum(
            self.enc_h * F.broadcast_to(F.expand_dims(att_prev, 2), self.enc_h.shape),
            axis=1,
        )

    return self.c, att_prev
Example #17
Source File: test_expand_dims.py From chainer with MIT License
def forward(self, inputs, device):
    x, = inputs
    y = functions.expand_dims(x, self.axis)
    return y,
Example #18
Source File: textrec_metrics.py From see with GNU General Public License v3.0
def calc_accuracy(self, x, t):
    batch_predictions, _, _ = x
    batch_predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in batch_predictions], axis=0)

    self.xp = cuda.get_array_module(batch_predictions[0], t)
    accuracies = []
    with cuda.get_device_from_array(batch_predictions.data):
        classification = F.softmax(batch_predictions, axis=2)
        classification = classification.data
        classification = self.xp.argmax(classification, axis=2)
        classification = self.xp.transpose(classification, (1, 0))

        words = self.strip_prediction(classification)
        labels = self.strip_prediction(t)

        num_correct_words = 0
        for word, label in zip(words, labels):
            word = "".join(map(self.label_to_char, word))
            label = "".join(map(self.label_to_char, label))
            if word == label:
                num_correct_words += 1

        accuracy = num_correct_words / len(labels)
        accuracies.append(accuracy)

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
Example #19
Source File: textrec_metrics.py From see with GNU General Public License v3.0
def calc_actual_loss(self, predictions, grid, labels):
    batch_size = labels.shape[0]
    labels = F.reshape(labels, (-1,))
    predictions = F.concat(
        [F.expand_dims(prediction, axis=1) for prediction in predictions], axis=1)
    predictions = F.reshape(predictions, (batch_size * self.num_timesteps, -1))
    return F.softmax_cross_entropy(predictions, labels)
Example #20
Source File: updater.py From chainer-partial_convolution_image_inpainting with MIT License
def imgcrop_batch(img, pos_list, size=128):
    B, ch, H, W = img.shape
    lis = [F.expand_dims(img[i, :, x:x + size, y:y + size], axis=0)
           for i, (x, y) in enumerate(pos_list)]
    return F.concat(lis, axis=0)
Example #21
Source File: svhn_softmax_metrics.py From see with GNU General Public License v3.0
def calc_accuracy(self, x, t):
    batch_predictions, _, _ = x
    # concat all individual predictions and slice for each time step
    batch_predictions = F.concat([F.expand_dims(p, axis=0) for p in batch_predictions], axis=0)

    self.xp = cuda.get_array_module(batch_predictions[0], t)
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    accuracies = []
    with cuda.get_device_from_array(batch_predictions.data):
        for prediction, label in zip(F.separate(batch_predictions, axis=0), F.separate(t, axis=1)):
            classification = F.softmax(prediction, axis=2)
            classification = classification.data
            classification = self.xp.argmax(classification, axis=2)
            # classification = self.xp.transpose(classification, (1, 0))

            words = self.strip_prediction(classification)
            labels = self.strip_prediction(label.data)

            num_correct_words = 0
            for word, label in zip(words, labels):
                word = "".join(map(self.label_to_char, word))
                label = "".join(map(self.label_to_char, label))
                if word == label:
                    num_correct_words += 1

            accuracy = num_correct_words / len(labels)
            accuracies.append(accuracy)

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
Example #22
Source File: pcl.py From chainerrl with MIT License
def update_on_policy(self, statevar):
    assert self.t_start < self.t

    if not self.disable_online_update:
        next_values = {}
        for t in range(self.t_start + 1, self.t):
            next_values[t - 1] = self.past_values[t]
        if statevar is None:
            next_values[self.t - 1] = chainer.Variable(
                self.xp.zeros_like(self.past_values[self.t - 1].array))
        else:
            with state_kept(self.model):
                _, v = self.model(statevar)
            next_values[self.t - 1] = v
        log_probs = {t: self.past_action_distrib[t].log_prob(
            self.xp.asarray(self.xp.expand_dims(a, 0)))
            for t, a in self.past_actions.items()}
        self.online_batch_losses.append(self.compute_loss(
            t_start=self.t_start, t_stop=self.t,
            rewards=self.past_rewards,
            values=self.past_values,
            next_values=next_values,
            log_probs=log_probs))
        if len(self.online_batch_losses) == self.batchsize:
            loss = chainerrl.functions.sum_arrays(
                self.online_batch_losses) / self.batchsize
            self.update(loss)
            self.online_batch_losses = []

    self.init_history_data_for_online_update()
Example #23
Source File: iqn.py From chainerrl with MIT License
def compute_eltwise_huber_quantile_loss(y, t, taus, huber_loss_threshold=1.0):
    """Compute elementwise Huber losses for quantile regression.

    This is based on Algorithm 1 of https://arxiv.org/abs/1806.06923.

    This function assumes that both kinds of quantile thresholds, taus
    (used to compute y) and taus_prime (used to compute t), are iid samples
    from U([0,1]).

    Args:
        y (chainer.Variable): Quantile prediction from taus as a
            (batch_size, N)-shaped array.
        t (chainer.Variable or ndarray): Target values for quantile
            regression as a (batch_size, N_prime)-shaped array.
        taus (ndarray): Quantile thresholds used to compute y as a
            (batch_size, N)-shaped array.
        huber_loss_threshold (float): Threshold of Huber loss. In the IQN
            paper, this is denoted by kappa.

    Returns:
        chainer.Variable: Loss (batch_size, N, N_prime)
    """
    assert y.shape == taus.shape

    # (batch_size, N) -> (batch_size, N, 1)
    y = F.expand_dims(y, axis=2)
    # (batch_size, N_prime) -> (batch_size, 1, N_prime)
    t = F.expand_dims(t, axis=1)
    # (batch_size, N) -> (batch_size, N, 1)
    taus = F.expand_dims(taus, axis=2)
    # Broadcast to (batch_size, N, N_prime)
    y, t, taus = F.broadcast(y, t, taus)

    I_delta = ((t.array - y.array) > 0).astype('f')
    eltwise_huber_loss = F.huber_loss(
        y, t, delta=huber_loss_threshold, reduce='no')
    eltwise_loss = abs(taus - I_delta) * eltwise_huber_loss
    return eltwise_loss
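The pair of expand_dims calls above is the standard pairwise-broadcast trick: expanding y on axis 2 and t on axis 1 turns (batch_size, N) predictions and (batch_size, N_prime) targets into a (batch_size, N, N_prime) grid, so every quantile prediction is compared against every target. A small sketch with toy shapes (the values are placeholders):

import numpy as np
import chainer.functions as F

y = np.zeros((2, 3), dtype=np.float32)  # (batch_size, N) quantile predictions
t = np.ones((2, 4), dtype=np.float32)   # (batch_size, N_prime) targets
y_b, t_b = F.broadcast(F.expand_dims(y, axis=2), F.expand_dims(t, axis=1))
pairwise = t_b - y_b  # (2, 3, 4): every target minus every prediction
assert pairwise.shape == (2, 3, 4)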
Example #24
Source File: text_rec_bbox_plotter.py From see with GNU General Public License v3.0
def decode_predictions(self, predictions):
    # concat all individual predictions and slice for each time step
    predictions = F.concat(
        [F.expand_dims(prediction, axis=0) for prediction in predictions], axis=0)

    with cuda.get_device_from_array(predictions.data):
        prediction = F.squeeze(predictions, axis=1)
        classification = F.softmax(prediction, axis=1)
        classification = classification.data
        classification = self.xp.argmax(classification, axis=1)

        words = self.loss_metrics.strip_prediction(classification[self.xp.newaxis, ...])[0]
        word = "".join(map(self.loss_metrics.label_to_char, words))

    return word
Example #25
Source File: loss_metrics.py From see with GNU General Public License v3.0
def calc_accuracy(self, x, t):
    batch_predictions, _, _ = x
    self.xp = cuda.get_array_module(batch_predictions[0], t)
    batch_size = t.shape[0]
    t = F.reshape(t, (batch_size, self.num_timesteps, -1))

    accuracies = []
    for predictions, labels in zip(batch_predictions, F.separate(t, axis=1)):
        if isinstance(predictions, list):
            predictions = F.concat([F.expand_dims(p, axis=0) for p in predictions], axis=0)

        with cuda.get_device_from_array(predictions.data):
            classification = F.softmax(predictions, axis=2)
            classification = classification.data
            classification = self.xp.argmax(classification, axis=2)
            classification = self.xp.transpose(classification, (1, 0))

            words = self.strip_prediction(classification)
            labels = self.strip_prediction(labels.data)

            num_correct_words = 0
            for word, label in zip(words, labels):
                word = "".join(map(self.label_to_char, word))
                label = "".join(map(self.label_to_char, label))
                if word == label:
                    num_correct_words += 1

            accuracy = num_correct_words / len(labels)
            accuracies.append(accuracy)

    overall_accuracy = sum(accuracies) / max(len(accuracies), 1)
    self.scale_area_loss_factor(overall_accuracy)
    return overall_accuracy
Example #26
Source File: models.py From wavenet with Apache License 2.0
def __init__(self, *args, mask='B', **kwargs):
    super(MaskedConvolution2D, self).__init__(*args, **kwargs)

    Cout, Cin, kh, kw = self.W.shape
    pre_mask = self.xp.ones_like(self.W.data).astype('f')
    yc, xc = kh // 2, kw // 2

    # context masking - subsequent pixels won't have access to next pixels (spatial dim)
    pre_mask[:, :, yc + 1:, :] = 0.0
    pre_mask[:, :, yc:, xc + 1:] = 0.0

    # same pixel masking - pixel won't access next color (conv filter dim)
    def bmask(i_out, i_in):
        cout_idx = np.expand_dims(np.arange(Cout) % 3 == i_out, 1)
        cin_idx = np.expand_dims(np.arange(Cin) % 3 == i_in, 0)
        a1, a2 = np.broadcast_arrays(cout_idx, cin_idx)
        return a1 * a2

    for j in range(3):
        pre_mask[bmask(j, j), yc, xc] = 0.0 if mask == 'A' else 1.0

    pre_mask[bmask(0, 1), yc, xc] = 0.0
    pre_mask[bmask(0, 2), yc, xc] = 0.0
    pre_mask[bmask(1, 2), yc, xc] = 0.0

    self.mask = pre_mask
Example #27
Source File: state_q_functions.py From chainerrl with MIT License
def scale_by_tanh(x, low, high):
    xp = cuda.get_array_module(x.array)
    scale = (high - low) / 2
    scale = xp.expand_dims(xp.asarray(scale, dtype=np.float32), axis=0)
    mean = (high + low) / 2
    mean = xp.expand_dims(xp.asarray(mean, dtype=np.float32), axis=0)
    return F.tanh(x) * scale + mean
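scale_by_tanh squashes an unbounded network output into [low, high]: tanh maps to (-1, 1), and the scale/mean affine transform recenters that interval. A toy check of the same mapping (standalone sketch, not calling the chainerrl helper itself):

import numpy as np
import chainer
import chainer.functions as F

x = chainer.Variable(np.array([[-10.0, 0.0, 10.0]], dtype=np.float32))
low, high = -2.0, 3.0
y = F.tanh(x) * (high - low) / 2 + (high + low) / 2
# y.array is approximately [-2.0, 0.5, 3.0]: extreme inputs map to low and high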
Example #28
Source File: gnn_film_update.py From chainer-chemistry with MIT License
def forward(self, h, adj):
    # --- Message part ---
    xp = self.xp
    mb, atom, ch = h.shape
    newshape = adj.shape + (ch, )
    adj = functions.broadcast_to(adj[:, :, :, :, xp.newaxis], newshape)
    messages = functions.reshape(self.W_linear(h),
                                 (mb, atom, ch, self.n_edge_types))
    messages = functions.transpose(messages, (3, 0, 1, 2))
    film_weights = functions.reshape(self.W_g(h),
                                     (mb, atom, 2 * ch, self.n_edge_types))
    film_weights = functions.transpose(film_weights, (3, 0, 1, 2))
    # (n_edge_types, minibatch, atom, out_ch)
    gamma = film_weights[:, :, :, :ch]
    # (n_edge_types, minibatch, atom, out_ch)
    beta = film_weights[:, :, :, ch:]

    # --- Update part ---
    messages = (functions.expand_dims(gamma, axis=3)
                * functions.expand_dims(messages, axis=2)
                + functions.expand_dims(beta, axis=3))
    messages = self.activation(messages)

    # (minibatch, n_edge_types, atom, atom, out_ch)
    messages = functions.transpose(messages, (1, 0, 2, 3, 4))
    messages = adj * messages
    messages = functions.sum(messages, axis=3)  # sum across atoms
    messages = functions.sum(messages, axis=1)  # sum across n_edge_types

    messages = functions.reshape(messages, (mb * atom, ch))
    messages = self.norm_layer(messages)
    messages = functions.reshape(messages, (mb, atom, ch))
    return messages
Example #29
Source File: attenders.py From lencon with MIT License
def _attend(self, p):
    p = self.xh(p)
    p = F.expand_dims(p, 1)
    p = F.broadcast_to(p, self.shape2)

    h = F.tanh(self.h + p)

    shape3 = (self.batchsize * self.src_len, self.dim_hid)
    h_reshaped = F.reshape(h, shape3)
    weight_reshaped = self.hw(h_reshaped)
    weight = F.reshape(weight_reshaped, (self.batchsize, self.src_len, 1))
    weight = F.where(self.mask, weight, self.minf)
    attention = F.softmax(weight)
    return attention
Example #30
Source File: GAIN.py From Guided-Attention-Inference-Network with MIT License
def get_gcam(self, end_output, activations, shape, label):
    self.cleargrads()
    class_id = self.set_init_grad(end_output, label)
    end_output.backward(retain_grad=True)

    grad = activations.grad_var
    grad = F.average_pooling_2d(grad, (grad.shape[-2], grad.shape[-1]), 1)
    grad = F.expand_dims(
        F.reshape(grad, (grad.shape[0] * grad.shape[1], grad.shape[2], grad.shape[3])), 0)

    weights = activations
    weights = F.expand_dims(
        F.reshape(weights, (weights.shape[0] * weights.shape[1], weights.shape[2], weights.shape[3])), 0)

    gcam = F.resize_images(F.relu(F.convolution_2d(weights, grad, None, 1, 0)), shape)
    return gcam, class_id