Python chainer.functions.batch_matmul() Examples
The following are 26 code examples of chainer.functions.batch_matmul().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
Example #1
Source File: MnihCNN_rcis.py From ssai-cnn with MIT License | 6 votes |
def channelwise_inhibited(self, h):
    """Replace one randomly chosen split of ``h`` with its product by zeros.

    ``h`` is split into three pieces along axis 1. An index in ``[0, 2]`` is
    drawn at random and stored on ``self.c``; the piece at that index is
    reshaped to ``(batch, 16, 16)``, batch-multiplied by an all-zero matrix
    of the same shape, and reshaped to ``(batch, 1, 16, 16)``. The pieces
    are then concatenated back along axis 1.
    """
    self.c = random.randint(0, 2)
    xp = cuda.get_array_module(h.data)
    batch = h.data.shape[0]
    pieces = F.split_axis(h, 3, 1)

    inhibited = F.reshape(pieces[self.c], (batch, 16, 16))
    zero_mat = Variable(xp.zeros_like(inhibited.data), 'AUTO')
    inhibited = F.batch_matmul(inhibited, zero_mat)
    inhibited = F.reshape(inhibited, (batch, 1, 16, 16))

    # Swap the selected piece for its inhibited version, keep the others.
    merged = [
        inhibited if idx == self.c else piece
        for idx, piece in enumerate(pieces)
    ]
    return F.concat(merged, 1)
Example #2
Source File: train.py From style_transfer_3d with MIT License | 6 votes |
def extract_features(vgg16, images, masks=None):
    """Compute masked Gram-style matrices from four VGG16 layers.

    Args:
        vgg16: Callable invoked as ``vgg16(images, layers=[...])``; its
            result must expose ``.values()``.
        images: Image batch indexed as ``(batch, channel, height, width)``;
            the channel axis is reversed and scaled by 255 before the mean
            is subtracted.
        masks: Optional object whose ``.data`` is an array indexed as
            ``(batch, height, width)``. When ``None`` an all-ones float32
            mask is used.

    Returns:
        A list with one matrix batch per extracted layer.
    """
    mean = cp.array([103.939, 116.779, 123.68], 'float32')  # BGR
    images = images[:, ::-1] * 255 - mean[None, :, None, None]
    features = vgg16(
        images,
        layers=['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3'],
    ).values()

    if masks is None:
        masks = cp.ones(
            (images.shape[0], images.shape[2], images.shape[3]), 'float32')
    else:
        masks = masks.data

    style_features = []
    for f in features:
        # Floor division keeps the pooling window/stride an integer on
        # Python 3 as well; true division would hand a float to the pooling
        # function.
        scale = masks.shape[-1] // f.shape[-1]
        m = cf.average_pooling_2d(masks[:, None, :, :], scale, scale).data
        dim = f.shape[1]
        m = m.reshape((m.shape[0], -1))

        # Move channels last and flatten the spatial axes.
        f2 = f.transpose((0, 2, 3, 1))
        f2 = f2.reshape((f2.shape[0], -1, f2.shape[-1]))
        f2 *= cp.sqrt(m)[:, :, None]
        f2 = cf.batch_matmul(f2.transpose((0, 2, 1)), f2)
        f2 /= dim * m.sum(axis=1)[:, None, None]
        style_features.append(f2)
    return style_features
Example #3
Source File: main.py From style_transfer_3d with MIT License | 6 votes |
def extract_style_feature(self, images, masks=None):
    """Compute masked Gram-style matrices from four VGG16 layers.

    Args:
        images: Image batch indexed as ``(batch, channel, height, width)``;
            the channel axis is reversed and scaled by 255 before the mean
            is subtracted.
        masks: Optional array indexed as ``(batch, height, width)``. When
            ``None`` an all-ones mask is created with ``self.xp``.

    Returns:
        A list with one matrix batch per extracted layer.
    """
    xp = self.xp
    mean = xp.array([103.939, 116.779, 123.68], 'float32')  # BGR
    images = images[:, ::-1] * 255 - mean[None, :, None, None]
    features = self.vgg16(
        images,
        layers=['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3'],
    ).values()

    if masks is None:
        masks = xp.ones(
            (images.shape[0], images.shape[2], images.shape[3]))

    style_features = []
    for feature in features:
        # Floor division keeps the pooling window/stride an integer on
        # Python 3 as well; true division would hand a float to the pooling
        # function.
        scale = masks.shape[-1] // feature.shape[-1]
        m = cf.average_pooling_2d(masks[:, None, :, :], scale, scale).data
        dim = feature.shape[1]
        m = m.reshape((m.shape[0], -1))

        # Move channels last and flatten the spatial axes.
        f2 = feature.transpose((0, 2, 3, 1))
        f2 = f2.reshape((f2.shape[0], -1, f2.shape[-1]))
        f2 *= xp.sqrt(m)[:, :, None]
        f2 = cf.batch_matmul(f2.transpose((0, 2, 1)), f2)
        f2 /= dim * m.sum(axis=1)[:, None, None]
        style_features.append(f2)
    return style_features
Example #4
Source File: S2S_att.py From seq2seq_temporal_attention with MIT License | 6 votes |
def __call__(self, a_list, state, batch_size, xp):
    """Build a context vector from ``a_list`` using dot-product scores.

    For every ``a`` the score is ``batch_matmul(state['h2'], a, transa=True)``
    reshaped to ``(batch_size, 1)``, clipped to ``[-40, 40]`` and
    exponentiated. The context accumulates ``batch_matmul(a, e / sum_e)``
    reshaped to ``(batch_size, self.hidden_size)``.

    Returns:
        ``(context, e_list, sum_e)`` where ``e_list`` holds the objects
        appended in the scoring loop and ``sum_e`` is their running sum.
    """
    score_shape = (batch_size, 1)
    context_shape = (batch_size, self.hidden_size)

    e_list = []
    sum_e = xp.zeros(score_shape, dtype=xp.float32)
    for annotation in a_list:
        score = reshape(
            batch_matmul(state['h2'], annotation, transa=True), score_shape)
        # Clip before exp() to bound the exponent.
        score.data = xp.clip(score.data, -40, 40)
        weight = exp(score)
        e_list.append(weight)
        sum_e = sum_e + weight

    context = xp.zeros(context_shape, dtype=xp.float32)
    for annotation, weight in zip(a_list, e_list):
        weight /= sum_e
        context = context + reshape(
            batch_matmul(annotation, weight), context_shape)
    return context, e_list, sum_e
Example #5
Source File: fsns.py From see with GNU General Public License v3.0 | 6 votes |
def attend(self, encoded_features):
    """Run ``self.num_labels`` attention steps over ``encoded_features``.

    Each step scores every encoded feature against the previous LSTM
    output, softmaxes the scores along axis 1, forms the weighted
    combination with ``F.batch_matmul(..., transa=True)`` and feeds it to
    ``self.out_lstm``.

    Returns:
        List of the LSTM outputs, one per step.
    """
    self.out_lstm.reset_state()

    # Stack the (transformed) features along a new axis 1.
    projected = F.concat(
        [
            F.expand_dims(self.transform_encoded_features(feat), axis=1)
            for feat in encoded_features
        ],
        axis=1,
    )
    stacked = F.concat(
        [F.expand_dims(feat, axis=1) for feat in encoded_features],
        axis=1,
    )

    # The first step starts from an all-zero "previous output".
    lstm_output = self.xp.zeros_like(encoded_features[0])
    outputs = []
    for _ in range(self.num_labels):
        projected_state = self.transform_out_lstm_feature(lstm_output)
        scores = F.concat(
            [
                self.generate_attended_feat(F.tanh(item + projected_state))
                for item in F.separate(projected, axis=1)
            ],
            axis=1,
        )
        alphas = F.softmax(scores, axis=1)
        lstm_input = F.batch_matmul(alphas, stacked, transa=True)
        lstm_input = F.squeeze(lstm_input, axis=1)
        lstm_output = self.out_lstm(lstm_input)
        outputs.append(lstm_output)
    return outputs
Example #6
Source File: net.py From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License | 6 votes |
def attend(self, query, key, value, mask, minfs=None):
    """Masked attention returning the weighted sum of ``value``.

    Input shapes:
        q=(b, units, dec_l), k=(b, units, enc_l),
        v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)

    ``minfs`` may supply a pre-built array of ``-inf`` with the score
    shape; when ``None`` one is created here.
    """
    # Attention scores; zero-padded areas are masked to -inf.
    scores = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
    if minfs is None:
        minfs = self.xp.full(scores.shape, -np.inf, scores.dtype)
    scores = F.where(mask, scores, minfs)

    weights = F.softmax(scores, axis=2)
    # Rows that are entirely -inf come out of softmax as nan; zero them.
    weights = F.where(
        self.xp.isnan(weights.data),
        self.xp.zeros(weights.shape, dtype=weights.dtype),
        weights,
    )
    expanded = weights[:, None]  # (b, 1, dec_xl, enc_l)

    # Weighted sum over the last axis.
    weighted = F.broadcast_to(expanded, value.shape) * value
    return F.sum(weighted, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
Example #7
Source File: updater.py From chainer-partial_convolution_image_inpainting with MIT License | 6 votes |
def calc_loss_style(hout_dict, hcomp_dict, hgt_dict):
    """Sum the Gram-matrix style losses over every layer in ``hgt_dict``.

    For each layer key, the three feature maps are flattened to
    ``(B, C, H*W)`` (shape taken from ``hout_dict``), their Gram matrices
    are formed with ``F.batch_matmul(x, x, transb=True)``, and the mean
    absolute error of the output and composite Grams against the
    ground-truth Gram is divided by ``C*H*W``.

    Returns:
        Accumulated output loss plus accumulated composite loss.
    """
    for idx, layer_name in enumerate(hgt_dict.keys()):
        B, C, H, W = hout_dict[layer_name].shape
        flat_shape = (B, C, H * W)

        flat_out = F.reshape(hout_dict[layer_name], flat_shape)
        flat_comp = F.reshape(hcomp_dict[layer_name], flat_shape)
        flat_gt = F.reshape(hgt_dict[layer_name], flat_shape)

        gram_out = F.batch_matmul(flat_out, flat_out, transb=True)
        gram_comp = F.batch_matmul(flat_comp, flat_comp, transb=True)
        gram_gt = F.batch_matmul(flat_gt, flat_gt, transb=True)

        out_term = F.mean_absolute_error(gram_out, gram_gt) / (C * H * W)
        comp_term = F.mean_absolute_error(gram_comp, gram_gt) / (C * H * W)

        # The first layer initialises the accumulators.
        if idx == 0:
            L_style_out = out_term
            L_style_comp = comp_term
        else:
            L_style_out += out_term
            L_style_comp += comp_term
    return L_style_out + L_style_comp
Example #8
Source File: MnihCNN_cis.py From ssai-cnn with MIT License | 6 votes |
def channelwise_inhibited(self, h):
    """Replace split ``self.c`` of ``h`` with its product by zeros.

    ``h`` is split into three pieces along axis 1. The piece at index
    ``self.c`` is reshaped to ``(batch, 16, 16)``, batch-multiplied by an
    all-zero matrix of the same shape, and reshaped to
    ``(batch, 1, 16, 16)``. The pieces are then concatenated back along
    axis 1.
    """
    xp = cuda.get_array_module(h.data)
    batch = h.data.shape[0]
    pieces = F.split_axis(h, 3, 1)

    inhibited = F.reshape(pieces[self.c], (batch, 16, 16))
    zero_mat = Variable(xp.zeros_like(inhibited.data), 'AUTO')
    inhibited = F.batch_matmul(inhibited, zero_mat)
    inhibited = F.reshape(inhibited, (batch, 1, 16, 16))

    # Swap the selected piece for its inhibited version, keep the others.
    merged = [
        inhibited if idx == self.c else piece
        for idx, piece in enumerate(pieces)
    ]
    return F.concat(merged, 1)
Example #9
Source File: attention.py From knmt with GNU General Public License v3.0 | 6 votes |
def __call__(self, inpt, mask):
    """Precompute projected inputs and return a copy-coefficient function.

    ``inpt`` is flattened to ``(-1, self.Hi)``, passed through ``self.lin``
    and ``tanh``, and reshaped to ``(mb_size, -1, self.Ho)``. ``mask`` is a
    sequence of per-position arrays, each reshaped to ``(mb_size, 1)``;
    masked-out entries receive a ``-10000`` penalty, and the first
    ``max_length - len(mask)`` positions receive no penalty.

    Returns:
        A function mapping ``state`` to the penalised scores of shape
        ``(mb_size, -1)``.
    """
    xp = self.xp
    mb_size = inpt.data.shape[0]
    max_length = inpt.data.shape[1]

    projected = F.tanh(self.lin(F.reshape(inpt, (-1, self.Hi))))
    precomp = F.reshape(projected, (mb_size, -1, self.Ho))

    mask_offset = max_length - len(mask)
    unmasked_part = xp.zeros((mb_size, mask_offset), dtype=xp.float32)
    mask_columns = xp.concatenate(
        [
            xp.reshape(mask_elem, (mb_size, 1)).astype(xp.float32)
            for mask_elem in mask
        ],
        1,
    )
    precomp_mask_penalties = xp.concatenate(
        [unmasked_part, -10000 * (1 - mask_columns)], 1)

    def compute_copy_coefficients(state):
        betas = F.reshape(batch_matmul(precomp, state), (mb_size, -1))
        return betas + precomp_mask_penalties

    return compute_copy_coefficients
Example #10
Source File: decoder_cells.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_logits(self, new_states, concatenated, attn):
    """Compute output logits, optionally biased by a lexicon matrix.

    The logits come from ``lin_o(maxo(concat(concatenated, new_states[-1])))``
    on ``self.decoder_chain``. When ``self.lexicon_probability_matrix`` is
    set, the attention-weighted lexicon probabilities are added in log
    space (after adding ``self.lex_epsilon``).
    """
    last_state = new_states[-1]
    decoder = self.decoder_chain
    logits = decoder.lin_o(decoder.maxo(F.concat((concatenated, last_state))))

    if self.lexicon_probability_matrix is None:
        return logits

    current_mb_size = last_state.data.shape[0]
    assert self.mb_size is None or current_mb_size <= self.mb_size
    lexicon = self.lexicon_probability_matrix[:current_mb_size]

    # Sanity checks on the shapes involved.
    attn_mb_size, max_source_length_attn = attn.data.shape
    assert attn_mb_size == current_mb_size
    lex_mb_size, max_source_length_lexicon, v_size_lexicon = lexicon.shape
    assert max_source_length_lexicon == max_source_length_attn
    assert logits.data.shape == (current_mb_size, v_size_lexicon)

    if self.demux:
        # A single lexicon matrix is shared by the whole minibatch.
        assert lex_mb_size == 1
        shared_lexicon = lexicon.reshape(lexicon.shape[1], lexicon.shape[2])
        weighted = matmul_constant(attn, shared_lexicon)
    else:
        assert lex_mb_size == current_mb_size
        weighted = batch_matmul_constant(attn, lexicon, transa=True)
    weighted_lex_probs = F.reshape(weighted, logits.data.shape)

    logits += F.log(weighted_lex_probs + self.lex_epsilon)
    return logits
Example #11
Source File: chainer-gogh-multi.py From chainer-gogh with MIT License | 5 votes |
def get_matrix(y):
    """Return the normalised Gram matrix of ``y`` for each batch item.

    ``y`` is reshaped to ``(batch, ch, wd**2)`` using ``y.data.shape``
    (which assumes the last two axes are both ``wd`` long), multiplied by
    its own transpose, and divided by ``ch * wd**2``.
    """
    batch, ch, wd = y.data.shape[0], y.data.shape[1], y.data.shape[2]
    flat = F.reshape(y, (batch, ch, wd**2))
    gram = F.batch_matmul(flat, flat, transb=True)
    return gram / np.float32(ch * wd**2)
Example #12
Source File: angular_loss.py From deep_metric_learning with MIT License | 5 votes |
def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    """Angular loss over a batch of anchor/positive pairs.

    Args:
        f (chainer.Variable or xp.ndarray): Anchor vectors; each vector is
            expected to be l2-normalised.
        f_p (chainer.Variable or xp.ndarray): Positive vectors; each vector
            is expected to be l2-normalised.
        alpha: Angle parameter; converted from degrees to radians when
            ``in_degree`` is true.
        in_degree: Whether ``alpha`` is given in degrees.

    Returns:
        The average over rows of ``logsumexp`` of the pairwise terms.
    """
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    tan_sq = np.tan(alpha) ** 2
    n_pairs = len(f)

    # First and second term of f_{a,p,n}.
    pairwise = 4 * tan_sq * matmul(f + f_p, transpose(f_p))
    per_pair = 2 * (1 + tan_sq) * F.sum(f * f_p, axis=1, keepdims=True)
    f_apn = pairwise - F.broadcast_to(per_pair, (n_pairs, n_pairs))

    # Zero the diagonal entries of f_apn.
    off_diagonal = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * off_diagonal

    return F.average(F.logsumexp(f_apn, axis=1))
Example #13
Source File: S2S_att.py From seq2seq_temporal_attention with MIT License | 5 votes |
def __call__(self, a_list, state, batch_size, xp):
    """Build a context vector from ``a_list`` using MLP scores.

    For every ``a`` the score is ``self.vw(tanh(self.av(concat(a, h2))))``
    with ``h2 = state['h2']``, then exponentiated. The context accumulates
    ``batch_matmul(a, e / sum_e)`` reshaped to
    ``(batch_size, self.hidden_size)``.

    Returns:
        ``(context, e_list, sum_e)`` where ``e_list`` holds the objects
        appended in the scoring loop and ``sum_e`` is their running sum.
    """
    context_shape = (batch_size, self.hidden_size)

    e_list = []
    sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
    for annotation in a_list:
        joined = array.concat.concat((annotation, state['h2']), axis=1)
        hidden = tanh(self.av(joined))
        weight = exp(self.vw(hidden))
        e_list.append(weight)
        sum_e = sum_e + weight

    context = xp.zeros(context_shape, dtype=xp.float32)
    for annotation, weight in zip(a_list, e_list):
        weight /= sum_e
        context = context + reshape(
            batch_matmul(annotation, weight), context_shape)
    return context, e_list, sum_e
Example #14
Source File: S2S_att.py From seq2seq_temporal_attention with MIT License | 5 votes |
def __call__(self, a_list, state, batch_size, xp):
    """Build a context vector from ``a_list`` using ``self.aw`` scores.

    For every ``a`` the score is ``self.aw(a, state['h2'])``, clipped to
    ``[-20, 20]`` and exponentiated. The context accumulates
    ``batch_matmul(a, e / sum_e)`` reshaped to
    ``(batch_size, self.hidden_size)``.

    Returns:
        ``(context, e_list, sum_e)`` where ``e_list`` holds the objects
        appended in the scoring loop and ``sum_e`` is their running sum.
    """
    context_shape = (batch_size, self.hidden_size)

    e_list = []
    sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
    for annotation in a_list:
        score = self.aw(annotation, state['h2'])
        # Clip before exp() to bound the exponent.
        score.data = xp.clip(score.data, -20, 20)
        weight = exp(score)
        e_list.append(weight)
        sum_e = sum_e + weight

    context = xp.zeros(context_shape, dtype=xp.float32)
    for annotation, weight in zip(a_list, e_list):
        weight /= sum_e
        context = context + reshape(
            batch_matmul(annotation, weight), context_shape)
    return context, e_list, sum_e
Example #15
Source File: attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def batch_matmul(a, b, transa=False, transb=False):
    """Multiply ``a`` (with a trailing unit axis appended) by ``b``.

    ``a`` is indexed as ``a[:, :, None]`` before being handed to
    ``F.matmul`` together with ``b`` and the two transpose flags.
    """
    a_expanded = a[:, :, None]
    return F.matmul(a_expanded, b, transa=transa, transb=transb)
Example #16
Source File: attenders.py From lencon with MIT License | 5 votes |
def _attend(self, p):
    """Return softmax attention of ``p`` over ``self.source_hiddens``.

    Scores are ``F.batch_matmul(self.source_hiddens, p)``; positions where
    ``self.mask`` is false take the value from ``self.minf`` before the
    softmax.
    """
    scores = F.batch_matmul(self.source_hiddens, p)
    masked_scores = F.where(self.mask, scores, self.minf)
    return F.softmax(masked_scores)
Example #17
Source File: attenders.py From lencon with MIT License | 5 votes |
def __call__(self, p, train=True):
    """Return the attention-weighted combination of the source hiddens.

    The attention from ``self._attend(p)`` is optionally logged to
    ``self.history`` (first batch item only, copied to the CPU), then used
    to weight ``self.source_hiddens``. The result is reshaped to
    ``(self.batchsize, self.dim_out)``. ``train`` is not used in this body.
    """
    attention = self._attend(p)

    if self.history is not None:
        first_item = chainer.cuda.to_cpu(attention.data[0, :, 0])
        self.history.append(first_item.tolist())

    hiddens_t = F.swapaxes(self.source_hiddens, 2, 1)
    combined = F.batch_matmul(hiddens_t, attention)
    return F.reshape(combined, (self.batchsize, self.dim_out))
Example #18
Source File: model_py.py From models with MIT License | 5 votes |
def _attn(self, q, k, v):
    """Masked attention over inputs whose leading axes are flattened.

    ``q``, ``k`` and ``v`` are each reshaped to 3-D by collapsing all but
    their last two axes. Scores are optionally divided by
    ``sqrt(v.shape[-1])``, masked with ``self.b.array[0]``, softmaxed over
    axis 2 and passed through ``self.attn_dropout`` before weighting ``v``.
    The result is reshaped to
    ``(v.shape[0], v.shape[1], v.shape[2], -1)``.
    """
    q_flat = q.reshape(-1, *q.shape[-2:])
    k_flat = k.reshape(-1, *k.shape[-2:])
    w = F.batch_matmul(q_flat, k_flat)

    if self.scale:
        w = w / math.sqrt(v.shape[-1])

    # TF implem method: mask_attn_weights
    w = w * self.b.array[0] + -1e9 * (1 - self.b.array[0])
    w = self.attn_dropout(F.softmax(w, axis=2))

    v_flat = v.reshape(-1, *v.shape[-2:])
    weighted = F.batch_matmul(w, v_flat)
    return weighted.reshape(v.shape[0], v.shape[1], v.shape[2], -1)
Example #19
Source File: attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_ctxt_demux(self, fb_concat, mask):
    """Return a context function for one source shared by many states.

    Requires ``fb_concat`` of shape ``(1, nb_elems, self.Hi)`` and an empty
    ``mask`` (both asserted). The ``self.al_lin_h`` projection of the
    source is computed once here and broadcast to the state's minibatch
    size on every call of the returned function.

    Returns:
        ``compute_ctxt(previous_state, prev_word_embedding=None)``, which
        returns ``(ci, attn)``.
    """
    mb_size, nb_elems, Hi = fb_concat.data.shape
    assert Hi == self.Hi
    assert mb_size == 1
    assert len(mask) == 0

    flat_source = F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))
    precomputed_al_factor = F.reshape(
        self.al_lin_h(flat_source), (mb_size, nb_elems, self.Ha))

    def compute_ctxt(previous_state, prev_word_embedding=None):
        current_mb_size = previous_state.data.shape[0]
        full_shape = (current_mb_size, nb_elems, self.Ha)

        al_factor = F.broadcast_to(precomputed_al_factor, full_shape)

        state_al_factor = self.al_lin_s(previous_state)
        # As suggested by Isao Goto
        if prev_word_embedding is not None:
            state_al_factor = (
                state_al_factor + self.al_lin_y(prev_word_embedding))

        state_al_factor_bc = F.broadcast_to(
            F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)),
            full_shape,
        )
        activated = F.reshape(
            F.tanh(state_al_factor_bc + al_factor),
            (current_mb_size * nb_elems, self.Ha),
        )
        a_coeffs = F.reshape(
            self.al_lin_o(activated), (current_mb_size, nb_elems))

        attn = F.softmax(a_coeffs)

        # The single source is used directly as a 2-D matrix.
        source_2d = F.reshape(fb_concat, (nb_elems, Hi))
        ci = F.reshape(
            F.matmul(attn, source_2d), (current_mb_size, self.Hi))
        return ci, attn

    return compute_ctxt
Example #20
Source File: memnn.py From pfio with MIT License | 5 votes |
def query(self, u):
    """Return ``u`` plus the attention read-out from the stored memories.

    Indices running from ``size - 1`` down to ``0`` (``size`` being
    ``self.m.shape[1]``) are embedded by ``self.TA`` and ``self.TC`` and
    added to ``self.m`` and ``self.c``. The softmax of
    ``batch_matmul(self.m + tm, u)`` then weights ``self.c + tc``.
    """
    xp = backend.get_array_module(u)
    size = self.m.shape[1]
    # Descending index sequence: size-1, ..., 0.
    inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)

    temporal_m = F.broadcast_to(self.TA(inds), self.m.shape)
    temporal_c = F.broadcast_to(self.TC(inds), self.c.shape)

    probs = F.softmax(F.batch_matmul(self.m + temporal_m, u))
    read = F.batch_matmul(F.swapaxes(self.c + temporal_c, 2, 1), probs)
    read = F.squeeze(read, -1)
    return read + u
Example #21
Source File: test_matmul.py From chainer with MIT License | 5 votes |
def forward(self, inputs, device):
    """Call ``F.batch_matmul`` and require that it emits a DeprecationWarning.

    Returns:
        A one-element tuple holding the product.
    """
    lhs, rhs = inputs
    with testing.assert_warns(DeprecationWarning):
        product = F.batch_matmul(
            lhs, rhs, transa=self.transa, transb=self.transb)
    return (product,)
Example #22
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def batch_matmul_last_dims(A, B, transa=False, transb=False):
    """Batch-multiply the last two axes of ``A`` and ``B``.

    All leading axes (which must match between ``A`` and ``B``) are
    collapsed into one batch axis for ``F.batch_matmul`` and restored on
    the result.
    """
    lead_shape = A.data.shape[:-2]
    assert lead_shape == B.data.shape[:-2]

    A_3d = F.reshape(A, (-1,) + A.data.shape[-2:])
    B_3d = F.reshape(B, (-1,) + B.data.shape[-2:])
    product_3d = F.batch_matmul(A_3d, B_3d, transa=transa, transb=transb)

    return F.reshape(product_3d, lead_shape + product_3d.data.shape[-2:])


########################################################################
# Multihead Attention
#
Example #23
Source File: ic_stn.py From see with GNU General Public License v3.0 | 4 votes |
def __call__(self, images):
    """Predict affine localizations for ``images`` over several timesteps.

    The images pass through a conv/residual feature extractor; for each of
    ``self.num_timesteps`` steps an LSTM predicts a ``(-1, 2, 3)``
    transformation. When ``self.do_parameter_refinement`` is set, the
    parameters are refined ``self.num_refinement_steps`` times by
    multiplying with predicted deltas in homogeneous coordinates, and each
    refined result (last row dropped) is recorded.

    Returns:
        A list in which entry ``i`` concatenates, along axis 0, the
        ``i``-th localization of every timestep.
    """
    self.lstm.reset_state()

    # Feature extractor.
    h = self.bn0(self.conv0(images))
    h = F.average_pooling_2d(F.relu(h), 2, stride=2)
    for res_block in (self.rs1, self.rs2):
        h = res_block(h)
        h = F.max_pooling_2d(h, 2, stride=2)
    h = self.rs3(h)
    self.vis_anchor = h
    h = F.average_pooling_2d(h, 5)

    localizations = []
    with cuda.get_device_from_array(h.data):
        for _ in range(self.num_timesteps):
            lstm_prediction = F.relu(self.lstm(h))
            raw_params = F.reshape(
                self.transform_2(lstm_prediction), (-1, 2, 3))
            transformation_params = rotation_dropout(
                raw_params, ratio=self.dropout_ratio)
            timestep_localizations = [transformation_params]

            if self.do_parameter_refinement:
                transformation_params = self.to_homogeneous_coordinates(
                    transformation_params)
                # Refine the transformation parameters; each refinement
                # step records its result without the homogeneous row.
                for _ in range(self.num_refinement_steps):
                    deltas = self.do_transformation_param_refinement_step(
                        images, transformation_params)
                    deltas = self.to_homogeneous_coordinates(deltas)
                    transformation_params = F.batch_matmul(
                        transformation_params, deltas)
                    timestep_localizations.append(
                        transformation_params[:, :-1, :])

            localizations.append(timestep_localizations)

    return [F.concat(loc, axis=0) for loc in zip(*localizations)]
Example #24
Source File: fsns.py From see with GNU General Public License v3.0 | 4 votes |
def __call__(self, images):
    """Predict ``(2, 3)`` transforms for ``images`` over several timesteps.

    At each of ``self.num_timesteps`` steps the LSTM output is turned into
    two ``(-1, 2, 3)`` matrices (one with shearing disabled, one with
    translation disabled); each is extended with a ``[0, 0, 1]`` row and
    the two are multiplied. The top two rows of the product go through
    ``rotation_dropout``.

    Returns:
        All per-timestep transforms concatenated along axis 0.
    """
    self.lstm.reset_state()
    self.transform_2.reset_state()

    # Feature extractor.
    h = self.bn0(self.conv0(images))
    h = F.average_pooling_2d(F.relu(h), 2, stride=2)
    for res_block in (self.rs1, self.rs2):
        h = res_block(h)
        h = F.max_pooling_2d(h, 2, stride=2)
    h = self.rs3(h)
    self.vis_anchor = h
    h = F.average_pooling_2d(h, 5, stride=2)

    localizations = []
    with cuda.get_device_from_array(h.data):
        # Row [0, 0, 1] appended to each 2x3 matrix to make it 3x3.
        homogenuous_addon = self.xp.zeros(
            (len(h), 1, 3), dtype=h.data.dtype)
        homogenuous_addon[:, 0, 2] = 1

        for _ in range(self.num_timesteps):
            lstm_prediction = F.relu(self.lstm(h))

            # NOTE(review): the translation branch also calls
            # self.rotation_transform -- confirm this is intended.
            translation = F.reshape(
                self.rotation_transform(lstm_prediction), (-1, 2, 3))
            translation = disable_shearing(translation)
            translation = F.concat(
                (translation, homogenuous_addon), axis=1)

            rotation = F.reshape(
                self.rotation_transform(lstm_prediction), (-1, 2, 3))
            rotation = disable_translation(rotation)
            rotation = F.concat((rotation, homogenuous_addon), axis=1)

            # first rotate, then translate
            combined = F.batch_matmul(rotation, translation)

            # Drop the homogeneous row again.
            combined = combined[:, :2, :]
            localizations.append(
                rotation_dropout(combined, ratio=self.dropout_factor))

    return F.concat(localizations, axis=0)
Example #25
Source File: VGG_cis.py From ssai-cnn with MIT License | 4 votes |
def __call__(self, x, t):
    """Run the VGG-style network with one channel group inhibited.

    Args:
        x: Input batch.
        t: Target labels, or ``None`` for prediction.

    Returns:
        ``self.loss`` (softmax cross-entropy divided by ``16 * 16``) when
        ``t`` is given, otherwise ``self.pred`` after softmax.
    """
    # Five conv stages, each followed by 2x2 max pooling.
    conv_stages = (
        ('conv1_1', 'conv1_2'),
        ('conv2_1', 'conv2_2'),
        ('conv3_1', 'conv3_2', 'conv3_3'),
        ('conv4_1', 'conv4_2', 'conv4_3'),
        ('conv5_1', 'conv5_2', 'conv5_3'),
    )
    h = x
    for stage in conv_stages:
        for layer_name in stage:
            h = F.relu(getattr(self, layer_name)(h))
        h = F.max_pooling_2d(h, 2, stride=2)

    h = F.relu(self.fc6(h))
    h = F.relu(self.fc7(h))
    h = self.fc8(h)

    # Channelwise Inhibited
    batch = x.data.shape[0]
    pieces = F.split_axis(h, 3, 1)
    inhibited = F.reshape(pieces[self.c], (batch, 16, 16))
    xp = cuda.get_array_module(x.data)
    zero_mat = Variable(xp.zeros_like(inhibited.data), volatile=t is None)
    inhibited = F.batch_matmul(inhibited, zero_mat)
    inhibited = F.reshape(inhibited, (batch, 1, 16, 16))

    merged = [
        inhibited if idx == self.c else piece
        for idx, piece in enumerate(pieces)
    ]
    self.pred = F.concat(merged, 1)

    if t is None:
        self.pred = F.softmax(self.pred)
        return self.pred

    self.loss = F.softmax_cross_entropy(self.pred, t)
    self.loss /= 16 * 16
    return self.loss
Example #26
Source File: modelx.py From SPReID with MIT License | 4 votes |
def __call__(self, x, t, dataset, train=True):
    """Run one train or feature-extraction step and report the result.

    A segmentation model (run with ``train`` and ``enable_backprop`` off)
    produces five part-probability maps, which are used to pool the
    backbone features. In training mode the classifier loss is computed,
    back-propagated and reported under ``dataset``; otherwise the squeezed
    features are moved to the CPU and reported.
    """
    # Create variables
    x = Variable(x)
    x.to_gpu(self.gpu_id)
    t = Variable(t)
    t.to_gpu(self.gpu_id)

    with chainer.using_config('train', False), \
            chainer.using_config('enable_backprop', False):
        seg_feat = self.segmentation.predictor(x)
        y = self.segmentation.classifiers[0](seg_feat)
        y = F.separate(F.softmax(y), axis=1)
        # foreground, head, torso-hand, lower-body, shoes
        foreground = 1.0-y[0]
        head = y[1]+y[2]+y[4]+y[13]
        torso_hand = y[5]+y[6]+y[7]+y[11]+y[10]+y[3]+y[14]+y[15]
        lower_body = y[9]+y[16]+y[17]+y[12]
        shoes = y[18]+y[19]+y[8]
        segprob = F.stack(
            (foreground, head, torso_hand, lower_body, shoes), axis=1)

    # Forward
    with chainer.using_config('train', train), \
            chainer.using_config('enable_backprop', train):
        x = F.resize_images(x, self.args.scales_reid)
        # InceptionV3 backbone
        x = self.predictor(x)
        x_a = F.average_pooling_2d(x, x.shape[-2:])
        # Resize to segmentation map resolution
        x = F.resize_images(x, segprob.shape[-2:])

        # aggregate features at semantic parts
        seg_flat = F.reshape(
            segprob, (segprob.shape[0], segprob.shape[1], -1))
        feat_flat = F.reshape(x, (x.shape[0], x.shape[1], -1))
        xl = F.scale(
            F.batch_matmul(seg_flat, feat_flat, transb=True),
            1.0/F.sum(segprob, axis=(2, 3)),
            axis=0)
        xfg, xl = F.split_axis(xl, [1], axis=1)
        xl = F.max(xl, axis=1, keepdims=True)
        x = F.concat((xfg, xl), axis=2)

        # Classifiers
        x_s = F.reshape(x, (-1, 2*2048, 1, 1))
        x = F.concat((x_s, x_a), axis=1)

        if not train:
            x = F.squeeze(x)
            x.to_cpu()
            self.reporter.update({dataset: {'features': x.data}})
        else:
            self.y_s = self.classifiers[0](x)
            # Loss
            self.loss = F.softmax_cross_entropy(
                F.squeeze(self.y_s, axis=(2, 3)), t)
            # Clear grads for uninitialized params
            self.cleargrads()
            # Backwards
            self.loss.backward()
            # Reporter
            self.reporter.update(
                {dataset: {'loss': self.loss.data.tolist()}})