Python Examples of chainer.functions.batch

Source File: MnihCNN_rcis.py From ssai-cnn with MIT License

6 votes

def channelwise_inhibited(self, h):
        self.c = random.randint(0, 2)
        xp = cuda.get_array_module(h.data)
        num = h.data.shape[0]

        h = F.split_axis(h, 3, 1)
        c = F.reshape(h[self.c], (num, 16, 16))
        z = Variable(xp.zeros_like(c.data), 'AUTO')
        c = F.batch_matmul(c, z)
        c = F.reshape(c, (num, 1, 16, 16))
        hs = []
        for i, s in enumerate(h):
            if i == self.c:
                hs.append(c)
            else:
                hs.append(s)
        return F.concat(hs, 1)

Source File: train.py From style_transfer_3d with MIT License

6 votes

def extract_features(vgg16, images, masks=None):
    mean = cp.array([103.939, 116.779, 123.68], 'float32')  # BGR
    images = images[:, ::-1] * 255 - mean[None, :, None, None]
    features = vgg16(images, layers=['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3']).values()

    if masks is None:
        masks = cp.ones((images.shape[0], images.shape[2], images.shape[3]), 'float32')
    else:
        masks = masks.data

    style_features = []
    for f in features:
        scale = masks.shape[-1] / f.shape[-1]
        m = cf.average_pooling_2d(masks[:, None, :, :], scale, scale).data
        dim = f.shape[1]

        m = m.reshape((m.shape[0], -1))
        f2 = f.transpose((0, 2, 3, 1))
        f2 = f2.reshape((f2.shape[0], -1, f2.shape[-1]))
        f2 *= cp.sqrt(m)[:, :, None]
        f2 = cf.batch_matmul(f2.transpose((0, 2, 1)), f2)
        f2 /= dim * m.sum(axis=1)[:, None, None]
        style_features.append(f2)

    return style_features

Source File: main.py From style_transfer_3d with MIT License

6 votes

def extract_style_feature(self, images, masks=None):
        xp = self.xp
        mean = xp.array([103.939, 116.779, 123.68], 'float32')  # BGR
        images = images[:, ::-1] * 255 - mean[None, :, None, None]
        features = self.vgg16(images, layers=['conv1_2', 'conv2_2', 'conv3_3', 'conv4_3']).values()
        if masks is None:
            masks = xp.ones((images.shape[0], images.shape[2], images.shape[3]))

        style_features = []
        for feature in features:
            scale = masks.shape[-1] / feature.shape[-1]
            m = cf.average_pooling_2d(masks[:, None, :, :], scale, scale).data
            dim = feature.shape[1]

            m = m.reshape((m.shape[0], -1))
            f2 = feature.transpose((0, 2, 3, 1))
            f2 = f2.reshape((f2.shape[0], -1, f2.shape[-1]))
            f2 *= xp.sqrt(m)[:, :, None]
            f2 = cf.batch_matmul(f2.transpose((0, 2, 1)), f2)
            f2 /= dim * m.sum(axis=1)[:, None, None]
            style_features.append(f2)

        return style_features

Source File: S2S_att.py From seq2seq_temporal_attention with MIT License

6 votes

def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            w = reshape(batch_matmul(state['h2'], a, transa=True), (batch_size, 1))
            w.data = xp.clip(w.data, -40, 40)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)

        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e

Source File: fsns.py From see with GNU General Public License v3.0

6 votes

def attend(self, encoded_features):
        self.out_lstm.reset_state()
        transformed_encoded_features = F.concat([F.expand_dims(self.transform_encoded_features(feature), axis=1) for feature in encoded_features], axis=1)
        concat_encoded_features = F.concat([F.expand_dims(e, axis=1) for e in encoded_features], axis=1)

        lstm_output = self.xp.zeros_like(encoded_features[0])
        outputs = []
        for _ in range(self.num_labels):
            transformed_lstm_output = self.transform_out_lstm_feature(lstm_output)
            attended_feats = []
            for transformed_encoded_feature in F.separate(transformed_encoded_features, axis=1):
                attended_feat = transformed_encoded_feature + transformed_lstm_output
                attended_feat = F.tanh(attended_feat)
                attended_feats.append(self.generate_attended_feat(attended_feat))

            attended_feats = F.concat(attended_feats, axis=1)
            alphas = F.softmax(attended_feats, axis=1)

            lstm_input_feature = F.batch_matmul(alphas, concat_encoded_features, transa=True)
            lstm_input_feature = F.squeeze(lstm_input_feature, axis=1)
            lstm_output = self.out_lstm(lstm_input_feature)
            outputs.append(lstm_output)
        return outputs

Source File: net.py From convolutional_seq2seq with BSD 3-Clause "New" or "Revised" License

6 votes

def attend(self, query, key, value, mask, minfs=None):
        """
        Input shapes:
            q=(b, units, dec_l), k=(b, units, enc_l),
            v=(b, units, dec_l, enc_l), m=(b, dec_l, enc_l)
        """

        # Calculate Attention Scores with Mask for Zero-padded Areas
        pre_a = F.batch_matmul(query, key, transa=True)  # (b, dec_l, enc_l)
        minfs = self.xp.full(pre_a.shape, -np.inf, pre_a.dtype) \
            if minfs is None else minfs
        pre_a = F.where(mask, pre_a, minfs)
        a = F.softmax(pre_a, axis=2)
        # if values in axis=2 are all -inf, they become nan. thus do re-mask.
        a = F.where(self.xp.isnan(a.data),
                    self.xp.zeros(a.shape, dtype=a.dtype), a)
        reshaped_a = a[:, None]  # (b, 1, dec_xl, enc_l)

        # Calculate Weighted Sum
        pre_c = F.broadcast_to(reshaped_a, value.shape) * value
        c = F.sum(pre_c, axis=3, keepdims=True)  # (b, units, dec_xl, 1)
        return c

Source File: updater.py From chainer-partial_convolution_image_inpainting with MIT License

6 votes

def calc_loss_style(hout_dict,hcomp_dict,hgt_dict):
    layers = hgt_dict.keys()
    for i,layer_name in enumerate(layers):
        B,C,H,W = hout_dict[layer_name].shape
        hout = F.reshape(hout_dict[layer_name],(B,C,H*W))
        hcomp = F.reshape(hcomp_dict[layer_name],(B,C,H*W))
        hgt = F.reshape(hgt_dict[layer_name],(B,C,H*W))
        
        hout_gram = F.batch_matmul(hout,hout,transb=True)
        hcomp_gram = F.batch_matmul(hcomp,hcomp,transb=True)
        hgt_gram = F.batch_matmul(hgt,hgt,transb=True)
        
        if i==0: 
            L_style_out = F.mean_absolute_error(hout_gram,hgt_gram)/(C*H*W)
            L_style_comp = F.mean_absolute_error(hcomp_gram,hgt_gram)/(C*H*W)
        else:
            L_style_out += F.mean_absolute_error(hout_gram,hgt_gram)/(C*H*W)
            L_style_comp += F.mean_absolute_error(hcomp_gram,hgt_gram)/(C*H*W)        

    return L_style_out + L_style_comp

Source File: MnihCNN_cis.py From ssai-cnn with MIT License

6 votes

def channelwise_inhibited(self, h):
        xp = cuda.get_array_module(h.data)
        num = h.data.shape[0]

        h = F.split_axis(h, 3, 1)
        c = F.reshape(h[self.c], (num, 16, 16))
        z = Variable(xp.zeros_like(c.data), 'AUTO')
        c = F.batch_matmul(c, z)
        c = F.reshape(c, (num, 1, 16, 16))
        hs = []
        for i, s in enumerate(h):
            if i == self.c:
                hs.append(c)
            else:
                hs.append(s)
        return F.concat(hs, 1)

Source File: attention.py From knmt with GNU General Public License v3.0

6 votes

def __call__(self, inpt, mask):
        mb_size = inpt.data.shape[0]
        max_length = inpt.data.shape[1]

        precomp = F.reshape(F.tanh(self.lin(F.reshape(inpt, (-1, self.Hi)))), (mb_size, -1, self.Ho))

        mask_offset = max_length - len(mask)

        precomp_mask_penalties = self.xp.concatenate(
            [
                self.xp.zeros((mb_size, mask_offset), dtype=self.xp.float32),
                -10000 * (1 - self.xp.concatenate([
                    self.xp.reshape(mask_elem, (mb_size, 1)).astype(self.xp.float32) for mask_elem in mask], 1))
            ], 1
        )

        def compute_copy_coefficients(state):
            betas = F.reshape(batch_matmul(precomp, state), (mb_size, -1))
            masked_betas = betas + precomp_mask_penalties
            return masked_betas

        return compute_copy_coefficients

Source File: decoder_cells.py From knmt with GNU General Public License v3.0

5 votes

def compute_logits(self, new_states, concatenated, attn):
        new_output_state = new_states[-1]

        all_concatenated = F.concat((concatenated, new_output_state))
        logits = self.decoder_chain.lin_o(self.decoder_chain.maxo(all_concatenated))

        if self.lexicon_probability_matrix is not None:
            current_mb_size = new_output_state.data.shape[0]
            assert self.mb_size is None or current_mb_size <= self.mb_size
            lexicon_probability_matrix = self.lexicon_probability_matrix[:current_mb_size]

            # Just making sure data shape is as expected
            attn_mb_size, max_source_length_attn = attn.data.shape
            assert attn_mb_size == current_mb_size
            lex_mb_size, max_source_length_lexicon, v_size_lexicon = lexicon_probability_matrix.shape
            assert max_source_length_lexicon == max_source_length_attn
            assert logits.data.shape == (current_mb_size, v_size_lexicon)

            if self.demux:
                assert lex_mb_size == 1
                weighted_lex_probs = F.reshape(
                    matmul_constant(attn, lexicon_probability_matrix.reshape(lexicon_probability_matrix.shape[1],
                                                                             lexicon_probability_matrix.shape[2])),
                    logits.data.shape)
            else:
                assert lex_mb_size == current_mb_size

    #                 weighted_lex_probs = F.reshape(
    #                         F.batch_matmul(attn, ConstantFunction(lexicon_probability_matrix)(), transa = True),
    #                                                logits.data.shape)

                weighted_lex_probs = F.reshape(
                    batch_matmul_constant(attn, lexicon_probability_matrix, transa=True),
                    logits.data.shape)

            logits += F.log(weighted_lex_probs + self.lex_epsilon)
        return logits

Source File: chainer-gogh-multi.py From chainer-gogh with MIT License

5 votes

def get_matrix(y):
    ch = y.data.shape[1]
    wd = y.data.shape[2]
    gogh_y = F.reshape(y, (y.data.shape[0],ch,wd**2))
    gogh_matrix = F.batch_matmul(gogh_y, gogh_y, transb=True)/np.float32(ch*wd**2)
    return gogh_matrix

Source File: angular_loss.py From deep_metric_learning with MIT License

5 votes

def angular_mc_loss(f, f_p, alpha=45, in_degree=True):
    '''
    Args:
        f (chainer.Variable or xp.npdarray):
            Anchor vectors. Each vectors in f must be l2 normalized.
        f_p (chainer.Variable or xp.npdarray):
            Positive vectors. Each vectors in f must be l2 normalized.
    '''
    xp = cuda.get_array_module(f)

    if in_degree:
        alpha = np.deg2rad(alpha)
    sq_tan_alpha = np.tan(alpha) ** 2
    n_pairs = len(f)

    # first and second term of f_{a,p,n}
    term1 = 4 * sq_tan_alpha * matmul(f + f_p, transpose(f_p))
    term2 = 2 * (1 + sq_tan_alpha) * F.sum(f * f_p, axis=1, keepdims=True)
#    term2 = 2 * (1 + sq_tan_alpha) * F.batch_matmul(f, f_p, transa=True).reshape(n_pairs, 1)

    f_apn = term1 - F.broadcast_to(term2, (n_pairs, n_pairs))
    # multiply zero to diagonal components of f_apn
    mask = xp.ones_like(f_apn.data) - xp.eye(n_pairs, dtype=f.dtype)
    f_apn = f_apn * mask

    return F.average(F.logsumexp(f_apn, axis=1))

Source File: S2S_att.py From seq2seq_temporal_attention with MIT License

5 votes

def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            v = tanh(self.av(array.concat.concat((a, state['h2']), axis=1)))
            w = self.vw(v)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e

Source File: S2S_att.py From seq2seq_temporal_attention with MIT License

5 votes

def __call__(self, a_list, state, batch_size, xp):
        e_list = []
        sum_e = xp.zeros((batch_size, 1), dtype=xp.float32)
        for a in a_list:
            w = self.aw(a, state['h2'])
            w.data = xp.clip(w.data, -20, 20)
            e = exp(w)
            e_list.append(e)
            sum_e = sum_e + e

        context = xp.zeros((batch_size, self.hidden_size), dtype=xp.float32)
        for a, e in zip(a_list, e_list):
            e /= sum_e
            context = context + reshape(batch_matmul(a, e), (batch_size, self.hidden_size))
        return context, e_list, sum_e

Source File: attention.py From knmt with GNU General Public License v3.0

5 votes

def batch_matmul(a, b, transa=False, transb=False):
    return F.matmul(a[:, :, None], b, transa=transa, transb=transb)

Source File: attenders.py From lencon with MIT License

5 votes

def _attend(self, p):
        weight = F.batch_matmul(self.source_hiddens, p)
        weight = F.where(self.mask, weight, self.minf)
        attention = F.softmax(weight)
        return attention

Source File: attenders.py From lencon with MIT License

5 votes

def __call__(self, p, train=True):
        attention = self._attend(p)

        if self.history is not None:
            self.history.append(
                chainer.cuda.to_cpu(attention.data[0, :, 0]).tolist())

        ret = F.batch_matmul(F.swapaxes(self.source_hiddens, 2, 1), attention)
        return F.reshape(ret, (self.batchsize, self.dim_out))

Source File: model_py.py From models with MIT License

5 votes

def _attn(self, q, k, v):
        w = F.batch_matmul(q.reshape(-1, *q.shape[-2:]),
                           k.reshape(-1, *k.shape[-2:]))
        if self.scale:
            w = w / math.sqrt(v.shape[-1])
        # TF implem method: mask_attn_weights
        w = w * self.b.array[0] + -1e9 * (1 - self.b.array[0])
        w = F.softmax(w, axis=2)
        w = self.attn_dropout(w)
        return F.batch_matmul(w, v.reshape(-1, *v.shape[-2:]))\
                .reshape(v.shape[0], v.shape[1], v.shape[2], -1)

Source File: attention.py From knmt with GNU General Public License v3.0

5 votes

def compute_ctxt_demux(self, fb_concat, mask):
        mb_size, nb_elems, Hi = fb_concat.data.shape
        assert Hi == self.Hi
        assert mb_size == 1
        assert len(mask) == 0

        precomputed_al_factor = F.reshape(self.al_lin_h(
            F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))), (mb_size, nb_elems, self.Ha))

#         concatenated_mask = F.concat([F.reshape(mask_elem, (mb_size, 1)) for mask_elem in mask], 1)

        def compute_ctxt(previous_state, prev_word_embedding=None):
            current_mb_size = previous_state.data.shape[0]

            al_factor = F.broadcast_to(precomputed_al_factor, (current_mb_size, nb_elems, self.Ha))
#             used_fb_concat = F.broadcast_to(fb_concat, (current_mb_size, nb_elems, Hi))
#             used_concatenated_mask = F.broadcast_to(concatenated_mask, (current_mb_size, nb_elems))

            state_al_factor = self.al_lin_s(previous_state)
            
            #As suggested by Isao Goto
            if prev_word_embedding is not None:
                state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)
            
            state_al_factor_bc = F.broadcast_to(F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)), (current_mb_size, nb_elems, self.Ha))
            a_coeffs = F.reshape(self.al_lin_o(F.reshape(F.tanh(state_al_factor_bc + al_factor),
                                                         (current_mb_size * nb_elems, self.Ha))), (current_mb_size, nb_elems))


#             with cuda.get_device_from_array(used_concatenated_mask.data):
#                 a_coeffs = a_coeffs - 10000 * (1-used_concatenated_mask.data)

            attn = F.softmax(a_coeffs)

#             ci = F.reshape(F.batch_matmul(attn, used_fb_concat, transa = True), (current_mb_size, self.Hi))

            ci = F.reshape(F.matmul(attn, F.reshape(fb_concat, (nb_elems, Hi))), (current_mb_size, self.Hi))

            return ci, attn

        return compute_ctxt

Source File: memnn.py From pfio with MIT License

5 votes

def query(self, u):
        xp = backend.get_array_module(u)
        size = self.m.shape[1]
        inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
        tm = self.TA(inds)
        tc = self.TC(inds)
        tm = F.broadcast_to(tm, self.m.shape)
        tc = F.broadcast_to(tc, self.c.shape)
        p = F.softmax(F.batch_matmul(self.m + tm, u))
        o = F.batch_matmul(F.swapaxes(self.c + tc, 2, 1), p)
        o = F.squeeze(o, -1)
        u = o + u
        return u

Source File: test_matmul.py From chainer with MIT License

5 votes

def forward(self, inputs, device):
        x1, x2 = inputs
        with testing.assert_warns(DeprecationWarning):
            y = F.batch_matmul(
                x1, x2, transa=self.transa, transb=self.transb)
        return y,

Source File: multi_attention.py From knmt with GNU General Public License v3.0

5 votes

def batch_matmul_last_dims(A, B, transa=False, transb=False):
    assert A.data.shape[:-2] == B.data.shape[:-2]
    reshaped_A = F.reshape(A, (-1,) + A.data.shape[-2:])
    reshaped_B = F.reshape(B, (-1,) + B.data.shape[-2:])
    reshaped_result = F.batch_matmul(reshaped_A, reshaped_B, transa=transa, transb=transb)
    result = F.reshape(reshaped_result, A.data.shape[:-2] + reshaped_result.data.shape[-2:])
    return result

########################################################################
# Multihead Attention
#

Source File: ic_stn.py From see with GNU General Public License v3.0

4 votes

def __call__(self, images):
        self.lstm.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        # h = self.rs4(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5)

        localizations = []

        with cuda.get_device_from_array(h.data):

            for _ in range(self.num_timesteps):
                timestep_localizations = []
                in_feature = h
                lstm_prediction = F.relu(self.lstm(in_feature))
                transformed = self.transform_2(lstm_prediction)
                transformed = F.reshape(transformed, (-1, 2, 3))
                transformation_params = rotation_dropout(transformed, ratio=self.dropout_ratio)
                timestep_localizations.append(transformation_params)

                # self.transform_2.disable_update()

                if self.do_parameter_refinement:
                    transformation_params = self.to_homogeneous_coordinates(transformation_params)
                    # refine the transformation parameters
                    for _ in range(self.num_refinement_steps):
                        transformation_deltas = self.do_transformation_param_refinement_step(images, transformation_params)
                        transformation_deltas = self.to_homogeneous_coordinates(transformation_deltas)

                        transformation_params = F.batch_matmul(transformation_params, transformation_deltas)
                        # transformation_params = F.batch_matmul(transformation_deltas, transformation_params)
                        timestep_localizations.append(transformation_params[:, :-1, :])

                localizations.append(timestep_localizations)

        return [F.concat(loc, axis=0) for loc in zip(*localizations)]

Source File: fsns.py From see with GNU General Public License v3.0

4 votes

def __call__(self, images):
        self.lstm.reset_state()
        self.transform_2.reset_state()

        h = self.bn0(self.conv0(images))
        h = F.average_pooling_2d(F.relu(h), 2, stride=2)

        h = self.rs1(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs2(h)
        h = F.max_pooling_2d(h, 2, stride=2)

        h = self.rs3(h)
        self.vis_anchor = h
        h = F.average_pooling_2d(h, 5, stride=2)

        localizations = []

        with cuda.get_device_from_array(h.data):
            homogenuous_addon = self.xp.zeros((len(h), 1, 3), dtype=h.data.dtype)
            homogenuous_addon[:, 0, 2] = 1

        for _ in range(self.num_timesteps):
            lstm_prediction = F.relu(self.lstm(h))
            translation_transform = F.reshape(self.rotation_transform(lstm_prediction), (-1, 2, 3))
            translation_transform = disable_shearing(translation_transform)
            translation_transform = F.concat((translation_transform, homogenuous_addon), axis=1)

            rotation_transform = F.reshape(self.rotation_transform(lstm_prediction), (-1, 2, 3))
            rotation_transform = disable_translation(rotation_transform)
            rotation_transform = F.concat((rotation_transform, homogenuous_addon), axis=1)

            # first rotate, then translate
            transform = F.batch_matmul(rotation_transform, translation_transform)
            # homogenuous_multiplier = F.get_item(transform, (..., 2, 2))
            #
            # # bring matrices from homogenous coordinates to normal coordinates
            transform = transform[:, :2, :]
            # transform = transform / homogenuous_multiplier
            localizations.append(rotation_dropout(transform, ratio=self.dropout_factor))

        return F.concat(localizations, axis=0)

Source File: VGG_cis.py From ssai-cnn with MIT License

4 votes

def __call__(self, x, t):
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)

        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        h = self.fc8(h)

        # Channelwise Inhibited
        h = F.split_axis(h, 3, 1)
        c = F.reshape(h[self.c], (x.data.shape[0], 16, 16))
        xp = cuda.get_array_module(x.data)
        volatile = False if t is not None else True
        z = Variable(xp.zeros_like(c.data), volatile=volatile)
        c = F.batch_matmul(c, z)
        c = F.reshape(c, (x.data.shape[0], 1, 16, 16))
        hs = []
        for i, s in enumerate(h):
            if i == self.c:
                hs.append(c)
            else:
                hs.append(s)
        self.pred = F.concat(hs, 1)

        if t is not None:
            self.loss = F.softmax_cross_entropy(self.pred, t)
            self.loss /= 16 * 16
            return self.loss
        else:
            self.pred = F.softmax(self.pred)
            return self.pred

Source File: modelx.py From SPReID with MIT License

4 votes

def __call__(self, x, t, dataset, train=True):
        
        # Create variables
        x = Variable(x)
        x.to_gpu(self.gpu_id)
        t = Variable(t)
        t.to_gpu(self.gpu_id)

        with chainer.using_config('train', False):
            with chainer.using_config('enable_backprop', False):
                xo = self.segmentation.predictor(x)
                y = self.segmentation.classifiers[0](xo)                
                y = F.separate(F.softmax(y), axis=1)
                # foreground, head, torso-hand, lower-body, shoes
                segprob = F.stack((1.0-y[0],
                    y[1]+y[2]+y[4]+y[13],
                    y[5]+y[6]+y[7]+y[11]+y[10]+y[3]+y[14]+y[15],
                    y[9]+y[16]+y[17]+y[12],
                    y[18]+y[19]+y[8]), axis=1)   
        
        # Forward
        with chainer.using_config('train', train):
            with chainer.using_config('enable_backprop', train):
                x = F.resize_images(x,self.args.scales_reid)
                # InceptionV3 backbone                                
                x = self.predictor(x)
                x_a = F.average_pooling_2d(x, x.shape[-2:])
                # Resize to segmentation map resolution
                x = F.resize_images(x,segprob.shape[-2:])     
                # aggregate features at semantic parts
                xl = F.scale(
                    F.batch_matmul(
                    F.reshape(segprob,(segprob.shape[0], segprob.shape[1], -1)),
                    F.reshape(x,(x.shape[0], x.shape[1], -1)), 
                     transb=True),
                    1.0/F.sum(segprob, axis=(2,3)), axis=0)                
                
                xfg, xl = F.split_axis(xl, [1], axis=1)
                xl = F.max(xl, axis=1, keepdims=True)
                x = F.concat((xfg,xl), axis=2)
                # Classifiers                
                x_s = F.reshape(x, (-1, 2*2048, 1, 1))
                x = F.concat((x_s, x_a), axis=1)
                
                if train:
                    self.y_s = self.classifiers[0](x)
                    # Loss                
                    self.loss = F.softmax_cross_entropy(F.squeeze(self.y_s, axis=(2, 3)), t)

                    # Clear grads for uninitialized params
                    self.cleargrads()
                    # Backwards
                    self.loss.backward()

                    # Reporter        
                    self.reporter.update({dataset:{'loss':self.loss.data.tolist()}})        

                else:
                    x = F.squeeze(x)
                    x.to_cpu()
                    self.reporter.update({dataset:{'features':x.data}})

Python chainer.functions.batch_matmul() Examples