Python chainer.functions.swapaxes() Examples
The following are 28 code examples of chainer.functions.swapaxes().
You can go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
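As a quick orientation before the examples: chainer.functions.swapaxes(x, axis1, axis2) interchanges two axes of an array or Variable, analogous to numpy.swapaxes. The minimal sketch below is illustrative only; the input shape is arbitrary.

import numpy as np
import chainer.functions as F

# Arbitrary (batch, channels, height, width) input chosen for illustration.
x = np.zeros((2, 3, 4, 5), dtype=np.float32)

# Exchange the channel and height axes; the result is a chainer.Variable.
y = F.swapaxes(x, axis1=1, axis2=2)
print(y.shape)  # (2, 4, 3, 5)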
Example #1
Source File: common.py From imgclsmob with MIT License | 6 votes |
def channel_shuffle2(x, groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083. The alternative version.

    Parameters:
    ----------
    x : chainer.Variable or numpy.ndarray or cupy.ndarray
        Input variable.
    groups : int
        Number of groups.

    Returns
    -------
    chainer.Variable or numpy.ndarray or cupy.ndarray
        Resulted variable.
    """
    batch, channels, height, width = x.shape
    channels_per_group = channels // groups
    x = F.reshape(x, shape=(batch, channels_per_group, groups, height, width))
    x = F.swapaxes(x, axis1=1, axis2=2)
    x = F.reshape(x, shape=(batch, channels, height, width))
    return x
Example #2
Source File: convolution_rbm.py From SeRanet with MIT License | 6 votes |
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize - 1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
Example #3
Source File: convolution_rbm.py From SeRanet with MIT License | 6 votes |
def propdown(self, hid):
    """ This function propagates the hidden units activation downwards to the visible units
    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out) - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
        # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
        # print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
        # print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
        # print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
        # print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
        # print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
        # print('v_mean', v_mean.data.shape)
        # print('v_mean sum', F.sum(v_mean).data)
        # print('hid', hid.data.shape)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
    return v_mean
Example #4
Source File: subsampling.py From espnet with Apache License 2.0 | 6 votes |
def forward(self, xs, ilens):
    """Subsample x.

    :param chainer.Variable x: input tensor
    :return: subsampled x and mask
    """
    xs = self.xp.array(xs[:, None])
    xs = F.relu(self.conv1(xs))
    xs = F.relu(self.conv2(xs))
    batch, _, length, _ = xs.shape
    xs = self.out(F.swapaxes(xs, 1, 2).reshape(batch * length, -1))
    xs = self.pe(xs.reshape(batch, length, -1))
    # change ilens accordingly
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    return xs, ilens
Example #5
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_pose, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #6
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h = F.concat((h_pos, h_pose), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #7
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #8
Source File: common.py From imgclsmob with MIT License | 6 votes |
def channel_shuffle(x, groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083.

    Parameters:
    ----------
    x : chainer.Variable or numpy.ndarray or cupy.ndarray
        Input variable.
    groups : int
        Number of groups.

    Returns
    -------
    chainer.Variable or numpy.ndarray or cupy.ndarray
        Resulted variable.
    """
    batch, channels, height, width = x.shape
    channels_per_group = channels // groups
    x = F.reshape(x, shape=(batch, groups, channels_per_group, height, width))
    x = F.swapaxes(x, axis1=1, axis2=2)
    x = F.reshape(x, shape=(batch, channels, height, width))
    return x
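To make the effect of this reshape/swapaxes/reshape pattern concrete, here is a small, hypothetical sanity check. The input shape and fill values are made up for illustration, and it assumes channel_shuffle from this example is in scope and that F refers to chainer.functions:

import numpy as np

# Hypothetical input: 1 sample, 6 channels, 2x2 spatial map,
# with every channel filled with its own index so the permutation is visible.
x = np.arange(6, dtype=np.float32).reshape(1, 6, 1, 1) * np.ones((1, 6, 2, 2), dtype=np.float32)

y = channel_shuffle(x, groups=2)
print(y.shape)              # (1, 6, 2, 2) -- the shape is unchanged
print(y.array[0, :, 0, 0])  # [0. 3. 1. 4. 2. 5.] -- channels interleaved across the two groups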
Example #9
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = F.swapaxes(x, 1, 3)
    y2 = F.swapaxes(x, 0, 1)
    return y1, y2
Example #10
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def reorganize_by_head(Q, n_heads):
    mb_size, n_Q, d_model = Q.data.shape
    assert d_model % n_heads == 0
    head_size = d_model // n_heads
    reshaped_Q = F.reshape(Q, (mb_size, n_Q, n_heads, head_size))
    return F.swapaxes(reshaped_Q, 1, 2)
Example #11
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def undo_reorganize_by_head(Q):
    mb_size, n_heads, n_Q, head_size = Q.data.shape
    swapped_Q = F.swapaxes(Q, 1, 2)
    return F.reshape(swapped_Q, (mb_size, n_Q, -1))
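The two helpers above are inverses of each other. A rough, hypothetical sanity check (shapes chosen arbitrarily, assuming both functions and chainer are importable) could look like this:

import numpy as np
import chainer

# Hypothetical minibatch: 2 sequences of 5 query vectors, d_model = 8, 4 heads.
Q = chainer.Variable(np.random.rand(2, 5, 8).astype(np.float32))

heads = reorganize_by_head(Q, n_heads=4)   # (2, 4, 5, 2): mb_size, n_heads, n_Q, head_size
restored = undo_reorganize_by_head(heads)  # (2, 5, 8): back to mb_size, n_Q, d_model
print(heads.shape, restored.shape)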
Example #12
Source File: attention.py From espnet with Apache License 2.0 | 5 votes |
def forward(self, e_var, s_var=None, mask=None, batch=1):
    """Core function of the Multi-head attention layer.

    Args:
        e_var (chainer.Variable): Variable of input array.
        s_var (chainer.Variable): Variable of source array from encoder.
        mask (chainer.Variable): Attention mask.
        batch (int): Batch size.

    Returns:
        chainer.Variable: Output of multi-head attention layer.

    """
    xp = self.xp
    if s_var is None:
        # (batch, head, time1/2, d_k)
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
    else:
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
    scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(self.d_k)
    if mask is not None:
        mask = xp.stack([mask] * self.h, axis=1)
        scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, "f"))
    self.attn = F.softmax(scores, axis=-1)
    p_attn = F.dropout(self.attn, self.dropout)
    x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
    x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
    return self.linear_out(x)
Example #13
Source File: functions_ndarray.py From chainer-compiler with MIT License | 5 votes |
def assign(self, target: 'Object'):
    # unimplemented
    temp = np.array(0)
    for v in dir(temp):
        func = values.Object(
            values.FuncValue(functions.UnimplementedFunction(v), target, None))
        target.attributes.set_predefined_obj(str(v), func)

    shape_func = values.Object(
        values.FuncValue(NDArrayShapeFunction(), target, None))
    target.attributes.set_predefined_obj('shape', shape_func)

    size_func = values.Object(
        values.FuncValue(NDArraySizeFunction(), target, None))
    target.attributes.set_predefined_obj('size', size_func)

    cumsum_func = values.Object(
        values.FuncValue(NDArrayCumsumFunction(), target, None))
    target.attributes.set_predefined_obj('cumsum', cumsum_func)

    def add_chainer_function(func):
        func_ = values.Object(
            values.FuncValue(NDArrayChainerFunction(func), target, None))
        target.attributes.set_predefined_obj(func.__name__, func_)

    add_chainer_function(F.reshape)
    add_chainer_function(F.sum)
    add_chainer_function(F.swapaxes)
    add_chainer_function(F.transpose)
Example #14
Source File: test_swapaxes.py From chainer with MIT License | 5 votes |
def forward(self, inputs, devices):
    x, = inputs
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y,
Example #15
Source File: test_swapaxes.py From chainer with MIT License | 5 votes |
def forward_expected(self, inputs):
    x, = inputs
    y_expected = x.swapaxes(self.axis1, self.axis2)
    return y_expected,
Example #16
Source File: module.py From fpl with MIT License | 5 votes |
def __call__(self, x):
    h = F.swapaxes(x, 1, 2)  # (B, D, L)
    for idx in range(self.nb_layers):
        h = getattr(self, "conv{}".format(idx))(h)
    return h
Example #17
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel, xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): ChxVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #18
Source File: cnn.py From fpl with MIT License | 5 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h = self.pos_encoder(pos_x)
    h = self.inter(h)
    h = self.pos_decoder(h)
    pred_y = self.last(h)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #19
Source File: module.py From fpl with MIT License | 5 votes |
def __call__(self, x):
    h = F.swapaxes(x, 1, 2)  # (B, D, L)
    for idx in range(self.nb_layers):
        h = getattr(self, "conv{}".format(idx))(h)
    return h
Example #20
Source File: main.py From qb with MIT License | 5 votes |
def __call__(self, xs):
    """
    Forward pass of a sentence.
    :param xs: a batch of sentences
    :return h: final hidden states
    """
    xs = self.embed(xs)
    xs = F.swapaxes(xs, 0, 1)  # time, batch, embed
    self.rnn.reset_state()
    for x in xs:
        h = self.rnn(x)
    h = F.tanh(self.linear(h))
    return h
Example #21
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = F.swapaxes(x, 1, 3)
    y2 = F.swapaxes(x, 0, 1)
    return y1, y2

# ======================================
Example #22
Source File: voca.py From imgclsmob with MIT License | 5 votes |
def __call__(self, x, pid):
    x = self.bn(x)
    x = F.swapaxes(x, axis1=1, axis2=3)

    y = F.expand_dims(F.expand_dims(pid, axis=-1), axis=-1)
    y = F.tile(y, reps=(1, 1, self.audio_window_size, 1))
    x = F.concat((x, y), axis=1)

    x = self.branch(x)

    x = F.reshape(x, shape=(x.shape[0], -1))
    x = F.concat((x, pid), axis=1)
    x = self.fc1(x)
    x = F.tanh(x)
    x = self.fc2(x)
    return x
Example #23
Source File: transformer.py From EEND with MIT License | 5 votes |
def __call__(self, x, batch_size):
    # x: (BT, F)
    # TODO: if chainer >= 5.0, use linear functions with 'n_batch_axes'
    # and x be (B, T, F), then remove batch_size.
    q = self.linearQ(x).reshape(batch_size, -1, self.h, self.d_k)
    k = self.linearK(x).reshape(batch_size, -1, self.h, self.d_k)
    v = self.linearV(x).reshape(batch_size, -1, self.h, self.d_k)
    scores = F.matmul(
        F.swapaxes(q, 1, 2), k.transpose(0, 2, 3, 1)) / np.sqrt(self.d_k)
    # scores: (B, h, T, T)
    self.att = F.softmax(scores, axis=3)
    p_att = F.dropout(self.att, self.dropout)
    x = F.matmul(p_att, F.swapaxes(v, 1, 2))
    x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
    return self.linearO(x)
Example #24
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = x.swapaxes(1, 3)
    y2 = x.swapaxes(0, 1)
    return y1, y2

# ======================================
Example #25
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel, xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): XCVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #26
Source File: EspNet_AttLoc.py From chainer-compiler with MIT License | 4 votes |
def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
    '''AttLoc forward

    :param enc_hs:
    :param dec_z:
    :param att_prev:
    :param scaling:
    :return:
    '''
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h)

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # initialize attention weight with uniform dist.
    if att_prev is None:
        att_prev = [self.xp.full(
            hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
        att_prev = [chainer.Variable(att) for att in att_prev]
        att_prev = F.pad_sequence(att_prev)

    # TODO(watanabe) use <chainer variable>.reshpae(), instead of F.reshape()
    # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
    att_conv = self.loc_conv(
        F.reshape(att_prev, (batch, 1, 1, self.h_length)))
    # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
    att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
    # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
    att_conv = linear_tensor(self.mlp_att, att_conv)

    # dec_z_tiled: utt x frame x att_dim
    dec_z_tiled = F.broadcast_to(
        F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

    # dot with gvec
    # utt x frame x att_dim -> utt x frame
    # TODO(watanabe) use batch_matmul
    e = F.squeeze(linear_tensor(self.gvec, F.tanh(
        att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)

    # weighted sum over flames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #27
Source File: EspNet_AttLoc.py From chainer-compiler with MIT License | 4 votes |
def forward(self, enc_hs, dec_z, att_prev):
    '''AttLoc forward

    :param enc_hs:
    :param dec_z:
    :param att_prev:
    :param scaling:
    :return:
    '''
    # EDIT(hamaji): scaling is now a local variable.
    scaling = 2.0
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = linear_tensor_3d(self.mlp_enc, self.enc_h)

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # initialize attention weight with uniform dist.
    if att_prev is None:
        att_prev = [self.xp.full(
            hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
        att_prev = [chainer.Variable(att) for att in att_prev]
        att_prev = F.pad_sequence(att_prev)

    # TODO(watanabe) use <chainer variable>.reshpae(), instead of F.reshape()
    # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
    att_conv = self.loc_conv(
        F.reshape(att_prev, (batch, 1, 1, self.h_length)))
    # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
    att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
    # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
    att_conv = linear_tensor_3d(self.mlp_att, att_conv)

    # dec_z_tiled: utt x frame x att_dim
    dec_z_tiled = F.broadcast_to(
        F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

    # dot with gvec
    # utt x frame x att_dim -> utt x frame
    # TODO(watanabe) use batch_matmul
    e = F.squeeze(linear_tensor_3d(self.gvec, F.tanh(
        att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)

    # weighted sum over flames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #28
Source File: encoders.py From espnet with Apache License 2.0 | 4 votes |
def __call__(self, xs, ilens):
    """VGG2L forward propagation.

    Args:
        xs (chainer.Variable): Batch of padded charactor ids. (B, Tmax)
        ilens (chainer.Variable): Batch of length of each features. (B,)

    Returns:
        chainer.Variable: Subsampled vector of xs.
        chainer.Variable: Subsampled vector of ilens.

    """
    logging.info(self.__class__.__name__ + " input lengths: " + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(
        xs.reshape(
            xs.shape[0],
            xs.shape[1],
            self.in_channel,
            xs.shape[2] // self.in_channel,
        ),
        1,
        2,
    )

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    ilens = self.xp.array(
        self.xp.ceil(self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32
    )
    ilens = self.xp.array(
        self.xp.ceil(self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32
    )

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = xs.reshape(xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3])
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens