Python chainer.functions.swapaxes() Examples
The following are 28 code examples of chainer.functions.swapaxes().
You can go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
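As a quick orientation before the examples: chainer.functions.swapaxes(x, axis1, axis2) interchanges two axes of an array or Variable, analogous to numpy.swapaxes. The minimal sketch below is illustrative only; the input shape is arbitrary.

import numpy as np
import chainer.functions as F

# Arbitrary (batch, channels, height, width) input chosen for illustration.
x = np.zeros((2, 3, 4, 5), dtype=np.float32)

# Exchange the channel and height axes; the result is a chainer.Variable.
y = F.swapaxes(x, axis1=1, axis2=2)
print(y.shape)  # (2, 4, 3, 5)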
Example #1
Source File: common.py From imgclsmob with MIT License | 6 votes |
def channel_shuffle2(x, groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083. The alternative version.

    Parameters:
    ----------
    x : chainer.Variable or numpy.ndarray or cupy.ndarray
        Input variable.
    groups : int
        Number of groups.

    Returns
    -------
    chainer.Variable or numpy.ndarray or cupy.ndarray
        Resulted variable.
    """
    batch, channels, height, width = x.shape
    channels_per_group = channels // groups
    x = F.reshape(x, shape=(batch, channels_per_group, groups, height, width))
    x = F.swapaxes(x, axis1=1, axis2=2)
    x = F.reshape(x, shape=(batch, channels, height, width))
    return x
Example #2
Source File: convolution_rbm.py From SeRanet with MIT License | 6 votes |
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize - 1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
Example #3
Source File: convolution_rbm.py From SeRanet with MIT License | 6 votes |
def propdown(self, hid):
    """ This function propagates the hidden units activation downwards to the visible units
    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out) - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width) - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
        # F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
        # print('W info ', self.conv.W.data.shape, 'W_flipped info ', W_flipped.data.shape)
        # print('W info ', self.conv.W.data[3, 0, 2, 3], 'W_flipped info ', W_flipped.data[0, 3, 8, 7])
        # print('W info ', self.conv.W.data[3, 0, 8, 7], 'W_flipped info ', W_flipped.data[0, 3, 2, 3])
        # print('W info ', self.conv.W.data[19, 0, 4, 0], 'W_flipped info ', W_flipped.data[0, 19, 6, 10])
        # print('pre_sigmoidactivation', F.sum(pre_sigmoid_activation).data)
        # print('v_mean', v_mean.data.shape)
        # print('v_mean sum', F.sum(v_mean).data)
        # print('hid', hid.data.shape)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
    return v_mean
Example #4
Source File: subsampling.py From espnet with Apache License 2.0 | 6 votes |
def forward(self, xs, ilens):
    """Subsample x.

    :param chainer.Variable x: input tensor
    :return: subsampled x and mask
    """
    xs = self.xp.array(xs[:, None])
    xs = F.relu(self.conv1(xs))
    xs = F.relu(self.conv2(xs))
    batch, _, length, _ = xs.shape
    xs = self.out(F.swapaxes(xs, 1, 2).reshape(batch * length, -1))
    xs = self.pe(xs.reshape(batch, length, -1))
    # change ilens accordingly
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    ilens = np.ceil(np.array(ilens, dtype=np.float32) / 2).astype(np.int)
    return xs, ilens
Example #5
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_pose, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #6
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_pose = self.pose_encoder(pose_x)
    h = F.concat((h_pos, h_pose), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #7
Source File: cnn.py From fpl with MIT License | 6 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h_pos = self.pos_encoder(pos_x)
    h_ego = self.ego_encoder(ego_x)
    h = F.concat((h_pos, h_ego), axis=1)  # (B, C, 2)
    h = self.inter(h)
    h_pos = self.pos_decoder(h)
    pred_y = self.last(h_pos)  # (B, 10, C+6+28)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #8
Source File: common.py From imgclsmob with MIT License | 6 votes |
def channel_shuffle(x, groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083.

    Parameters:
    ----------
    x : chainer.Variable or numpy.ndarray or cupy.ndarray
        Input variable.
    groups : int
        Number of groups.

    Returns
    -------
    chainer.Variable or numpy.ndarray or cupy.ndarray
        Resulted variable.
    """
    batch, channels, height, width = x.shape
    channels_per_group = channels // groups
    x = F.reshape(x, shape=(batch, groups, channels_per_group, height, width))
    x = F.swapaxes(x, axis1=1, axis2=2)
    x = F.reshape(x, shape=(batch, channels, height, width))
    return x
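To make the effect of this reshape/swapaxes/reshape pattern concrete, here is a small, hypothetical sanity check. The input shape and fill values are made up for illustration, and it assumes channel_shuffle from this example is in scope and that F refers to chainer.functions:

import numpy as np

# Hypothetical input: 1 sample, 6 channels, 2x2 spatial map,
# with every channel filled with its own index so the permutation is visible.
x = np.arange(6, dtype=np.float32).reshape(1, 6, 1, 1) * np.ones((1, 6, 2, 2), dtype=np.float32)

y = channel_shuffle(x, groups=2)
print(y.shape)              # (1, 6, 2, 2) -- the shape is unchanged
print(y.array[0, :, 0, 0])  # [0. 3. 1. 4. 2. 5.] -- channels interleaved across the two groups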
Example #9
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = F.swapaxes(x, 1, 3)
    y2 = F.swapaxes(x, 0, 1)
    return y1, y2
Example #10
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def reorganize_by_head(Q, n_heads):
    mb_size, n_Q, d_model = Q.data.shape
    assert d_model % n_heads == 0
    head_size = d_model // n_heads
    reshaped_Q = F.reshape(Q, (mb_size, n_Q, n_heads, head_size))
    return F.swapaxes(reshaped_Q, 1, 2)
Example #11
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def undo_reorganize_by_head(Q):
    mb_size, n_heads, n_Q, head_size = Q.data.shape
    swapped_Q = F.swapaxes(Q, 1, 2)
    return F.reshape(swapped_Q, (mb_size, n_Q, -1))
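The two helpers above are inverses of each other. A rough, hypothetical sanity check (shapes chosen arbitrarily, assuming both functions and chainer are importable) could look like this:

import numpy as np
import chainer

# Hypothetical minibatch: 2 sequences of 5 query vectors, d_model = 8, 4 heads.
Q = chainer.Variable(np.random.rand(2, 5, 8).astype(np.float32))

heads = reorganize_by_head(Q, n_heads=4)   # (2, 4, 5, 2): mb_size, n_heads, n_Q, head_size
restored = undo_reorganize_by_head(heads)  # (2, 5, 8): back to mb_size, n_Q, d_model
print(heads.shape, restored.shape)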
Example #12
Source File: attention.py From espnet with Apache License 2.0 | 5 votes |
def forward(self, e_var, s_var=None, mask=None, batch=1):
    """Core function of the Multi-head attention layer.

    Args:
        e_var (chainer.Variable): Variable of input array.
        s_var (chainer.Variable): Variable of source array from encoder.
        mask (chainer.Variable): Attention mask.
        batch (int): Batch size.

    Returns:
        chainer.Variable: Output of multi-head attention layer.

    """
    xp = self.xp
    if s_var is None:
        # (batch, head, time1/2, d_k)
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(e_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(e_var).reshape(batch, -1, self.h, self.d_k)
    else:
        Q = self.linear_q(e_var).reshape(batch, -1, self.h, self.d_k)
        K = self.linear_k(s_var).reshape(batch, -1, self.h, self.d_k)
        V = self.linear_v(s_var).reshape(batch, -1, self.h, self.d_k)
    scores = F.matmul(F.swapaxes(Q, 1, 2), K.transpose(0, 2, 3, 1)) / np.sqrt(self.d_k)
    if mask is not None:
        mask = xp.stack([mask] * self.h, axis=1)
        scores = F.where(mask, scores, xp.full(scores.shape, MIN_VALUE, "f"))
    self.attn = F.softmax(scores, axis=-1)
    p_attn = F.dropout(self.attn, self.dropout)
    x = F.matmul(p_attn, F.swapaxes(V, 1, 2))
    x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
    return self.linear_out(x)
Example #13
Source File: functions_ndarray.py From chainer-compiler with MIT License | 5 votes |
def assign(self, target: 'Object'):
    # unimplemented
    temp = np.array(0)
    for v in dir(temp):
        func = values.Object(
            values.FuncValue(functions.UnimplementedFunction(v), target, None))
        target.attributes.set_predefined_obj(str(v), func)

    shape_func = values.Object(
        values.FuncValue(NDArrayShapeFunction(), target, None))
    target.attributes.set_predefined_obj('shape', shape_func)

    size_func = values.Object(
        values.FuncValue(NDArraySizeFunction(), target, None))
    target.attributes.set_predefined_obj('size', size_func)

    cumsum_func = values.Object(
        values.FuncValue(NDArrayCumsumFunction(), target, None))
    target.attributes.set_predefined_obj('cumsum', cumsum_func)

    def add_chainer_function(func):
        func_ = values.Object(
            values.FuncValue(NDArrayChainerFunction(func), target, None))
        target.attributes.set_predefined_obj(func.__name__, func_)

    add_chainer_function(F.reshape)
    add_chainer_function(F.sum)
    add_chainer_function(F.swapaxes)
    add_chainer_function(F.transpose)
Example #14
Source File: test_swapaxes.py From chainer with MIT License | 5 votes |
def forward(self, inputs, devices):
    x, = inputs
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y,
Example #15
Source File: test_swapaxes.py From chainer with MIT License | 5 votes |
def forward_expected(self, inputs):
    x, = inputs
    y_expected = x.swapaxes(self.axis1, self.axis2)
    return y_expected,
Example #16
Source File: module.py From fpl with MIT License | 5 votes |
def __call__(self, x):
    h = F.swapaxes(x, 1, 2)  # (B, D, L)
    for idx in range(self.nb_layers):
        h = getattr(self, "conv{}".format(idx))(h)
    return h
Example #17
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel, xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): ChxVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #18
Source File: cnn.py From fpl with MIT License | 5 votes |
def __call__(self, inputs):
    pos_x, pos_y, offset_x, ego_x, ego_y, pose_x, pose_y = self._prepare_input(inputs)
    batch_size, past_len, _ = pos_x.shape

    h = self.pos_encoder(pos_x)
    h = self.inter(h)
    h = self.pos_decoder(h)
    pred_y = self.last(h)
    pred_y = F.swapaxes(pred_y, 1, 2)
    pred_y = pred_y[:, :pos_y.shape[1], :]
    loss = F.mean_squared_error(pred_y, pos_y)
    pred_y = pred_y + F.broadcast_to(F.expand_dims(offset_x, 1), pred_y.shape)
    pred_y = cuda.to_cpu(pred_y.data) * self._std + self._mean
    return loss, pred_y, None
Example #19
Source File: module.py From fpl with MIT License | 5 votes |
def __call__(self, x):
    h = F.swapaxes(x, 1, 2)  # (B, D, L)
    for idx in range(self.nb_layers):
        h = getattr(self, "conv{}".format(idx))(h)
    return h
Example #20
Source File: main.py From qb with MIT License | 5 votes |
def __call__(self, xs):
    """
    Forward pass of a sentence.
    :param xs: a batch of sentences
    :return h: final hidden states
    """
    xs = self.embed(xs)
    xs = F.swapaxes(xs, 0, 1)  # time, batch, embed
    self.rnn.reset_state()
    for x in xs:
        h = self.rnn(x)
    h = F.tanh(self.linear(h))
    return h
Example #21
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = F.swapaxes(x, 1, 3)
    y2 = F.swapaxes(x, 0, 1)
    return y1, y2

# ======================================
Example #22
Source File: voca.py From imgclsmob with MIT License | 5 votes |
def __call__(self, x, pid):
    x = self.bn(x)
    x = F.swapaxes(x, axis1=1, axis2=3)

    y = F.expand_dims(F.expand_dims(pid, axis=-1), axis=-1)
    y = F.tile(y, reps=(1, 1, self.audio_window_size, 1))
    x = F.concat((x, y), axis=1)

    x = self.branch(x)

    x = F.reshape(x, shape=(x.shape[0], -1))
    x = F.concat((x, pid), axis=1)
    x = self.fc1(x)
    x = F.tanh(x)
    x = self.fc2(x)
    return x
Example #23
Source File: transformer.py From EEND with MIT License | 5 votes |
def __call__(self, x, batch_size):
    # x: (BT, F)
    # TODO: if chainer >= 5.0, use linear functions with 'n_batch_axes'
    # and x be (B, T, F), then remove batch_size.
    q = self.linearQ(x).reshape(batch_size, -1, self.h, self.d_k)
    k = self.linearK(x).reshape(batch_size, -1, self.h, self.d_k)
    v = self.linearV(x).reshape(batch_size, -1, self.h, self.d_k)
    scores = F.matmul(
        F.swapaxes(q, 1, 2), k.transpose(0, 2, 3, 1)) / np.sqrt(self.d_k)
    # scores: (B, h, T, T)
    self.att = F.softmax(scores, axis=3)
    p_att = F.dropout(self.att, self.dropout)
    x = F.matmul(p_att, F.swapaxes(v, 1, 2))
    x = F.swapaxes(x, 1, 2).reshape(-1, self.h * self.d_k)
    return self.linearO(x)
Example #24
Source File: SwapAxes.py From chainer-compiler with MIT License | 5 votes |
def forward(self, x):
    y1 = x.swapaxes(1, 3)
    y2 = x.swapaxes(0, 1)
    return y1, y2

# ======================================
Example #25
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel, xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): XCVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #26
Source File: EspNet_AttLoc.py From chainer-compiler with MIT License | 4 votes |
def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
    '''AttLoc forward

    :param enc_hs:
    :param dec_z:
    :param att_prev:
    :param scaling:
    :return:
    '''
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h)

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # initialize attention weight with uniform dist.
    if att_prev is None:
        att_prev = [self.xp.full(
            hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
        att_prev = [chainer.Variable(att) for att in att_prev]
        att_prev = F.pad_sequence(att_prev)

    # TODO(watanabe) use <chainer variable>.reshpae(), instead of F.reshape()
    # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
    att_conv = self.loc_conv(
        F.reshape(att_prev, (batch, 1, 1, self.h_length)))
    # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
    att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
    # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
    att_conv = linear_tensor(self.mlp_att, att_conv)

    # dec_z_tiled: utt x frame x att_dim
    dec_z_tiled = F.broadcast_to(
        F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

    # dot with gvec
    # utt x frame x att_dim -> utt x frame
    # TODO(watanabe) use batch_matmul
    e = F.squeeze(linear_tensor(self.gvec, F.tanh(
        att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)

    # weighted sum over flames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #27
Source File: EspNet_AttLoc.py From chainer-compiler with MIT License | 4 votes |
def forward(self, enc_hs, dec_z, att_prev):
    '''AttLoc forward

    :param enc_hs:
    :param dec_z:
    :param att_prev:
    :param scaling:
    :return:
    '''
    # EDIT(hamaji): scaling is now a local variable.
    scaling = 2.0
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = linear_tensor_3d(self.mlp_enc, self.enc_h)

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # initialize attention weight with uniform dist.
    if att_prev is None:
        att_prev = [self.xp.full(
            hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32) for hh in enc_hs]
        att_prev = [chainer.Variable(att) for att in att_prev]
        att_prev = F.pad_sequence(att_prev)

    # TODO(watanabe) use <chainer variable>.reshpae(), instead of F.reshape()
    # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
    att_conv = self.loc_conv(
        F.reshape(att_prev, (batch, 1, 1, self.h_length)))
    # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
    att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
    # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
    att_conv = linear_tensor_3d(self.mlp_att, att_conv)

    # dec_z_tiled: utt x frame x att_dim
    dec_z_tiled = F.broadcast_to(
        F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape)

    # dot with gvec
    # utt x frame x att_dim -> utt x frame
    # TODO(watanabe) use batch_matmul
    e = F.squeeze(linear_tensor_3d(self.gvec, F.tanh(
        att_conv + self.pre_compute_enc_h + dec_z_tiled)), axis=2)
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)

    # weighted sum over flames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #28
Source File: encoders.py From espnet with Apache License 2.0 | 4 votes |
def __call__(self, xs, ilens):
    """VGG2L forward propagation.

    Args:
        xs (chainer.Variable): Batch of padded charactor ids. (B, Tmax)
        ilens (chainer.Variable): Batch of length of each features. (B,)

    Returns:
        chainer.Variable: Subsampled vector of xs.
        chainer.Variable: Subsampled vector of ilens.

    """
    logging.info(self.__class__.__name__ + " input lengths: " + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(
        xs.reshape(
            xs.shape[0],
            xs.shape[1],
            self.in_channel,
            xs.shape[2] // self.in_channel,
        ),
        1,
        2,
    )

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    ilens = self.xp.array(
        self.xp.ceil(self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32
    )
    ilens = self.xp.array(
        self.xp.ceil(self.xp.array(ilens, dtype=np.float32) / 2), dtype=np.int32
    )

    # x: utt_list of frame (remove zeropaded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = xs.reshape(xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3])
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens