Python chainer.functions.reshape() Examples
The following are 30 code examples of chainer.functions.reshape(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
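Before the project examples, a minimal self-contained sketch (not taken from any project below) of the function itself: F.reshape(x, shape) accepts an array or Variable and returns a Variable whose data is viewed under the new shape; one dimension may be given as -1 and is inferred from the total number of elements.

    import numpy as np
    import chainer.functions as F

    x = np.arange(6, dtype=np.float32)  # shape (6,)
    y = F.reshape(x, (2, 3))            # Variable with shape (2, 3)
    z = F.reshape(y, (3, -1))           # -1 infers the last dimension: shape (3, 2)
    print(y.shape, z.shape)             # (2, 3) (3, 2)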
Example #1
Source File: model.py From chainer-gqn with MIT License | 6 votes |
def generate_image_from_zero_z(self, v, r):
    xp = cuda.get_array_module(v)
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    for t in range(self.num_layers):
        generation_core = self.get_generation_core(t)

        mean_z_p, _ = self.z_prior_distribution.compute_parameter(h_t_gen)
        z_t = xp.zeros_like(mean_z_p.data)

        h_next_gen, c_next_gen, u_next = generation_core(
            h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_next
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data
Example #2
Source File: decoder.py From knmt with GNU General Public License v3.0 | 6 votes |
def get_initial_logits(self, mb_size=None):
    if mb_size is None:
        mb_size = self.src_mb_size
    else:
        assert self.src_mb_size == 1
    assert mb_size is not None

    bos_encoding = F.broadcast_to(self.decoder_chain.bos_encoding,
                                  (mb_size, 1, self.decoder_chain.d_model))

    cross_mask = self.decoder_chain.xp.broadcast_to(
        self.mask_input[:, 0:1, 0:1, :],
        (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))

    final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
        bos_encoding, None, self.src_encoding, cross_mask)

    logits = self.decoder_chain.logits_layer(
        F.reshape(final_layer, (mb_size, self.decoder_chain.d_model)))

    return logits, DecoderState(pos=-1, prev_states=prev_states)
Example #3
Source File: dqn.py From chainerrl with MIT License | 6 votes |
def _compute_y_and_t(self, exp_batch):
    batch_size = exp_batch['reward'].shape[0]

    # Compute Q-values for current states
    batch_state = exp_batch['state']

    if self.recurrent:
        qout, _ = self.model.n_step_forward(
            batch_state,
            exp_batch['recurrent_state'],
            output_mode='concat',
        )
    else:
        qout = self.model(batch_state)

    batch_actions = exp_batch['action']
    batch_q = F.reshape(qout.evaluate_actions(
        batch_actions), (batch_size, 1))

    with chainer.no_backprop_mode():
        batch_q_target = F.reshape(
            self._compute_target_values(exp_batch),
            (batch_size, 1))

    return batch_q, batch_q_target
Example #4
Source File: StatelessLSTM.py From chainer-compiler with MIT License | 6 votes |
def _initialize_params(self):
    lateral_init = initializers._get_initializer(self.lateral_init)
    upward_init = initializers._get_initializer(self.upward_init)
    bias_init = initializers._get_initializer(self.bias_init)
    forget_bias_init = initializers._get_initializer(self.forget_bias_init)

    for i in six.moves.range(0, 4 * self.state_size, self.state_size):
        lateral_init(self.lateral.W.data[i:i + self.state_size, :])
        upward_init(self.upward.W.data[i:i + self.state_size, :])

    a, i, f, o = lstm._extract_gates(
        self.upward.b.data.reshape(1, 4 * self.state_size, 1))
    bias_init(a)
    bias_init(i)
    forget_bias_init(f)
    bias_init(o)
Example #5
Source File: dueling_dqn.py From chainerrl with MIT License | 6 votes |
def __call__(self, x):
    h = x
    for l in self.conv_layers:
        h = self.activation(l(h))

    # Advantage
    batch_size = x.shape[0]

    h = self.activation(self.main_stream(h))
    h_a, h_v = F.split_axis(h, 2, axis=-1)
    ya = F.reshape(self.a_stream(h_a),
                   (batch_size, self.n_actions, self.n_atoms))

    mean = F.sum(ya, axis=1, keepdims=True) / self.n_actions

    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = F.reshape(self.v_stream(h_v), (batch_size, 1, self.n_atoms))
    ya, ys = F.broadcast(ya, ys)
    q = F.softmax(ya + ys, axis=2)

    return action_value.DistributionalDiscreteActionValue(q, self.z_values)
Example #6
Source File: dueling_dqn.py From chainerrl with MIT License | 6 votes |
def __call__(self, x):
    h = x
    for l in self.conv_layers:
        h = self.activation(l(h))

    # Advantage
    batch_size = x.shape[0]
    ya = self.a_stream(h)
    mean = F.reshape(
        F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = self.v_stream(h)

    ya, ys = F.broadcast(ya, ys)
    q = ya + ys
    return action_value.DiscreteActionValue(q)
Example #7
Source File: iqn.py From chainerrl with MIT License | 6 votes |
def _evaluate_psi_x_with_quantile_thresholds(psi_x, phi, f, taus):
    assert psi_x.ndim == 2
    batch_size, hidden_size = psi_x.shape
    assert taus.ndim == 2
    assert taus.shape[0] == batch_size
    n_taus = taus.shape[1]
    phi_taus = phi(taus)
    assert phi_taus.ndim == 3
    assert phi_taus.shape == (batch_size, n_taus, hidden_size)
    psi_x_b = F.broadcast_to(
        F.expand_dims(psi_x, axis=1), phi_taus.shape)
    h = psi_x_b * phi_taus
    h = F.reshape(h, (-1, hidden_size))
    assert h.shape == (batch_size * n_taus, hidden_size)
    h = f(h)
    assert h.ndim == 2
    assert h.shape[0] == batch_size * n_taus
    n_actions = h.shape[-1]
    h = F.reshape(h, (batch_size, n_taus, n_actions))
    return QuantileDiscreteActionValue(h)
Example #8
Source File: train_dqn_batch_grasping.py From chainerrl with MIT License | 6 votes |
def __init__(self, n_actions, max_episode_steps):
    super().__init__()
    with self.init_scope():
        self.embed = L.EmbedID(max_episode_steps + 1, 3136)
        self.image2hidden = chainerrl.links.Sequence(
            L.Convolution2D(None, 32, 8, stride=4),
            F.relu,
            L.Convolution2D(None, 64, 4, stride=2),
            F.relu,
            L.Convolution2D(None, 64, 3, stride=1),
            functools.partial(F.reshape, shape=(-1, 3136)),
        )
        self.hidden2out = chainerrl.links.Sequence(
            L.Linear(None, 512),
            F.relu,
            L.Linear(None, n_actions),
            DiscreteActionValue,
        )
Example #9
Source File: MnihCNN_rcis.py From ssai-cnn with MIT License | 6 votes |
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.relu(self.fc4(h))
    h = self.fc5(h)
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    else:
        self.pred = F.softmax(h)
        return self.pred
Example #10
Source File: model.py From chainer-gqn with MIT License | 6 votes |
def generate_image(self, v, r):
    xp = cuda.get_array_module(v)
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    for t in range(self.num_layers):
        generation_core = self.get_generation_core(t)

        mean_z_p, ln_var_z_p = self.z_prior_distribution.compute_parameter(
            h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen, u_next = generation_core(
            h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_next
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data
Example #11
Source File: MnihCNN_cis.py From ssai-cnn with MIT License | 6 votes |
def channelwise_inhibited(self, h):
    xp = cuda.get_array_module(h.data)
    num = h.data.shape[0]

    h = F.split_axis(h, 3, 1)
    c = F.reshape(h[self.c], (num, 16, 16))
    # second positional argument is the old Chainer v1 `volatile` flag
    z = Variable(xp.zeros_like(c.data), 'AUTO')
    c = F.batch_matmul(c, z)
    c = F.reshape(c, (num, 1, 16, 16))
    hs = []
    for i, s in enumerate(h):
        if i == self.c:
            hs.append(c)
        else:
            hs.append(s)
    return F.concat(hs, 1)
Example #12
Source File: MnihCNN_cis.py From ssai-cnn with MIT License | 6 votes |
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.dropout(F.relu(self.fc4(h)), train=self.train)
    h = self.fc5(h)
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    else:
        self.pred = F.softmax(h)
        return self.pred
Example #13
Source File: attention.py From knmt with GNU General Public License v3.0 | 6 votes |
def __call__(self, inpt, mask):
    mb_size = inpt.data.shape[0]
    max_length = inpt.data.shape[1]

    precomp = F.reshape(F.tanh(self.lin(F.reshape(inpt, (-1, self.Hi)))),
                        (mb_size, -1, self.Ho))

    mask_offset = max_length - len(mask)

    precomp_mask_penalties = self.xp.concatenate(
        [
            self.xp.zeros((mb_size, mask_offset), dtype=self.xp.float32),
            -10000 * (1 - self.xp.concatenate([
                self.xp.reshape(mask_elem, (mb_size, 1)).astype(self.xp.float32)
                for mask_elem in mask], 1))
        ], 1
    )

    def compute_copy_coefficients(state):
        betas = F.reshape(batch_matmul(precomp, state), (mb_size, -1))
        masked_betas = betas + precomp_mask_penalties
        return masked_betas

    return compute_copy_coefficients
Example #14
Source File: block.py From Deep_VoiceChanger with MIT License | 6 votes |
def __call__(self, x):
    if self.dr:
        x = F.dropout(x, self.dr)
    x = F.transpose(x, (0, 2, 1, 3))
    out_shape = list(x.shape)
    x = F.reshape(x, (-1, x.shape[2] * x.shape[3]))
    x = self.l(x)
    x = self.activation(x)
    out_shape[2] = self.out_ch
    x = F.reshape(x, out_shape)
    x = F.transpose(x, (0, 2, 1, 3))
    return x
Example #15
Source File: block.py From Deep_VoiceChanger with MIT License | 6 votes |
def __call__(self, x):
    if self.dr:
        with chainer.using_config('train', True):
            x = F.dropout(x, self.dr)
    if self.gap:
        x = F.sum(x, axis=(2, 3))
    N = x.shape[0]

    # Below code copied from https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
    feature = F.reshape(F.leaky_relu(x), (N, -1))
    m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
    m0 = F.broadcast_to(m, (N, self.B * self.C, N))
    m1 = F.transpose(m0, (2, 1, 0))
    d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
    d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
    h = F.concat([feature, d])
    h = self.l(h)
    return h
Example #16
Source File: EspNet_AttDot.py From chainer-compiler with MIT License | 5 votes |
def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
    '''AttDot forward

    :param enc_hs:
    :param dec_z:
    :param scaling:
    :return:
    '''
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = F.tanh(
            linear_tensor(self.mlp_enc, self.enc_h))

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # <phi (h_t), psi (s)> for all t
    u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                       self.pre_compute_enc_h.shape)
    e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)
    # weighted sum over frames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #17
Source File: EspNet_AttDot.py From chainer-compiler with MIT License | 5 votes |
def forward(self, enc_hs, dec_z, att_prev):
    '''AttDot forward

    :param enc_hs:
    :param dec_z:
    :param scaling:
    :return:
    '''
    # EDIT(hamaji): scaling is now a local variable.
    scaling = 2.0
    batch = len(enc_hs)

    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = F.tanh(
            linear_tensor(self.mlp_enc, self.enc_h))

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # <phi (h_t), psi (s)> for all t
    u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                       self.pre_compute_enc_h.shape)
    e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)
    # weighted sum over frames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #18
Source File: decoder.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_loss(self, seq_list, encoded_input, mask_input, reduce="mean"):
    logits = self.compute_logits(seq_list, encoded_input, mask_input)
    padded_target_with_eos = pad_data(seq_list, pad_value=-1, add_eos=self.eos_idx)
    padded_target_with_eos = self.move_np_array_to_correct_device(padded_target_with_eos)
    loss = F.softmax_cross_entropy(F.reshape(logits, (-1, self.V + 1)),
                                   padded_target_with_eos.reshape(-1,),
                                   reduce=reduce)
    return loss
Example #19
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel,
             xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): ChxVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zero-padded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #20
Source File: decoder.py From knmt with GNU General Public License v3.0 | 5 votes |
def __call__(self, prev_decoder_state, inpt):
    current_mb_size = inpt.shape[0]
    # mask = np.zeros((current_mb_size, ), dtype = np.float32)
    # padded = np.zeros((current_mb_size, ), dtype = np.float32)
    # for num_batch, idx in enumerate(inpt):
    #     padded[num_batch] = idx if idx is not None else 0
    #     mask[num_batch] = 0 if idx is not None else -10000

    prev_decoder_state = prev_decoder_state.reduce_to_minibatch_size(current_mb_size)
    current_pos = prev_decoder_state.get_pos() + 1

    encoded = self.decoder_chain.emb(inpt)
    pos_vect = self.decoder_chain.get_one_pos_vect(current_mb_size, current_pos)

    encoded = encoded + pos_vect

    if self.decoder_chain.dropout is not None:
        encoded = F.dropout(encoded, self.decoder_chain.dropout)

    cross_mask = self.decoder_chain.xp.broadcast_to(
        self.mask_input[:, 0:1, 0:1, :],
        (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))

    final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
        encoded, prev_decoder_state.get_states(), self.src_encoding, cross_mask)

    # logits = apply_linear_layer_to_last_dims(final_layer, self.decoder_chain.logits_layer)
    logits = self.decoder_chain.logits_layer(
        F.reshape(final_layer, (current_mb_size, self.decoder_chain.d_model)))

    return logits, DecoderState(pos=current_pos, prev_states=prev_states)
Example #21
Source File: largefov_light.py From Semantic-Segmentation-using-Adversarial-Networks with MIT License | 5 votes |
def __call__(self, x):
    h = F.relu(self.conv1_1(x))
    h = F.relu(self.conv1_2(h))
    h = F.relu(self.conv1_3(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu(self.conv2_1(h))
    h = F.relu(self.conv2_2(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu(self.conv3_1(h))
    h = self.conv3_2(h)
    h = f.global_average_pooling_2d(h)
    h = F.reshape(h, (h.shape[0], h.shape[1]))
    return h
Example #22
Source File: utils.py From knmt with GNU General Public License v3.0 | 5 votes |
def apply_linear_layer_to_last_dims(Q, w_Q):
    mb_size_Q, n_Q, d_model_Q = Q.data.shape
    return F.reshape(w_Q(F.reshape(Q, (mb_size_Q * n_Q, d_model_Q))),
                     (mb_size_Q, n_Q, -1))

########################################################################
# Generating position vectors according to Google's paper formula
#
Example #23
Source File: utils.py From knmt with GNU General Public License v3.0 | 5 votes |
def apply_layer_normalization(self, added_output):
    if len(added_output.shape) > 2:
        d_model = added_output.shape[-1]
        final_layer = F.reshape(
            self.normalizing_layer(
                F.reshape(added_output, (-1, d_model))
            ), added_output.shape)
    else:
        final_layer = self.normalizing_layer(added_output)
    return final_layer
Example #24
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def __init__(self, d_model, n_heads, experimental_relu=False, dropout=None,
             residual_mode="normal", no_normalize=False):
    super(AddAndNormalizedAttentionBase, self).__init__(
        multi_attention=ConstantSizeMultiBatchMultiHeadAttention(
            d_model=d_model, n_heads=n_heads,
            experimental_relu=experimental_relu, dropout=dropout),
        residual_layer=DropoutAndAddAndNormalize(
            dropout=dropout, residual_mode=residual_mode, no_normalize=no_normalize)
    )
    self.d_model = d_model

#         self.dropout = dropout
#
#         if not no_normalize:
#             self.add_link("normalizing_layer", LayerNormalization())
#
#         self.no_add = no_add
#         self.no_normalize = no_normalize

#     def dropout_and_add_and_normalize(self, sub_output, inpt, train=True):
#         if self.dropout is not None:
#             sub_output = F.dropout(sub_output, ratio=self.dropout, train=train)
#
#         if self.no_add:
#             added_output = sub_output
#         else:
#             added_output = sub_output + inpt
#
#         if self.no_normalize:
#             final_layer = added_output
#         else:
#             mb, length, d_model = added_output.shape
#             final_layer = F.reshape(
#                 self.normalizing_layer(
#                     F.reshape(added_output, (mb * length, d_model))
#                 ), (mb, length, d_model))
#
#         return final_layer
Example #25
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def batch_matmul_last_dims(A, B, transa=False, transb=False):
    assert A.data.shape[:-2] == B.data.shape[:-2]
    reshaped_A = F.reshape(A, (-1,) + A.data.shape[-2:])
    reshaped_B = F.reshape(B, (-1,) + B.data.shape[-2:])
    reshaped_result = F.batch_matmul(reshaped_A, reshaped_B, transa=transa, transb=transb)
    result = F.reshape(reshaped_result,
                       A.data.shape[:-2] + reshaped_result.data.shape[-2:])
    return result

########################################################################
# Multihead Attention
#
Example #26
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def test_reorganize_by_head():
    Q = Variable(np.arange(2*3*5*7).reshape(5, 7, 2*3).astype(np.float32))
    Qr = reorganize_by_head(Q, 2)
    Qrr = undo_reorganize_by_head(Qr)
    assert np.all(Qrr.data == Q.data)
    assert Qr.data.base is Q.data
    assert Qrr.data.base is Q.data
    assert np.all(Qr.data[:, 0, :, :] % 6 < 3)
    assert np.all(Qr.data[:, 1, :, :] % 6 >= 3)
Example #27
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def undo_reorganize_by_head(Q):
    mb_size, n_heads, n_Q, head_size = Q.data.shape
    swapped_Q = F.swapaxes(Q, 1, 2)
    return F.reshape(swapped_Q, (mb_size, n_Q, -1))
Example #28
Source File: decoder_cells.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_logits(self, new_states, concatenated, attn):
    new_output_state = new_states[-1]

    all_concatenated = F.concat((concatenated, new_output_state))
    logits = self.decoder_chain.lin_o(self.decoder_chain.maxo(all_concatenated))

    if self.lexicon_probability_matrix is not None:
        current_mb_size = new_output_state.data.shape[0]
        assert self.mb_size is None or current_mb_size <= self.mb_size
        lexicon_probability_matrix = self.lexicon_probability_matrix[:current_mb_size]

        # Just making sure data shape is as expected
        attn_mb_size, max_source_length_attn = attn.data.shape
        assert attn_mb_size == current_mb_size
        lex_mb_size, max_source_length_lexicon, v_size_lexicon = lexicon_probability_matrix.shape
        assert max_source_length_lexicon == max_source_length_attn
        assert logits.data.shape == (current_mb_size, v_size_lexicon)

        if self.demux:
            assert lex_mb_size == 1
            weighted_lex_probs = F.reshape(
                matmul_constant(attn, lexicon_probability_matrix.reshape(
                    lexicon_probability_matrix.shape[1],
                    lexicon_probability_matrix.shape[2])),
                logits.data.shape)
        else:
            assert lex_mb_size == current_mb_size

            # weighted_lex_probs = F.reshape(
            #     F.batch_matmul(attn, ConstantFunction(lexicon_probability_matrix)(), transa=True),
            #     logits.data.shape)

            weighted_lex_probs = F.reshape(
                batch_matmul_constant(attn, lexicon_probability_matrix, transa=True),
                logits.data.shape)

        logits += F.log(weighted_lex_probs + self.lex_epsilon)

    return logits
Example #29
Source File: attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_ctxt_demux(self, fb_concat, mask):
    mb_size, nb_elems, Hi = fb_concat.data.shape
    assert Hi == self.Hi
    assert mb_size == 1
    assert len(mask) == 0

    precomputed_al_factor = F.reshape(self.al_lin_h(
        F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))),
        (mb_size, nb_elems, self.Ha))

    # concatenated_mask = F.concat([F.reshape(mask_elem, (mb_size, 1)) for mask_elem in mask], 1)

    def compute_ctxt(previous_state, prev_word_embedding=None):
        current_mb_size = previous_state.data.shape[0]

        al_factor = F.broadcast_to(precomputed_al_factor, (current_mb_size, nb_elems, self.Ha))
        # used_fb_concat = F.broadcast_to(fb_concat, (current_mb_size, nb_elems, Hi))
        # used_concatenated_mask = F.broadcast_to(concatenated_mask, (current_mb_size, nb_elems))

        state_al_factor = self.al_lin_s(previous_state)

        # As suggested by Isao Goto
        if prev_word_embedding is not None:
            state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)

        state_al_factor_bc = F.broadcast_to(
            F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)),
            (current_mb_size, nb_elems, self.Ha))
        a_coeffs = F.reshape(self.al_lin_o(F.reshape(
            F.tanh(state_al_factor_bc + al_factor),
            (current_mb_size * nb_elems, self.Ha))), (current_mb_size, nb_elems))

        # with cuda.get_device_from_array(used_concatenated_mask.data):
        #     a_coeffs = a_coeffs - 10000 * (1 - used_concatenated_mask.data)

        attn = F.softmax(a_coeffs)

        # ci = F.reshape(F.batch_matmul(attn, used_fb_concat, transa=True), (current_mb_size, self.Hi))
        ci = F.reshape(F.matmul(attn, F.reshape(fb_concat, (nb_elems, Hi))),
                       (current_mb_size, self.Hi))

        return ci, attn

    return compute_ctxt
Example #30
Source File: invert_diff.py From ssai-cnn with MIT License | 5 votes |
def tv_norm(self, x):
    diffh = self.tvh(
        F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
    diffw = self.tvw(
        F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
    tv = (F.sum(diffh ** 2) + F.sum(diffw ** 2)) ** (self.args.beta / 2.)
    return tv