Python chainer.functions.reshape() Examples

The following are 30 code examples of chainer.functions.reshape(), drawn from open-source projects. The source file, project, and license for each snippet are noted above it. You may also want to check out all available functions/classes of the module chainer.functions.
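
Before the project code, a minimal sketch of the call itself (array values here are purely illustrative): F.reshape() takes an input array or Variable and a target shape tuple whose total element count must match the input's; a -1 entry lets Chainer infer that dimension, which is the idiom many snippets below use to flatten per-batch features.

import numpy as np
import chainer.functions as F

x = np.arange(12, dtype=np.float32).reshape(3, 4)
y = F.reshape(x, (2, 6))             # explicit target shape; 2 * 6 == 3 * 4
z = F.reshape(x, (x.shape[0], -1))   # -1 infers the remaining dimension
print(y.shape, z.shape)              # (2, 6) (3, 4)
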
Example #1
Source File: model.py    From chainer-gqn with MIT License
def generate_image_from_zero_z(self, v, r):
        xp = cuda.get_array_module(v)

        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)

        v = cf.reshape(v, v.shape[:2] + (1, 1))

        for t in range(self.num_layers):
            generation_core = self.get_generation_core(t)

            mean_z_p, _ = self.z_prior_distribution.compute_parameter(h_t_gen)
            z_t = xp.zeros_like(mean_z_p.data)

            h_next_gen, c_next_gen, u_next = generation_core(
                h_t_gen, c_t_gen, z_t, v, r, u_t)

            u_t = u_next
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

        mean_x = self.map_u_x(u_t)
        return mean_x.data 
Example #2
Source File: decoder.py    From knmt with GNU General Public License v3.0
def get_initial_logits(self, mb_size = None):
        if mb_size is None:
            mb_size = self.src_mb_size
        else:
            assert self.src_mb_size == 1
        assert mb_size is not None
    
        bos_encoding = F.broadcast_to(self.decoder_chain.bos_encoding, (mb_size, 1, self.decoder_chain.d_model))
        
        cross_mask = self.decoder_chain.xp.broadcast_to(self.mask_input[:,0:1,0:1,:], (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))
        
        final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
            bos_encoding, None, self.src_encoding, cross_mask)
        
        logits = self.decoder_chain.logits_layer(F.reshape(final_layer, (mb_size, self.decoder_chain.d_model)))
        return logits, DecoderState(pos=-1, prev_states=prev_states) 
Example #3
Source File: dqn.py    From chainerrl with MIT License
def _compute_y_and_t(self, exp_batch):
        batch_size = exp_batch['reward'].shape[0]

        # Compute Q-values for current states
        batch_state = exp_batch['state']

        if self.recurrent:
            qout, _ = self.model.n_step_forward(
                batch_state,
                exp_batch['recurrent_state'],
                output_mode='concat',
            )
        else:
            qout = self.model(batch_state)

        batch_actions = exp_batch['action']
        batch_q = F.reshape(qout.evaluate_actions(
            batch_actions), (batch_size, 1))

        with chainer.no_backprop_mode():
            batch_q_target = F.reshape(
                self._compute_target_values(exp_batch),
                (batch_size, 1))

        return batch_q, batch_q_target 
Example #4
Source File: StatelessLSTM.py    From chainer-compiler with MIT License
def _initialize_params(self):
        lateral_init = initializers._get_initializer(self.lateral_init)
        upward_init = initializers._get_initializer(self.upward_init)
        bias_init = initializers._get_initializer(self.bias_init)
        forget_bias_init = initializers._get_initializer(self.forget_bias_init)

        for i in six.moves.range(0, 4 * self.state_size, self.state_size):
            lateral_init(self.lateral.W.data[i:i + self.state_size, :])
            upward_init(self.upward.W.data[i:i + self.state_size, :])

        a, i, f, o = lstm._extract_gates(
            self.upward.b.data.reshape(1, 4 * self.state_size, 1))

        bias_init(a)
        bias_init(i)
        forget_bias_init(f)
        bias_init(o) 
Example #5
Source File: dueling_dqn.py    From chainerrl with MIT License
def __call__(self, x):
        h = x
        for l in self.conv_layers:
            h = self.activation(l(h))

        # Advantage
        batch_size = x.shape[0]

        h = self.activation(self.main_stream(h))
        h_a, h_v = F.split_axis(h, 2, axis=-1)
        ya = F.reshape(self.a_stream(h_a),
                       (batch_size, self.n_actions, self.n_atoms))

        mean = F.sum(ya, axis=1, keepdims=True) / self.n_actions

        ya, mean = F.broadcast(ya, mean)
        ya -= mean

        # State value
        ys = F.reshape(self.v_stream(h_v), (batch_size, 1, self.n_atoms))
        ya, ys = F.broadcast(ya, ys)
        q = F.softmax(ya + ys, axis=2)

        return action_value.DistributionalDiscreteActionValue(q, self.z_values) 
Example #6
Source File: dueling_dqn.py    From chainerrl with MIT License
def __call__(self, x):
        h = x
        for l in self.conv_layers:
            h = self.activation(l(h))

        # Advantage
        batch_size = x.shape[0]
        ya = self.a_stream(h)
        mean = F.reshape(
            F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
        ya, mean = F.broadcast(ya, mean)
        ya -= mean

        # State value
        ys = self.v_stream(h)

        ya, ys = F.broadcast(ya, ys)
        q = ya + ys
        return action_value.DiscreteActionValue(q) 
Example #7
Source File: iqn.py    From chainerrl with MIT License
def _evaluate_psi_x_with_quantile_thresholds(psi_x, phi, f, taus):
    assert psi_x.ndim == 2
    batch_size, hidden_size = psi_x.shape
    assert taus.ndim == 2
    assert taus.shape[0] == batch_size
    n_taus = taus.shape[1]
    phi_taus = phi(taus)
    assert phi_taus.ndim == 3
    assert phi_taus.shape == (batch_size, n_taus, hidden_size)
    psi_x_b = F.broadcast_to(
        F.expand_dims(psi_x, axis=1), phi_taus.shape)
    h = psi_x_b * phi_taus
    h = F.reshape(h, (-1, hidden_size))
    assert h.shape == (batch_size * n_taus, hidden_size)
    h = f(h)
    assert h.ndim == 2
    assert h.shape[0] == batch_size * n_taus
    n_actions = h.shape[-1]
    h = F.reshape(h, (batch_size, n_taus, n_actions))
    return QuantileDiscreteActionValue(h) 
Example #8
Source File: train_dqn_batch_grasping.py    From chainerrl with MIT License
def __init__(self, n_actions, max_episode_steps):
        super().__init__()
        with self.init_scope():
            self.embed = L.EmbedID(max_episode_steps + 1, 3136)
            self.image2hidden = chainerrl.links.Sequence(
                L.Convolution2D(None, 32, 8, stride=4),
                F.relu,
                L.Convolution2D(None, 64, 4, stride=2),
                F.relu,
                L.Convolution2D(None, 64, 3, stride=1),
                functools.partial(F.reshape, shape=(-1, 3136)),
            )
            self.hidden2out = chainerrl.links.Sequence(
                L.Linear(None, 512),
                F.relu,
                L.Linear(None, n_actions),
                DiscreteActionValue,
            ) 
Example #9
Source File: MnihCNN_rcis.py    From ssai-cnn with MIT License
def __call__(self, x, t):
        h = F.relu(self.conv1(x))
        h = F.max_pooling_2d(h, 2, 1)
        h = F.relu(self.conv2(h))
        h = F.relu(self.conv3(h))
        h = F.relu(self.fc4(h))
        h = self.fc5(h)
        h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
        h = self.channelwise_inhibited(h)

        if self.train:
            self.loss = F.softmax_cross_entropy(h, t, normalize=False)
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred 
Example #10
Source File: model.py    From chainer-gqn with MIT License
def generate_image(self, v, r):
        xp = cuda.get_array_module(v)

        batch_size = v.shape[0]
        h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
            batch_size, xp)
        v = cf.reshape(v, v.shape[:2] + (1, 1))

        for t in range(self.num_layers):
            generation_core = self.get_generation_core(t)

            mean_z_p, ln_var_z_p = self.z_prior_distribution.compute_parameter(
                h_t_gen)
            z_t = cf.gaussian(mean_z_p, ln_var_z_p)

            h_next_gen, c_next_gen, u_next = generation_core(
                h_t_gen, c_t_gen, z_t, v, r, u_t)

            u_t = u_next
            h_t_gen = h_next_gen
            c_t_gen = c_next_gen

        mean_x = self.map_u_x(u_t)
        return mean_x.data 
Example #11
Source File: MnihCNN_cis.py    From ssai-cnn with MIT License
def channelwise_inhibited(self, h):
        xp = cuda.get_array_module(h.data)
        num = h.data.shape[0]

        h = F.split_axis(h, 3, 1)
        c = F.reshape(h[self.c], (num, 16, 16))
        z = Variable(xp.zeros_like(c.data), 'AUTO')
        c = F.batch_matmul(c, z)
        c = F.reshape(c, (num, 1, 16, 16))
        hs = []
        for i, s in enumerate(h):
            if i == self.c:
                hs.append(c)
            else:
                hs.append(s)
        return F.concat(hs, 1) 
Example #12
Source File: MnihCNN_cis.py    From ssai-cnn with MIT License
def __call__(self, x, t):
        h = F.relu(self.conv1(x))
        h = F.max_pooling_2d(h, 2, 1)
        h = F.relu(self.conv2(h))
        h = F.relu(self.conv3(h))
        h = F.dropout(F.relu(self.fc4(h)), train=self.train)
        h = self.fc5(h)
        h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
        h = self.channelwise_inhibited(h)

        if self.train:
            self.loss = F.softmax_cross_entropy(h, t, normalize=False)
            return self.loss
        else:
            self.pred = F.softmax(h)
            return self.pred 
Example #13
Source File: attention.py    From knmt with GNU General Public License v3.0
def __call__(self, inpt, mask):
        mb_size = inpt.data.shape[0]
        max_length = inpt.data.shape[1]

        precomp = F.reshape(F.tanh(self.lin(F.reshape(inpt, (-1, self.Hi)))), (mb_size, -1, self.Ho))

        mask_offset = max_length - len(mask)

        precomp_mask_penalties = self.xp.concatenate(
            [
                self.xp.zeros((mb_size, mask_offset), dtype=self.xp.float32),
                -10000 * (1 - self.xp.concatenate([
                    self.xp.reshape(mask_elem, (mb_size, 1)).astype(self.xp.float32) for mask_elem in mask], 1))
            ], 1
        )

        def compute_copy_coefficients(state):
            betas = F.reshape(batch_matmul(precomp, state), (mb_size, -1))
            masked_betas = betas + precomp_mask_penalties
            return masked_betas

        return compute_copy_coefficients 
Example #14
Source File: block.py    From Deep_VoiceChanger with MIT License
def __call__(self, x):
        if self.dr:
            x = F.dropout(x, self.dr)
        x = F.transpose(x, (0, 2, 1, 3))
        out_shape = list(x.shape)
        x = F.reshape(x, (-1, x.shape[2]*x.shape[3]))
        x = self.l(x)
        x = self.activation(x)
        out_shape[2] = self.out_ch
        x = F.reshape(x, out_shape)
        x = F.transpose(x, (0, 2, 1, 3))
        return x 
Example #15
Source File: block.py    From Deep_VoiceChanger with MIT License
def __call__(self, x):
        if self.dr:
            with chainer.using_config('train', True):
                x = F.dropout(x, self.dr)
        if self.gap:
            x = F.sum(x, axis=(2,3))
        N = x.shape[0]
        # The code below is copied from https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
        feature = F.reshape(F.leaky_relu(x), (N, -1))
        m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
        m0 = F.broadcast_to(m, (N, self.B * self.C, N))
        m1 = F.transpose(m0, (2, 1, 0))
        d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
        d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
        h = F.concat([feature, d])

        h = self.l(h)
        return h 
Example #16
Source File: EspNet_AttDot.py    From chainer-compiler with MIT License
def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
        '''AttDot forward

        :param enc_hs:
        :param dec_z:
        :param scaling:
        :return:
        '''
        batch = len(enc_hs)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = F.tanh(
                linear_tensor(self.mlp_enc, self.enc_h))

        if dec_z is None:
            dec_z = chainer.Variable(self.xp.zeros(
                (batch, self.dunits), dtype=np.float32))
        else:
            dec_z = F.reshape(dec_z, (batch, self.dunits))

        # <phi (h_t), psi (s)> for all t
        u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                           self.pre_compute_enc_h.shape)
        e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
        # Applying a minus-large-number filter to zero out probabilities in padded areas
        # simply degrades performance, so I gave up on this implementation
        # Apply scaling to make the attention sharp
        w = F.softmax(scaling * e)
        # weighted sum over frames
        # utt x hdim
        c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

        return c, w 
Example #17
Source File: EspNet_AttDot.py    From chainer-compiler with MIT License
def forward(self, enc_hs, dec_z, att_prev):
        '''AttDot forward

        :param enc_hs:
        :param dec_z:
        :param scaling:
        :return:
        '''
        # EDIT(hamaji): scaling is now a local variable.
        scaling = 2.0
        batch = len(enc_hs)

        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = F.tanh(
                linear_tensor(self.mlp_enc, self.enc_h))

        if dec_z is None:
            dec_z = chainer.Variable(self.xp.zeros(
                (batch, self.dunits), dtype=np.float32))
        else:
            dec_z = F.reshape(dec_z, (batch, self.dunits))

        # <phi (h_t), psi (s)> for all t
        u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                           self.pre_compute_enc_h.shape)
        e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
        # Applying a minus-large-number filter to zero out probabilities in padded areas
        # simply degrades performance, so I gave up on this implementation
        # Apply scaling to make the attention sharp
        w = F.softmax(scaling * e)
        # weighted sum over frames
        # utt x hdim
        c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

        return c, w 
Example #18
Source File: decoder.py    From knmt with GNU General Public License v3.0
def compute_loss(self, seq_list, encoded_input, mask_input, reduce="mean"):
        logits = self.compute_logits(seq_list, encoded_input, mask_input)
        padded_target_with_eos = pad_data(seq_list, pad_value=-1, add_eos=self.eos_idx)
        padded_target_with_eos = self.move_np_array_to_correct_device(padded_target_with_eos)
        loss = F.softmax_cross_entropy(F.reshape(logits, (-1, self.V+1)), padded_target_with_eos.reshape(-1,), reduce=reduce)
        return loss 
Example #19
Source File: EspNet_VGG2L.py    From chainer-compiler with MIT License
def forward(self, xs, ilens):
        '''VGG2L forward

        :param xs:
        :param ilens:
        :return:
        '''
        logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

        # x: utt x frame x dim
        xs = F.pad_sequence(xs)

        # x: utt x 1 (input channel num) x frame x dim
        xs = F.swapaxes(F.reshape(
            xs, (xs.shape[0], xs.shape[1], self.in_channel, xs.shape[2] // self.in_channel)), 1, 2)

        xs = F.relu(self.conv1_1(xs))
        xs = F.relu(self.conv1_2(xs))
        xs = F.max_pooling_2d(xs, 2, stride=2)

        xs = F.relu(self.conv2_1(xs))
        xs = F.relu(self.conv2_2(xs))
        xs = F.max_pooling_2d(xs, 2, stride=2)

        # change ilens accordingly
        # EDIT(hamaji): ChxVM puts int32 on GPU and it hurts the performance.
        # TODO(hamaji): Fix device assignment to get rid of this change.
        ilens = (ilens + 1) // 2
        ilens = (ilens + 1) // 2
        # ilens = self.xp.array(self.xp.ceil(self.xp.array(
        #     ilens, dtype=np.float32) / 2), dtype=np.int32)
        # ilens = self.xp.array(self.xp.ceil(self.xp.array(
        #     ilens, dtype=np.float32) / 2), dtype=np.int32)

        # x: utt_list of frame (remove zero-padded frames) x (input channel num x dim)
        xs = F.swapaxes(xs, 1, 2)
        xs = F.reshape(
            xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
        xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

        return xs, ilens 
Example #20
Source File: decoder.py    From knmt with GNU General Public License v3.0
def __call__(self, prev_decoder_state, inpt):
        current_mb_size = inpt.shape[0]
#         mask = np.zeros((current_mb_size, ), dtype = np.float32)
#         padded = np.zeros((current_mb_size, ), dtype = np.float32)
#         for num_batch, idx in enumerate(inpt):
#             padded[num_batch] = idx if idx is not None else 0
#             mask[num_batch] = 0 if idx is not None else -10000
        
        prev_decoder_state = prev_decoder_state.reduce_to_minibatch_size(current_mb_size)
        current_pos = prev_decoder_state.get_pos() + 1
        
        encoded = self.decoder_chain.emb(inpt)
        pos_vect = self.decoder_chain.get_one_pos_vect(current_mb_size, current_pos)
        
        encoded = encoded + pos_vect
        
        if self.decoder_chain.dropout is not None:
            encoded = F.dropout(encoded, self.decoder_chain.dropout)
            
        cross_mask = self.decoder_chain.xp.broadcast_to(
            self.mask_input[:,0:1,0:1,:], 
            (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))
       
        final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
            encoded, prev_decoder_state.get_states(), self.src_encoding, cross_mask)
       
#         logits = apply_linear_layer_to_last_dims(final_layer, self.decoder_chain.logits_layer)
        logits = self.decoder_chain.logits_layer(F.reshape(final_layer, (current_mb_size, self.decoder_chain.d_model)))
        return logits, DecoderState(pos=current_pos, prev_states=prev_states) 
Example #21
Source File: largefov_light.py    From Semantic-Segmentation-using-Adversarial-Networks with MIT License
def __call__(self, x):
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.relu(self.conv1_3(h))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pooling_2d(h, 2, stride=2)
        h = F.relu(self.conv3_1(h))
        h = self.conv3_2(h)
        h = F.global_average_pooling_2d(h)
        h = F.reshape(h, (h.shape[0],h.shape[1]))
        return h 
Example #22
Source File: utils.py    From knmt with GNU General Public License v3.0
def apply_linear_layer_to_last_dims(Q, w_Q):
    mb_size_Q, n_Q, d_model_Q = Q.data.shape
    return F.reshape(w_Q(F.reshape(Q, (mb_size_Q * n_Q, d_model_Q))), (mb_size_Q, n_Q, -1))


########################################################################
# Generating position vectors according to Google's paper formula
# 
Example #23
Source File: utils.py    From knmt with GNU General Public License v3.0
def apply_layer_normalization(self, added_output):
        if len(added_output.shape) > 2:
            d_model = added_output.shape[-1]
            final_layer = F.reshape(
                self.normalizing_layer(
                    F.reshape(added_output, (-1, d_model))
                    ), added_output.shape)                
        else:
            final_layer = self.normalizing_layer(added_output)
            
        return final_layer 
Example #24
Source File: multi_attention.py    From knmt with GNU General Public License v3.0
def __init__(self, d_model, n_heads, experimental_relu=False, dropout=None, residual_mode="normal", no_normalize=False):
        super(AddAndNormalizedAttentionBase, self).__init__(
            multi_attention=ConstantSizeMultiBatchMultiHeadAttention(
                d_model=d_model, n_heads=n_heads,
                experimental_relu=experimental_relu, dropout=dropout),
            residual_layer=DropoutAndAddAndNormalize(
                dropout=dropout, residual_mode=residual_mode, no_normalize=no_normalize)
        )
        
        self.d_model = d_model
        
#         self.dropout = dropout
#         
#         if not no_normalize:
#             self.add_link("normalizing_layer", LayerNormalization())
#         
#         self.no_add = no_add
#         self.no_normalize = no_normalize
        
#     def dropout_and_add_and_normalize(self, sub_output, inpt, train=True):
#         if self.dropout is not None:
#             sub_output = F.dropout(sub_output, ratio=self.dropout, train=train)
#             
#         if self.no_add:
#             added_output = sub_output
#         else:
#             added_output = sub_output + inpt
#         
#         if self.no_normalize:
#             final_layer = added_output
#         else:
#             mb, length, d_model = added_output.shape
#             final_layer = F.reshape(
#                 self.normalizing_layer(
#                     F.reshape(added_output, (mb * length, d_model))
#                     ), (mb, length, d_model))
#         
#         return final_layer 
Example #25
Source File: multi_attention.py    From knmt with GNU General Public License v3.0
def batch_matmul_last_dims(A, B, transa=False, transb=False):
    assert A.data.shape[:-2] == B.data.shape[:-2]
    reshaped_A = F.reshape(A, (-1,) + A.data.shape[-2:])
    reshaped_B = F.reshape(B, (-1,) + B.data.shape[-2:])
    reshaped_result = F.batch_matmul(reshaped_A, reshaped_B, transa=transa, transb=transb)
    result = F.reshape(reshaped_result, A.data.shape[:-2] + reshaped_result.data.shape[-2:])
    return result

########################################################################
# Multihead Attention
# 
Example #26
Source File: multi_attention.py    From knmt with GNU General Public License v3.0
def test_reorganize_by_head():
    Q = Variable(np.arange(2*3*5*7).reshape(5, 7, 2*3).astype(np.float32))
    Qr = reorganize_by_head(Q, 2)
    Qrr = undo_reorganize_by_head(Qr)
    
    assert np.all(Qrr.data == Q.data)
    assert Qr.data.base is Q.data
    assert Qrr.data.base is Q.data
    assert np.all(Qr.data[:, 0, :, :]%6 < 3)
    assert np.all(Qr.data[:, 1, :, :]%6 >= 3) 
Example #27
Source File: multi_attention.py    From knmt with GNU General Public License v3.0
def undo_reorganize_by_head(Q):
    mb_size, n_heads, n_Q, head_size = Q.data.shape
    swapped_Q = F.swapaxes(Q, 1, 2)
    return F.reshape(swapped_Q, (mb_size, n_Q, -1)) 
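
The reorganize_by_head() exercised in Example #26 does not appear in this listing; a minimal sketch consistent with the inverse above and with that test's assertions (an assumption, not the actual knmt source; F is chainer.functions as in the other snippets) would be:

def reorganize_by_head(Q, n_heads):
    # Split d_model into heads, then move the head axis forward:
    # (mb, n_Q, d_model) -> (mb, n_Q, n_heads, head_size) -> (mb, n_heads, n_Q, head_size)
    mb_size, n_Q, d_model = Q.data.shape
    head_size = d_model // n_heads
    reshaped_Q = F.reshape(Q, (mb_size, n_Q, n_heads, head_size))
    return F.swapaxes(reshaped_Q, 1, 2)
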
Example #28
Source File: decoder_cells.py    From knmt with GNU General Public License v3.0
def compute_logits(self, new_states, concatenated, attn):
        new_output_state = new_states[-1]

        all_concatenated = F.concat((concatenated, new_output_state))
        logits = self.decoder_chain.lin_o(self.decoder_chain.maxo(all_concatenated))

        if self.lexicon_probability_matrix is not None:
            current_mb_size = new_output_state.data.shape[0]
            assert self.mb_size is None or current_mb_size <= self.mb_size
            lexicon_probability_matrix = self.lexicon_probability_matrix[:current_mb_size]

            # Just making sure data shape is as expected
            attn_mb_size, max_source_length_attn = attn.data.shape
            assert attn_mb_size == current_mb_size
            lex_mb_size, max_source_length_lexicon, v_size_lexicon = lexicon_probability_matrix.shape
            assert max_source_length_lexicon == max_source_length_attn
            assert logits.data.shape == (current_mb_size, v_size_lexicon)

            if self.demux:
                assert lex_mb_size == 1
                weighted_lex_probs = F.reshape(
                    matmul_constant(attn, lexicon_probability_matrix.reshape(lexicon_probability_matrix.shape[1],
                                                                             lexicon_probability_matrix.shape[2])),
                    logits.data.shape)
            else:
                assert lex_mb_size == current_mb_size

    #                 weighted_lex_probs = F.reshape(
    #                         F.batch_matmul(attn, ConstantFunction(lexicon_probability_matrix)(), transa = True),
    #                                                logits.data.shape)

                weighted_lex_probs = F.reshape(
                    batch_matmul_constant(attn, lexicon_probability_matrix, transa=True),
                    logits.data.shape)

            logits += F.log(weighted_lex_probs + self.lex_epsilon)
        return logits 
Example #29
Source File: attention.py    From knmt with GNU General Public License v3.0
def compute_ctxt_demux(self, fb_concat, mask):
        mb_size, nb_elems, Hi = fb_concat.data.shape
        assert Hi == self.Hi
        assert mb_size == 1
        assert len(mask) == 0

        precomputed_al_factor = F.reshape(self.al_lin_h(
            F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))), (mb_size, nb_elems, self.Ha))

#         concatenated_mask = F.concat([F.reshape(mask_elem, (mb_size, 1)) for mask_elem in mask], 1)

        def compute_ctxt(previous_state, prev_word_embedding=None):
            current_mb_size = previous_state.data.shape[0]

            al_factor = F.broadcast_to(precomputed_al_factor, (current_mb_size, nb_elems, self.Ha))
#             used_fb_concat = F.broadcast_to(fb_concat, (current_mb_size, nb_elems, Hi))
#             used_concatenated_mask = F.broadcast_to(concatenated_mask, (current_mb_size, nb_elems))

            state_al_factor = self.al_lin_s(previous_state)
            
            #As suggested by Isao Goto
            if prev_word_embedding is not None:
                state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)
            
            state_al_factor_bc = F.broadcast_to(F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)), (current_mb_size, nb_elems, self.Ha))
            a_coeffs = F.reshape(self.al_lin_o(F.reshape(F.tanh(state_al_factor_bc + al_factor),
                                                         (current_mb_size * nb_elems, self.Ha))), (current_mb_size, nb_elems))


#             with cuda.get_device_from_array(used_concatenated_mask.data):
#                 a_coeffs = a_coeffs - 10000 * (1-used_concatenated_mask.data)

            attn = F.softmax(a_coeffs)

#             ci = F.reshape(F.batch_matmul(attn, used_fb_concat, transa = True), (current_mb_size, self.Hi))

            ci = F.reshape(F.matmul(attn, F.reshape(fb_concat, (nb_elems, Hi))), (current_mb_size, self.Hi))

            return ci, attn

        return compute_ctxt 
Example #30
Source File: invert_diff.py    From ssai-cnn with MIT License
def tv_norm(self, x):
        diffh = self.tvh(
            F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
        diffw = self.tvw(
            F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
        tv = (F.sum(diffh ** 2) + F.sum(diffw ** 2)) ** (self.args.beta / 2.)

        return tv