Python chainer.functions.reshape() Examples
The following are 30 code examples of chainer.functions.reshape(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module chainer.functions, or try the search function.
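Before the project examples, a minimal self-contained sketch (not taken from any project below) of the function itself: F.reshape(x, shape) accepts an array or Variable and returns a Variable whose data is viewed under the new shape; one dimension may be given as -1 and is inferred from the total number of elements.

    import numpy as np
    import chainer.functions as F

    x = np.arange(6, dtype=np.float32)  # shape (6,)
    y = F.reshape(x, (2, 3))            # Variable with shape (2, 3)
    z = F.reshape(y, (3, -1))           # -1 infers the last dimension: shape (3, 2)
    print(y.shape, z.shape)             # (2, 3) (3, 2)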
Example #1
Source File: model.py From chainer-gqn with MIT License | 6 votes |
def generate_image_from_zero_z(self, v, r):
    xp = cuda.get_array_module(v)
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    for t in range(self.num_layers):
        generation_core = self.get_generation_core(t)

        mean_z_p, _ = self.z_prior_distribution.compute_parameter(h_t_gen)
        z_t = xp.zeros_like(mean_z_p.data)

        h_next_gen, c_next_gen, u_next = generation_core(
            h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_next
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data
Example #2
Source File: decoder.py From knmt with GNU General Public License v3.0 | 6 votes |
def get_initial_logits(self, mb_size=None):
    if mb_size is None:
        mb_size = self.src_mb_size
    else:
        assert self.src_mb_size == 1
    assert mb_size is not None

    bos_encoding = F.broadcast_to(self.decoder_chain.bos_encoding,
                                  (mb_size, 1, self.decoder_chain.d_model))

    cross_mask = self.decoder_chain.xp.broadcast_to(
        self.mask_input[:, 0:1, 0:1, :],
        (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))

    final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
        bos_encoding, None, self.src_encoding, cross_mask)

    logits = self.decoder_chain.logits_layer(
        F.reshape(final_layer, (mb_size, self.decoder_chain.d_model)))

    return logits, DecoderState(pos=-1, prev_states=prev_states)
Example #3
Source File: dqn.py From chainerrl with MIT License | 6 votes |
def _compute_y_and_t(self, exp_batch):
    batch_size = exp_batch['reward'].shape[0]

    # Compute Q-values for current states
    batch_state = exp_batch['state']

    if self.recurrent:
        qout, _ = self.model.n_step_forward(
            batch_state,
            exp_batch['recurrent_state'],
            output_mode='concat',
        )
    else:
        qout = self.model(batch_state)

    batch_actions = exp_batch['action']
    batch_q = F.reshape(qout.evaluate_actions(
        batch_actions), (batch_size, 1))

    with chainer.no_backprop_mode():
        batch_q_target = F.reshape(
            self._compute_target_values(exp_batch),
            (batch_size, 1))

    return batch_q, batch_q_target
Example #4
Source File: StatelessLSTM.py From chainer-compiler with MIT License | 6 votes |
def _initialize_params(self):
    lateral_init = initializers._get_initializer(self.lateral_init)
    upward_init = initializers._get_initializer(self.upward_init)
    bias_init = initializers._get_initializer(self.bias_init)
    forget_bias_init = initializers._get_initializer(self.forget_bias_init)

    for i in six.moves.range(0, 4 * self.state_size, self.state_size):
        lateral_init(self.lateral.W.data[i:i + self.state_size, :])
        upward_init(self.upward.W.data[i:i + self.state_size, :])

    a, i, f, o = lstm._extract_gates(
        self.upward.b.data.reshape(1, 4 * self.state_size, 1))
    bias_init(a)
    bias_init(i)
    forget_bias_init(f)
    bias_init(o)
Example #5
Source File: dueling_dqn.py From chainerrl with MIT License | 6 votes |
def __call__(self, x):
    h = x
    for l in self.conv_layers:
        h = self.activation(l(h))

    # Advantage
    batch_size = x.shape[0]

    h = self.activation(self.main_stream(h))
    h_a, h_v = F.split_axis(h, 2, axis=-1)
    ya = F.reshape(self.a_stream(h_a),
                   (batch_size, self.n_actions, self.n_atoms))

    mean = F.sum(ya, axis=1, keepdims=True) / self.n_actions

    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = F.reshape(self.v_stream(h_v), (batch_size, 1, self.n_atoms))
    ya, ys = F.broadcast(ya, ys)
    q = F.softmax(ya + ys, axis=2)

    return action_value.DistributionalDiscreteActionValue(q, self.z_values)
Example #6
Source File: dueling_dqn.py From chainerrl with MIT License | 6 votes |
def __call__(self, x):
    h = x
    for l in self.conv_layers:
        h = self.activation(l(h))

    # Advantage
    batch_size = x.shape[0]
    ya = self.a_stream(h)
    mean = F.reshape(
        F.sum(ya, axis=1) / self.n_actions, (batch_size, 1))
    ya, mean = F.broadcast(ya, mean)
    ya -= mean

    # State value
    ys = self.v_stream(h)

    ya, ys = F.broadcast(ya, ys)
    q = ya + ys
    return action_value.DiscreteActionValue(q)
Example #7
Source File: iqn.py From chainerrl with MIT License | 6 votes |
def _evaluate_psi_x_with_quantile_thresholds(psi_x, phi, f, taus):
    assert psi_x.ndim == 2
    batch_size, hidden_size = psi_x.shape
    assert taus.ndim == 2
    assert taus.shape[0] == batch_size
    n_taus = taus.shape[1]
    phi_taus = phi(taus)
    assert phi_taus.ndim == 3
    assert phi_taus.shape == (batch_size, n_taus, hidden_size)
    psi_x_b = F.broadcast_to(
        F.expand_dims(psi_x, axis=1), phi_taus.shape)
    h = psi_x_b * phi_taus
    h = F.reshape(h, (-1, hidden_size))
    assert h.shape == (batch_size * n_taus, hidden_size)
    h = f(h)
    assert h.ndim == 2
    assert h.shape[0] == batch_size * n_taus
    n_actions = h.shape[-1]
    h = F.reshape(h, (batch_size, n_taus, n_actions))
    return QuantileDiscreteActionValue(h)
Example #8
Source File: train_dqn_batch_grasping.py From chainerrl with MIT License | 6 votes |
def __init__(self, n_actions, max_episode_steps):
    super().__init__()
    with self.init_scope():
        self.embed = L.EmbedID(max_episode_steps + 1, 3136)
        self.image2hidden = chainerrl.links.Sequence(
            L.Convolution2D(None, 32, 8, stride=4),
            F.relu,
            L.Convolution2D(None, 64, 4, stride=2),
            F.relu,
            L.Convolution2D(None, 64, 3, stride=1),
            functools.partial(F.reshape, shape=(-1, 3136)),
        )
        self.hidden2out = chainerrl.links.Sequence(
            L.Linear(None, 512),
            F.relu,
            L.Linear(None, n_actions),
            DiscreteActionValue,
        )
Example #9
Source File: MnihCNN_rcis.py From ssai-cnn with MIT License | 6 votes |
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.relu(self.fc4(h))
    h = self.fc5(h)
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    else:
        self.pred = F.softmax(h)
        return self.pred
Example #10
Source File: model.py From chainer-gqn with MIT License | 6 votes |
def generate_image(self, v, r):
    xp = cuda.get_array_module(v)
    batch_size = v.shape[0]
    h_t_gen, c_t_gen, u_t, _, _ = self.generate_initial_state(
        batch_size, xp)
    v = cf.reshape(v, v.shape[:2] + (1, 1))

    for t in range(self.num_layers):
        generation_core = self.get_generation_core(t)

        mean_z_p, ln_var_z_p = self.z_prior_distribution.compute_parameter(
            h_t_gen)
        z_t = cf.gaussian(mean_z_p, ln_var_z_p)

        h_next_gen, c_next_gen, u_next = generation_core(
            h_t_gen, c_t_gen, z_t, v, r, u_t)

        u_t = u_next
        h_t_gen = h_next_gen
        c_t_gen = c_next_gen

    mean_x = self.map_u_x(u_t)
    return mean_x.data
Example #11
Source File: MnihCNN_cis.py From ssai-cnn with MIT License | 6 votes |
def channelwise_inhibited(self, h):
    xp = cuda.get_array_module(h.data)
    num = h.data.shape[0]

    h = F.split_axis(h, 3, 1)
    c = F.reshape(h[self.c], (num, 16, 16))
    # second positional argument is the old Chainer v1 `volatile` flag
    z = Variable(xp.zeros_like(c.data), 'AUTO')
    c = F.batch_matmul(c, z)
    c = F.reshape(c, (num, 1, 16, 16))
    hs = []
    for i, s in enumerate(h):
        if i == self.c:
            hs.append(c)
        else:
            hs.append(s)
    return F.concat(hs, 1)
Example #12
Source File: MnihCNN_cis.py From ssai-cnn with MIT License | 6 votes |
def __call__(self, x, t):
    h = F.relu(self.conv1(x))
    h = F.max_pooling_2d(h, 2, 1)
    h = F.relu(self.conv2(h))
    h = F.relu(self.conv3(h))
    h = F.dropout(F.relu(self.fc4(h)), train=self.train)
    h = self.fc5(h)
    h = F.reshape(h, (x.data.shape[0], 3, 16, 16))
    h = self.channelwise_inhibited(h)

    if self.train:
        self.loss = F.softmax_cross_entropy(h, t, normalize=False)
        return self.loss
    else:
        self.pred = F.softmax(h)
        return self.pred
Example #13
Source File: attention.py From knmt with GNU General Public License v3.0 | 6 votes |
def __call__(self, inpt, mask):
    mb_size = inpt.data.shape[0]
    max_length = inpt.data.shape[1]

    precomp = F.reshape(F.tanh(self.lin(F.reshape(inpt, (-1, self.Hi)))),
                        (mb_size, -1, self.Ho))

    mask_offset = max_length - len(mask)

    precomp_mask_penalties = self.xp.concatenate(
        [
            self.xp.zeros((mb_size, mask_offset), dtype=self.xp.float32),
            -10000 * (1 - self.xp.concatenate([
                self.xp.reshape(mask_elem, (mb_size, 1)).astype(self.xp.float32)
                for mask_elem in mask], 1))
        ], 1
    )

    def compute_copy_coefficients(state):
        betas = F.reshape(batch_matmul(precomp, state), (mb_size, -1))
        masked_betas = betas + precomp_mask_penalties
        return masked_betas

    return compute_copy_coefficients
Example #14
Source File: block.py From Deep_VoiceChanger with MIT License | 6 votes |
def __call__(self, x):
    if self.dr:
        x = F.dropout(x, self.dr)
    x = F.transpose(x, (0, 2, 1, 3))
    out_shape = list(x.shape)
    x = F.reshape(x, (-1, x.shape[2] * x.shape[3]))
    x = self.l(x)
    x = self.activation(x)
    out_shape[2] = self.out_ch
    x = F.reshape(x, out_shape)
    x = F.transpose(x, (0, 2, 1, 3))
    return x
Example #15
Source File: block.py From Deep_VoiceChanger with MIT License | 6 votes |
def __call__(self, x):
    if self.dr:
        with chainer.using_config('train', True):
            x = F.dropout(x, self.dr)
    if self.gap:
        x = F.sum(x, axis=(2, 3))
    N = x.shape[0]

    # Below code copied from https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
    feature = F.reshape(F.leaky_relu(x), (N, -1))
    m = F.reshape(self.md(feature), (N, self.B * self.C, 1))
    m0 = F.broadcast_to(m, (N, self.B * self.C, N))
    m1 = F.transpose(m0, (2, 1, 0))
    d = F.absolute(F.reshape(m0 - m1, (N, self.B, self.C, N)))
    d = F.sum(F.exp(-F.sum(d, axis=2)), axis=2) - 1
    h = F.concat([feature, d])
    h = self.l(h)
    return h
Example #16
Source File: EspNet_AttDot.py From chainer-compiler with MIT License | 5 votes |
def original(self, enc_hs, dec_z, att_prev, scaling=2.0):
    '''AttDot forward

    :param enc_hs:
    :param dec_z:
    :param scaling:
    :return:
    '''
    batch = len(enc_hs)
    # pre-compute all h outside the decoder loop
    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = F.tanh(
            linear_tensor(self.mlp_enc, self.enc_h))

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # <phi (h_t), psi (s)> for all t
    u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                       self.pre_compute_enc_h.shape)
    e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)
    # weighted sum over frames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #17
Source File: EspNet_AttDot.py From chainer-compiler with MIT License | 5 votes |
def forward(self, enc_hs, dec_z, att_prev):
    '''AttDot forward

    :param enc_hs:
    :param dec_z:
    :param scaling:
    :return:
    '''
    # EDIT(hamaji): scaling is now a local variable.
    scaling = 2.0
    batch = len(enc_hs)

    if self.pre_compute_enc_h is None:
        self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
        self.h_length = self.enc_h.shape[1]
        # utt x frame x att_dim
        self.pre_compute_enc_h = F.tanh(
            linear_tensor(self.mlp_enc, self.enc_h))

    if dec_z is None:
        dec_z = chainer.Variable(self.xp.zeros(
            (batch, self.dunits), dtype=np.float32))
    else:
        dec_z = F.reshape(dec_z, (batch, self.dunits))

    # <phi (h_t), psi (s)> for all t
    u = F.broadcast_to(F.expand_dims(F.tanh(self.mlp_dec(dec_z)), 1),
                       self.pre_compute_enc_h.shape)
    e = F.sum(self.pre_compute_enc_h * u, axis=2)  # utt x frame
    # Applying a minus-large-number filter to make a probability value zero for a padded area
    # simply degrades the performance, and I gave up this implementation
    # Apply a scaling to make an attention sharp
    w = F.softmax(scaling * e)
    # weighted sum over frames
    # utt x hdim
    c = F.sum(self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1)

    return c, w
Example #18
Source File: decoder.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_loss(self, seq_list, encoded_input, mask_input, reduce="mean"):
    logits = self.compute_logits(seq_list, encoded_input, mask_input)
    padded_target_with_eos = pad_data(seq_list, pad_value=-1, add_eos=self.eos_idx)
    padded_target_with_eos = self.move_np_array_to_correct_device(padded_target_with_eos)
    loss = F.softmax_cross_entropy(F.reshape(logits, (-1, self.V + 1)),
                                   padded_target_with_eos.reshape(-1,),
                                   reduce=reduce)
    return loss
Example #19
Source File: EspNet_VGG2L.py From chainer-compiler with MIT License | 5 votes |
def forward(self, xs, ilens):
    '''VGG2L forward

    :param xs:
    :param ilens:
    :return:
    '''
    logging.info(self.__class__.__name__ + ' input lengths: ' + str(ilens))

    # x: utt x frame x dim
    xs = F.pad_sequence(xs)

    # x: utt x 1 (input channel num) x frame x dim
    xs = F.swapaxes(F.reshape(
        xs, (xs.shape[0], xs.shape[1], self.in_channel,
             xs.shape[2] // self.in_channel)), 1, 2)

    xs = F.relu(self.conv1_1(xs))
    xs = F.relu(self.conv1_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    xs = F.relu(self.conv2_1(xs))
    xs = F.relu(self.conv2_2(xs))
    xs = F.max_pooling_2d(xs, 2, stride=2)

    # change ilens accordingly
    # EDIT(hamaji): ChxVM puts int32 on GPU and it hurts the performance.
    # TODO(hamaji): Fix device assignment to get rid of this change.
    ilens = (ilens + 1) // 2
    ilens = (ilens + 1) // 2
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)
    # ilens = self.xp.array(self.xp.ceil(self.xp.array(
    #     ilens, dtype=np.float32) / 2), dtype=np.int32)

    # x: utt_list of frame (remove zero-padded frames) x (input channel num x dim)
    xs = F.swapaxes(xs, 1, 2)
    xs = F.reshape(
        xs, (xs.shape[0], xs.shape[1], xs.shape[2] * xs.shape[3]))
    xs = [xs[i, :ilens[i], :] for i in range(len(ilens))]

    return xs, ilens
Example #20
Source File: decoder.py From knmt with GNU General Public License v3.0 | 5 votes |
def __call__(self, prev_decoder_state, inpt):
    current_mb_size = inpt.shape[0]
    # mask = np.zeros((current_mb_size, ), dtype = np.float32)
    # padded = np.zeros((current_mb_size, ), dtype = np.float32)
    # for num_batch, idx in enumerate(inpt):
    #     padded[num_batch] = idx if idx is not None else 0
    #     mask[num_batch] = 0 if idx is not None else -10000

    prev_decoder_state = prev_decoder_state.reduce_to_minibatch_size(current_mb_size)
    current_pos = prev_decoder_state.get_pos() + 1

    encoded = self.decoder_chain.emb(inpt)
    pos_vect = self.decoder_chain.get_one_pos_vect(current_mb_size, current_pos)

    encoded = encoded + pos_vect

    if self.decoder_chain.dropout is not None:
        encoded = F.dropout(encoded, self.decoder_chain.dropout)

    cross_mask = self.decoder_chain.xp.broadcast_to(
        self.mask_input[:, 0:1, 0:1, :],
        (self.mask_input.shape[0], self.decoder_chain.n_heads, 1, self.mask_input.shape[3]))

    final_layer, prev_states = self.decoder_chain.encoding_layers.one_step(
        encoded, prev_decoder_state.get_states(), self.src_encoding, cross_mask)

    # logits = apply_linear_layer_to_last_dims(final_layer, self.decoder_chain.logits_layer)
    logits = self.decoder_chain.logits_layer(
        F.reshape(final_layer, (current_mb_size, self.decoder_chain.d_model)))

    return logits, DecoderState(pos=current_pos, prev_states=prev_states)
Example #21
Source File: largefov_light.py From Semantic-Segmentation-using-Adversarial-Networks with MIT License | 5 votes |
def __call__(self, x):
    h = F.relu(self.conv1_1(x))
    h = F.relu(self.conv1_2(h))
    h = F.relu(self.conv1_3(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu(self.conv2_1(h))
    h = F.relu(self.conv2_2(h))
    h = F.max_pooling_2d(h, 2, stride=2)
    h = F.relu(self.conv3_1(h))
    h = self.conv3_2(h)
    h = f.global_average_pooling_2d(h)
    h = F.reshape(h, (h.shape[0], h.shape[1]))
    return h
Example #22
Source File: utils.py From knmt with GNU General Public License v3.0 | 5 votes |
def apply_linear_layer_to_last_dims(Q, w_Q):
    mb_size_Q, n_Q, d_model_Q = Q.data.shape
    return F.reshape(w_Q(F.reshape(Q, (mb_size_Q * n_Q, d_model_Q))),
                     (mb_size_Q, n_Q, -1))

########################################################################
# Generating position vectors according to Google's paper formula
#
Example #23
Source File: utils.py From knmt with GNU General Public License v3.0 | 5 votes |
def apply_layer_normalization(self, added_output):
    if len(added_output.shape) > 2:
        d_model = added_output.shape[-1]
        final_layer = F.reshape(
            self.normalizing_layer(
                F.reshape(added_output, (-1, d_model))
            ), added_output.shape)
    else:
        final_layer = self.normalizing_layer(added_output)
    return final_layer
Example #24
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def __init__(self, d_model, n_heads, experimental_relu=False, dropout=None,
             residual_mode="normal", no_normalize=False):
    super(AddAndNormalizedAttentionBase, self).__init__(
        multi_attention=ConstantSizeMultiBatchMultiHeadAttention(
            d_model=d_model, n_heads=n_heads,
            experimental_relu=experimental_relu, dropout=dropout),
        residual_layer=DropoutAndAddAndNormalize(
            dropout=dropout, residual_mode=residual_mode, no_normalize=no_normalize)
    )
    self.d_model = d_model

#         self.dropout = dropout
#
#         if not no_normalize:
#             self.add_link("normalizing_layer", LayerNormalization())
#
#         self.no_add = no_add
#         self.no_normalize = no_normalize

#     def dropout_and_add_and_normalize(self, sub_output, inpt, train=True):
#         if self.dropout is not None:
#             sub_output = F.dropout(sub_output, ratio=self.dropout, train=train)
#
#         if self.no_add:
#             added_output = sub_output
#         else:
#             added_output = sub_output + inpt
#
#         if self.no_normalize:
#             final_layer = added_output
#         else:
#             mb, length, d_model = added_output.shape
#             final_layer = F.reshape(
#                 self.normalizing_layer(
#                     F.reshape(added_output, (mb * length, d_model))
#                 ), (mb, length, d_model))
#
#         return final_layer
Example #25
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def batch_matmul_last_dims(A, B, transa=False, transb=False):
    assert A.data.shape[:-2] == B.data.shape[:-2]
    reshaped_A = F.reshape(A, (-1,) + A.data.shape[-2:])
    reshaped_B = F.reshape(B, (-1,) + B.data.shape[-2:])
    reshaped_result = F.batch_matmul(reshaped_A, reshaped_B, transa=transa, transb=transb)
    result = F.reshape(reshaped_result,
                       A.data.shape[:-2] + reshaped_result.data.shape[-2:])
    return result

########################################################################
# Multihead Attention
#
Example #26
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def test_reorganize_by_head():
    Q = Variable(np.arange(2*3*5*7).reshape(5, 7, 2*3).astype(np.float32))
    Qr = reorganize_by_head(Q, 2)
    Qrr = undo_reorganize_by_head(Qr)
    assert np.all(Qrr.data == Q.data)
    assert Qr.data.base is Q.data
    assert Qrr.data.base is Q.data
    assert np.all(Qr.data[:, 0, :, :] % 6 < 3)
    assert np.all(Qr.data[:, 1, :, :] % 6 >= 3)
Example #27
Source File: multi_attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def undo_reorganize_by_head(Q):
    mb_size, n_heads, n_Q, head_size = Q.data.shape
    swapped_Q = F.swapaxes(Q, 1, 2)
    return F.reshape(swapped_Q, (mb_size, n_Q, -1))
Example #28
Source File: decoder_cells.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_logits(self, new_states, concatenated, attn):
    new_output_state = new_states[-1]

    all_concatenated = F.concat((concatenated, new_output_state))
    logits = self.decoder_chain.lin_o(self.decoder_chain.maxo(all_concatenated))

    if self.lexicon_probability_matrix is not None:
        current_mb_size = new_output_state.data.shape[0]
        assert self.mb_size is None or current_mb_size <= self.mb_size
        lexicon_probability_matrix = self.lexicon_probability_matrix[:current_mb_size]

        # Just making sure data shape is as expected
        attn_mb_size, max_source_length_attn = attn.data.shape
        assert attn_mb_size == current_mb_size
        lex_mb_size, max_source_length_lexicon, v_size_lexicon = lexicon_probability_matrix.shape
        assert max_source_length_lexicon == max_source_length_attn
        assert logits.data.shape == (current_mb_size, v_size_lexicon)

        if self.demux:
            assert lex_mb_size == 1
            weighted_lex_probs = F.reshape(
                matmul_constant(attn, lexicon_probability_matrix.reshape(
                    lexicon_probability_matrix.shape[1],
                    lexicon_probability_matrix.shape[2])),
                logits.data.shape)
        else:
            assert lex_mb_size == current_mb_size

            # weighted_lex_probs = F.reshape(
            #     F.batch_matmul(attn, ConstantFunction(lexicon_probability_matrix)(), transa=True),
            #     logits.data.shape)

            weighted_lex_probs = F.reshape(
                batch_matmul_constant(attn, lexicon_probability_matrix, transa=True),
                logits.data.shape)

        logits += F.log(weighted_lex_probs + self.lex_epsilon)

    return logits
Example #29
Source File: attention.py From knmt with GNU General Public License v3.0 | 5 votes |
def compute_ctxt_demux(self, fb_concat, mask):
    mb_size, nb_elems, Hi = fb_concat.data.shape
    assert Hi == self.Hi
    assert mb_size == 1
    assert len(mask) == 0

    precomputed_al_factor = F.reshape(self.al_lin_h(
        F.reshape(fb_concat, (mb_size * nb_elems, self.Hi))),
        (mb_size, nb_elems, self.Ha))

    # concatenated_mask = F.concat([F.reshape(mask_elem, (mb_size, 1)) for mask_elem in mask], 1)

    def compute_ctxt(previous_state, prev_word_embedding=None):
        current_mb_size = previous_state.data.shape[0]

        al_factor = F.broadcast_to(precomputed_al_factor, (current_mb_size, nb_elems, self.Ha))
        # used_fb_concat = F.broadcast_to(fb_concat, (current_mb_size, nb_elems, Hi))
        # used_concatenated_mask = F.broadcast_to(concatenated_mask, (current_mb_size, nb_elems))

        state_al_factor = self.al_lin_s(previous_state)

        # As suggested by Isao Goto
        if prev_word_embedding is not None:
            state_al_factor = state_al_factor + self.al_lin_y(prev_word_embedding)

        state_al_factor_bc = F.broadcast_to(
            F.reshape(state_al_factor, (current_mb_size, 1, self.Ha)),
            (current_mb_size, nb_elems, self.Ha))
        a_coeffs = F.reshape(self.al_lin_o(F.reshape(
            F.tanh(state_al_factor_bc + al_factor),
            (current_mb_size * nb_elems, self.Ha))), (current_mb_size, nb_elems))

        # with cuda.get_device_from_array(used_concatenated_mask.data):
        #     a_coeffs = a_coeffs - 10000 * (1 - used_concatenated_mask.data)

        attn = F.softmax(a_coeffs)

        # ci = F.reshape(F.batch_matmul(attn, used_fb_concat, transa=True), (current_mb_size, self.Hi))
        ci = F.reshape(F.matmul(attn, F.reshape(fb_concat, (nb_elems, Hi))),
                       (current_mb_size, self.Hi))

        return ci, attn

    return compute_ctxt
Example #30
Source File: invert_diff.py From ssai-cnn with MIT License | 5 votes |
def tv_norm(self, x):
    diffh = self.tvh(
        F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
    diffw = self.tvw(
        F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
    tv = (F.sum(diffh ** 2) + F.sum(diffw ** 2)) ** (self.args.beta / 2.)
    return tv