Python torch.log_softmax() Examples
The following are 30 code examples of torch.log_softmax(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch, or try the search function.
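Before the project examples, here is a minimal, self-contained sketch (not taken from any project below) of the basic call: torch.log_softmax applies a softmax over the given dimension and takes its logarithm in a numerically stable way, which is why it is preferred over composing torch.log with torch.softmax.

import torch

x = torch.randn(2, 5)                     # a batch of 2 logit vectors over 5 classes
log_probs = torch.log_softmax(x, dim=-1)  # stable log-probabilities along the last dim
# exponentiating recovers a proper probability distribution per row
assert torch.allclose(log_probs.exp().sum(dim=-1), torch.ones(2))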
Example #1
Source File: metrics.py From OpenTransformer with MIT License | 6 votes |
def forward(self, x, target):
    """Compute loss between x and target

    :param torch.Tensor x: prediction (batch, seqlen, class)
    :param torch.Tensor target:
        target signal masked with self.padding_id (batch, seqlen)
    :return: scalar float value
    :rtype torch.Tensor
    """
    assert x.size(2) == self.size
    batch_size = x.size(0)
    x = x.view(-1, self.size)
    target = target.reshape(-1)
    with torch.no_grad():
        true_dist = x.clone()
        true_dist.fill_(self.smoothing / (self.size - 1))
        ignore = target == self.padding_idx  # (B,)
        total = len(target) - ignore.sum().item()
        target = target.masked_fill(ignore, 0)  # avoid -1 index
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
    kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
    denom = total if self.normalize_length else batch_size
    return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom
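The smoothing pattern above can be reduced to a short, self-contained sketch (made-up shapes, not part of the project): build a smoothed target distribution, then compare it to the model's log-probabilities with KL divergence.

import torch
import torch.nn as nn

vocab, smoothing = 5, 0.1
logits = torch.randn(3, vocab)                    # flattened (batch*seqlen, class) predictions
target = torch.tensor([1, 4, 2])                  # gold label indices

true_dist = torch.full((3, vocab), smoothing / (vocab - 1))
true_dist.scatter_(1, target.unsqueeze(1), 1.0 - smoothing)

kl = nn.KLDivLoss(reduction="none")(torch.log_softmax(logits, dim=1), true_dist)
loss = kl.sum() / 3                               # normalize by the number of tokens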
Example #2
Source File: transformer.py From dl4mt-seqgen with BSD 3-Clause "New" or "Revised" License | 6 votes |
def forward(self, x, y, get_scores=False):
    """
    Compute the loss, and optionally the scores.
    """
    assert (y == self.pad_index).sum().item() == 0

    if self.asm is False:
        scores = self.proj(x).view(-1, self.n_words)
        if self.label_smoothing == 0.0:
            loss = F.cross_entropy(scores, y, reduction='elementwise_mean')
        else:
            lprobs = torch.log_softmax(scores, dim=1)
            nll_loss = -lprobs.gather(dim=-1, index=y.unsqueeze(1))
            smooth_loss = -lprobs.sum(dim=-1, keepdim=True)
            nll_loss, smooth_loss = nll_loss.sum(), smooth_loss.sum()
            eps_i = self.label_smoothing / lprobs.size(-1)
            loss = (1. - self.label_smoothing) * nll_loss + eps_i * smooth_loss
            loss = loss / x.shape[0]
    else:
        _, loss = self.proj(x, y)
        scores = self.proj.log_prob(x) if get_scores else None

    return scores, loss
Example #3
Source File: test_beam_search.py From encoder-agnostic-adaptation with MIT License | 6 votes |
def init_step(self, beam, expected_len_pen):
    # init_preds: [4, 3, 5, 6, 7] - no EOS's
    init_scores = torch.log_softmax(torch.tensor(
        [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
    init_scores = deepcopy(init_scores.repeat(
        self.BATCH_SZ * self.BEAM_SZ, 1))
    new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1)
    expected_beam_scores, expected_preds_0 = new_scores \
        .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \
        .topk(self.BEAM_SZ, dim=-1)
    beam.advance(deepcopy(init_scores), self.random_attn())
    self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
    self.assertTrue(beam.topk_ids.equal(expected_preds_0))
    self.assertFalse(beam.is_finished.any())
    self.assertFalse(beam.done)
    return expected_beam_scores
Example #4
Source File: label_smoothing_loss.py From espnet with Apache License 2.0 | 6 votes |
def forward(self, x, target):
    """Compute loss between x and target.

    :param torch.Tensor x: prediction (batch, seqlen, class)
    :param torch.Tensor target:
        target signal masked with self.padding_id (batch, seqlen)
    :return: scalar float value
    :rtype torch.Tensor
    """
    assert x.size(2) == self.size
    batch_size = x.size(0)
    x = x.view(-1, self.size)
    target = target.view(-1)
    with torch.no_grad():
        true_dist = x.clone()
        true_dist.fill_(self.smoothing / (self.size - 1))
        ignore = target == self.padding_idx  # (B,)
        total = len(target) - ignore.sum().item()
        target = target.masked_fill(ignore, 0)  # avoid -1 index
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
    kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
    denom = total if self.normalize_length else batch_size
    return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom
Example #5
Source File: test_beam_search.py From OpenNMT-py with MIT License | 6 votes |
def init_step(self, beam, expected_len_pen):
    # init_preds: [4, 3, 5, 6, 7] - no EOS's
    init_scores = torch.log_softmax(torch.tensor(
        [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
    init_scores = deepcopy(init_scores.repeat(
        self.BATCH_SZ * self.BEAM_SZ, 1))
    new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1)
    expected_beam_scores, expected_preds_0 = new_scores \
        .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \
        .topk(self.BEAM_SZ, dim=-1)
    beam.advance(deepcopy(init_scores), self.random_attn())
    self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
    self.assertTrue(beam.topk_ids.equal(expected_preds_0))
    self.assertFalse(beam.is_finished.any())
    self.assertFalse(beam.done)
    return expected_beam_scores
Example #6
Source File: test_softmax.py From pytorch_scatter with MIT License | 6 votes |
def test_log_softmax():
    src = torch.tensor([0.2, 0, 0.2, -2.1, 3.2, 7, -1, float('-inf')])
    src.requires_grad_()
    index = torch.tensor([0, 1, 0, 1, 1, 2, 4, 4])

    out = scatter_log_softmax(src, index)

    out0 = torch.log_softmax(torch.tensor([0.2, 0.2]), dim=-1)
    out1 = torch.log_softmax(torch.tensor([0, -2.1, 3.2]), dim=-1)
    out2 = torch.log_softmax(torch.tensor([7], dtype=torch.float), dim=-1)
    out4 = torch.log_softmax(torch.tensor([-1, float('-inf')]), dim=-1)

    expected = torch.stack([
        out0[0], out1[0], out0[1], out1[1], out1[2], out2[0], out4[0], out4[1]
    ], dim=0)

    assert torch.allclose(out, expected)

    out.backward(torch.randn_like(out))
Example #7
Source File: ctc.py From neural_sp with Apache License 2.0 | 6 votes |
def greedy(self, eouts, elens):
    """Greedy decoding.

    Args:
        eouts (FloatTensor): `[B, T, enc_n_units]`
        elens (np.ndarray): `[B]`
    Returns:
        hyps (np.ndarray): Best path hypothesis. `[B, L]`

    """
    log_probs = torch.log_softmax(self.output(eouts), dim=-1)
    best_paths = log_probs.argmax(-1)  # `[B, L]`

    hyps = []
    for b in range(eouts.size(0)):
        indices = [best_paths[b, t].item() for t in range(elens[b])]

        # Step 1. Collapse repeated labels
        collapsed_indices = [x[0] for x in groupby(indices)]

        # Step 2. Remove all blank labels
        best_hyp = [x for x in filter(lambda x: x != self.blank, collapsed_indices)]

        hyps.append(np.array(best_hyp))

    return np.array(hyps)
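For reference, the two post-processing steps above can be illustrated on a made-up best path (a standalone sketch, not project code):

from itertools import groupby

blank = 0
best_path = [0, 3, 3, 0, 0, 5, 5, 5, 2]         # hypothetical argmax sequence
collapsed = [k for k, _ in groupby(best_path)]  # Step 1 -> [0, 3, 0, 5, 2]
hyp = [x for x in collapsed if x != blank]      # Step 2 -> [3, 5, 2]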
Example #8
Source File: test_beam_search.py From OpenNMT-kpg-release with MIT License | 6 votes |
def init_step(self, beam, expected_len_pen): # init_preds: [4, 3, 5, 6, 7] - no EOS's init_scores = torch.log_softmax(torch.tensor( [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1) init_scores = deepcopy(init_scores.repeat( self.BATCH_SZ * self.BEAM_SZ, 1)) new_scores = init_scores + beam.topk_log_probs.view(-1).unsqueeze(1) expected_beam_scores, expected_preds_0 = new_scores \ .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS) \ .topk(self.BEAM_SZ, dim=-1) beam.advance(deepcopy(init_scores), self.random_attn()) self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores)) self.assertTrue(beam.topk_ids.equal(expected_preds_0)) self.assertFalse(beam.is_finished.any()) self.assertFalse(beam.done) return expected_beam_scores
Example #9
Source File: criterion.py From neural_sp with Apache License 2.0 | 6 votes |
def focal_loss(logits, ys, ylens, alpha, gamma):
    """Compute focal loss.

    Args:
        logits (FloatTensor): `[B, T, vocab]`
        ys (LongTensor): Indices of labels. `[B, L]`
        ylens (IntTensor): `[B]`
        alpha (float):
        gamma (float):
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs = ys.size(0)

    log_probs = torch.log_softmax(logits, dim=-1)
    probs_inv = -torch.softmax(logits, dim=-1) + 1
    loss = -alpha * torch.mul(torch.pow(probs_inv, gamma), log_probs)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    return loss_mean
Example #10
Source File: criterion.py From neural_sp with Apache License 2.0 | 6 votes |
def kldiv_lsm_ctc(logits, ylens):
    """Compute KL divergence loss for label smoothing of CTC and Transducer models.

    Args:
        logits (FloatTensor): `[B, T, vocab]`
        ylens (IntTensor): `[B]`
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs, _, vocab = logits.size()

    log_uniform = logits.new_zeros(logits.size()).fill_(math.log(1 / (vocab - 1)))
    probs = torch.softmax(logits, dim=-1)
    log_probs = torch.log_softmax(logits, dim=-1)
    loss = torch.mul(probs, log_probs - log_uniform)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    # assert loss_mean >= 0
    return loss_mean
Example #11
Source File: criterion.py From neural_sp with Apache License 2.0 | 6 votes |
def distillation(logits_student, logits_teacher, ylens, temperature=5.0):
    """Compute cross entropy loss for knowledge distillation of sequence-to-sequence models.

    Args:
        logits_student (FloatTensor): `[B, T, vocab]`
        logits_teacher (FloatTensor): `[B, T, vocab]`
        ylens (IntTensor): `[B]`
        temperature (float):
    Returns:
        loss_mean (FloatTensor): `[1]`

    """
    bs, _, vocab = logits_student.size()

    log_probs_student = torch.log_softmax(logits_student, dim=-1)
    probs_teacher = torch.softmax(logits_teacher / temperature, dim=-1).data
    loss = -torch.mul(probs_teacher, log_probs_student)
    loss_mean = np.sum([loss[b, :ylens[b], :].sum() for b in range(bs)]) / ylens.sum()
    return loss_mean
Example #12
Source File: models.py From sparse-sharing with MIT License | 6 votes |
def forward(self, task_id, x, y, seq_len):
    words_emb = self.embedding(x)
    char_emb = self.char(x)
    x = torch.cat([words_emb, char_emb], dim=-1)
    x, _ = self.lstm(x, seq_len)
    self.dropout(x)
    logit = self.out[task_id[0]](x)
    seq_mask = seq_len_to_mask(seq_len, x.size(1))
    if self.crf is not None:
        logit = torch.log_softmax(logit, dim=-1)
        loss = self.crf[task_id[0]](logit, y, seq_mask).mean()
        pred = self.crf[task_id[0]].viterbi_decode(logit, seq_mask)[0]
    else:
        loss = ce_loss(logit, y, seq_mask)
        pred = torch.argmax(logit, dim=2)
    return {"loss": loss, "pred": pred}
Example #13
Source File: train.py From crnn.pytorch with Apache License 2.0 | 6 votes |
def train_one_epoch(model, criterion, optimizer, data_loader, device, epoch, args):
    epoch_loss = 0.0
    for image, target, input_len, target_len in tqdm(data_loader):
        image = image.to(device)
        # print(target, target_len, input_len)
        outputs = model(image.to(torch.float32))  # [B, N, C]
        outputs = torch.log_softmax(outputs, dim=2)
        outputs = outputs.permute([1, 0, 2])  # [N, B, C]
        loss = criterion(outputs[:], target, input_len, target_len)
        # Gradient update
        model.zero_grad()
        loss.backward()
        optimizer.step()
        # Accumulate the loss for the current epoch
        epoch_loss += loss.item() * image.size(0)
        if np.isnan(loss.item()):
            print(target, input_len, target_len)

    epoch_loss = epoch_loss / len(data_loader.dataset)
    # Print the log and save the weights
    print('Epoch: {}/{} loss: {:03f}'.format(epoch + 1, args.epochs, epoch_loss))
    return epoch_loss
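The log_softmax/permute pair above prepares the input for nn.CTCLoss, which expects log-probabilities of shape (T, B, C). Below is a minimal, self-contained sketch with made-up shapes (not part of the project):

import torch
import torch.nn as nn

T, N, C = 50, 4, 37                     # time steps, batch size, classes (incl. blank)
logits = torch.randn(N, T, C)           # model output [B, T, C]
log_probs = torch.log_softmax(logits, dim=2).permute(1, 0, 2)  # -> [T, B, C]

targets = torch.randint(1, C, (N, 10))  # label indices; blank (0) excluded
input_lengths = torch.full((N,), T, dtype=torch.long)
target_lengths = torch.full((N,), 10, dtype=torch.long)

loss = nn.CTCLoss(blank=0)(log_probs, targets, input_lengths, target_lengths)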
Example #14
Source File: label_smooth.py From pytorch-loss with MIT License | 6 votes |
def forward(ctx, logits, label, lb_smooth, lb_ignore):
    # prepare label
    num_classes = logits.size(1)
    lb_pos, lb_neg = 1. - lb_smooth, lb_smooth / num_classes
    label = label.clone().detach()
    ignore = label == lb_ignore
    n_valid = (label != lb_ignore).sum()
    label[ignore] = 0
    lb_one_hot = torch.empty_like(logits).fill_(
        lb_neg).scatter_(1, label.unsqueeze(1), lb_pos).detach()

    ignore = ignore.nonzero()
    _, M = ignore.size()
    a, *b = ignore.chunk(M, dim=1)
    mask = [a, torch.arange(logits.size(1)), *b]
    lb_one_hot[mask] = 0
    coeff = (num_classes - 1) * lb_neg + lb_pos

    ctx.variables = coeff, mask, logits, lb_one_hot

    loss = torch.log_softmax(logits, dim=1).neg_().mul_(lb_one_hot).sum(dim=1)
    return loss
Example #15
Source File: FFM_Multi_PyTorch.py From Awesome-RecSystem-Models with MIT License | 6 votes |
def forward(self, x):
    # First compute the linear part
    linear_part = self.linear(x)

    # Then compute the feature-interaction part
    interaction_part = 0.0
    for i in range(self.fea_num):
        for j in range(i + 1, self.fea_num):
            v_ifj = self.v[i, self.field_map_dict[j], :, :]
            v_jfi = self.v[j, self.field_map_dict[i], :, :]
            xij = torch.unsqueeze(x[:, i] * x[:, j], dim=1)
            v_ijji = torch.unsqueeze(torch.sum(v_ifj * v_jfi, dim=0), dim=0)
            interaction_part += torch.mm(xij, v_ijji)

    output = linear_part + interaction_part
    output = torch.log_softmax(output, dim=1)
    return output
Example #16
Source File: label_smoothing_loss.py From adviser with GNU General Public License v3.0 | 6 votes |
def forward(self, x, target):
    """Compute loss between x and target.

    :param torch.Tensor x: prediction (batch, seqlen, class)
    :param torch.Tensor target:
        target signal masked with self.padding_id (batch, seqlen)
    :return: scalar float value
    :rtype torch.Tensor
    """
    assert x.size(2) == self.size
    batch_size = x.size(0)
    x = x.view(-1, self.size)
    target = target.view(-1)
    with torch.no_grad():
        true_dist = x.clone()
        true_dist.fill_(self.smoothing / (self.size - 1))
        ignore = target == self.padding_idx  # (B,)
        total = len(target) - ignore.sum().item()
        target = target.masked_fill(ignore, 0)  # avoid -1 index
        true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
    kl = self.criterion(torch.log_softmax(x, dim=1), true_dist)
    denom = total if self.normalize_length else batch_size
    return kl.masked_fill(ignore.unsqueeze(1), 0).sum() / denom
Example #17
Source File: test_beam.py From OpenNMT-kpg-release with MIT License | 5 votes |
def first_step(self, beam, expected_beam_scores, expected_len_pen):
    # no EOS's yet
    assert len(beam.finished) == 0
    scores_1 = torch.log_softmax(torch.tensor(
        [[0, 0, 0, .3, 0, .51, .2, 0],
         [0, 0, 1.5, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, .49, .48, 0, 0],
         [0, 0, 0, .2, .2, .2, .2, .2],
         [0, 0, 0, .2, .2, .2, .2, .2]]
    ), dim=1)

    beam.advance(scores_1, torch.randn(self.BEAM_SZ, self.INP_SEQ_LEN))

    new_scores = scores_1 + expected_beam_scores.t()
    expected_beam_scores, unreduced_preds = new_scores.view(-1).topk(
        self.BEAM_SZ, 0, True, True)
    expected_bptr_1 = unreduced_preds / self.N_WORDS
    # [5, 3, 2, 6, 0], so beam 2 predicts EOS!
    expected_preds_1 = unreduced_preds - expected_bptr_1 * self.N_WORDS

    self.assertTrue(beam.scores.allclose(expected_beam_scores))
    self.assertTrue(beam.next_ys[-1].equal(expected_preds_1))
    self.assertTrue(beam.prev_ks[-1].equal(expected_bptr_1))
    self.assertEqual(len(beam.finished), 1)
    self.assertEqual(beam.finished[0][2], 2)  # beam 2 finished
    self.assertEqual(beam.finished[0][1], 2)  # finished on second step
    self.assertEqual(beam.finished[0][0],     # finished with correct score
                     expected_beam_scores[2] / expected_len_pen)
    self.assertFalse(beam.eos_top)
    self.assertFalse(beam.done)
    return expected_beam_scores
Example #18
Source File: test_beam.py From OpenNMT-kpg-release with MIT License | 5 votes |
def init_step(self, beam):
    # init_preds: [4, 3, 5, 6, 7] - no EOS's
    init_scores = torch.log_softmax(torch.tensor(
        [[0, 0, 0, 4, 5, 3, 2, 1]], dtype=torch.float), dim=1)
    expected_beam_scores, expected_preds_0 = init_scores.topk(self.BEAM_SZ)
    beam.advance(init_scores, torch.randn(self.BEAM_SZ, self.INP_SEQ_LEN))
    self.assertTrue(beam.scores.allclose(expected_beam_scores))
    self.assertTrue(beam.next_ys[-1].equal(expected_preds_0[0]))
    self.assertFalse(beam.eos_top)
    self.assertFalse(beam.done)
    return expected_beam_scores
Example #19
Source File: test_beam_search.py From OpenNMT-kpg-release with MIT License | 5 votes |
def third_step(self, beam, expected_beam_scores, expected_len_pen):
    # assumes beam 0 finished on last step
    scores_3 = torch.log_softmax(torch.tensor(
        [[0, 0, 5000, 0, 5000, .51, .2, 0],  # beam 0 shouldn't cont
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 5000, 0, 0],
         [0, 0, 0, .2, .2, .2, .2, .2],
         [0, 0, 50, 0, .2, .2, .2, .2]]  # beam 4 -> beam 1 should die
    ), dim=1)
    scores_3 = scores_3.repeat(self.BATCH_SZ, 1)

    beam.advance(deepcopy(scores_3), self.random_attn())

    expected_beam_scores[:, 0::self.BEAM_SZ] = self.DEAD_SCORE
    new_scores = scores_3 + expected_beam_scores.view(-1).unsqueeze(1)
    expected_beam_scores, unreduced_preds = new_scores\
        .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
        .topk(self.BEAM_SZ, -1)
    expected_bptr_3 = unreduced_preds / self.N_WORDS
    # [5, 2, 6, 1, 0] repeat self.BATCH_SZ, so beam 1 predicts EOS!
    expected_preds_3 = unreduced_preds - expected_bptr_3 * self.N_WORDS

    self.assertTrue(beam.topk_log_probs.allclose(
        expected_beam_scores))
    self.assertTrue(beam.topk_scores.allclose(
        expected_beam_scores / expected_len_pen))
    self.assertTrue(beam.topk_ids.equal(expected_preds_3))
    self.assertTrue(beam.current_backptr.equal(expected_bptr_3))
    self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
    # new beam 1 finished
    self.assertTrue(beam.is_finished[:, 1].all())
    # new beam 1 is old beam 4
    self.assertTrue(expected_bptr_3[:, 1].eq(4).all())
    beam.update_finished()
    self.assertTrue(beam.top_beam_finished.all())
    self.assertTrue(beam.done)
    return expected_beam_scores
Example #20
Source File: sign.py From ogb with MIT License | 5 votes |
def forward(self, xs):
    outs = []
    for x, lin in zip(xs, self.lins):
        out = F.dropout(F.relu(lin(x)), p=0.5, training=self.training)
        outs.append(out)
    x = torch.cat(outs, dim=-1)
    x = self.lin(x)
    return torch.log_softmax(x, dim=-1)
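Returning log_softmax from forward, as above, pairs naturally with F.nll_loss; together they are equivalent to cross-entropy on the raw logits. A small self-contained sketch (made-up shapes, not project code):

import torch
import torch.nn.functional as F

logits = torch.randn(8, 40)           # e.g. 8 samples, 40 classes
labels = torch.randint(0, 40, (8,))

log_probs = torch.log_softmax(logits, dim=-1)
loss_a = F.nll_loss(log_probs, labels)
loss_b = F.cross_entropy(logits, labels)
assert torch.allclose(loss_a, loss_b, atol=1e-6)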
Example #21
Source File: parts.py From NeMo with Apache License 2.0 | 5 votes |
def forward(self, hidden_states):
    output_states = hidden_states[:]
    for i in range(self.layers):
        output_states = getattr(self, f'layer{i}')(output_states)

    if self.log_softmax:
        # TODO: make it work with float16
        output_states = torch.log_softmax(
            output_states.float(), dim=-1).to(hidden_states.dtype)
    return output_states
Example #22
Source File: decoder.py From adviser with GNU General Public License v3.0 | 5 votes |
def forward_one_step(self, tgt, tgt_mask, memory, cache=None):
    """Forward one step.

    :param torch.Tensor tgt: input token ids, int64 (batch, maxlen_out)
    :param torch.Tensor tgt_mask: input token mask, (batch, maxlen_out)
        dtype=torch.uint8 in PyTorch 1.2-
        dtype=torch.bool in PyTorch 1.2+ (include 1.2)
    :param torch.Tensor memory: encoded memory, float32 (batch, maxlen_in, feat)
    :param List[torch.Tensor] cache: cached output list of (batch, max_time_out-1, size)
    :return y, cache: NN output value and cache per `self.decoders`.
        `y.shape` is (batch, maxlen_out, token)
    :rtype: Tuple[torch.Tensor, List[torch.Tensor]]
    """
    x = self.embed(tgt)
    if cache is None:
        cache = [None] * len(self.decoders)
    new_cache = []
    for c, decoder in zip(cache, self.decoders):
        x, tgt_mask, memory, memory_mask = decoder(x, tgt_mask, memory, None, cache=c)
        new_cache.append(x)

    if self.normalize_before:
        y = self.after_norm(x[:, -1])
    else:
        y = x[:, -1]
    if self.output_layer is not None:
        y = torch.log_softmax(self.output_layer(y), dim=-1)

    return y, new_cache

# beam search API (see ScorerInterface)
Example #23
Source File: pretrain_mnist_clf.py From BiAAE with MIT License | 5 votes |
def get_logits(self, x):
    return torch.log_softmax(self.forward(x), dim=-1)
Example #24
Source File: test_beam_search.py From OpenNMT-py with MIT License | 5 votes |
def third_step(self, beam, expected_beam_scores, expected_len_pen):
    # assumes beam 0 finished on last step
    scores_3 = torch.log_softmax(torch.tensor(
        [[0, 0, 5000, 0, 5000, .51, .2, 0],  # beam 0 shouldn't cont
         [0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 5000, 0, 0],
         [0, 0, 0, .2, .2, .2, .2, .2],
         [0, 0, 50, 0, .2, .2, .2, .2]]  # beam 4 -> beam 1 should die
    ), dim=1)
    scores_3 = scores_3.repeat(self.BATCH_SZ, 1)

    beam.advance(deepcopy(scores_3), self.random_attn())

    expected_beam_scores[:, 0::self.BEAM_SZ] = self.DEAD_SCORE
    new_scores = scores_3 + expected_beam_scores.view(-1).unsqueeze(1)
    expected_beam_scores, unreduced_preds = new_scores\
        .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
        .topk(self.BEAM_SZ, -1)
    expected_bptr_3 = unreduced_preds / self.N_WORDS
    # [5, 2, 6, 1, 0] repeat self.BATCH_SZ, so beam 1 predicts EOS!
    expected_preds_3 = unreduced_preds - expected_bptr_3 * self.N_WORDS

    self.assertTrue(beam.topk_log_probs.allclose(
        expected_beam_scores))
    self.assertTrue(beam.topk_scores.allclose(
        expected_beam_scores / expected_len_pen))
    self.assertTrue(beam.topk_ids.equal(expected_preds_3))
    self.assertTrue(beam.current_backptr.equal(expected_bptr_3))
    self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
    # new beam 1 finished
    self.assertTrue(beam.is_finished[:, 1].all())
    # new beam 1 is old beam 4
    self.assertTrue(expected_bptr_3[:, 1].eq(4).all())
    beam.update_finished()
    self.assertTrue(beam.top_beam_finished.all())
    self.assertTrue(beam.done)
    return expected_beam_scores
Example #25
Source File: test_beam_search.py From OpenNMT-py with MIT License | 5 votes |
def first_step(self, beam, expected_beam_scores, expected_len_pen):
    # no EOS's yet
    assert beam.is_finished.sum() == 0
    scores_1 = torch.log_softmax(torch.tensor(
        [[0, 0, 0, .3, 0, .51, .2, 0],
         [0, 0, 1.5, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, .49, .48, 0, 0],
         [0, 0, 0, .2, .2, .2, .2, .2],
         [0, 0, 0, .2, .2, .2, .2, .2]]
    ), dim=1)
    scores_1 = scores_1.repeat(self.BATCH_SZ, 1)

    beam.advance(deepcopy(scores_1), self.random_attn())

    new_scores = scores_1 + expected_beam_scores.view(-1).unsqueeze(1)
    expected_beam_scores, unreduced_preds = new_scores\
        .view(self.BATCH_SZ, self.BEAM_SZ * self.N_WORDS)\
        .topk(self.BEAM_SZ, -1)
    expected_bptr_1 = unreduced_preds / self.N_WORDS
    # [5, 3, 2, 6, 0], so beam 2 predicts EOS!
    expected_preds_1 = unreduced_preds - expected_bptr_1 * self.N_WORDS

    self.assertTrue(beam.topk_log_probs.allclose(expected_beam_scores))
    self.assertTrue(beam.topk_scores.allclose(
        expected_beam_scores / expected_len_pen))
    self.assertTrue(beam.topk_ids.equal(expected_preds_1))
    self.assertTrue(beam.current_backptr.equal(expected_bptr_1))
    self.assertEqual(beam.is_finished.sum(), self.BATCH_SZ)
    self.assertTrue(beam.is_finished[:, 2].all())  # beam 2 finished
    beam.update_finished()
    self.assertFalse(beam.top_beam_finished.any())
    self.assertFalse(beam.done)
    return expected_beam_scores
Example #26
Source File: mtpnet.py From jsis3d with MIT License | 5 votes |
def forward(self, x):
    x = self.net(x)
    logits = self.fc1(x)
    logits = logits.transpose(2, 1)
    logits = torch.log_softmax(logits, dim=-1)
    embedded = self.fc2(x)
    embedded = embedded.transpose(2, 1)
    return logits, embedded
Example #27
Source File: label_smooth.py From pytorch-loss with MIT License | 5 votes |
def __init__(self, lb_smooth=0.1, reduction='mean', ignore_index=-100):
    super(LabelSmoothSoftmaxCEV1, self).__init__()
    self.lb_smooth = lb_smooth
    self.reduction = reduction
    self.lb_ignore = ignore_index
    self.log_softmax = nn.LogSoftmax(dim=1)
Example #28
Source File: test_greedy_search.py From OpenNMT-py with MIT License | 5 votes |
def test_doesnt_predict_eos_if_shorter_than_min_len(self):
    # batch 0 will always predict EOS. The other batches will predict
    # non-eos scores.
    for batch_sz in [1, 3]:
        n_words = 100
        _non_eos_idxs = [47]
        valid_score_dist = torch.log_softmax(torch.tensor(
            [6., 5.]), dim=0)
        min_length = 5
        eos_idx = 2
        lengths = torch.randint(0, 30, (batch_sz,))
        samp = GreedySearch(
            0, 1, 2, batch_sz, min_length, False, set(), False, 30, 1., 1)
        samp.initialize(torch.zeros(1), lengths)
        all_attns = []
        for i in range(min_length + 4):
            word_probs = torch.full(
                (batch_sz, n_words), -float('inf'))
            # "best" prediction is eos - that should be blocked
            word_probs[0, eos_idx] = valid_score_dist[0]
            # include at least one prediction OTHER than EOS
            # that is greater than -1e20
            word_probs[0, _non_eos_idxs[0]] = valid_score_dist[1]
            word_probs[1:, _non_eos_idxs[0] + i] = 0
            attns = torch.randn(1, batch_sz, 53)
            all_attns.append(attns)
            samp.advance(word_probs, attns)
            if i < min_length:
                self.assertTrue(
                    samp.topk_scores[0].allclose(valid_score_dist[1]))
                self.assertTrue(
                    samp.topk_scores[1:].eq(0).all())
            elif i == min_length:
                # now batch 0 has ended and no others have
                self.assertTrue(samp.is_finished[0, :].eq(1).all())
                self.assertTrue(samp.is_finished[1:, 1:].eq(0).all())
            else:  # i > min_length
                break
Example #29
Source File: label_smooth.py From pytorch-loss with MIT License | 5 votes |
def forward(self, logits, label):
    '''
    args: logits: tensor of shape (N, C, H, W)
    args: label: tensor of shape (N, H, W)
    '''
    # overcome ignored label
    logits = logits.float()  # use fp32 to avoid nan
    with torch.no_grad():
        num_classes = logits.size(1)
        label = label.clone().detach()
        ignore = label == self.lb_ignore
        n_valid = (ignore == 0).sum()
        label[ignore] = 0
        lb_pos, lb_neg = 1. - self.lb_smooth, self.lb_smooth / num_classes
        lb_one_hot = torch.empty_like(logits).fill_(
            lb_neg).scatter_(1, label.unsqueeze(1), lb_pos).detach()

    logs = self.log_softmax(logits)
    loss = -torch.sum(logs * lb_one_hot, dim=1)
    loss[ignore] = 0
    if self.reduction == 'mean':
        loss = loss.sum() / n_valid
    if self.reduction == 'sum':
        loss = loss.sum()
    return loss


##
# version 2: user derived grad computation
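A hypothetical usage sketch of the class from Examples #27 and #29 (it relies on the class definition above; the segmentation-style shapes and the ignore-index value are assumptions, not taken from the project):

import torch

criterion = LabelSmoothSoftmaxCEV1(lb_smooth=0.1, reduction='mean', ignore_index=255)
logits = torch.randn(2, 19, 64, 64)        # (N, C, H, W) raw scores
label = torch.randint(0, 19, (2, 64, 64))  # (N, H, W) class indices
loss = criterion(logits, label)            # scalar when reduction='mean'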
Example #30
Source File: affinity_loss.py From pytorch-loss with MIT License | 5 votes |
def forward(self, logits, labels):
    ignore_mask = labels.cpu() == self.ignore_lb
    n_valid = ignore_mask.numel() - ignore_mask.sum().item()
    indices = [
        # center, edge
        ((1, None, None, None), (None, -1, None, None)),  # up
        ((None, -1, None, None), (1, None, None, None)),  # down
        ((None, None, 1, None), (None, None, None, -1)),  # left
        ((None, None, None, -1), (None, None, 1, None)),  # right
        ((1, None, 1, None), (None, -1, None, -1)),  # up-left
        ((1, None, None, -1), (None, -1, 1, None)),  # up-right
        ((None, -1, 1, None), (1, None, None, -1)),  # down-left
        ((None, -1, None, -1), (1, None, 1, None)),  # down-right
    ]

    losses = []
    probs = torch.softmax(logits, dim=1)
    log_probs = torch.log_softmax(logits, dim=1)
    for idx_c, idx_e in indices:
        lbcenter = labels[:, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]].detach()
        lbedge = labels[:, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]].detach()
        igncenter = ignore_mask[:, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]].detach()
        ignedge = ignore_mask[:, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]].detach()
        # log-probabilities of the center and edge pixels (from log_softmax above)
        lgp_center = log_probs[:, :, idx_c[0]:idx_c[1], idx_c[2]:idx_c[3]]
        lgp_edge = log_probs[:, :, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]]
        prob_edge = probs[:, :, idx_e[0]:idx_e[1], idx_e[2]:idx_e[3]]
        kldiv = (prob_edge * (lgp_edge - lgp_center)).sum(dim=1)
        kldiv[ignedge | igncenter] = 0
        loss = torch.where(
            lbcenter == lbedge,
            self.lambda_edge * kldiv,
            self.lambda_not_edge * F.relu(self.kl_margin - kldiv, inplace=True)
        ).sum() / n_valid
        losses.append(loss)
    return sum(losses) / 8