Python allennlp.nn.util.sequence_cross_entropy_with_logits() Examples
The following are 21 code examples of allennlp.nn.util.sequence_cross_entropy_with_logits(), collected from open-source projects.
The source file, project, and license are noted above each example.
You may also want to check out the other available functions and classes of the module
allennlp.nn.util.
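For orientation, here is a minimal sketch of the call as it is exercised throughout the examples below; the tensor names and sizes are illustrative, not taken from any one project:

import torch
from allennlp.nn.util import sequence_cross_entropy_with_logits

batch_size, seq_len, num_classes = 2, 5, 4
logits = torch.randn(batch_size, seq_len, num_classes)           # unnormalized scores per token
targets = torch.randint(0, num_classes, (batch_size, seq_len))   # gold class index per token
weights = torch.ones(batch_size, seq_len)                        # 1.0 for real tokens, 0.0 for padding

# Default behaviour: a scalar loss, averaged per sequence and then over the batch.
loss = sequence_cross_entropy_with_logits(logits, targets, weights)

# Per-sequence losses instead of a single scalar (see Examples #2, #14 and #21).
per_sequence_loss = sequence_cross_entropy_with_logits(logits, targets, weights, average=None)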
Example #1
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_smooths_labels_correctly(self):
    tensor = torch.rand([1, 3, 4])
    targets = torch.LongTensor(numpy.random.randint(0, 3, [1, 3]))
    weights = torch.ones([2, 3])
    loss = util.sequence_cross_entropy_with_logits(
        tensor, targets, weights, label_smoothing=0.1
    )
    correct_loss = 0.0
    for prediction, label in zip(tensor.squeeze(0), targets.squeeze(0)):
        prediction = torch.nn.functional.log_softmax(prediction, dim=-1)
        correct_loss += prediction[label] * 0.9
        # incorrect elements
        correct_loss += prediction.sum() * 0.1 / 4
    # Average over sequence.
    correct_loss = -correct_loss / 3
    numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy())
Example #2
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_averages_batch_correctly(self):
    # test batch average is the same as dividing the batch averaged
    # loss by the number of batches containing any non-padded tokens.
    tensor = torch.rand([5, 7, 4])
    tensor[0, 3:, :] = 0
    tensor[1, 4:, :] = 0
    tensor[2, 2:, :] = 0
    tensor[3, :, :] = 0
    weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
    targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
    targets *= weights

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)
    vector_loss = util.sequence_cross_entropy_with_logits(
        tensor, targets, weights, average=None
    )
    # Batch has one completely padded row, so divide by 4.
    assert loss.data.numpy() == vector_loss.sum().item() / 4
Example #3
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_averages_token_correctly(self):
    # test token average is the same as multiplying the per-batch loss
    # with the per-batch weights and dividing by the total weight
    tensor = torch.rand([5, 7, 4])
    tensor[0, 3:, :] = 0
    tensor[1, 4:, :] = 0
    tensor[2, 2:, :] = 0
    tensor[3, :, :] = 0
    weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
    targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
    targets *= weights

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, average="token")
    vector_loss = util.sequence_cross_entropy_with_logits(
        tensor, targets, weights, average=None
    )
    total_token_loss = (vector_loss * weights.float().sum(dim=-1)).sum()
    average_token_loss = (total_token_loss / weights.float().sum()).detach()
    assert_almost_equal(loss.detach().item(), average_token_loss.item(), decimal=5)
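Read together with Example #2, this test pins down the three averaging modes; the summary below paraphrases what the two tests assert:

average="batch" (the default)  ->  scalar: mean of the per-sequence average losses, taken over sequences that contain at least one non-padded token
average="token"                ->  scalar: sum of all per-token losses divided by the total weight
average=None                   ->  tensor of shape (batch_size,): the per-sequence average losses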
Example #4
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_gamma_correctly(self):
    batch = 1
    length = 3
    classes = 4
    gamma = abs(numpy.random.randn())  # [0, +inf)

    tensor = torch.rand([batch, length, classes])
    targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
    weights = torch.ones([batch, length])

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, gamma=gamma)

    correct_loss = 0.0
    for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
        p = torch.nn.functional.softmax(logit, dim=-1)
        pt = p[label]
        ft = (1 - pt) ** gamma
        correct_loss += -pt.log() * ft
    # Average over sequence.
    correct_loss = correct_loss / length
    numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy())
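In other words, passing gamma applies a focal-loss style modulating factor: the loop above accumulates, for each token,

loss_t = -((1 - p_t) ** gamma) * log(p_t)

where p_t is the softmax probability assigned to the gold label, so confidently classified tokens are down-weighted before the loss is averaged over the sequence.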
Example #5
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_alpha_list_correctly(self):
    batch = 1
    length = 3
    classes = 4  # alpha float for binary class only
    alpha = abs(numpy.random.randn(classes))  # [0, +inf)

    tensor = torch.rand([batch, length, classes])
    targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
    weights = torch.ones([batch, length])

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, alpha=alpha)

    correct_loss = 0.0
    for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
        logp = torch.nn.functional.log_softmax(logit, dim=-1)
        logpt = logp[label]
        at = alpha[label]
        correct_loss += -logpt * at
    # Average over sequence.
    correct_loss = correct_loss / length
    numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy())
Example #6
Source File: tag_decoder.py From udify with MIT License | 6 votes |
def _loss(self, hidden, mask, gold_tags, output_dim):
    logits = self.task_output(hidden)
    reshaped_log_probs = logits.view(-1, self.num_classes)
    class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(output_dim)

    output_dict = {"logits": logits, "class_probabilities": class_probabilities}

    if gold_tags is not None:
        output_dict["loss"] = sequence_cross_entropy_with_logits(logits,
                                                                 gold_tags,
                                                                 mask,
                                                                 label_smoothing=self.label_smoothing)
        for metric in self.metrics.values():
            metric(logits, gold_tags, mask.float())

    return output_dict
Example #7
Source File: lstm.py From allennlp_tutorial with MIT License | 6 votes |
def forward(self,
            tokens: Dict[str, torch.Tensor],
            label: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
    mask = get_text_field_mask(tokens)

    embedded = self._embedder(tokens)
    encoded = self._encoder(embedded, mask)
    classified = self._classifier(encoded)

    output: Dict[str, torch.Tensor] = {}
    output['logits'] = classified

    if label is not None:
        self._f1(classified, label, mask)
        output['loss'] = sequence_cross_entropy_with_logits(classified, label, mask)

    return output
Example #8
Source File: util_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_sequence_cross_entropy_with_logits_masks_loss_correctly(self):
    # test weight masking by checking that a tensor with non-zero values in
    # masked positions returns the same loss as a tensor with zeros in those
    # positions.
    tensor = torch.rand([5, 7, 4])
    tensor[0, 3:, :] = 0
    tensor[1, 4:, :] = 0
    tensor[2, 2:, :] = 0
    tensor[3, :, :] = 0
    weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
    tensor2 = tensor.clone()
    tensor2[0, 3:, :] = 2
    tensor2[1, 4:, :] = 13
    tensor2[2, 2:, :] = 234
    tensor2[3, :, :] = 65
    targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
    targets *= weights

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)
    loss2 = util.sequence_cross_entropy_with_logits(tensor2, targets, weights)
    assert loss.data.numpy() == loss2.data.numpy()
Example #9
Source File: util_test.py From magnitude with MIT License | 6 votes |
def test_sequence_cross_entropy_with_logits_masks_loss_correctly(self):
    # test weight masking by checking that a tensor with non-zero values in
    # masked positions returns the same loss as a tensor with zeros in those
    # positions.
    tensor = torch.rand([5, 7, 4])
    tensor[0, 3:, :] = 0
    tensor[1, 4:, :] = 0
    tensor[2, 2:, :] = 0
    tensor[3, :, :] = 0
    weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
    tensor2 = tensor.clone()
    tensor2[0, 3:, :] = 2
    tensor2[1, 4:, :] = 13
    tensor2[2, 2:, :] = 234
    tensor2[3, :, :] = 65
    targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
    targets *= weights

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)
    loss2 = util.sequence_cross_entropy_with_logits(tensor2, targets, weights)
    assert loss.data.numpy() == loss2.data.numpy()
Example #10
Source File: util_test.py From magnitude with MIT License | 6 votes |
def test_sequence_cross_entropy_with_logits_smooths_labels_correctly(self):
    tensor = torch.rand([1, 3, 4])
    targets = torch.LongTensor(numpy.random.randint(0, 3, [1, 3]))
    weights = torch.ones([2, 3])
    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights,
                                                   label_smoothing=0.1)
    correct_loss = 0.0
    for prediction, label in izip(tensor.squeeze(0), targets.squeeze(0)):
        prediction = torch.nn.functional.log_softmax(prediction, dim=-1)
        correct_loss += prediction[label] * 0.9
        # incorrect elements
        correct_loss += prediction.sum() * 0.1 / 4
    # Average over sequence.
    correct_loss = -correct_loss / 3
    numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy())
Example #11
Source File: simple_seq2seq.py From magnitude with MIT License | 5 votes |
def _get_loss(logits, targets, target_mask):
    u"""
    Takes logits (unnormalized outputs from the decoder) of size (batch_size,
    num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1)
    and corresponding masks of size (batch_size, num_decoding_steps+1) steps and computes cross
    entropy loss while taking the mask into account.

    The length of ``targets`` is expected to be greater than that of ``logits`` because the
    decoder does not need to compute the output corresponding to the last timestep of
    ``targets``. This method aligns the inputs appropriately to compute the loss.

    During training, we want the logit corresponding to timestep i to be similar to the target
    token from timestep i + 1. That is, the targets should be shifted by one timestep for
    appropriate comparison. Consider a single example where the target has 3 words, and
    padding is to 7 tokens.

       The complete sequence would correspond to <S> w1 w2 w3 <E> <P> <P>
       and the mask would be                      1  1  1  1   1   0   0
       and let the logits be                      l1 l2 l3 l4  l5  l6

    We actually need to compare:

       the sequence           w1 w2 w3 <E> <P> <P>
       with masks              1  1  1   1   0   0
       against                l1 l2 l3  l4  l5  l6
       (where the input was)  <S> w1 w2  w3 <E> <P>
    """
    relevant_targets = targets[:, 1:].contiguous()  # (batch_size, num_decoding_steps)
    relevant_mask = target_mask[:, 1:].contiguous()  # (batch_size, num_decoding_steps)
    loss = sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)
    return loss

#overrides
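The shift described in the docstring can be checked directly on a toy target row; the token ids below are made up purely for illustration:

import torch

# <S>=1, w1=5, w2=6, w3=7, <E>=2, <P>=0, matching the picture in the docstring
targets = torch.tensor([[1, 5, 6, 7, 2, 0, 0]])        # (batch_size, num_decoding_steps + 1)
target_mask = torch.tensor([[1, 1, 1, 1, 1, 0, 0]])

relevant_targets = targets[:, 1:]      # [[5, 6, 7, 2, 0, 0]] -> w1 w2 w3 <E> <P> <P>
relevant_mask = target_mask[:, 1:]     # [[1, 1, 1, 1, 0, 0]]
# these now line up, position for position, with the six logits l1 ... l6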
Example #12
Source File: custom_autoregressive_seq2seq_decoder.py From summarus with Apache License 2.0 | 5 votes |
def _get_loss(self,
              logits: torch.LongTensor,
              targets: torch.LongTensor,
              target_mask: torch.LongTensor) -> torch.Tensor:
    # shape: (batch_size, num_decoding_steps)
    relevant_targets = targets[:, 1:].contiguous()

    # shape: (batch_size, num_decoding_steps)
    relevant_mask = target_mask[:, 1:].contiguous()

    return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask,
                                                   label_smoothing=self._label_smoothing_ratio)
Example #13
Source File: lstm_character.py From allennlp_tutorial with MIT License | 5 votes |
def forward(self,
            tokens: Dict[str, torch.Tensor],
            label: torch.Tensor) -> Dict[str, torch.Tensor]:
    # split the namespace into characters and tokens, since they
    # aren't the same shape
    characters = { 'characters': tokens['characters'] }
    tokens = { 'tokens': tokens['tokens'] }

    # get the tokens mask
    mask = get_text_field_mask(tokens)
    # get the characters mask, for which we use the nifty `num_wrapping_dims` argument
    character_mask = get_text_field_mask(characters, num_wrapping_dims=1)

    # decompose the shape into named parameters for future use
    batch_size, sequence_length, word_length = character_mask.shape

    # embed the characters
    embedded_characters = self._character_embedder(characters)

    # convert the embeddings from 4d embeddings to a 3d tensor
    # the first dimension of this tensor is (batch_size * num_tokens)
    # (i.e. each word is its own instance in a batch)
    embedded_characters = embedded_characters.view(batch_size*sequence_length, word_length, -1)
    character_mask = character_mask.view(batch_size*sequence_length, word_length)

    # run the character LSTM
    encoded_characters = self._character_encoder(embedded_characters, character_mask)

    # reshape the output into a 3d tensor we can concatenate with the word embeddings
    encoded_characters = encoded_characters.view(batch_size, sequence_length, -1)

    # run the standard LSTM NER pipeline
    embedded = self._word_embedder(tokens)
    embedded = torch.cat([embedded, encoded_characters], dim=2)
    encoded = self._encoder(embedded, mask)
    classified = self._classifier(encoded)

    # collect the logits; the snippet uses `output` below, so it is initialized here
    # in the same way as the plain LSTM example from this tutorial (Example #7)
    output: Dict[str, torch.Tensor] = {}
    output['logits'] = classified

    if label is not None:
        self._f1(classified, label, mask)
        output["loss"] = sequence_cross_entropy_with_logits(classified, label, mask)

    return output
Example #14
Source File: updown_captioner.py From updown-baseline with MIT License | 5 votes |
def _get_loss(
    self, logits: torch.Tensor, targets: torch.Tensor, target_mask: torch.Tensor
) -> torch.Tensor:
    r"""
    Compute cross entropy loss of predicted caption (logits) w.r.t. target caption. The cross
    entropy loss of caption is cross entropy loss at each time-step, summed.

    Parameters
    ----------
    logits: torch.Tensor
        A tensor of shape ``(batch_size, max_caption_length - 1, vocab_size)`` containing
        unnormalized log-probabilities of predicted captions.
    targets: torch.Tensor
        A tensor of shape ``(batch_size, max_caption_length - 1)`` of tokenized target captions.
    target_mask: torch.Tensor
        A mask over target captions, elements where mask is zero are ignored from loss
        computation. Here, we ignore ``@@UNKNOWN@@`` token (and hence padding tokens too
        because they are basically the same).

    Returns
    -------
    torch.Tensor
        A tensor of shape ``(batch_size, )`` containing cross entropy loss of captions, summed
        across time-steps.
    """
    # shape: (batch_size, )
    target_lengths = torch.sum(target_mask, dim=-1).float()

    # shape: (batch_size, )
    return target_lengths * sequence_cross_entropy_with_logits(
        logits, targets, target_mask, average=None
    )
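Because average=None returns, for each caption, the cross entropy averaged over its non-masked time-steps, multiplying by the number of non-masked time-steps recovers the summed loss the docstring promises. A minimal sketch of that identity, with made-up sizes:

import torch
from allennlp.nn.util import sequence_cross_entropy_with_logits

batch_size, max_len, vocab_size = 3, 7, 11
logits = torch.randn(batch_size, max_len, vocab_size)
targets = torch.randint(0, vocab_size, (batch_size, max_len))
target_mask = torch.ones(batch_size, max_len)
target_mask[:, 5:] = 0.0                       # pretend the last two positions are padding

target_lengths = target_mask.sum(dim=-1)       # non-masked steps per caption, shape (batch_size,)
per_step_average = sequence_cross_entropy_with_logits(logits, targets, target_mask, average=None)
summed_loss = target_lengths * per_step_average   # cross entropy summed over time-steps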
Example #15
Source File: simple_seq2seq_test.py From magnitude with MIT License | 5 votes |
def test_loss_is_computed_correctly(self):
    batch_size = 5
    num_decoding_steps = 5
    num_classes = 10
    sample_logits = torch.randn(batch_size, num_decoding_steps - 1, num_classes)
    sample_targets = torch.from_numpy(numpy.random.randint(0, num_classes,
                                                           (batch_size, num_decoding_steps)))
    # Mask should be either 0 or 1
    sample_mask = torch.from_numpy(numpy.random.randint(0, 2,
                                                        (batch_size, num_decoding_steps)))
    expected_loss = sequence_cross_entropy_with_logits(sample_logits,
                                                       sample_targets[:, 1:].contiguous(),
                                                       sample_mask[:, 1:].contiguous())
    # pylint: disable=protected-access
    actual_loss = self.model._get_loss(sample_logits, sample_targets, sample_mask)
    assert numpy.equal(expected_loss.data.numpy(), actual_loss.data.numpy())
Example #16
Source File: tag_decoder.py From udify with MIT License | 5 votes |
def _features_loss(self, hidden, mask, gold_tags, output_dict):
    if gold_tags is None:
        return

    for feature in self.features:
        logits = self.feature_outputs[feature](hidden)
        loss = sequence_cross_entropy_with_logits(logits,
                                                  gold_tags[feature],
                                                  mask,
                                                  label_smoothing=self.label_smoothing)
        loss /= len(self.features)
        output_dict["loss"] += loss

        for metric in self.features_metrics[feature].values():
            metric(logits, gold_tags[feature], mask.float())
Example #17
Source File: util_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_sequence_cross_entropy_with_logits_alpha_single_float_correctly(self):
    batch = 1
    length = 3
    classes = 2  # alpha float for binary class only
    alpha = (
        numpy.random.rand() if numpy.random.rand() > 0.5 else (1.0 - numpy.random.rand())
    )  # [0, 1]
    alpha = torch.tensor(alpha)

    tensor = torch.rand([batch, length, classes])
    targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
    weights = torch.ones([batch, length])

    loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, alpha=alpha)

    correct_loss = 0.0
    for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
        logp = torch.nn.functional.log_softmax(logit, dim=-1)
        logpt = logp[label]
        if label:
            at = alpha
        else:
            at = 1 - alpha
        correct_loss += -logpt * at
    # Average over sequence.
    correct_loss = correct_loss / length
    numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy())
Example #18
Source File: simple_tagger.py From magnitude with MIT License | 4 votes |
def forward(self,  # type: ignore
            tokens,
            tags=None,
            metadata=None):
    # pylint: disable=arguments-differ
    u"""
    Parameters
    ----------
    tokens : Dict[str, torch.LongTensor], required
        The output of ``TextField.as_array()``, which should typically be passed directly to a
        ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
        tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
        Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
        for the ``TokenIndexers`` when you created the ``TextField`` representing your
        sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
        which knows how to combine different word representations into a single vector per
        token in your input.
    tags : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer gold class labels of shape
        ``(batch_size, num_tokens)``.
    metadata : ``List[Dict[str, Any]]``, optional, (default = None)
        metadata containing the original words in the sentence to be tagged under a 'words' key.

    Returns
    -------
    An output dictionary consisting of:
    logits : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        unnormalised log probabilities of the tag classes.
    class_probabilities : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        a distribution of the tag classes per word.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimised.
    """
    embedded_text_input = self.text_field_embedder(tokens)
    batch_size, sequence_length, _ = embedded_text_input.size()
    mask = get_text_field_mask(tokens)
    encoded_text = self.encoder(embedded_text_input, mask)

    logits = self.tag_projection_layer(encoded_text)
    reshaped_log_probs = logits.view(-1, self.num_classes)
    class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                      sequence_length,
                                                                      self.num_classes])

    output_dict = {u"logits": logits, u"class_probabilities": class_probabilities}

    if tags is not None:
        loss = sequence_cross_entropy_with_logits(logits, tags, mask)
        for metric in list(self.metrics.values()):
            metric(logits, tags, mask.float())
        output_dict[u"loss"] = loss

    if metadata is not None:
        output_dict[u"words"] = [x[u"words"] for x in metadata]
    return output_dict

#overrides
Example #19
Source File: simple_bilm.py From swagaf with MIT License | 4 votes |
def forward(self, words: torch.Tensor, use_forward=True, use_reverse=True,
            compute_logprobs=False) -> Dict[str, Union[torch.Tensor, List[torch.Tensor]]]:
    """
    use this for training the LM
    :param words: [batch_size, N] words. assuming you're starting with BOS and ending with EOS here
    :return:
    """
    encoded_inputs = self.embed_words(words)
    mask = (words != 0).long()[:, 2:]

    word_targets = words[:, 1:-1].contiguous()
    result_dict = {
        'mask': mask,
        'word_targets': word_targets,
    }
    # TODO: try to reduce duplicate code here
    if use_forward:
        self.forward_lm.reset_states()
        forward_activation = self.forward_lm(encoded_inputs[:, :-2], mask)
        if compute_logprobs:
            # being memory efficient here is critical if the input tensors are large
            result_dict['forward_logprobs'] = self._chunked_logsoftmaxes(forward_activation,
                                                                         word_targets) * mask.float()
        else:
            result_dict['forward_logits'] = self.decoder(forward_activation)
            result_dict['forward_loss'] = sequence_cross_entropy_with_logits(result_dict['forward_logits'],
                                                                             word_targets,
                                                                             mask)
    if use_reverse:
        self.reverse_lm.reset_states()
        reverse_activation = self.reverse_lm(encoded_inputs[:, 2:], mask)
        if compute_logprobs:
            result_dict['reverse_logprobs'] = self._chunked_logsoftmaxes(reverse_activation,
                                                                         word_targets) * mask.float()
        else:
            result_dict['reverse_logits'] = self.decoder(reverse_activation)
            result_dict['reverse_loss'] = sequence_cross_entropy_with_logits(result_dict['reverse_logits'],
                                                                             word_targets,
                                                                             mask)
    return result_dict
Example #20
Source File: simple_tagger.py From HIT-SCIR-CoNLL2019 with Apache License 2.0 | 4 votes |
def forward(self,  # type: ignore
            tokens: Dict[str, torch.LongTensor],
            tags: torch.LongTensor = None,
            metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
    # pylint: disable=arguments-differ
    """
    Parameters
    ----------
    tokens : Dict[str, torch.LongTensor], required
        The output of ``TextField.as_array()``, which should typically be passed directly to a
        ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
        tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
        Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
        for the ``TokenIndexers`` when you created the ``TextField`` representing your
        sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
        which knows how to combine different word representations into a single vector per
        token in your input.
    tags : torch.LongTensor, optional (default = None)
        A torch tensor representing the sequence of integer gold class labels of shape
        ``(batch_size, num_tokens)``.
    metadata : ``List[Dict[str, Any]]``, optional, (default = None)
        metadata containing the original words in the sentence to be tagged under a 'words' key.

    Returns
    -------
    An output dictionary consisting of:
    logits : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        unnormalised log probabilities of the tag classes.
    class_probabilities : torch.FloatTensor
        A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
        a distribution of the tag classes per word.
    loss : torch.FloatTensor, optional
        A scalar loss to be optimised.
    """
    embedded_text_input = self.text_field_embedder(tokens)
    batch_size, sequence_length, _ = embedded_text_input.size()
    mask = get_text_field_mask(tokens)
    encoded_text = self.encoder(embedded_text_input, mask)

    logits = self.tag_projection_layer(encoded_text)
    reshaped_log_probs = logits.view(-1, self.num_classes)
    class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                      sequence_length,
                                                                      self.num_classes])

    output_dict = {"logits": logits, "class_probabilities": class_probabilities}

    if tags is not None:
        loss = sequence_cross_entropy_with_logits(logits, tags, mask)
        for metric in self.metrics.values():
            metric(logits, tags, mask.float())
        if self._f1_metric is not None:
            self._f1_metric(logits, tags, mask.float())
        output_dict["loss"] = loss

    if metadata is not None:
        output_dict["words"] = [x["words"] for x in metadata]
    return output_dict
Example #21
Source File: seq2seq_base.py From probnmn-clevr with MIT License | 4 votes |
def _get_loss(
    logits: torch.LongTensor, targets: torch.LongTensor, target_mask: torch.LongTensor
):
    r"""
    Override AllenNLP Seq2Seq model's provided ``_get_loss`` method, which returns sequence
    cross entropy averaged over batch by default. Instead, provide sequence cross entropy of
    each sequence in a batch separately.

    Extended Summary
    ----------------
    From AllenNLP documentation:

    Compute loss.

    Takes logits (unnormalized outputs from the decoder) of size (batch_size,
    num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1)
    and corresponding masks of size (batch_size, num_decoding_steps+1) steps and computes cross
    entropy loss while taking the mask into account.

    The length of ``targets`` is expected to be greater than that of ``logits`` because the
    decoder does not need to compute the output corresponding to the last timestep of
    ``targets``. This method aligns the inputs appropriately to compute the loss.

    During training, we want the logit corresponding to timestep i to be similar to the target
    token from timestep i + 1. That is, the targets should be shifted by one timestep for
    appropriate comparison. Consider a single example where the target has 3 words, and
    padding is to 7 tokens::

       The complete sequence would correspond to <S> w1 w2 w3 <E> <P> <P>
       and the mask would be                      1  1  1  1   1   0   0
       and let the logits be                      l1 l2 l3 l4  l5  l6

    We actually need to compare::

       the sequence           w1 w2 w3 <E> <P> <P>
       with masks              1  1  1   1   0   0
       against                l1 l2 l3  l4  l5  l6
       (where the input was)  <S> w1 w2  w3 <E> <P>
    """
    # shape: (batch_size, num_decoding_steps)
    relevant_targets = targets[:, 1:].contiguous()

    # shape: (batch_size, num_decoding_steps)
    relevant_mask = target_mask[:, 1:].contiguous()

    return sequence_cross_entropy_with_logits(
        logits, relevant_targets, relevant_mask, average=None
    )