Python allennlp.nn.util.sequence_cross_entropy_with_logits() Examples

The following are 21 code examples of allennlp.nn.util.sequence_cross_entropy_with_logits(), collected from open-source projects; the source file, project, and license for each example are noted above it. You may also want to check out the other available functions and classes of the allennlp.nn.util module.
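All of the examples share the same call shape, so a minimal orientation sketch may help before diving in: logits of shape (batch_size, sequence_length, num_classes), integer targets of shape (batch_size, sequence_length), and a 0/1 weights mask matching the targets. The keyword arguments shown (average, label_smoothing) are the ones exercised by the examples below; treat the exact defaults as an assumption about the 0.x-era AllenNLP API rather than authoritative documentation.

import torch
from allennlp.nn import util

batch_size, sequence_length, num_classes = 2, 5, 4
logits = torch.randn(batch_size, sequence_length, num_classes)      # unnormalised class scores
targets = torch.randint(0, num_classes, (batch_size, sequence_length))
weights = torch.ones(batch_size, sequence_length)                   # 1 = real token, 0 = padding
weights[1, 3:] = 0                                                   # pretend the second sequence is padded

# scalar loss, averaged over the non-empty sequences in the batch (default behaviour)
loss = util.sequence_cross_entropy_with_logits(logits, targets, weights)

# per-sequence losses of shape (batch_size,)
per_sequence = util.sequence_cross_entropy_with_logits(logits, targets, weights, average=None)

# optional label smoothing, as in several of the tests below
smoothed_loss = util.sequence_cross_entropy_with_logits(logits, targets, weights, label_smoothing=0.1)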
Example #1
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_smooths_labels_correctly(self):
        tensor = torch.rand([1, 3, 4])
        targets = torch.LongTensor(numpy.random.randint(0, 3, [1, 3]))

        weights = torch.ones([1, 3])
        loss = util.sequence_cross_entropy_with_logits(
            tensor, targets, weights, label_smoothing=0.1
        )

        correct_loss = 0.0
        for prediction, label in zip(tensor.squeeze(0), targets.squeeze(0)):
            prediction = torch.nn.functional.log_softmax(prediction, dim=-1)
            correct_loss += prediction[label] * 0.9
            # smoothing mass spread uniformly over all classes (including the gold one)
            correct_loss += prediction.sum() * 0.1 / 4
        # Average over sequence.
        correct_loss = -correct_loss / 3
        numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy()) 
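As a worked check of the arithmetic in this test (an illustration, not part of the test file): with label_smoothing=0.1 and 4 classes, the smoothed target distribution places 0.1 / 4 = 0.025 on every class plus the remaining 0.9 on the gold class, so accumulating 0.9 * log p[gold] + 0.025 * sum(log p) as the loop does is the same as taking the expected log-probability under that smoothed distribution.

import torch

# standalone check of the smoothing arithmetic, using toy numbers
log_probs = torch.log_softmax(torch.rand(4), dim=-1)
label = 2
label_smoothing = 0.1
num_classes = 4

# smoothed target distribution: uniform smoothing mass plus the remaining weight on the gold label
smoothed = torch.full((num_classes,), label_smoothing / num_classes)
smoothed[label] += 1.0 - label_smoothing

loss_from_distribution = -(smoothed * log_probs).sum()
loss_test_style = -(0.9 * log_probs[label] + log_probs.sum() * 0.1 / 4)
assert torch.allclose(loss_from_distribution, loss_test_style)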
Example #2
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_averages_batch_correctly(self):
        # Test that the default batch average equals the sum of the per-sequence
        # losses divided by the number of sequences containing any non-padded tokens.
        tensor = torch.rand([5, 7, 4])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, :, :] = 0
        weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
        targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
        targets *= weights

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)

        vector_loss = util.sequence_cross_entropy_with_logits(
            tensor, targets, weights, average=None
        )
        # Batch has one completely padded row, so divide by 4.
        assert loss.data.numpy() == vector_loss.sum().item() / 4 
Example #3
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_averages_token_correctly(self):
        # Test that the token average equals the per-sequence losses weighted by
        # each sequence's token count, divided by the total number of non-padded tokens.
        tensor = torch.rand([5, 7, 4])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, :, :] = 0
        weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
        targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
        targets *= weights

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, average="token")

        vector_loss = util.sequence_cross_entropy_with_logits(
            tensor, targets, weights, average=None
        )
        total_token_loss = (vector_loss * weights.float().sum(dim=-1)).sum()
        average_token_loss = (total_token_loss / weights.float().sum()).detach()
        assert_almost_equal(loss.detach().item(), average_token_loss.item(), decimal=5) 
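The two averaging tests above cover the `average` argument. As a hedged summary of the behaviour they assert (read from the tests themselves, not from library documentation): average=None returns one token-averaged loss per sequence, the default "batch" averages those per-sequence losses over the sequences that contain at least one non-padded token, and "token" weights each sequence by its token count, i.e. averages over all non-padded tokens.

import torch
from allennlp.nn import util

logits = torch.randn(3, 4, 5)
targets = torch.randint(0, 5, (3, 4))
weights = torch.tensor([[1.0, 1.0, 1.0, 0.0],
                        [1.0, 1.0, 0.0, 0.0],
                        [1.0, 1.0, 1.0, 1.0]])

per_sequence = util.sequence_cross_entropy_with_logits(logits, targets, weights, average=None)
batch_avg = util.sequence_cross_entropy_with_logits(logits, targets, weights)             # default "batch"
token_avg = util.sequence_cross_entropy_with_logits(logits, targets, weights, average="token")

# reproduce both scalars from the per-sequence vector
assert torch.isclose(batch_avg, per_sequence.mean())   # all three sequences are non-empty here
token_counts = weights.sum(dim=-1)
assert torch.isclose(token_avg, (per_sequence * token_counts).sum() / token_counts.sum())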
Example #4
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_gamma_correctly(self):
        batch = 1
        length = 3
        classes = 4
        gamma = abs(numpy.random.randn())  # [0, +inf)

        tensor = torch.rand([batch, length, classes])
        targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
        weights = torch.ones([batch, length])

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, gamma=gamma)

        correct_loss = 0.0
        for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
            p = torch.nn.functional.softmax(logit, dim=-1)
            pt = p[label]
            ft = (1 - pt) ** gamma
            correct_loss += -pt.log() * ft
        # Average over sequence.
        correct_loss = correct_loss / length
        numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy()) 
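For readers who haven't met the gamma argument before: the loop above is the focal-loss modulation of Lin et al. (2017), which scales each token's cross entropy by (1 - p_t) ** gamma so that confidently-correct tokens contribute less. A vectorised restatement of that per-token term (an illustration, not the library's internal code):

import torch

def focal_term(logits: torch.Tensor, labels: torch.Tensor, gamma: float) -> torch.Tensor:
    log_probs = torch.log_softmax(logits, dim=-1)                     # (..., num_classes)
    log_pt = log_probs.gather(-1, labels.unsqueeze(-1)).squeeze(-1)   # log-probability of the gold class
    pt = log_pt.exp()
    return -((1 - pt) ** gamma) * log_pt                              # (1 - p_t)^gamma * cross entropy

logits = torch.randn(3, 4)
labels = torch.randint(0, 4, (3,))
print(focal_term(logits, labels, gamma=2.0))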
Example #5
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_alpha_list_correctly(self):
        batch = 1
        length = 3
        classes = 4  # alpha is given as one weight per class here
        alpha = abs(numpy.random.randn(classes))  # [0, +inf)

        tensor = torch.rand([batch, length, classes])
        targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
        weights = torch.ones([batch, length])

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, alpha=alpha)

        correct_loss = 0.0
        for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
            logp = torch.nn.functional.log_softmax(logit, dim=-1)
            logpt = logp[label]
            at = alpha[label]
            correct_loss += -logpt * at
        # Average over sequence.
        correct_loss = correct_loss / length
        numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy()) 
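The per-class alpha list plays the complementary role: each token's cross entropy is multiplied by the weight assigned to its gold class (the alpha_t factor of focal loss), which is useful for up-weighting rare classes. A minimal restatement of the term the loop computes, again illustrative rather than the library's implementation:

import torch

def alpha_weighted_term(logits: torch.Tensor, labels: torch.Tensor, alpha: torch.Tensor) -> torch.Tensor:
    log_probs = torch.log_softmax(logits, dim=-1)
    log_pt = log_probs.gather(-1, labels.unsqueeze(-1)).squeeze(-1)   # log-probability of the gold class
    return -alpha[labels] * log_pt                                    # weight the loss by the gold class's alpha

logits = torch.randn(3, 4)
labels = torch.randint(0, 4, (3,))
alpha = torch.tensor([1.0, 0.5, 2.0, 1.0])                            # hypothetical per-class weights
print(alpha_weighted_term(logits, labels, alpha))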
Example #6
Source File: tag_decoder.py    From udify with MIT License
def _loss(self, hidden, mask, gold_tags, output_dim):
        logits = self.task_output(hidden)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view(output_dim)

        output_dict = {"logits": logits, "class_probabilities": class_probabilities}

        if gold_tags is not None:
            output_dict["loss"] = sequence_cross_entropy_with_logits(logits,
                                                                     gold_tags,
                                                                     mask,
                                                                     label_smoothing=self.label_smoothing)
            for metric in self.metrics.values():
                metric(logits, gold_tags, mask.float())

        return output_dict 
Example #7
Source File: lstm.py    From allennlp_tutorial with MIT License
def forward(self,
                tokens: Dict[str, torch.Tensor],
                label: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        mask = get_text_field_mask(tokens)

        embedded = self._embedder(tokens)
        encoded = self._encoder(embedded, mask)
        classified = self._classifier(encoded)

        output: Dict[str, torch.Tensor] = {}
        output['logits'] = classified

        if label is not None:
            self._f1(classified, label, mask)
            output['loss'] = sequence_cross_entropy_with_logits(classified, label, mask)

        return output 
Example #8
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_masks_loss_correctly(self):

        # test weight masking by checking that a tensor with non-zero values in
        # masked positions returns the same loss as a tensor with zeros in those
        # positions.
        tensor = torch.rand([5, 7, 4])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, :, :] = 0
        weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
        tensor2 = tensor.clone()
        tensor2[0, 3:, :] = 2
        tensor2[1, 4:, :] = 13
        tensor2[2, 2:, :] = 234
        tensor2[3, :, :] = 65
        targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
        targets *= weights

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)
        loss2 = util.sequence_cross_entropy_with_logits(tensor2, targets, weights)
        assert loss.data.numpy() == loss2.data.numpy() 
Example #9
Source File: util_test.py    From magnitude with MIT License
def test_sequence_cross_entropy_with_logits_masks_loss_correctly(self):

        # test weight masking by checking that a tensor with non-zero values in
        # masked positions returns the same loss as a tensor with zeros in those
        # positions.
        tensor = torch.rand([5, 7, 4])
        tensor[0, 3:, :] = 0
        tensor[1, 4:, :] = 0
        tensor[2, 2:, :] = 0
        tensor[3, :, :] = 0
        weights = (tensor != 0.0)[:, :, 0].long().squeeze(-1)
        tensor2 = tensor.clone()
        tensor2[0, 3:, :] = 2
        tensor2[1, 4:, :] = 13
        tensor2[2, 2:, :] = 234
        tensor2[3, :, :] = 65
        targets = torch.LongTensor(numpy.random.randint(0, 3, [5, 7]))
        targets *= weights

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights)
        loss2 = util.sequence_cross_entropy_with_logits(tensor2, targets, weights)
        assert loss.data.numpy() == loss2.data.numpy() 
Example #10
Source File: util_test.py    From magnitude with MIT License
def test_sequence_cross_entropy_with_logits_smooths_labels_correctly(self):
        tensor = torch.rand([1, 3, 4])
        targets = torch.LongTensor(numpy.random.randint(0, 3, [1, 3]))

        weights = torch.ones([1, 3])
        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, label_smoothing=0.1)

        correct_loss = 0.0
        for prediction, label in izip(tensor.squeeze(0), targets.squeeze(0)):
            prediction = torch.nn.functional.log_softmax(prediction, dim=-1)
            correct_loss += prediction[label] * 0.9
            # smoothing mass spread uniformly over all classes (including the gold one)
            correct_loss += prediction.sum() * 0.1/4
        # Average over sequence.
        correct_loss = - correct_loss / 3
        numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy()) 
Example #11
Source File: simple_seq2seq.py    From magnitude with MIT License
def _get_loss(logits,
                  targets,
                  target_mask):
        u"""
        Takes logits (unnormalized outputs from the decoder) of size (batch_size,
        num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1)
        and corresponding masks of size (batch_size, num_decoding_steps+1) and computes cross
        entropy loss while taking the mask into account.

        The length of ``targets`` is expected to be greater than that of ``logits`` because the
        decoder does not need to compute the output corresponding to the last timestep of
        ``targets``. This method aligns the inputs appropriately to compute the loss.

        During training, we want the logit corresponding to timestep i to be similar to the target
        token from timestep i + 1. That is, the targets should be shifted by one timestep for
        appropriate comparison.  Consider a single example where the target has 3 words, and
        padding is to 7 tokens.
           The complete sequence would correspond to <S> w1  w2  w3  <E> <P> <P>
           and the mask would be                     1   1   1   1   1   0   0
           and let the logits be                     l1  l2  l3  l4  l5  l6
        We actually need to compare:
           the sequence           w1  w2  w3  <E> <P> <P>
           with masks             1   1   1   1   0   0
           against                l1  l2  l3  l4  l5  l6
           (where the input was)  <S> w1  w2  w3  <E> <P>
        """
        relevant_targets = targets[:, 1:].contiguous()  # (batch_size, num_decoding_steps)
        relevant_mask = target_mask[:, 1:].contiguous()  # (batch_size, num_decoding_steps)
        loss = sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)
        return loss

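The alignment described in the docstring is easy to see with toy tensors (invented numbers, not from the project): dropping the first column of the targets and of the mask lines both up with the num_decoding_steps logits.

import torch

# <S> w1 w2 w3 <E> <P> <P> encoded as ids, with 0 used for padding (toy example)
targets = torch.tensor([[1, 5, 6, 7, 2, 0, 0]])        # (batch_size, num_decoding_steps + 1)
target_mask = torch.tensor([[1, 1, 1, 1, 1, 0, 0]])
logits = torch.randn(1, 6, 10)                          # (batch_size, num_decoding_steps, num_classes)

relevant_targets = targets[:, 1:].contiguous()          # w1 w2 w3 <E> <P> <P>
relevant_mask = target_mask[:, 1:].contiguous()         # 1  1  1  1   0   0
assert relevant_targets.size(1) == logits.size(1)       # aligned with l1 ... l6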
Example #12
Source File: custom_autoregressive_seq2seq_decoder.py    From summarus with Apache License 2.0
def _get_loss(self,
                  logits: torch.LongTensor,
                  targets: torch.LongTensor,
                  target_mask: torch.LongTensor) -> torch.Tensor:
        # shape: (batch_size, num_decoding_steps)
        relevant_targets = targets[:, 1:].contiguous()

        # shape: (batch_size, num_decoding_steps)
        relevant_mask = target_mask[:, 1:].contiguous()

        return util.sequence_cross_entropy_with_logits(logits,
                                                       relevant_targets,
                                                       relevant_mask,
                                                       label_smoothing=self._label_smoothing_ratio) 
Example #13
Source File: lstm_character.py    From allennlp_tutorial with MIT License
def forward(self,
                tokens: Dict[str, torch.Tensor],
                label: torch.Tensor) -> Dict[str, torch.Tensor]:
        # split the namespace into characters and tokens, since they
        # aren't the same shape
        characters = { 'characters': tokens['characters'] }
        tokens = { 'tokens': tokens['tokens'] }

        # get the tokens mask
        mask = get_text_field_mask(tokens)
        # get the characters mask, for which we use the nifty `num_wrapping_dims` argument
        character_mask = get_text_field_mask(characters, num_wrapping_dims=1)
        # decompose the shape into named parameters for future use
        batch_size, sequence_length, word_length = character_mask.shape
        # embed the characters
        embedded_characters = self._character_embedder(characters)
        # convert the embeddings from 4d embeddings to a 3d tensor
        # the first dimension of this tensor is (batch_size * num_tokens)
        # (i.e. each word is its own instance in a batch)
        embedded_characters = embedded_characters.view(batch_size*sequence_length, word_length, -1)
        character_mask = character_mask.view(batch_size*sequence_length, word_length)
        # run the character LSTM
        encoded_characters = self._character_encoder(embedded_characters, character_mask)
        # reshape the output into a 3d tensor we can concatenate with the word embeddings
        encoded_characters = encoded_characters.view(batch_size, sequence_length, -1)

        # run the standard LSTM NER pipeline
        embedded = self._word_embedder(tokens)
        embedded = torch.cat([embedded, encoded_characters], dim=2)
        encoded = self._encoder(embedded, mask)

        classified = self._classifier(encoded)

        # collect the outputs (mirrors the plain LSTM tagger from the same tutorial)
        output: Dict[str, torch.Tensor] = {}
        output['logits'] = classified

        if label is not None:
            self._f1(classified, label, mask)
            output['loss'] = sequence_cross_entropy_with_logits(classified, label, mask)

        return output 
Example #14
Source File: updown_captioner.py    From updown-baseline with MIT License
def _get_loss(
        self, logits: torch.Tensor, targets: torch.Tensor, target_mask: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Compute cross entropy loss of predicted caption (logits) w.r.t. target caption. The cross
        entropy loss of caption is cross entropy loss at each time-step, summed.

        Parameters
        ----------
        logits: torch.Tensor
            A tensor of shape ``(batch_size, max_caption_length - 1, vocab_size)`` containing
            unnormalized log-probabilities of predicted captions.
        targets: torch.Tensor
            A tensor of shape ``(batch_size, max_caption_length - 1)`` of tokenized target
            captions.
        target_mask: torch.Tensor
            A mask over target captions, elements where mask is zero are ignored from loss
            computation. Here, we ignore ``@@UNKNOWN@@`` token (and hence padding tokens too
            because they are basically the same).

        Returns
        -------
        torch.Tensor
            A tensor of shape ``(batch_size, )`` containing cross entropy loss of captions, summed
            across time-steps.
        """

        # shape: (batch_size, )
        target_lengths = torch.sum(target_mask, dim=-1).float()

        # shape: (batch_size, )
        return target_lengths * sequence_cross_entropy_with_logits(
            logits, targets, target_mask, average=None
        ) 
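Because average=None yields each caption's token-averaged loss, multiplying by the number of unmasked tokens, as the return statement above does, recovers the per-caption summed cross entropy. A small hedged sanity check of that relationship with toy shapes:

import torch
from allennlp.nn.util import sequence_cross_entropy_with_logits

logits = torch.randn(2, 6, 8)
targets = torch.randint(0, 8, (2, 6))
target_mask = torch.tensor([[1.0, 1.0, 1.0, 1.0, 0.0, 0.0],
                            [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]])

per_caption_mean = sequence_cross_entropy_with_logits(logits, targets, target_mask, average=None)
per_caption_sum = torch.sum(target_mask, dim=-1) * per_caption_mean   # shape: (batch_size,)
print(per_caption_sum.shape)                                           # torch.Size([2])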
Example #15
Source File: simple_seq2seq_test.py    From magnitude with MIT License
def test_loss_is_computed_correctly(self):
        batch_size = 5
        num_decoding_steps = 5
        num_classes = 10
        sample_logits = torch.randn(batch_size, num_decoding_steps-1, num_classes)
        sample_targets = torch.from_numpy(numpy.random.randint(0, num_classes,
                                                               (batch_size, num_decoding_steps)))
        # Mask should be either 0 or 1
        sample_mask = torch.from_numpy(numpy.random.randint(0, 2,
                                                            (batch_size, num_decoding_steps)))
        expected_loss = sequence_cross_entropy_with_logits(sample_logits, sample_targets[:, 1:].contiguous(),
                                                           sample_mask[:, 1:].contiguous())
        # pylint: disable=protected-access
        actual_loss = self.model._get_loss(sample_logits, sample_targets, sample_mask)
        assert numpy.equal(expected_loss.data.numpy(), actual_loss.data.numpy()) 
Example #16
Source File: tag_decoder.py    From udify with MIT License
def _features_loss(self, hidden, mask, gold_tags, output_dict):
        if gold_tags is None:
            return

        for feature in self.features:
            logits = self.feature_outputs[feature](hidden)
            loss = sequence_cross_entropy_with_logits(logits,
                                                      gold_tags[feature],
                                                      mask,
                                                      label_smoothing=self.label_smoothing)
            loss /= len(self.features)
            output_dict["loss"] += loss

            for metric in self.features_metrics[feature].values():
                metric(logits, gold_tags[feature], mask.float()) 
Example #17
Source File: util_test.py    From allennlp with Apache License 2.0
def test_sequence_cross_entropy_with_logits_alpha_single_float_correctly(self):
        batch = 1
        length = 3
        classes = 2  # alpha float for binary class only
        alpha = (
            numpy.random.rand() if numpy.random.rand() > 0.5 else (1.0 - numpy.random.rand())
        )  # [0, 1]
        alpha = torch.tensor(alpha)

        tensor = torch.rand([batch, length, classes])
        targets = torch.LongTensor(numpy.random.randint(0, classes, [batch, length]))
        weights = torch.ones([batch, length])

        loss = util.sequence_cross_entropy_with_logits(tensor, targets, weights, alpha=alpha)

        correct_loss = 0.0
        for logit, label in zip(tensor.squeeze(0), targets.squeeze(0)):
            logp = torch.nn.functional.log_softmax(logit, dim=-1)
            logpt = logp[label]
            if label:
                at = alpha
            else:
                at = 1 - alpha
            correct_loss += -logpt * at
        # Average over sequence.
        correct_loss = correct_loss / length
        numpy.testing.assert_array_almost_equal(loss.data.numpy(), correct_loss.data.numpy()) 
Example #18
Source File: simple_tagger.py    From magnitude with MIT License
def forward(self,  # type: ignore
                tokens,
                tags=None,
                metadata=None):
        # pylint: disable=arguments-differ
        u"""
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence to be tagged under a 'words' key.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.text_field_embedder(tokens)
        batch_size, sequence_length, _ = embedded_text_input.size()
        mask = get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                          sequence_length,
                                                                          self.num_classes])

        output_dict = {u"logits": logits, u"class_probabilities": class_probabilities}

        if tags is not None:
            loss = sequence_cross_entropy_with_logits(logits, tags, mask)
            for metric in list(self.metrics.values()):
                metric(logits, tags, mask.float())
            output_dict[u"loss"] = loss

        if metadata is not None:
            output_dict[u"words"] = [x[u"words"] for x in metadata]
        return output_dict

Example #19
Source File: simple_bilm.py    From swagaf with MIT License
def forward(self, words: torch.Tensor, use_forward=True, use_reverse=True, compute_logprobs=False) -> Dict[
        str, Union[torch.Tensor, List[torch.Tensor]]]:
        """
        use this for training the LM
        :param words: [batch_size, N] words. assuming you're starting with BOS and ending with EOS here
        :return:
        """
        encoded_inputs = self.embed_words(words)
        mask = (words != 0).long()[:, 2:]
        word_targets = words[:, 1:-1].contiguous()

        result_dict = {
            'mask': mask,
            'word_targets': word_targets,
        }
        # TODO: try to reduce duplicate code here
        if use_forward:
            self.forward_lm.reset_states()
            forward_activation = self.forward_lm(encoded_inputs[:, :-2], mask)

            if compute_logprobs:
                # being memory efficient here is critical if the input tensors are large
                result_dict['forward_logprobs'] = self._chunked_logsoftmaxes(forward_activation,
                                                                             word_targets) * mask.float()
            else:

                result_dict['forward_logits'] = self.decoder(forward_activation)
                result_dict['forward_loss'] = sequence_cross_entropy_with_logits(result_dict['forward_logits'],
                                                                                 word_targets,
                                                                                 mask)
        if use_reverse:
            self.reverse_lm.reset_states()
            reverse_activation = self.reverse_lm(encoded_inputs[:, 2:], mask)
            if compute_logprobs:
                result_dict['reverse_logprobs'] = self._chunked_logsoftmaxes(reverse_activation,
                                                                             word_targets) * mask.float()
            else:
                result_dict['reverse_logits'] = self.decoder(reverse_activation)
                result_dict['reverse_loss'] = sequence_cross_entropy_with_logits(result_dict['reverse_logits'],
                                                                                 word_targets,
                                                                                 mask)
        return result_dict 
Example #20
Source File: simple_tagger.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0
def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                tags: torch.LongTensor = None,
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        # pylint: disable=arguments-differ
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels of shape
            ``(batch_size, num_tokens)``.
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence to be tagged under a 'words' key.

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """
        embedded_text_input = self.text_field_embedder(tokens)
        batch_size, sequence_length, _ = embedded_text_input.size()
        mask = get_text_field_mask(tokens)
        encoded_text = self.encoder(embedded_text_input, mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                          sequence_length,
                                                                          self.num_classes])

        output_dict = {"logits": logits, "class_probabilities": class_probabilities}

        if tags is not None:
            loss = sequence_cross_entropy_with_logits(logits, tags, mask)
            for metric in self.metrics.values():
                metric(logits, tags, mask.float())
            if self._f1_metric is not None:
                self._f1_metric(logits, tags, mask.float())
            output_dict["loss"] = loss

        if metadata is not None:
            output_dict["words"] = [x["words"] for x in metadata]
        return output_dict 
Example #21
Source File: seq2seq_base.py    From probnmn-clevr with MIT License
def _get_loss(
        logits: torch.LongTensor, targets: torch.LongTensor, target_mask: torch.LongTensor
    ):
        r"""
        Override AllenNLP Seq2Seq model's provided ``_get_loss`` method, which returns sequence
        cross entropy averaged over batch by default. Instead, provide sequence cross entropy of
        each sequence in a batch separately.

        Extended Summary
        ----------------
        From AllenNLP documentation:

        Compute loss.
        Takes logits (unnormalized outputs from the decoder) of size (batch_size,
        num_decoding_steps, num_classes), target indices of size (batch_size, num_decoding_steps+1)
        and corresponding masks of size (batch_size, num_decoding_steps+1) and computes
        cross entropy loss while taking the mask into account.
        The length of ``targets`` is expected to be greater than that of ``logits`` because the
        decoder does not need to compute the output corresponding to the last timestep of
        ``targets``. This method aligns the inputs appropriately to compute the loss.
        During training, we want the logit corresponding to timestep i to be similar to the target
        token from timestep i + 1. That is, the targets should be shifted by one timestep for
        appropriate comparison.  Consider a single example where the target has 3 words, and
        padding is to 7 tokens::

           The complete sequence would correspond to <S> w1  w2  w3  <E> <P> <P>
           and the mask would be                     1   1   1   1   1   0   0
           and let the logits be                     l1  l2  l3  l4  l5  l6

        We actually need to compare::

           the sequence           w1  w2  w3  <E> <P> <P>
           with masks             1   1   1   1   0   0
           against                l1  l2  l3  l4  l5  l6
           (where the input was)  <S> w1  w2  w3  <E> <P>

        """
        # shape: (batch_size, num_decoding_steps)
        relevant_targets = targets[:, 1:].contiguous()

        # shape: (batch_size, num_decoding_steps)
        relevant_mask = target_mask[:, 1:].contiguous()

        return sequence_cross_entropy_with_logits(
            logits, relevant_targets, relevant_mask, average=None
        )