Python allennlp.nn.util.add_sentence_boundary_token_ids() Examples

The following are 5 code examples of allennlp.nn.util.add_sentence_boundary_token_ids(), drawn from open-source projects. The source file, project, and license for each example are noted above it. You may also want to check out the other functions and classes available in the allennlp.nn.util module.
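add_sentence_boundary_token_ids() takes a padded tensor of token ids (2D) or of id vectors (3D) together with its mask, and returns the tensor with begin- and end-of-sentence tokens inserted around the unpadded portion of each sequence, plus the matching new mask. A minimal runnable sketch of the 2D case (the id values 9 and 10 are arbitrary, chosen to match the tests below; the test snippets also assume these imports):

import numpy
import torch
from allennlp.nn import util

# Two id sequences padded to length 3; 0 marks padding in the second row.
tensor = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
mask = tensor > 0  # boolean mask over real (non-padding) tokens

# Insert bos id 9 and eos id 10 around each unpadded sequence.
new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, 9, 10)
print(new_tensor)
# tensor([[ 9,  1,  2,  3, 10],
#         [ 9,  4,  5, 10,  0]])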
Example #1
Source File: util_test.py    From allennlp with Apache License 2.0
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
        tensor = torch.from_numpy(
            numpy.array(
                [
                    [[1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2]],
                    [[4, 3, 2, 1], [8, 7, 6, 5], [0, 0, 0, 0]],
                ]
            )
        )
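        # A timestep counts as real (non-padding) if any of its ids are non-zero,
        # so the second sequence has length 2 followed by one padding row.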
        mask = (tensor > 0).sum(dim=-1) > 0
        bos = torch.from_numpy(numpy.array([9, 9, 9, 9]))
        eos = torch.from_numpy(numpy.array([10, 10, 10, 10]))
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array(
            [
                [[9, 9, 9, 9], [1, 2, 3, 4], [5, 5, 5, 5], [6, 8, 1, 2], [10, 10, 10, 10]],
                [[9, 9, 9, 9], [4, 3, 2, 1], [8, 7, 6, 5], [10, 10, 10, 10], [0, 0, 0, 0]],
            ]
        )
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == ((expected_new_tensor > 0).sum(axis=-1) > 0)).all() 
Example #2
Source File: util_test.py    From magnitude with MIT License
def test_add_sentence_boundary_token_ids_handles_3D_input(self):
        tensor = torch.from_numpy(
                numpy.array([[[1, 2, 3, 4],
                              [5, 5, 5, 5],
                              [6, 8, 1, 2]],
                             [[4, 3, 2, 1],
                              [8, 7, 6, 5],
                              [0, 0, 0, 0]]]))
        mask = ((tensor > 0).sum(dim=-1) > 0).type(torch.LongTensor)
        bos = torch.from_numpy(numpy.array([9, 9, 9, 9]))
        eos = torch.from_numpy(numpy.array([10, 10, 10, 10]))
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[[9, 9, 9, 9],
                                            [1, 2, 3, 4],
                                            [5, 5, 5, 5],
                                            [6, 8, 1, 2],
                                            [10, 10, 10, 10]],
                                           [[9, 9, 9, 9],
                                            [4, 3, 2, 1],
                                            [8, 7, 6, 5],
                                            [10, 10, 10, 10],
                                            [0, 0, 0, 0]]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == ((expected_new_tensor > 0).sum(axis=-1) > 0)).all() 
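This is the same test as Example #1; the only substantive difference is the mask dtype. The magnitude port targets an older AllenNLP/PyTorch API in which masks are 0/1 LongTensors, while recent AllenNLP versions (as in Example #1) pass a boolean mask:

mask_bool = (tensor > 0).sum(dim=-1) > 0  # recent AllenNLP: torch.BoolTensor mask
mask_long = mask_bool.long()              # older API, as here: 0/1 LongTensor mask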
Example #3
Source File: util_test.py    From allennlp with Apache License 2.0
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
        tensor = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
        mask = tensor > 0
        bos = 9
        eos = 10
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[9, 1, 2, 3, 10], [9, 4, 5, 10, 0]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == (expected_new_tensor > 0)).all() 
Example #4
Source File: util_test.py    From magnitude with MIT License
def test_add_sentence_boundary_token_ids_handles_2D_input(self):
        tensor = torch.from_numpy(numpy.array([[1, 2, 3], [4, 5, 0]]))
        mask = (tensor > 0).long()
        bos = 9
        eos = 10
        new_tensor, new_mask = util.add_sentence_boundary_token_ids(tensor, mask, bos, eos)
        expected_new_tensor = numpy.array([[9, 1, 2, 3, 10],
                                           [9, 4, 5, 10, 0]])
        assert (new_tensor.data.numpy() == expected_new_tensor).all()
        assert (new_mask.data.numpy() == (expected_new_tensor > 0)).all() 
Example #5
Source File: seq2seq_base.py    From probnmn-clevr with MIT License
def forward(
        self,
        source_tokens: torch.LongTensor,
        target_tokens: Optional[torch.LongTensor] = None,
        decoding_strategy: str = "sampling",
    ) -> Dict[str, torch.Tensor]:

        r"""
        Override AllenNLP's forward, changing the decoder logic. Performs either categorical
        sampling or greedy decoding, as specified by decoding_strategy.

        Parameters
        ----------
        source_tokens: torch.LongTensor
            Tokenized source sequences padded to maximum length. These are not padded with
            @start@ and @end@ sentence boundaries. Shape: (batch_size, max_source_length)
        target_tokens: torch.LongTensor, optional (default = None)
            Tokenized target sequences padded to maximum length. These are not padded with
            @start@ and @end@ sentence boundaries. Shape: (batch_size, max_target_length)
        decoding_strategy: str, optional (default = "sampling")
            How to perform decoding; one of "sampling" or "greedy".

        Returns
        -------
        Dict[str, torch.Tensor]
        """
        # Add "@start@" and "@end@" tokens to source and target sequences.
        source_tokens, _ = add_sentence_boundary_token_ids(
            source_tokens, (source_tokens != self._pad_index), self._start_index, self._end_index
        )
        if target_tokens is not None:
            target_tokens, _ = add_sentence_boundary_token_ids(
                target_tokens,
                (target_tokens != self._pad_index),
                self._start_index,
                self._end_index,
            )
        # Remove "@start@" from source sequences anyway (it's being encoded).
        source_tokens = {"tokens": source_tokens[:, 1:]}
        if target_tokens is not None:
            target_tokens = {"tokens": target_tokens}

        # _encode and _init_decoder_state are super class methods, left untouched.
        # keys: {"encoder_outputs", "source_mask"}
        state = self._encode(source_tokens)

        # keys: {"encoder_outputs", "source_mask", "decoder_hidden", "decoder_context"}
        state = self._init_decoder_state(state)

        # The `_forward_loop` decodes the input sequence and computes the loss during training
        # and validation.
        # keys: {"predictions", "loss"}
        output_dict = self._forward_loop(state, target_tokens, decoding_strategy)

        return output_dict
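The net effect of the boundary handling above: the encoder sees source sequences that end with "@end@" but do not start with "@start@", while target sequences keep both boundary tokens for the decoder. A hypothetical illustration, assuming a pad index of 0 and start/end indices of 1 and 2:

source = torch.tensor([[5, 6, 7], [8, 9, 0]])
with_boundaries, _ = add_sentence_boundary_token_ids(source, source != 0, 1, 2)
# with_boundaries: [[1, 5, 6, 7, 2],
#                   [1, 8, 9, 2, 0]]
encoder_input = with_boundaries[:, 1:]  # "@start@" stripped, "@end@" kept
# encoder_input:   [[5, 6, 7, 2],
#                   [8, 9, 2, 0]]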