Python allennlp.nn.util.masked_log_softmax() Examples

The following are 6 code examples of allennlp.nn.util.masked_log_softmax(), taken from open-source projects. The source file, project, and license are noted above each example. You may also want to check out the other available functions and classes of the allennlp.nn.util module.
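
For orientation before the project examples, here is a minimal, self-contained sketch of the call (assuming only that torch and a recent allennlp are installed): masked_log_softmax takes a tensor of logits and a broadcastable mask and returns log probabilities in which masked positions carry negligible probability mass.

import torch
from allennlp.nn import util

# One row of three candidate scores; the middle position is padding.
logits = torch.tensor([[1.0, 2.0, 5.0]])
mask = torch.tensor([[True, False, True]])

log_probs = util.masked_log_softmax(logits, mask)
# Exponentiating recovers a distribution over the unmasked positions only.
print(log_probs.exp())  # approximately [[0.018, 0.0, 0.982]]
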
Example #1
Source File: nlvr_decoder_step.py    From magnitude with MIT License
def _get_next_state_info_without_agenda(state,
                                            considered_actions,
                                            action_logits,
                                            action_mask):
        u"""
        We return a list of log probabilities corresponding to actions that are not padding. This
        method is related to the training scenario where we have target action sequences for
        training.
        """
        considered_action_logprobs = nn_util.masked_log_softmax(action_logits, action_mask)
        all_action_logprobs = []
        for group_index, (score, considered_logprobs) in enumerate(izip(state.score,
                                                                        considered_action_logprobs)):
            instance_action_logprobs = []
            for action_index, logprob in enumerate(considered_logprobs):
                # This is the actual index of the action from the original list of actions.
                action = considered_actions[group_index][action_index]
                if action == -1:
                    # Ignoring padding.
                    continue
                instance_action_logprobs.append((action_index, score + logprob))
            all_action_logprobs.append(instance_action_logprobs)
        return all_action_logprobs 
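
The excerpt assumes nn_util (an alias for allennlp.nn.util) and izip (itertools.izip on Python 2; plain zip on Python 3) are imported at the top of the source file. Below is a rough, self-contained sketch of the same padding-filtering idea with hypothetical stand-ins for the decoder state, assuming a recent allennlp where masks are boolean:

import torch
from allennlp.nn import util as nn_util

# Hypothetical stand-ins: two groups with three candidate actions each;
# -1 marks padding in considered_actions, and scores are the groups'
# running sequence scores.
considered_actions = [[3, 7, -1], [2, -1, -1]]
action_logits = torch.randn(2, 3)
action_mask = torch.tensor([[True, True, False], [True, False, False]])
scores = [torch.tensor(0.0), torch.tensor(-1.2)]

logprobs = nn_util.masked_log_softmax(action_logits, action_mask)
all_action_logprobs = []
for group_index, (score, row) in enumerate(zip(scores, logprobs)):
    all_action_logprobs.append([(i, score + lp) for i, lp in enumerate(row)
                                if considered_actions[group_index][i] != -1])
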
Example #2
Source File: util_test.py    From allennlp with Apache License 2.0
def test_masked_log_softmax_masked(self):
        # Tests replicated from test_softmax_masked - we test that, once exponentiated,
        # the log softmax contains the correct elements (masked elements should be == 0).

        # Testing the general masked 1D case.
        vector_1d = torch.FloatTensor([[1.0, 2.0, 5.0]])
        mask_1d = torch.tensor([[True, False, True]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(
            numpy.exp(vector_1d_softmaxed), numpy.array([[0.01798621, 0.0, 0.98201382]])
        )

        vector_1d = torch.FloatTensor([[0.0, 2.0, 3.0, 4.0]])
        mask_1d = torch.tensor([[True, False, True, True]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(
            numpy.exp(vector_1d_softmaxed), numpy.array([[0.01321289, 0.0, 0.26538793, 0.72139918]])
        )

        # Testing the masked 1D case where the input is all 0s and the mask
        # is not all 0s.
        vector_1d = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0]])
        mask_1d = torch.tensor([[False, False, False, True]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(
            numpy.exp(vector_1d_softmaxed), numpy.array([[0.0, 0.0, 0.0, 1.0]])
        )

        # Testing the masked 1D case where the input is not all 0s
        # and the mask is all 0s.  The output here will be arbitrary, but it should not be nan.
        vector_1d = torch.FloatTensor([[0.0, 2.0, 3.0, 4.0]])
        mask_1d = torch.tensor([[False, False, False, False]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert not numpy.isnan(vector_1d_softmaxed).any() 
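
The final assertion documents a deliberate design choice. Rather than returning nan for a fully masked row, masked_log_softmax adds the log of (mask + a tiny epsilon) to the logits before taking the log softmax, so every entry stays finite. A rough sketch of that trick, not the library's exact code (which, in recent versions, picks the epsilon based on the dtype):

import torch

def masked_log_softmax_sketch(vector, mask):
    # log(mask + eps) pushes masked logits toward -inf without ever
    # producing nan, even when an entire row is masked out.
    vector = vector + (mask.float() + 1e-13).log()
    return torch.nn.functional.log_softmax(vector, dim=-1)

# Fully masked row: arbitrary but finite log probabilities, no nan.
print(masked_log_softmax_sketch(torch.tensor([[0.0, 2.0, 3.0, 4.0]]),
                                torch.tensor([[False, False, False, False]])))
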
Example #3
Source File: util_test.py    From magnitude with MIT License
def test_masked_log_softmax_masked(self):
        # Tests replicated from test_softmax_masked - we test that, once exponentiated,
        # the log softmax contains the correct elements (masked elements should be == 0).

        # Testing the general masked 1D case.
        vector_1d = torch.FloatTensor([[1.0, 2.0, 5.0]])
        mask_1d = torch.FloatTensor([[1.0, 0.0, 1.0]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(numpy.exp(vector_1d_softmaxed),
                                  numpy.array([[0.01798621, 0.0, 0.98201382]]))

        vector_1d = torch.FloatTensor([[0.0, 2.0, 3.0, 4.0]])
        mask_1d = torch.FloatTensor([[1.0, 0.0, 1.0, 1.0]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(numpy.exp(vector_1d_softmaxed),
                                  numpy.array([[0.01321289, 0.0,
                                                0.26538793, 0.72139918]]))

        # Testing the masked 1D case where the input is all 0s and the mask
        # is not all 0s.
        vector_1d = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0]])
        mask_1d = torch.FloatTensor([[0.0, 0.0, 0.0, 1.0]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert_array_almost_equal(numpy.exp(vector_1d_softmaxed),
                                  numpy.array([[0., 0., 0., 1.]]))

        # Testing the masked 1D case where the input is not all 0s
        # and the mask is all 0s.  The output here will be arbitrary, but it should not be nan.
        vector_1d = torch.FloatTensor([[0.0, 2.0, 3.0, 4.0]])
        mask_1d = torch.FloatTensor([[0.0, 0.0, 0.0, 0.0]])
        vector_1d_softmaxed = util.masked_log_softmax(vector_1d, mask_1d).data.numpy()
        assert not numpy.isnan(vector_1d_softmaxed).any() 
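
This is the same test from an older snapshot of the library: the masks here are FloatTensors of 0s and 1s, whereas the newer copy in Example #2 passes boolean masks (what recent allennlp releases expect). Exponentiated, both styles produce identical results.
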
Example #4
Source File: hotpot_bert_v0.py    From semanticRetrievalMRS with MIT License
def forward(self, input_ids, token_type_ids=None, attention_mask=None,
                gt_span=None, mode=ForwardMode.TRAIN):
        sequence_output, _ = self.bert_encoder(input_ids, token_type_ids, attention_mask,
                                               output_all_encoded_layers=False)
        joint_length = allen_util.get_lengths_from_binary_sequence_mask(attention_mask)

        joint_seq_logits = self.qa_outputs(sequence_output)

        # The following two lines are adapted from AllenNLP's BiDAF model.
        # joint_seq_logits: (B, T, 2); slice 0 holds the start logits, slice 1 the end logits.
        start_logits = allen_util.replace_masked_values(joint_seq_logits[:, :, 0], attention_mask, -1e18)
        end_logits = allen_util.replace_masked_values(joint_seq_logits[:, :, 1], attention_mask, -1e18)

        if mode == BertSpan.ForwardMode.TRAIN:
            assert gt_span is not None
            gt_start = gt_span[:, 0]  # gt_span: [B, 2] -> [B]
            gt_end = gt_span[:, 1]

            start_loss = nll_loss(allen_util.masked_log_softmax(start_logits, attention_mask), gt_start)
            end_loss = nll_loss(allen_util.masked_log_softmax(end_logits, attention_mask), gt_end)
            # We removed the squeeze below because it breaks when the batch size is 1;
            # gt_start and gt_end should already have shape [B].
            # start_loss = nll_loss(allen_util.masked_log_softmax(start_logits, context_mask), gt_start.squeeze(-1))
            # end_loss = nll_loss(allen_util.masked_log_softmax(end_logits, context_mask), gt_end.squeeze(-1))

            loss = start_loss + end_loss
            return loss
        else:
            return start_logits, end_logits, joint_length 
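
The training branch shows a common span-extraction pattern: mask the logits, take a masked log softmax over positions, then feed the log probabilities to nll_loss with the gold index, which together amount to a masked cross-entropy. A condensed sketch with hypothetical shapes (assuming a recent allennlp with boolean masks):

import torch
from torch.nn.functional import nll_loss
from allennlp.nn import util as allen_util

batch_size, seq_len = 2, 5
start_logits = torch.randn(batch_size, seq_len)
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]]).bool()
gt_start = torch.tensor([0, 3])  # gold start index per instance, shape [B]

# Masked positions get negligible probability, so the gold indices must
# fall inside the mask for the loss to be meaningful.
start_loss = nll_loss(allen_util.masked_log_softmax(start_logits, attention_mask), gt_start)
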
Example #5
Source File: bert_span_v0.py    From semanticRetrievalMRS with MIT License
def forward(self, input_ids, token_type_ids=None, attention_mask=None, context_span=None,
                gt_span=None, max_context_length=0, mode=ForwardMode.TRAIN):
        # Precomputing max_context_length is important because the same value must be
        # shared across GPUs; computing it dynamically per device is not feasible.
        sequence_output, _ = self.bert_encoder(input_ids, token_type_ids, attention_mask,
                                               output_all_encoded_layers=False)

        joint_seq_logits = self.qa_outputs(sequence_output)
        context_logits, context_length = span_util.span_select(joint_seq_logits, context_span, max_context_length)
        context_mask = allen_util.get_mask_from_sequence_lengths(context_length, max_context_length)

        # The following two lines are adapted from AllenNLP's BiDAF model.
        # context_logits: (B, T, 2); slice 0 holds the start logits, slice 1 the end logits.
        start_logits = allen_util.replace_masked_values(context_logits[:, :, 0], context_mask, -1e18)
        end_logits = allen_util.replace_masked_values(context_logits[:, :, 1], context_mask, -1e18)

        if mode == BertSpan.ForwardMode.TRAIN:
            assert gt_span is not None
            gt_start = gt_span[:, 0]  # gt_span: [B, 2]
            gt_end = gt_span[:, 1]

            start_loss = nll_loss(allen_util.masked_log_softmax(start_logits, context_mask), gt_start.squeeze(-1))
            end_loss = nll_loss(allen_util.masked_log_softmax(end_logits, context_mask), gt_end.squeeze(-1))

            loss = start_loss + end_loss
            return loss
        else:
            return start_logits, end_logits, context_length 
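
Unlike Example #4, the mask here is rebuilt from the selected context lengths with allen_util.get_mask_from_sequence_lengths, so only positions inside the context span compete in the softmax. A quick illustration of that helper:

import torch
from allennlp.nn import util as allen_util

context_length = torch.tensor([3, 5])
context_mask = allen_util.get_mask_from_sequence_lengths(context_length, 5)
# Recent allennlp returns booleans; older versions returned 0/1 integers:
# [[ True,  True,  True, False, False],
#  [ True,  True,  True,  True,  True]]
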
Example #6
Source File: better_model.py    From LipReading with MIT License
def forward(self,
                frames: torch.FloatTensor,
                frame_lens: torch.LongTensor):
        """
        frames: (batch_size, seq_len, num_lmks, lmk_dim)
        frame_lens: (batch_size, )
        """
        if self.frame_processing == 'flatten':
            frames = frames.reshape(frames.shape[0], frames.shape[1], -1)

        # Sorts the batch in decreasing order of unpadded seq_len.
        (sorted_frames, sorted_frame_lens,
            restoration_indices, _) = sort_batch_by_length(frames, frame_lens)

        # pack_padded_sequence needs the lengths as a CPU numpy array; returns a PackedSequence.
        frame_lens_np = (sorted_frame_lens.data.cpu().numpy() if sorted_frame_lens.is_cuda
                         else sorted_frame_lens.data.numpy())
        packed_frames = nn.utils.rnn.pack_padded_sequence(sorted_frames, frame_lens_np,
                                                          batch_first=True)

        # Encoder: feed frames to the model, output hidden states.
        # final_state: (num_layers * num_dir, batch_size, hidden_size) (*2 if LSTM)
        packed_hidden_states, final_state = self.rnn(packed_frames)

        # Unpack encoding, the hidden states, a Tensor.
        # (batch_size, seq_len, num_dir * hidden_size)
        hidden_states, _ = nn.utils.rnn.pad_packed_sequence(packed_hidden_states, batch_first=True)

        # (num_layers, batch_size, hidden_size * num_dir) (*2 if LSTM)
        if self.bidirectional:
            final_state = self._cat_directions(final_state)

        hidden_states = hidden_states.index_select(0, restoration_indices)
        if isinstance(final_state, tuple):  # LSTM
            final_state = (final_state[0].index_select(1, restoration_indices),
                           final_state[1].index_select(1, restoration_indices))
        else:
            final_state = final_state.index_select(1, restoration_indices)

        if self.enable_ctc:
            output_logits = self.output_proj(hidden_states)
            output_log_probs = masked_log_softmax(
                output_logits,
                self.output_mask.expand(output_logits.shape[0], self.adj_vocab_size),
                dim=-1)
            return output_log_probs, hidden_states, final_state
        else:
            return hidden_states, final_state
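
Here the mask is not a padding mask over time steps but a fixed vocabulary mask, expanded across the batch, that confines the CTC output distribution to the adjusted vocabulary. masked_log_softmax unsqueezes a lower-rank mask at dimension 1 until it matches the input, so a (batch, vocab) mask broadcasts over the time dimension. A minimal sketch with hypothetical sizes (output_mask and adj_vocab_size stand in for attributes of the model above):

import torch
from allennlp.nn.util import masked_log_softmax

batch_size, seq_len, vocab_size = 2, 4, 6
output_logits = torch.randn(batch_size, seq_len, vocab_size)

# Hypothetical vocabulary mask: the last two output symbols are disallowed.
output_mask = torch.tensor([True, True, True, True, False, False])

# The (batch, vocab) mask is unsqueezed to (batch, 1, vocab) internally
# and broadcast over seq_len.
log_probs = masked_log_softmax(output_logits,
                               output_mask.expand(batch_size, vocab_size),
                               dim=-1)
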