Python allennlp.nn.util.logsumexp() Examples

The following are 7 code examples of allennlp.nn.util.logsumexp(), drawn from open-source projects. Each example notes its source file, the project it comes from, and that project's license. You may also want to check out all available functions/classes of the module allennlp.nn.util.
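For context, logsumexp(x) computes log(sum(exp(x))) in a numerically stable way: it shifts the input by its maximum before exponentiating, so large scores do not overflow. A minimal sketch of the technique (this illustrates the idea; it is not necessarily AllenNLP's exact implementation):

import torch

def logsumexp_sketch(tensor, dim=-1, keepdim=False):
    # Shift by the per-dim max so the largest exponentiated value is exp(0) == 1,
    # preventing overflow for large scores and underflow for very negative ones.
    max_score, _ = tensor.max(dim, keepdim=True)
    summed = (tensor - max_score).exp().sum(dim, keepdim=True).log()
    result = max_score + summed
    return result if keepdim else result.squeeze(dim)

Recent PyTorch releases expose the same operation as torch.logsumexp.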
Example #1
Source File: util_test.py    From allennlp with Apache License 2.0
def test_logsumexp(self):
        # First a simple example where we add probabilities in log space.
        tensor = torch.FloatTensor([[0.4, 0.1, 0.2]])
        log_tensor = tensor.log()
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=False)
        assert_almost_equal(log_summed.exp().data.numpy(), [0.7])
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=True)
        assert_almost_equal(log_summed.exp().data.numpy(), [[0.7]])

        # Then some more atypical examples, and making sure this will work with how we handle
        # log masks.
        tensor = torch.FloatTensor([[float("-inf"), 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[-200.0, 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[20.0, 20.0], [-200.0, 200.0]])
        assert_almost_equal(util.logsumexp(tensor, dim=0).data.numpy(), [20.0, 200.0]) 
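The -inf and -200.0 inputs above reflect how AllenNLP handles masking in log space: padded positions are pushed to a very negative value, which contributes exp(-inf) == 0 to the sum and so drops out. A hedged sketch of that pattern (the scores and mask here are illustrative, not taken from the test):

import torch
from allennlp.nn import util

scores = torch.FloatTensor([[1.5, 0.3, -0.7]])      # unnormalized log-space scores
mask = torch.BoolTensor([[True, True, False]])      # last position is padding
masked = scores.masked_fill(~mask, float("-inf"))   # exp(-inf) == 0, so it drops out
total = util.logsumexp(masked, dim=-1)              # sums over real positions only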
Example #2
Source File: maximum_marginal_likelihood.py    From magnitude with MIT License
def decode(self,
               initial_state,
               decode_step,
               supervision):
        targets, target_mask = supervision

        # If self._beam_size is not set, we use a beam size that ensures we keep all of the
        # sequences.
        beam_size = self._beam_size or targets.size(1)
        beam_search = ConstrainedBeamSearch(beam_size, targets, target_mask)
        finished_states = beam_search.search(initial_state, decode_step)

        loss = 0
        for instance_states in list(finished_states.values()):
            scores = [state.score[0].view(-1) for state in instance_states]
            loss += -util.logsumexp(torch.cat(scores))
        return {u'loss': loss / len(finished_states)} 
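The loss here is the maximum marginal likelihood objective: for each instance, the log-probabilities of all finished (correct) action sequences are combined with logsumexp, which marginalizes over them in log space, and the negative log marginal is the loss. A toy numeric illustration of that step (the score values are made up):

import torch
from allennlp.nn import util

# Log-space scores of three finished sequences for one instance.
scores = [torch.tensor([-2.3]), torch.tensor([-1.1]), torch.tensor([-4.0])]
loss = -util.logsumexp(torch.cat(scores))
# Equivalent to -log(exp(-2.3) + exp(-1.1) + exp(-4.0)).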
Example #3
Source File: util_test.py    From magnitude with MIT License
def test_logsumexp(self):
        # First a simple example where we add probabilities in log space.
        tensor = torch.FloatTensor([[.4, .1, .2]])
        log_tensor = tensor.log()
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=False)
        assert_almost_equal(log_summed.exp().data.numpy(), [.7])
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=True)
        assert_almost_equal(log_summed.exp().data.numpy(), [[.7]])

        # Then some more atypical examples, and making sure this will work with how we handle
        # log masks.
        tensor = torch.FloatTensor([[float(u'-inf'), 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[-200.0, 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[20.0, 20.0], [-200.0, 200.0]])
        assert_almost_equal(util.logsumexp(tensor, dim=0).data.numpy(), [20.0, 200.0]) 
Example #4
Source File: maximum_marginal_likelihood.py    From allennlp-semparse with Apache License 2.0
def decode(
        self,
        initial_state: State,
        transition_function: TransitionFunction,
        supervision: Tuple[torch.Tensor, torch.Tensor],
    ) -> Dict[str, torch.Tensor]:
        targets, target_mask = supervision
        beam_search = ConstrainedBeamSearch(self._beam_size, targets, target_mask)
        finished_states: Dict[int, List[State]] = beam_search.search(
            initial_state, transition_function
        )

        loss = 0
        for instance_states in finished_states.values():
            scores = [state.score[0].view(-1) for state in instance_states]
            loss += -util.logsumexp(torch.cat(scores))
        return {"loss": loss / len(finished_states)} 
Example #5
Source File: conditional_random_field.py    From allennlp with Apache License 2.0
def _input_likelihood(self, logits: torch.Tensor, mask: torch.BoolTensor) -> torch.Tensor:
        """
        Computes the (batch_size,) denominator term for the log-likelihood, which is the
        sum of the likelihoods across all possible state sequences.
        """
        batch_size, sequence_length, num_tags = logits.size()

        # Transpose batch size and sequence dimensions
        mask = mask.transpose(0, 1).contiguous()
        logits = logits.transpose(0, 1).contiguous()

        # Initial alpha is the (batch_size, num_tags) tensor of likelihoods combining the
        # transitions to the initial states and the logits for the first timestep.
        if self.include_start_end_transitions:
            alpha = self.start_transitions.view(1, num_tags) + logits[0]
        else:
            alpha = logits[0]

        # For each i we compute logits for the transitions from timestep i-1 to timestep i.
        # We do so in a (batch_size, num_tags, num_tags) tensor where the axes are
        # (instance, current_tag, next_tag)
        for i in range(1, sequence_length):
            # The emit scores are for time i ("next_tag") so we broadcast along the current_tag axis.
            emit_scores = logits[i].view(batch_size, 1, num_tags)
            # Transition scores are (current_tag, next_tag) so we broadcast along the instance axis.
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            # Alpha is for the current_tag, so we broadcast along the next_tag axis.
            broadcast_alpha = alpha.view(batch_size, num_tags, 1)

            # Add all the scores together and logsumexp over the current_tag axis.
            inner = broadcast_alpha + emit_scores + transition_scores

            # In valid positions (mask == True) we want to take the logsumexp over the current_tag dimension
            # of `inner`. Otherwise (mask == False) we want to retain the previous alpha.
            alpha = util.logsumexp(inner, 1) * mask[i].view(batch_size, 1) + alpha * (
                ~mask[i]
            ).view(batch_size, 1)

        # Every sequence needs to end with a transition to the stop_tag.
        if self.include_start_end_transitions:
            stops = alpha + self.end_transitions.view(1, num_tags)
        else:
            stops = alpha

        # Finally we log_sum_exp along the num_tags dim, result is (batch_size,)
        return util.logsumexp(stops) 
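This is the standard CRF forward algorithm in log space: each step computes alpha[t][j] = logsumexp_i(alpha[t-1][i] + transitions[i][j]) + logits[t][j], and the final logsumexp over tags yields the log partition function. A small brute-force check of that recurrence, with no batch dimension, mask, or start/end transitions (shapes and values are illustrative):

import itertools
import torch
from allennlp.nn import util

num_tags, seq_len = 3, 4
logits = torch.randn(seq_len, num_tags)        # emission scores per timestep
transitions = torch.randn(num_tags, num_tags)  # transitions[i][j]: tag i -> tag j

# Forward algorithm: alpha over tags at each timestep.
alpha = logits[0]
for t in range(1, seq_len):
    inner = alpha.view(num_tags, 1) + transitions + logits[t].view(1, num_tags)
    alpha = util.logsumexp(inner, 0)
log_partition = util.logsumexp(alpha)

# Brute force: enumerate all num_tags ** seq_len tag sequences.
scores = []
for tags in itertools.product(range(num_tags), repeat=seq_len):
    score = sum(logits[t, tag] for t, tag in enumerate(tags))
    score = score + sum(transitions[tags[t - 1], tags[t]] for t in range(1, seq_len))
    scores.append(score.view(1))
assert torch.allclose(log_partition, util.logsumexp(torch.cat(scores)), atol=1e-5)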
Example #6
Source File: conditional_random_field.py    From magnitude with MIT License
def _input_likelihood(self, logits, mask):
        u"""
        Computes the (batch_size,) denominator term for the log-likelihood, which is the
        sum of the likelihoods across all possible state sequences.
        """
        batch_size, sequence_length, num_tags = logits.size()

        # Transpose batch size and sequence dimensions
        mask = mask.float().transpose(0, 1).contiguous()
        logits = logits.transpose(0, 1).contiguous()

        # Initial alpha is the (batch_size, num_tags) tensor of likelihoods combining the
        # transitions to the initial states and the logits for the first timestep.
        if self.include_start_end_transitions:
            alpha = self.start_transitions.view(1, num_tags) + logits[0]
        else:
            alpha = logits[0]

        # For each i we compute logits for the transitions from timestep i-1 to timestep i.
        # We do so in a (batch_size, num_tags, num_tags) tensor where the axes are
        # (instance, current_tag, next_tag)
        for i in range(1, sequence_length):
            # The emit scores are for time i ("next_tag") so we broadcast along the current_tag axis.
            emit_scores = logits[i].view(batch_size, 1, num_tags)
            # Transition scores are (current_tag, next_tag) so we broadcast along the instance axis.
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            # Alpha is for the current_tag, so we broadcast along the next_tag axis.
            broadcast_alpha = alpha.view(batch_size, num_tags, 1)

            # Add all the scores together and logsumexp over the current_tag axis
            inner = broadcast_alpha + emit_scores + transition_scores

            # In valid positions (mask == 1) we want to take the logsumexp over the current_tag dimension
            # of ``inner``. Otherwise (mask == 0) we want to retain the previous alpha.
            alpha = (util.logsumexp(inner, 1) * mask[i].view(batch_size, 1) +
                     alpha * (1 - mask[i]).view(batch_size, 1))

        # Every sequence needs to end with a transition to the stop_tag.
        if self.include_start_end_transitions:
            stops = alpha + self.end_transitions.view(1, num_tags)
        else:
            stops = alpha

        # Finally we log_sum_exp along the num_tags dim, result is (batch_size,)
        return util.logsumexp(stops) 
Example #7
Source File: maximum_marginal_likelihood.py    From propara with Apache License 2.0
def decode(self,
               initial_state: DecoderState,
               decode_step: DecoderStep,
               supervision: Tuple[torch.Tensor, torch.Tensor],
               # instance_score has shape : batch_size, score of the instance
               instance_score=None) -> Dict[str, torch.Tensor]:
        targets, target_mask = supervision
        allowed_transitions = self._create_allowed_transitions(targets, target_mask)
        finished_states = []
        states = [initial_state]
        step_num = 0
        while states:
            step_num += 1
            next_states = []
            # We group together all current states to get more efficient (batched) computation.
            grouped_state = states[0].combine_states(states)
            allowed_actions = self._get_allowed_actions(grouped_state, allowed_transitions)
            # This will store a set of (batch_index, action_history) tuples, and we'll check it
            # against the allowed actions to make sure we're actually scoring all of the actions we
            # are supposed to.
            actions_taken: Set[Tuple[int, Tuple[int, ...]]] = set()
            for next_state in decode_step.take_step(grouped_state, allowed_actions=allowed_actions, max_actions=20):
                actions_taken.add((next_state.batch_indices[0], tuple(next_state.action_history[0])))
                if next_state.is_finished():
                    finished_states.append(next_state)
                else:
                    next_states.append(next_state)
            states = next_states
            # self._check_all_actions_taken(actions_taken, grouped_state, allowed_actions)

        # This is a dictionary of lists - for each batch instance, we want the score of all
        # finished states.  So this has shape (batch_size, num_target_action_sequences), though
        # it's not actually a tensor, because different batch instances might have different numbers
        # of finished states.
        batch_scores = self._group_scores_by_batch(finished_states)
        loss = 0
        for scores in batch_scores.values():  # we don't care about the batch index, just the scores
            # Assumes: there is always batch size of 1 (at least until EMNLP'18 submission)
            # if instance_score is a float, it is a dummy passed by the model.
            if not instance_score or type(instance_score) == float:
                loss += -util.logsumexp(torch.cat(scores))
            else:
                loss += instance_score[0] * -util.logsumexp(torch.cat(scores))
        # Denominator should not be zero.
        output_dict = {}
        output_dict['loss'] = loss / (len(batch_scores) + 1e-12)
        output_dict['finished_states'] = finished_states

        return output_dict