Python allennlp.nn.util.logsumexp() Examples

The following are 7 code examples of allennlp.nn.util.logsumexp(), drawn from open-source projects. Each example notes its source file, the project it comes from, and that project's license. You may also want to check out all available functions/classes of the module allennlp.nn.util.
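For context, logsumexp(x) computes log(sum(exp(x))) in a numerically stable way: it shifts the input by its maximum before exponentiating, so large scores do not overflow. A minimal sketch of the technique (this illustrates the idea; it is not necessarily AllenNLP's exact implementation):

import torch

def logsumexp_sketch(tensor, dim=-1, keepdim=False):
    # Shift by the per-dim max so the largest exponentiated value is exp(0) == 1,
    # preventing overflow for large scores and underflow for very negative ones.
    max_score, _ = tensor.max(dim, keepdim=True)
    summed = (tensor - max_score).exp().sum(dim, keepdim=True).log()
    result = max_score + summed
    return result if keepdim else result.squeeze(dim)

Recent PyTorch releases expose the same operation as torch.logsumexp.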
Example #1
Source File: util_test.py    From allennlp with Apache License 2.0
def test_logsumexp(self):
        # First a simple example where we add probabilities in log space.
        tensor = torch.FloatTensor([[0.4, 0.1, 0.2]])
        log_tensor = tensor.log()
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=False)
        assert_almost_equal(log_summed.exp().data.numpy(), [0.7])
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=True)
        assert_almost_equal(log_summed.exp().data.numpy(), [[0.7]])

        # Then some more atypical examples, and making sure this will work with how we handle
        # log masks.
        tensor = torch.FloatTensor([[float("-inf"), 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[-200.0, 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[20.0, 20.0], [-200.0, 200.0]])
        assert_almost_equal(util.logsumexp(tensor, dim=0).data.numpy(), [20.0, 200.0]) 
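The -inf and -200.0 inputs above reflect how AllenNLP handles masking in log space: padded positions are pushed to a very negative value, which contributes exp(-inf) == 0 to the sum and so drops out. A hedged sketch of that pattern (the scores and mask here are illustrative, not taken from the test):

import torch
from allennlp.nn import util

scores = torch.FloatTensor([[1.5, 0.3, -0.7]])      # unnormalized log-space scores
mask = torch.BoolTensor([[True, True, False]])      # last position is padding
masked = scores.masked_fill(~mask, float("-inf"))   # exp(-inf) == 0, so it drops out
total = util.logsumexp(masked, dim=-1)              # sums over real positions only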
Example #2
Source File: maximum_marginal_likelihood.py    From magnitude with MIT License
def decode(self,
               initial_state,
               decode_step,
               supervision):
        targets, target_mask = supervision

        # If self._beam_size is not set, we use a beam size that ensures we keep all of the
        # sequences.
        beam_size = self._beam_size or targets.size(1)
        beam_search = ConstrainedBeamSearch(beam_size, targets, target_mask)
        finished_states = beam_search.search(initial_state, decode_step)

        loss = 0
        for instance_states in list(finished_states.values()):
            scores = [state.score[0].view(-1) for state in instance_states]
            loss += -util.logsumexp(torch.cat(scores))
        return {u'loss': loss / len(finished_states)} 
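The loss here is the maximum marginal likelihood objective: for each instance, the log-probabilities of all finished (correct) action sequences are combined with logsumexp, which marginalizes over them in log space, and the negative log marginal is the loss. A toy numeric illustration of that step (the score values are made up):

import torch
from allennlp.nn import util

# Log-space scores of three finished sequences for one instance.
scores = [torch.tensor([-2.3]), torch.tensor([-1.1]), torch.tensor([-4.0])]
loss = -util.logsumexp(torch.cat(scores))
# Equivalent to -log(exp(-2.3) + exp(-1.1) + exp(-4.0)).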
Example #3
Source File: util_test.py    From magnitude with MIT License
def test_logsumexp(self):
        # First a simple example where we add probabilities in log space.
        tensor = torch.FloatTensor([[.4, .1, .2]])
        log_tensor = tensor.log()
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=False)
        assert_almost_equal(log_summed.exp().data.numpy(), [.7])
        log_summed = util.logsumexp(log_tensor, dim=-1, keepdim=True)
        assert_almost_equal(log_summed.exp().data.numpy(), [[.7]])

        # Then some more atypical examples, and making sure this will work with how we handle
        # log masks.
        tensor = torch.FloatTensor([[float(u'-inf'), 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[-200.0, 20.0]])
        assert_almost_equal(util.logsumexp(tensor).data.numpy(), [20.0])
        tensor = torch.FloatTensor([[20.0, 20.0], [-200.0, 200.0]])
        assert_almost_equal(util.logsumexp(tensor, dim=0).data.numpy(), [20.0, 200.0]) 
Example #4
Source File: maximum_marginal_likelihood.py    From allennlp-semparse with Apache License 2.0
def decode(
        self,
        initial_state: State,
        transition_function: TransitionFunction,
        supervision: Tuple[torch.Tensor, torch.Tensor],
    ) -> Dict[str, torch.Tensor]:
        targets, target_mask = supervision
        beam_search = ConstrainedBeamSearch(self._beam_size, targets, target_mask)
        finished_states: Dict[int, List[State]] = beam_search.search(
            initial_state, transition_function
        )

        loss = 0
        for instance_states in finished_states.values():
            scores = [state.score[0].view(-1) for state in instance_states]
            loss += -util.logsumexp(torch.cat(scores))
        return {"loss": loss / len(finished_states)} 
Example #5
Source File: conditional_random_field.py    From allennlp with Apache License 2.0
def _input_likelihood(self, logits: torch.Tensor, mask: torch.BoolTensor) -> torch.Tensor:
        """
        Computes the (batch_size,) denominator term for the log-likelihood, which is the
        sum of the likelihoods across all possible state sequences.
        """
        batch_size, sequence_length, num_tags = logits.size()

        # Transpose batch size and sequence dimensions
        mask = mask.transpose(0, 1).contiguous()
        logits = logits.transpose(0, 1).contiguous()

        # Initial alpha is the (batch_size, num_tags) tensor of likelihoods combining the
        # transitions to the initial states and the logits for the first timestep.
        if self.include_start_end_transitions:
            alpha = self.start_transitions.view(1, num_tags) + logits[0]
        else:
            alpha = logits[0]

        # For each i we compute logits for the transitions from timestep i-1 to timestep i.
        # We do so in a (batch_size, num_tags, num_tags) tensor where the axes are
        # (instance, current_tag, next_tag)
        for i in range(1, sequence_length):
            # The emit scores are for time i ("next_tag") so we broadcast along the current_tag axis.
            emit_scores = logits[i].view(batch_size, 1, num_tags)
            # Transition scores are (current_tag, next_tag) so we broadcast along the instance axis.
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            # Alpha is for the current_tag, so we broadcast along the next_tag axis.
            broadcast_alpha = alpha.view(batch_size, num_tags, 1)

            # Add all the scores together and logsumexp over the current_tag axis.
            inner = broadcast_alpha + emit_scores + transition_scores

            # In valid positions (mask == True) we want to take the logsumexp over the current_tag dimension
            # of `inner`. Otherwise (mask == False) we want to retain the previous alpha.
            alpha = util.logsumexp(inner, 1) * mask[i].view(batch_size, 1) + alpha * (
                ~mask[i]
            ).view(batch_size, 1)

        # Every sequence needs to end with a transition to the stop_tag.
        if self.include_start_end_transitions:
            stops = alpha + self.end_transitions.view(1, num_tags)
        else:
            stops = alpha

        # Finally we log_sum_exp along the num_tags dim, result is (batch_size,)
        return util.logsumexp(stops) 
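This is the standard CRF forward algorithm in log space: each step computes alpha[t][j] = logsumexp_i(alpha[t-1][i] + transitions[i][j]) + logits[t][j], and the final logsumexp over tags yields the log partition function. A small brute-force check of that recurrence, with no batch dimension, mask, or start/end transitions (shapes and values are illustrative):

import itertools
import torch
from allennlp.nn import util

num_tags, seq_len = 3, 4
logits = torch.randn(seq_len, num_tags)        # emission scores per timestep
transitions = torch.randn(num_tags, num_tags)  # transitions[i][j]: tag i -> tag j

# Forward algorithm: alpha over tags at each timestep.
alpha = logits[0]
for t in range(1, seq_len):
    inner = alpha.view(num_tags, 1) + transitions + logits[t].view(1, num_tags)
    alpha = util.logsumexp(inner, 0)
log_partition = util.logsumexp(alpha)

# Brute force: enumerate all num_tags ** seq_len tag sequences.
scores = []
for tags in itertools.product(range(num_tags), repeat=seq_len):
    score = sum(logits[t, tag] for t, tag in enumerate(tags))
    score = score + sum(transitions[tags[t - 1], tags[t]] for t in range(1, seq_len))
    scores.append(score.view(1))
assert torch.allclose(log_partition, util.logsumexp(torch.cat(scores)), atol=1e-5)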
Example #6
Source File: conditional_random_field.py    From magnitude with MIT License
def _input_likelihood(self, logits, mask):
        u"""
        Computes the (batch_size,) denominator term for the log-likelihood, which is the
        sum of the likelihoods across all possible state sequences.
        """
        batch_size, sequence_length, num_tags = logits.size()

        # Transpose batch size and sequence dimensions
        mask = mask.float().transpose(0, 1).contiguous()
        logits = logits.transpose(0, 1).contiguous()

        # Initial alpha is the (batch_size, num_tags) tensor of likelihoods combining the
        # transitions to the initial states and the logits for the first timestep.
        if self.include_start_end_transitions:
            alpha = self.start_transitions.view(1, num_tags) + logits[0]
        else:
            alpha = logits[0]

        # For each i we compute logits for the transitions from timestep i-1 to timestep i.
        # We do so in a (batch_size, num_tags, num_tags) tensor where the axes are
        # (instance, current_tag, next_tag)
        for i in range(1, sequence_length):
            # The emit scores are for time i ("next_tag") so we broadcast along the current_tag axis.
            emit_scores = logits[i].view(batch_size, 1, num_tags)
            # Transition scores are (current_tag, next_tag) so we broadcast along the instance axis.
            transition_scores = self.transitions.view(1, num_tags, num_tags)
            # Alpha is for the current_tag, so we broadcast along the next_tag axis.
            broadcast_alpha = alpha.view(batch_size, num_tags, 1)

            # Add all the scores together and logsumexp over the current_tag axis
            inner = broadcast_alpha + emit_scores + transition_scores

            # In valid positions (mask == 1) we want to take the logsumexp over the current_tag dimension
            # of ``inner``. Otherwise (mask == 0) we want to retain the previous alpha.
            alpha = (util.logsumexp(inner, 1) * mask[i].view(batch_size, 1) +
                     alpha * (1 - mask[i]).view(batch_size, 1))

        # Every sequence needs to end with a transition to the stop_tag.
        if self.include_start_end_transitions:
            stops = alpha + self.end_transitions.view(1, num_tags)
        else:
            stops = alpha

        # Finally we log_sum_exp along the num_tags dim, result is (batch_size,)
        return util.logsumexp(stops) 
Example #7
Source File: maximum_marginal_likelihood.py    From propara with Apache License 2.0
def decode(self,
               initial_state: DecoderState,
               decode_step: DecoderStep,
               supervision: Tuple[torch.Tensor, torch.Tensor],
               # instance_score has shape : batch_size, score of the instance
               instance_score=None) -> Dict[str, torch.Tensor]:
        targets, target_mask = supervision
        allowed_transitions = self._create_allowed_transitions(targets, target_mask)
        finished_states = []
        states = [initial_state]
        step_num = 0
        while states:
            step_num += 1
            next_states = []
            # We group together all current states to get more efficient (batched) computation.
            grouped_state = states[0].combine_states(states)
            allowed_actions = self._get_allowed_actions(grouped_state, allowed_transitions)
            # This will store a set of (batch_index, action_history) tuples, and we'll check it
            # against the allowed actions to make sure we're actually scoring all of the actions we
            # are supposed to.
            actions_taken: Set[Tuple[int, Tuple[int, ...]]] = set()
            for next_state in decode_step.take_step(grouped_state, allowed_actions=allowed_actions, max_actions=20):
                actions_taken.add((next_state.batch_indices[0], tuple(next_state.action_history[0])))
                if next_state.is_finished():
                    finished_states.append(next_state)
                else:
                    next_states.append(next_state)
            states = next_states
            # self._check_all_actions_taken(actions_taken, grouped_state, allowed_actions)

        # This is a dictionary of lists - for each batch instance, we want the score of all
        # finished states.  So this has shape (batch_size, num_target_action_sequences), though
        # it's not actually a tensor, because different batch instances might have different numbers
        # of finished states.
        batch_scores = self._group_scores_by_batch(finished_states)
        loss = 0
        for scores in batch_scores.values():  # we don't care about the batch index, just the scores
            # Assumes: there is always batch size of 1 (at least until EMNLP'18 submission)
            # if instance_score is a float, it is a dummy passed by the model.
            if not instance_score or type(instance_score) == float:
                loss += -util.logsumexp(torch.cat(scores))
            else:
                loss += instance_score[0] * -util.logsumexp(torch.cat(scores))
        # Denominator should not be zero.
        output_dict = {}
        output_dict['loss'] = loss / (len(batch_scores) + 1e-12)
        output_dict['finished_states'] = finished_states

        return output_dict