Python Examples of allennlp.nn.util.weighted_sum

Source File: util_test.py From allennlp with Apache License 2.0

6 votes

def test_weighted_sum_works_on_simple_input(self):
    """Check ``weighted_sum`` with a 2-D attention over a 3-D matrix.

    The result must have shape ``(batch_size, embedding_dim)`` and equal the
    attention-weighted sum of the rows of the single batch element.
    """
    batch_size, sentence_length, embedding_dim = 1, 5, 4
    weights = [0.3, 0.4, 0.1, 0, 1.2]

    sentence_array = numpy.random.rand(batch_size, sentence_length, embedding_dim)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.FloatTensor([weights])

    result = util.weighted_sum(sentence_tensor, attention_tensor)
    aggregated_array = result.data.numpy()
    assert aggregated_array.shape == (batch_size, embedding_dim)

    # Reference value: accumulate weight * row in plain numpy, left to right.
    expected_array = numpy.zeros(embedding_dim)
    for position, weight in enumerate(weights):
        expected_array = expected_array + weight * sentence_array[0, position]
    numpy.testing.assert_almost_equal(aggregated_array, [expected_array], decimal=5)

Source File: util_test.py From allennlp with Apache License 2.0

6 votes

def test_weighted_sum_handles_higher_order_input(self):
    """Check ``weighted_sum`` when matrix and attention share every leading dim.

    The matrix has one more (embedding) axis than the attention; the output
    must drop the last attention axis. One entry is spot-checked by hand.
    """
    batch_size, embedding_dim = 1, 4
    length_1, length_2, length_3 = 5, 6, 2
    attention_shape = (batch_size, length_1, length_2, length_3)

    sentence_array = numpy.random.rand(*attention_shape, embedding_dim)
    attention_array = numpy.random.rand(*attention_shape)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)

    # Spot-check the output at (batch 0, position 3, position 2).
    probe = (0, 3, 2)
    expected_array = sum(
        attention_array[probe + (k,)] * sentence_array[probe + (k,)]
        for k in range(length_3)
    )
    numpy.testing.assert_almost_equal(aggregated_array[probe], expected_array, decimal=5)

Source File: util_test.py From allennlp with Apache License 2.0

6 votes

def test_weighted_sum_handles_uneven_higher_order_input(self):
    """Check ``weighted_sum`` with a 4-D attention over a 3-D matrix.

    The attention has two extra middle axes that the matrix lacks; every
    ``(i, j)`` slice of the output is compared against a hand-computed sum.
    """
    batch_size, embedding_dim = 1, 4
    length_1, length_2, length_3 = 5, 6, 2

    sentence_array = numpy.random.rand(batch_size, length_3, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2, length_3)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)

    # length_3 == 2, so the single batch element holds exactly two rows.
    first_row, second_row = sentence_array[0]
    for i, j in numpy.ndindex(length_1, length_2):
        first_weight, second_weight = attention_array[0, i, j]
        expected_array = first_weight * first_row + second_weight * second_row
        numpy.testing.assert_almost_equal(aggregated_array[0, i, j], expected_array, decimal=5)

Source File: util_test.py From allennlp with Apache License 2.0

6 votes

def test_weighted_sum_handles_3d_attention_with_3d_matrix(self):
    """Check ``weighted_sum`` with a 3-D attention over a 3-D matrix.

    Each of the ``length_1`` attention rows must produce its own weighted sum
    of the ``length_2`` matrix rows.
    """
    batch_size, length_1, length_2, embedding_dim = 1, 5, 2, 4

    sentence_array = numpy.random.rand(batch_size, length_2, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, embedding_dim)

    # Walk output rows and attention rows of the single batch element together.
    for output_row, weights in zip(aggregated_array[0], attention_array[0]):
        expected_array = weights[0] * sentence_array[0, 0] + weights[1] * sentence_array[0, 1]
        numpy.testing.assert_almost_equal(output_row, expected_array, decimal=5)

Source File: nlvr_decoder_step.py From magnitude with MIT License

6 votes

def attend_on_sentence(self, query, encoder_outputs, encoder_output_mask):
    u"""
    Attend over the sentence encoder outputs and return their weighted sum.

    Given a query (which is typically the decoder hidden state), compute an attention over the
    output of the sentence encoder, and return a weighted sum of the sentence representations
    given this attention.

    This method is almost identical to ``WikiTablesDecoderStep.attend_on_question``; the one
    difference is that the attention weights are NOT returned here, only the attended
    representation.

    This is a simple computation, but we have it as a separate method so that the ``forward``
    method on the main parser module can call it on the initial hidden state, to simplify the
    logic in ``take_step``.

    Parameters
    ----------
    query :
        Passed as the first argument to ``self._input_attention``.
    encoder_outputs :
        The sentence encoder outputs that are attended over and then summed.
    encoder_output_mask :
        Mask forwarded to ``self._input_attention``.

    Returns
    -------
    The attention-weighted sum of ``encoder_outputs``; expected shape
    ``(group_size, encoder_output_dim)``.
    """
    # (group_size, sentence_length)
    sentence_attention_weights = self._input_attention(query,
                                                       encoder_outputs,
                                                       encoder_output_mask)
    # (group_size, encoder_output_dim)
    attended_sentence = nn_util.weighted_sum(encoder_outputs, sentence_attention_weights)
    return attended_sentence

Source File: wikitables_decoder_step.py From magnitude with MIT License

6 votes

def attend_on_question(self, query, encoder_outputs, encoder_output_mask):
    u"""
    Attend over the question encoder outputs with ``query`` (typically the decoder hidden
    state) and return both the attended representation and the attention weights.

    The computation is small, but it lives in its own method so that the ``forward`` method on
    the main parser module can call it on the initial hidden state, which keeps the logic in
    ``take_step`` simpler.

    Returns
    -------
    A pair ``(attended_question, question_attention_weights)``; expected shapes are
    ``(group_size, encoder_output_dim)`` and ``(group_size, question_length)``.
    """
    attention_inputs = (query, encoder_outputs, encoder_output_mask)
    # (group_size, question_length)
    question_attention_weights = self._input_attention(*attention_inputs)
    # (group_size, encoder_output_dim)
    attended_question = util.weighted_sum(encoder_outputs, question_attention_weights)
    return attended_question, question_attention_weights

Source File: util_test.py From magnitude with MIT License

6 votes

def test_weighted_sum_handles_uneven_higher_order_input(self):
    """Check ``weighted_sum`` when the attention has more axes than the matrix.

    The attention is 4-D and the matrix 3-D; the whole output of the single
    batch element is compared against a numpy reference.
    """
    batch_size = 1
    length_1, length_2, length_3 = 5, 6, 2
    embedding_dim = 4

    sentence_array = numpy.random.rand(batch_size, length_3, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2, length_3)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)

    # numpy.dot contracts the last attention axis (length_3) with the first
    # matrix axis, giving the expected value for every (i, j) at once.
    expected_array = numpy.dot(attention_array[0], sentence_array[0])
    numpy.testing.assert_almost_equal(aggregated_array[0], expected_array, decimal=5)

Source File: util_test.py From magnitude with MIT License

6 votes

def test_weighted_sum_handles_3d_attention_with_3d_matrix(self):
    """Check ``weighted_sum`` for a 3-D attention applied to a 3-D matrix.

    Every attention row ``i`` must yield the weighted sum of the two matrix rows.
    """
    batch_size = 1
    length_1, length_2 = 5, 2
    embedding_dim = 4

    sentence_array = numpy.random.rand(batch_size, length_2, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, embedding_dim)

    matrix_rows = sentence_array[0]
    for i, weights in enumerate(attention_array[0]):
        # (length_2,) . (length_2, embedding_dim) -> (embedding_dim,)
        expected_array = numpy.dot(weights, matrix_rows)
        numpy.testing.assert_almost_equal(aggregated_array[0, i], expected_array, decimal=5)

Source File: basic_transition_function.py From allennlp-semparse with Apache License 2.0

6 votes

def attend_on_question(
    self, query: torch.Tensor, encoder_outputs: torch.Tensor, encoder_output_mask: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Compute attention over the question encoder outputs and the resulting weighted sum.

    ``query`` is typically the decoder hidden state. The method is deliberately separate so
    that ``forward`` on the main parser module can call it on the initial hidden state, which
    simplifies the logic in ``take_step``.

    Returns the attended question representation together with the attention weights that
    produced it, in that order.
    """
    # Expected shape: (group_size, question_length)
    weights = self._input_attention(query, encoder_outputs, encoder_output_mask)
    # The weighted sum is expected to be (group_size, encoder_output_dim).
    return util.weighted_sum(encoder_outputs, weights), weights

Source File: self_attentive_span_extractor.py From allennlp with Apache License 2.0

5 votes

def forward(
    self,
    sequence_tensor: torch.FloatTensor,
    span_indices: torch.LongTensor,
    span_indices_mask: torch.BoolTensor = None,
) -> torch.FloatTensor:
    """
    Embed each span as the attention-weighted sum of its token embeddings.

    A global attention logit is computed per token, carried through span selection by
    appending it to the token embedding, then normalised within each span with a masked
    softmax. When ``span_indices_mask`` is given, the embeddings of padded spans are
    multiplied by the mask before returning.
    """
    # Shape: (batch_size, sequence_length, 1)
    token_logits = self._global_attention(sequence_tensor)

    # Append the logit as one extra feature so a single span selection gathers both.
    # Shape: (batch_size, sequence_length, embedding_dim + 1)
    embeddings_with_logits = torch.cat([sequence_tensor, token_logits], -1)
    selected, span_mask = util.batched_span_select(embeddings_with_logits, span_indices)

    # Split the extra feature back off.
    # Shape: (batch_size, num_spans, max_batch_span_width, embedding_dim)
    span_embeddings = selected[:, :, :, :-1]
    # Shape: (batch_size, num_spans, max_batch_span_width)
    span_attention_logits = selected[:, :, :, -1]

    # Normalise over the width of each span.
    # Shape: (batch_size, num_spans, max_batch_span_width)
    span_attention_weights = util.masked_softmax(span_attention_logits, span_mask)

    # Shape: (batch_size, num_spans, embedding_dim)
    attended_text_embeddings = util.weighted_sum(span_embeddings, span_attention_weights)

    if span_indices_mask is None:
        return attended_text_embeddings

    # ``span_mask`` above handled span widths relative to the widest span in the batch;
    # this mask instead covers whole spans that were passed in as padding.
    return attended_text_embeddings * span_indices_mask.unsqueeze(-1)

Source File: util_test.py From magnitude with MIT License

5 votes

def test_weighted_sum_works_on_simple_input(self):
    """Check ``weighted_sum`` on one sentence with a fixed attention vector.

    The output must be ``(batch_size, embedding_dim)`` and match the numpy
    reference to five decimals.
    """
    batch_size = 1
    sentence_length = 5
    embedding_dim = 4
    weights = [.3, .4, .1, 0, 1.2]

    sentence_array = numpy.random.rand(batch_size, sentence_length, embedding_dim)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.FloatTensor([weights])

    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, embedding_dim)

    # (sentence_length,) . (sentence_length, embedding_dim) -> (embedding_dim,)
    expected_array = numpy.dot(weights, sentence_array[0])
    numpy.testing.assert_almost_equal(aggregated_array, [expected_array], decimal=5)

Source File: copynet.py From nlp-models with MIT License

5 votes

def _decoder_step(
    self,
    last_predictions: torch.Tensor,
    selective_weights: torch.Tensor,
    state: Dict[str, torch.Tensor],
) -> Dict[str, torch.Tensor]:
    """
    Advance the decoder cell by one step and return the updated ``state``.

    The cell input is the projection of three concatenated pieces: the embedding of the
    previous predictions, an attentive read over all encoder outputs, and a selective read
    over the encoder outputs with the first and last positions sliced off.

    ``state`` is modified in place: ``"decoder_hidden"`` and ``"decoder_context"`` are
    overwritten with the outputs of the cell. The same dict is returned.
    """
    # NOTE(review): presumably (group_size, max_input_sequence_length) - confirm with caller.
    source_mask = state["source_mask"]
    # shape: (group_size, target_embedding_dim)
    embedded_input = self._target_embedder(last_predictions)

    previous_hidden = state["decoder_hidden"]
    encoder_outputs = state["encoder_outputs"]

    # shape: (group_size, max_input_sequence_length)
    attentive_weights = self._attention(previous_hidden, encoder_outputs, source_mask)
    # shape: (group_size, encoder_output_dim)
    attentive_read = util.weighted_sum(encoder_outputs, attentive_weights)
    # The selective weights pair with the encoder outputs minus their first and last positions.
    # shape: (group_size, encoder_output_dim)
    selective_read = util.weighted_sum(encoder_outputs[:, 1:-1], selective_weights)

    # shape: (group_size, target_embedding_dim + encoder_output_dim * 2)
    decoder_input = torch.cat((embedded_input, attentive_read, selective_read), -1)
    # shape: (group_size, decoder_input_dim)
    projected_decoder_input = self._input_projection_layer(decoder_input)

    previous_cell_state = (previous_hidden, state["decoder_context"])
    new_hidden, new_context = self._decoder_cell(projected_decoder_input, previous_cell_state)
    state["decoder_hidden"] = new_hidden
    state["decoder_context"] = new_context
    return state

Source File: intra_sentence_attention.py From magnitude with MIT License

4 votes

def forward(self, tokens, mask):  # pylint: disable=arguments-differ
    u"""
    Attend from every token to every token of the same sequence, combine the attended
    representation with the original tokens and project the result.

    With more than one attention head, the projected tokens are split per head before the
    weighted sum and the per-head results are merged back into one vector afterwards.
    """
    batch_size, sequence_length, _ = tokens.size()
    multi_head = self._num_attention_heads > 1

    # Shape: (batch_size, sequence_length, sequence_length)
    similarity_matrix = self._matrix_attention(tokens, tokens)
    if multi_head:
        # With several heads the similarity has a trailing num_heads axis:
        # (batch_size, sequence_length, sequence_length, num_heads).  Move the heads before the
        # attended-over axis so the softmax below normalises over the sequence:
        # (batch_size, sequence_length, num_heads, sequence_length).
        similarity_matrix = similarity_matrix.permute(0, 1, 3, 2)

    # Shape: (batch_size, sequence_length, [num_heads,] sequence_length)
    intra_sentence_attention = util.last_dim_softmax(similarity_matrix.contiguous(), mask)

    # Shape: (batch_size, sequence_length, projection_dim)
    output_token_representation = self._projection(tokens)
    if multi_head:
        # Split the last axis per head, then put the heads in front of the sequence axis so
        # the weighted sum lines up with `intra_sentence_attention`:
        # (batch_size, sequence_length, num_heads, projection_dim / num_heads)
        #   -> (batch_size, num_heads, sequence_length, projection_dim / num_heads)
        leading_dims = list(output_token_representation.size())[:-1]
        per_head_shape = leading_dims + [self._num_attention_heads, -1]
        output_token_representation = output_token_representation.view(*per_head_shape)
        output_token_representation = output_token_representation.permute(0, 2, 1, 3)

    # Shape: (batch_size, sequence_length, [num_heads,] projection_dim [/ num_heads])
    attended_sentence = util.weighted_sum(output_token_representation,
                                          intra_sentence_attention)
    if multi_head:
        # Concatenating the heads is just a reshape.
        # Shape: (batch_size, sequence_length, projection_dim)
        attended_sentence = attended_sentence.view(batch_size, sequence_length, -1)

    # Shape: (batch_size, sequence_length, combination_dim)
    combined_tensors = util.combine_tensors(self._combination, [tokens, attended_sentence])
    return self._output_projection(combined_tensors)

Source File: simple_seq2seq.py From magnitude with MIT License

4 votes

def _prepare_decode_step_input(self,
                               input_indices,
                               decoder_hidden_state=None,
                               encoder_outputs=None,
                               encoder_outputs_mask=None):
    u"""
    Build the decoder input for the current timestep from the given input indices.

    The method does not care whether the indices are gold indices or the decoder's own
    predictions from the previous timestep, so it also works with scheduled sampling.

    Without attention the result is just the embedding of ``input_indices``.  With attention
    it is the concatenation of an attended sum of the encoder outputs and that embedding, in
    that order.

    Parameters
    ----------
    input_indices :
        Indices of either the gold inputs to the decoder or the labels predicted at the
        previous timestep; expected shape ``(batch_size,)``.
    decoder_hidden_state : optional (not needed if no attention)
        Decoder output from the last timestep; used as the attention query.
    encoder_outputs : optional (not needed if no attention)
        Encoder outputs from all timesteps.
    encoder_outputs_mask : optional (not needed if no attention)
        Mask over the encoder outputs; it is converted to float before use.
    """
    # (batch_size, target_embedding_dim)
    embedded_input = self._target_embedder(input_indices)
    if not self._attention_function:
        return embedded_input

    # The mask must be a float tensor, otherwise the multiplication inside the attention
    # complains.
    encoder_outputs_mask = encoder_outputs_mask.float()
    # encoder_outputs: (batch_size, input_sequence_length, encoder_output_dim)
    # input_weights:   (batch_size, input_sequence_length)
    input_weights = self._decoder_attention(decoder_hidden_state, encoder_outputs, encoder_outputs_mask)
    # (batch_size, encoder_output_dim)
    attended_input = weighted_sum(encoder_outputs, input_weights)
    # (batch_size, encoder_output_dim + target_embedding_dim)
    return torch.cat((attended_input, embedded_input), -1)

Source File: prostruct_model.py From propara with Apache License 2.0

4 votes

def compute_location_spans(self, contextual_seq_embedding, embedded_sentence_verb_entity, mask):
    """
    Predict the best "after" location span and return it with the masked start/end logits.

    Start logits are predicted from the concatenation of the two inputs.  The softmax of those
    logits yields a start representation (a weighted sum of ``contextual_seq_embedding``),
    which is tiled along the token axis and fed, together with the inputs, to the span-end
    encoder and predictor.  Masked positions of both logit tensors are then replaced with
    ``-1e7`` before the best span is selected.

    Parameters
    ----------
    contextual_seq_embedding :
        5-D tensor ``(batch_size, num_sentences, num_participants, sentence_length,
        encoder_dim)``.
    embedded_sentence_verb_entity :
        Tensor concatenated with ``contextual_seq_embedding`` on the last axis, so it must
        agree with it on all other axes.
    mask :
        Mask passed to the softmax, the span-end encoder and ``replace_masked_values``;
        presumably ``(batch_size, num_sentences, num_participants, sentence_length)`` -
        TODO confirm against the caller.

    Returns
    -------
    ``(best_span_after, span_start_logits_after, span_end_logits_after)`` where both logit
    tensors already have masked positions set to ``-1e7``.
    """
    # Layer 5: span prediction for the "after" location.
    # Unpacking fixes the rank at 5 and provides the target shape for the tiling below.
    batch_size, num_sentences, num_participants, sentence_length, encoder_dim = contextual_seq_embedding.shape

    # Same leading axes as the inputs; the last axis is the sum of their feature sizes.
    span_start_input_after = torch.cat([embedded_sentence_verb_entity, contextual_seq_embedding], dim=-1)

    # Shape: (bs, ns, np, sl)
    span_start_logits_after = self._span_start_predictor_after(span_start_input_after).squeeze(-1)

    # Shape: (bs, ns, np, sl)
    span_start_probs_after = util.masked_softmax(span_start_logits_after, mask)

    # Shape: (bs, ns, np, encoder_dim)
    span_start_representation_after = util.weighted_sum(contextual_seq_embedding, span_start_probs_after)

    # Repeat the start representation for every token position.
    # Shape: (bs, ns, np, sl, encoder_dim)
    span_tiled_start_representation_after = span_start_representation_after.unsqueeze(3).expand(
        batch_size, num_sentences, num_participants, sentence_length, encoder_dim
    )

    # Last axis: embedded input features + 3 * encoder_dim.
    span_end_representation_after = torch.cat([embedded_sentence_verb_entity,
                                               contextual_seq_embedding,
                                               span_tiled_start_representation_after,
                                               contextual_seq_embedding * span_tiled_start_representation_after],
                                              dim=-1)

    encoded_span_end_after = self.time_distributed_encoder_span_end_after(span_end_representation_after, mask)

    span_end_logits_after = self._span_end_predictor_after(encoded_span_end_after).squeeze(-1)

    # The end probabilities were previously computed here but never used, so that softmax
    # has been dropped; only the logits are needed for span selection.
    span_start_logits_after = util.replace_masked_values(span_start_logits_after, mask, -1e7)
    span_end_logits_after = util.replace_masked_values(span_end_logits_after, mask, -1e7)

    # Fixme: we should condition this on predicted_action so that we can output '-' when needed
    # Fixme: also add a functionality to be able to output '?': the start probabilities above
    #        and a masked softmax over the end logits could be used for that.
    best_span_after = self.get_best_span(span_start_logits_after, span_end_logits_after)
    return best_span_after, span_start_logits_after, span_end_logits_after

Source File: pgn.py From summarus with Apache License 2.0

4 votes

def _prepare_output_projections(self,
                                last_predictions: torch.Tensor,
                                state: Dict[str, torch.Tensor]) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Run one decoder step and return the output projections with the updated ``state``.

    Predictions at or above ``self._vocab_size`` are replaced by ``self._unk_index`` before
    embedding.  Depending on ``self._embed_attn_to_output`` the attention is computed either
    before the decoder cell (from the previous hidden state) or after it (from the new hidden
    state, in which case the cell input uses the attention context stored in ``state``).

    ``state`` is modified in place (``decoder_input``, ``decoder_hidden``, ``decoder_context``,
    ``attn_scores``, ``attn_context`` and, when coverage is enabled, ``coverage``).
    """
    # shape: (group_size, max_input_sequence_length, encoder_output_dim)
    encoder_outputs = state["encoder_outputs"]
    # shape: (group_size, max_input_sequence_length)
    source_mask = state["source_mask"]
    # shape: (group_size, decoder_output_dim)
    decoder_hidden = state["decoder_hidden"]
    # shape: (group_size, decoder_output_dim)
    decoder_context = state["decoder_context"]
    # May be None when the key is absent from ``state``.
    attn_context = state.get("attn_context", None)
    coverage = state.get("coverage", None)

    # 1 where the prediction is outside the vocabulary, 0 elsewhere; those positions are
    # swapped for the unknown-token index.
    is_unk = (last_predictions >= self._vocab_size).long()
    last_predictions_fixed = last_predictions * (1 - is_unk) + self._unk_index * is_unk
    embedded_input = self._target_embedder(last_predictions_fixed)

    def attend(hidden):
        # Coverage is handed to the attention only when it is present in ``state``.
        extra_args = () if coverage is None else (coverage,)
        scores = self._attention(hidden, encoder_outputs, source_mask, *extra_args)
        return scores, util.weighted_sum(encoder_outputs, scores)

    attend_before_cell = not self._embed_attn_to_output
    if attend_before_cell:
        attn_scores, attn_context = attend(decoder_hidden)

    decoder_input = torch.cat((attn_context, embedded_input), -1)
    decoder_hidden, decoder_context = self._decoder_cell(decoder_input, (decoder_hidden, decoder_context))

    if attend_before_cell:
        projection_input = decoder_hidden
    else:
        # Attention is computed from the freshly updated hidden state and then fed to the
        # projection together with that state.
        attn_scores, attn_context = attend(decoder_hidden)
        projection_input = torch.cat((attn_context, decoder_hidden), -1)

    projection = self._hidden_projection_layer(projection_input)
    output_projections = self._output_projection_layer(projection)

    if self._use_coverage:
        state["coverage"] = coverage + attn_scores
    state.update({
        "decoder_input": decoder_input,
        "decoder_hidden": decoder_hidden,
        "decoder_context": decoder_context,
        "attn_scores": attn_scores,
        "attn_context": attn_context,
    })

    return output_projections, state

Python allennlp.nn.util.weighted_sum() Examples