Python allennlp.nn.util.weighted_sum() Examples
The following are 16 code examples of allennlp.nn.util.weighted_sum().
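Before the project examples, here is a minimal sketch of the basic call pattern (not taken from any of the projects below): weighted_sum() takes a matrix of vectors and an attention distribution over them and returns the attention-weighted sum. It assumes torch and allennlp are installed; the shapes are illustrative.

import torch
from allennlp.nn import util

matrix = torch.rand(2, 5, 4)                         # (batch_size, sequence_length, embedding_dim)
attention = torch.softmax(torch.rand(2, 5), dim=-1)  # (batch_size, sequence_length)
result = util.weighted_sum(matrix, attention)        # (batch_size, embedding_dim)
print(result.shape)                                  # torch.Size([2, 4])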
Example #1
Source File: util_test.py From allennlp with Apache License 2.0
def test_weighted_sum_works_on_simple_input(self):
    batch_size = 1
    sentence_length = 5
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, sentence_length, embedding_dim)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.FloatTensor([[0.3, 0.4, 0.1, 0, 1.2]])
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, embedding_dim)
    expected_array = (
        0.3 * sentence_array[0, 0]
        + 0.4 * sentence_array[0, 1]
        + 0.1 * sentence_array[0, 2]
        + 0.0 * sentence_array[0, 3]
        + 1.2 * sentence_array[0, 4]
    )
    numpy.testing.assert_almost_equal(aggregated_array, [expected_array], decimal=5)
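For this 3-D matrix / 2-D attention case, the weighted sum is mathematically equivalent to a batched matrix product. A small sanity check, not part of the original test file:

import numpy
import torch
from allennlp.nn import util

matrix = torch.rand(1, 5, 4)                     # (batch_size, sentence_length, embedding_dim)
attention = torch.FloatTensor([[0.3, 0.4, 0.1, 0.0, 1.2]])
via_util = util.weighted_sum(matrix, attention)  # (batch_size, embedding_dim)
via_bmm = attention.unsqueeze(1).bmm(matrix).squeeze(1)
numpy.testing.assert_almost_equal(via_util.numpy(), via_bmm.numpy(), decimal=5)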
Example #2
Source File: util_test.py From allennlp with Apache License 2.0
def test_weighted_sum_handles_higher_order_input(self):
    batch_size = 1
    length_1 = 5
    length_2 = 6
    length_3 = 2
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, length_1, length_2, length_3, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2, length_3)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)
    expected_array = (
        attention_array[0, 3, 2, 0] * sentence_array[0, 3, 2, 0]
        + attention_array[0, 3, 2, 1] * sentence_array[0, 3, 2, 1]
    )
    numpy.testing.assert_almost_equal(aggregated_array[0, 3, 2], expected_array, decimal=5)
Example #3
Source File: util_test.py From allennlp with Apache License 2.0
def test_weighted_sum_handles_uneven_higher_order_input(self):
    batch_size = 1
    length_1 = 5
    length_2 = 6
    length_3 = 2
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, length_3, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2, length_3)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)
    for i in range(length_1):
        for j in range(length_2):
            expected_array = (
                attention_array[0, i, j, 0] * sentence_array[0, 0]
                + attention_array[0, i, j, 1] * sentence_array[0, 1]
            )
            numpy.testing.assert_almost_equal(
                aggregated_array[0, i, j], expected_array, decimal=5
            )
Example #4
Source File: util_test.py From allennlp with Apache License 2.0
def test_weighted_sum_handles_3d_attention_with_3d_matrix(self):
    batch_size = 1
    length_1 = 5
    length_2 = 2
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, length_2, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, embedding_dim)
    for i in range(length_1):
        expected_array = (
            attention_array[0, i, 0] * sentence_array[0, 0]
            + attention_array[0, i, 1] * sentence_array[0, 1]
        )
        numpy.testing.assert_almost_equal(aggregated_array[0, i], expected_array, decimal=5)
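When both the attention tensor and the matrix are 3-D, each attention row produces its own weighted sum, which is equivalent to a batched matrix product (einsum). A small illustrative check, not from the original test file:

import numpy
import torch
from allennlp.nn import util

matrix = torch.rand(1, 2, 4)                     # (batch_size, length_2, embedding_dim)
attention = torch.rand(1, 5, 2)                  # (batch_size, length_1, length_2)
via_util = util.weighted_sum(matrix, attention)  # (batch_size, length_1, embedding_dim)
via_einsum = torch.einsum("bij,bjd->bid", attention, matrix)
numpy.testing.assert_almost_equal(via_util.numpy(), via_einsum.numpy(), decimal=5)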
Example #5
Source File: nlvr_decoder_step.py From magnitude with MIT License
def attend_on_sentence(self, query, encoder_outputs, encoder_output_mask):
    u"""
    This method is almost identical to ``WikiTablesDecoderStep.attend_on_question``. We just
    don't return the attention weights. Given a query (which is typically the decoder hidden
    state), compute an attention over the output of the sentence encoder, and return a
    weighted sum of the sentence representations given this attention.

    This is a simple computation, but we have it as a separate method so that the ``forward``
    method on the main parser module can call it on the initial hidden state, to simplify the
    logic in ``take_step``.
    """
    # (group_size, sentence_length)
    sentence_attention_weights = self._input_attention(query,
                                                       encoder_outputs,
                                                       encoder_output_mask)
    # (group_size, encoder_output_dim)
    attended_sentence = nn_util.weighted_sum(encoder_outputs, sentence_attention_weights)
    return attended_sentence
Example #6
Source File: wikitables_decoder_step.py From magnitude with MIT License
def attend_on_question(self, query, encoder_outputs, encoder_output_mask):
    u"""
    Given a query (which is typically the decoder hidden state), compute an attention over the
    output of the question encoder, and return a weighted sum of the question representations
    given this attention. We also return the attention weights themselves.

    This is a simple computation, but we have it as a separate method so that the ``forward``
    method on the main parser module can call it on the initial hidden state, to simplify the
    logic in ``take_step``.
    """
    # (group_size, question_length)
    question_attention_weights = self._input_attention(query,
                                                       encoder_outputs,
                                                       encoder_output_mask)
    # (group_size, encoder_output_dim)
    attended_question = util.weighted_sum(encoder_outputs, question_attention_weights)
    return attended_question, question_attention_weights
Example #7
Source File: util_test.py From magnitude with MIT License
def test_weighted_sum_handles_uneven_higher_order_input(self):
    batch_size = 1
    length_1 = 5
    length_2 = 6
    length_3 = 2
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, length_3, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2, length_3)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, length_2, embedding_dim)
    for i in range(length_1):
        for j in range(length_2):
            expected_array = (attention_array[0, i, j, 0] * sentence_array[0, 0] +
                              attention_array[0, i, j, 1] * sentence_array[0, 1])
            numpy.testing.assert_almost_equal(aggregated_array[0, i, j], expected_array,
                                              decimal=5)
Example #8
Source File: util_test.py From magnitude with MIT License
def test_weighted_sum_handles_3d_attention_with_3d_matrix(self):
    batch_size = 1
    length_1 = 5
    length_2 = 2
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, length_2, embedding_dim)
    attention_array = numpy.random.rand(batch_size, length_1, length_2)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.from_numpy(attention_array).float()
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, length_1, embedding_dim)
    for i in range(length_1):
        expected_array = (attention_array[0, i, 0] * sentence_array[0, 0] +
                          attention_array[0, i, 1] * sentence_array[0, 1])
        numpy.testing.assert_almost_equal(aggregated_array[0, i], expected_array, decimal=5)
Example #9
Source File: basic_transition_function.py From allennlp-semparse with Apache License 2.0
def attend_on_question(
    self, query: torch.Tensor, encoder_outputs: torch.Tensor, encoder_output_mask: torch.Tensor
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Given a query (which is typically the decoder hidden state), compute an attention over the
    output of the question encoder, and return a weighted sum of the question representations
    given this attention. We also return the attention weights themselves.

    This is a simple computation, but we have it as a separate method so that the ``forward``
    method on the main parser module can call it on the initial hidden state, to simplify the
    logic in ``take_step``.
    """
    # (group_size, question_length)
    question_attention_weights = self._input_attention(
        query, encoder_outputs, encoder_output_mask
    )
    # (group_size, encoder_output_dim)
    attended_question = util.weighted_sum(encoder_outputs, question_attention_weights)
    return attended_question, question_attention_weights
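The attend-on-question pattern above (score with an attention module, then call weighted_sum) can be exercised on its own. A hedged sketch, assuming a recent AllenNLP release; DotProductAttention is used purely as an illustrative scorer and the shapes are made up:

import torch
from allennlp.modules.attention import DotProductAttention
from allennlp.nn import util

attention = DotProductAttention()
query = torch.rand(3, 10)               # (group_size, encoder_output_dim)
encoder_outputs = torch.rand(3, 7, 10)  # (group_size, question_length, encoder_output_dim)
mask = torch.ones(3, 7).bool()          # (group_size, question_length)

# Normalised attention weights: (group_size, question_length)
weights = attention(query, encoder_outputs, mask)
# Attended question representation: (group_size, encoder_output_dim)
attended_question = util.weighted_sum(encoder_outputs, weights)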
Example #10
Source File: self_attentive_span_extractor.py From allennlp with Apache License 2.0
def forward(
    self,
    sequence_tensor: torch.FloatTensor,
    span_indices: torch.LongTensor,
    span_indices_mask: torch.BoolTensor = None,
) -> torch.FloatTensor:
    # shape (batch_size, sequence_length, 1)
    global_attention_logits = self._global_attention(sequence_tensor)

    # shape (batch_size, sequence_length, embedding_dim + 1)
    concat_tensor = torch.cat([sequence_tensor, global_attention_logits], -1)

    concat_output, span_mask = util.batched_span_select(concat_tensor, span_indices)

    # Shape: (batch_size, num_spans, max_batch_span_width, embedding_dim)
    span_embeddings = concat_output[:, :, :, :-1]
    # Shape: (batch_size, num_spans, max_batch_span_width)
    span_attention_logits = concat_output[:, :, :, -1]

    # Shape: (batch_size, num_spans, max_batch_span_width)
    span_attention_weights = util.masked_softmax(span_attention_logits, span_mask)

    # Do a weighted sum of the embedded spans with
    # respect to the normalised attention distributions.
    # Shape: (batch_size, num_spans, embedding_dim)
    attended_text_embeddings = util.weighted_sum(span_embeddings, span_attention_weights)

    if span_indices_mask is not None:
        # Above we were masking the widths of spans with respect to the max
        # span width in the batch. Here we are masking the spans which were
        # originally passed in as padding.
        return attended_text_embeddings * span_indices_mask.unsqueeze(-1)

    return attended_text_embeddings
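For context, a minimal usage sketch of a SelfAttentiveSpanExtractor like the one whose forward pass is shown above; it assumes AllenNLP is installed, and the dimensions and span indices are illustrative:

import torch
from allennlp.modules.span_extractors import SelfAttentiveSpanExtractor

extractor = SelfAttentiveSpanExtractor(input_dim=8)
sequence = torch.rand(2, 12, 8)               # (batch_size, sequence_length, input_dim)
spans = torch.LongTensor([[[0, 2], [5, 9]],
                          [[1, 1], [3, 7]]])  # (batch_size, num_spans, 2), inclusive indices
span_embeddings = extractor(sequence, spans)  # (batch_size, num_spans, input_dim)
print(span_embeddings.shape)                  # torch.Size([2, 2, 8])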
Example #11
Source File: util_test.py From magnitude with MIT License
def test_weighted_sum_works_on_simple_input(self):
    batch_size = 1
    sentence_length = 5
    embedding_dim = 4
    sentence_array = numpy.random.rand(batch_size, sentence_length, embedding_dim)
    sentence_tensor = torch.from_numpy(sentence_array).float()
    attention_tensor = torch.FloatTensor([[0.3, 0.4, 0.1, 0, 1.2]])
    aggregated_array = util.weighted_sum(sentence_tensor, attention_tensor).data.numpy()
    assert aggregated_array.shape == (batch_size, embedding_dim)
    expected_array = (0.3 * sentence_array[0, 0] +
                      0.4 * sentence_array[0, 1] +
                      0.1 * sentence_array[0, 2] +
                      0.0 * sentence_array[0, 3] +
                      1.2 * sentence_array[0, 4])
    numpy.testing.assert_almost_equal(aggregated_array, [expected_array], decimal=5)
Example #12
Source File: copynet.py From nlp-models with MIT License
def _decoder_step(
    self,
    last_predictions: torch.Tensor,
    selective_weights: torch.Tensor,
    state: Dict[str, torch.Tensor],
) -> Dict[str, torch.Tensor]:
    # shape: (group_size, max_input_sequence_length, encoder_output_dim)
    encoder_outputs_mask = state["source_mask"]
    # shape: (group_size, target_embedding_dim)
    embedded_input = self._target_embedder(last_predictions)
    # shape: (group_size, max_input_sequence_length)
    attentive_weights = self._attention(
        state["decoder_hidden"], state["encoder_outputs"], encoder_outputs_mask
    )
    # shape: (group_size, encoder_output_dim)
    attentive_read = util.weighted_sum(state["encoder_outputs"], attentive_weights)
    # shape: (group_size, encoder_output_dim)
    selective_read = util.weighted_sum(
        state["encoder_outputs"][:, 1:-1], selective_weights
    )
    # shape: (group_size, target_embedding_dim + encoder_output_dim * 2)
    decoder_input = torch.cat((embedded_input, attentive_read, selective_read), -1)
    # shape: (group_size, decoder_input_dim)
    projected_decoder_input = self._input_projection_layer(decoder_input)

    state["decoder_hidden"], state["decoder_context"] = self._decoder_cell(
        projected_decoder_input, (state["decoder_hidden"], state["decoder_context"])
    )
    return state
Example #13
Source File: intra_sentence_attention.py From magnitude with MIT License
def forward(self, tokens, mask):  # pylint: disable=arguments-differ
    batch_size, sequence_length, _ = tokens.size()
    # Shape: (batch_size, sequence_length, sequence_length)
    similarity_matrix = self._matrix_attention(tokens, tokens)

    if self._num_attention_heads > 1:
        # In this case, the similarity matrix actually has shape
        # (batch_size, sequence_length, sequence_length, num_heads).  To make the rest of
        # the logic below easier, we'll permute this to
        # (batch_size, sequence_length, num_heads, sequence_length).
        similarity_matrix = similarity_matrix.permute(0, 1, 3, 2)

    # Shape: (batch_size, sequence_length, [num_heads,] sequence_length)
    intra_sentence_attention = util.last_dim_softmax(similarity_matrix.contiguous(), mask)

    # Shape: (batch_size, sequence_length, projection_dim)
    output_token_representation = self._projection(tokens)

    if self._num_attention_heads > 1:
        # We need to split and permute the output representation to be
        # (batch_size, num_heads, sequence_length, projection_dim / num_heads), so that we
        # can do a proper weighted sum with `intra_sentence_attention`.
        shape = list(output_token_representation.size())
        new_shape = shape[:-1] + [self._num_attention_heads, -1]
        # Shape: (batch_size, sequence_length, num_heads, projection_dim / num_heads)
        output_token_representation = output_token_representation.view(*new_shape)
        # Shape: (batch_size, num_heads, sequence_length, projection_dim / num_heads)
        output_token_representation = output_token_representation.permute(0, 2, 1, 3)

    # Shape: (batch_size, sequence_length, [num_heads,] projection_dim [/ num_heads])
    attended_sentence = util.weighted_sum(output_token_representation,
                                          intra_sentence_attention)

    if self._num_attention_heads > 1:
        # Here we concatenate the weighted representation for each head.  We'll accomplish
        # this just with a resize.
        # Shape: (batch_size, sequence_length, projection_dim)
        attended_sentence = attended_sentence.view(batch_size, sequence_length, -1)

    # Shape: (batch_size, sequence_length, combination_dim)
    combined_tensors = util.combine_tensors(self._combination, [tokens, attended_sentence])
    return self._output_projection(combined_tensors)
Example #14
Source File: simple_seq2seq.py From magnitude with MIT License
def _prepare_decode_step_input(self,
                               input_indices,
                               decoder_hidden_state=None,
                               encoder_outputs=None,
                               encoder_outputs_mask=None):
    u"""
    Given the input indices for the current timestep of the decoder, and all the encoder
    outputs, compute the input at the current timestep.  Note: This method is agnostic to
    whether the indices are gold indices or the predictions made by the decoder at the last
    timestep. So, this can be used even if we're doing some kind of scheduled sampling.

    If we're not using attention, the output of this method is just an embedding of the
    input indices.  If we are, the output will be a concatenation of the embedding and an
    attended average of the encoder inputs.

    Parameters
    ----------
    input_indices : torch.LongTensor
        Indices of either the gold inputs to the decoder or the predicted labels from the
        previous timestep.
    decoder_hidden_state : torch.LongTensor, optional (not needed if no attention)
        Output from the decoder at the last time step. Needed only if using attention.
    encoder_outputs : torch.LongTensor, optional (not needed if no attention)
        Encoder outputs from all time steps. Needed only if using attention.
    encoder_outputs_mask : torch.LongTensor, optional (not needed if no attention)
        Masks on encoder outputs. Needed only if using attention.
    """
    # input_indices : (batch_size,)  since we are processing these one timestep at a time.
    # (batch_size, target_embedding_dim)
    embedded_input = self._target_embedder(input_indices)
    if self._attention_function:
        # encoder_outputs : (batch_size, input_sequence_length, encoder_output_dim)
        # Ensuring mask is also a FloatTensor. Or else the multiplication within attention
        # will complain.
        encoder_outputs_mask = encoder_outputs_mask.float()
        # (batch_size, input_sequence_length)
        input_weights = self._decoder_attention(decoder_hidden_state,
                                                encoder_outputs,
                                                encoder_outputs_mask)
        # (batch_size, encoder_output_dim)
        attended_input = weighted_sum(encoder_outputs, input_weights)
        # (batch_size, encoder_output_dim + target_embedding_dim)
        return torch.cat((attended_input, embedded_input), -1)
    else:
        return embedded_input
Example #15
Source File: prostruct_model.py From propara with Apache License 2.0
def compute_location_spans(self, contextual_seq_embedding, embedded_sentence_verb_entity, mask):
    # Layer 5: Span prediction for before and after location
    # Shape: (batch_size, passage_length, encoding_dim * 4 + modeling_dim)
    batch_size, num_sentences, num_participants, sentence_length, encoder_dim = \
        contextual_seq_embedding.shape

    # size(span_start_input_after): batch_size * num_sentences * num_participants *
    # sentence_length * (embedding_size + 2 + 2 * seq2seq_output_size)
    span_start_input_after = torch.cat([embedded_sentence_verb_entity,
                                        contextual_seq_embedding], dim=-1)

    # Shape: (bs, ns, np, sl)
    span_start_logits_after = self._span_start_predictor_after(span_start_input_after).squeeze(-1)
    # Shape: (bs, ns, np, sl)
    span_start_probs_after = util.masked_softmax(span_start_logits_after, mask)
    # span_start_representation_after: (bs, ns, np, encoder_dim)
    span_start_representation_after = util.weighted_sum(contextual_seq_embedding,
                                                         span_start_probs_after)
    # span_tiled_start_representation_after: (bs, ns, np, sl, 2 * seq2seq_output_size)
    span_tiled_start_representation_after = span_start_representation_after.unsqueeze(3).expand(
        batch_size, num_sentences, num_participants, sentence_length, encoder_dim)

    # Shape: (batch_size, passage_length, embedding + 2 + encoder_dim * 3)
    span_end_representation_after = torch.cat(
        [embedded_sentence_verb_entity,
         contextual_seq_embedding,
         span_tiled_start_representation_after,
         contextual_seq_embedding * span_tiled_start_representation_after],
        dim=-1)

    # Shape: (batch_size, passage_length, encoding_dim)
    encoded_span_end_after = self.time_distributed_encoder_span_end_after(
        span_end_representation_after, mask)
    span_end_logits_after = self._span_end_predictor_after(encoded_span_end_after).squeeze(-1)
    span_end_probs_after = util.masked_softmax(span_end_logits_after, mask)

    span_start_logits_after = util.replace_masked_values(span_start_logits_after, mask, -1e7)
    span_end_logits_after = util.replace_masked_values(span_end_logits_after, mask, -1e7)

    # Fixme: we should condition this on predicted_action so that we can output '-' when needed
    # Fixme: also add a functionality to be able to output '?': we can use
    # span_start_probs_after, span_end_probs_after
    best_span_after = self.get_best_span(span_start_logits_after, span_end_logits_after)
    return best_span_after, span_start_logits_after, span_end_logits_after
Example #16
Source File: pgn.py From summarus with Apache License 2.0
def _prepare_output_projections(self,
                                last_predictions: torch.Tensor,
                                state: Dict[str, torch.Tensor]
                                ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    # shape: (group_size, max_input_sequence_length, encoder_output_dim)
    encoder_outputs = state["encoder_outputs"]
    # shape: (group_size, max_input_sequence_length)
    source_mask = state["source_mask"]
    # shape: (group_size, decoder_output_dim)
    decoder_hidden = state["decoder_hidden"]
    # shape: (group_size, decoder_output_dim)
    decoder_context = state["decoder_context"]
    # shape: (group_size, encoder_output_dim)
    attn_context = state.get("attn_context", None)

    # Map out-of-vocabulary (copied) tokens back to the UNK index before embedding them.
    is_unk = (last_predictions >= self._vocab_size).long()
    last_predictions_fixed = last_predictions - last_predictions * is_unk + self._unk_index * is_unk
    embedded_input = self._target_embedder(last_predictions_fixed)

    coverage = state.get("coverage", None)

    def get_attention_context(decoder_hidden_inner):
        if coverage is None:
            attention_scores = self._attention(decoder_hidden_inner, encoder_outputs, source_mask)
        else:
            attention_scores = self._attention(decoder_hidden_inner, encoder_outputs,
                                               source_mask, coverage)
        attention_context = util.weighted_sum(encoder_outputs, attention_scores)
        return attention_scores, attention_context

    if not self._embed_attn_to_output:
        attn_scores, attn_context = get_attention_context(decoder_hidden)
        decoder_input = torch.cat((attn_context, embedded_input), -1)
        decoder_hidden, decoder_context = self._decoder_cell(decoder_input,
                                                             (decoder_hidden, decoder_context))
        projection = self._hidden_projection_layer(decoder_hidden)
    else:
        decoder_input = torch.cat((attn_context, embedded_input), -1)
        decoder_hidden, decoder_context = self._decoder_cell(decoder_input,
                                                             (decoder_hidden, decoder_context))
        attn_scores, attn_context = get_attention_context(decoder_hidden)
        projection = self._hidden_projection_layer(torch.cat((attn_context, decoder_hidden), -1))

    output_projections = self._output_projection_layer(projection)

    if self._use_coverage:
        state["coverage"] = coverage + attn_scores

    state["decoder_input"] = decoder_input
    state["decoder_hidden"] = decoder_hidden
    state["decoder_context"] = decoder_context
    state["attn_scores"] = attn_scores
    state["attn_context"] = attn_context

    return output_projections, state