Python allennlp.nn.util.move_to_device() Examples
The following are 15 code examples of allennlp.nn.util.move_to_device().
You may also want to check out all available functions and classes of the module allennlp.nn.util.
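For orientation before the project examples, here is a minimal sketch of the call itself. It assumes an AllenNLP version in which move_to_device accepts an arbitrarily nested structure of tensors plus an integer device index (-1 for CPU, 0 and up for GPU ids); the tensor values and key names below are made up for illustration.

import torch
from allennlp.nn import util

# A nested tensor dictionary, similar to what Batch.as_tensor_dict() produces.
batch = {
    "tokens": {"tokens": torch.LongTensor([[2, 5, 7]])},
    "label": torch.LongTensor([1]),
}

# Move every tensor in the structure to GPU 0 if one is available,
# otherwise keep everything on the CPU (-1).
cuda_device = 0 if torch.cuda.is_available() else -1
batch = util.move_to_device(batch, cuda_device)

All of the examples below follow this same pattern: build a batch of tensors, move it to the model's device with move_to_device, then call the model.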
Example #1
Source File: predictor.py From DISTRE with Apache License 2.0 | 5 votes |
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    model = self._model

    with torch.no_grad():
        cuda_device = model._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(model.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        outputs = model.decode(model(**model_input))

        return sanitize(outputs)
Example #2
Source File: trainer.py From NLP_Toolkit with Apache License 2.0 | 5 votes |
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Does a forward pass on the given batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self.model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self.model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs)."
            )
        loss = None

    return loss
Example #3
Source File: gec_model.py From NLP_Toolkit with Apache License 2.0 | 5 votes |
def predict(self, batches):
    t11 = time()
    predictions = []
    for batch, model in zip(batches, self.models):
        batch = util.move_to_device(batch.as_tensor_dict(),
                                    0 if torch.cuda.is_available() else -1)
        with torch.no_grad():
            prediction = model.forward(**batch)
            predictions.append(prediction)

    preds, idx, error_probs = self._convert(predictions)
    t55 = time()
    if self.log:
        print(f"Inference time {t55 - t11}")
    return preds, idx, error_probs
Example #4
Source File: hotflip.py From allennlp with Apache License 2.0 | 5 votes |
def _make_embedder_input(self, all_tokens: List[str]) -> Dict[str, torch.Tensor]:
    inputs = {}
    # A bit of a hack; this will only work with some dataset readers, but it'll do for now.
    indexers = self.predictor._dataset_reader._token_indexers  # type: ignore
    for indexer_name, token_indexer in indexers.items():
        if isinstance(token_indexer, SingleIdTokenIndexer):
            all_indices = [
                self.vocab._token_to_index[self.namespace][token] for token in all_tokens
            ]
            inputs[indexer_name] = {"tokens": torch.LongTensor(all_indices).unsqueeze(0)}
        elif isinstance(token_indexer, TokenCharactersIndexer):
            tokens = [Token(x) for x in all_tokens]
            max_token_length = max(len(x) for x in all_tokens)
            # sometime max_token_length is too short for cnn encoder
            max_token_length = max(max_token_length, token_indexer._min_padding_length)
            indexed_tokens = token_indexer.tokens_to_indices(tokens, self.vocab)
            padding_lengths = token_indexer.get_padding_lengths(indexed_tokens)
            padded_tokens = token_indexer.as_padded_tensor_dict(indexed_tokens, padding_lengths)
            inputs[indexer_name] = {
                "token_characters": torch.LongTensor(
                    padded_tokens["token_characters"]
                ).unsqueeze(0)
            }
        elif isinstance(token_indexer, ELMoTokenCharactersIndexer):
            elmo_tokens = []
            for token in all_tokens:
                elmo_indexed_token = token_indexer.tokens_to_indices(
                    [Token(text=token)], self.vocab
                )["elmo_tokens"]
                elmo_tokens.append(elmo_indexed_token[0])
            inputs[indexer_name] = {"elmo_tokens": torch.LongTensor(elmo_tokens).unsqueeze(0)}
        else:
            raise RuntimeError("Unsupported token indexer:", token_indexer)

    return util.move_to_device(inputs, self.cuda_device)
Example #5
Source File: trainer.py From allennlp with Apache License 2.0 | 5 votes |
def batch_outputs(self, batch: TensorDict, for_training: bool) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batch and returns the output dictionary that the model
    returns, after adding any specified regularization penalty to the loss (if training).
    """
    batch = nn_util.move_to_device(batch, self.cuda_device)
    output_dict = self._pytorch_model(**batch)

    if for_training:
        try:
            regularization_penalty = self.model.get_regularization_penalty()
            loss = output_dict["loss"]

            # Handle model without regularization
            if regularization_penalty == 0.0:
                regularization_penalty = loss.new_full(size=[], fill_value=0.0)

            output_dict["reg_loss"] = regularization_penalty
            output_dict["loss"] += regularization_penalty
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs)."
                )

    return output_dict
Example #6
Source File: hotpot_bert_v0.py From semanticRetrievalMRS with MIT License | 5 votes |
def span_eval(model, data_iter, do_lower_case, fitem_dict, device_num, show_progress,
              pred_no_answer=True):
    # fitem_dict in the parameter is the original fitem_dict
    output_fitem_dict = {}

    with torch.no_grad():
        model.eval()
        for batch_idx, batch in tqdm(enumerate(data_iter), disable=(not show_progress)):
            batch = allen_util.move_to_device(batch, device_num)
            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            gt_span = batch['gt_span']

            start_logits, end_logits, context_length = model(mode=BertSpan.ForwardMode.EVAL,
                                                             input_ids=paired_sequence,
                                                             token_type_ids=paired_segments_ids,
                                                             attention_mask=att_mask,
                                                             gt_span=gt_span)

            b_fids = batch['fid']
            b_uids = batch['uid']

            write_to_predicted_fitem(start_logits, end_logits, context_length, b_fids, b_uids,
                                     gt_span, fitem_dict, output_fitem_dict, do_lower_case)

    eitem_list, eval_dict = merge_predicted_fitem_to_eitem(output_fitem_dict, None,
                                                           pred_no_answer=pred_no_answer)
    return eitem_list, eval_dict
Example #7
Source File: write_semeval2010_task8_for_official_eval.py From kb with Apache License 2.0 | 5 votes |
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model
    reader = DatasetReader.from_params(archive.config['dataset_reader'])
    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)
        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]
        predictions.extend(batch_labels)

    to_write = ''.join(["{}\t{}\n".format(i + 8001, e)
                        for i, e in enumerate(model.metrics[0].pred)])
    with open(output_file, 'w') as fout:
        fout.write(to_write)
Example #8
Source File: write_wic_for_codalab.py From kb with Apache License 2.0 | 5 votes |
def write_for_official_eval(model_archive_file, test_file, output_file):
    archive = load_archive(model_archive_file)
    model = archive.model
    reader = DatasetReader.from_params(archive.config['dataset_reader'])
    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 32}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    label_ids_to_label = {0: 'F', 1: 'T'}

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)
        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]
        predictions.extend(batch_labels)

    assert len(predictions) == 1400

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
Example #9
Source File: write_tacred_for_official_scorer.py From kb with Apache License 2.0 | 5 votes |
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model
    reader = DatasetReader.from_params(archive.config['dataset_reader'])
    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)
        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]
        predictions.extend(batch_labels)

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p))
Example #10
Source File: multitask_trainer.py From scicite with Apache License 2.0 | 5 votes |
def _batch_loss(self, batch: torch.Tensor, for_training: bool,
                batch_aux: torch.Tensor = None) -> torch.Tensor:
    """
    Does a forward pass on the given batch and returns the ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = self._data_parallel(batch)
        if batch_aux is not None:
            raise ConfigurationError('multi-gpu not supported for multi-task training.')
    else:
        batch = util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self._model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self._model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError("The model you are trying to optimize does not contain a"
                               " 'loss' key in the output of model.forward(inputs).")
        loss = None

    if batch_aux is not None:
        batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
        output_dict_aux = self._model(**batch_aux)
        try:
            loss_aux = output_dict_aux["loss"]
            if for_training:
                loss_aux += self._model.get_regularization_penalty()
        except KeyError:
            raise ConfigurationError("The auxilliary model you are trying to optimize does not contain a"
                                     " 'loss' key in the output of model.forward(inputs).")

        # multi-task loss
        loss = loss + self._mixing_ratio * loss_aux

    return loss
Example #11
Source File: model.py From allennlp with Apache License 2.0 | 4 votes |
def forward_on_instances(self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
    """
    Takes a list of `Instances`, converts that text into arrays using this model's `Vocabulary`,
    passes those arrays through `self.forward()` and `self.make_output_human_readable()` (which
    by default does nothing) and returns the result.  Before returning the result, we convert
    any `torch.Tensors` into numpy arrays and separate the batched output into a list of
    individual dicts per instance. Note that typically this will be faster on a GPU (and
    conditionally, on a CPU) than repeated calls to `forward_on_instance`.

    # Parameters

    instances : `List[Instance]`, required
        The instances to run the model on.

    # Returns

    A list of the models output for each instance.
    """
    batch_size = len(instances)
    with torch.no_grad():
        cuda_device = self._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(self.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        outputs = self.make_output_human_readable(self(**model_input))

        instance_separated_output: List[Dict[str, numpy.ndarray]] = [
            {} for _ in dataset.instances
        ]
        for name, output in list(outputs.items()):
            if isinstance(output, torch.Tensor):
                # NOTE(markn): This is a hack because 0-dim pytorch tensors are not iterable.
                # This occurs with batch size 1, because we still want to include the loss in that case.
                if output.dim() == 0:
                    output = output.unsqueeze(0)

                if output.size(0) != batch_size:
                    self._maybe_warn_for_unseparable_batches(name)
                    continue
                output = output.detach().cpu().numpy()
            elif len(output) != batch_size:
                self._maybe_warn_for_unseparable_batches(name)
                continue
            for instance_output, batch_element in zip(instance_separated_output, output):
                instance_output[name] = batch_element
        return instance_separated_output
Example #12
Source File: hotflip.py From allennlp with Apache License 2.0 | 4 votes |
def _first_order_taylor(self, grad: numpy.ndarray, token_idx: torch.Tensor, sign: int) -> int:
    """
    The below code is based on
    https://github.com/pmichel31415/translate/blob/paul/pytorch_translate/
    research/adversarial/adversaries/brute_force_adversary.py

    Replaces the current token_idx with another token_idx to increase the loss. In particular,
    this function uses the grad, alongside the embedding_matrix to select the token that
    maximizes the first-order taylor approximation of the loss.
    """
    grad = util.move_to_device(torch.from_numpy(grad), self.cuda_device)
    if token_idx.size() != ():
        # We've got an encoder that only has character ids as input.  We don't curently handle
        # this case, and it's not clear it's worth it to implement it.  We'll at least give a
        # nicer error than some pytorch dimension mismatch.
        raise NotImplementedError(
            "You are using a character-level indexer with no other indexers. This case is not "
            "currently supported for hotflip. If you would really like to see us support "
            "this, please open an issue on github."
        )
    if token_idx >= self.embedding_matrix.size(0):
        # This happens when we've truncated our fake embedding matrix.  We need to do a dot
        # product with the word vector of the current token; if that token is out of
        # vocabulary for our truncated matrix, we need to run it through the embedding layer.
        inputs = self._make_embedder_input([self.vocab.get_token_from_index(token_idx)])
        word_embedding = self.embedding_layer(inputs)[0]
    else:
        word_embedding = torch.nn.functional.embedding(
            util.move_to_device(torch.LongTensor([token_idx]), self.cuda_device),
            self.embedding_matrix,
        )
    word_embedding = word_embedding.detach().unsqueeze(0)
    grad = grad.unsqueeze(0).unsqueeze(0)
    # solves equation (3) here https://arxiv.org/abs/1903.06620
    new_embed_dot_grad = torch.einsum("bij,kj->bik", (grad, self.embedding_matrix))
    prev_embed_dot_grad = torch.einsum("bij,bij->bi", (grad, word_embedding)).unsqueeze(-1)
    neg_dir_dot_grad = sign * (prev_embed_dot_grad - new_embed_dot_grad)
    neg_dir_dot_grad = neg_dir_dot_grad.detach().cpu().numpy()
    # Do not replace with non-alphanumeric tokens
    neg_dir_dot_grad[:, :, self.invalid_replacement_indices] = -numpy.inf
    best_at_each_step = neg_dir_dot_grad.argmax(2)
    return best_at_each_step[0].data[0]
Example #13
Source File: predictor.py From allennlp with Apache License 2.0 | 4 votes |
def get_gradients(self, instances: List[Instance]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """
    Gets the gradients of the loss with respect to the model inputs.

    # Parameters

    instances : `List[Instance]`

    # Returns

    `Tuple[Dict[str, Any], Dict[str, Any]]`
        The first item is a Dict of gradient entries for each input.
        The keys have the form  `{grad_input_1: ..., grad_input_2: ... }`
        up to the number of inputs given. The second item is the model's output.

    # Notes

    Takes a `JsonDict` representing the inputs of the model and converts
    them to [`Instances`](../data/instance.md)), sends these through
    the model [`forward`](../models/model.md#forward) function after registering hooks on the
    embedding layer of the model. Calls `backward` on the loss and then removes the hooks.
    """
    # set requires_grad to true for all parameters, but save original values to
    # restore them later
    original_param_name_to_requires_grad_dict = {}
    for param_name, param in self._model.named_parameters():
        original_param_name_to_requires_grad_dict[param_name] = param.requires_grad
        param.requires_grad = True

    embedding_gradients: List[Tensor] = []
    hooks: List[RemovableHandle] = self._register_embedding_gradient_hooks(embedding_gradients)

    dataset = Batch(instances)
    dataset.index_instances(self._model.vocab)
    dataset_tensor_dict = util.move_to_device(dataset.as_tensor_dict(), self.cuda_device)
    # To bypass "RuntimeError: cudnn RNN backward can only be called in training mode"
    with backends.cudnn.flags(enabled=False):
        outputs = self._model.make_output_human_readable(
            self._model.forward(**dataset_tensor_dict)  # type: ignore
        )

        loss = outputs["loss"]
        self._model.zero_grad()
        loss.backward()

    for hook in hooks:
        hook.remove()

    grad_dict = dict()
    for idx, grad in enumerate(embedding_gradients):
        key = "grad_input_" + str(idx + 1)
        grad_dict[key] = grad.detach().cpu().numpy()

    # restore the original requires_grad values of the parameters
    for param_name, param in self._model.named_parameters():
        param.requires_grad = original_param_name_to_requires_grad_dict[param_name]

    return grad_dict, outputs
Example #14
Source File: evaluate_perplexity.py From kb with Apache License 2.0 | 4 votes |
def run_evaluation(evaluation_file, model_archive, random_candidates=False):
    archive = load_archive(model_archive)

    model = archive.model
    vocab = model.vocab
    params = archive.config

    model.multitask = False
    model.multitask_kg = False
    model.cuda()
    model.eval()
    for p in model.parameters():
        p.requires_grad_(False)

    reader_params = params.pop('dataset_reader')
    if reader_params['type'] == 'multitask_reader':
        reader_params = reader_params['dataset_readers']['language_modeling']

    if random_candidates:
        for k, v in reader_params['base_reader']['tokenizer_and_candidate_generator']['entity_candidate_generators'].items():
            v['random_candidates'] = True

    reader = DatasetReader.from_params(Params(reader_params))

    iterator = DataIterator.from_params(Params({
        "type": "self_attn_bucket",
        "batch_size_schedule": "base-11gb-fp32",
        "iterator": {
            "type": "bucket",
            "batch_size": 32,
            "sorting_keys": [["tokens", "num_tokens"]],
            "max_instances_in_memory": 2500,
        }
    }))
    iterator.index_with(vocab)

    instances = reader.read(evaluation_file)
    for batch_no, batch in enumerate(tqdm.tqdm(iterator(instances, num_epochs=1))):
        b = move_to_device(batch, 0)
        loss = model(**b)
        if batch_no % 100 == 0:
            print(model.get_metrics())

    print(model.get_metrics())
Example #15
Source File: multitask_trainer_two_tasks.py From scicite with Apache License 2.0 | 4 votes |
def _batch_loss(self, batch: torch.Tensor, for_training: bool,
                batch_aux: torch.Tensor = None,
                batch_aux2: torch.Tensor = None) -> torch.Tensor:
    """
    Does a forward pass on the given batch and auxiliary data batches and returns the
    ``loss`` value in the result.
    If ``for_training`` is `True` also applies regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = self._data_parallel(batch)
        if batch_aux is not None:
            raise ConfigurationError('multi-gpu not supported for multi-task training.')
    else:
        batch = util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self._model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self._model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError("The model you are trying to optimize does not contain a"
                               " 'loss' key in the output of model.forward(inputs).")
        loss = None

    if batch_aux is not None and batch_aux2 is not None:
        batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
        batch_aux2 = util.move_to_device(batch_aux2, self._cuda_devices[0])
        output_dict_aux = self._model(**batch_aux)
        output_dict_aux2 = self._model(**batch_aux2)
        try:
            loss_aux = output_dict_aux["loss"]
            loss_aux2 = output_dict_aux2["loss"]
            if for_training:
                loss_aux += self._model.get_regularization_penalty()
                loss_aux2 += self._model.get_regularization_penalty()
        except KeyError:
            raise ConfigurationError("The auxiliary model you are trying to optimize does not contain a"
                                     " 'loss' key in the output of model.forward(inputs).")

        # multi-task loss
        loss = loss + self._mixing_ratio * loss_aux + self._mixing_ratio2 * loss_aux2

    return loss