Python allennlp.nn.util.move_to_device() Examples

The following are 15 code examples of allennlp.nn.util.move_to_device(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module allennlp.nn.util, or try the search function.
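Before the examples, it may help to see what the function actually does: it recursively walks a (possibly nested) structure of dicts, lists, and tuples and moves every torch.Tensor it finds onto the requested device, passing non-tensor values through unchanged. AllenNLP's convention, visible throughout the examples below, is that a cuda_device of -1 means CPU and a non-negative integer names a CUDA device. The snippet below is a minimal sketch of that behavior for illustration only, not the library's exact implementation (which, for example, also handles namedtuples).

import torch

def move_to_device_sketch(obj, cuda_device: int):
    """Simplified illustration of allennlp.nn.util.move_to_device.

    Recursively moves every torch.Tensor inside ``obj`` to the given
    device. Per AllenNLP's convention, ``cuda_device == -1`` means CPU
    and a non-negative integer selects that CUDA device.
    """
    device = torch.device("cpu") if cuda_device < 0 else torch.device("cuda", cuda_device)

    def _move(value):
        if isinstance(value, torch.Tensor):
            return value.to(device)
        if isinstance(value, dict):
            return {key: _move(inner) for key, inner in value.items()}
        if isinstance(value, (list, tuple)):
            # Plain lists/tuples only; namedtuples would need special handling.
            return type(value)(_move(item) for item in value)
        return value  # non-tensor leaves (ints, strings, ...) pass through

    return _move(obj)

# Typical usage, mirroring the examples below: move a model-input dict onto
# the GPU when one is available, otherwise keep it on the CPU.
batch = {"tokens": {"token_ids": torch.zeros(2, 5, dtype=torch.long)}}
batch = move_to_device_sketch(batch, 0 if torch.cuda.is_available() else -1)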
Example #1
Source File: predictor.py    From DISTRE with Apache License 2.0
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        model = self._model

        with torch.no_grad():
            cuda_device = model._get_prediction_device()
            dataset = Batch(instances)
            dataset.index_instances(model.vocab)
            model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
            outputs = model.decode(model(**model_input))

        return sanitize(outputs) 
Example #2
Source File: trainer.py    From NLP_Toolkit with Apache License 2.0
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
        """
        Does a forward pass on the given batches and returns the ``loss`` value in the result.
        If ``for_training`` is `True`, also applies the regularization penalty.
        """
        if self._multiple_gpu:
            output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
        else:
            assert len(batch_group) == 1
            batch = batch_group[0]
            batch = nn_util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self.model(**batch)

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self.model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs)."
                )
            loss = None

        return loss 
Example #3
Source File: gec_model.py    From NLP_Toolkit with Apache License 2.0
def predict(self, batches):
        t11 = time()
        predictions = []
        for batch, model in zip(batches, self.models):
            batch = util.move_to_device(batch.as_tensor_dict(), 0 if torch.cuda.is_available() else -1)
            with torch.no_grad():
                prediction = model.forward(**batch)
            predictions.append(prediction)

        preds, idx, error_probs = self._convert(predictions)
        t55 = time()
        if self.log:
            print(f"Inference time {t55 - t11}")
        return preds, idx, error_probs 
Example #4
Source File: hotflip.py    From allennlp with Apache License 2.0
def _make_embedder_input(self, all_tokens: List[str]) -> Dict[str, torch.Tensor]:
        inputs = {}
        # A bit of a hack; this will only work with some dataset readers, but it'll do for now.
        indexers = self.predictor._dataset_reader._token_indexers  # type: ignore
        for indexer_name, token_indexer in indexers.items():
            if isinstance(token_indexer, SingleIdTokenIndexer):
                all_indices = [
                    self.vocab._token_to_index[self.namespace][token] for token in all_tokens
                ]
                inputs[indexer_name] = {"tokens": torch.LongTensor(all_indices).unsqueeze(0)}
            elif isinstance(token_indexer, TokenCharactersIndexer):
                tokens = [Token(x) for x in all_tokens]
                max_token_length = max(len(x) for x in all_tokens)
                # Sometimes max_token_length is too short for the CNN encoder.
                max_token_length = max(max_token_length, token_indexer._min_padding_length)
                indexed_tokens = token_indexer.tokens_to_indices(tokens, self.vocab)
                padding_lengths = token_indexer.get_padding_lengths(indexed_tokens)
                padded_tokens = token_indexer.as_padded_tensor_dict(indexed_tokens, padding_lengths)
                inputs[indexer_name] = {
                    "token_characters": torch.LongTensor(
                        padded_tokens["token_characters"]
                    ).unsqueeze(0)
                }
            elif isinstance(token_indexer, ELMoTokenCharactersIndexer):
                elmo_tokens = []
                for token in all_tokens:
                    elmo_indexed_token = token_indexer.tokens_to_indices(
                        [Token(text=token)], self.vocab
                    )["elmo_tokens"]
                    elmo_tokens.append(elmo_indexed_token[0])
                inputs[indexer_name] = {"elmo_tokens": torch.LongTensor(elmo_tokens).unsqueeze(0)}
            else:
                raise RuntimeError("Unsupported token indexer:", token_indexer)

        return util.move_to_device(inputs, self.cuda_device) 
Example #5
Source File: trainer.py    From allennlp with Apache License 2.0
def batch_outputs(self, batch: TensorDict, for_training: bool) -> Dict[str, torch.Tensor]:
        """
        Does a forward pass on the given batch and returns the output dictionary that the model
        returns, after adding any specified regularization penalty to the loss (if training).
        """
        batch = nn_util.move_to_device(batch, self.cuda_device)
        output_dict = self._pytorch_model(**batch)

        if for_training:
            try:
                regularization_penalty = self.model.get_regularization_penalty()
                loss = output_dict["loss"]

                # Handle model without regularization
                if regularization_penalty == 0.0:
                    regularization_penalty = loss.new_full(size=[], fill_value=0.0)

                output_dict["reg_loss"] = regularization_penalty
                output_dict["loss"] += regularization_penalty
            except KeyError:
                # We only get here when for_training is True, so a missing
                # 'loss' key is always an error at this point.
                raise RuntimeError(
                    "The model you are trying to optimize does not contain a"
                    " 'loss' key in the output of model.forward(inputs)."
                )

        return output_dict 
Example #6
Source File: hotpot_bert_v0.py    From semanticRetrievalMRS with MIT License
def span_eval(model, data_iter, do_lower_case, fitem_dict, device_num, show_progress, pred_no_answer=True):
    # The fitem_dict argument is the original fitem_dict; predictions are collected into output_fitem_dict.
    output_fitem_dict = {}

    with torch.no_grad():
        model.eval()

        for batch_idx, batch in tqdm(enumerate(data_iter), disable=(not show_progress)):
            batch = allen_util.move_to_device(batch, device_num)
            paired_sequence = batch['paired_sequence']
            paired_segments_ids = batch['paired_segments_ids']
            att_mask, _ = torch_util.get_length_and_mask(paired_sequence)
            gt_span = batch['gt_span']

            start_logits, end_logits, context_length = model(mode=BertSpan.ForwardMode.EVAL,
                                                             input_ids=paired_sequence,
                                                             token_type_ids=paired_segments_ids,
                                                             attention_mask=att_mask,
                                                             gt_span=gt_span)
            b_fids = batch['fid']
            b_uids = batch['uid']

            write_to_predicted_fitem(start_logits, end_logits, context_length, b_fids, b_uids, gt_span, fitem_dict,
                                     output_fitem_dict, do_lower_case)

    eitem_list, eval_dict = merge_predicted_fitem_to_eitem(output_fitem_dict, None, pred_no_answer=pred_no_answer)
    return eitem_list, eval_dict 
Example #7
Source File: write_semeval2010_task8_for_official_eval.py    From kb with Apache License 2.0
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)

        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]

        predictions.extend(batch_labels)

    # The official scorer file is keyed by test-sentence id (starting at 8001)
    # and is written from the predictions accumulated on the model's metric.
    to_write = ''.join(["{}\t{}\n".format(i + 8001, e) for i, e in enumerate(model.metrics[0].pred)])
    with open(output_file, 'w') as fout:
        fout.write(to_write) 
Example #8
Source File: write_wic_for_codalab.py    From kb with Apache License 2.0
def write_for_official_eval(model_archive_file, test_file, output_file):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 32}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    label_ids_to_label = {0: 'F', 1: 'T'}

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)

        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]

        predictions.extend(batch_labels)

    assert len(predictions) == 1400

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p)) 
Example #9
Source File: write_tacred_for_official_scorer.py    From kb with Apache License 2.0
def write_for_official_eval(model_archive_file, test_file, output_file,
                            label_ids_to_label):
    archive = load_archive(model_archive_file)
    model = archive.model

    reader = DatasetReader.from_params(archive.config['dataset_reader'])

    iterator = DataIterator.from_params(Params({"type": "basic", "batch_size": 4}))
    vocab = Vocabulary.from_params(archive.config['vocabulary'])
    iterator.index_with(vocab)

    model.cuda()
    model.eval()

    instances = reader.read(test_file)
    predictions = []
    for batch in iterator(instances, num_epochs=1, shuffle=False):
        batch = move_to_device(batch, cuda_device=0)
        output = model(**batch)

        batch_labels = [
            label_ids_to_label[i]
            for i in output['predictions'].cpu().numpy().tolist()
        ]

        predictions.extend(batch_labels)

    with open(output_file, 'w') as fout:
        for p in predictions:
            fout.write("{}\n".format(p)) 
Example #10
Source File: multitask_trainer.py    From scicite with Apache License 2.0
def _batch_loss(self, batch: torch.Tensor, for_training: bool, batch_aux: torch.Tensor=None) -> torch.Tensor:
        """
        Does a forward pass on the given batch and returns the ``loss`` value in the result.
        If ``for_training`` is `True`, also applies the regularization penalty.
        """
        if self._multiple_gpu:
            output_dict = self._data_parallel(batch)
            if batch_aux is not None:
                raise ConfigurationError('multi-gpu not supported for multi-task training.')
        else:
            batch = util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self._model(**batch)

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self._model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError("The model you are trying to optimize does not contain a"
                                   " 'loss' key in the output of model.forward(inputs).")
            loss = None

        if batch_aux is not None:
            batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
            output_dict_aux = self._model(**batch_aux)
            try:
                loss_aux = output_dict_aux["loss"]
                if for_training:
                    loss_aux += self._model.get_regularization_penalty()
            except KeyError:
                raise ConfigurationError("The auxilliary model you are trying to optimize does not contain a"
                                         " 'loss' key in the output of model.forward(inputs).")

            # multi-task loss
            loss = loss + self._mixing_ratio * loss_aux
        return loss 
Example #11
Source File: model.py    From allennlp with Apache License 2.0
def forward_on_instances(self, instances: List[Instance]) -> List[Dict[str, numpy.ndarray]]:
        """
        Takes a list of `Instances`, converts that text into arrays using this model's `Vocabulary`,
        passes those arrays through `self.forward()` and `self.make_output_human_readable()` (which
        by default does nothing) and returns the result.  Before returning the result, we convert
        any `torch.Tensors` into numpy arrays and separate the batched output into a list of
        individual dicts per instance. Note that typically this will be faster on a GPU (and
        conditionally, on a CPU) than repeated calls to `forward_on_instance`.

        # Parameters

        instances : `List[Instance]`, required
            The instances to run the model on.

        # Returns

        A list of the model's outputs, one for each instance.
        """
        batch_size = len(instances)
        with torch.no_grad():
            cuda_device = self._get_prediction_device()
            dataset = Batch(instances)
            dataset.index_instances(self.vocab)
            model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
            outputs = self.make_output_human_readable(self(**model_input))

            instance_separated_output: List[Dict[str, numpy.ndarray]] = [
                {} for _ in dataset.instances
            ]
            for name, output in list(outputs.items()):
                if isinstance(output, torch.Tensor):
                    # NOTE(markn): This is a hack because 0-dim pytorch tensors are not iterable.
                    # This occurs with batch size 1, because we still want to include the loss in that case.
                    if output.dim() == 0:
                        output = output.unsqueeze(0)

                    if output.size(0) != batch_size:
                        self._maybe_warn_for_unseparable_batches(name)
                        continue
                    output = output.detach().cpu().numpy()
                elif len(output) != batch_size:
                    self._maybe_warn_for_unseparable_batches(name)
                    continue
                for instance_output, batch_element in zip(instance_separated_output, output):
                    instance_output[name] = batch_element
            return instance_separated_output 
Example #12
Source File: hotflip.py    From allennlp with Apache License 2.0
def _first_order_taylor(self, grad: numpy.ndarray, token_idx: torch.Tensor, sign: int) -> int:
        """
        The below code is based on
        https://github.com/pmichel31415/translate/blob/paul/pytorch_translate/
        research/adversarial/adversaries/brute_force_adversary.py

        Replaces the current token_idx with another token_idx to increase the loss. In particular, this
        function uses the grad, alongside the embedding_matrix, to select the token that maximizes the
        first-order Taylor approximation of the loss.
        """
        grad = util.move_to_device(torch.from_numpy(grad), self.cuda_device)
        if token_idx.size() != ():
            # We've got an encoder that only has character ids as input.  We don't currently handle
            # this case, and it's not clear it's worth it to implement it.  We'll at least give a
            # nicer error than some pytorch dimension mismatch.
            raise NotImplementedError(
                "You are using a character-level indexer with no other indexers. This case is not "
                "currently supported for hotflip. If you would really like to see us support "
                "this, please open an issue on github."
            )
        if token_idx >= self.embedding_matrix.size(0):
            # This happens when we've truncated our fake embedding matrix.  We need to do a dot
            # product with the word vector of the current token; if that token is out of
            # vocabulary for our truncated matrix, we need to run it through the embedding layer.
            inputs = self._make_embedder_input([self.vocab.get_token_from_index(token_idx)])
            word_embedding = self.embedding_layer(inputs)[0]
        else:
            word_embedding = torch.nn.functional.embedding(
                util.move_to_device(torch.LongTensor([token_idx]), self.cuda_device),
                self.embedding_matrix,
            )
        word_embedding = word_embedding.detach().unsqueeze(0)
        grad = grad.unsqueeze(0).unsqueeze(0)
        # solves equation (3) here https://arxiv.org/abs/1903.06620
        new_embed_dot_grad = torch.einsum("bij,kj->bik", (grad, self.embedding_matrix))
        prev_embed_dot_grad = torch.einsum("bij,bij->bi", (grad, word_embedding)).unsqueeze(-1)
        neg_dir_dot_grad = sign * (prev_embed_dot_grad - new_embed_dot_grad)
        neg_dir_dot_grad = neg_dir_dot_grad.detach().cpu().numpy()
        # Do not replace with non-alphanumeric tokens
        neg_dir_dot_grad[:, :, self.invalid_replacement_indices] = -numpy.inf
        best_at_each_step = neg_dir_dot_grad.argmax(2)
        return best_at_each_step[0].data[0] 
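For reference, the two einsum lines above implement the first-order Taylor criterion from equation (3) of the linked paper. Reading the code, and assuming `grad` is the gradient of the loss at the current token's embedding $e_{\mathrm{cur}}$, the returned index is

$$\arg\max_{k}\ \mathrm{sign}\cdot\left(e_{\mathrm{cur}}-e_{k}\right)^{\top}\nabla_{e}\mathcal{L},$$

so with `sign = -1` the selected token maximizes the approximate loss increase $(e_{k}-e_{\mathrm{cur}})^{\top}\nabla_{e}\mathcal{L}$, subject to the mask that rules out non-alphanumeric replacements.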
Example #13
Source File: predictor.py    From allennlp with Apache License 2.0
def get_gradients(self, instances: List[Instance]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Gets the gradients of the loss with respect to the model inputs.

        # Parameters

        instances : `List[Instance]`

        # Returns

        `Tuple[Dict[str, Any], Dict[str, Any]]`
            The first item is a Dict of gradient entries for each input.
            The keys have the form  `{grad_input_1: ..., grad_input_2: ... }`
            up to the number of inputs given. The second item is the model's output.

        # Notes

        Takes a `JsonDict` representing the inputs of the model and converts
        them to [`Instances`](../data/instance.md), sends these through
        the model [`forward`](../models/model.md#forward) function after registering hooks on the embedding
        layer of the model. Calls `backward` on the loss and then removes the
        hooks.
        """
        # set requires_grad to true for all parameters, but save original values to
        # restore them later
        original_param_name_to_requires_grad_dict = {}
        for param_name, param in self._model.named_parameters():
            original_param_name_to_requires_grad_dict[param_name] = param.requires_grad
            param.requires_grad = True

        embedding_gradients: List[Tensor] = []
        hooks: List[RemovableHandle] = self._register_embedding_gradient_hooks(embedding_gradients)

        dataset = Batch(instances)
        dataset.index_instances(self._model.vocab)
        dataset_tensor_dict = util.move_to_device(dataset.as_tensor_dict(), self.cuda_device)
        # To bypass "RuntimeError: cudnn RNN backward can only be called in training mode"
        with backends.cudnn.flags(enabled=False):
            outputs = self._model.make_output_human_readable(
                self._model.forward(**dataset_tensor_dict)  # type: ignore
            )

            loss = outputs["loss"]
            self._model.zero_grad()
            loss.backward()

        for hook in hooks:
            hook.remove()

        grad_dict = dict()
        for idx, grad in enumerate(embedding_gradients):
            key = "grad_input_" + str(idx + 1)
            grad_dict[key] = grad.detach().cpu().numpy()

        # restore the original requires_grad values of the parameters
        for param_name, param in self._model.named_parameters():
            param.requires_grad = original_param_name_to_requires_grad_dict[param_name]

        return grad_dict, outputs 
Example #14
Source File: evaluate_perplexity.py    From kb with Apache License 2.0
def run_evaluation(evaluation_file, model_archive,
                   random_candidates=False):

    archive = load_archive(model_archive)
    model = archive.model
    vocab = model.vocab
    params = archive.config

    model.multitask = False
    model.multitask_kg = False
    model.cuda()
    model.eval()
    for p in model.parameters():
        p.requires_grad_(False)

    reader_params = params.pop('dataset_reader')
    if reader_params['type'] == 'multitask_reader':
        reader_params = reader_params['dataset_readers']['language_modeling']

    if random_candidates:
        for k, v in reader_params['base_reader']['tokenizer_and_candidate_generator']['entity_candidate_generators'].items():
            v['random_candidates'] = True

    reader = DatasetReader.from_params(Params(reader_params))

    iterator = DataIterator.from_params(Params({
            "type": "self_attn_bucket",
            "batch_size_schedule": "base-11gb-fp32",
            "iterator":{
                  "type": "bucket",
                  "batch_size": 32,
                  "sorting_keys": [["tokens", "num_tokens"]],
                  "max_instances_in_memory": 2500,
              }
    }))
    iterator.index_with(vocab)
    instances = reader.read(evaluation_file)

    for batch_no, batch in enumerate(tqdm.tqdm(iterator(instances, num_epochs=1))):
        b = move_to_device(batch, 0)
        loss = model(**b)
        if batch_no % 100 == 0:
            print(model.get_metrics())

    print(model.get_metrics()) 
Example #15
Source File: multitask_trainer_two_tasks.py    From scicite with Apache License 2.0
def _batch_loss(self, batch: torch.Tensor,
                    for_training: bool,
                    batch_aux: torch.Tensor=None,
                    batch_aux2: torch.Tensor=None) -> torch.Tensor:
        """
        Does a forward pass on the given batch and auxiliary data batches and returns the ``loss`` value in the result.
        If ``for_training`` is `True`, also applies the regularization penalty.
        """
        if self._multiple_gpu:
            output_dict = self._data_parallel(batch)
            if batch_aux is not None:
                raise ConfigurationError('multi-gpu not supported for multi-task training.')
        else:
            batch = util.move_to_device(batch, self._cuda_devices[0])
            output_dict = self._model(**batch)

        try:
            loss = output_dict["loss"]
            if for_training:
                loss += self._model.get_regularization_penalty()
        except KeyError:
            if for_training:
                raise RuntimeError("The model you are trying to optimize does not contain a"
                                   " 'loss' key in the output of model.forward(inputs).")
            loss = None

        if batch_aux is not None and batch_aux2 is not None:
            batch_aux = util.move_to_device(batch_aux, self._cuda_devices[0])
            batch_aux2 = util.move_to_device(batch_aux2, self._cuda_devices[0])
            output_dict_aux = self._model(**batch_aux)
            output_dict_aux2 = self._model(**batch_aux2)
            try:
                loss_aux = output_dict_aux["loss"]
                loss_aux2 = output_dict_aux2["loss"]
                if for_training:
                    loss_aux += self._model.get_regularization_penalty()
                    loss_aux2 += self._model.get_regularization_penalty()
            except KeyError:
                raise ConfigurationError("The auxiliary model you are trying to optimize does not contain a"
                                         " 'loss' key in the output of model.forward(inputs).")

            # multi-task loss
            loss = loss + self._mixing_ratio * loss_aux + self._mixing_ratio2 * loss_aux2
        return loss