Python allennlp.common.util.sanitize() Examples

The following are 30 code examples of allennlp.common.util.sanitize(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module allennlp.common.util, or try the search function.
Example #1
Source File: smooth_gradient.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Interpret the model's prediction with SmoothGrad saliency.

        For each labeled instance built from ``inputs``, averages gradients over
        noisy copies of the input (via ``self._smooth_grads``) and returns the
        L1-normalized absolute gradient per token.

        Returns a sanitized dict mapping ``"instance_N"`` (1-based) to its
        normalized gradients, one entry per gradient key.
        """
        # Convert inputs to labeled instances
        labeled_instances = self.predictor.json_to_labeled_instances(inputs)

        instances_with_grads = dict()
        for idx, instance in enumerate(labeled_instances):
            # Run smoothgrad
            grads = self._smooth_grads(instance)

            # Normalize results
            for key, grad in grads.items():
                # TODO (@Eric-Wallace), SmoothGrad is not using times input normalization.
                # Fine for now, but should fix for consistency.

                # The [0] here is undo-ing the batching that happens in get_gradients.
                embedding_grad = numpy.sum(grad[0], axis=1)
                norm = numpy.linalg.norm(embedding_grad, ord=1)
                # Guard against an all-zero gradient: dividing by a zero norm
                # would emit NaN/inf; return zeros instead.
                if norm == 0.0:
                    normalized_grad = [0.0 for _ in embedding_grad]
                else:
                    normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
                grads[key] = normalized_grad

            instances_with_grads["instance_" + str(idx + 1)] = grads

        return sanitize(instances_with_grads)
Example #2
Source File: integrated_gradient.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Interpret the model's prediction with Integrated Gradients.

        For each labeled instance built from ``inputs``, integrates gradients
        along the path from a baseline to the input (``self._integrate_gradients``)
        and returns the L1-normalized absolute gradient per token.

        Returns a sanitized dict mapping ``"instance_N"`` (1-based) to its
        normalized gradients, one entry per gradient key.
        """
        # Convert inputs to labeled instances
        labeled_instances = self.predictor.json_to_labeled_instances(inputs)

        instances_with_grads = dict()
        for idx, instance in enumerate(labeled_instances):
            # Run integrated gradients
            grads = self._integrate_gradients(instance)

            # Normalize results
            for key, grad in grads.items():
                # The [0] here is undo-ing the batching that happens in get_gradients.
                embedding_grad = numpy.sum(grad[0], axis=1)
                norm = numpy.linalg.norm(embedding_grad, ord=1)
                # Guard against an all-zero gradient: dividing by a zero norm
                # would emit NaN/inf; return zeros instead.
                if norm == 0.0:
                    normalized_grad = [0.0 for _ in embedding_grad]
                else:
                    normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
                grads[key] = normalized_grad

            instances_with_grads["instance_" + str(idx + 1)] = grads

        return sanitize(instances_with_grads)
Example #3
Source File: input_reduction.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def attack_from_json(
        self,
        inputs: JsonDict = None,
        input_field_to_attack: str = "tokens",
        grad_input_field: str = "grad_input_1",
        ignore_tokens: List[str] = None,
        target: JsonDict = None,
    ):
        """
        Run input reduction on every labeled instance built from ``inputs``.

        Returns a sanitized dict with the reduced ("final") token lists and the
        untouched "original" tokens. Targeted attacks are not supported.
        """
        if target is not None:
            raise ValueError("Input reduction does not implement targeted attacks")
        if ignore_tokens is None:
            ignore_tokens = ["@@NULL@@"]
        instances = self.predictor.json_to_labeled_instances(inputs)
        text_field: TextField = instances[0][input_field_to_attack]  # type: ignore
        # Copy before attacking, since reduction mutates the instance tokens.
        untouched_tokens = deepcopy(text_field.tokens)
        reduced = [
            self._attack_instance(
                inputs, inst, input_field_to_attack, grad_input_field, ignore_tokens
            )
            for inst in instances
        ]
        return sanitize({"final": reduced, "original": untouched_tokens})
Example #4
Source File: multiple_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_json(self, input: JsonDict):
        """
        Predict for one multiple-correct MCQ entailment example.

        An answer choice is predicted whenever its probability is >= 0.5.
        Attention tensors are included only when the model produced them.
        """
        output = self._model.forward_on_instance(self._json_to_instance(input))

        label_probs = output["label_probs"]
        chosen = [i for i, prob in enumerate(list(label_probs)) if prob >= 0.5]

        return_json = {
            "input": input,
            "label_probs": label_probs,
            "predicted_answer_indices": chosen,
        }
        premises_attentions = output.get("premises_attentions")
        if premises_attentions is not None:
            return_json["premises_attentions"] = premises_attentions
            return_json["premises_aggregation_attentions"] = output.get(
                "premises_aggregation_attentions"
            )
        return sanitize(return_json)
Example #5
Source File: multiple_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """
        Predict for a batch of multiple-correct MCQ entailment examples.

        A choice is predicted whenever its probability is >= 0.5. Attention
        tensors, when present, are truncated to each example's premise count.
        """
        instances = self._batch_json_to_instances(inputs)
        outputs = self._model.forward_on_instances(instances)
        results = []
        for single_input, output in zip(inputs, outputs):
            n_premises = len(single_input["premises"])
            label_probs = output["label_probs"]
            result = {
                "input": single_input,
                "label_probs": label_probs,
                "predicted_answer_indices": [
                    i for i, prob in enumerate(list(label_probs)) if prob >= 0.5
                ],
            }
            premises_attentions = output.get("premises_attentions")
            if premises_attentions is not None:
                # Drop attention over padded premises beyond this example's count.
                result["premises_attentions"] = premises_attentions[:, :n_premises]
                result["premises_aggregation_attentions"] = output.get(
                    "premises_aggregation_attentions"
                )[:, :n_premises]
            results.append(result)
        return sanitize(results)
Example #6
Source File: single_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_json(self, input: JsonDict):
        """
        Predict for one single-correct MCQ entailment example.

        The predicted answer is the argmax over label probabilities. Attention
        tensors are included only when the model produced them.
        """
        output = self._model.forward_on_instance(self._json_to_instance(input))

        label_probs = output["label_probs"]
        probs = list(label_probs)
        best_index = probs.index(max(probs))

        return_json = {
            "input": input,
            "label_probs": label_probs,
            "predicted_answer_index": best_index,
        }
        premises_attentions = output.get("premises_attentions")
        if premises_attentions is not None:
            return_json["premises_attentions"] = premises_attentions
            return_json["premises_aggregation_attentions"] = output.get(
                "premises_aggregation_attentions"
            )
        return sanitize(return_json)
Example #7
Source File: single_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """
        Predict for a batch of single-correct MCQ entailment examples.

        The predicted answer is the argmax over label probabilities. Attention
        tensors, when present, are truncated to each example's premise count.
        """
        instances = self._batch_json_to_instances(inputs)
        outputs = self._model.forward_on_instances(instances)
        results = []
        for single_input, output in zip(inputs, outputs):
            n_premises = len(single_input["premises"])
            label_probs = output["label_probs"]
            probs = list(label_probs)
            result = {
                "input": single_input,
                "label_probs": label_probs,
                "predicted_answer_index": probs.index(max(probs)),
            }
            premises_attentions = output.get("premises_attentions")
            if premises_attentions is not None:
                # Drop attention over padded premises beyond this example's count.
                result["premises_attentions"] = premises_attentions[:, :n_premises]
                result["premises_aggregation_attentions"] = output.get(
                    "premises_aggregation_attentions"
                )[:, :n_premises]
            results.append(result)
        return sanitize(results)
Example #8
Source File: biaffine_dependency_parser.py    From magnitude with MIT License 5 votes vote down vote up
def predict_batch_instance(self, instances):
        """
        Parse a batch of instances and attach a hierplane tree to each output
        for visualization. (Type annotations were stripped in this py2 port.)
        """
        outputs = self._model.forward_on_instances(instances)
        for parse in outputs:
            parse[u"hierplane_tree"] = self._build_hierplane_tree(
                parse[u"words"],
                parse[u"predicted_heads"],
                parse[u"predicted_dependencies"],
                parse[u"pos"],
            )
        return sanitize(outputs)
Example #9
Source File: bidaf_qa_predictor.py    From ARC-Solvers with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Predict the best answer span and, when an answer key is present, score
        each choice by its overlap with the predicted span.

        Mutates and returns the input JSON: adds "best_span_str" always, plus
        "selected_answers" (comma-joined labels) and "question_score"
        (1/len(ties) if the key is among the top-scoring choices, else 0).
        """
        instance = self._json_to_instance(inputs)
        outputs = self._model.forward_on_instance(instance, cuda_device)
        json_output = inputs
        span_str = outputs["best_span_str"]
        # If the file has an answer key, calculate the score
        if "answerKey" in json_output:
            choices = json_output["question"]["choices"]
            # Score every choice by overlap with the predicted span.
            for choice in choices:
                choice["score"] = self._overlap_score(choice["text"], span_str)
            top_score = max(choice["score"] for choice in choices)
            # All choices tied at the top score are "selected".
            selected = [c["label"] for c in choices if c["score"] == top_score]
            if json_output["answerKey"] in selected:
                question_score = 1 / len(selected)
            else:
                question_score = 0
            json_output["selected_answers"] = ",".join(selected)
            json_output["question_score"] = question_score
        json_output["best_span_str"] = span_str
        return sanitize(json_output)
Example #10
Source File: entailment_pair.py    From multee with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict):
        """
        Attach entailment/contradiction/neutral probabilities to the input
        JSON (mutated in place) and return it sanitized.
        """
        outputs = self._model.forward_on_instance(self._json_to_instance(inputs))
        probs = outputs["label_probs"]
        for field, label_index in (
            ("entailment_prob", self._entailment_idx),
            ("contradiction_prob", self._contradiction_idx),
            ("neutral_prob", self._neutral_idx),
        ):
            inputs[field] = float(probs[label_index])
        return sanitize(inputs)
Example #11
Source File: conll_predictor.py    From allennlp_tutorial with MIT License 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """
        Run the tagger on one instance and add human-readable fields:
        token strings, argmax-decoded predicted labels, and gold labels.
        """
        outputs = self._model.forward_on_instance(instance)
        index_to_label = self._model.vocab.get_index_to_token_vocabulary('labels')

        outputs['tokens'] = [str(tok) for tok in instance.fields['tokens'].tokens]
        # Argmax over the label dimension, then map indices back to label strings.
        outputs['predicted'] = [index_to_label[i] for i in outputs['logits'].argmax(1)]
        outputs['labels'] = instance.fields['label'].labels

        return sanitize(outputs)
Example #12
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """Decode one instance and convert the outputs to a UCCA MRP graph."""
        model_outputs = self._model.forward_on_instance(instance)
        return sanitize(ucca_trans_outputs_into_mrp(model_outputs))
Example #13
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Decode a batch of instances into UCCA MRP graphs.

        An instance whose conversion fails keeps an empty-list placeholder so
        the output stays aligned with the input batch; the failing graph id is
        printed for debugging.
        """
        outputs_batch = self._model.forward_on_instances(instances)

        ret_dict_batch = [[] for _ in range(len(outputs_batch))]
        for idx, outputs in enumerate(outputs_batch):
            try:
                ret_dict_batch[idx] = ucca_trans_outputs_into_mrp(outputs)
            except Exception:
                # The original bare ``except:`` also swallowed
                # KeyboardInterrupt/SystemExit; catch Exception instead.
                print('graph_id:' + json.loads(outputs["meta_info"])['id'])

        return sanitize(ret_dict_batch)
Example #14
Source File: transition_eds_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """Decode one instance and convert the outputs to an EDS MRP graph."""
        model_outputs = self._model.forward_on_instance(instance)
        return sanitize(eds_trans_outputs_into_mrp(model_outputs))
Example #15
Source File: transition_eds_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Decode a batch of instances into EDS MRP graphs.

        An instance whose conversion fails keeps an empty-list placeholder so
        the output stays aligned with the input batch; the failing graph id is
        printed for debugging.
        """
        outputs_batch = self._model.forward_on_instances(instances)

        ret_dict_batch = [[] for _ in range(len(outputs_batch))]
        for idx, outputs in enumerate(outputs_batch):
            try:
                ret_dict_batch[idx] = eds_trans_outputs_into_mrp(outputs)
            except Exception:
                # The original bare ``except:`` also swallowed
                # KeyboardInterrupt/SystemExit; catch Exception instead.
                print('graph_id:' + json.loads(outputs["meta_info"])['id'])

        return sanitize(ret_dict_batch)
Example #16
Source File: transition_sdp_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """Decode one instance and convert the outputs to an SDP MRP graph."""
        model_outputs = self._model.forward_on_instance(instance)
        return sanitize(sdp_trans_outputs_into_mrp(model_outputs))
Example #17
Source File: transition_amr_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Decode a batch of instances into AMR MRP dictionaries.

        Each model output is unpacked and passed to ``extract_mrp_dict``.
        (The original version also extracted ``instances[idx]`` and
        ``outputs["tokens"]`` but never used them; they are dropped here.)
        """
        outputs_batch = self._model.forward_on_instances(instances)

        ret_dict_batch = []
        for outputs in outputs_batch:
            ret_dict = extract_mrp_dict(existing_edges=outputs["existing_edges"],
                                        sent_len=outputs["sent_len"],
                                        id_cnt=outputs["id_cnt"],
                                        node_labels=outputs["node_labels"],
                                        node_types=outputs["node_types"],
                                        metadata=outputs["metadata"])
            ret_dict_batch.append(ret_dict)

        return sanitize(ret_dict_batch)
Example #18
Source File: decompatt_qa_predictor.py    From ARC-Solvers with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Attach the entailment probability to the input JSON (mutated in place)
        under "score" and return it sanitized.
        """
        model_output = self._model.forward_on_instance(
            self._json_to_instance(inputs), cuda_device
        )
        result = inputs
        result["score"] = model_output["label_probs"][self._entailment_idx]
        return sanitize(result)
Example #19
Source File: dgem_predictor.py    From ARC-Solvers with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Attach the entailment probability to the input JSON (mutated in place)
        under "score" and return it sanitized.
        """
        model_output = self._model.forward_on_instance(
            self._json_to_instance(inputs), cuda_device
        )
        result = inputs
        result["score"] = model_output["label_probs"][self._entailment_idx]
        return sanitize(result)
Example #20
Source File: predictor.py    From DISTRE with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Run the model on a batch without tracking gradients, decode the raw
        outputs, and return them sanitized.
        """
        model = self._model

        # Inference only: no autograd bookkeeping needed.
        with torch.no_grad():
            device = model._get_prediction_device()
            batch = Batch(instances)
            batch.index_instances(model.vocab)
            tensors = util.move_to_device(batch.as_tensor_dict(), device)
            outputs = model.decode(model(**tensors))

        return sanitize(outputs)
Example #21
Source File: predictor.py    From udify with MIT License 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """
        Predict for one instance, first remapping lemma labels that appear in
        the test set but were never seen at training time.
        """
        lemma_vocab = self._model.vocab._token_to_index["lemmas"]
        if "@@UNKNOWN@@" not in lemma_vocab:
            # Handle labels present in the test set but not the training set.
            self._predict_unknown(instance)
        return sanitize(self._model.forward_on_instance(instance))
Example #22
Source File: simple_gradient.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Interprets the model's prediction for inputs.  Gets the gradients of the loss with respect
        to the input and returns those gradients normalized and sanitized.

        Returns a sanitized dict mapping "instance_N" (1-based) to its
        L1-normalized, input-multiplied absolute gradients per token.
        """
        labeled_instances = self.predictor.json_to_labeled_instances(inputs)

        # List of embedding inputs, used for multiplying gradient by the input for normalization
        embeddings_list: List[numpy.ndarray] = []

        instances_with_grads = dict()
        for idx, instance in enumerate(labeled_instances):
            # Hook used for saving embeddings
            handle = self._register_forward_hook(embeddings_list)
            grads = self.predictor.get_gradients([instance])[0]
            handle.remove()

            # Gradients come back in the reverse order that they were sent into the network
            embeddings_list.reverse()
            for key, grad in grads.items():
                # Keys look like grad_input_[int]; parse the trailing integer and
                # subtract 1 for zero-based indexing into the reversed embeddings.
                # NOTE: the previous ``int(key[-1])`` only read the final digit
                # and broke for grad_input_10 and beyond.
                input_idx = int(key.split("_")[-1]) - 1
                # The [0] here is undo-ing the batching that happens in get_gradients.
                emb_grad = numpy.sum(grad[0] * embeddings_list[input_idx], axis=1)
                norm = numpy.linalg.norm(emb_grad, ord=1)
                # Guard against an all-zero gradient: dividing by a zero norm
                # would emit NaN/inf; return zeros instead.
                if norm == 0.0:
                    normalized_grad = [0.0 for _ in emb_grad]
                else:
                    normalized_grad = [math.fabs(e) / norm for e in emb_grad]
                grads[key] = normalized_grad

            instances_with_grads["instance_" + str(idx + 1)] = grads
        return sanitize(instances_with_grads)
Example #23
Source File: predictor.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def capture_model_internals(self) -> Iterator[dict]:
        """
        Context manager that captures the internal-module outputs of
        this predictor's model. The idea is that you could use it as follows:

        ```
            with predictor.capture_model_internals() as internals:
                outputs = predictor.predict_json(inputs)

            return {**outputs, "model_internals": internals}
        ```
        """
        results = {}
        hooks = []

        # First we'll register hooks to add the outputs of each module to the results dict.
        def add_output(idx: int):
            def _add_output(mod, _, outputs):
                results[idx] = {"name": str(mod), "output": sanitize(outputs)}

            return _add_output

        for idx, module in enumerate(self._model.modules()):
            if module != self._model:
                hook = module.register_forward_hook(add_output(idx))
                hooks.append(hook)

        try:
            # If you capture the return value of the context manager, you get the results dict.
            yield results
        finally:
            # Remove the hooks even if the with-body raised; otherwise they
            # would keep firing (and keep ``results`` alive) on later forward
            # passes after an exception.
            for hook in hooks:
                hook.remove()
Example #24
Source File: predictor.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """Run the model on a batch of instances and sanitize the outputs for JSON."""
        return sanitize(self._model.forward_on_instances(instances))
Example #25
Source File: util_test.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def test_sanitize(self):
        """sanitize() turns tensors into plain lists and rejects unknown objects."""
        for tensor in (torch.Tensor([1, 2]), torch.LongTensor([1, 2])):
            assert util.sanitize(tensor) == [1, 2]

        # Objects with no JSON representation must raise, not pass through.
        with pytest.raises(ValueError):
            util.sanitize(Unsanitizable())

        assert util.sanitize(Sanitizable()) == {"sanitizable": True}
Example #26
Source File: decompatt_predictor.py    From scitail with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Attach the entailment score plus the raw label probabilities and
        logits to the input JSON (mutated in place) and return it sanitized.
        """
        model_output = self._model.forward_on_instance(
            self._json_to_instance(inputs), cuda_device
        )
        result = inputs
        result["score"] = model_output["label_probs"][self._entailment_idx]
        result["label_probs"] = model_output["label_probs"]
        result["label_logits"] = model_output["label_logits"]
        return sanitize(result)
Example #27
Source File: dgem_predictor.py    From scitail with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Attach the entailment score plus the raw label probabilities and
        logits to the input JSON (mutated in place) and return it sanitized.
        """
        model_output = self._model.forward_on_instance(
            self._json_to_instance(inputs), cuda_device
        )
        result = inputs
        result["score"] = model_output["label_probs"][self._entailment_idx]
        result["label_probs"] = model_output["label_probs"]
        result["label_logits"] = model_output["label_logits"]
        return sanitize(result)
Example #28
Source File: overlap_predictor.py    From scitail with Apache License 2.0 5 votes vote down vote up
def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
        """
        Attach the entailment score plus the raw label probabilities and
        logits to the input JSON (mutated in place) and return it sanitized.
        """
        model_output = self._model.forward_on_instance(
            self._json_to_instance(inputs), cuda_device
        )
        result = inputs
        result["score"] = model_output["label_probs"][self._entailment_idx]
        result["label_probs"] = model_output["label_probs"]
        result["label_logits"] = model_output["label_logits"]
        return sanitize(result)
Example #29
Source File: predictor.py    From udify with MIT License 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Predict for a batch of instances, first remapping lemma labels that
        appear in the test set but were never seen at training time.
        """
        if "@@UNKNOWN@@" not in self._model.vocab._token_to_index["lemmas"]:
            # Handle labels present in the test set but not the training set.
            for inst in instances:
                self._predict_unknown(inst)
        return sanitize(self._model.forward_on_instances(instances))
Example #30
Source File: biaffine_dependency_parser.py    From magnitude with MIT License 5 votes vote down vote up
def predict_instance(self, instance):
        """
        Parse one instance and attach a hierplane tree for visualization.
        (Type annotations were stripped in this py2 port.)
        """
        outputs = self._model.forward_on_instance(instance)
        outputs[u"hierplane_tree"] = self._build_hierplane_tree(
            outputs[u"words"],
            outputs[u"predicted_heads"],
            outputs[u"predicted_dependencies"],
            outputs[u"pos"],
        )
        return sanitize(outputs)

    #overrides