Python allennlp.common.util.sanitize() Examples
The following are 30 code examples of allennlp.common.util.sanitize(), drawn from open-source projects. The originating project, source file, and license are noted above each example. You may also want to check out the other functions and classes of the allennlp.common.util module.
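As a quick orientation, sanitize() recursively converts model outputs (PyTorch tensors, numpy arrays and scalars, nested dicts and lists) into plain Python types that json.dumps() can handle. Below is a minimal, hedged sketch of a typical call; the example values are invented for illustration and are not taken from any of the examples that follow.

import json

import numpy
import torch

from allennlp.common.util import sanitize

# Hypothetical model output mixing a tensor, a numpy array, and a plain string.
outputs = {
    "logits": torch.tensor([0.1, 0.9]),
    "probs": numpy.array([0.25, 0.75]),
    "label": "entailment",
}

# sanitize() walks the structure and returns JSON-serializable Python objects.
clean = sanitize(outputs)
print(json.dumps(clean))  # e.g. {"logits": [0.1, 0.9], "probs": [0.25, 0.75], "label": "entailment"}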
Example #1
Source File: smooth_gradient.py (from allennlp, Apache License 2.0)

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
    # Convert inputs to labeled instances
    labeled_instances = self.predictor.json_to_labeled_instances(inputs)

    instances_with_grads = dict()
    for idx, instance in enumerate(labeled_instances):
        # Run smoothgrad
        grads = self._smooth_grads(instance)

        # Normalize results
        for key, grad in grads.items():
            # TODO (@Eric-Wallace), SmoothGrad is not using times input normalization.
            # Fine for now, but should fix for consistency.

            # The [0] here is undo-ing the batching that happens in get_gradients.
            embedding_grad = numpy.sum(grad[0], axis=1)
            norm = numpy.linalg.norm(embedding_grad, ord=1)
            normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
            grads[key] = normalized_grad

        instances_with_grads["instance_" + str(idx + 1)] = grads

    return sanitize(instances_with_grads)
Example #2
Source File: integrated_gradient.py (from allennlp, Apache License 2.0)

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
    # Convert inputs to labeled instances
    labeled_instances = self.predictor.json_to_labeled_instances(inputs)

    instances_with_grads = dict()
    for idx, instance in enumerate(labeled_instances):
        # Run integrated gradients
        grads = self._integrate_gradients(instance)

        # Normalize results
        for key, grad in grads.items():
            # The [0] here is undo-ing the batching that happens in get_gradients.
            embedding_grad = numpy.sum(grad[0], axis=1)
            norm = numpy.linalg.norm(embedding_grad, ord=1)
            normalized_grad = [math.fabs(e) / norm for e in embedding_grad]
            grads[key] = normalized_grad

        instances_with_grads["instance_" + str(idx + 1)] = grads

    return sanitize(instances_with_grads)
Example #3
Source File: input_reduction.py (from allennlp, Apache License 2.0)

def attack_from_json(
    self,
    inputs: JsonDict = None,
    input_field_to_attack: str = "tokens",
    grad_input_field: str = "grad_input_1",
    ignore_tokens: List[str] = None,
    target: JsonDict = None,
):
    if target is not None:
        raise ValueError("Input reduction does not implement targeted attacks")
    ignore_tokens = ["@@NULL@@"] if ignore_tokens is None else ignore_tokens
    original_instances = self.predictor.json_to_labeled_instances(inputs)
    original_text_field: TextField = original_instances[0][  # type: ignore
        input_field_to_attack
    ]
    original_tokens = deepcopy(original_text_field.tokens)
    final_tokens = []
    for instance in original_instances:
        final_tokens.append(
            self._attack_instance(
                inputs, instance, input_field_to_attack, grad_input_field, ignore_tokens
            )
        )
    return sanitize({"final": final_tokens, "original": original_tokens})
Example #4
Source File: multiple_correct_mcq_entailment.py (from multee, Apache License 2.0)

def predict_json(self, input: JsonDict):
    instance = self._json_to_instance(input)
    output = self._model.forward_on_instance(instance)
    return_json = {}
    return_json["input"] = input

    label_probs = output["label_probs"]
    predicted_answer_indices = [index for index, prob in enumerate(list(label_probs))
                                if prob >= 0.5]
    premises_attentions = output.get("premises_attentions", None)
    premises_aggregation_attentions = output.get("premises_aggregation_attentions", None)

    return_json["label_probs"] = label_probs
    return_json["predicted_answer_indices"] = predicted_answer_indices
    if premises_attentions is not None:
        return_json["premises_attentions"] = premises_attentions
        return_json["premises_aggregation_attentions"] = premises_aggregation_attentions

    return sanitize(return_json)
Example #5
Source File: multiple_correct_mcq_entailment.py (from multee, Apache License 2.0)

def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
    instances = self._batch_json_to_instances(inputs)
    outputs = self._model.forward_on_instances(instances)
    return_jsons = []
    for input, output in zip(inputs, outputs):
        return_json = {}
        return_json["input"] = input

        premises_count = len(input["premises"])
        label_probs = output["label_probs"]
        predicted_answer_indices = [index for index, prob in enumerate(list(label_probs))
                                    if prob >= 0.5]
        premises_attentions = output.get("premises_attentions", None)
        premises_aggregation_attentions = output.get("premises_aggregation_attentions", None)

        return_json["label_probs"] = label_probs
        return_json["predicted_answer_indices"] = predicted_answer_indices
        if premises_attentions is not None:
            return_json["premises_attentions"] = premises_attentions[:, :premises_count]
            return_json["premises_aggregation_attentions"] = premises_aggregation_attentions[:, :premises_count]

        return_jsons.append(return_json)
    return sanitize(return_jsons)
Example #6
Source File: single_correct_mcq_entailment.py (from multee, Apache License 2.0)

def predict_json(self, input: JsonDict):
    instance = self._json_to_instance(input)
    output = self._model.forward_on_instance(instance)
    return_json = {}
    return_json["input"] = input

    label_probs = output["label_probs"]
    predicted_answer_index = list(label_probs).index(max(label_probs))
    premises_attentions = output.get("premises_attentions", None)
    premises_aggregation_attentions = output.get("premises_aggregation_attentions", None)

    return_json["label_probs"] = label_probs
    return_json["predicted_answer_index"] = predicted_answer_index
    if premises_attentions is not None:
        return_json["premises_attentions"] = premises_attentions
        return_json["premises_aggregation_attentions"] = premises_aggregation_attentions

    return sanitize(return_json)
Example #7
Source File: single_correct_mcq_entailment.py (from multee, Apache License 2.0)

def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
    instances = self._batch_json_to_instances(inputs)
    outputs = self._model.forward_on_instances(instances)
    return_jsons = []
    for input, output in zip(inputs, outputs):
        return_json = {}
        return_json["input"] = input

        premises_count = len(input["premises"])
        label_probs = output["label_probs"]
        predicted_answer_index = list(label_probs).index(max(label_probs))
        premises_attentions = output.get("premises_attentions", None)
        premises_aggregation_attentions = output.get("premises_aggregation_attentions", None)

        return_json["label_probs"] = label_probs
        return_json["predicted_answer_index"] = predicted_answer_index
        if premises_attentions is not None:
            return_json["premises_attentions"] = premises_attentions[:, :premises_count]
            return_json["premises_aggregation_attentions"] = premises_aggregation_attentions[:, :premises_count]

        return_jsons.append(return_json)
    return sanitize(return_jsons)
Example #8
Source File: biaffine_dependency_parser.py (from magnitude, MIT License)

def predict_batch_instance(self, instances):
    outputs = self._model.forward_on_instances(instances)
    for output in outputs:
        words = output[u"words"]
        pos = output[u"pos"]
        heads = output[u"predicted_heads"]
        tags = output[u"predicted_dependencies"]
        output[u"hierplane_tree"] = self._build_hierplane_tree(words, heads, tags, pos)
    return sanitize(outputs)
Example #9
Source File: bidaf_qa_predictor.py (from ARC-Solvers, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    span_str = outputs["best_span_str"]
    # If the file has an answer key, calculate the score
    if "answerKey" in json_output:
        answer_choices = json_output["question"]["choices"]
        # Score each answer choice based on its overlap with the predicted span.
        for choice in answer_choices:
            choice_text = choice["text"]
            choice_score = self._overlap_score(choice_text, span_str)
            choice["score"] = choice_score
        # Get the maximum answer choice score
        max_choice_score = max(answer_choices, key=itemgetter("score"))["score"]
        # Collect all answer choices with the same score
        selected_answers = [choice["label"] for choice in answer_choices
                            if choice["score"] == max_choice_score]
        answer_key = json_output["answerKey"]
        if answer_key in selected_answers:
            question_score = 1 / len(selected_answers)
        else:
            question_score = 0
        json_output["selected_answers"] = ",".join(selected_answers)
        json_output["question_score"] = question_score
    json_output["best_span_str"] = span_str
    return sanitize(json_output)
Example #10
Source File: entailment_pair.py (from multee, Apache License 2.0)

def predict_json(self, inputs: JsonDict):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance)
    inputs["entailment_prob"] = float(outputs["label_probs"][self._entailment_idx])
    inputs["contradiction_prob"] = float(outputs["label_probs"][self._contradiction_idx])
    inputs["neutral_prob"] = float(outputs["label_probs"][self._neutral_idx])
    return sanitize(inputs)
Example #11
Source File: conll_predictor.py (from allennlp_tutorial, MIT License)

def predict_instance(self, instance: Instance) -> JsonDict:
    outputs = self._model.forward_on_instance(instance)
    label_vocab = self._model.vocab.get_index_to_token_vocabulary('labels')
    outputs['tokens'] = [str(token) for token in instance.fields['tokens'].tokens]
    outputs['predicted'] = [label_vocab[l] for l in outputs['logits'].argmax(1)]
    outputs['labels'] = instance.fields['label'].labels
    return sanitize(outputs)
Example #12
Source File: transition_ucca_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_instance(self, instance: Instance) -> JsonDict:
    outputs = self._model.forward_on_instance(instance)
    ret_dict = ucca_trans_outputs_into_mrp(outputs)
    return sanitize(ret_dict)
Example #13
Source File: transition_ucca_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    outputs_batch = self._model.forward_on_instances(instances)

    ret_dict_batch = [[] for i in range(len(outputs_batch))]
    for outputs_idx in range(len(outputs_batch)):
        try:
            ret_dict_batch[outputs_idx] = ucca_trans_outputs_into_mrp(outputs_batch[outputs_idx])
        except:
            print('graph_id:' + json.loads(outputs_batch[outputs_idx]["meta_info"])['id'])

    return sanitize(ret_dict_batch)
Example #14
Source File: transition_eds_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_instance(self, instance: Instance) -> JsonDict:
    outputs = self._model.forward_on_instance(instance)
    ret_dict = eds_trans_outputs_into_mrp(outputs)
    return sanitize(ret_dict)
Example #15
Source File: transition_eds_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    outputs_batch = self._model.forward_on_instances(instances)

    ret_dict_batch = [[] for i in range(len(outputs_batch))]
    for outputs_idx in range(len(outputs_batch)):
        try:
            ret_dict_batch[outputs_idx] = eds_trans_outputs_into_mrp(outputs_batch[outputs_idx])
        except:
            print('graph_id:' + json.loads(outputs_batch[outputs_idx]["meta_info"])['id'])

    return sanitize(ret_dict_batch)
Example #16
Source File: transition_sdp_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_instance(self, instance: Instance) -> JsonDict:
    outputs = self._model.forward_on_instance(instance)
    ret_dict = sdp_trans_outputs_into_mrp(outputs)
    return sanitize(ret_dict)
Example #17
Source File: transition_amr_predictor.py (from HIT-SCIR-CoNLL2019, Apache License 2.0)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    outputs_batch = self._model.forward_on_instances(instances)

    ret_dict_batch = []
    for outputs_idx in range(len(outputs_batch)):
        outputs = outputs_batch[outputs_idx]
        instance = instances[outputs_idx]

        existing_edges = outputs["existing_edges"]
        node_labels = outputs["node_labels"]
        id_cnt = outputs["id_cnt"]
        tokens = outputs["tokens"]
        metadata = outputs["metadata"]
        node_types = outputs["node_types"]
        sent_len = outputs["sent_len"]

        ret_dict = extract_mrp_dict(existing_edges=existing_edges,
                                    sent_len=sent_len,
                                    id_cnt=id_cnt,
                                    node_labels=node_labels,
                                    node_types=node_types,
                                    metadata=metadata)
        ret_dict_batch.append(ret_dict)

    return sanitize(ret_dict_batch)
Example #18
Source File: decompatt_qa_predictor.py (from ARC-Solvers, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    json_output["score"] = outputs["label_probs"][self._entailment_idx]
    return sanitize(json_output)
Example #19
Source File: dgem_predictor.py (from ARC-Solvers, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    json_output["score"] = outputs["label_probs"][self._entailment_idx]
    return sanitize(json_output)
Example #20
Source File: predictor.py (from DISTRE, Apache License 2.0)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    model = self._model
    with torch.no_grad():
        cuda_device = model._get_prediction_device()
        dataset = Batch(instances)
        dataset.index_instances(model.vocab)
        model_input = util.move_to_device(dataset.as_tensor_dict(), cuda_device)
        outputs = model.decode(model(**model_input))
        return sanitize(outputs)
Example #21
Source File: predictor.py (from udify, MIT License)

def predict_instance(self, instance: Instance) -> JsonDict:
    if "@@UNKNOWN@@" not in self._model.vocab._token_to_index["lemmas"]:
        # Handle cases where the labels are present in the test set but not training set
        self._predict_unknown(instance)
    outputs = self._model.forward_on_instance(instance)
    return sanitize(outputs)
Example #22
Source File: simple_gradient.py (from allennlp, Apache License 2.0)

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
    """
    Interprets the model's prediction for inputs. Gets the gradients of the loss with respect
    to the input and returns those gradients normalized and sanitized.
    """
    labeled_instances = self.predictor.json_to_labeled_instances(inputs)

    # List of embedding inputs, used for multiplying gradient by the input for normalization
    embeddings_list: List[numpy.ndarray] = []

    instances_with_grads = dict()
    for idx, instance in enumerate(labeled_instances):
        # Hook used for saving embeddings
        handle = self._register_forward_hook(embeddings_list)
        grads = self.predictor.get_gradients([instance])[0]
        handle.remove()

        # Gradients come back in the reverse order that they were sent into the network
        embeddings_list.reverse()
        for key, grad in grads.items():
            # Get number at the end of every gradient key (they look like grad_input_[int],
            # we're getting this [int] part and subtracting 1 for zero-based indexing).
            # This is then used as an index into the reversed input array to match up the
            # gradient and its respective embedding.
            input_idx = int(key[-1]) - 1
            # The [0] here is undo-ing the batching that happens in get_gradients.
            emb_grad = numpy.sum(grad[0] * embeddings_list[input_idx], axis=1)
            norm = numpy.linalg.norm(emb_grad, ord=1)
            normalized_grad = [math.fabs(e) / norm for e in emb_grad]
            grads[key] = normalized_grad

        instances_with_grads["instance_" + str(idx + 1)] = grads

    return sanitize(instances_with_grads)
Example #23
Source File: predictor.py (from allennlp, Apache License 2.0)

def capture_model_internals(self) -> Iterator[dict]:
    """
    Context manager that captures the internal-module outputs of this predictor's model.
    The idea is that you could use it as follows:

    ```
        with predictor.capture_model_internals() as internals:
            outputs = predictor.predict_json(inputs)

        return {**outputs, "model_internals": internals}
    ```
    """
    results = {}
    hooks = []

    # First we'll register hooks to add the outputs of each module to the results dict.
    def add_output(idx: int):
        def _add_output(mod, _, outputs):
            results[idx] = {"name": str(mod), "output": sanitize(outputs)}

        return _add_output

    for idx, module in enumerate(self._model.modules()):
        if module != self._model:
            hook = module.register_forward_hook(add_output(idx))
            hooks.append(hook)

    # If you capture the return value of the context manager, you get the results dict.
    yield results

    # And then when you exit the context we remove all the hooks.
    for hook in hooks:
        hook.remove()
Example #24
Source File: predictor.py (from allennlp, Apache License 2.0)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    outputs = self._model.forward_on_instances(instances)
    return sanitize(outputs)
Example #25
Source File: util_test.py (from allennlp, Apache License 2.0)

def test_sanitize(self):
    assert util.sanitize(torch.Tensor([1, 2])) == [1, 2]
    assert util.sanitize(torch.LongTensor([1, 2])) == [1, 2]

    with pytest.raises(ValueError):
        util.sanitize(Unsanitizable())

    assert util.sanitize(Sanitizable()) == {"sanitizable": True}
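The Unsanitizable and Sanitizable classes referenced in this test are small helpers defined elsewhere in util_test.py. A minimal sketch consistent with the assertions above, assuming sanitize() falls back to an object's to_json() method and raises ValueError for anything it cannot convert:

class Unsanitizable:
    # No to_json() method, so sanitize() has no way to convert it.
    pass


class Sanitizable:
    # sanitize() can delegate to this hook to get a JSON-friendly value.
    def to_json(self):
        return {"sanitizable": True}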
Example #26
Source File: decompatt_predictor.py (from scitail, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    json_output["score"] = outputs["label_probs"][self._entailment_idx]
    json_output["label_probs"] = outputs["label_probs"]
    json_output["label_logits"] = outputs["label_logits"]
    return sanitize(json_output)
Example #27
Source File: dgem_predictor.py (from scitail, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    json_output["score"] = outputs["label_probs"][self._entailment_idx]
    json_output["label_probs"] = outputs["label_probs"]
    json_output["label_logits"] = outputs["label_logits"]
    return sanitize(json_output)
Example #28
Source File: overlap_predictor.py (from scitail, Apache License 2.0)

def predict_json(self, inputs: JsonDict, cuda_device: int = -1):
    instance = self._json_to_instance(inputs)
    outputs = self._model.forward_on_instance(instance, cuda_device)
    json_output = inputs
    json_output["score"] = outputs["label_probs"][self._entailment_idx]
    json_output["label_probs"] = outputs["label_probs"]
    json_output["label_logits"] = outputs["label_logits"]
    return sanitize(json_output)
Example #29
Source File: predictor.py (from udify, MIT License)

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
    if "@@UNKNOWN@@" not in self._model.vocab._token_to_index["lemmas"]:
        # Handle cases where the labels are present in the test set but not training set
        for instance in instances:
            self._predict_unknown(instance)
    outputs = self._model.forward_on_instances(instances)
    return sanitize(outputs)
Example #30
Source File: biaffine_dependency_parser.py (from magnitude, MIT License)

def predict_instance(self, instance):
    outputs = self._model.forward_on_instance(instance)
    words = outputs[u"words"]
    pos = outputs[u"pos"]
    heads = outputs[u"predicted_heads"]
    tags = outputs[u"predicted_dependencies"]
    outputs[u"hierplane_tree"] = self._build_hierplane_tree(words, heads, tags, pos)
    return sanitize(outputs)
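For context, the predict_json() and predict_instance() overrides shown throughout these examples are normally invoked through an AllenNLP Predictor loaded from a model archive. A hedged sketch of that driving code follows; the archive path, predictor name, and input fields are placeholders rather than values taken from the examples above.

import json

from allennlp.models.archival import load_archive
from allennlp.predictors import Predictor

# Hypothetical archive path and predictor name.
archive = load_archive("model.tar.gz")
predictor = Predictor.from_archive(archive, "textual_entailment")

result = predictor.predict_json({"premise": "...", "hypothesis": "..."})

# Because the predictor ends with sanitize(), the result contains only plain
# Python types and can be serialized directly, e.g. when serving over HTTP.
print(json.dumps(result))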