Python allennlp.common.util.JsonDict() Examples

The following are 30 code examples of allennlp.common.util.JsonDict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module allennlp.common.util, or try the search function.
Example #1
Source File: convert_csqa.py    From KagNet with MIT License 6 votes vote down vote up
def convert_qajson_to_entailment(qa_json: JsonDict):
    """Record one entailment statement per answer choice on ``qa_json``.

    For every entry of ``qa_json["question"]["choices"]`` a hypothesis is built
    from the question stem and the choice text, then passed to
    ``create_output_dict`` together with a flag saying whether the choice's
    label equals ``qa_json["answerKey"]`` (``"Z"`` is compared when that key
    is absent).

    Returns the same ``qa_json`` object that was passed in.
    """
    stem = qa_json["question"]["stem"]
    for option in qa_json["question"]["choices"]:
        option_text = option["text"]

        fitb_statement = get_fitb_from_question(stem)
        hypothesis = create_hypothesis(fitb_statement, option_text)
        is_answer = option["label"] == qa_json.get("answerKey", "Z")
        create_output_dict(qa_json, hypothesis, is_answer)

    return qa_json


# Get a Fill-In-The-Blank (FITB) statement from the question text. E.g. "George wants to warm his
# hands quickly by rubbing them. Which skin surface will produce the most heat?" ->
# "George wants to warm his hands quickly by rubbing them. ___ skin surface will produce the most
# heat?"
Example #2
Source File: multiple_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_json(self, input: JsonDict):
        """Run the model on one JSON input and return a sanitized result dict.

        The result always carries ``input``, ``label_probs`` and
        ``predicted_answer_indices`` (the positions whose probability is
        at least 0.5). Both attention entries are added only when the model
        output contains a non-``None`` ``premises_attentions``.
        """
        model_output = self._model.forward_on_instance(self._json_to_instance(input))

        probs = model_output["label_probs"]
        attentions = model_output.get("premises_attentions")
        aggregation_attentions = model_output.get("premises_aggregation_attentions")

        result = {
            "input": input,
            "label_probs": probs,
            "predicted_answer_indices": [
                position for position, prob in enumerate(list(probs)) if prob >= 0.5
            ],
        }
        if attentions is not None:
            result["premises_attentions"] = attentions
            result["premises_aggregation_attentions"] = aggregation_attentions
        return sanitize(result)
Example #3
Source File: single_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """Run the model on a batch of JSON inputs and return sanitized results.

        One result dict is produced per input, holding ``input``,
        ``label_probs`` and ``predicted_answer_index`` (position of the
        largest probability). When the model output has a non-``None``
        ``premises_attentions``, both attention entries are added as well.
        """
        batch_outputs = self._model.forward_on_instances(
            self._batch_json_to_instances(inputs)
        )

        results = []
        for json_input, model_output in zip(inputs, batch_outputs):
            n_premises = len(json_input["premises"])
            probs = model_output["label_probs"]
            attentions = model_output.get("premises_attentions")
            aggregation_attentions = model_output.get("premises_aggregation_attentions")

            entry = {
                "input": json_input,
                "label_probs": probs,
                "predicted_answer_index": list(probs).index(max(probs)),
            }
            if attentions is not None:
                # Trim the second axis to the number of premises supplied with this input.
                entry["premises_attentions"] = attentions[:, :n_premises]
                entry["premises_aggregation_attentions"] = aggregation_attentions[:, :n_premises]

            results.append(entry)
        return sanitize(results)
Example #4
Source File: predictor_acl_arc.py    From scicite with Apache License 2.0 6 votes vote down vote up
def predict_json(self, inputs: JsonDict) -> JsonDict:
        """Classify one citation given as a JSON dict.

        ``inputs`` is parsed with ``read_jurgens_jsonline``. When the parsed
        citation text is empty, a notice is printed and an empty dict is
        returned. Otherwise the dataset reader is printed, an instance is built
        from the citation's fields and run through the model.

        Returns a dict with ``citation_id``, ``citingPaperId``,
        ``citedPaperId``, ``probabilities`` and ``prediction``.
        """
        citation = read_jurgens_jsonline(inputs)
        if len(citation.text) == 0:
            print('empty context, skipping')
            return {}

        print(self._dataset_reader)
        instance_fields = dict(
            citation_text=citation.text,
            intent=citation.intent,
            citing_paper_id=citation.citing_paper_id,
            cited_paper_id=citation.cited_paper_id,
            citation_excerpt_index=citation.citation_excerpt_index,
        )
        instance = self._dataset_reader.text_to_instance(**instance_fields)
        outputs = self._model.forward_on_instance(instance)

        return {
            'citation_id': citation.citation_id,
            'citingPaperId': outputs['citing_paper_id'],
            'citedPaperId': outputs['cited_paper_id'],
            'probabilities': outputs['probabilities'],
            'prediction': outputs['prediction'],
        }
Example #5
Source File: squad_predictor.py    From pair2vec with Apache License 2.0 6 votes vote down vote up
def predict(self, question: str, passage: str, question_id: str) -> JsonDict:
        """
        Make a machine comprehension prediction on the supplied input.
        See https://rajpurkar.github.io/SQuAD-explorer/ for more information about the machine comprehension task.

        Parameters
        ----------
        question : ``str``
            A question about the content in the supplied paragraph.  The question must be answerable by a
            span in the paragraph.
        passage : ``str``
            A paragraph of information relevant to the question.
        question_id : ``str``
            Identifier forwarded unchanged to ``predict_json`` under the "question_id" key.

        Returns
        -------
        Whatever ``predict_json`` returns for the assembled input.  Per the original documentation this is
        a dictionary representing the prediction, with the answer string under the "best_span_str" key.
        """
        payload = {"passage": passage, "question": question, "question_id": question_id}
        return self.predict_json(payload)
Example #6
Source File: single_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_json(self, input: JsonDict):
        """Predict the single best answer for one JSON input.

        Returns a sanitized dict with ``input``, ``label_probs`` and
        ``predicted_answer_index`` (position of the largest probability).
        The two attention entries are included only when the model output
        provides a non-``None`` ``premises_attentions``.
        """
        instance = self._json_to_instance(input)
        prediction = self._model.forward_on_instance(instance)

        probabilities = prediction["label_probs"]
        best_index = list(probabilities).index(max(probabilities))
        attn = prediction.get("premises_attentions")
        aggregation_attn = prediction.get("premises_aggregation_attentions")

        payload = {
            "input": input,
            "label_probs": probabilities,
            "predicted_answer_index": best_index,
        }
        if attn is not None:
            payload.update(
                premises_attentions=attn,
                premises_aggregation_attentions=aggregation_attn,
            )
        return sanitize(payload)
Example #7
Source File: multiple_correct_mcq_entailment.py    From multee with Apache License 2.0 6 votes vote down vote up
def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """Predict all correct answers for each JSON input of a batch.

        Every result dict holds ``input``, ``label_probs`` and
        ``predicted_answer_indices`` (positions whose probability is at least
        0.5). When the model output has a non-``None`` ``premises_attentions``,
        both attention entries are added too. The list of results is passed
        through ``sanitize`` before being returned.
        """
        instances = self._batch_json_to_instances(inputs)
        predictions = self._model.forward_on_instances(instances)

        collected = []
        for json_input, prediction in zip(inputs, predictions):
            premise_total = len(json_input["premises"])

            probabilities = prediction["label_probs"]
            chosen = [
                position
                for position, probability in enumerate(list(probabilities))
                if probability >= 0.5
            ]
            attn = prediction.get("premises_attentions")
            aggregation_attn = prediction.get("premises_aggregation_attentions")

            payload = {
                "input": json_input,
                "label_probs": probabilities,
                "predicted_answer_indices": chosen,
            }
            if attn is not None:
                # Trim the second axis to the number of premises supplied with this input.
                payload["premises_attentions"] = attn[:, :premise_total]
                payload["premises_aggregation_attentions"] = aggregation_attn[:, :premise_total]

            collected.append(payload)
        return sanitize(collected)
Example #8
Source File: nlvr_parser.py    From allennlp-semparse with Apache License 2.0 6 votes vote down vote up
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Build an instance from a JSON dict with a sentence and its world(s).

        Grouped data supplies ``"worlds"``; otherwise the single
        ``"structured_rep"`` is wrapped in a one-element list. Either value may
        be given as a JSON-encoded string, in which case it is decoded first.
        ``"identifier"`` is optional and defaults to ``None``.
        """
        def _decoded(value):
            # String values are taken to be JSON text; anything else passes through.
            return json.loads(value) if isinstance(value, str) else value

        sentence = json_dict["sentence"]
        if "worlds" in json_dict:
            # This is grouped data
            worlds = _decoded(json_dict["worlds"])
        else:
            worlds = [_decoded(json_dict["structured_rep"])]
        identifier = json_dict.get("identifier")
        return self._dataset_reader.text_to_instance(
            sentence=sentence,  # type: ignore
            structured_representations=worlds,
            identifier=identifier,
        )
Example #9
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 6 votes vote down vote up
def predict(self, sentence: str) -> JsonDict:
        """
        Parse a single sentence by delegating to ``predict_json``.
        Parameters
        ----------
        sentence The sentence to parse.

        Returns
        -------
        Whatever ``predict_json`` returns for ``{"sentence": sentence}``
        (per the original documentation, a dictionary representation of the parse).
        """
        payload = {"sentence": sentence}
        return self.predict_json(payload)

    # def predict_json(self, inputs: JsonDict) -> JsonDict:
    #     instance = self._json_to_instance(inputs)
    #     return self.predict_instance(instance) 
Example #10
Source File: input_reduction.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def attack_from_json(
        self,
        inputs: JsonDict = None,
        input_field_to_attack: str = "tokens",
        grad_input_field: str = "grad_input_1",
        ignore_tokens: List[str] = None,
        target: JsonDict = None,
    ):
        """Run input reduction on every labeled instance derived from ``inputs``.

        Targeted attacks are not supported: any non-``None`` ``target`` raises
        ``ValueError`` before anything else happens. ``ignore_tokens`` defaults
        to ``["@@NULL@@"]``.

        Returns a sanitized dict with ``"final"`` (one ``_attack_instance``
        result per labeled instance) and ``"original"`` (a deep copy of the
        tokens of the first instance's attacked field, taken before attacking).
        """
        if target is not None:
            raise ValueError("Input reduction does not implement targeted attacks")
        if ignore_tokens is None:
            ignore_tokens = ["@@NULL@@"]

        labeled_instances = self.predictor.json_to_labeled_instances(inputs)
        first_field: TextField = labeled_instances[0][  # type: ignore
            input_field_to_attack
        ]
        # Snapshot the tokens up front so the attack cannot alter what is reported.
        tokens_before = deepcopy(first_field.tokens)
        reduced = [
            self._attack_instance(
                inputs, labeled, input_field_to_attack, grad_input_field, ignore_tokens
            )
            for labeled in labeled_instances
        ]
        return sanitize({"final": reduced, "original": tokens_before})
Example #11
Source File: integrated_gradient.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """Compute integrated-gradient saliency for each labeled instance.

        For every instance the gradients from ``_integrate_gradients`` are
        replaced, key by key, with a list of absolute values divided by the L1
        norm of the summed gradient. Results are keyed ``instance_1``,
        ``instance_2``, ... and passed through ``sanitize``.
        """
        interpretations = {}
        # Convert inputs to labeled instances
        instances = self.predictor.json_to_labeled_instances(inputs)

        for number, instance in enumerate(instances, start=1):
            # Run integrated gradients
            grads = self._integrate_gradients(instance)

            # Normalize results (the dict is updated in place, key by key)
            for name, raw_grad in grads.items():
                # The [0] here is undo-ing the batching that happens in get_gradients.
                summed = numpy.sum(raw_grad[0], axis=1)
                l1_norm = numpy.linalg.norm(summed, ord=1)
                grads[name] = [math.fabs(value) / l1_norm for value in summed]

            interpretations[f"instance_{number}"] = grads

        return sanitize(interpretations)
Example #12
Source File: smooth_gradient.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """Compute SmoothGrad saliency for each labeled instance.

        For every instance the gradients from ``_smooth_grads`` are replaced,
        key by key, with a list of absolute values divided by the L1 norm of
        the summed gradient. Results are keyed ``instance_1``, ``instance_2``,
        ... and passed through ``sanitize``.
        """
        interpretations = {}
        # Convert inputs to labeled instances
        instances = self.predictor.json_to_labeled_instances(inputs)

        for number, instance in enumerate(instances, start=1):
            # Run smoothgrad
            grads = self._smooth_grads(instance)

            # Normalize results (the dict is updated in place, key by key)
            for name, raw_grad in grads.items():
                # TODO (@Eric-Wallace), SmoothGrad is not using times input normalization.
                # Fine for now, but should fix for consistency.

                # The [0] here is undo-ing the batching that happens in get_gradients.
                summed = numpy.sum(raw_grad[0], axis=1)
                l1_norm = numpy.linalg.norm(summed, ord=1)
                grads[name] = [math.fabs(value) / l1_norm for value in summed]

            interpretations[f"instance_{number}"] = grads

        return sanitize(interpretations)
Example #13
Source File: saliency_interpreter.py    From allennlp with Apache License 2.0 6 votes vote down vote up
def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Find a saliency value for each input token. This version always raises
        `NotImplementedError`; an implementation has to be supplied elsewhere.

        # Parameters

        inputs : `JsonDict`
            The input you want to interpret (the same as the argument to a Predictor, e.g., predict_json()).

        # Returns

        interpretation : `JsonDict`
            The intended result holds the normalized saliency values for each input token, with
            one entry per instance in the inputs JsonDict, e.g., `{instance_1: ..., instance_2:, ... }`.
            Each of those entries has entries for the saliency of the inputs, e.g.,
            `{grad_input_1: ..., grad_input_2: ... }`.
        """
        message = "Implement this for saliency interpretations"
        raise NotImplementedError(message)
Example #14
Source File: summary_sentences_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """Build an instance from the ``"source"`` entry of ``json_dict``."""
        text = json_dict["source"]
        reader = self._dataset_reader
        return reader.text_to_instance(text)
Example #15
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """Run the model on a batch of instances and convert each output to MRP.

        Conversion is best effort: when ``ucca_trans_outputs_into_mrp`` fails
        for an output, the graph id taken from that output's ``meta_info`` is
        printed and the corresponding result stays an empty list.

        Returns the sanitized list of per-instance results, in input order.
        """
        outputs_batch = self._model.forward_on_instances(instances)

        # Pre-fill with empty lists so a failed conversion still leaves a placeholder.
        ret_dict_batch = [[] for _ in outputs_batch]
        for outputs_idx, outputs in enumerate(outputs_batch):
            try:
                ret_dict_batch[outputs_idx] = ucca_trans_outputs_into_mrp(outputs)
            except Exception:
                # Catch Exception rather than everything, so KeyboardInterrupt
                # and SystemExit still propagate instead of being swallowed.
                print('graph_id:' + json.loads(outputs["meta_info"])['id'])

        return sanitize(ret_dict_batch)
Example #16
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"sentence": "..."}``.

        The dict is serialized back to a JSON string and handed to ``parse_sentence``; the
        resulting ``tokens``, ``meta_info`` (wrapped in a list) and ``tokens_range`` are
        passed to the dataset reader.
        """
        parsed = parse_sentence(json.dumps(json_dict))

        return self._dataset_reader.text_to_instance(
            tokens=parsed["tokens"],
            meta_info=[parsed["meta_info"]],
            tokens_range=parsed["tokens_range"],
        )
Example #17
Source File: entailment_pair.py    From multee with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self,  # type: ignore
                          json_dict: JsonDict) -> Instance:
        """Build an entailment-pair instance from ``json_dict``.

        The premise is read from ``"sentence1"``, falling back to ``"premise"``;
        the hypothesis from ``"sentence2"``, falling back to ``"hypothesis"``.
        When either text is missing or empty, a message is logged and ``None``
        is returned instead of an instance.
        """
        premise = json_dict.get("sentence1") or json_dict.get("premise")
        hypothesis = json_dict.get("sentence2") or json_dict.get("hypothesis")
        if not (premise and hypothesis):
            logger.info("Error parsing input")
            return None
        return self._dataset_reader.text_to_instance(premise, hypothesis)
Example #18
Source File: transition_ucca_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """Run the model on one instance and return its sanitized MRP conversion.

        The model output is converted with ``ucca_trans_outputs_into_mrp``
        before being passed through ``sanitize``.
        """
        model_output = self._model.forward_on_instance(instance)
        return sanitize(ucca_trans_outputs_into_mrp(model_output))
Example #19
Source File: tacred_predictor.py    From kb with Apache License 2.0 5 votes vote down vote up
def dump_line(self, outputs: JsonDict) -> str:
        """Return the label mapped from ``outputs['predictions']`` followed by a newline.

        The label is looked up in ``REVERSE_LABEL_MAP``.
        """
        prediction = outputs['predictions']
        label = REVERSE_LABEL_MAP[prediction]
        return label + '\n'
Example #20
Source File: transition_eds_predictor.py    From HIT-SCIR-CoNLL2019 with Apache License 2.0 5 votes vote down vote up
def predict_instance(self, instance: Instance) -> JsonDict:
        """Run the model on one instance and return its sanitized MRP conversion.

        The model output is converted with ``eds_trans_outputs_into_mrp``
        before being passed through ``sanitize``.
        """
        model_output = self._model.forward_on_instance(instance)
        return sanitize(eds_trans_outputs_into_mrp(model_output))
Example #21
Source File: multiple_correct_mcq_entailment.py    From multee with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self,  # type: ignore
                          json_dict: JsonDict) -> Instance:
        """Build a multiple-answer instance from ``json_dict``.

        ``"premises"`` and ``"hypotheses"`` are required. When ``"entailments"``
        is present, the positions of its truthy entries are passed as the answer
        indices; otherwise ``None`` is passed. ``"relevant_sentence_idxs"`` is
        optional and defaults to ``None``.
        """
        premises = json_dict["premises"]
        hypotheses = json_dict["hypotheses"]
        entailments = json_dict.get("entailments")
        answer_indices = (
            None
            if entailments is None
            else [position for position, flag in enumerate(entailments) if flag]
        )
        relevant_sentence_idxs = json_dict.get("relevant_sentence_idxs")
        return self._dataset_reader.text_to_instance(
            premises, hypotheses, answer_indices, relevant_sentence_idxs
        )
Example #22
Source File: squad_predictor.py    From pair2vec with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"question": "...", "passage": "...", "question_id": "..."}``.

        Returns a 2-tuple: the instance built by the dataset reader and an empty dict.
        """
        reader_args = (
            json_dict["question"],
            json_dict["passage"],
            json_dict["question_id"],
        )
        instance = self._dataset_reader.text_to_instance(*reader_args)
        return instance, {}
Example #23
Source File: get_nlvr_logical_forms.py    From allennlp-semparse with Apache License 2.0 5 votes vote down vote up
def read_json_line(line: str) -> Tuple[str, str, List[JsonDict], List[str]]:
    """Parse one JSON line into ``(identifier, sentence, structured_reps, labels)``.

    Grouped records carry ``"worlds"`` and ``"labels"``. Ungrouped records carry
    a single ``"structured_rep"`` and ``"label"``, each of which is wrapped in a
    one-element list. Label strings are lower-cased in both cases.
    """
    record = json.loads(line)
    instance_id, sentence = record["identifier"], record["sentence"]
    if "worlds" not in record:
        # We're reading ungrouped data.
        return instance_id, sentence, [record["structured_rep"]], [record["label"].lower()]
    worlds = record["worlds"]
    labels = [label.lower() for label in record["labels"]]
    return instance_id, sentence, worlds, labels
Example #24
Source File: atis_parser.py    From allennlp-semparse with Apache License 2.0 5 votes vote down vote up
def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"utterance": "..."}``.

        The utterance is passed to the dataset reader wrapped in a one-element list.
        """
        utterances = [json_dict["utterance"]]
        return self._dataset_reader.text_to_instance(utterances)
Example #25
Source File: nlvr_parser.py    From allennlp-semparse with Apache License 2.0 5 votes vote down vote up
def dump_line(self, outputs: JsonDict) -> str:
        """Serialize one prediction as a single output line.

        When ``outputs`` has an ``"identifier"``, the line is
        ``identifier,denotation`` using ``outputs["denotations"][0][0]``;
        otherwise it is the JSON dump of ``outputs``. Either way the line ends
        with a newline.
        """
        if "identifier" not in outputs:
            return json.dumps(outputs) + "\n"

        # Returning CSV lines for official evaluation
        identifier = outputs["identifier"]
        first_denotation = outputs["denotations"][0][0]
        return "{},{}\n".format(identifier, first_denotation)
Example #26
Source File: nlvr_language.py    From allennlp-semparse with Apache License 2.0 5 votes vote down vote up
def __init__(self, objects_list: List[JsonDict], box_id: int) -> None:
        """Build a box from the dicts describing its objects.

        The box is named ``"box <box_id + 1>"``. Each dict becomes an ``Object``
        tagged with that name, and the distinct colors and shapes of those
        objects are collected into sets.
        """
        self._name = f"box {box_id + 1}"
        self._objects_string = str(list(map(str, objects_list)))
        self.objects = {Object(spec, self._name) for spec in objects_list}

        colors, shapes = set(), set()
        for member in self.objects:
            colors.add(member.color)
        for member in self.objects:
            shapes.add(member.shape)
        self.colors = colors
        self.shapes = shapes
Example #27
Source File: nlvr_language.py    From allennlp-semparse with Apache License 2.0 5 votes vote down vote up
def __init__(self, attributes: JsonDict, box_id: str) -> None:
        """Build an object from its attribute dict.

        ``"color"`` and ``"type"`` are lower-cased; a color starting with ``#``
        is stored as ``"blue"``. ``"x_loc"``, ``"y_loc"`` and ``"size"`` are
        copied as-is, and ``box_id`` is remembered as ``_box_id``.
        """
        color = attributes["color"].lower()
        # The dataset has a hex code only for blue for some reason.
        self.color = "blue" if color.startswith("#") else color
        self.shape = attributes["type"].lower()
        self.x_loc = attributes["x_loc"]
        self.y_loc = attributes["y_loc"]
        self.size = attributes["size"]
        self._box_id = box_id
Example #28
Source File: convert_statement.py    From KagNet with MIT License 5 votes vote down vote up
def create_output_dict(input_json: JsonDict, premise: str, hypothesis: str) -> JsonDict:
    """Store ``premise`` and ``hypothesis`` on ``input_json`` and return it.

    The dict is modified in place; existing values under those keys are
    overwritten.
    """
    input_json.update({"premise": premise, "hypothesis": hypothesis})
    return input_json
Example #29
Source File: convert_statement.py    From KagNet with MIT License 5 votes vote down vote up
def convert_qajson_to_entailment(qa_json: JsonDict):
    """Turn a QA record into a premise/hypothesis pair stored on the record.

    The hypothesis is built from the question stem and the text of
    ``qa_json["question"]["choice"]``; the premise is the text of
    ``qa_json["question"]["support"]``. Both are handed to
    ``create_output_dict``, whose result is returned.
    """
    stem = qa_json["question"]["stem"]
    choice_text = qa_json["question"]["choice"]["text"]
    support_text = qa_json["question"]["support"]["text"]
    fitb_statement = get_fitb_from_question(stem)
    return create_output_dict(
        qa_json, support_text, create_hypothesis(fitb_statement, choice_text)
    )


# Get a Fill-In-The-Blank (FITB) statement from the question text. E.g. "George wants to warm his
# hands quickly by rubbing them. Which skin surface will produce the most heat?" ->
# "George wants to warm his hands quickly by rubbing them. ___ skin surface will produce the most
# heat?"
Example #30
Source File: convert_csqa.py    From KagNet with MIT License 5 votes vote down vote up
def create_output_dict(input_json: JsonDict, statement: str, label: bool) -> JsonDict:
    """Append a labeled statement to ``input_json["statements"]`` and return the dict.

    The ``"statements"`` list is created on first use; the dict is modified
    in place.
    """
    entry = {"label": label, "statement": statement}
    input_json.setdefault("statements", []).append(entry)
    return input_json