Python Examples of allennlp.common.util.JsonDict

Source File: convert_csqa.py From KagNet with MIT License

6 votes

def convert_qajson_to_entailment(qa_json: JsonDict):
    """Record one entailment statement per answer choice of ``qa_json``.

    For every entry of ``qa_json["question"]["choices"]`` the question stem is
    turned into a fill-in-the-blank form, combined with the choice text via
    ``create_hypothesis`` and passed to ``create_output_dict`` together with a
    boolean telling whether the choice label equals ``qa_json["answerKey"]``
    (``"Z"`` is used for the comparison when no answer key is present).

    Returns the same ``qa_json`` object that was passed in.
    """
    stem = qa_json["question"]["stem"]
    for option in qa_json["question"]["choices"]:
        option_text = option["text"]
        fitb = get_fitb_from_question(stem)
        statement = create_hypothesis(fitb, option_text)
        is_answer = option["label"] == qa_json.get("answerKey", "Z")
        create_output_dict(qa_json, statement, is_answer)

    return qa_json


# Get a Fill-In-The-Blank (FITB) statement from the question text. E.g. "George wants to warm his
# hands quickly by rubbing them. Which skin surface will produce the most heat?" ->
# "George wants to warm his hands quickly by rubbing them. ___ skin surface will produce the most
# heat?"

Source File: multiple_correct_mcq_entailment.py From multee with Apache License 2.0

6 votes

def predict_json(self, input: JsonDict):
        """
        Run the model on a single JSON input and package the result.

        The returned (sanitized) dictionary holds the original ``input``, the
        model's ``label_probs`` and ``predicted_answer_indices``: the positions
        whose probability is at least 0.5.  When the model output contains
        ``premises_attentions``, both attention entries are copied over as well.
        """
        model_output = self._model.forward_on_instance(self._json_to_instance(input))

        probs = model_output["label_probs"]
        result = {
            "input": input,
            "label_probs": probs,
            # Every label scoring >= 0.5 counts as a predicted answer.
            "predicted_answer_indices": [
                position for position, score in enumerate(list(probs)) if score >= 0.5
            ],
        }

        attentions = model_output.get("premises_attentions", None)
        aggregation_attentions = model_output.get("premises_aggregation_attentions", None)
        if attentions is not None:
            result["premises_attentions"] = attentions
            result["premises_aggregation_attentions"] = aggregation_attentions
        return sanitize(result)

Source File: single_correct_mcq_entailment.py From multee with Apache License 2.0

6 votes

def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """
        Run the model on a batch of JSON inputs and package one result per input.

        Each result holds the original input, the model's ``label_probs`` and
        ``predicted_answer_index`` (position of the highest probability).  When
        ``premises_attentions`` is present in the model output, both attention
        entries are included, sliced along their second axis to the number of
        premises in that input.  The whole list is passed through ``sanitize``.
        """
        model_outputs = self._model.forward_on_instances(self._batch_json_to_instances(inputs))

        results = []
        for single_input, single_output in zip(inputs, model_outputs):
            n_premises = len(single_input["premises"])
            probs = single_output["label_probs"]
            entry = {
                "input": single_input,
                "label_probs": probs,
                "predicted_answer_index": list(probs).index(max(probs)),
            }

            attentions = single_output.get("premises_attentions", None)
            aggregation_attentions = single_output.get("premises_aggregation_attentions", None)
            if attentions is not None:
                # Keep only the first ``n_premises`` columns
                # (presumably dropping batch padding -- TODO confirm).
                entry["premises_attentions"] = attentions[:, :n_premises]
                entry["premises_aggregation_attentions"] = aggregation_attentions[:, :n_premises]

            results.append(entry)
        return sanitize(results)

Source File: predictor_acl_arc.py From scicite with Apache License 2.0

6 votes

def predict_json(self, inputs: JsonDict) -> JsonDict:
        """
        Predict for one citation given as a JSON dict.

        The input is parsed with ``read_jurgens_jsonline``.  If the parsed
        citation has empty text, a notice is printed and ``{}`` is returned.
        Otherwise an instance is built by the dataset reader, the model is run
        on it, and a dict with the citation id, the citing/cited paper ids, the
        probabilities and the prediction is returned.
        """
        citation = read_jurgens_jsonline(inputs)
        if len(citation.text) == 0:
            print('empty context, skipping')
            return {}

        # Existing diagnostic output; kept so stdout is unchanged.
        print(self._dataset_reader)
        instance_fields = dict(
            citation_text=citation.text,
            intent=citation.intent,
            citing_paper_id=citation.citing_paper_id,
            cited_paper_id=citation.cited_paper_id,
            citation_excerpt_index=citation.citation_excerpt_index,
        )
        instance = self._dataset_reader.text_to_instance(**instance_fields)
        outputs = self._model.forward_on_instance(instance)

        return {
            'citation_id': citation.citation_id,
            'citingPaperId': outputs['citing_paper_id'],
            'citedPaperId': outputs['cited_paper_id'],
            'probabilities': outputs['probabilities'],
            'prediction': outputs['prediction'],
        }

Source File: squad_predictor.py From pair2vec with Apache License 2.0

6 votes

def predict(self, question: str, passage: str, question_id: str) -> JsonDict:
        """
        Make a machine comprehension prediction on the supplied input.
        See https://rajpurkar.github.io/SQuAD-explorer/ for more information about the machine comprehension task.

        Parameters
        ----------
        question : ``str``
            A question about the content in the supplied paragraph.  The question must be answerable by a
            span in the paragraph.
        passage : ``str``
            A paragraph of information relevant to the question.
        question_id : ``str``
            Identifier forwarded unchanged under the "question_id" key.

        Returns
        -------
        Whatever ``predict_json`` returns for the assembled input dictionary: a dictionary that represents
        the prediction made by the system.  The answer string will be under the "best_span_str" key.
        """
        payload = {"passage" : passage, "question" : question, "question_id": question_id}
        return self.predict_json(payload)

Source File: single_correct_mcq_entailment.py From multee with Apache License 2.0

6 votes

def predict_json(self, input: JsonDict):
        """
        Run the model on a single JSON input and package the result.

        The returned (sanitized) dictionary holds the original ``input``, the
        model's ``label_probs`` and ``predicted_answer_index``: the position of
        the highest probability.  When the model output contains
        ``premises_attentions``, both attention entries are copied over as well.
        """
        model_output = self._model.forward_on_instance(self._json_to_instance(input))

        probs = model_output["label_probs"]
        result = {
            "input": input,
            "label_probs": probs,
            # First position holding the maximum probability.
            "predicted_answer_index": list(probs).index(max(probs)),
        }

        attentions = model_output.get("premises_attentions", None)
        aggregation_attentions = model_output.get("premises_aggregation_attentions", None)
        if attentions is not None:
            result["premises_attentions"] = attentions
            result["premises_aggregation_attentions"] = aggregation_attentions
        return sanitize(result)

Source File: multiple_correct_mcq_entailment.py From multee with Apache License 2.0

6 votes

def predict_batch_json(self, inputs: List[JsonDict]) -> List[JsonDict]:
        """
        Run the model on a batch of JSON inputs and package one result per input.

        Each result holds the original input, the model's ``label_probs`` and
        ``predicted_answer_indices`` (positions whose probability is at least
        0.5).  When ``premises_attentions`` is present in the model output, both
        attention entries are included, sliced along their second axis to the
        number of premises in that input.  The list is passed through ``sanitize``.
        """
        model_outputs = self._model.forward_on_instances(self._batch_json_to_instances(inputs))

        results = []
        for single_input, single_output in zip(inputs, model_outputs):
            n_premises = len(single_input["premises"])
            probs = single_output["label_probs"]
            entry = {
                "input": single_input,
                "label_probs": probs,
                "predicted_answer_indices": [
                    position for position, score in enumerate(list(probs)) if score >= 0.5
                ],
            }

            attentions = single_output.get("premises_attentions", None)
            aggregation_attentions = single_output.get("premises_aggregation_attentions", None)
            if attentions is not None:
                # Keep only the first ``n_premises`` columns
                # (presumably dropping batch padding -- TODO confirm).
                entry["premises_attentions"] = attentions[:, :n_premises]
                entry["premises_aggregation_attentions"] = aggregation_attentions[:, :n_premises]

            results.append(entry)
        return sanitize(results)

Source File: nlvr_parser.py From allennlp-semparse with Apache License 2.0

6 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Build an instance from a JSON dict holding a sentence and its world(s).

        Grouped data carries a list under ``"worlds"``; otherwise the single
        ``"structured_rep"`` entry is wrapped in a one-element list.  Either
        value may arrive as a JSON-encoded string, in which case it is decoded
        first.  ``"identifier"`` is optional and defaults to ``None``.
        """

        def _decoded(value):
            # Values may be passed as JSON text instead of parsed objects.
            return json.loads(value) if isinstance(value, str) else value

        sentence = json_dict["sentence"]
        if "worlds" in json_dict:
            # This is grouped data
            worlds = _decoded(json_dict["worlds"])
        else:
            worlds = [_decoded(json_dict["structured_rep"])]
        identifier = json_dict.get("identifier")
        return self._dataset_reader.text_to_instance(
            sentence=sentence,  # type: ignore
            structured_representations=worlds,
            identifier=identifier,
        )

Source File: transition_ucca_predictor.py From HIT-SCIR-CoNLL2019 with Apache License 2.0

6 votes

def predict(self, sentence: str) -> JsonDict:
        """
        Run the predictor on a raw sentence.

        Parameters
        ----------
        sentence The sentence to parse.

        Returns
        -------
        The result of ``predict_json`` for the input ``{"sentence": sentence}``.
        """
        request = {"sentence": sentence}
        return self.predict_json(request)

    # def predict_json(self, inputs: JsonDict) -> JsonDict:
    #     instance = self._json_to_instance(inputs)
    #     return self.predict_instance(instance)

Source File: input_reduction.py From allennlp with Apache License 2.0

6 votes

def attack_from_json(
        self,
        inputs: JsonDict = None,
        input_field_to_attack: str = "tokens",
        grad_input_field: str = "grad_input_1",
        ignore_tokens: List[str] = None,
        target: JsonDict = None,
    ):
        """
        Apply the attack to every labeled instance derived from ``inputs``.

        Targeted attacks are not supported: passing a non-``None`` ``target``
        raises ``ValueError``.  ``ignore_tokens`` defaults to ``["@@NULL@@"]``.

        Returns the sanitized dict ``{"final": ..., "original": ...}`` where
        ``"final"`` holds one ``_attack_instance`` result per labeled instance
        and ``"original"`` is a deep copy of the tokens of the attacked field
        in the first labeled instance.
        """
        if target is not None:
            raise ValueError("Input reduction does not implement targeted attacks")
        if ignore_tokens is None:
            ignore_tokens = ["@@NULL@@"]

        labeled_instances = self.predictor.json_to_labeled_instances(inputs)
        first_field: TextField = labeled_instances[0][input_field_to_attack]  # type: ignore
        # Snapshot the tokens before any instance is attacked.
        tokens_before = deepcopy(first_field.tokens)

        reduced_tokens = [
            self._attack_instance(
                inputs, labeled, input_field_to_attack, grad_input_field, ignore_tokens
            )
            for labeled in labeled_instances
        ]
        return sanitize({"final": reduced_tokens, "original": tokens_before})

Source File: integrated_gradient.py From allennlp with Apache License 2.0

6 votes

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Compute normalized integrated-gradient saliency for each labeled instance.

        For every gradient returned by ``_integrate_gradients`` the first
        (batch) entry is summed over axis 1, and each absolute value is divided
        by the L1 norm of that summed vector.  Results are stored under the keys
        ``instance_1``, ``instance_2``, ... and the whole dict is sanitized.
        """
        # Convert inputs to labeled instances
        labeled_instances = self.predictor.json_to_labeled_instances(inputs)

        interpretations = {}
        for number, instance in enumerate(labeled_instances, start=1):
            # Run integrated gradients
            grads = self._integrate_gradients(instance)

            # Normalize results (values are replaced in place; keys are unchanged)
            for name in grads:
                # The [0] here is undo-ing the batching that happens in get_gradients.
                summed = numpy.sum(grads[name][0], axis=1)
                l1_norm = numpy.linalg.norm(summed, ord=1)
                grads[name] = [math.fabs(value) / l1_norm for value in summed]

            interpretations[f"instance_{number}"] = grads

        return sanitize(interpretations)

Source File: smooth_gradient.py From allennlp with Apache License 2.0

6 votes

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Compute normalized SmoothGrad saliency for each labeled instance.

        For every gradient returned by ``_smooth_grads`` the first (batch) entry
        is summed over axis 1, and each absolute value is divided by the L1 norm
        of that summed vector.  Results are stored under the keys
        ``instance_1``, ``instance_2``, ... and the whole dict is sanitized.
        """
        # Convert inputs to labeled instances
        labeled_instances = self.predictor.json_to_labeled_instances(inputs)

        interpretations = {}
        for number, instance in enumerate(labeled_instances, start=1):
            # Run smoothgrad
            grads = self._smooth_grads(instance)

            # Normalize results (values are replaced in place; keys are unchanged)
            for name in grads:
                # TODO (@Eric-Wallace), SmoothGrad is not using times input normalization.
                # Fine for now, but should fix for consistency.

                # The [0] here is undo-ing the batching that happens in get_gradients.
                summed = numpy.sum(grads[name][0], axis=1)
                l1_norm = numpy.linalg.norm(summed, ord=1)
                grads[name] = [math.fabs(value) / l1_norm for value in summed]

            interpretations[f"instance_{number}"] = grads

        return sanitize(interpretations)

Source File: saliency_interpreter.py From allennlp with Apache License 2.0

6 votes

def saliency_interpret_from_json(self, inputs: JsonDict) -> JsonDict:
        """
        Find saliency values for each input token.  This version always raises
        `NotImplementedError`; an implementation has to be supplied elsewhere.

        # Parameters

        inputs : `JsonDict`
            The input you want to interpret (the same as the argument to a Predictor, e.g., predict_json()).

        # Returns

        interpretation : `JsonDict`
            Contains the normalized saliency values for each input token. The dict has entries for
            each instance in the inputs JsonDict, e.g., `{instance_1: ..., instance_2:, ... }`.
            Each one of those entries has entries for the saliency of the inputs, e.g.,
            `{grad_input_1: ..., grad_input_2: ... }`.
        """
        reason = "Implement this for saliency interpretations"
        raise NotImplementedError(reason)

Source File: summary_sentences_predictor.py From summarus with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Build an instance from JSON of the form ``{"source": ...}`` by handing
        the ``"source"`` value to the dataset reader.
        """
        source_value = json_dict["source"]
        build_instance = self._dataset_reader.text_to_instance
        return build_instance(source_value)

Source File: transition_ucca_predictor.py From HIT-SCIR-CoNLL2019 with Apache License 2.0

5 votes

def predict_batch_instance(self, instances: List[Instance]) -> List[JsonDict]:
        """
        Run the model on a batch of instances and convert each output to MRP.

        Conversion is best effort: when ``ucca_trans_outputs_into_mrp`` fails
        for an output, the graph id read from its ``"meta_info"`` JSON is
        printed and an empty list is left at that position in the result.

        Returns the sanitized list with one entry per model output.
        """
        outputs_batch = self._model.forward_on_instances(instances)

        ret_dict_batch = []
        for outputs in outputs_batch:
            try:
                converted = ucca_trans_outputs_into_mrp(outputs)
            except Exception:
                # Only ordinary errors are swallowed; KeyboardInterrupt and
                # SystemExit must still be able to stop a long batch run.
                print('graph_id:' + json.loads(outputs["meta_info"])['id'])
                converted = []
            ret_dict_batch.append(converted)

        return sanitize(ret_dict_batch)

Source File: transition_ucca_predictor.py From HIT-SCIR-CoNLL2019 with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"sentence": "..."}``.

        The dict is serialized back to a JSON string and handed to
        ``parse_sentence``; the resulting tokens, meta info (wrapped in a
        one-element list) and token ranges are passed to the dataset reader.
        """
        parsed = parse_sentence(json.dumps(json_dict))

        return self._dataset_reader.text_to_instance(
            tokens=parsed["tokens"],
            meta_info=[parsed["meta_info"]],
            tokens_range=parsed["tokens_range"],
        )

Source File: entailment_pair.py From multee with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:  # type: ignore
        """
        Build an entailment instance from a premise/hypothesis pair.

        The premise is read from ``"sentence1"`` (falling back to
        ``"premise"``) and the hypothesis from ``"sentence2"`` (falling back to
        ``"hypothesis"``).  If either ends up missing or empty, a message is
        logged and ``None`` is returned.
        """
        premise_text = json_dict.get("sentence1", None) or json_dict.get("premise", None)
        hypothesis_text = json_dict.get("sentence2", None) or json_dict.get("hypothesis", None)
        if not (premise_text and hypothesis_text):
            logger.info("Error parsing input")
            return None
        return self._dataset_reader.text_to_instance(premise_text, hypothesis_text)

Source File: transition_ucca_predictor.py From HIT-SCIR-CoNLL2019 with Apache License 2.0

5 votes

def predict_instance(self, instance: Instance) -> JsonDict:
        """
        Run the model on one instance, convert its output with
        ``ucca_trans_outputs_into_mrp`` and return the sanitized result.
        """
        model_outputs = self._model.forward_on_instance(instance)
        return sanitize(ucca_trans_outputs_into_mrp(model_outputs))

Source File: tacred_predictor.py From kb with Apache License 2.0

5 votes

def dump_line(self, outputs: JsonDict) -> str:
        """
        Return the output line for one prediction: the ``REVERSE_LABEL_MAP``
        entry for ``outputs['predictions']`` followed by a newline.
        """
        label = REVERSE_LABEL_MAP[outputs['predictions']]
        return label + '\n'

Source File: transition_eds_predictor.py From HIT-SCIR-CoNLL2019 with Apache License 2.0

5 votes

def predict_instance(self, instance: Instance) -> JsonDict:
        """
        Run the model on one instance, convert its output with
        ``eds_trans_outputs_into_mrp`` and return the sanitized result.
        """
        model_outputs = self._model.forward_on_instance(instance)
        return sanitize(eds_trans_outputs_into_mrp(model_outputs))

Source File: multiple_correct_mcq_entailment.py From multee with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:  # type: ignore
        """
        Build a multiple-choice entailment instance from a JSON dict.

        ``"premises"`` and ``"hypotheses"`` are required.  The optional
        ``"entailments"`` sequence is converted into the list of positions whose
        value is truthy (``None`` when the key is absent), and the optional
        ``"relevant_sentence_idxs"`` is forwarded as is.
        """
        premises = json_dict["premises"]
        hypotheses = json_dict["hypotheses"]
        entailments = json_dict.get("entailments", None)
        answer_indices = (
            None
            if entailments is None
            else [position for position, entailed in enumerate(entailments) if entailed]
        )
        relevant_sentence_idxs = json_dict.get("relevant_sentence_idxs", None)
        return self._dataset_reader.text_to_instance(
            premises, hypotheses, answer_indices, relevant_sentence_idxs
        )

Source File: squad_predictor.py From pair2vec with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like
        ``{"question": "...", "passage": "...", "question_id": "..."}``.

        Note that the value returned is a 2-tuple: the instance produced by the
        dataset reader followed by an empty dict.
        """
        question_text = json_dict["question"]
        passage_text = json_dict["passage"]
        question_id = json_dict["question_id"]
        instance = self._dataset_reader.text_to_instance(question_text, passage_text, question_id)
        return instance, {}

Source File: get_nlvr_logical_forms.py From allennlp-semparse with Apache License 2.0

5 votes

def read_json_line(line: str) -> Tuple[str, str, List[JsonDict], List[str]]:
    """Parse one JSON line into ``(identifier, sentence, structured_reps, labels)``.

    Grouped records carry ``"worlds"`` and ``"labels"``; ungrouped records carry
    a single ``"structured_rep"`` and ``"label"``, which are returned wrapped in
    one-element lists.  Label strings are lower-cased in both cases.
    """
    record = json.loads(line)
    instance_id = record["identifier"]
    sentence = record["sentence"]

    if "worlds" not in record:
        # We're reading ungrouped data.
        return instance_id, sentence, [record["structured_rep"]], [record["label"].lower()]

    worlds = record["worlds"]
    lowered_labels = [label.lower() for label in record["labels"]]
    return instance_id, sentence, worlds, lowered_labels

Source File: atis_parser.py From allennlp-semparse with Apache License 2.0

5 votes

def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like ``{"utterance": "..."}``.

        The utterance is passed to the dataset reader wrapped in a
        one-element list.
        """
        utterances = [json_dict["utterance"]]
        return self._dataset_reader.text_to_instance(utterances)

Source File: nlvr_parser.py From allennlp-semparse with Apache License 2.0

5 votes

def dump_line(self, outputs: JsonDict) -> str:
        """
        Format one prediction as an output line.

        Outputs carrying an ``"identifier"`` become the CSV line
        ``identifier,denotation`` using the first denotation of the first entry
        in ``outputs["denotations"]``; anything else is dumped as JSON.  The
        line always ends with a newline.
        """
        if "identifier" not in outputs:
            return json.dumps(outputs) + "\n"

        # Returning CSV lines for official evaluation
        identifier = outputs["identifier"]
        first_denotation = outputs["denotations"][0][0]
        return f"{identifier},{first_denotation}\n"

Source File: nlvr_language.py From allennlp-semparse with Apache License 2.0

5 votes

def __init__(self, objects_list: List[JsonDict], box_id: int) -> None:
        """
        Build a box from the JSON descriptions of the objects it contains.

        The box is named ``"box <box_id + 1>"``.  Every description is wrapped
        in an ``Object`` tagged with that name, and the sets of colors and
        shapes found among those objects are stored on the box.
        """
        self._name = f"box {box_id + 1}"
        self._objects_string = str(list(map(str, objects_list)))
        self.objects = set(Object(description, self._name) for description in objects_list)
        self.colors = set(member.color for member in self.objects)
        self.shapes = set(member.shape for member in self.objects)

Source File: nlvr_language.py From allennlp-semparse with Apache License 2.0

5 votes

def __init__(self, attributes: JsonDict, box_id: str) -> None:
        """
        Populate the object from its JSON attributes.

        ``color`` and ``shape`` are the lower-cased ``"color"`` and ``"type"``
        entries, except that any color starting with ``"#"`` is stored as
        ``"blue"``.  ``x_loc``, ``y_loc`` and ``size`` are copied unchanged and
        ``box_id`` is kept in ``_box_id``.
        """
        raw_color = attributes["color"].lower()
        # The dataset has a hex code only for blue for some reason.
        self.color = "blue" if raw_color.startswith("#") else raw_color
        self.shape = attributes["type"].lower()
        self.x_loc = attributes["x_loc"]
        self.y_loc = attributes["y_loc"]
        self.size = attributes["size"]
        self._box_id = box_id

Source File: convert_statement.py From KagNet with MIT License

5 votes

def create_output_dict(input_json: JsonDict, premise: str, hypothesis: str) -> JsonDict:
    """Store ``premise`` and ``hypothesis`` on ``input_json`` and return it.

    The dictionary is modified in place; the returned object is the same one
    that was passed in.
    """
    input_json.update(premise=premise, hypothesis=hypothesis)
    return input_json

Source File: convert_statement.py From KagNet with MIT License

5 votes

def convert_qajson_to_entailment(qa_json: JsonDict):
    """Turn a question/choice/support record into a premise-hypothesis pair.

    The hypothesis is produced by ``create_hypothesis`` from the
    fill-in-the-blank form of the question stem and the choice text; the
    support text is used as the premise.  Returns whatever
    ``create_output_dict`` returns for ``qa_json`` and that pair.
    """
    question = qa_json["question"]
    stem = question["stem"]
    choice_text = question["choice"]["text"]
    support_text = question["support"]["text"]
    fitb = get_fitb_from_question(stem)
    hypothesis = create_hypothesis(fitb, choice_text)
    return create_output_dict(qa_json, support_text, hypothesis)


# Get a Fill-In-The-Blank (FITB) statement from the question text. E.g. "George wants to warm his
# hands quickly by rubbing them. Which skin surface will produce the most heat?" ->
# "George wants to warm his hands quickly by rubbing them. ___ skin surface will produce the most
# heat?"

Source File: convert_csqa.py From KagNet with MIT License

5 votes

def create_output_dict(input_json: JsonDict, statement: str, label: bool) -> JsonDict:
    """Append a labelled statement to ``input_json["statements"]`` and return the dict.

    The ``"statements"`` list is created on first use.  The dictionary is
    modified in place; the returned object is the same one that was passed in.
    """
    entry = {"label": label, "statement": statement}
    input_json.setdefault("statements", []).append(entry)
    return input_json

Python allennlp.common.util.JsonDict() Examples