Python allennlp.models.load_archive() Examples

The following are 12 code examples of allennlp.models.load_archive(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the module allennlp.models.
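Before the examples, here is a minimal sketch of the pattern most of them share. It is illustrative only: the archive path and the predictor name are placeholders, not files or names that necessarily exist.

from allennlp.models import load_archive
from allennlp.predictors import Predictor

archive = load_archive("/path/to/model.tar.gz", cuda_device=-1)  # -1 keeps the model on CPU
model = archive.model    # the trained Model, ready for inference
config = archive.config  # the Params the model was trained with
model.eval()             # disable dropout etc. before predicting

# Optionally wrap the archive in a Predictor for JSON-in/JSON-out inference:
predictor = Predictor.from_archive(archive, "text_classifier")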
Example #1
Source File: no_op_train_test.py    From allennlp with Apache License 2.0
def test_train_model(self):
        params = lambda: Params(
            {
                "model": {"type": "constant"},
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "data_loader": {"batch_size": 2},
                "trainer": {"type": "no_op"},
            }
        )

        serialization_dir = self.TEST_DIR / "serialization_directory"
        train_model(params(), serialization_dir=serialization_dir)
        archive = load_archive(str(serialization_dir / "model.tar.gz"))
        model = archive.model
        assert model.forward(torch.tensor([1, 2, 3]))["class"] == torch.tensor(98)
        assert model.vocab.get_vocab_size() == 9 
Example #2
Source File: model_test.py    From allennlp with Apache License 2.0
def test_extend_embedder_vocab(self):
        model_archive = str(
            self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        )
        trained_model = load_archive(model_archive).model

        original_weight = trained_model._text_field_embedder.token_embedder_tokens.weight
        assert tuple(original_weight.shape) == (213, 10)

        counter = {"tokens": {"unawarded": 1}}
        trained_model.vocab._extend(counter)
        trained_model.extend_embedder_vocab()

        extended_weight = trained_model._text_field_embedder.token_embedder_tokens.weight
        assert tuple(extended_weight.shape) == (214, 10)

        assert torch.all(original_weight == extended_weight[:213, :]) 
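Everything asserted here follows from the snippet itself: extending the vocabulary with the single unseen token "unawarded" grows the token embedder by exactly one row (213 to 214), while the original 213 embedding rows are left unchanged.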
Example #3
Source File: predictor_qa_mc_with_know_visualize_test.py    From OpenBookQA with Apache License 2.0
def test_predictor():
    question_json = {"id": "1700", "question_tokens": ["@start@", "For", "what", "does", "a", "stove", "generally", "generate", "heat", "?", "@end@"], "choice_tokens_list": [["@start@", "warming", "the", "air", "in", "the", "area", "@end@"], ["@start@", "heating", "nutrients", "to", "appropriate", "temperatures", "@end@"], ["@start@", "entertaining", "various", "visitors", "and", "guests", "@end@"], ["@start@", "to", "create", "electrical", "charges", "@end@"]], "facts_tokens_list": [["@start@", "UML", "can", "generate", "code", "@end@"], ["@start@", "generate", "is", "a", "synonym", "of", "beget", "@end@"], ["@start@", "Heat", "is", "generated", "by", "a", "stove", "@end@"], ["@start@", "A", "sonnet", "is", "generally", "very", "structured", "@end@"], ["@start@", "A", "fundamentalist", "is", "generally", "right", "-", "wing", "@end@"], ["@start@", "menstruation", "is", "generally", "crampy", "@end@"], ["@start@", "an", "erection", "is", "generally", "pleasurable", "@end@"], ["@start@", "gunfire", "is", "generally", "lethal", "@end@"], ["@start@", "ejaculating", "is", "generally", "pleasurable", "@end@"], ["@start@", "Huddersfield", "is", "generally", "urban", "@end@"], ["@start@", "warming", "is", "a", "synonym", "of", "calefacient", "@end@"], ["@start@", "heat", "is", "related", "to", "warming", "air", "@end@"], ["@start@", "a", "stove", "is", "for", "warming", "food", "@end@"], ["@start@", "an", "air", "conditioning", "is", "for", "warming", "@end@"], ["@start@", "The", "earth", "is", "warming", "@end@"], ["@start@", "a", "heat", "source", "is", "for", "warming", "up", "@end@"], ["@start@", "A", "foyer", "is", "an", "enterance", "area", "@end@"], ["@start@", "Being", "nosey", "is", "not", "appropriate", "@end@"], ["@start@", "seize", "is", "a", "synonym", "of", "appropriate", "@end@"], ["@start@", "a", "fitting", "room", "is", "used", "for", "something", "appropriate", "@end@"], ["@start@", "appropriate", "is", "a", "synonym", "of", "allow", "@end@"], ["@start@", "appropriate", "is", "similar", "to", "befitting", "@end@"], ["@start@", "appropriate", "is", "similar", "to", "grade", "-", "appropriate", "@end@"], ["@start@", "grade", "-", "appropriate", "is", "similar", "to", "appropriate", "@end@"], ["@start@", "A", "parlor", "is", "used", "for", "entertaining", "guests", "@end@"], ["@start@", "a", "back", "courtyard", "is", "for", "entertaining", "guests", "@end@"], ["@start@", "guest", "is", "a", "type", "of", "visitor", "@end@"], ["@start@", "a", "family", "room", "is", "for", "entertaining", "guests", "@end@"], ["@start@", "cooking", "a", "meal", "is", "for", "entertaining", "guests", "@end@"], ["@start@", "buying", "a", "house", "is", "for", "entertaining", "guests", "@end@"], ["@start@", "having", "a", "party", "is", "for", "entertaining", "guests", "@end@"], ["@start@", "a", "dining", "area", "is", "used", "for", "entertaining", "guests", "@end@"], ["@start@", "visitor", "is", "related", "to", "guest", "@end@"], ["@start@", "guest", "is", "related", "to", "visitor", "@end@"], ["@start@", "Electrical", "charges", "are", "additive", "@end@"], ["@start@", "Lightning", "is", "an", "electrical", "charge", "@end@"], ["@start@", "electrons", "have", "electrical", "charge", "@end@"], ["@start@", "A", "judge", "is", "in", "charge", "in", "a", "courtroom", "@end@"], ["@start@", "charge", "is", "a", "synonym", "of", "accusation", "@end@"], ["@start@", "A", "consultant", "can", "charge", "a", "fee", "to", "a", "client", "@end@"], ["@start@", "charge", "is", "a", "synonym", "of", "commission", 
"@end@"], ["@start@", "charge", "is", "a", "synonym", "of", "cathexis", "@end@"], ["@start@", "charge", "is", "not", "cash", "@end@"], ["@start@", "arraign", "entails", "charge", "@end@"], ["@start@", "a", "stove", "generates", "heat", "for", "cooking", "usually", "@end@"], ["@start@", "preferences", "are", "generally", "learned", "characteristics", "@end@"], ["@start@", "a", "windmill", "does", "not", "create", "pollution", "@end@"], ["@start@", "temperature", "is", "a", "measure", "of", "heat", "energy", "@end@"], ["@start@", "a", "hot", "something", "is", "a", "source", "of", "heat", "@end@"], ["@start@", "the", "moon", "does", "not", "contain", "water", "@end@"], ["@start@", "sunlight", "produces", "heat", "@end@"], ["@start@", "an", "oven", "is", "a", "source", "of", "heat", "@end@"], ["@start@", "a", "hot", "substance", "is", "a", "source", "of", "heat", "@end@"], ["@start@", "a", "car", "engine", "is", "a", "source", "of", "heat", "@end@"], ["@start@", "as", "the", "amount", "of", "rainfall", "increases", "in", "an", "area", ",", "the", "amount", "of", "available", "water", "in", "that", "area", "will", "increase", "@end@"], ["@start@", "sound", "can", "travel", "through", "air", "@end@"], ["@start@", "the", "greenhouse", "effect", "is", "when", "carbon", "in", "the", "air", "heats", "a", "planet", "'s", "atmosphere", "@end@"], ["@start@", "a", "community", "is", "made", "of", "many", "types", "of", "organisms", "in", "an", "area", "@end@"], ["@start@", "air", "is", "a", "vehicle", "for", "sound", "@end@"], ["@start@", "rainfall", "is", "the", "amount", "of", "rain", "an", "area", "receives", "@end@"], ["@start@", "an", "animal", "requires", "air", "for", "survival", "@end@"], ["@start@", "humidity", "is", "the", "amount", "of", "water", "vapor", "in", "the", "air", "@end@"], ["@start@", "if", "some", "nutrients", "are", "in", "the", "soil", "then", "those", "nutrients", "are", "in", "the", "food", "chain", "@end@"], ["@start@", "as", "heat", "is", "transferred", "from", "something", "to", "something", "else", ",", "the", "temperature", "of", "that", "something", "will", "decrease", "@end@"], ["@start@", "uneven", "heating", "causes", "convection", "@end@"], ["@start@", "as", "temperature", "during", "the", "day", "increases", ",", "the", "temperature", "in", "an", "environment", "will", "increase", "@end@"], ["@start@", "uneven", "heating", "of", "the", "Earth", "'s", "surface", "cause", "wind", "@end@"], ["@start@", "an", "animal", "needs", "to", "eat", "food", "for", "nutrients", "@end@"], ["@start@", "soil", "contains", "nutrients", "for", "plants", "@end@"], ["@start@", "if", "two", "objects", "have", "the", "same", "charge", "then", "those", "two", "materials", "will", "repel", "each", "other", "@end@"], ["@start@", "water", "is", "an", "electrical", "conductor", "@end@"], ["@start@", "a", "battery", "is", "a", "source", "of", "electrical", "energy", "@end@"], ["@start@", "metal", "is", "an", "electrical", "energy", "conductor", "@end@"], ["@start@", "when", "an", "electrical", "circuit", "is", "working", "properly", ",", "electrical", "current", "runs", "through", "the", "wires", "in", "that", "circuit", "@end@"], ["@start@", "brick", "is", "an", "electrical", "insulator", "@end@"], ["@start@", "wood", "is", "an", "electrical", "energy", "insulator", "@end@"], ["@start@", "a", "toaster", "converts", "electrical", "energy", "into", "heat", "energy", "for", "toasting", "@end@"]], "gold_label": 1, "gold_facts": {"fact1": "a stove generates heat for cooking usually", "fact2": 
"cooking involves heating nutrients to higher temperatures"}, "label_probs": [0.002615198493003845, 0.9686304330825806, 0.008927381597459316, 0.01982697658240795], "label_ranks": [3, 0, 2, 1], "predicted_label": 1, }

    inputs = question_to_predictor_input(question_json)
    inputs = predictor_input_to_pred_input_with_full_question_text(inputs)
    print(json.dumps(inputs, indent=4))

    archive = load_archive('_trained_models/model_CN5_1202.tar.gz')
    predictor = Predictor.from_archive(archive, 'predictor-qa-mc-with-know-visualize')

    result = predictor.predict_json(inputs)

    print(result) 
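The second argument to Predictor.from_archive is the name under which the Predictor subclass was registered ('predictor-qa-mc-with-know-visualize' here); the archive supplies the trained model and its configuration, and the predictor wraps them with the JSON pre- and post-processing.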
Example #4
Source File: train_test.py    From allennlp with Apache License 2.0
def test_train_model_distributed(self):
        if torch.cuda.device_count() >= 2:
            devices = [0, 1]
        else:
            devices = [-1, -1]

        params = lambda: Params(
            {
                "model": {
                    "type": "simple_tagger",
                    "text_field_embedder": {
                        "token_embedders": {"tokens": {"type": "embedding", "embedding_dim": 5}}
                    },
                    "encoder": {"type": "lstm", "input_size": 5, "hidden_size": 7, "num_layers": 2},
                },
                "dataset_reader": {"type": "sequence_tagging"},
                "train_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "validation_data_path": SEQUENCE_TAGGING_DATA_PATH,
                "data_loader": {"batch_size": 2},
                "trainer": {"num_epochs": 2, "optimizer": "adam"},
                "distributed": {"cuda_devices": devices},
            }
        )

        out_dir = os.path.join(self.TEST_DIR, "test_distributed_train")
        train_model(params(), serialization_dir=out_dir)

        # Check that some logs specific to distributed
        # training are where we expect.
        serialized_files = os.listdir(out_dir)
        assert "out_worker0.log" in serialized_files
        assert "out_worker1.log" in serialized_files
        assert "model.tar.gz" in serialized_files

        # Check we can load the serialized model
        assert load_archive(out_dir).model 
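Note that the final assertion passes the serialization directory itself to load_archive rather than the model.tar.gz inside it. In the AllenNLP version this test targets, load_archive also accepts a directory containing the serialized files and skips the un-archiving step in that case.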
Example #5
Source File: util_test.py    From allennlp with Apache License 2.0
def test_inspect_model_parameters(self):
        model_archive = str(
            self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        )
        parameters_inspection = str(
            self.FIXTURES_ROOT / "basic_classifier" / "parameters_inspection.json"
        )
        model = load_archive(model_archive).model
        with open(parameters_inspection) as file:
            parameters_inspection_dict = json.load(file)
        assert parameters_inspection_dict == util.inspect_parameters(model) 
Example #6
Source File: fine_tune.py    From magnitude with MIT License
def fine_tune_model_from_file_paths(model_archive_path,
                                    config_file,
                                    serialization_dir,
                                    overrides=u"",
                                    extend_vocab=False,
                                    file_friendly_logging=False):
    u"""
    A wrapper around :func:`fine_tune_model` which loads the model archive from a file.

    Parameters
    ----------
    model_archive_path : ``str``
        Path to a saved model archive that is the result of running the ``train`` command.
    config_file : ``str``
        A configuration file specifying how to continue training.  The format is identical to the
        configuration file for the ``train`` command, but any contents in the ``model`` section are
        ignored (as we are using the provided model archive instead).
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`fine_tune_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    extend_vocab : ``bool``, optional (default=False)
        If ``True``, we extend the model's vocabulary with the tokens in the new training data.
        We just pass this along to :func:`fine_tune_model`.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`fine_tune_model`.
    """
    # We don't need to pass in `cuda_device` here, because the trainer will call `model.cuda()` if
    # necessary.
    archive = load_archive(model_archive_path)
    params = Params.from_file(config_file, overrides)
    return fine_tune_model(model=archive.model,
                           params=params,
                           serialization_dir=serialization_dir,
                           extend_vocab=extend_vocab,
                           file_friendly_logging=file_friendly_logging) 
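A hedged usage sketch for the function above; every path here is hypothetical:

fine_tuned_model = fine_tune_model_from_file_paths(
    model_archive_path=u"/path/to/model.tar.gz",
    config_file=u"/path/to/fine_tune_config.json",
    serialization_dir=u"/path/to/output_dir",
    extend_vocab=False)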
Example #7
Source File: fine_tune_test.py    From magnitude with MIT License
def test_fine_tune_does_not_expand_vocab_by_default(self):
        params = Params.from_file(self.config_file)
        # snli2 has a new token in it
        params[u"train_data_path"] = unicode(self.FIXTURES_ROOT / u'data' / u'snli2.jsonl')

        model = load_archive(self.model_archive).model

        # By default, no vocab expansion.
        fine_tune_model(model, params, self.serialization_dir) 
Example #8
Source File: fine_tune_test.py    From magnitude with MIT License
def test_fine_tune_runtime_errors_with_vocab_expansion(self):
        params = Params.from_file(self.config_file)
        params[u"train_data_path"] = unicode(self.FIXTURES_ROOT / u'data' / u'snli2.jsonl')

        model = load_archive(self.model_archive).model

        # If we do vocab expansion, we get a runtime error because of the embedding.
        with pytest.raises(RuntimeError):
            fine_tune_model(model, params, self.serialization_dir, extend_vocab=True) 
Example #9
Source File: fine_tune_test.py    From magnitude with MIT License
def test_fine_tune_nograd_regex(self):
        original_model = load_archive(self.model_archive).model
        name_parameters_original = dict(original_model.named_parameters())
        regex_lists = [[],
                       [u".*attend_feedforward.*", u".*token_embedder.*"],
                       [u".*compare_feedforward.*"]]
        for regex_list in regex_lists:
            params = Params.from_file(self.config_file)
            params[u"trainer"][u"no_grad"] = regex_list
            shutil.rmtree(self.serialization_dir, ignore_errors=True)
            tuned_model = fine_tune_model(model=original_model,
                                          params=params,
                                          serialization_dir=self.serialization_dir)
            # If a regex is matched, the parameter should have requires_grad False;
            # otherwise it should keep the same requires_grad as in the originally
            # loaded model.
            for name, parameter in tuned_model.named_parameters():
                if any(re.search(regex, name) for regex in regex_list):
                    assert not parameter.requires_grad
                else:
                    assert (parameter.requires_grad
                            == name_parameters_original[name].requires_grad)
        # If all parameters have requires_grad=False, then error.
        with pytest.raises(Exception) as _:
            params = Params.from_file(self.config_file)
            params[u"trainer"][u"no_grad"] = [u"*"]
            shutil.rmtree(self.serialization_dir, ignore_errors=True)
            tuned_model = fine_tune_model(model=original_model,
                                          params=params,
                                          serialization_dir=self.serialization_dir) 
Example #10
Source File: __init__.py    From nanigonet with MIT License
def __init__(self, model_path, top_k=3, cuda_device=-1):
        archive = load_archive(model_path,
                               cuda_device=cuda_device)

        config = archive.config
        prepare_environment(config)
        model = archive.model
        model.eval()

        self.model = model

        self._tokenizer = CharacterTokenizer()
        self._token_indexers = {'tokens': SingleIdTokenIndexer()}
        self._id_to_label = model.vocab.get_index_to_token_vocabulary(namespace='labels')
        self._top_k = top_k 
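As elsewhere on this page, cuda_device=-1 loads the archived model onto the CPU, while a non-negative integer selects the GPU with that index. The model.eval() call then switches off dropout and other training-only behavior before the model is used for inference.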
Example #11
Source File: eval_da.py    From fever-naacl-2018 with Apache License 2.0
def eval_model(db: FeverDocDB, args) -> Model:
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device)

    config = archive.config
    ds_params = config["dataset_reader"]

    model = archive.model
    model.eval()

    reader = FEVERReader(db,
                         sentence_level=ds_params.pop("sentence_level", False),
                         wiki_tokenizer=Tokenizer.from_params(ds_params.pop('wiki_tokenizer', {})),
                         claim_tokenizer=Tokenizer.from_params(ds_params.pop('claim_tokenizer', {})),
                         token_indexers=TokenIndexer.dict_from_params(ds_params.pop('token_indexers', {})))

    logger.info("Reading training data from %s", args.in_file)
    data = reader.read(args.in_file).instances

    actual = []
    predicted = []

    if args.log is not None:
        f = open(args.log,"w+")

    for item in tqdm(data):
        if item.fields["premise"] is None or item.fields["premise"].sequence_length() == 0:
            cls = "NOT ENOUGH INFO"
        else:
            prediction = model.forward_on_instance(item, args.cuda_device)
            cls = model.vocab._index_to_token["labels"][np.argmax(prediction["label_probs"])]

        if "label" in item.fields:
            actual.append(item.fields["label"].label)
        predicted.append(cls)

        if args.log is not None:
            if "label" in item.fields:
                f.write(json.dumps({"actual":item.fields["label"].label,"predicted":cls})+"\n")
            else:
                f.write(json.dumps({"predicted":cls})+"\n")

    if args.log is not None:
        f.close()


    if len(actual) > 0:
        print(accuracy_score(actual, predicted))
        print(classification_report(actual, predicted))
        print(confusion_matrix(actual, predicted))

    return model 
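One caveat on this snippet (and the next one): model.vocab._index_to_token is a private attribute of Vocabulary; the public way to map a predicted index back to a label is model.vocab.get_token_from_index(index, namespace="labels").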
Example #12
Source File: interactive.py    From fever-naacl-2018 with Apache License 2.0
def eval_model(db: FeverDocDB, args) -> Model:
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device, overrides=args.overrides)

    config = archive.config
    ds_params = config["dataset_reader"]

    model = archive.model
    model.eval()

    reader = FEVERReader(db,
                         sentence_level=ds_params.pop("sentence_level", False),
                         wiki_tokenizer=Tokenizer.from_params(ds_params.pop('wiki_tokenizer', {})),
                         claim_tokenizer=Tokenizer.from_params(ds_params.pop('claim_tokenizer', {})),
                         token_indexers=TokenIndexer.dict_from_params(ds_params.pop('token_indexers', {})))


    while True:

        claim = input("enter claim (or q to quit) >>")
        if claim.lower() == "q":
            break

        ranker = retriever.get_class('tfidf')(tfidf_path=args.model)

        p_lines = []
        pages,_ = ranker.closest_docs(claim,5)

        for page in pages:
            lines = db.get_doc_lines(page)
            lines = [line.split("\t")[1] if len(line.split("\t")[1]) > 1 else "" for line in lines.split("\n")]

            p_lines.extend(zip(lines, [page] * len(lines), range(len(lines))))

        scores = tf_idf_sim(claim, [pl[0] for pl in p_lines])
        scores = list(zip(scores, [pl[1] for pl in p_lines], [pl[2] for pl in p_lines], [pl[0] for pl in p_lines]))
        scores = list(filter(lambda score: len(score[3].strip()), scores))
        sentences_l = list(sorted(scores, reverse=True, key=lambda elem: elem[0]))

        sentences = [s[3] for s in sentences_l[:5]]
        evidence = " ".join(sentences)


        print("Best pages: {0}".format(repr(pages)))

        print("Evidence:")
        for idx,sentence in enumerate(sentences_l[:5]):
            print("{0}\t{1}\t\t{2}\t{3}".format(idx+1, sentence[0], sentence[1],sentence[3]) )

        item = reader.text_to_instance(evidence, claim)

        prediction = model.forward_on_instance(item, args.cuda_device)
        cls = model.vocab._index_to_token["labels"][np.argmax(prediction["label_probs"])]
        print("PREDICTED: {0}".format(cls))
        print()