Python allennlp.models.Model() Examples

The following are 15 code examples of allennlp.models.Model(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module allennlp.models, or try the search function.
Example #1
Source File: no_op_trainer.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    serialization_dir: str,
    model: Model,
) -> None:
    """
    Build a no-op trainer around an already usable model.

    This trainer exists so that model archives can be produced for models
    that never need an actual training loop (a majority class baseline, for
    example).

    When used from a typical AllenNLP configuration file, neither
    `serialization_dir` nor `model` would need an entry.
    """
    # The parent is always initialised with ``cuda_device=-1``.
    super().__init__(serialization_dir, cuda_device=-1)

    self.model = model
Example #2
Source File: sentence_tagger.py    From allennlp with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
    language: str = "en_core_web_sm",
) -> None:
    """
    Initialise the parent with `model` and `dataset_reader`, then create the
    tokenizer used by this predictor.

    `language` is handed to `SpacyTokenizer` as its `language` argument; the
    tokenizer is always created with `pos_tags=True`.
    """
    super().__init__(model, dataset_reader)

    spacy_tokenizer = SpacyTokenizer(pos_tags=True, language=language)
    self._tokenizer = spacy_tokenizer
Example #3
Source File: predictor.py    From udify with MIT License 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
) -> None:
    """Forward `model` and `dataset_reader` unchanged to the parent initialiser."""
    super().__init__(model, dataset_reader)
Example #4
Source File: text_predictor.py    From udify with MIT License 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
    output_conllu: bool = False,
) -> None:
    """
    Set up a predictor that reads raw text and delegates to a `UdifyPredictor`.

    After the parent initialiser runs, `self._dataset_reader` is replaced by a
    `UniversalDependenciesRawDatasetReader` wrapping the reader that was stored
    there. The inner `UdifyPredictor` is built from the `model` and
    `dataset_reader` arguments as they were passed in. `output_conllu` is kept
    on the instance as-is.
    """
    super().__init__(model, dataset_reader)

    # Wrap whatever reader the parent stored on the instance.
    raw_text_reader = UniversalDependenciesRawDatasetReader(self._dataset_reader)
    self._dataset_reader = raw_text_reader

    # The delegate predictor receives the original (unwrapped) arguments.
    self.predictor = UdifyPredictor(model, dataset_reader)
    self.output_conllu = output_conllu
Example #5
Source File: copynet.py    From nlp-models with MIT License 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
) -> None:
    """
    Initialise the parent predictor and emit a `DeprecationWarning` announcing
    that the 'copynet' predictor is superseded by the 'seq2seq' predictor.
    """
    super().__init__(model, dataset_reader)

    deprecation_message = (
        "The 'copynet' predictor has been deprecated in favor of "
        "the 'seq2seq' predictor."
    )
    warnings.warn(deprecation_message, DeprecationWarning)
Example #6
Source File: prostruct_prediction.py    From propara with Apache License 2.0 5 votes vote down vote up
def __init__(self, model: Model, dataset_reader: DatasetReader) -> None:
    """
    Initialise the parent predictor and attach a word tokenizer.

    The tokenizer is a `WordTokenizer` whose word splitter is a
    `SpacyWordSplitter` created with `pos_tags=True`.
    """
    super().__init__(model, dataset_reader)

    pos_tagging_splitter = SpacyWordSplitter(pos_tags=True)
    self.tokenizer = WordTokenizer(word_splitter=pos_tagging_splitter)
Example #7
Source File: entailment_pair.py    From multee with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
) -> None:
    """
    Initialise the parent predictor and cache the indices of the three
    entailment labels.

    Each index is looked up in the "labels" namespace of the vocabulary found
    on `self._model`.
    """
    super().__init__(model, dataset_reader)

    vocabulary = self._model.vocab
    self._entailment_idx = vocabulary.get_token_index("entailment", "labels")
    self._contradiction_idx = vocabulary.get_token_index("contradiction", "labels")
    self._neutral_idx = vocabulary.get_token_index("neutral", "labels")
Example #8
Source File: summary_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def __init__(self, model: Model, dataset_reader: DatasetReader, fix_subwords=True) -> None:
    """
    Initialise the parent predictor and remember the `fix_subwords` flag.

    The flag is only stored here (as `self._fix_subwords`); how it is used is
    decided elsewhere in the class.
    """
    super().__init__(model, dataset_reader)

    self._fix_subwords = fix_subwords
Example #9
Source File: summary_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
) -> None:
    """Initialise the parent predictor with `fix_subwords` forced to ``False``."""
    super().__init__(model, dataset_reader, fix_subwords=False)
Example #10
Source File: summary_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
) -> None:
    """Initialise the parent predictor with `fix_subwords` forced to ``True``."""
    super().__init__(model, dataset_reader, fix_subwords=True)
Example #11
Source File: summary_sentences_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
    top_n=3,
    border=None,
    fix_subwords=True,
) -> None:
    """
    Initialise the parent predictor and store this predictor's settings.

    `top_n`, `border` and `fix_subwords` are saved unchanged as the private
    attributes `_top_n`, `_border` and `_fix_subwords`; they are interpreted
    elsewhere in the class.
    """
    super().__init__(model, dataset_reader)

    self._top_n = top_n
    self._border = border
    self._fix_subwords = fix_subwords
Example #12
Source File: summary_sentences_predictor.py    From summarus with Apache License 2.0 5 votes vote down vote up
def __init__(
    self,
    model: Model,
    dataset_reader: DatasetReader,
    top_n=3,
    border=None,
) -> None:
    """
    Initialise the parent predictor, forwarding `top_n` and `border`
    positionally and forcing `fix_subwords` to ``False``.
    """
    super().__init__(model, dataset_reader, top_n, border, fix_subwords=False)
Example #13
Source File: main.py    From R-net with MIT License 5 votes vote down vote up
def train_model_from_file(
    parameter_filename: str,
    serialization_dir: str,
    overrides: str = "",
    file_friendly_logging: bool = False,
    recover: bool = False,
    force: bool = False,
    ext_vars=None,
) -> Model:
    """
    Load experiment parameters from a file and hand them to :func:`train_model`.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. It is passed along
        to :func:`train_model` unchanged.
    overrides : ``str``, optional (default="")
        A JSON string used to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, output is made more friendly to saved model files. It is
        passed along to :func:`train_model` unchanged.
    recover : ``bool``, optional (default=False)
        If ``True``, try to recover a training run from an existing
        serialization directory. This is only intended for use when something
        actually crashed in the middle of a run. For continuing training of a
        model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        Passed along to :func:`train_model` unchanged.
    ext_vars : optional (default=None)
        Passed along to ``Params.from_file`` as its ``ext_vars`` argument.

    Returns
    -------
    Whatever :func:`train_model` returns.
    """
    experiment_params = Params.from_file(
        parameter_filename,
        overrides,
        ext_vars=ext_vars,
    )
    return train_model(
        experiment_params,
        serialization_dir,
        file_friendly_logging,
        recover,
        force,
    )
Example #14
Source File: eval_da.py    From fever-naacl-2018 with Apache License 2.0 4 votes vote down vote up
def eval_model(db: FeverDocDB, args) -> Model:
    """
    Run an archived model over the instances in ``args.in_file`` and report metrics.

    Parameters
    ----------
    db : ``FeverDocDB``
        Document database handed to the ``FEVERReader``.
    args
        Namespace-like object. The attributes read here are ``archive_file``,
        ``cuda_device``, ``in_file`` and ``log``. When ``log`` is not ``None``
        it is used as a path and one JSON object per instance is written to it.

    Returns
    -------
    The model loaded from the archive, switched to eval mode.

    Notes
    -----
    Accuracy, the classification report and the confusion matrix are printed
    only when at least one instance carries a ``"label"`` field. They are
    computed over the labelled instances only, so the gold and predicted lists
    always have the same length. Predictions for unlabelled instances are still
    written to the log file.
    """
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device)

    config = archive.config
    ds_params = config["dataset_reader"]

    model = archive.model
    model.eval()

    reader = FEVERReader(
        db,
        sentence_level=ds_params.pop("sentence_level", False),
        wiki_tokenizer=Tokenizer.from_params(ds_params.pop('wiki_tokenizer', {})),
        claim_tokenizer=Tokenizer.from_params(ds_params.pop('claim_tokenizer', {})),
        token_indexers=TokenIndexer.dict_from_params(ds_params.pop('token_indexers', {})),
    )

    logger.info("Reading training data from %s", args.in_file)
    data = reader.read(args.in_file).instances

    actual = []
    predicted = []

    log_file = open(args.log, "w+") if args.log is not None else None
    try:
        for item in tqdm(data):
            premise = item.fields["premise"]
            if premise is None or premise.sequence_length() == 0:
                # No evidence to condition on: fall back to the default class.
                cls = "NOT ENOUGH INFO"
            else:
                prediction = model.forward_on_instance(item, args.cuda_device)
                cls = model.vocab._index_to_token["labels"][np.argmax(prediction["label_probs"])]

            if "label" in item.fields:
                gold = item.fields["label"].label
                # Keep gold and predicted aligned: only labelled instances
                # take part in the metrics below.
                actual.append(gold)
                predicted.append(cls)
                record = {"actual": gold, "predicted": cls}
            else:
                record = {"predicted": cls}

            if log_file is not None:
                log_file.write(json.dumps(record) + "\n")
    finally:
        # Close the log even if reading or prediction raised part-way through.
        if log_file is not None:
            log_file.close()

    if len(actual) > 0:
        print(accuracy_score(actual, predicted))
        print(classification_report(actual, predicted))
        print(confusion_matrix(actual, predicted))

    return model
Example #15
Source File: interactive.py    From fever-naacl-2018 with Apache License 2.0 4 votes vote down vote up
def eval_model(db: FeverDocDB, args) -> Model:
    """
    Interactively classify claims typed on stdin using an archived model.

    For every claim entered, the five closest pages are retrieved with a TF-IDF
    ranker. Their lines are scored against the claim with ``tf_idf_sim``, and
    the five best non-blank lines are joined into the evidence passed to the
    model. The retrieved pages, the evidence lines and the predicted label are
    printed. Entering ``q`` (any case) ends the loop.

    Parameters
    ----------
    db : ``FeverDocDB``
        Document database used both by the ``FEVERReader`` and to fetch the
        lines of the retrieved pages.
    args
        Namespace-like object. The attributes read here are ``archive_file``,
        ``cuda_device``, ``overrides`` and ``model`` (the path given to the
        TF-IDF ranker as ``tfidf_path``).

    Returns
    -------
    The model loaded from the archive, switched to eval mode.
    """
    archive = load_archive(args.archive_file, cuda_device=args.cuda_device, overrides=args.overrides)

    config = archive.config
    ds_params = config["dataset_reader"]

    model = archive.model
    model.eval()

    reader = FEVERReader(
        db,
        sentence_level=ds_params.pop("sentence_level", False),
        wiki_tokenizer=Tokenizer.from_params(ds_params.pop('wiki_tokenizer', {})),
        claim_tokenizer=Tokenizer.from_params(ds_params.pop('claim_tokenizer', {})),
        token_indexers=TokenIndexer.dict_from_params(ds_params.pop('token_indexers', {})),
    )

    # Built on first use and then reused: constructing the ranker does not
    # depend on the claim, so there is no need to rebuild it for every query.
    ranker = None

    while True:

        claim = input("enter claim (or q to quit) >>")
        if claim.lower() == "q":
            break

        if ranker is None:
            ranker = retriever.get_class('tfidf')(tfidf_path=args.model)

        p_lines = []
        pages, _ = ranker.closest_docs(claim, 5)

        for page in pages:
            lines = []
            for raw_line in db.get_doc_lines(page).split("\n"):
                columns = raw_line.split("\t")
                # The text is taken from the second tab-separated column. Lines
                # without one (e.g. blank lines) become "" instead of raising
                # IndexError, so line numbering stays intact.
                if len(columns) > 1 and len(columns[1]) > 1:
                    lines.append(columns[1])
                else:
                    lines.append("")

            p_lines.extend((text, page, line_no) for line_no, text in enumerate(lines))

        similarities = tf_idf_sim(claim, [text for text, _, _ in p_lines])
        # Tuples of (score, page, line number, text); blank lines are dropped.
        scores = [
            (similarity, page, line_no, text)
            for similarity, (text, page, line_no) in zip(similarities, p_lines)
            if text.strip()
        ]
        sentences_l = sorted(scores, reverse=True, key=lambda elem: elem[0])

        top_sentences = sentences_l[:5]
        evidence = " ".join(entry[3] for entry in top_sentences)

        print("Best pages: {0}".format(repr(pages)))

        print("Evidence:")
        for idx, sentence in enumerate(top_sentences):
            print("{0}\t{1}\t\t{2}\t{3}".format(idx+1, sentence[0], sentence[1], sentence[3]))

        item = reader.text_to_instance(evidence, claim)

        prediction = model.forward_on_instance(item, args.cuda_device)
        cls = model.vocab._index_to_token["labels"][np.argmax(prediction["label_probs"])]
        print("PREDICTED: {0}".format(cls))
        print()

    # Matches the declared return type (previously the function fell off the
    # end and returned None).
    return model