Python allennlp.common.Params.from_file() Examples

The following are 29 code examples of allennlp.common.Params.from_file(), collected from open-source projects that use AllenNLP. Each example lists its source file, originating project, and license. You may also want to check out all available functions and classes of the module allennlp.common.Params.
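Before the project-specific examples, here is a minimal usage sketch of Params.from_file() itself. The config path and the override values are illustrative assumptions, not taken from any of the projects below.

from allennlp.common import Params

# Load a JSON/Jsonnet experiment config. The optional second argument is a
# JSON string whose values are merged over the values read from the file.
params = Params.from_file(
    "experiment.jsonnet",
    '{"trainer": {"cuda_device": -1, "num_epochs": 1}}',
)

# Params behaves like a nested dictionary; pop() returns a sub-Params object
# for dictionary-valued keys.
model_params = params.pop("model")
print(model_params.as_dict(quiet=True))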
Example #1
Source File: test_case.py    From vampire with Apache License 2.0
def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params['dataset_reader'])
        # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
        instances = list(reader.read(str(dataset_file)))
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if 'vocabulary' in params:
            vocab_params = params['vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params['model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab) 
Example #2
Source File: model_test_case.py    From allennlp with Apache License 2.0
def set_up_model(self, param_file, dataset_file):

        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params["dataset_reader"])
        # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
        instances = reader.read(str(dataset_file))
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if "vocabulary" in params:
            vocab_params = params["vocabulary"]
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.instances.index_with(vocab)
        self.model = Model.from_params(vocab=self.vocab, params=params["model"])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(list(self.instances))
        self.dataset.index_instances(self.vocab) 
Example #3
Source File: train_test.py    From allennlp with Apache License 2.0
def test_train_can_fine_tune_model_from_archive(self):
        params = Params.from_file(
            self.FIXTURES_ROOT / "basic_classifier" / "experiment_from_archive.jsonnet"
        )
        train_loop = TrainModel.from_params(
            params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
        )
        train_loop.run()

        model = Model.from_archive(
            self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        )

        # This is checking that the vocabulary actually got extended.  The data that we're using for
        # training is different from the data we used to produce the model archive, and we set
        # parameters such that the vocab should have been extended.
        assert train_loop.model.vocab.get_vocab_size() > model.vocab.get_vocab_size() 
Example #4
Source File: from_params_test.py    From allennlp with Apache License 2.0
def test_transferring_of_modules_ensures_type_consistency(self):

        model_archive = str(
            self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
        )
        trained_model = load_archive(model_archive).model

        config_file = str(self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2seq.jsonnet")
        model_params = Params.from_file(config_file).pop("model").as_dict(quiet=True)

        # Override only text_field_embedder and make it load Seq2SeqEncoder
        model_params["text_field_embedder"] = {
            "_pretrained": {
                "archive_file": model_archive,
                "module_path": "_seq2seq_encoder._module",
            }
        }
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=trained_model.vocab, params=Params(model_params)) 
Example #5
Source File: model_test_case.py    From magnitude with MIT License
def set_up_model(self, param_file, dataset_file):
        # pylint: disable=attribute-defined-outside-init
        self.param_file = param_file
        params = Params.from_file(self.param_file)

        reader = DatasetReader.from_params(params[u'dataset_reader'])
        instances = reader.read(dataset_file)
        # Use parameters for vocabulary if they are present in the config file, so that choices like
        # "non_padded_namespaces", "min_count" etc. can be set if needed.
        if u'vocabulary' in params:
            vocab_params = params[u'vocabulary']
            vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
        else:
            vocab = Vocabulary.from_instances(instances)
        self.vocab = vocab
        self.instances = instances
        self.model = Model.from_params(vocab=self.vocab, params=params[u'model'])

        # TODO(joelgrus) get rid of these
        # (a lot of the model tests use them, so they'll have to be changed)
        self.dataset = Batch(self.instances)
        self.dataset.index_instances(self.vocab) 
Example #6
Source File: bidaf_test.py    From magnitude with MIT License
def test_mismatching_dimensions_throws_configuration_error(self):
        params = Params.from_file(self.param_file)
        # Make the phrase layer wrong - it should be 10 to match
        # the embedding + char cnn dimensions.
        params[u"model"][u"phrase_layer"][u"input_size"] = 12
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

        params = Params.from_file(self.param_file)
        # Make the modeling layer input_dimension wrong - it should be 40 to match
        # 4 * output_dim of the phrase_layer.
        params[u"model"][u"phrase_layer"][u"input_size"] = 30
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

        params = Params.from_file(self.param_file)
        # Make the modeling layer input_dimension wrong - it should be 70 to match
        # 4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim.
        params[u"model"][u"span_end_encoder"][u"input_size"] = 50
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop(u"model")) 
Example #7
Source File: elmo_token_embedder_test.py    From magnitude with MIT License
def test_file_archiving(self):
        # This happens to be a good place to test auxiliary file archiving.
        # Train the model
        params = Params.from_file(self.FIXTURES_ROOT / u'elmo' / u'config' / u'characters_token_embedder.json')
        serialization_dir = os.path.join(self.TEST_DIR, u'serialization')
        train_model(params, serialization_dir)

        # Inspect the archive
        archive_file = os.path.join(serialization_dir, u'model.tar.gz')
        unarchive_dir = os.path.join(self.TEST_DIR, u'unarchive')
        with tarfile.open(archive_file, u'r:gz') as archive:
            archive.extractall(unarchive_dir)

        # It should contain `files_to_archive.json`
        fta_file = os.path.join(unarchive_dir, u'files_to_archive.json')
        assert os.path.exists(fta_file)

        # Which should properly contain { flattened_key -> original_filename }
        with open(fta_file) as fta:
            files_to_archive = json.loads(fta.read())

        assert files_to_archive == {
                u'model.text_field_embedder.token_embedders.elmo.options_file':
                        unicode(pathlib.Path(u'allennlp') / u'tests' / u'fixtures' / u'elmo' / u'options.json'),
                u'model.text_field_embedder.token_embedders.elmo.weight_file':
                        unicode(pathlib.Path(u'allennlp') / u'tests' / u'fixtures' / u'elmo' / u'lm_weights.hdf5'),
        }

        # Check that the unarchived contents of those files match the original contents.
        for key, original_filename in list(files_to_archive.items()):
            new_filename = os.path.join(unarchive_dir, u"fta", key)
            assert filecmp.cmp(original_filename, new_filename) 
Example #8
Source File: main.py    From R-net with MIT License
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False,
                          ext_vars=None) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides, ext_vars=ext_vars)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force) 
Example #9
Source File: train_multitask.py    From scicite with Apache License 2.0
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover) 
Example #10
Source File: train_multitask_two_tasks.py    From scicite with Apache License 2.0
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover) 
Example #11
Source File: knowbert_utils.py    From kb with Apache License 2.0
def _extract_config_from_archive(model_archive):
    import tarfile
    import tempfile
    import os
    with tempfile.TemporaryDirectory() as tmp:
        with tarfile.open(model_archive, 'r:gz') as archive:
            archive.extract('config.json', path=tmp)
            config = Params.from_file(os.path.join(tmp, 'config.json'))
    return config 
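A brief usage sketch of the helper above; the archive path is an assumption for illustration. The returned Params object can be indexed just like a config loaded directly with Params.from_file().

# Illustrative path; any archive produced by `allennlp train` contains a config.json.
config = _extract_config_from_archive("model.tar.gz")
reader_params = config["dataset_reader"]  # sub-Params, as in the other examples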
Example #12
Source File: copynet_test.py    From nlp-models with MIT License
def setUp(self):
        super(TestCopyNetReader, self).setUp()
        params = Params.from_file("nlpete/tests/fixtures/copynet/experiment.json")
        self.reader = DatasetReader.from_params(params["dataset_reader"])
        instances = self.reader.read("nlpete/tests/fixtures/copynet/copyover.tsv")
        self.instances = ensure_list(instances)
        self.vocab = Vocabulary.from_params(
            params=params["vocabulary"], instances=instances
        ) 
Example #13
Source File: decomposable_attention_test.py    From magnitude with MIT License
def test_mismatched_dimensions_raise_configuration_errors(self):
        params = Params.from_file(self.param_file)
        # Make the input_dim to the first feedforward_layer wrong - it should be 2.
        params[u"model"][u"attend_feedforward"][u"input_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

        params = Params.from_file(self.param_file)
        # Make the projection output_dim of the last layer wrong - it should be
        # 3, equal to the number of classes.
        params[u"model"][u"aggregate_feedforward"][u"output_dim"] = 10
        with pytest.raises(ConfigurationError):
            Model.from_params(vocab=self.vocab, params=params.pop(u"model")) 
Example #14
Source File: decomposable_attention_test.py    From magnitude with MIT License
def test_model_load(self):
        params = Params.from_file(self.FIXTURES_ROOT / u'decomposable_attention' / u'experiment.json')
        model = Model.load(params, serialization_dir=self.FIXTURES_ROOT /
                           u'decomposable_attention' / u'serialization')

        assert isinstance(model, DecomposableAttention) 
Example #15
Source File: bidaf_test.py    From magnitude with MIT License
def test_batch_predictions_are_consistent(self):
        # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
        # changing the amount of padding in the batch will result in small differences in the
        # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
        # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
        # from the model for this test.  If/when we fix the CNN encoder to work correctly with
        # masking, we can change this back to how the other models run this test, with just a
        # single line.
        # pylint: disable=protected-access,attribute-defined-outside-init

        # Save some state.
        saved_model = self.model
        saved_instances = self.instances

        # Modify the state, run the test with modified state.
        params = Params.from_file(self.param_file)
        reader = DatasetReader.from_params(params[u'dataset_reader'])
        reader._token_indexers = {u'tokens': reader._token_indexers[u'tokens']}
        self.instances = reader.read(self.FIXTURES_ROOT / u'data' / u'squad.json')
        vocab = Vocabulary.from_instances(self.instances)
        for instance in self.instances:
            instance.index_fields(vocab)
        del params[u'model'][u'text_field_embedder'][u'token_embedders'][u'token_characters']
        params[u'model'][u'phrase_layer'][u'input_size'] = 2
        self.model = Model.from_params(vocab=vocab, params=params[u'model'])

        self.ensure_batch_predictions_are_consistent()

        # Restore the state.
        self.model = saved_model
        self.instances = saved_instances 
Example #16
Source File: nlvr_coverage_semantic_parser_test.py    From magnitude with MIT License
def test_forward_with_epoch_num_changes_cost_weight(self):
        # Redefining model. We do not want this to change the state of ``self.model``.
        params = Params.from_file(self.param_file)
        model = Model.from_params(vocab=self.vocab, params=params[u'model'])
        # Initial cost weight, before forward is called.
        assert model._checklist_cost_weight == 0.8
        iterator = EpochTrackingBucketIterator(sorting_keys=[[u'sentence', u'num_tokens']])
        cost_weights = []
        for epoch_data in iterator(self.dataset, num_epochs=4):
            model.forward(**epoch_data)
            cost_weights.append(model._checklist_cost_weight)
        # The config file has ``wait_num_epochs`` set to 0, so the model starts decreasing the cost
        # weight at epoch 0 itself.
        assert_almost_equal(cost_weights, [0.72, 0.648, 0.5832, 0.52488]) 
Example #17
Source File: train.py    From ConvLab with MIT License
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force) 
Example #18
Source File: train.py    From magnitude with MIT License
def train_model_from_file(parameter_filename,
                          serialization_dir,
                          overrides=u"",
                          file_friendly_logging=False,
                          recover=False):
    u"""
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover) 
Example #19
Source File: fine_tune.py    From magnitude with MIT License
def fine_tune_model_from_file_paths(model_archive_path,
                                    config_file,
                                    serialization_dir,
                                    overrides=u"",
                                    extend_vocab=False,
                                    file_friendly_logging=False):
    u"""
    A wrapper around :func:`fine_tune_model` which loads the model archive from a file.

    Parameters
    ----------
    model_archive_path : ``str``
        Path to a saved model archive that is the result of running the ``train`` command.
    config_file : ``str``
        A configuration file specifying how to continue training.  The format is identical to the
        configuration file for the ``train`` command, but any contents in the ``model`` section is
        ignored (as we are using the provided model archive instead).
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`fine_tune_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`fine_tune_model`.
    """
    # We don't need to pass in `cuda_device` here, because the trainer will call `model.cuda()` if
    # necessary.
    archive = load_archive(model_archive_path)
    params = Params.from_file(config_file, overrides)
    return fine_tune_model(model=archive.model,
                           params=params,
                           serialization_dir=serialization_dir,
                           extend_vocab=extend_vocab,
                           file_friendly_logging=file_friendly_logging) 
Example #20
Source File: util.py    From udify with MIT License
def cache_vocab(params: Params, vocab_config_path: str = None):
    """
    Caches the vocabulary given in the Params to the filesystem. Useful for large datasets that are run repeatedly.
    :param params: the AllenNLP Params
    :param vocab_config_path: an optional config path for constructing the vocab
    """
    if "vocabulary" not in params or "directory_path" not in params["vocabulary"]:
        return

    vocab_path = params["vocabulary"]["directory_path"]

    if os.path.exists(vocab_path):
        if os.listdir(vocab_path):
            return

        # Remove empty vocabulary directory to make AllenNLP happy
        try:
            os.rmdir(vocab_path)
        except OSError:
            pass

    vocab_config_path = vocab_config_path if vocab_config_path else VOCAB_CONFIG_PATH

    params = merge_configs([params, Params.from_file(vocab_config_path)])
    params["vocabulary"].pop("directory_path", None)
    make_vocab_from_params(params, os.path.split(vocab_path)[0]) 
Example #21
Source File: train_fixtures.py    From allennlp with Apache License 2.0
def train_fixture_gpu(config_prefix: str) -> None:
    config_file = config_prefix + "experiment.json"
    serialization_dir = config_prefix + "serialization"
    params = Params.from_file(config_file)
    params["trainer"]["cuda_device"] = 0

    # train this one to a tempdir
    tempdir = tempfile.gettempdir()
    train_model(params, tempdir)

    # now copy back the weights and the archived model
    shutil.copy(os.path.join(tempdir, "best.th"), os.path.join(serialization_dir, "best_gpu.th"))
    shutil.copy(
        os.path.join(tempdir, "model.tar.gz"), os.path.join(serialization_dir, "model_gpu.tar.gz")
    ) 
Example #22
Source File: train_test.py    From allennlp with Apache License 2.0
def test_train_model_can_instantiate_from_params(self):
        params = Params.from_file(self.FIXTURES_ROOT / "simple_tagger" / "experiment.json")

        # Can instantiate from base class params
        TrainModel.from_params(
            params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
        ) 
Example #23
Source File: find_learning_rate.py    From allennlp with Apache License 2.0
def find_learning_rate_from_args(args: argparse.Namespace) -> None:
    """
    Start learning rate finder for given args
    """
    params = Params.from_file(args.param_path, args.overrides)
    find_learning_rate_model(
        params,
        args.serialization_dir,
        start_lr=args.start_lr,
        end_lr=args.end_lr,
        num_batches=args.num_batches,
        linear_steps=args.linear,
        stopping_factor=args.stopping_factor,
        force=args.force,
    ) 
Example #24
Source File: train.py    From ConvLab with MIT License
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force) 
Example #25
Source File: train.py    From ConvLab with MIT License
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force) 
Example #26
Source File: train.py    From magnitude with MIT License
def create_serialization_dir(params, serialization_dir, recover):
    u"""
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError("Serialization directory ({serialization_dir}) already exists and is "
                                     "not empty. Specify --recover to recover training from existing output.")

        logger.info("Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError(u"The serialization directory already exists but doesn't "
                                     u"contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in flat_params.keys() - flat_loaded.keys():
                logger.error(f"Key '{key}' found in training configuration but not in the serialization "
                             "directory we're recovering from.")
                fail = True
            for key in flat_loaded.keys() - flat_params.keys():
                logger.error(f"Key '{key}' found in the serialization directory we're recovering from "
                             "but not in the training config.")
                fail = True
            for key in list(flat_params.keys()):
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error("Value for '{key}' in training configuration does not match that the value in "
                                 "the serialization directory we're recovering from: "
                                 "{flat_params[key]} != {flat_loaded[key]}")
                    fail = True
            if fail:
                raise ConfigurationError(u"Training configuration does not match the configuration we're "
                                         u"recovering from.")
    else:
        if recover:
            raise ConfigurationError("--recover specified but serialization_dir ({serialization_dir}) "
                                     u"does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True) 
Example #27
Source File: train.py    From allennlp with Apache License 2.0
def train_model_from_file(
    parameter_filename: str,
    serialization_dir: str,
    overrides: str = "",
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    dry_run: bool = False,
) -> Optional[Model]:
    """
    A wrapper around [`train_model`](#train_model) which loads the params from a file.

    # Parameters

    parameter_filename : `str`
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : `str`
        The directory in which to save results and logs. We just pass this along to
        [`train_model`](#train_model).
    overrides : `str`
        A JSON string that we will use to override values in the input parameter file.
    recover : `bool`, optional (default=`False`)
        If `True`, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see `Model.from_archive`.
    force : `bool`, optional (default=`False`)
        If `True`, we will overwrite the serialization directory if it already exists.
    node_rank : `int`, optional
        Rank of the current node in distributed training
    include_package : `List[str]`, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    dry_run : `bool`, optional (default=`False`)
        Do not train a model, but create a vocabulary, show dataset statistics and other training
        information.

    # Returns

    best_model : `Optional[Model]`
        The model with the best epoch weights or `None` if in dry run.
    """
    # Load the experiment config from a file and pass it to `train_model`.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(
        params=params,
        serialization_dir=serialization_dir,
        recover=recover,
        force=force,
        node_rank=node_rank,
        include_package=include_package,
        dry_run=dry_run,
    ) 
Example #28
Source File: train_multitask_two_tasks.py    From scicite with Apache License 2.0
def create_serialization_dir(params: Params, serialization_dir: str, recover: bool) -> None:
    """
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError(f"Serialization directory ({serialization_dir}) already exists and is "
                                     f"not empty. Specify --recover to recover training from existing output.")

        logger.info(f"Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError("The serialization directory already exists but doesn't "
                                     "contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in flat_params.keys() - flat_loaded.keys():
                logger.error(f"Key '{key}' found in training configuration but not in the serialization "
                             f"directory we're recovering from.")
                fail = True
            for key in flat_loaded.keys() - flat_params.keys():
                logger.error(f"Key '{key}' found in the serialization directory we're recovering from "
                             f"but not in the training config.")
                fail = True
            for key in flat_params.keys():
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error(f"Value for '{key}' in training configuration does not match that the value in "
                                 f"the serialization directory we're recovering from: "
                                 f"{flat_params[key]} != {flat_loaded[key]}")
                    fail = True
            if fail:
                raise ConfigurationError("Training configuration does not match the configuration we're "
                                         "recovering from.")
    else:
        if recover:
            raise ConfigurationError(f"--recover specified but serialization_dir ({serialization_dir}) "
                                     "does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True) 
Example #29
Source File: train_multitask.py    From scicite with Apache License 2.0
def create_serialization_dir(params: Params, serialization_dir: str, recover: bool) -> None:
    """
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError(f"Serialization directory ({serialization_dir}) already exists and is "
                                     f"not empty. Specify --recover to recover training from existing output.")

        logger.info(f"Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError("The serialization directory already exists but doesn't "
                                     "contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in flat_params.keys() - flat_loaded.keys():
                logger.error(f"Key '{key}' found in training configuration but not in the serialization "
                             f"directory we're recovering from.")
                fail = True
            for key in flat_loaded.keys() - flat_params.keys():
                logger.error(f"Key '{key}' found in the serialization directory we're recovering from "
                             f"but not in the training config.")
                fail = True
            for key in flat_params.keys():
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error(f"Value for '{key}' in training configuration does not match that the value in "
                                 f"the serialization directory we're recovering from: "
                                 f"{flat_params[key]} != {flat_loaded[key]}")
                    fail = True
            if fail:
                raise ConfigurationError("Training configuration does not match the configuration we're "
                                         "recovering from.")
    else:
        if recover:
            raise ConfigurationError(f"--recover specified but serialization_dir ({serialization_dir}) "
                                     "does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True)