Python allennlp.common.Params.from_file() Examples
The following are 29 code examples of allennlp.common.Params.from_file().
You can go to the original project or source file by following the links above each example,
or check out all of the available functions and classes of the allennlp.common.Params module.
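Before the examples, here is a minimal sketch of how Params.from_file() is typically called. The config path and the override string below are made up for illustration; the second positional argument is the JSON overrides string that several of the train_model_from_file examples below pass through.

from allennlp.common import Params

# "experiment.jsonnet" is a hypothetical config path; any JSON/Jsonnet file in
# the AllenNLP experiment format could be used here.
params = Params.from_file("experiment.jsonnet")

# The second positional argument is a JSON string of overrides, as in the
# train_model_from_file wrappers below (the override shown is illustrative).
params = Params.from_file("experiment.jsonnet", '{"trainer": {"cuda_device": -1}}')

# The returned Params object behaves like a nested dictionary of config values.
model_params = params["model"]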
Example #1
Source File: test_case.py From vampire with Apache License 2.0 | 6 votes |
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params['dataset_reader'])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = list(reader.read(str(dataset_file)))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if 'vocabulary' in params:
        vocab_params = params['vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params['model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Example #2
Source File: model_test_case.py From allennlp with Apache License 2.0 | 6 votes |
def set_up_model(self, param_file, dataset_file):
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params["dataset_reader"])
    # The dataset reader might be lazy, but a lazy list here breaks some of our tests.
    instances = reader.read(str(dataset_file))
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if "vocabulary" in params:
        vocab_params = params["vocabulary"]
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.instances.index_with(vocab)
    self.model = Model.from_params(vocab=self.vocab, params=params["model"])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(list(self.instances))
    self.dataset.index_instances(self.vocab)
Example #3
Source File: train_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_train_can_fine_tune_model_from_archive(self):
    params = Params.from_file(
        self.FIXTURES_ROOT / "basic_classifier" / "experiment_from_archive.jsonnet"
    )
    train_loop = TrainModel.from_params(
        params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
    )
    train_loop.run()

    model = Model.from_archive(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )

    # This is checking that the vocabulary actually got extended.  The data that we're using for
    # training is different from the data we used to produce the model archive, and we set
    # parameters such that the vocab should have been extended.
    assert train_loop.model.vocab.get_vocab_size() > model.vocab.get_vocab_size()
Example #4
Source File: from_params_test.py From allennlp with Apache License 2.0 | 6 votes |
def test_transferring_of_modules_ensures_type_consistency(self):
    model_archive = str(
        self.FIXTURES_ROOT / "basic_classifier" / "serialization" / "model.tar.gz"
    )
    trained_model = load_archive(model_archive).model

    config_file = str(self.FIXTURES_ROOT / "basic_classifier" / "experiment_seq2seq.jsonnet")
    model_params = Params.from_file(config_file).pop("model").as_dict(quiet=True)

    # Override only text_field_embedder and make it load Seq2SeqEncoder
    model_params["text_field_embedder"] = {
        "_pretrained": {
            "archive_file": model_archive,
            "module_path": "_seq2seq_encoder._module",
        }
    }
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=trained_model.vocab, params=Params(model_params))
Example #5
Source File: model_test_case.py From magnitude with MIT License | 6 votes |
def set_up_model(self, param_file, dataset_file):
    # pylint: disable=attribute-defined-outside-init
    self.param_file = param_file
    params = Params.from_file(self.param_file)

    reader = DatasetReader.from_params(params[u'dataset_reader'])
    instances = reader.read(dataset_file)
    # Use parameters for vocabulary if they are present in the config file, so that choices like
    # "non_padded_namespaces", "min_count" etc. can be set if needed.
    if u'vocabulary' in params:
        vocab_params = params[u'vocabulary']
        vocab = Vocabulary.from_params(params=vocab_params, instances=instances)
    else:
        vocab = Vocabulary.from_instances(instances)
    self.vocab = vocab
    self.instances = instances
    self.model = Model.from_params(vocab=self.vocab, params=params[u'model'])

    # TODO(joelgrus) get rid of these
    # (a lot of the model tests use them, so they'll have to be changed)
    self.dataset = Batch(self.instances)
    self.dataset.index_instances(self.vocab)
Example #6
Source File: bidaf_test.py From magnitude with MIT License | 6 votes |
def test_mismatching_dimensions_throws_configuration_error(self):
    params = Params.from_file(self.param_file)
    # Make the phrase layer wrong - it should be 10 to match
    # the embedding + char cnn dimensions.
    params[u"model"][u"phrase_layer"][u"input_size"] = 12
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

    params = Params.from_file(self.param_file)
    # Make the modeling layer input_dimension wrong - it should be 40 to match
    # 4 * output_dim of the phrase_layer.
    params[u"model"][u"phrase_layer"][u"input_size"] = 30
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

    params = Params.from_file(self.param_file)
    # Make the modeling layer input_dimension wrong - it should be 70 to match
    # 4 * phrase_layer.output_dim + 3 * modeling_layer.output_dim.
    params[u"model"][u"span_end_encoder"][u"input_size"] = 50
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))
Example #7
Source File: elmo_token_embedder_test.py From magnitude with MIT License | 5 votes |
def test_file_archiving(self):
    # This happens to be a good place to test auxiliary file archiving.
    # Train the model
    params = Params.from_file(self.FIXTURES_ROOT / u'elmo' / u'config' / u'characters_token_embedder.json')
    serialization_dir = os.path.join(self.TEST_DIR, u'serialization')
    train_model(params, serialization_dir)

    # Inspect the archive
    archive_file = os.path.join(serialization_dir, u'model.tar.gz')
    unarchive_dir = os.path.join(self.TEST_DIR, u'unarchive')
    with tarfile.open(archive_file, u'r:gz') as archive:
        archive.extractall(unarchive_dir)

    # It should contain `files_to_archive.json`
    fta_file = os.path.join(unarchive_dir, u'files_to_archive.json')
    assert os.path.exists(fta_file)

    # Which should properly contain { flattened_key -> original_filename }
    with open(fta_file) as fta:
        files_to_archive = json.loads(fta.read())

    assert files_to_archive == {
        u'model.text_field_embedder.token_embedders.elmo.options_file':
            unicode(pathlib.Path(u'allennlp') / u'tests' / u'fixtures' / u'elmo' / u'options.json'),
        u'model.text_field_embedder.token_embedders.elmo.weight_file':
            unicode(pathlib.Path(u'allennlp') / u'tests' / u'fixtures' / u'elmo' / u'lm_weights.hdf5'),
    }

    # Check that the unarchived contents of those files match the original contents.
    for key, original_filename in list(files_to_archive.items()):
        new_filename = os.path.join(unarchive_dir, u"fta", key)
        assert filecmp.cmp(original_filename, new_filename)
Example #8
Source File: main.py From R-net with MIT License | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False,
                          ext_vars=None) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides, ext_vars=ext_vars)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force)
Example #9
Source File: train_multitask.py From scicite with Apache License 2.0 | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover)
Example #10
Source File: train_multitask_two_tasks.py From scicite with Apache License 2.0 | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover)
Example #11
Source File: knowbert_utils.py From kb with Apache License 2.0 | 5 votes |
def _extract_config_from_archive(model_archive):
    import tarfile
    import tempfile
    import os
    with tempfile.TemporaryDirectory() as tmp:
        with tarfile.open(model_archive, 'r:gz') as archive:
            archive.extract('config.json', path=tmp)
            config = Params.from_file(os.path.join(tmp, 'config.json'))
    return config
Example #12
Source File: copynet_test.py From nlp-models with MIT License | 5 votes |
def setUp(self):
    super(TestCopyNetReader, self).setUp()
    params = Params.from_file("nlpete/tests/fixtures/copynet/experiment.json")
    self.reader = DatasetReader.from_params(params["dataset_reader"])
    instances = self.reader.read("nlpete/tests/fixtures/copynet/copyover.tsv")
    self.instances = ensure_list(instances)
    self.vocab = Vocabulary.from_params(
        params=params["vocabulary"], instances=instances
    )
Example #13
Source File: decomposable_attention_test.py From magnitude with MIT License | 5 votes |
def test_mismatched_dimensions_raise_configuration_errors(self):
    params = Params.from_file(self.param_file)
    # Make the input_dim to the first feedforward_layer wrong - it should be 2.
    params[u"model"][u"attend_feedforward"][u"input_dim"] = 10
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))

    params = Params.from_file(self.param_file)
    # Make the projection output_dim of the last layer wrong - it should be
    # 3, equal to the number of classes.
    params[u"model"][u"aggregate_feedforward"][u"output_dim"] = 10
    with pytest.raises(ConfigurationError):
        Model.from_params(vocab=self.vocab, params=params.pop(u"model"))
Example #14
Source File: decomposable_attention_test.py From magnitude with MIT License | 5 votes |
def test_model_load(self):
    params = Params.from_file(self.FIXTURES_ROOT / u'decomposable_attention' / u'experiment.json')
    model = Model.load(params,
                       serialization_dir=self.FIXTURES_ROOT / u'decomposable_attention' / u'serialization')

    assert isinstance(model, DecomposableAttention)
Example #15
Source File: bidaf_test.py From magnitude with MIT License | 5 votes |
def test_batch_predictions_are_consistent(self):
    # The CNN encoder has problems with this kind of test - it's not properly masked yet, so
    # changing the amount of padding in the batch will result in small differences in the
    # output of the encoder.  Because BiDAF is so deep, these differences get magnified through
    # the network and make this test impossible.  So, we'll remove the CNN encoder entirely
    # from the model for this test.  If/when we fix the CNN encoder to work correctly with
    # masking, we can change this back to how the other models run this test, with just a
    # single line.
    # pylint: disable=protected-access,attribute-defined-outside-init

    # Save some state.
    saved_model = self.model
    saved_instances = self.instances

    # Modify the state, run the test with modified state.
    params = Params.from_file(self.param_file)
    reader = DatasetReader.from_params(params[u'dataset_reader'])
    reader._token_indexers = {u'tokens': reader._token_indexers[u'tokens']}
    self.instances = reader.read(self.FIXTURES_ROOT / u'data' / u'squad.json')
    vocab = Vocabulary.from_instances(self.instances)
    for instance in self.instances:
        instance.index_fields(vocab)
    del params[u'model'][u'text_field_embedder'][u'token_embedders'][u'token_characters']
    params[u'model'][u'phrase_layer'][u'input_size'] = 2
    self.model = Model.from_params(vocab=vocab, params=params[u'model'])

    self.ensure_batch_predictions_are_consistent()

    # Restore the state.
    self.model = saved_model
    self.instances = saved_instances
Example #16
Source File: nlvr_coverage_semantic_parser_test.py From magnitude with MIT License | 5 votes |
def test_forward_with_epoch_num_changes_cost_weight(self):
    # Redefining model. We do not want this to change the state of ``self.model``.
    params = Params.from_file(self.param_file)
    model = Model.from_params(vocab=self.vocab, params=params[u'model'])
    # Initial cost weight, before forward is called.
    assert model._checklist_cost_weight == 0.8
    iterator = EpochTrackingBucketIterator(sorting_keys=[[u'sentence', u'num_tokens']])
    cost_weights = []
    for epoch_data in iterator(self.dataset, num_epochs=4):
        model.forward(**epoch_data)
        cost_weights.append(model._checklist_cost_weight)
    # The config file has ``wait_num_epochs`` set to 0, so the model starts decreasing the cost
    # weight at epoch 0 itself.
    assert_almost_equal(cost_weights, [0.72, 0.648, 0.5832, 0.52488])
Example #17
Source File: train.py From ConvLab with MIT License | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force)
Example #18
Source File: train.py From magnitude with MIT License | 5 votes |
def train_model_from_file(parameter_filename,
                          serialization_dir,
                          overrides=u"",
                          file_friendly_logging=False,
                          recover=False):
    u"""
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover)
Example #19
Source File: fine_tune.py From magnitude with MIT License | 5 votes |
def fine_tune_model_from_file_paths(model_archive_path,
                                    config_file,
                                    serialization_dir,
                                    overrides=u"",
                                    extend_vocab=False,
                                    file_friendly_logging=False):
    u"""
    A wrapper around :func:`fine_tune_model` which loads the model archive from a file.

    Parameters
    ----------
    model_archive_path : ``str``
        Path to a saved model archive that is the result of running the ``train`` command.
    config_file : ``str``
        A configuration file specifying how to continue training.  The format is identical to the
        configuration file for the ``train`` command, but any contents in the ``model`` section is
        ignored (as we are using the provided model archive instead).
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`fine_tune_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`fine_tune_model`.
    """
    # We don't need to pass in `cuda_device` here, because the trainer will call `model.cuda()` if
    # necessary.
    archive = load_archive(model_archive_path)
    params = Params.from_file(config_file, overrides)
    return fine_tune_model(model=archive.model,
                           params=params,
                           serialization_dir=serialization_dir,
                           extend_vocab=extend_vocab,
                           file_friendly_logging=file_friendly_logging)
Example #20
Source File: util.py From udify with MIT License | 5 votes |
def cache_vocab(params: Params, vocab_config_path: str = None):
    """
    Caches the vocabulary given in the Params to the filesystem. Useful for large datasets that are run repeatedly.
    :param params: the AllenNLP Params
    :param vocab_config_path: an optional config path for constructing the vocab
    """
    if "vocabulary" not in params or "directory_path" not in params["vocabulary"]:
        return

    vocab_path = params["vocabulary"]["directory_path"]

    if os.path.exists(vocab_path):
        if os.listdir(vocab_path):
            return

        # Remove empty vocabulary directory to make AllenNLP happy
        try:
            os.rmdir(vocab_path)
        except OSError:
            pass

    vocab_config_path = vocab_config_path if vocab_config_path else VOCAB_CONFIG_PATH

    params = merge_configs([params, Params.from_file(vocab_config_path)])
    params["vocabulary"].pop("directory_path", None)
    make_vocab_from_params(params, os.path.split(vocab_path)[0])
Example #21
Source File: train_fixtures.py From allennlp with Apache License 2.0 | 5 votes |
def train_fixture_gpu(config_prefix: str) -> None:
    config_file = config_prefix + "experiment.json"
    serialization_dir = config_prefix + "serialization"
    params = Params.from_file(config_file)
    params["trainer"]["cuda_device"] = 0

    # train this one to a tempdir
    tempdir = tempfile.gettempdir()
    train_model(params, tempdir)

    # now copy back the weights and archived model
    shutil.copy(os.path.join(tempdir, "best.th"), os.path.join(serialization_dir, "best_gpu.th"))
    shutil.copy(
        os.path.join(tempdir, "model.tar.gz"), os.path.join(serialization_dir, "model_gpu.tar.gz")
    )
Example #22
Source File: train_test.py From allennlp with Apache License 2.0 | 5 votes |
def test_train_model_can_instantiate_from_params(self):
    params = Params.from_file(self.FIXTURES_ROOT / "simple_tagger" / "experiment.json")

    # Can instantiate from base class params
    TrainModel.from_params(
        params=params, serialization_dir=self.TEST_DIR, local_rank=0, batch_weight_key=""
    )
Example #23
Source File: find_learning_rate.py From allennlp with Apache License 2.0 | 5 votes |
def find_learning_rate_from_args(args: argparse.Namespace) -> None:
    """
    Start learning rate finder for given args
    """
    params = Params.from_file(args.param_path, args.overrides)
    find_learning_rate_model(
        params,
        args.serialization_dir,
        start_lr=args.start_lr,
        end_lr=args.end_lr,
        num_batches=args.num_batches,
        linear_steps=args.linear,
        stopping_factor=args.stopping_factor,
        force=args.force,
    )
Example #24
Source File: train.py From ConvLab with MIT License | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force)
Example #25
Source File: train.py From ConvLab with MIT License | 5 votes |
def train_model_from_file(parameter_filename: str,
                          serialization_dir: str,
                          overrides: str = "",
                          file_friendly_logging: bool = False,
                          recover: bool = False,
                          force: bool = False) -> Model:
    """
    A wrapper around :func:`train_model` which loads the params from a file.

    Parameters
    ----------
    parameter_filename : ``str``
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs. We just pass this along to
        :func:`train_model`.
    overrides : ``str``
        A JSON string that we will use to override values in the input parameter file.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we make our output more friendly to saved model files.  We just pass this
        along to :func:`train_model`.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    """
    # Load the experiment config from a file and pass it to ``train_model``.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(params, serialization_dir, file_friendly_logging, recover, force)
Example #26
Source File: train.py From magnitude with MIT License | 4 votes |
def create_serialization_dir(params, serialization_dir, recover):
    u"""
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError("Serialization directory ({}) already exists and is "
                                     "not empty. Specify --recover to recover training from "
                                     "existing output.".format(serialization_dir))

        logger.info("Recovering from prior training at {}.".format(serialization_dir))

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError(u"The serialization directory already exists but doesn't "
                                     u"contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in set(flat_params.keys()) - set(flat_loaded.keys()):
                logger.error("Key '{}' found in training configuration but not in the "
                             "serialization directory we're recovering from.".format(key))
                fail = True
            for key in set(flat_loaded.keys()) - set(flat_params.keys()):
                logger.error("Key '{}' found in the serialization directory we're recovering from "
                             "but not in the training config.".format(key))
                fail = True
            for key in flat_params.keys():
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error("Value for '{}' in training configuration does not match the value "
                                 "in the serialization directory we're recovering from: "
                                 "{} != {}".format(key, flat_params[key], flat_loaded[key]))
                    fail = True
            if fail:
                raise ConfigurationError(u"Training configuration does not match the configuration we're "
                                         u"recovering from.")
    else:
        if recover:
            raise ConfigurationError("--recover specified but serialization_dir ({}) "
                                     "does not exist.  There is nothing to recover from."
                                     .format(serialization_dir))
        os.makedirs(serialization_dir, exist_ok=True)
Example #27
Source File: train.py From allennlp with Apache License 2.0 | 4 votes |
def train_model_from_file(
    parameter_filename: str,
    serialization_dir: str,
    overrides: str = "",
    recover: bool = False,
    force: bool = False,
    node_rank: int = 0,
    include_package: List[str] = None,
    dry_run: bool = False,
) -> Optional[Model]:
    """
    A wrapper around [`train_model`](#train_model) which loads the params from a file.

    # Parameters

    parameter_filename : `str`
        A json parameter file specifying an AllenNLP experiment.
    serialization_dir : `str`
        The directory in which to save results and logs. We just pass this along to
        [`train_model`](#train_model).
    overrides : `str`
        A JSON string that we will use to override values in the input parameter file.
    recover : `bool`, optional (default=`False`)
        If `True`, we will try to recover a training run from an existing serialization
        directory.  This is only intended for use when something actually crashed during the middle
        of a run.  For continuing training a model on new data, see `Model.from_archive`.
    force : `bool`, optional (default=`False`)
        If `True`, we will overwrite the serialization directory if it already exists.
    node_rank : `int`, optional
        Rank of the current node in distributed training
    include_package : `str`, optional
        In distributed mode, extra packages mentioned will be imported in trainer workers.
    dry_run : `bool`, optional (default=`False`)
        Do not train a model, but create a vocabulary, show dataset statistics and other training
        information.

    # Returns

    best_model : `Optional[Model]`
        The model with the best epoch weights or `None` if in dry run.
    """
    # Load the experiment config from a file and pass it to `train_model`.
    params = Params.from_file(parameter_filename, overrides)
    return train_model(
        params=params,
        serialization_dir=serialization_dir,
        recover=recover,
        force=force,
        node_rank=node_rank,
        include_package=include_package,
        dry_run=dry_run,
    )
Example #28
Source File: train_multitask_two_tasks.py From scicite with Apache License 2.0 | 4 votes |
def create_serialization_dir(params: Params, serialization_dir: str, recover: bool) -> None:
    """
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError(f"Serialization directory ({serialization_dir}) already exists and is "
                                     f"not empty. Specify --recover to recover training from existing output.")

        logger.info(f"Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError("The serialization directory already exists but doesn't "
                                     "contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in flat_params.keys() - flat_loaded.keys():
                logger.error(f"Key '{key}' found in training configuration but not in the serialization "
                             f"directory we're recovering from.")
                fail = True
            for key in flat_loaded.keys() - flat_params.keys():
                logger.error(f"Key '{key}' found in the serialization directory we're recovering from "
                             f"but not in the training config.")
                fail = True
            for key in flat_params.keys():
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error(f"Value for '{key}' in training configuration does not match the value in "
                                 f"the serialization directory we're recovering from: "
                                 f"{flat_params[key]} != {flat_loaded[key]}")
                    fail = True
            if fail:
                raise ConfigurationError("Training configuration does not match the configuration we're "
                                         "recovering from.")
    else:
        if recover:
            raise ConfigurationError(f"--recover specified but serialization_dir ({serialization_dir}) "
                                     "does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True)
Example #29
Source File: train_multitask.py From scicite with Apache License 2.0 | 4 votes |
def create_serialization_dir(params: Params, serialization_dir: str, recover: bool) -> None:
    """
    This function creates the serialization directory if it doesn't exist.  If it already exists
    and is non-empty, then it verifies that we're recovering from a training with an identical configuration.

    Parameters
    ----------
    params: ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir: ``str``
        The directory in which to save results and logs.
    recover: ``bool``
        If ``True``, we will try to recover from an existing serialization directory, and crash if
        the directory doesn't exist, or doesn't match the configuration we're given.
    """
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        if not recover:
            raise ConfigurationError(f"Serialization directory ({serialization_dir}) already exists and is "
                                     f"not empty. Specify --recover to recover training from existing output.")

        logger.info(f"Recovering from prior training at {serialization_dir}.")

        recovered_config_file = os.path.join(serialization_dir, CONFIG_NAME)
        if not os.path.exists(recovered_config_file):
            raise ConfigurationError("The serialization directory already exists but doesn't "
                                     "contain a config.json. You probably gave the wrong directory.")
        else:
            loaded_params = Params.from_file(recovered_config_file)

            # Check whether any of the training configuration differs from the configuration we are
            # resuming.  If so, warn the user that training may fail.
            fail = False
            flat_params = params.as_flat_dict()
            flat_loaded = loaded_params.as_flat_dict()
            for key in flat_params.keys() - flat_loaded.keys():
                logger.error(f"Key '{key}' found in training configuration but not in the serialization "
                             f"directory we're recovering from.")
                fail = True
            for key in flat_loaded.keys() - flat_params.keys():
                logger.error(f"Key '{key}' found in the serialization directory we're recovering from "
                             f"but not in the training config.")
                fail = True
            for key in flat_params.keys():
                if flat_params.get(key, None) != flat_loaded.get(key, None):
                    logger.error(f"Value for '{key}' in training configuration does not match the value in "
                                 f"the serialization directory we're recovering from: "
                                 f"{flat_params[key]} != {flat_loaded[key]}")
                    fail = True
            if fail:
                raise ConfigurationError("Training configuration does not match the configuration we're "
                                         "recovering from.")
    else:
        if recover:
            raise ConfigurationError(f"--recover specified but serialization_dir ({serialization_dir}) "
                                     "does not exist.  There is nothing to recover from.")
        os.makedirs(serialization_dir, exist_ok=True)