Python allennlp.data.dataset_readers.dataset_reader.DatasetReader.from_params() Examples
The following are 7 code examples of allennlp.data.dataset_readers.dataset_reader.DatasetReader.from_params(). You can go to the original project or source file by following the link above each example. You may also want to check out all available functions/classes of the module allennlp.data.dataset_readers.dataset_reader.DatasetReader.
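Before the examples, here is a minimal sketch of the underlying pattern: from_params() takes a Params object (typically a sub-dictionary popped from an experiment config) and uses its "type" key to look up and construct a registered reader. This sketch assumes the classic 0.x-era AllenNLP API used by most of the examples below; "snli" is one of the built-in registered readers, and the data path is a hypothetical placeholder.

from allennlp.common.params import Params
from allennlp.data.dataset_readers.dataset_reader import DatasetReader

# A minimal sketch, assuming the 0.x-era AllenNLP API.
# "snli" is a built-in registered reader type.
reader_params = Params({"type": "snli"})
reader = DatasetReader.from_params(reader_params)
instances = reader.read("path/to/snli.jsonl")  # hypothetical path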
Example #1
Source File: evaluate_custom.py From OpenBookQA with Apache License 2.0
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    dataset = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)

    metrics = evaluate(model, dataset, iterator, args.output_file)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
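The config.pop("iterator") call above consumes the iterator sub-config the same way the reader config is consumed. As a rough illustration (not taken from OpenBookQA's actual config), a bucket iterator config in the 0.x API might look like:

from allennlp.common.params import Params
from allennlp.data.iterators import DataIterator

# Hypothetical iterator config; "bucket" is a registered 0.x iterator type.
iterator = DataIterator.from_params(Params({
    "type": "bucket",
    "batch_size": 32,
    "sorting_keys": [["tokens", "num_tokens"]],
}))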
Example #2
Source File: train.py From magnitude with MIT License
def datasets_from_params(params):
    u"""
    Load all the datasets specified by the config.
    """
    dataset_reader = DatasetReader.from_params(params.pop(u'dataset_reader'))

    validation_dataset_reader_params = params.pop(u"validation_dataset_reader", None)
    validation_and_test_dataset_reader = dataset_reader
    if validation_dataset_reader_params is not None:
        logger.info(u"Using a separate dataset reader to load validation and test data.")
        validation_and_test_dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)

    train_data_path = params.pop(u'train_data_path')
    logger.info(u"Reading training data from %s", train_data_path)
    train_data = dataset_reader.read(train_data_path)

    datasets = {u"train": train_data}

    validation_data_path = params.pop(u'validation_data_path', None)
    if validation_data_path is not None:
        logger.info(u"Reading validation data from %s", validation_data_path)
        validation_data = validation_and_test_dataset_reader.read(validation_data_path)
        datasets[u"validation"] = validation_data

    test_data_path = params.pop(u"test_data_path", None)
    if test_data_path is not None:
        logger.info(u"Reading test data from %s", test_data_path)
        test_data = validation_and_test_dataset_reader.read(test_data_path)
        datasets[u"test"] = test_data

    return datasets
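To see what this function consumes, here is a hedged sketch of the relevant top-level config keys. The paths and reader types are placeholders, not taken from the magnitude project:

from allennlp.common.params import Params

# Hypothetical config; datasets_from_params pops exactly these keys.
params = Params({
    "dataset_reader": {"type": "snli"},
    "validation_dataset_reader": {"type": "snli", "lazy": True},  # optional
    "train_data_path": "data/train.jsonl",
    "validation_data_path": "data/dev.jsonl",  # optional
    "test_data_path": "data/test.jsonl",       # optional
})
datasets = datasets_from_params(params)  # {"train": ..., "validation": ..., "test": ...}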
Example #3
Source File: target_to_lines.py From summarus with Apache License 2.0
def target_to_lines(archive_file, input_file, output_file, lowercase=True):
    archive = load_archive(archive_file)
    reader = DatasetReader.from_params(archive.config.pop("dataset_reader"))
    with open(output_file, "w") as w:
        for t in reader.parse_set(input_file):
            target = t[1]
            target = target.strip()
            target = target.lower() if lowercase else target
            w.write(target.replace("\n", " ") + "\n")
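Given the signature, invoking the function is straightforward. The paths below are hypothetical placeholders:

# Hypothetical invocation: extract lowercased reference targets, one per line.
target_to_lines("model.tar.gz", "data/test.jsonl", "targets.txt", lowercase=True)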
Example #4
Source File: evaluate.py From ConvLab with MIT License
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics
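The validation-reader fallback used above recurs in most of the remaining examples. Distilled into a standalone helper (a sketch derived from the code above, not code from ConvLab itself):

from allennlp.common.params import Params
from allennlp.data.dataset_readers.dataset_reader import DatasetReader

def reader_from_config(config: Params) -> DatasetReader:
    # Prefer a dedicated validation reader when the config defines one;
    # otherwise reuse the reader that was used for training.
    reader_params = config.pop("validation_dataset_reader", None)
    if reader_params is None:
        reader_params = config.pop("dataset_reader")
    return DatasetReader.from_params(reader_params)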
Example #5
Source File: evaluate.py From allennlp with Apache License 2.0
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )

    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")

    dump_metrics(args.output_file, metrics, log=True)

    return metrics
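Note that this example targets a newer AllenNLP than the others: DataIterator is replaced by DataLoader, and the instances are indexed directly via instances.index_with(model.vocab). One detail shared with the older examples is the overrides argument to load_archive, which patches the archived config (and hence the reader built by from_params) before anything is constructed. A small hedged sketch, with a hypothetical archive path:

from allennlp.models.archival import load_archive

# `overrides` is a JSON string merged into the archived config; here it
# forces the dataset reader to read lazily at evaluation time.
archive = load_archive("model.tar.gz", overrides='{"dataset_reader": {"lazy": true}}')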
Example #6
Source File: evaluate.py From magnitude with MIT License
def evaluate_from_args(args):
    # Disable some of the more verbose logging statements
    logging.getLogger(u'allennlp.common.params').disabled = True
    logging.getLogger(u'allennlp.nn.initializers').disabled = True
    logging.getLogger(u'allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop(u'validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop(u'dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info(u"Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop(u"validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop(u"iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info(u"Finished evaluating.")
    logger.info(u"Metrics:")
    for key, metric in list(metrics.items()):
        logger.info(u"%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, u"w") as file:
            json.dump(metrics, file, indent=4)
    return metrics
Example #7
Source File: test_summarization.py From summarus with Apache License 2.0
def _test_model(self, file_name):
    params = self.params[file_name].duplicate()
    reader_params = params.duplicate().pop("reader", default=Params({}))
    if reader_params["type"] == "cnn_dailymail":
        reader_params["cnn_tokenized_dir"] = TEST_STORIES_DIR
        dataset_file = TEST_URLS_FILE
    elif reader_params["type"] == "ria":
        dataset_file = RIA_EXAMPLE_FILE
    else:
        assert False

    reader = DatasetReader.from_params(reader_params)
    tokenizer = reader._tokenizer
    dataset = reader.read(dataset_file)

    vocabulary_params = params.pop("vocabulary", default=Params({}))
    vocabulary = Vocabulary.from_params(vocabulary_params, instances=dataset)

    model_params = params.pop("model")
    model = Model.from_params(model_params, vocab=vocabulary)
    print(model)
    print("Trainable params count: ", sum(p.numel() for p in model.parameters() if p.requires_grad))

    iterator = DataIterator.from_params(params.pop('iterator'))
    iterator.index_with(vocabulary)
    trainer = Trainer.from_params(model, None, iterator, dataset, None, params.pop('trainer'))
    trainer.train()

    model.eval()
    predictor = Seq2SeqPredictor(model, reader)
    for article, reference_sents in reader.parse_set(dataset_file):
        ref_words = [token.text for token in tokenizer.tokenize(reference_sents)]
        decoded_words = predictor.predict(article)["predicted_tokens"]
        self.assertGreaterEqual(len(decoded_words), len(ref_words))
        unk_count = 0
        while DEFAULT_OOV_TOKEN in decoded_words:
            unk_index = decoded_words.index(DEFAULT_OOV_TOKEN)
            decoded_words.pop(unk_index)
            unk_count += 1
            if unk_index < len(ref_words):
                ref_words.pop(unk_index)
        self.assertLess(unk_count, 5)
        self.assertListEqual(decoded_words[:len(ref_words)], ref_words)
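For context, the positional Trainer.from_params(...) call above matches the older 0.x signature (model, serialization_dir, iterator, train_data, validation_data, params). The trainer sub-config it consumes might minimally look like the following sketch; the values are hypothetical, not summarus's actual test configs:

from allennlp.common.params import Params

# Hypothetical minimal trainer sub-config for the 0.x Trainer.from_params API.
trainer_params = Params({
    "num_epochs": 1,
    "optimizer": {"type": "adam", "lr": 0.001},
})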