Python utils.load_dataset() Examples
The following are 3 code examples of utils.load_dataset(), collected from open-source projects. You can go to the original project or source file by following the link above each example.
You may also want to check out all available functions and classes of the module utils.
Example #1
Source File: base_model.py, from neural_sequence_labeling (MIT License)
def _initialize_config(self):
    # create folders and logger
    if not os.path.exists(self.cfg["checkpoint_path"]):
        os.makedirs(self.cfg["checkpoint_path"])
    if not os.path.exists(self.cfg["summary_path"]):
        os.makedirs(self.cfg["summary_path"])
    self.logger = get_logger(os.path.join(self.cfg["checkpoint_path"], "log.txt"))
    # load dictionary
    dict_data = load_dataset(self.cfg["vocab"])
    self.word_dict, self.char_dict = dict_data["word_dict"], dict_data["char_dict"]
    self.tag_dict = dict_data["tag_dict"]
    del dict_data
    self.word_vocab_size = len(self.word_dict)
    self.char_vocab_size = len(self.char_dict)
    self.tag_vocab_size = len(self.tag_dict)
    self.rev_word_dict = dict([(idx, word) for word, idx in self.word_dict.items()])
    self.rev_char_dict = dict([(idx, char) for char, idx in self.char_dict.items()])
    self.rev_tag_dict = dict([(idx, tag) for tag, idx in self.tag_dict.items()])
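Example #1 only shows the call site: here load_dataset takes the path to a serialized vocabulary and returns a dict with "word_dict", "char_dict", and "tag_dict" entries. A minimal sketch of a compatible implementation, assuming the vocabulary was pickled to disk (the pickle format is an assumption, not necessarily the project's actual storage choice):

import pickle

def load_dataset(filename):
    # Assumed implementation: deserialize a previously saved vocabulary file.
    # The returned dict must carry "word_dict", "char_dict" and "tag_dict"
    # mappings from token to integer index, matching the keys read above.
    with open(filename, "rb") as f:
        return pickle.load(f)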
Example #2
Source File: train.py, from glad (BSD 3-Clause "New" or "Revised" License)
def run(args):
    pprint(args)
    logging.basicConfig(level=logging.INFO)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    seed(args.seed)
    dataset, ontology, vocab, Eword = load_dataset()
    model = load_model(args.model, args, ontology, vocab)
    model.save_config()
    model.load_emb(Eword)
    model = model.to(model.device)
    if not args.test:
        logging.info('Starting train')
        model.run_train(dataset['train'], dataset['dev'], args)
    if args.resume:
        model.load_best_save(directory=args.resume)
    else:
        model.load_best_save(directory=args.dout)
    model = model.to(model.device)
    logging.info('Running dev evaluation')
    dev_out = model.run_eval(dataset['dev'], args)
    pprint(dev_out)
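In this example load_dataset() takes no arguments and returns four objects: the dataset splits, the ontology, the vocabulary, and a pre-trained word-embedding matrix (Eword). A minimal sketch of a function with that return shape, assuming JSON-serialized annotations and a NumPy embedding file (the paths and file formats are illustrative assumptions):

import json
import numpy as np

def load_dataset(dann='data/ann'):
    # Assumed layout: one JSON file per artifact plus a NumPy embedding
    # matrix whose rows are aligned with the vocabulary indices.
    with open('{}/ontology.json'.format(dann)) as f:
        ontology = json.load(f)
    with open('{}/vocab.json'.format(dann)) as f:
        vocab = json.load(f)
    Eword = np.load('{}/emb.npy'.format(dann))
    dataset = {}
    for split in ('train', 'dev'):
        with open('{}/{}.json'.format(dann, split)) as f:
            dataset[split] = json.load(f)
    return dataset, ontology, vocab, Eword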
Example #3
Source File: train.py, from seq2seq (MIT License)
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [en_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size, n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, optimizer, train_iter, en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f"
              % (e, val_loss, math.exp(val_loss)))
        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
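Here load_dataset(batch_size) returns batched train/validation/test iterators for a German-to-English corpus, plus the DE and EN vocabulary fields. A minimal sketch of such a function using the legacy torchtext API (the Multi30k corpus and the field settings are assumptions inferred from the DE/EN names, not confirmed project code):

from torchtext.data import Field, BucketIterator
from torchtext.datasets import Multi30k

def load_dataset(batch_size):
    # Fields define how raw sentences are tokenized and numericalized;
    # <sos>/<eos> markers tell the decoder where sequences start and stop.
    DE = Field(init_token='<sos>', eos_token='<eos>', lower=True)
    EN = Field(init_token='<sos>', eos_token='<eos>', lower=True)
    train, val, test = Multi30k.splits(exts=('.de', '.en'), fields=(DE, EN))
    # Build vocabularies from the training split only.
    DE.build_vocab(train.src, min_freq=2)
    EN.build_vocab(train.trg, min_freq=2)
    # BucketIterator batches sentences of similar length to minimize padding.
    train_iter, val_iter, test_iter = BucketIterator.splits(
        (train, val, test), batch_size=batch_size, repeat=False)
    return train_iter, val_iter, test_iter, DE, EN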