Python utils.load_vocab() Examples
The following are 4 code examples of utils.load_vocab(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
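Every project ships its own utils module, so the signature and return type of load_vocab() vary: Examples #1 and #2 treat the result as a list of tokens indexed by id, while Example #4 uses it as a token-to-id dictionary. Below is a minimal sketch of both variants, assuming a vocabulary file with one token per line; the file format is an assumption and not taken from any of the projects shown here.

# Hypothetical load_vocab() sketches; each project's utils module defines its own
# version, so the one-token-per-line file format used here is an assumption.

def load_vocab_as_list(vocab_path):
    # List form: vocab[i] is the token whose id is i (used like Examples #1 and #2).
    with open(vocab_path, encoding='utf-8') as f:
        return [line.rstrip('\n') for line in f]

def load_vocab_as_dict(vocab_path):
    # Dict form: maps token -> id (used like Example #4).
    with open(vocab_path, encoding='utf-8') as f:
        return {line.rstrip('\n'): idx for idx, line in enumerate(f)}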
Example #1
Source File: infer.py From listen-attend-and-spell with Apache License 2.0 | 5 votes |
def main(args):
    vocab_list = np.array(utils.load_vocab(args.vocab))
    vocab_size = len(vocab_list)

    config = tf.estimator.RunConfig(model_dir=args.model_dir)
    hparams = utils.create_hparams(
        args, vocab_size, utils.SOS_ID, utils.EOS_ID)
    hparams.decoder.set_hparam('beam_width', args.beam_width)

    model = tf.estimator.Estimator(
        model_fn=las_model_fn,
        config=config,
        params=hparams)

    predictions = model.predict(
        input_fn=lambda: input_fn(
            args.data, args.vocab, num_channels=args.num_channels,
            batch_size=args.batch_size, num_epochs=1),
        predict_keys='sample_ids')

    if args.beam_width > 0:
        predictions = [vocab_list[y['sample_ids'][:, 0]].tolist() + [utils.EOS]
                       for y in predictions]
    else:
        predictions = [vocab_list[y['sample_ids']].tolist() + [utils.EOS]
                       for y in predictions]

    predictions = [' '.join(y[:y.index(utils.EOS)]) for y in predictions]

    with open(args.save, 'w') as f:
        f.write('\n'.join(predictions))
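A note on the decoding step above: because the vocabulary is wrapped in np.array(), indexing it with the array of sample ids maps every id to its token in a single NumPy fancy-indexing call, and the sentence is then truncated at the first EOS marker. A self-contained toy illustration of that step (the vocabulary and ids below are made up):

import numpy as np

vocab_list = np.array(['<sos>', '<eos>', 'hello', 'world'])  # toy vocabulary, id -> token
sample_ids = np.array([2, 3, 1, 0])                          # toy decoder output

tokens = vocab_list[sample_ids].tolist() + ['<eos>']  # map ids to tokens, append sentinel
sentence = ' '.join(tokens[:tokens.index('<eos>')])   # cut at first '<eos>' -> 'hello world'
print(sentence)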
Example #2
Source File: train.py From listen-attend-and-spell with Apache License 2.0 | 5 votes |
def main(args):
    vocab_list = utils.load_vocab(args.vocab)
    vocab_size = len(vocab_list)

    config = tf.estimator.RunConfig(model_dir=args.model_dir)
    hparams = utils.create_hparams(
        args, vocab_size, utils.SOS_ID, utils.EOS_ID)

    model = tf.estimator.Estimator(
        model_fn=las_model_fn,
        config=config,
        params=hparams)

    if args.valid:
        train_spec = tf.estimator.TrainSpec(
            input_fn=lambda: input_fn(
                args.train, args.vocab, num_channels=args.num_channels,
                batch_size=args.batch_size, num_epochs=args.num_epochs))
        eval_spec = tf.estimator.EvalSpec(
            input_fn=lambda: input_fn(
                args.valid or args.train, args.vocab,
                num_channels=args.num_channels, batch_size=args.batch_size),
            start_delay_secs=60,
            throttle_secs=args.eval_secs)
        tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
    else:
        model.train(
            input_fn=lambda: input_fn(
                args.train, args.vocab, num_channels=args.num_channels,
                batch_size=args.batch_size, num_epochs=args.num_epochs))
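Examples #1 and #2 both expect an args namespace carrying fields such as train, valid, vocab, model_dir, batch_size, num_epochs, num_channels and eval_secs. How those fields are populated is project-specific; a hypothetical argparse setup that would satisfy both scripts (the flag names mirror the attributes used above, while the defaults and help strings are assumptions) might look like this:

import argparse

# Hypothetical argument parser; the real listen-attend-and-spell scripts define
# their own flags, so everything beyond the attribute names above is an assumption.
parser = argparse.ArgumentParser()
parser.add_argument('--train', help='path to the training data')
parser.add_argument('--valid', default=None, help='optional path to validation data')
parser.add_argument('--vocab', help='vocabulary file passed to utils.load_vocab')
parser.add_argument('--model_dir', help='directory for checkpoints and summaries')
parser.add_argument('--num_channels', type=int, default=39)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--num_epochs', type=int, default=20)
parser.add_argument('--eval_secs', type=int, default=300)
args = parser.parse_args()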
Example #3
Source File: lucene_search.py From dl4ir-query-reformulator with BSD 3-Clause "New" or "Revised" License | 5 votes |
def create_index(self, index_folder, docs_path, add_terms=False):
    print 'Loading Vocab...'
    if not self.vocab:
        self.vocab = utils.load_vocab(prm.vocab_path, prm.n_words)

    os.mkdir(index_folder)

    self.t1 = FieldType()
    self.t1.setStored(True)
    self.t1.setIndexOptions(IndexOptions.DOCS)

    self.t2 = FieldType()
    self.t2.setStored(False)
    self.t2.setIndexOptions(IndexOptions.DOCS_AND_FREQS)

    self.t3 = FieldType()
    self.t3.setStored(True)
    self.t3.setIndexOptions(IndexOptions.NONE)

    fsDir = MMapDirectory(Paths.get(index_folder))
    writerConfig = IndexWriterConfig(StandardAnalyzer())
    self.writer = IndexWriter(fsDir, writerConfig)
    print "%d docs in index" % self.writer.numDocs()
    print "Indexing documents..."

    doc_id = 0

    import corpus_hdf5
    corpus = corpus_hdf5.CorpusHDF5(docs_path)
    for txt in corpus.get_text_iter():
        title = corpus.get_article_title(doc_id)
        self.add_doc(doc_id, title, txt, add_terms)
        if doc_id % 1000 == 0:
            print 'indexing doc', doc_id
        doc_id += 1

    print "Index of %d docs..." % self.writer.numDocs()
    self.writer.close()
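Unlike the previous examples, this project calls load_vocab() with two arguments, prm.vocab_path and prm.n_words, which suggests the vocabulary is capped at a fixed number of entries. A hypothetical two-argument variant in the same spirit is sketched below; the file format and truncation rule are assumptions, not the project's actual implementation.

# Hypothetical sketch of a capped load_vocab(path, n_words); the real
# dl4ir-query-reformulator helper may store its vocabulary differently.
def load_vocab(vocab_path, n_words=None):
    vocab = {}
    with open(vocab_path, encoding='utf-8') as f:
        for idx, line in enumerate(f):
            if n_words is not None and idx >= n_words:
                break  # keep only the first n_words entries
            vocab[line.rstrip('\n')] = idx
    return vocab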
Example #4
Source File: main.py From Bert-BiLSTM-CRF-pytorch with MIT License | 4 votes |
def train(**kwargs):
    config = Config()
    config.update(**kwargs)
    print('Current settings:\n', config)
    if config.use_cuda:
        torch.cuda.set_device(config.gpu)
    print('loading corpus')
    vocab = load_vocab(config.vocab)
    label_dic = load_vocab(config.label_file)
    tagset_size = len(label_dic)

    train_data = read_corpus(config.train_file, max_length=config.max_length,
                             label_dic=label_dic, vocab=vocab)
    dev_data = read_corpus(config.dev_file, max_length=config.max_length,
                           label_dic=label_dic, vocab=vocab)

    train_ids = torch.LongTensor([temp.input_id for temp in train_data])
    train_masks = torch.LongTensor([temp.input_mask for temp in train_data])
    train_tags = torch.LongTensor([temp.label_id for temp in train_data])
    train_dataset = TensorDataset(train_ids, train_masks, train_tags)
    train_loader = DataLoader(train_dataset, shuffle=True, batch_size=config.batch_size)

    dev_ids = torch.LongTensor([temp.input_id for temp in dev_data])
    dev_masks = torch.LongTensor([temp.input_mask for temp in dev_data])
    dev_tags = torch.LongTensor([temp.label_id for temp in dev_data])
    dev_dataset = TensorDataset(dev_ids, dev_masks, dev_tags)
    dev_loader = DataLoader(dev_dataset, shuffle=True, batch_size=config.batch_size)

    model = BERT_LSTM_CRF(config.bert_path, tagset_size, config.bert_embedding,
                          config.rnn_hidden, config.rnn_layer,
                          dropout_ratio=config.dropout_ratio,
                          dropout1=config.dropout1, use_cuda=config.use_cuda)
    if config.load_model:
        assert config.load_path is not None
        model = load_model(model, name=config.load_path)
    if config.use_cuda:
        model.cuda()
    model.train()

    optimizer = getattr(optim, config.optim)
    optimizer = optimizer(model.parameters(), lr=config.lr,
                          weight_decay=config.weight_decay)

    eval_loss = 10000
    for epoch in range(config.base_epoch):
        step = 0
        for i, batch in enumerate(train_loader):
            step += 1
            model.zero_grad()
            inputs, masks, tags = batch
            inputs, masks, tags = Variable(inputs), Variable(masks), Variable(tags)
            if config.use_cuda:
                inputs, masks, tags = inputs.cuda(), masks.cuda(), tags.cuda()
            feats = model(inputs, masks)
            loss = model.loss(feats, masks, tags)
            loss.backward()
            optimizer.step()
            if step % 50 == 0:
                print('step: {} | epoch: {} | loss: {}'.format(step, epoch, loss.item()))
        loss_temp = dev(model, dev_loader, epoch, config)
        if loss_temp < eval_loss:
            save_model(model, epoch)
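Here load_vocab() is called twice, once for the token vocabulary and once for the label set, and both dictionaries are handed to read_corpus(), which returns examples carrying input_id, input_mask and label_id fields padded to max_length. A rough sketch of how such a token-to-id dictionary can be used to build one padded example is shown below; this is an illustration only, not the project's actual read_corpus(), which also handles BERT special tokens and label encoding.

# Hypothetical padding helper; the real read_corpus() in the project does more
# (BERT special tokens, label ids), which is omitted here for brevity.
def encode_example(tokens, vocab, max_length, unk='[UNK]', pad_id=0):
    ids = [vocab.get(tok, vocab.get(unk, pad_id)) for tok in tokens][:max_length]
    mask = [1] * len(ids)
    ids += [pad_id] * (max_length - len(ids))    # pad token ids to max_length
    mask += [0] * (max_length - len(mask))       # mask marks real tokens vs. padding
    return ids, mask

vocab = {'[PAD]': 0, '[UNK]': 1, 'hello': 2, 'world': 3}   # toy vocabulary
input_id, input_mask = encode_example(['hello', 'world'], vocab, max_length=5)
print(input_id)    # [2, 3, 0, 0, 0]
print(input_mask)  # [1, 1, 0, 0, 0]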