Python model.Seq2Seq() Examples
The following are 11 code examples of model.Seq2Seq(), collected from open-source projects. The reference above each example names the original project, source file, and license. You may also want to check out the other functions and classes available in the model module.
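These projects define Seq2Seq differently (some in PyTorch, some in TensorFlow 1.x), but all follow the same encoder-decoder pattern. As a hypothetical point of reference only, not code taken from any project below, a minimal PyTorch-style Seq2Seq wrapper might look like this:

import torch.nn as nn

class Seq2Seq(nn.Module):
    """Minimal encoder-decoder wrapper (illustrative sketch, not a project's actual class)."""

    def __init__(self, encoder: nn.Module, decoder: nn.Module):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg):
        # Encode the source sequence, then decode conditioned on its final hidden state.
        _, hidden = self.encoder(src)
        output, _ = self.decoder(trg, hidden)
        return output

Here the encoder is assumed to return (outputs, hidden) and the decoder to accept (trg, hidden), which matches common RNN-based implementations but not necessarily any specific project below.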
Example #1
Source File: test.py From seq2seq-summarizer with MIT License
def decode_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, criterion=None, *,
                 pack_seq=True, show_cover_loss=False) -> Tuple[List[List[str]], Seq2SeqOutput]:
    """Test the `model` on the `batch`, return the decoded textual tokens and the Seq2SeqOutput."""
    if not pack_seq:
        input_lengths = None
    else:
        input_lengths = batch.input_lengths
    with torch.no_grad():
        input_tensor = batch.input_tensor.to(DEVICE)
        if batch.target_tensor is None or criterion is None:
            target_tensor = None
        else:
            target_tensor = batch.target_tensor.to(DEVICE)
        out = model(input_tensor, target_tensor, input_lengths, criterion,
                    ext_vocab_size=batch.ext_vocab_size, include_cover_loss=show_cover_loss)
        decoded_batch = decode_batch_output(out.decoded_tokens, vocab, batch.oov_dict)
    target_length = batch.target_tensor.size(0)
    out.loss_value /= target_length
    return decoded_batch, out
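The torch.no_grad() context used here (and in the other evaluation helpers in this collection) disables autograd graph construction during inference, saving memory and time. A self-contained illustration:

import torch

x = torch.ones(3, requires_grad=True)
with torch.no_grad():
    y = x * 2  # computed without building an autograd graph
print(y.requires_grad)  # False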
Example #2
Source File: test.py From seq2seq-summarizer with MIT License
def eval_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, criterion=None, *,
               pack_seq=True, show_cover_loss=False) -> Tuple[float, float]:
    """Test the `model` on the `batch`, return the ROUGE score and the loss."""
    decoded_batch, out = decode_batch(batch, model, vocab, criterion=criterion,
                                      pack_seq=pack_seq, show_cover_loss=show_cover_loss)
    examples = batch[0]
    gold_summaries = [ex.tgt for ex in examples]
    scores = rouge(gold_summaries, decoded_batch)
    return out.loss_value, scores[0]['l_f']
Example #3
Source File: test.py From seq2seq-summarizer with MIT License
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
    test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
    n_samples = int(params.test_sample_ratio * len(test_set.pairs))

    if params.test_save_results and params.model_path_prefix:
        result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
    else:
        result_file = None

    model.eval()
    r1, r2, rl, rsu4 = 0, 0, 0, 0
    prog_bar = tqdm(range(1, n_samples + 1))
    for i in prog_bar:
        batch = next(test_gen)
        scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                             beam_size=params.beam_size,
                                             min_out_len=params.min_out_len,
                                             max_out_len=params.max_out_len,
                                             len_in_words=params.out_len_in_words,
                                             details=result_file is not None)
        if file_content:
            file_content = file_content.encode('utf-8')
            file_info = tarfile.TarInfo(name='%06d.txt' % i)
            file_info.size = len(file_content)
            result_file.addfile(file_info, fileobj=BytesIO(file_content))
        if scores:
            r1 += scores[0]['1_f']
            r2 += scores[0]['2_f']
            rl += scores[0]['l_f']
            rsu4 += scores[0]['su4_f']
            prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                                 RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100))
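Note how the per-document results are streamed straight from memory into a .tgz archive using tarfile.TarInfo and BytesIO, with no temporary files on disk. A self-contained sketch of that pattern (the file names here are made up):

import tarfile
from io import BytesIO

content = 'decoded summary text'.encode('utf-8')
with tarfile.open('results.tgz', 'w:gz') as archive:
    info = tarfile.TarInfo(name='000001.txt')
    info.size = len(content)  # TarInfo requires the size to be set explicitly
    archive.addfile(info, fileobj=BytesIO(content))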
Example #4
Source File: chat.py From ai-seminar with MIT License
def __init__(self, voc_path, train_dir):
    self.dialog = Dialog()
    self.dialog.load_vocab(voc_path)

    self.model = Seq2Seq(self.dialog.vocab_size)

    self.sess = tf.Session()
    ckpt = tf.train.get_checkpoint_state(train_dir)
    self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
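This restore step assumes a checkpoint already exists in train_dir; tf.train.get_checkpoint_state returns None otherwise, so production code usually guards the call. A minimal sketch of that guard, assuming TensorFlow 1.x and a made-up directory name:

import tensorflow as tf  # TensorFlow 1.x API, as in the examples here

ckpt = tf.train.get_checkpoint_state('./train_dir')  # None if no checkpoint file exists
if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
    print('would restore from', ckpt.model_checkpoint_path)
else:
    print('no checkpoint found; variables must be initialized from scratch')

Example #5 below applies exactly this guard before deciding whether to restore or initialize.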
Example #5
Source File: train.py From ai-seminar with MIT License
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        # TODO: the session-loading and summary-logging logic should be moved into the Seq2Seq model
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Reading the model from:", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("Creating a new model.")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        total_batch = int(math.ceil(len(dialog.examples) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)
                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Optimization complete!')
Example #6
Source File: train.py From ai-seminar with MIT License
def test(dialog, batch_size=100):
    print("\n=== Prediction Test ===")

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("Reading the model from:", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        # use integer division so randrange receives an int
        pick = random.randrange(0, len(expect) // 2)
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])

        print("\nAccuracy:", accuracy)
        print("Random sample\n")
        print("    Input:", input)
        print(" Expected:", expect)
        print("Predicted:", ' '.join(outputs))
Example #7
Source File: train.py From seq2seq with MIT License
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset), len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [EN_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size, n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size, n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs + 1):
        train(e, seq2seq, optimizer, train_iter, en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2f" % (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss)
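The best-model bookkeeping above round-trips model.state_dict() through torch.save and load_state_dict. A self-contained sketch of that pattern, with nn.Linear standing in for the seq2seq model since the real classes live in the project:

import os
import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for the trained seq2seq model
os.makedirs('.save', exist_ok=True)
torch.save(model.state_dict(), './.save/seq2seq_1.pt')

restored = nn.Linear(4, 2)  # must be constructed with the same architecture
restored.load_state_dict(torch.load('./.save/seq2seq_1.pt'))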
Example #8
Source File: test.py From seq2seq-summarizer with MIT License
def eval_bs_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, *, pack_seq=True, beam_size=4,
                  min_out_len=1, max_out_len=None, len_in_words=True, best_only=True,
                  details: bool = True) -> Tuple[Optional[List[Dict[str, float]]], Optional[str]]:
    """
    Test a trained summarizer on a document using the beam search decoder.

    :param batch: a test batch of a single example
    :param model: a trained summarizer
    :param vocab: vocabulary of the trained summarizer
    :param pack_seq: currently has no effect as batch size is 1
    :param beam_size: the beam size
    :param min_out_len: required minimum output length
    :param max_out_len: required maximum output length (if None, use the model's own value)
    :param len_in_words: if True, count output length in words instead of tokens
                         (i.e. do not count punctuation)
    :param best_only: if True, run ROUGE only on the best hypothesis instead of all `beam_size` many
    :param details: if True, also return a string containing the result of this document
    :return: two-level score lookup (hypothesis index => ROUGE metric => value)
    """
    assert len(batch.examples) == 1
    with torch.no_grad():
        input_tensor = batch.input_tensor.to(DEVICE)
        hypotheses = model.beam_search(input_tensor,
                                       batch.input_lengths if pack_seq else None,
                                       batch.ext_vocab_size, beam_size,
                                       min_out_len=min_out_len,
                                       max_out_len=max_out_len,
                                       len_in_words=len_in_words)
    if best_only:
        to_decode = [hypotheses[0].tokens]
    else:
        to_decode = [h.tokens for h in hypotheses]
    decoded_batch = decode_batch_output(to_decode, vocab, batch.oov_dict)
    if details:
        file_content = "[System Summary]\n" + format_tokens(decoded_batch[0])
    else:
        file_content = None
    if batch.examples[0].tgt is not None:  # run ROUGE if a gold-standard summary exists
        gold_summaries = [batch.examples[0].tgt for _ in range(len(decoded_batch))]
        scores = rouge(gold_summaries, decoded_batch)
        if details:
            file_content += "\n\n\n[Reference Summary]\n" + format_tokens(batch.examples[0].tgt)
            file_content += "\n\n\n[ROUGE Scores]\n" + format_rouge_scores(scores[0]) + "\n"
    else:
        scores = None
    if details:
        file_content += "\n\n\n[Source Text]\n" + format_tokens(batch.examples[0].src)
    return scores, file_content
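model.beam_search here is the project's own decoder, so its internals are not shown. As a rough, purely illustrative sketch of the underlying idea, here is a toy beam search over a fixed next-token distribution (every value in this snippet is made up):

import math

# Toy next-token log-probabilities, independent of context.
LOG_PROBS = {'a': math.log(0.5), 'b': math.log(0.3), '</s>': math.log(0.2)}

def toy_beam_search(beam_size=2, max_len=3):
    beams = [([], 0.0)]  # (tokens, cumulative log-probability)
    for _ in range(max_len):
        candidates = []
        for tokens, score in beams:
            if tokens and tokens[-1] == '</s>':
                candidates.append((tokens, score))  # finished hypothesis carries over
                continue
            for tok, lp in LOG_PROBS.items():
                candidates.append((tokens + [tok], score + lp))
        # Keep only the `beam_size` highest-scoring hypotheses.
        beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:beam_size]
    return beams

print(toy_beam_search())

A real decoder like the one above adds length constraints (min_out_len, max_out_len) and typically normalizes scores by length so short hypotheses are not unfairly favored.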
Example #9
Source File: train.py From seq2seq-summarizer with MIT License
def train_batch(batch: Batch, model: Seq2Seq, criterion, optimizer, *, pack_seq=True,
                forcing_ratio=0.5, partial_forcing=True, sample=False, rl_ratio: float = 0,
                vocab=None, grad_norm: float = 0, show_cover_loss=False):
    if not pack_seq:
        input_lengths = None
    else:
        input_lengths = batch.input_lengths

    optimizer.zero_grad()
    input_tensor = batch.input_tensor.to(DEVICE)
    target_tensor = batch.target_tensor.to(DEVICE)
    ext_vocab_size = batch.ext_vocab_size

    out = model(input_tensor, target_tensor, input_lengths, criterion,
                forcing_ratio=forcing_ratio, partial_forcing=partial_forcing, sample=sample,
                ext_vocab_size=ext_vocab_size, include_cover_loss=show_cover_loss)

    if rl_ratio > 0:
        assert vocab is not None
        sample_out = model(input_tensor, saved_out=out, criterion=criterion, sample=True,
                           ext_vocab_size=ext_vocab_size)
        baseline_out = model(input_tensor, saved_out=out, visualize=False,
                             ext_vocab_size=ext_vocab_size)
        scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab, batch.oov_dict,
                                   sample_out.decoded_tokens, baseline_out.decoded_tokens)
        greedy_rouge = scores[1]['l_f']
        neg_reward = greedy_rouge - scores[0]['l_f']
        # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
        rl_loss = neg_reward * sample_out.loss
        rl_loss_value = neg_reward * sample_out.loss_value
        loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
        loss_value = (1 - rl_ratio) * out.loss_value + rl_ratio * rl_loss_value
    else:
        loss = out.loss
        loss_value = out.loss_value
        greedy_rouge = None

    loss.backward()
    if grad_norm > 0:
        clip_grad_norm_(model.parameters(), grad_norm)
    optimizer.step()

    target_length = target_tensor.size(0)
    return loss_value / target_length, greedy_rouge
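When grad_norm is positive, clip_grad_norm_ rescales all parameter gradients in place so their combined norm does not exceed the given bound, a common guard against exploding gradients in recurrent models. A minimal, self-contained demonstration:

import torch
import torch.nn as nn
from torch.nn.utils import clip_grad_norm_

model = nn.Linear(8, 1)
loss = model(torch.randn(3, 8)).sum()
loss.backward()
total_norm = clip_grad_norm_(model.parameters(), max_norm=1.0)
print(total_norm)  # the gradient norm measured before clipping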
Example #10
Source File: model_utils.py From dumb-chatbot with MIT License
def build_model(vocab_size, load_checkpoint=False, checkpoint_epoch=-1, print_module=True):
    hidden_size = config['MODEL']['HIDDEN_SIZE']
    attn_method = config['MODEL']['ATTN_METHOD']
    num_encoder_layers = config['MODEL']['N_ENCODER_LAYERS']
    dropout = config['MODEL']['DROPOUT']

    encoder = Encoder(vocab_size, hidden_size, num_encoder_layers, dropout=dropout)
    decoder = Decoder(hidden_size, vocab_size, attn_method, num_encoder_layers, dropout=dropout)
    model = Seq2Seq(
        encoder=encoder,
        decoder=decoder,
        max_length=config['LOADER']['MAX_LENGTH'],
        tie_weights=config['MODEL']['TIE_WEIGHTS']
    )
    if print_module:
        print(model)

    if load_checkpoint and os.path.exists(CHECKPOINT_PATH):
        # load a checkpoint
        prefix = config['TRAIN']['PREFIX']
        model_path = None
        if checkpoint_epoch >= 0:
            model_path = '%s%s_%d' % (CHECKPOINT_PATH, prefix, checkpoint_epoch)
        else:
            # use the last checkpoint
            checkpoints = []
            for root, dirs, files in os.walk(CHECKPOINT_PATH):
                for f_name in files:
                    f_name_sp = f_name.split('_')
                    if len(f_name_sp) == 2:
                        checkpoints.append(int(f_name_sp[1]))
            if len(checkpoints) > 0:
                model_path = '%s%s_%d' % (CHECKPOINT_PATH, prefix, max(checkpoints))
        if model_path is not None and os.path.exists(model_path):
            if IMPORT_FROM_CUDA:
                # map CUDA-saved tensors onto the CPU
                loaded = torch.load(model_path, map_location=lambda storage, loc: storage)
            else:
                loaded = torch.load(model_path)
            model.load_state_dict(loaded)
            print('Load %s' % model_path)

    if USE_CUDA:
        model = model.cuda()
    return model
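When no epoch is specified, build_model scans the checkpoint directory for files named <prefix>_<epoch> and loads the one with the highest epoch. That selection logic, isolated and run against a made-up file list:

# Pick the newest checkpoint of the form '<prefix>_<epoch>'.
files = ['chatbot_3', 'chatbot_12', 'chatbot_7', 'notes.txt']  # hypothetical names
epochs = []
for name in files:
    parts = name.split('_')
    if len(parts) == 2 and parts[1].isdigit():  # isdigit() guard added for safety
        epochs.append(int(parts[1]))
latest = max(epochs) if epochs else None
print(latest)  # 12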
Example #11
Source File: decode.py From Seq2Seq-PyTorch with Do What The F*ck You Want To Public License
def _load_model(self):
    print('Loading pretrained model')
    if self.config['model']['seq2seq'] == 'vanilla':
        print('Loading Seq2Seq Vanilla model')
        self.model = Seq2Seq(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()
    elif self.config['model']['seq2seq'] == 'attention':
        print('Loading Seq2Seq Attention model')
        self.model = Seq2SeqAttention(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            ctx_hidden_dim=self.config['model']['dim'],
            attention_mode='dot',
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()

    # open the weights file in binary mode for torch.load
    self.model.load_state_dict(torch.load(open(self.model_weights, 'rb')))