Python model.Seq2Seq() Examples

The following are 11 code examples of model.Seq2Seq(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module model, or try the search function.
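Note that each project defines its own `Seq2Seq`, so the constructor signatures below differ from one example to the next. For orientation only, a hypothetical minimal PyTorch-style `Seq2Seq` wraps an encoder and a decoder; the GRU stand-ins, sizes, and shapes below are illustrative assumptions, not taken from any project on this page:

import torch
import torch.nn as nn

class Seq2Seq(nn.Module):
    """Hypothetical minimal seq2seq: encode the source, decode conditioned on it."""
    def __init__(self, encoder: nn.Module, decoder: nn.Module):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, src, trg):
        _, hidden = self.encoder(src)           # final hidden state summarizes the source
        output, _ = self.decoder(trg, hidden)   # teacher-forced decoding over `trg`
        return output

# Plain GRUs stand in for real encoder/decoder modules:
model = Seq2Seq(nn.GRU(32, 64, batch_first=True), nn.GRU(32, 64, batch_first=True))
out = model(torch.randn(8, 10, 32), torch.randn(8, 12, 32))  # (batch, seq, features)
print(out.shape)  # torch.Size([8, 12, 64])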
Example #1
Source File: test.py    From seq2seq-summarizer with MIT License 6 votes
def decode_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, criterion=None, *, pack_seq=True,
                 show_cover_loss=False) -> Tuple[List[List[str]], Seq2SeqOutput]:
  """Test the `model` on the `batch`, return the decoded textual tokens and the Seq2SeqOutput."""
  if not pack_seq:
    input_lengths = None
  else:
    input_lengths = batch.input_lengths
  with torch.no_grad():
    input_tensor = batch.input_tensor.to(DEVICE)
    if batch.target_tensor is None or criterion is None:
      target_tensor = None
    else:
      target_tensor = batch.target_tensor.to(DEVICE)
    out = model(input_tensor, target_tensor, input_lengths, criterion,
                ext_vocab_size=batch.ext_vocab_size, include_cover_loss=show_cover_loss)
    decoded_batch = decode_batch_output(out.decoded_tokens, vocab, batch.oov_dict)
  if batch.target_tensor is not None:  # loss is only meaningful when a target exists
    target_length = batch.target_tensor.size(0)
    out.loss_value /= target_length
  return decoded_batch, out
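A minimal invocation might look like this; the `batch`, `model`, and `vocab` objects are assumed to come from the project's own data pipeline and are not constructed here:

# Hypothetical call; with criterion=None, only decoding is performed (no loss).
decoded, out = decode_batch(batch, model, vocab, criterion=None, pack_seq=True)
print(' '.join(decoded[0]))  # tokens of the first decoded summary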
Example #2
Source File: test.py    From seq2seq-summarizer with MIT License 5 votes
def eval_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, criterion=None, *, pack_seq=True,
               show_cover_loss=False) -> Tuple[float, float]:
  """Test the `model` on the `batch`, return the ROUGE score and the loss."""
  decoded_batch, out = decode_batch(batch, model, vocab, criterion=criterion, pack_seq=pack_seq,
                                    show_cover_loss=show_cover_loss)
  examples = batch.examples  # clearer than the positional batch[0]
  gold_summaries = [ex.tgt for ex in examples]
  scores = rouge(gold_summaries, decoded_batch)
  return out.loss_value, scores[0]['l_f'] 
Example #3
Source File: test.py    From seq2seq-summarizer with MIT License 5 votes
def eval_bs(test_set: Dataset, vocab: Vocab, model: Seq2Seq, params: Params):
  test_gen = test_set.generator(1, vocab, None, bool(params.pointer))
  n_samples = int(params.test_sample_ratio * len(test_set.pairs))

  if params.test_save_results and params.model_path_prefix:
    result_file = tarfile.open(params.model_path_prefix + ".results.tgz", 'w:gz')
  else:
    result_file = None

  model.eval()
  r1, r2, rl, rsu4 = 0, 0, 0, 0
  prog_bar = tqdm(range(1, n_samples + 1))
  for i in prog_bar:
    batch = next(test_gen)
    scores, file_content = eval_bs_batch(batch, model, vocab, pack_seq=params.pack_seq,
                                         beam_size=params.beam_size,
                                         min_out_len=params.min_out_len,
                                         max_out_len=params.max_out_len,
                                         len_in_words=params.out_len_in_words,
                                         details=result_file is not None)
    if file_content:
      file_content = file_content.encode('utf-8')
      file_info = tarfile.TarInfo(name='%06d.txt' % i)
      file_info.size = len(file_content)
      result_file.addfile(file_info, fileobj=BytesIO(file_content))
    if scores:
      r1 += scores[0]['1_f']
      r2 += scores[0]['2_f']
      rl += scores[0]['l_f']
      rsu4 += scores[0]['su4_f']
      prog_bar.set_postfix(R1='%.4g' % (r1 / i * 100), R2='%.4g' % (r2 / i * 100),
                           RL='%.4g' % (rl / i * 100), RSU4='%.4g' % (rsu4 / i * 100)) 
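`eval_bs` reads several fields off `params`; a hypothetical subset of the project's `Params` consistent with the lookups above (all defaults here are illustrative guesses):

from dataclasses import dataclass
from typing import Optional

@dataclass
class Params:
    pointer: bool = True
    pack_seq: bool = True
    test_sample_ratio: float = 0.1
    test_save_results: bool = False
    model_path_prefix: Optional[str] = None  # e.g. 'checkpoints/my_model'
    beam_size: int = 4
    min_out_len: int = 1
    max_out_len: Optional[int] = None
    out_len_in_words: bool = False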
Example #4
Source File: chat.py    From ai-seminar with MIT License 5 votes
def __init__(self, voc_path, train_dir):
    self.dialog = Dialog()
    self.dialog.load_vocab(voc_path)

    self.model = Seq2Seq(self.dialog.vocab_size)

    self.sess = tf.Session()
    ckpt = tf.train.get_checkpoint_state(train_dir)
    self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
Example #5
Source File: train.py    From ai-seminar with MIT License 5 votes
def train(dialog, batch_size=100, epoch=100):
    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        # TODO: the session-loading and summary-logging logic should be moved into the Seq2Seq model
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("새로운 모델을 생성하는 중 입니다.")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        total_batch = int(math.ceil(len(dialog.examples)/float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = dialog.next_batch(batch_size)

            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)

                print('Step:', '%06d' % model.global_step.eval(),
                      'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.ckpt_name)
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Optimization complete!')
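The script references `FLAGS.train_dir`, `FLAGS.log_dir`, and `FLAGS.ckpt_name` without defining them; in a TF1 script they would typically be declared with `tf.app.flags`, roughly as follows (the default values are assumptions, not taken from the project):

import tensorflow as tf

tf.app.flags.DEFINE_string('train_dir', './model', 'checkpoint directory')
tf.app.flags.DEFINE_string('log_dir', './logs', 'TensorBoard summary directory')
tf.app.flags.DEFINE_string('ckpt_name', 'chatbot.ckpt', 'checkpoint file name')
FLAGS = tf.app.flags.FLAGS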
Example #6
Source File: train.py    From ai-seminar with MIT License 5 votes
def test(dialog, batch_size=100):
    print("\n=== 예측 테스트 ===")

    model = Seq2Seq(dialog.vocab_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        print("다음 파일에서 모델을 읽는 중 입니다..", ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)

        enc_input, dec_input, targets = dialog.next_batch(batch_size)

        expect, outputs, accuracy = model.test(sess, enc_input, dec_input, targets)

        expect = dialog.decode(expect)
        outputs = dialog.decode(outputs)

        pick = random.randrange(0, len(expect) // 2)  # integer division: randrange requires an int
        input = dialog.decode([dialog.examples[pick * 2]], True)
        expect = dialog.decode([dialog.examples[pick * 2 + 1]], True)
        outputs = dialog.cut_eos(outputs[pick])

        print("\nAccuracy:", accuracy)
        print("Random sample\n")
        print("    Input:", input)
        print("    Expected:", expect)
        print("    Predicted:", ' '.join(outputs))
Example #7
Source File: train.py    From seq2seq with MIT License 5 votes
def main():
    args = parse_arguments()
    hidden_size = 512
    embed_size = 256
    assert torch.cuda.is_available()

    print("[!] preparing dataset...")
    train_iter, val_iter, test_iter, DE, EN = load_dataset(args.batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[TRAIN]:%d (dataset:%d)\t[TEST]:%d (dataset:%d)"
          % (len(train_iter), len(train_iter.dataset),
             len(test_iter), len(test_iter.dataset)))
    print("[DE_vocab]:%d [en_vocab]:%d" % (de_size, en_size))

    print("[!] Instantiating models...")
    encoder = Encoder(de_size, embed_size, hidden_size,
                      n_layers=2, dropout=0.5)
    decoder = Decoder(embed_size, hidden_size, en_size,
                      n_layers=1, dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=args.lr)
    print(seq2seq)

    best_val_loss = None
    for e in range(1, args.epochs+1):
        train(e, seq2seq, optimizer, train_iter,
              en_size, args.grad_clip, DE, EN)
        val_loss = evaluate(seq2seq, val_iter, en_size, DE, EN)
        print("[Epoch:%d] val_loss:%5.3f | val_pp:%5.2fS"
              % (e, val_loss, math.exp(val_loss)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_val_loss or val_loss < best_val_loss:
            print("[!] saving model...")
            if not os.path.isdir(".save"):
                os.makedirs(".save")
            torch.save(seq2seq.state_dict(), './.save/seq2seq_%d.pt' % (e))
            best_val_loss = val_loss
    test_loss = evaluate(seq2seq, test_iter, en_size, DE, EN)
    print("[TEST] loss:%5.2f" % test_loss) 
Example #8
Source File: test.py    From seq2seq-summarizer with MIT License 4 votes
def eval_bs_batch(batch: Batch, model: Seq2Seq, vocab: Vocab, *, pack_seq=True, beam_size=4,
                  min_out_len=1, max_out_len=None, len_in_words=True, best_only=True,
                  details: bool=True) -> Tuple[Optional[List[Dict[str, float]]], Optional[str]]:
  """
  :param batch: a test batch of a single example
  :param model: a trained summarizer
  :param vocab: vocabulary of the trained summarizer
  :param pack_seq: currently has no effect as batch size is 1
  :param beam_size: the beam size
  :param min_out_len: required minimum output length
  :param max_out_len: required maximum output length (if None, use the model's own value)
  :param len_in_words: if True, count output length in words instead of tokens (i.e. do not count
                       punctuations)
  :param best_only: if True, run ROUGE only on the best hypothesis instead of all `beam size` many
  :param details: if True, also return a string containing the result of this document
  :return: two-level score lookup (hypothesis index => ROUGE metric => value)

  Test a trained summarizer on a document using the beam search decoder.
  """
  assert len(batch.examples) == 1
  with torch.no_grad():
    input_tensor = batch.input_tensor.to(DEVICE)
    hypotheses = model.beam_search(input_tensor, batch.input_lengths if pack_seq else None,
                                   batch.ext_vocab_size, beam_size, min_out_len=min_out_len,
                                   max_out_len=max_out_len, len_in_words=len_in_words)
  if best_only:
    to_decode = [hypotheses[0].tokens]
  else:
    to_decode = [h.tokens for h in hypotheses]
  decoded_batch = decode_batch_output(to_decode, vocab, batch.oov_dict)
  if details:
    file_content = "[System Summary]\n" + format_tokens(decoded_batch[0])
  else:
    file_content = None
  if batch.examples[0].tgt is not None:  # run ROUGE if gold standard summary exists
    gold_summaries = [batch.examples[0].tgt for _ in range(len(decoded_batch))]
    scores = rouge(gold_summaries, decoded_batch)
    if details:
      file_content += "\n\n\n[Reference Summary]\n" + format_tokens(batch.examples[0].tgt)
      file_content += "\n\n\n[ROUGE Scores]\n" + format_rouge_scores(scores[0]) + "\n"
  else:
    scores = None
  if details:
    file_content += "\n\n\n[Source Text]\n" + format_tokens(batch.examples[0].src)
  return scores, file_content 
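A typical call for one beam-searched document might look like this (the single-example `batch`, trained `model`, and `vocab` are assumed from the project's pipeline):

# Hypothetical invocation; beam and length settings are illustrative.
scores, report = eval_bs_batch(batch, model, vocab, beam_size=8,
                               min_out_len=10, max_out_len=60, details=True)
if scores is not None:
    print('ROUGE-L F1: %.4f' % scores[0]['l_f'])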
Example #9
Source File: train.py    From seq2seq-summarizer with MIT License 4 votes
def train_batch(batch: Batch, model: Seq2Seq, criterion, optimizer, *,
                pack_seq=True, forcing_ratio=0.5, partial_forcing=True, sample=False,
                rl_ratio: float=0, vocab=None, grad_norm: float=0, show_cover_loss=False):
  if not pack_seq:
    input_lengths = None
  else:
    input_lengths = batch.input_lengths

  optimizer.zero_grad()
  input_tensor = batch.input_tensor.to(DEVICE)
  target_tensor = batch.target_tensor.to(DEVICE)
  ext_vocab_size = batch.ext_vocab_size

  out = model(input_tensor, target_tensor, input_lengths, criterion,
              forcing_ratio=forcing_ratio, partial_forcing=partial_forcing, sample=sample,
              ext_vocab_size=ext_vocab_size, include_cover_loss=show_cover_loss)

  if rl_ratio > 0:
    assert vocab is not None
    sample_out = model(input_tensor, saved_out=out, criterion=criterion, sample=True,
                       ext_vocab_size=ext_vocab_size)
    baseline_out = model(input_tensor, saved_out=out, visualize=False,
                         ext_vocab_size=ext_vocab_size)
    scores = eval_batch_output([ex.tgt for ex in batch.examples], vocab, batch.oov_dict,
                               sample_out.decoded_tokens, baseline_out.decoded_tokens)
    greedy_rouge = scores[1]['l_f']
    neg_reward = greedy_rouge - scores[0]['l_f']
    # if sample > baseline, the reward is positive (i.e. good exploration), rl_loss is negative
    rl_loss = neg_reward * sample_out.loss
    rl_loss_value = neg_reward * sample_out.loss_value
    loss = (1 - rl_ratio) * out.loss + rl_ratio * rl_loss
    loss_value = (1 - rl_ratio) * out.loss_value + rl_ratio * rl_loss_value
  else:
    loss = out.loss
    loss_value = out.loss_value
    greedy_rouge = None

  loss.backward()
  if grad_norm > 0:
    clip_grad_norm_(model.parameters(), grad_norm)
  optimizer.step()

  target_length = target_tensor.size(0)
  return loss_value / target_length, greedy_rouge 
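With made-up scores, the sign convention noted in the comment above works out as follows: a sampled summary that beats the greedy baseline yields a negative `neg_reward`, and hence a negative `rl_loss`:

# Illustrative numbers only, not from a real run.
sample_rouge = 0.42  # ROUGE-L F1 of the sampled summary
greedy_rouge = 0.35  # ROUGE-L F1 of the greedy baseline
neg_reward = greedy_rouge - sample_rouge
print(neg_reward)    # -0.07: good exploration, so rl_loss = neg_reward * loss < 0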
Example #10
Source File: model_utils.py    From dumb-chatbot with MIT License 4 votes
def build_model(vocab_size, load_checkpoint=False, checkpoint_epoch=-1, print_module=True):
    hidden_size = config['MODEL']['HIDDEN_SIZE']
    attn_method = config['MODEL']['ATTN_METHOD']
    num_encoder_layers = config['MODEL']['N_ENCODER_LAYERS']
    dropout = config['MODEL']['DROPOUT']
    encoder = Encoder(vocab_size, hidden_size, num_encoder_layers, dropout=dropout)
    decoder = Decoder(hidden_size, vocab_size, attn_method, num_encoder_layers, dropout=dropout)
    model = Seq2Seq(
        encoder=encoder,
        decoder=decoder,
        max_length=config['LOADER']['MAX_LENGTH'],
        tie_weights=config['MODEL']['TIE_WEIGHTS']
    )
    if print_module:
        print(model)
    if load_checkpoint and os.path.exists(CHECKPOINT_PATH):
        # load checkpoint
        prefix = config['TRAIN']['PREFIX']
        model_path = None
        if checkpoint_epoch >= 0:
            model_path = '%s%s_%d' % (CHECKPOINT_PATH, prefix, checkpoint_epoch)
        else:
            # use last checkpoint
            checkpoints = []
            for root, dirs, files in os.walk(CHECKPOINT_PATH):
                for f_name in files:
                    f_name_sp = f_name.split('_')
                    if len(f_name_sp) == 2:
                        checkpoints.append(int(f_name_sp[1]))
            if len(checkpoints) > 0:
                model_path = '%s%s_%d' % (CHECKPOINT_PATH, prefix, max(checkpoints))

        if model_path is not None and os.path.exists(model_path):
            if IMPORT_FROM_CUDA:
                loaded = torch.load(model_path, map_location=lambda storage, loc: storage)
            else:
                loaded = torch.load(model_path)

            model.load_state_dict(loaded)
            print('Load %s' % model_path)

    # print('Seq2Seq parameters:')
    # for name, param in model.state_dict().items():
    #     print(name, param.size())
    if USE_CUDA:
        model = model.cuda()
    return model 
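`build_model` expects a parsed `config` mapping (loaded elsewhere, e.g. from a config file); its shape can be read off the lookups above. A hypothetical example with illustrative values:

config = {
    'MODEL': {
        'HIDDEN_SIZE': 512,
        'ATTN_METHOD': 'dot',
        'N_ENCODER_LAYERS': 2,
        'DROPOUT': 0.1,
        'TIE_WEIGHTS': True,
    },
    'LOADER': {'MAX_LENGTH': 20},
    'TRAIN': {'PREFIX': 'chatbot'},
}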
Example #11
Source File: decode.py    From Seq2Seq-PyTorch with Do What The F*ck You Want To Public License 4 votes
def _load_model(self):
    print('Loading pretrained model')
    if self.config['model']['seq2seq'] == 'vanilla':
        print('Loading Seq2Seq Vanilla model')

        self.model = Seq2Seq(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()

    elif self.config['model']['seq2seq'] == 'attention':
        print('Loading Seq2Seq Attention model')

        self.model = Seq2SeqAttention(
            src_emb_dim=self.config['model']['dim_word_src'],
            trg_emb_dim=self.config['model']['dim_word_trg'],
            src_vocab_size=len(self.src_dict),
            trg_vocab_size=len(self.tgt_dict),
            src_hidden_dim=self.config['model']['dim'],
            trg_hidden_dim=self.config['model']['dim'],
            ctx_hidden_dim=self.config['model']['dim'],
            attention_mode='dot',
            batch_size=self.config['data']['batch_size'],
            bidirectional=self.config['model']['bidirectional'],
            pad_token_src=self.src_dict['<pad>'],
            pad_token_trg=self.tgt_dict['<pad>'],
            nlayers=self.config['model']['n_layers_src'],
            nlayers_trg=self.config['model']['n_layers_trg'],
            dropout=0.,
        ).cuda()

    # The weights file is a pickled state dict; open it in binary mode.
    self.model.load_state_dict(torch.load(
        open(self.model_weights, 'rb')
    ))
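Here `self.config` is the experiment configuration and `self.model_weights` is a path to saved weights, both set up elsewhere in the class. A hypothetical configuration fragment consistent with the lookups above (values are illustrative only):

config = {
    'model': {
        'seq2seq': 'attention',  # or 'vanilla'
        'dim_word_src': 512,
        'dim_word_trg': 512,
        'dim': 1024,
        'bidirectional': True,
        'n_layers_src': 2,
        'n_layers_trg': 1,
    },
    'data': {'batch_size': 80},
}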