Python data.PAD_TOKEN Examples
The following are 26 code examples of data.PAD_TOKEN. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data, or try the search function.
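Across all of these projects, data.PAD_TOKEN is the special padding marker defined in the project's data module; the batchers look up its integer id with vocab.word2id(data.PAD_TOKEN) (or vocab.WordToId / vocab.CheckVocab in the TensorFlow textsum variants) and use that id to pad token-id sequences to a common length. Below is a minimal, self-contained sketch of that pattern; the PAD_TOKEN constant, the toy Vocab class, and the pad_sequence helper are illustrative stand-ins, not the actual data module or API of any project listed here.

# Stand-in for a project's data module and Vocab class; the names are
# illustrative only, not the real API of the repositories below.
PAD_TOKEN = '[PAD]'


class Vocab(object):
  """Toy vocabulary that maps words to integer ids."""

  def __init__(self, words):
    # Reserve id 0 for the PAD token, then number the remaining words.
    self._word_to_id = {w: i for i, w in enumerate([PAD_TOKEN] + list(words))}

  def word2id(self, word):
    return self._word_to_id[word]


def pad_sequence(ids, max_len, pad_id):
  """Pad (or truncate) a list of token ids to exactly max_len."""
  return (ids + [pad_id] * max_len)[:max_len]


vocab = Vocab(['the', 'cat', 'sat'])
pad_id = vocab.word2id(PAD_TOKEN)          # id of the PAD token, as in the batchers below
print(pad_sequence([1, 2, 3], 6, pad_id))  # [1, 2, 3, 0, 0, 0]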
Example #1
Source File: batcher.py From unified-summarization with MIT License

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  if hps.model in ['rewriter', 'end2end']:
    self.init_rewriter_encoder_seq(example_list, hps)  # initialize the input to the rewriter encoder
    self.init_rewriter_decoder_seq(example_list, hps)  # initialize the input and targets for the rewriter decoder
  if hps.model in ['selector', 'end2end']:
    self.init_selector_encoder_seq(example_list, hps)  # initialize the input to the selector encoder
    self.init_selector_target(example_list, hps)  # initialize the target to selector
  self.store_orig_strings(example_list)  # store the original strings
Example #2
Source File: batch_reader.py From long-summarization with Apache License 2.0

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self._hps = hps
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.sec_pad_id = vocab.word2id(data.SEC_PAD_TOKEN)
  # initialize the input to the encoder
  self.init_encoder_seq(example_list, hps)
  # initialize the input and targets for the decoder
  self.init_decoder_seq(example_list, hps)
  self.store_orig_strings(example_list)  # store the original strings
Example #3
Source File: batch_reader.py From long-summarization with Apache License 2.0

def _pad_words(words, max_len=None, pad_id=data.PAD_TOKEN):
  """Pads a list of words with pad_id up to max_len and truncates it to max_len."""
  if max_len is None:
    max_len = len(words)
  while len(words) < max_len:
    words += [pad_id]
  return words[:max_len]
Example #4
Source File: batcher.py From docker with MIT License

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  if FLAGS.run_method == 'auto-encoder':
    self.init_encoder_seq(example_list, hps)  # initialize the input to the encoder
  self.init_decoder_seq(example_list, hps)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #5
Source File: batcher_discriminator.py From docker with MIT License

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_decoder_seq(example_list, hps)  # initialize the input to the encoder
Example #6
Source File: batcher.py From pointer_summarizer with Apache License 2.0

def __init__(self, example_list, vocab, batch_size):
  self.batch_size = batch_size
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_encoder_seq(example_list)  # initialize the input to the encoder
  self.init_decoder_seq(example_list)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #7
Source File: batch_reader.py From long-summarization with Apache License 2.0

def _get_section_words(sec, max_len=None, pad_id=data.PAD_TOKEN, pad=True):
  """Given a section (list of sentences), returns a single list of words in that section."""
  words = ' '.join(sec).split()
  if max_len is None:
    max_len = len(words)
  if pad:
    while len(words) < max_len:
      words += [pad_id]
  return words[:max_len]
Example #8
Source File: batcher.py From rotational-unit-of-memory with MIT License

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  # initialize the input to the encoder
  self.init_encoder_seq(example_list, hps)
  # initialize the input and targets for the decoder
  self.init_decoder_seq(example_list, hps)
  self.store_orig_strings(example_list)  # store the original strings
Example #9
Source File: batcher.py From Reinforce-Paraphrase-Generation with MIT License

def __init__(self, example_list, vocab, batch_size):
  self.batch_size = batch_size
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_encoder_seq(example_list)  # initialize the input to the encoder
  self.init_decoder_seq(example_list)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #10
Source File: batcher.py From MAX-Text-Summarizer with Apache License 2.0

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_encoder_seq(example_list, hps)  # initialize the input to the encoder
  self.init_decoder_seq(example_list, hps)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #11
Source File: batcher.py From pointer-generator with Apache License 2.0

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_encoder_seq(example_list, hps)  # initialize the input to the encoder
  self.init_decoder_seq(example_list, hps)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #12
Source File: batcher.py From RLSeq2Seq with MIT License

def __init__(self, example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  self.pad_id = vocab.word2id(data.PAD_TOKEN)  # id of the PAD token used to pad sequences
  self.init_encoder_seq(example_list, hps)  # initialize the input to the encoder
  self.init_decoder_seq(example_list, hps)  # initialize the input and targets for the decoder
  self.store_orig_strings(example_list)  # store the original strings
Example #13
Source File: batcher.py From TransferRL with MIT License

def create_batch(example_list, hps, vocab):
  """Turns the example_list into a Batch object.

  Args:
    example_list: List of Example objects
    hps: hyperparameters
    vocab: Vocabulary object
  """
  batch = Batch()
  batch.pad_id = vocab.word2id(data.PAD_TOKEN)
  batch.init_encoder_seq(example_list, hps)  # initialize the input to the encoder
  batch.init_decoder_seq(example_list, hps)  # initialize the input and targets for the decoder
  batch.store_orig_strings(example_list)  # store the original strings
  return batch
Example #14
Source File: seq2seq_attention.py From multilabel-image-classification-tensorflow with MIT License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #15
Source File: seq2seq_attention.py From models with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #16
Source File: seq2seq_attention.py From g-tensorflow-models with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #17
Source File: seq2seq_attention.py From HumanRecognition with MIT License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #18
Source File: seq2seq_attention.py From DOTA_models with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #19
Source File: seq2seq_attention.py From object_detection_with_tensorflow with MIT License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #20
Source File: seq2seq_attention.py From object_detection_kitti with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #21
Source File: seq2seq_attention.py From hands-detection with MIT License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #22
Source File: seq2seq_attention.py From ECO-pytorch with BSD 2-Clause "Simplified" License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.WordToId(data.PAD_TOKEN) > 0
  assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
  assert vocab.WordToId(data.SENTENCE_START) > 0
  assert vocab.WordToId(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #23
Source File: seq2seq_attention.py From Action_Recognition_Zoo with MIT License

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.WordToId(data.PAD_TOKEN) > 0
  assert vocab.WordToId(data.UNKNOWN_TOKEN) >= 0
  assert vocab.WordToId(data.SENTENCE_START) > 0
  assert vocab.WordToId(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #24
Source File: main.py From text2text with Apache License 2.0

def main(unused_argv):
  config = importlib.import_module('config.%s' % FLAGS.config)
  for argument in FLAGS.override.split(','):
    if '=' in argument:
      name = argument.split('=')[0]
      value = type(getattr(config, name))(argument.split('=')[1])
      setattr(config, name, value)

  config.input_vocab = data.Vocab(config.input_vocab_file,
                                  config.max_vocab_size)  # Max IDs
  if config.input_vocab.WordToId(data.PAD_TOKEN) <= 0:
    raise ValueError('Invalid PAD_TOKEN id.')
  # id of the UNKNOWN_TOKEN should be "0" for copynet model
  if config.input_vocab.WordToId(data.UNKNOWN_TOKEN) != 0:
    raise ValueError('Invalid UNKNOWN_TOKEN id.')
  if config.input_vocab.WordToId(data.SENTENCE_START) <= 0:
    raise ValueError('Invalid SENTENCE_START id.')
  if config.input_vocab.WordToId(data.SENTENCE_END) <= 0:
    raise ValueError('Invalid SENTENCE_END id.')

  if config.output_vocab_file:
    config.output_vocab = data.Vocab(config.output_vocab_file,
                                     config.max_vocab_size)  # Max IDs
    if config.output_vocab.WordToId(data.PAD_TOKEN) <= 0:
      raise ValueError('Invalid PAD_TOKEN id.')
    # id of the UNKNOWN_TOKEN should be "0" for copynet model
    if config.output_vocab.WordToId(data.UNKNOWN_TOKEN) != 0:
      raise ValueError('Invalid UNKNOWN_TOKEN id.')
    if config.output_vocab.WordToId(data.SENTENCE_START) <= 0:
      raise ValueError('Invalid SENTENCE_START id.')
    if config.output_vocab.WordToId(data.SENTENCE_END) <= 0:
      raise ValueError('Invalid SENTENCE_END id.')
  else:
    config.output_vocab = config.input_vocab

  train_batcher = config.Batcher(config.train_set, config)
  valid_batcher = config.Batcher(config.valid_set, config)
  tf.set_random_seed(config.random_seed)

  if FLAGS.mode == 'train':
    model = config.Model(config, 'train', num_gpus=FLAGS.num_gpus)
    _Train(model, config, train_batcher)
  elif FLAGS.mode == 'eval':
    config.dropout_rnn = 1.0
    config.dropout_emb = 1.0
    model = config.Model(config, 'eval', num_gpus=FLAGS.num_gpus)
    _Eval(model, config, valid_batcher)
  elif FLAGS.mode == 'decode':
    config.dropout_rnn = 1.0
    config.dropout_emb = 1.0
    config.batch_size = config.beam_size
    model = config.Model(config, 'decode', num_gpus=FLAGS.num_gpus)
    decoder = decode.BeamSearch(model, valid_batcher, config)
    decoder.DecodeLoop()
Example #25
Source File: seq2seq_attention.py From Gun-Detector with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
Example #26
Source File: seq2seq_attention.py From yolo_v2 with Apache License 2.0

def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries < than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    decode_mdl_hps = hps
    # Only need to restore the 1st step and reuse it since
    # we keep and feed in state for each step's output.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()