Python data.get_batch() Examples
The following are 8 code examples of data.get_batch(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data, or try the search function.
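Note that data is not a standard-library module: each project below ships its own data helper, so the signature of get_batch() varies from example to example. As a quick orientation, these are the call patterns that appear in the examples below (the variable names on the left-hand side are placeholders only):

# seq2seq chatbot (stanford-tensorflow-tutorials): sample one bucket and pad it
encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(bucket_data, bucket_id, batch_size=BATCH_SIZE)

# topic model (ETM): build a dense bag-of-words batch on a given device
data_batch = data.get_batch(tokens, counts, indices, vocab_size, device)

# image-to-image GAN (DiscoGAN-Tensorflow): sample a batch of images from one domain
real_a = data.get_batch(batch_size, domain_a)

# NLI training (neural_chat / T2F, imported from the data module): pad sentences and look up word vectors
s1_batch, s1_len = get_batch(sentences, word_vec, word_emb_dim)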
Example #1
Source File: chatbot.py From stanford-tensorflow-tutorials with MIT License | 5 votes |
def _eval_test_set(sess, model, test_buckets):
    """ Evaluate on the test set. """
    for bucket_id in range(len(config.BUCKETS)):
        if len(test_buckets[bucket_id]) == 0:
            print(" Test: empty bucket %d" % (bucket_id))
            continue
        start = time.time()
        encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(test_buckets[bucket_id],
                                                                       bucket_id,
                                                                       batch_size=config.BATCH_SIZE)
        _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs,
                                   decoder_masks, bucket_id, True)
        print('Test bucket {}: loss {}, time {}'.format(bucket_id, step_loss, time.time() - start))
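The tutorial's data.get_batch() itself is not reproduced on this page. Below is a minimal, hypothetical sketch of such a bucketed batcher, assumed from how it is called above; the bucket sizes and the PAD_ID/GO_ID constants are made up for illustration, and this is not the tutorial's actual data.py:

import random

PAD_ID, GO_ID = 0, 1  # hypothetical special-token ids

def get_batch(data_bucket, bucket_id, batch_size, buckets=((8, 10), (12, 14), (16, 19))):
    """ Sample batch_size (question, answer) pairs from one bucket and pad them. """
    encoder_size, decoder_size = buckets[bucket_id]
    encoder_inputs, decoder_inputs = [], []
    for _ in range(batch_size):
        enc, dec = random.choice(data_bucket)
        # reverse and pad the encoder input; prepend GO and pad the decoder input
        encoder_inputs.append(list(reversed(enc)) + [PAD_ID] * (encoder_size - len(enc)))
        decoder_inputs.append([GO_ID] + dec + [PAD_ID] * (decoder_size - len(dec) - 1))
    # reshape to time-major: one list of batch_size token ids per time step
    batch_encoder = [[encoder_inputs[b][t] for b in range(batch_size)] for t in range(encoder_size)]
    batch_decoder = [[decoder_inputs[b][t] for b in range(batch_size)] for t in range(decoder_size)]
    # mask out steps whose target (the next decoder input) is padding
    batch_masks = [[0.0 if t == decoder_size - 1 or decoder_inputs[b][t + 1] == PAD_ID else 1.0
                    for b in range(batch_size)]
                   for t in range(decoder_size)]
    return batch_encoder, batch_decoder, batch_masks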
Example #2
Source File: chatbot.py From stanford-tensorflow-tutorials with MIT License | 5 votes |
def train():
    """ Train the bot """
    test_buckets, data_buckets, train_buckets_scale = _get_buckets()
    # in train mode, we need to create the backward path, so forward_only is False
    model = ChatBotModel(False, config.BATCH_SIZE)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        print('Running session')
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)

        iteration = model.global_step.eval()
        total_loss = 0
        while True:
            skip_step = _get_skip_step(iteration)
            bucket_id = _get_random_bucket(train_buckets_scale)
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch(data_buckets[bucket_id],
                                                                           bucket_id,
                                                                           batch_size=config.BATCH_SIZE)
            start = time.time()
            _, step_loss, _ = run_step(sess, model, encoder_inputs, decoder_inputs,
                                       decoder_masks, bucket_id, False)
            total_loss += step_loss
            iteration += 1

            if iteration % skip_step == 0:
                print('Iter {}: loss {}, time {}'.format(iteration, total_loss / skip_step,
                                                         time.time() - start))
                start = time.time()
                total_loss = 0
                saver.save(sess, os.path.join(config.CPT_PATH, 'chatbot'), global_step=model.global_step)
                if iteration % (10 * skip_step) == 0:
                    # Run evals on development set and print their loss
                    _eval_test_set(sess, model, test_buckets)
                    start = time.time()
                sys.stdout.flush()
Example #3
Source File: discoGAN.py From DiscoGAN-Tensorflow with MIT License | 4 votes |
def train(self, LR=2e-4, B1=0.5, B2=0.999, iterations=50000, sample_frequency=10,
          sample_overlap=500, save_frequency=1000, domain_a="a", domain_b="b"):
    self.trainer_D = tf.train.AdamOptimizer(LR, beta1=B1, beta2=B2).minimize(self.l_disc,
                                                                             var_list=self.disc_params)
    self.trainer_G = tf.train.AdamOptimizer(LR, beta1=B1, beta2=B2).minimize(self.l_g,
                                                                             var_list=self.gen_params)
    with self.sess as sess:
        sess.run(tf.global_variables_initializer())
        if self.analytics:
            if not os.path.exists("logs"):
                os.makedirs("logs")
            self.summary_writer = tf.summary.FileWriter(os.getcwd() + '/logs', graph=sess.graph)

        for i in range(iterations):
            realA = data.get_batch(self.batch_size, domain_a)
            realB = data.get_batch(self.batch_size, domain_b)
            op_list = [self.trainer_D, self.l_disc, self.trainer_G, self.l_g, self.merged_summary_op]
            _, dLoss, _, gLoss, summary_str = sess.run(op_list,
                                                       feed_dict={self.x_a: realA, self.x_b: realB})

            realA = data.get_batch(self.batch_size, domain_a)
            realB = data.get_batch(self.batch_size, domain_b)
            _, gLoss = sess.run([self.trainer_G, self.l_g],
                                feed_dict={self.x_a: realA, self.x_b: realB})

            if i % 10 == 0:
                self.summary_writer.add_summary(summary_str, i)
                print("Generator Loss: " + str(gLoss) + "\tDiscriminator Loss: " + str(dLoss))

            if i % sample_frequency == 0:
                realA = data.get_batch(1, domain_a)
                realB = data.get_batch(1, domain_b)
                ops = [self.g_ba, self.g_ab, self.g_aba, self.g_bab]
                out_a, out_b, out_ab, out_ba = sess.run(ops, feed_dict={self.x_a: realA, self.x_b: realB})
                data.save(self.gen_a_dir + "/img" + str(i % sample_overlap) + '.png', out_a[0])
                data.save(self.gen_b_dir + "/img" + str(i % sample_overlap) + '.png', out_b[0])
                data.save(self.rec_a_dir + "/img" + str(i % sample_overlap) + '.png', out_ba[0])
                data.save(self.rec_b_dir + "/img" + str(i % sample_overlap) + '.png', out_ab[0])

            if i % save_frequency == 0:
                if not os.path.exists(self.model_directory):
                    os.makedirs(self.model_directory)
                self.saver.save(sess, self.model_directory + '/model-' + str(i) + '.ckpt')
                print("Saved Model")

def restore(self):
    """ Restore previously saved weights from a trained / in-progress model """
    try:
        self.saver.restore(self.sess, tf.train.latest_checkpoint(self.model_directory))
    except Exception:
        print("Previous weights not found")
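Here data.get_batch(batch_size, domain) is the project's own image sampler. The following is a minimal sketch of what such a helper might look like, assuming images live under a per-domain folder and are scaled to [-1, 1]; the datasets/ path, image size, and PIL-based loading are assumptions for illustration, not the repo's actual code:

import os
import random
import numpy as np
from PIL import Image

def get_batch(batch_size, domain, root="datasets", size=64):
    """ Hypothetical sketch: sample batch_size images from root/<domain>/ and scale them to [-1, 1]. """
    folder = os.path.join(root, domain)
    files = [os.path.join(folder, f) for f in os.listdir(folder)]
    batch = []
    for path in random.sample(files, batch_size):
        img = Image.open(path).convert("RGB").resize((size, size))
        batch.append(np.asarray(img, dtype=np.float32) / 127.5 - 1.0)
    return np.stack(batch)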
Example #4
Source File: main.py From ETM with MIT License | 4 votes |
def train(epoch):
    model.train()
    acc_loss = 0
    acc_kl_theta_loss = 0
    cnt = 0
    indices = torch.randperm(args.num_docs_train)
    indices = torch.split(indices, args.batch_size)
    for idx, ind in enumerate(indices):
        optimizer.zero_grad()
        model.zero_grad()
        data_batch = data.get_batch(train_tokens, train_counts, ind, args.vocab_size, device)
        sums = data_batch.sum(1).unsqueeze(1)
        if args.bow_norm:
            normalized_data_batch = data_batch / sums
        else:
            normalized_data_batch = data_batch
        recon_loss, kld_theta = model(data_batch, normalized_data_batch)
        total_loss = recon_loss + kld_theta
        total_loss.backward()

        if args.clip > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        acc_loss += torch.sum(recon_loss).item()
        acc_kl_theta_loss += torch.sum(kld_theta).item()
        cnt += 1

        if idx % args.log_interval == 0 and idx > 0:
            cur_loss = round(acc_loss / cnt, 2)
            cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
            cur_real_loss = round(cur_loss + cur_kl_theta, 2)
            print('Epoch: {} .. batch: {}/{} .. LR: {} .. KL_theta: {} .. Rec_loss: {} .. NELBO: {}'.format(
                epoch, idx, len(indices), optimizer.param_groups[0]['lr'], cur_kl_theta, cur_loss, cur_real_loss))

    cur_loss = round(acc_loss / cnt, 2)
    cur_kl_theta = round(acc_kl_theta_loss / cnt, 2)
    cur_real_loss = round(cur_loss + cur_kl_theta, 2)
    print('*' * 100)
    print('Epoch----->{} .. LR: {} .. KL_theta: {} .. Rec_loss: {} .. NELBO: {}'.format(
        epoch, optimizer.param_groups[0]['lr'], cur_kl_theta, cur_loss, cur_real_loss))
    print('*' * 100)
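In ETM, data.get_batch(tokens, counts, ind, vocab_size, device) returns a dense bag-of-words batch. A minimal sketch of a helper with that behaviour is shown below, assuming tokens and counts hold, per document, the word ids present in the document and their counts; this is an illustration of the idea, not the repo's exact implementation:

import torch

def get_batch(tokens, counts, ind, vocab_size, device):
    """ Hypothetical sketch: build a dense (batch_size, vocab_size) bag-of-words tensor. """
    doc_ids = ind.tolist() if torch.is_tensor(ind) else list(ind)
    batch = torch.zeros(len(doc_ids), vocab_size, device=device)
    for row, doc_id in enumerate(doc_ids):
        for tok, cnt in zip(tokens[doc_id], counts[doc_id]):
            batch[row, int(tok)] = float(cnt)
    return batch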
Example #5
Source File: main.py From ETM with MIT License | 4 votes |
def evaluate(m, source, tc=False, td=False):
    """Compute perplexity on document completion."""
    m.eval()
    with torch.no_grad():
        if source == 'val':
            indices = torch.split(torch.tensor(range(args.num_docs_valid)), args.eval_batch_size)
            tokens = valid_tokens
            counts = valid_counts
        else:
            indices = torch.split(torch.tensor(range(args.num_docs_test)), args.eval_batch_size)
            tokens = test_tokens
            counts = test_counts

        ## get \beta here
        beta = m.get_beta()

        ### do dc and tc here
        acc_loss = 0
        cnt = 0
        indices_1 = torch.split(torch.tensor(range(args.num_docs_test_1)), args.eval_batch_size)
        for idx, ind in enumerate(indices_1):
            ## get theta from first half of docs
            data_batch_1 = data.get_batch(test_1_tokens, test_1_counts, ind, args.vocab_size, device)
            sums_1 = data_batch_1.sum(1).unsqueeze(1)
            if args.bow_norm:
                normalized_data_batch_1 = data_batch_1 / sums_1
            else:
                normalized_data_batch_1 = data_batch_1
            theta, _ = m.get_theta(normalized_data_batch_1)

            ## get prediction loss using second half
            data_batch_2 = data.get_batch(test_2_tokens, test_2_counts, ind, args.vocab_size, device)
            sums_2 = data_batch_2.sum(1).unsqueeze(1)
            res = torch.mm(theta, beta)
            preds = torch.log(res)
            recon_loss = -(preds * data_batch_2).sum(1)

            loss = recon_loss / sums_2.squeeze()
            loss = loss.mean().item()
            acc_loss += loss
            cnt += 1

        cur_loss = acc_loss / cnt
        ppl_dc = round(math.exp(cur_loss), 1)
        print('*' * 100)
        print('{} Doc Completion PPL: {}'.format(source.upper(), ppl_dc))
        print('*' * 100)

        if tc or td:
            beta = beta.data.cpu().numpy()
            if tc:
                print('Computing topic coherence...')
                get_topic_coherence(beta, train_tokens, vocab)
            if td:
                print('Computing topic diversity...')
                get_topic_diversity(beta, 25)
        return ppl_dc
Example #6
Source File: train_nli.py From neural_chat with MIT License | 4 votes |
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec, params.word_emb_dim)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec, params.word_emb_dim)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(params.lrshrink,
                                                                   optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
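Here get_batch(sentences, word_vec, word_emb_dim) comes from the project's data module and returns a padded batch of word embeddings plus the sentence lengths. The following is a minimal sketch under that assumption (illustrative only, not the project's exact helper):

import numpy as np
import torch

def get_batch(batch, word_vec, word_emb_dim=300):
    """ Hypothetical sketch: pad tokenized sentences to the longest one, look up each
    word's vector, and return a (max_len, batch, emb_dim) tensor plus the true lengths. """
    lengths = np.array([len(sent) for sent in batch])
    max_len = int(lengths.max())
    embed = np.zeros((max_len, len(batch), word_emb_dim), dtype=np.float32)
    for i, sent in enumerate(batch):
        for j, word in enumerate(sent):
            embed[j, i, :] = word_vec[word]
    return torch.from_numpy(embed), lengths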
Example #7
Source File: train_nli.py From T2F with MIT License | 4 votes |
def evaluate(epoch, eval_type='valid', final_eval=False):
    nli_net.eval()
    correct = 0.
    global val_acc_best, lr, stop_training, adam_stop

    if eval_type == 'valid':
        print('\nVALIDATION : Epoch {0}'.format(epoch))

    s1 = valid['s1'] if eval_type == 'valid' else test['s1']
    s2 = valid['s2'] if eval_type == 'valid' else test['s2']
    target = valid['label'] if eval_type == 'valid' else test['label']

    for i in range(0, len(s1), params.batch_size):
        # prepare batch
        s1_batch, s1_len = get_batch(s1[i:i + params.batch_size], word_vec)
        s2_batch, s2_len = get_batch(s2[i:i + params.batch_size], word_vec)
        s1_batch, s2_batch = Variable(s1_batch.cuda()), Variable(s2_batch.cuda())
        tgt_batch = Variable(torch.LongTensor(target[i:i + params.batch_size])).cuda()

        # model forward
        output = nli_net((s1_batch, s1_len), (s2_batch, s2_len))

        pred = output.data.max(1)[1]
        correct += pred.long().eq(tgt_batch.data.long()).cpu().sum()

    # save model
    eval_acc = round(100 * correct / len(s1), 2)
    if final_eval:
        print('finalgrep : accuracy {0} : {1}'.format(eval_type, eval_acc))
    else:
        print('togrep : results : epoch {0} ; mean accuracy {1} : {2}'.format(epoch, eval_type, eval_acc))

    if eval_type == 'valid' and epoch <= params.n_epochs:
        if eval_acc > val_acc_best:
            print('saving model at epoch {0}'.format(epoch))
            if not os.path.exists(params.outputdir):
                os.makedirs(params.outputdir)
            torch.save(nli_net.state_dict(), os.path.join(params.outputdir, params.outputmodelname))
            val_acc_best = eval_acc
        else:
            if 'sgd' in params.optimizer:
                optimizer.param_groups[0]['lr'] = optimizer.param_groups[0]['lr'] / params.lrshrink
                print('Shrinking lr by : {0}. New lr = {1}'.format(params.lrshrink,
                                                                   optimizer.param_groups[0]['lr']))
                if optimizer.param_groups[0]['lr'] < params.minlr:
                    stop_training = True
            if 'adam' in params.optimizer:
                # early stopping (at 2nd decrease in accuracy)
                stop_training = adam_stop
                adam_stop = True
    return eval_acc
Example #8
Source File: chatbot.py From stanford-tensorflow-tutorials with MIT License | 4 votes |
def chat():
    """ In test mode, we don't need to create the backward path. """
    _, enc_vocab = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.enc'))
    inv_dec_vocab, _ = data.load_vocab(os.path.join(config.PROCESSED_PATH, 'vocab.dec'))

    model = ChatBotModel(True, batch_size=1)
    model.build_graph()

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        _check_restore_parameters(sess, saver)
        output_file = open(os.path.join(config.PROCESSED_PATH, config.OUTPUT_FILE), 'a+')
        # Decode from standard input.
        max_length = config.BUCKETS[-1][0]
        print('Welcome to TensorBro. Say something. Enter to exit. Max length is', max_length)
        while True:
            line = _get_user_input()
            if len(line) > 0 and line[-1] == '\n':
                line = line[:-1]
            if line == '':
                break
            output_file.write('HUMAN ++++ ' + line + '\n')
            # Get token-ids for the input sentence.
            token_ids = data.sentence2id(enc_vocab, str(line))
            if len(token_ids) > max_length:
                print('Max length I can handle is:', max_length)
                line = _get_user_input()
                continue
            # Which bucket does it belong to?
            bucket_id = _find_right_bucket(len(token_ids))
            # Get a 1-element batch to feed the sentence to the model.
            encoder_inputs, decoder_inputs, decoder_masks = data.get_batch([(token_ids, [])],
                                                                           bucket_id,
                                                                           batch_size=1)
            # Get output logits for the sentence.
            _, _, output_logits = run_step(sess, model, encoder_inputs, decoder_inputs,
                                           decoder_masks, bucket_id, True)
            response = _construct_response(output_logits, inv_dec_vocab)
            print(response)
            output_file.write('BOT ++++ ' + response + '\n')
        output_file.write('=============================================\n')
        output_file.close()