Python utils.get_batch() Examples
The following are 30 code examples of utils.get_batch(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
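Most of the PyTorch language-model examples below (awd-lstm-lm, darts, NAO, mos, and related forks) assume the bptt-style get_batch helper defined in each project's utils.py. The exact definition varies slightly from project to project (some take the full args object, some take bptt directly, and older versions wrap the slices in Variable with an evaluation flag), so the following is only a minimal sketch of the common shape, not any single project's code:

def get_batch(source, i, args, seq_len=None, evaluation=False):
    # source: a [length, batch_size] tensor produced by batchify()
    # Take a window of at most args.bptt tokens starting at position i...
    seq_len = min(seq_len if seq_len else args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    # ...and use the same window shifted by one token as the prediction target,
    # flattened so it can be fed directly to NLLLoss / cross entropy.
    target = source[i + 1:i + 1 + seq_len].view(-1)
    # `evaluation` is accepted for signature compatibility with older call sites;
    # newer code disables gradients with torch.no_grad() at the call site instead.
    return data, target

The TensorFlow examples further down (bird_classification, CollaborativeVAE) use unrelated get_batch helpers with different signatures; hedged sketches of those are given next to the first example that uses each one.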
Example #1
Source File: main.py From mos with MIT License | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += loss * len(data)
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #2
Source File: train.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args.bptt, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #3
Source File: test.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden, args.arc)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #4
Source File: model_search.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, model, parallel_model, params, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    arch_pool = params['arch_pool']
    logging.info('Evaluating on {} archs'.format(len(arch_pool)))
    start_time = time.time()
    valid_score_list = []
    for arch in arch_pool:
        model.eval()
        hidden = model.init_hidden(batch_size)
        # whether use random batch ?
        # data_source is in the format of [length, bs, ...]
        # for i in range(0, data_source.size(0) - 1, params['bptt']):
        # for i in range(1):
        batch = np.random.randint(0, data_source.size(0) // params['bptt'])
        data, targets = get_batch(data_source, batch, params['bptt'], evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden, arch)
        loss = F.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data[0]
        valid_score_list.append(loss)
    eval_time = time.time() - start_time
    mean_valid_score = np.mean(valid_score_list)
    logging.info('Mean loss {:5.2f} | mean ppl {:8.2f} | time {:5.2f} secs'.format(
        mean_valid_score, np.exp(mean_valid_score), eval_time))
    return valid_score_list
Example #5
Source File: train.py From darts with Apache License 2.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #6
Source File: test.py From darts with Apache License 2.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #7
Source File: train_search.py From darts with Apache License 2.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #8
Source File: test.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #9
Source File: train.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)
        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
        total_loss += loss * len(data)
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #10
Source File: finetune.py From mos with MIT License | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)
            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data
            total_loss += len(data) * loss
            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #11
Source File: main.py From bird_classification with MIT License | 6 votes |
def predict(self, sess, set_type):
    if set_type == 'val':
        num_images = len(self.obj.val_list)
        generator = self.obj.val_generator()
    elif set_type == 'test':
        num_images = len(self.obj.test_list)
        generator = self.obj.test_generator()
    else:
        num_images = len(self.obj.train_list)
        generator = self.obj.train_generator()

    true_positives = 0
    num_batches = num_images//self.batch_size if num_images%self.batch_size == 0 else num_images//self.batch_size + 1

    model_predictions = []
    for i in range(num_batches):
        x_batch, _ = get_batch(generator, set_type, height=self.model.height, width=self.model.width)
        predicted = sess.run([self.model.pred], feed_dict={self.model.x: x_batch})
        model_predictions.extend(predicted[0])

    return model_predictions
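The bird_classification examples use a different utils.get_batch: it pulls a batch of images and labels from a Python generator and resizes them to the model's input size. The project's actual helper is not reproduced on this page, so the following is only a hypothetical sketch of that signature; the use of next(generator), the OpenCV resize, the 224x224 defaults, and the handling of set_type are all assumptions, not the project's code:

import numpy as np
import cv2  # assumption: images are resized with OpenCV

def get_batch(generator, set_type, height=224, width=224):
    # Hypothetical sketch: the generator is assumed to yield one
    # (list_of_images, one_hot_labels) pair per call.
    images, labels = next(generator)
    # Resize every image to the model's expected input resolution.
    x_batch = np.stack([cv2.resize(img, (width, height)) for img in images])
    # set_type ('train'/'val'/'test') is accepted to mirror the call sites above;
    # any per-split augmentation is omitted in this sketch.
    return x_batch, np.asarray(labels)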
Example #12
Source File: main.py From bird_classification with MIT License | 6 votes |
def evaluate(self, sess, set_type):
    if set_type == 'val':
        num_images = len(self.obj.val_list)
        generator = self.obj.val_generator()
    else:
        num_images = len(self.obj.train_list)
        generator = self.obj.train_generator()

    true_positives = 0
    val_loss = 0
    num_batches = num_images//self.batch_size if num_images%self.batch_size == 0 else num_images//self.batch_size + 1

    for i in range(num_batches):
        x_batch, y_batch = get_batch(generator, set_type, height=self.model.height, width=self.model.width)
        predicted = sess.run([self.model.pred], feed_dict={self.model.x: x_batch, self.model.y: y_batch})
        true_positives = true_positives + np.sum(predicted[0] == np.argmax(y_batch, 1))

    print('set_type:', set_type, 'accuracy = ', true_positives*100.0/num_images)

# predict the labels for test dataset
Example #13
Source File: main.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #14
Source File: finetune.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #15
Source File: main.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #16
Source File: cvae.py From CollaborativeVAE with MIT License | 5 votes |
def cdl_estimate(self, data_x, num_iter):
    for i in range(num_iter):
        b_x, ids = utils.get_batch(data_x, self.params.batch_size)
        _, l, gen_loss, v_loss = self.sess.run((self.optimizer, self.loss, self.gen_loss, self.v_loss),
            feed_dict={self.x: b_x, self.v: self.m_V[ids, :]})
        # Display logs per epoch step
        if i % self.print_step == 0 and self.verbose:
            print "Iter:", '%04d' % (i+1), \
                "loss=", "{:.5f}".format(l), \
                "genloss=", "{:.5f}".format(gen_loss), \
                "vloss=", "{:.5f}".format(v_loss)
    return gen_loss
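The CollaborativeVAE examples call utils.get_batch(data_x, batch_size), which returns both a random mini-batch of rows and the indices of those rows, so the caller can index other per-row matrices consistently (here self.m_V[ids, :]). A plausible sketch, assuming uniform random sampling without replacement; the project's actual helper may sample differently:

import numpy as np

def get_batch(data_x, batch_size):
    # Draw `batch_size` distinct row indices at random and return both the rows
    # and the indices, so aligned per-row matrices can be indexed the same way.
    ids = np.random.choice(data_x.shape[0], batch_size, replace=False)
    return data_x[ids], ids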
Example #17
Source File: test_phrase_grammar.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=1):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output = model.decoder(output)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #18
Source File: finetune.py From lm-context-analysis with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #19
Source File: eval.py From fraternal-dropout with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #20
Source File: main.py From fraternal-dropout with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #21
Source File: eval.py From lm-context-analysis with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size, seq_len):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    tokens = 0
    n = 0
    save_all_losses = []
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, seq_len):
        tokens += seq_len
        data, targets = get_batch(data_source, i, args, evaluation=True, seq_len=seq_len)
        output, hidden = model(data, hidden)
        output = nn.functional.log_softmax(output.permute(2,1,0)).permute(2,1,0)
        targets = targets.view(data.data.shape[0], batch_size, -1)
        CELoss = torch.gather(output.data, dim=2, index=targets.data).squeeze()
        CELoss = -1*CELoss
        if tokens < args.start_token: continue  # We are not ready to accumulate error yet
        elif tokens >= args.start_token and tokens - seq_len < args.start_token:
            data.data = data.data[-(tokens - args.start_token + 1):]
            CELoss = CELoss[-(tokens - args.start_token + 1):]
            print('First word: %s' % (corpus.dictionary.idx2word[data.data[-(tokens - args.start_token + 1), 0]]))
        total_loss += torch.sum(CELoss)
        n += data.size(0)
        save_all_losses += CELoss.tolist()
        hidden = repackage_hidden(hidden)
    print('total: %d' % n)
    print('Last word: %s' % (corpus.dictionary.idx2word[data.data[-1, 0]]))
    return total_loss / float(n), save_all_losses
Example #22
Source File: main.py From lm-context-analysis with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #23
Source File: main.py From bird_classification with MIT License | 4 votes |
def run_training_testing(self, model_weight_path, gpu_memory_fraction):
    # train the network
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    train_generator_obj = self.obj.train_generator()

    with tf.Session(config=config) as sess:
        summary_writer = tf.summary.FileWriter('./checkpoints/', sess.graph)
        saver = tf.train.Saver(max_to_keep=2)
        self.model.optimize()
        sess.run(tf.global_variables_initializer())
        self.model.load_weight(sess, model_weight_path)
        loss = 0
        true_positives = 0
        for epochs in range(1, self.num_epochs+1):
            start_time = time.time()
            for step in range(len(self.obj.train_list)//self.batch_size + 1):
                x_batch, y_batch = get_batch(train_generator_obj, 'train', height=self.model.height, width=self.model.width)
                #temp1 = sess.run([self.pool], feed_dict={self.model.x:x_batch, self.model.y:y_batch})
                #print(temp1.shape)
                _, loss_curr, predicted = sess.run([self.model.optimizer, self.model.loss, self.model.pred],
                    feed_dict={self.model.x: x_batch, self.model.y: y_batch})
                loss = 0.9*loss + 0.1*loss_curr
                true_positives = true_positives + np.sum(predicted == np.argmax(y_batch, 1))
            end_time = time.time()
            print('time_taken', end_time - start_time)
            print('epochs:', epochs, ' train-loss:', loss, 'train-acc:', true_positives*100.0/len(self.obj.train_list))
            true_positives = 0
            saver.save(sess, './checkpoints/', global_step=step)
            self.evaluate(sess, 'val')
            print('')

    # predict values for test dataset
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = gpu_memory_fraction
    with tf.Session(config=config) as sess:
        saver.restore(sess, tf.train.latest_checkpoint('./checkpoints/'))
        model_pred = self.predict(sess, 'test')
        # save the results in the required csv format
        save_csv(model_pred, self.obj)
Example #24
Source File: finetune.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activiation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Load the best saved model.
Example #25
Source File: main.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(model.decoder.weight, model.decoder.bias, output, targets)

        loss = raw_loss
        # Activiation Regularization
        if args.alpha:
            loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        if args.beta:
            loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        if args.clip:
            torch.nn.utils.clip_grad_norm_(params, args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:05.5f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f} | bpc {:8.3f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss), cur_loss / math.log(2)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs.
Example #26
Source File: pointer.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 4 votes |
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)

# Load the best saved model.
Example #27
Source File: main.py From lm-context-analysis with Apache License 2.0 | 4 votes |
def train():
    # Turn on training mode which enables dropout.
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        bptt = args.bptt if np.random.random() < 0.95 else args.bptt / 2.
        # Prevent excessively small or negative sequence lengths
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # There's a very small chance that it could select a very long sequence length resulting in OOM
        # seq_len = min(seq_len, args.bptt + 10)

        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / args.bptt
        model.train()
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()

        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, ntokens), targets)

        loss = raw_loss
        # Activiation Regularization
        loss = loss + sum(args.alpha * dropped_rnn_h.pow(2).mean() for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (slowness)
        loss = loss + sum(args.beta * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean() for rnn_h in rnn_hs[-1:])
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += raw_loss.data
        optimizer.param_groups[0]['lr'] = lr2
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                    'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, optimizer.param_groups[0]['lr'],
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        ###
        batch += 1
        i += seq_len

# Loop over epochs.
Example #28
Source File: vae.py From CollaborativeVAE with MIT License | 4 votes |
def run_latent(self, data_x, hidden_dim, batch_size, lr, epoch, print_step=100):
    tf.reset_default_graph()
    n = data_x.shape[0]
    input_dim = len(data_x[0])
    num_iter = int(n / batch_size)
    sess = tf.Session()
    rec = {
        'W_z_mean': tf.get_variable("W_z_mean", [self.dims[1], self.n_z],
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
        'b_z_mean': tf.get_variable("b_z_mean", [self.n_z],
            initializer=tf.constant_initializer(0.0), dtype=tf.float32),
        'W_z_log_sigma': tf.get_variable("W_z_log_sigma", [self.dims[1], self.n_z],
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
        'b_z_log_sigma': tf.get_variable("b_z_log_sigma", [self.n_z],
            initializer=tf.constant_initializer(0.0), dtype=tf.float32)}
    gen = {
        'W2': tf.get_variable("W2", [self.n_z, self.dims[1]],
            initializer=tf.contrib.layers.xavier_initializer(), dtype=tf.float32),
        'b2': tf.get_variable("b2", [self.dims[1]],
            initializer=tf.constant_initializer(0.0), dtype=tf.float32)}
    x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
    z_mean = tf.matmul(x, rec['W_z_mean']) + rec['b_z_mean']
    z_log_sigma_sq = tf.matmul(x, rec['W_z_log_sigma']) + rec['b_z_log_sigma']
    eps = tf.random_normal((batch_size, hidden_dim), 0, 1, dtype=tf.float32)
    z = z_mean + tf.sqrt(tf.maximum(tf.exp(z_log_sigma_sq), 1e-10)) * eps
    x_recon = tf.matmul(z, gen['W2']) + gen['b2']
    x_recon = tf.nn.sigmoid(x_recon, name='x_recon')
    gen_loss = -tf.reduce_mean(tf.reduce_sum(x * tf.log(tf.maximum(x_recon, 1e-10))
        + (1 - x) * tf.log(tf.maximum(1 - x_recon, 1e-10)), 1))
    latent_loss = 0.5 * tf.reduce_mean(tf.reduce_sum(tf.square(z_mean)
        + tf.exp(z_log_sigma_sq) - z_log_sigma_sq - 1, 1))
    loss = gen_loss + latent_loss
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

    sess.run(tf.global_variables_initializer())
    for i in range(epoch):
        for it in range(num_iter):
            b_x, ids = utils.get_batch(data_x, batch_size)
            _, l, gl, ll = sess.run((train_op, loss, gen_loss, latent_loss), feed_dict={x: b_x})
        if (i + 1) % print_step == 0:
            logging.info('epoch {0}: batch loss = {1}, gen_loss={2}, latent_loss={3}'.format(i, l, gl, ll))

    self.weights.append(sess.run(rec['W_z_mean']))
    self.weights.append(sess.run(rec['W_z_log_sigma']))
    self.biases.append(sess.run(rec['b_z_mean']))
    self.biases.append(sess.run(rec['b_z_log_sigma']))
    self.de_weights.append(sess.run(gen['W2']))
    self.de_biases.append(sess.run(gen['b2']))
Example #29
Source File: vae.py From CollaborativeVAE with MIT License | 4 votes |
def run(self, data_x, hidden_dim, activation, loss, lr, print_step, epoch, batch_size=100):
    tf.reset_default_graph()
    input_dim = len(data_x[0])
    n = data_x.shape[0]
    num_iter = int(n / batch_size)
    sess = tf.Session()
    x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
    x_ = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_')
    encode = {'weights': tf.Variable(xavier_init(input_dim, hidden_dim, dtype=tf.float32)),
              'biases': tf.Variable(tf.zeros([hidden_dim], dtype=tf.float32))}
    decode = {'biases': tf.Variable(tf.zeros([input_dim], dtype=tf.float32)),
              'weights': tf.transpose(encode['weights'])}
    encoded = self.activate(tf.matmul(x, encode['weights']) + encode['biases'], activation)
    decoded = tf.matmul(encoded, decode['weights']) + decode['biases']
    # reconstruction loss
    if loss == 'rmse':
        # loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
        loss = tf.reduce_mean(tf.reduce_sum(tf.square(x_ - decoded), 1))
    elif loss == 'cross-entropy':
        decoded = tf.nn.sigmoid(decoded, name='decoded')
        # loss = -tf.reduce_mean(x_ * tf.log(decoded))
        loss = -tf.reduce_mean(tf.reduce_sum(x_ * tf.log(tf.maximum(decoded, 1e-16))
            + (1 - x_) * tf.log(tf.maximum(1 - decoded, 1e-16)), 1))
    train_op = tf.train.AdamOptimizer(lr).minimize(loss)

    sess.run(tf.global_variables_initializer())
    for i in range(epoch):
        for it in range(num_iter):
            b_x_, ids = utils.get_batch(data_x, batch_size)
            b_x = self.add_noise(b_x_)
            _, l = sess.run((train_op, loss), feed_dict={x: b_x, x_: b_x_})
        if (i + 1) % print_step == 0:
            l = sess.run(loss, feed_dict={x: b_x_, x_: b_x_})
            logging.info('epoch {0}: batch loss = {1}'.format(i, l))
            # debug
            # print('Decoded', sess.run(decoded, feed_dict={x: self.data_x_})[0])

    self.weights.append(sess.run(encode['weights']))
    self.biases.append(sess.run(encode['biases']))
    self.de_weights.append(sess.run(decode['weights']))
    self.de_biases.append(sess.run(decode['biases']))
    return sess.run(encoded, feed_dict={x: data_x})
Example #30
Source File: pointer.py From outlier-exposure with Apache License 2.0 | 4 votes |
def evaluate(data_source, batch_size=10, window=args.window):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    next_word_history = None
    pointer_history = None
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if i > 0: print(i, len(data_source), math.exp(total_loss / i))
        data, targets = get_batch(data_source, i, evaluation=True, args=args)
        output, hidden, rnn_outs, _ = model(data, hidden, return_h=True)
        rnn_out = rnn_outs[-1].squeeze()
        output_flat = output.view(-1, ntokens)
        ###
        # Fill pointer history
        start_idx = len(next_word_history) if next_word_history is not None else 0
        next_word_history = torch.cat([one_hot(t.data[0], ntokens) for t in targets]) if next_word_history is None else torch.cat([next_word_history, torch.cat([one_hot(t.data[0], ntokens) for t in targets])])
        #print(next_word_history)
        pointer_history = Variable(rnn_out.data) if pointer_history is None else torch.cat([pointer_history, Variable(rnn_out.data)], dim=0)
        #print(pointer_history)
        ###
        # Built-in cross entropy
        # total_loss += len(data) * criterion(output_flat, targets).data[0]
        ###
        # Manual cross entropy
        # softmax_output_flat = torch.nn.functional.softmax(output_flat)
        # soft = torch.gather(softmax_output_flat, dim=1, index=targets.view(-1, 1))
        # entropy = -torch.log(soft)
        # total_loss += len(data) * entropy.mean().data[0]
        ###
        # Pointer manual cross entropy
        loss = 0
        softmax_output_flat = torch.nn.functional.softmax(output_flat)
        for idx, vocab_loss in enumerate(softmax_output_flat):
            p = vocab_loss
            if start_idx + idx > window:
                valid_next_word = next_word_history[start_idx + idx - window:start_idx + idx]
                valid_pointer_history = pointer_history[start_idx + idx - window:start_idx + idx]
                logits = torch.mv(valid_pointer_history, rnn_out[idx])
                theta = args.theta
                ptr_attn = torch.nn.functional.softmax(theta * logits).view(-1, 1)
                ptr_dist = (ptr_attn.expand_as(valid_next_word) * valid_next_word).sum(0).squeeze()
                lambdah = args.lambdasm
                p = lambdah * ptr_dist + (1 - lambdah) * vocab_loss
            ###
            target_loss = p[targets[idx].data]
            loss += (-torch.log(target_loss)).data[0]
        total_loss += loss / batch_size
        ###
        hidden = repackage_hidden(hidden)
        next_word_history = next_word_history[-window:]
        pointer_history = pointer_history[-window:]
    return total_loss / len(data_source)

# Load the best saved model.