Python model.init_hidden() Examples
The following are 30 code examples of model.init_hidden(), drawn from open-source PyTorch projects; the source file, project, and license for each are noted above the code.
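Every example below is an excerpt from a PyTorch recurrent language model, where init_hidden allocates the zero-filled initial hidden state for the RNN and repackage_hidden detaches that state from the computation graph between truncated-BPTT windows. For orientation, here is a minimal sketch of how such a model might define these two pieces; the class name, layer sizes, and the repackage_hidden helper are illustrative assumptions rather than code from any of the projects below.

import torch
import torch.nn as nn

class RNNModel(nn.Module):
    """Hypothetical word-level LSTM language model, for illustration only."""

    def __init__(self, ntokens, emsize=200, nhid=200, nlayers=2):
        super().__init__()
        self.nhid = nhid
        self.nlayers = nlayers
        self.encoder = nn.Embedding(ntokens, emsize)
        self.rnn = nn.LSTM(emsize, nhid, nlayers)
        self.decoder = nn.Linear(nhid, ntokens)

    def forward(self, input, hidden):
        emb = self.encoder(input)
        output, hidden = self.rnn(emb, hidden)
        return self.decoder(output), hidden

    def init_hidden(self, batch_size):
        # Allocate zero-filled (h_0, c_0) tensors on the same device and dtype as the weights.
        weight = next(self.parameters())
        return (weight.new_zeros(self.nlayers, batch_size, self.nhid),
                weight.new_zeros(self.nlayers, batch_size, self.nhid))

def repackage_hidden(h):
    # Detach hidden states from their history so gradients stop at the BPTT boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)

The evaluate and train functions in the examples call init_hidden once per pass (or once per batch) and repackage_hidden at every step, so backpropagation never reaches past the current bptt-sized window.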
Example #1
Source File: main.py From mos with MIT License | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)

            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

            total_loss += loss * len(data)

            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #2
Source File: train.py From darts with Apache License 2.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #3
Source File: test.py From darts with Apache License 2.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #4
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 6 votes |
def export_onnx(path, batch_size, seq_len):
    print('The model is also exported in ONNX format at {}'.
          format(os.path.realpath(args.onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)

# Loop over epochs.
Example #5
Source File: train.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args.bptt, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #6
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            if args.model == 'Transformer':
                output = model(data)
                output = output.view(-1, ntokens)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            total_loss += len(data) * criterion(output, targets).item()
    return total_loss / (len(data_source) - 1)
Example #7
Source File: test.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        print(i, data_source.size(0) - 1)
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)

# Load the best saved model.
Example #8
Source File: train.py From NAO with GNU General Public License v3.0 | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        targets = targets.view(-1)

        log_prob, hidden = parallel_model(data, hidden)
        loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

        total_loss += loss * len(data)

        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #9
Source File: main.py From PyTorch with MIT License | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            if args.model == 'Transformer':
                output = model(data)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
Example #10
Source File: finetune.py From mos with MIT License | 6 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            targets = targets.view(-1)

            log_prob, hidden = parallel_model(data, hidden)
            loss = nn.functional.nll_loss(log_prob.view(-1, log_prob.size(2)), targets).data

            total_loss += len(data) * loss

            hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #11
Source File: main.py From LM_syneval with MIT License | 6 votes |
def evaluate(lm_data_source, ccg_data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    if (not args.single) and (torch.cuda.device_count() > 1):
        # "module" is necessary when using DataParallel
        hidden = model.module.init_hidden(eval_batch_size)
    else:
        hidden = model.init_hidden(eval_batch_size)
    for i in range(0, lm_data_source.size(0) + ccg_data_source.size(0) - 1, args.bptt):
        # TAG
        if i > lm_data_source.size(0):
            data, targets = get_batch(ccg_data_source, i - lm_data_source.size(0), evaluation=True)
        # LM
        else:
            data, targets = get_batch(lm_data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        curr_loss = len(data) * criterion(output_flat, targets).data
        total_loss += curr_loss
        hidden = repackage_hidden(hidden)
    if len(ccg_data_source) == 0:
        return total_loss / len(lm_data_source)
    return total_loss[0] / (len(lm_data_source) + len(ccg_data_source))
Example #12
Source File: main.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #13
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #14
Source File: main.py From Ordered-Neurons with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        total_loss += len(data) * criterion(model.decoder.weight, model.decoder.bias, output, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss.item() / len(data_source)
Example #15
Source File: finetune.py From awd-lstm-lm with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #16
Source File: main.py From PyTorch with MIT License | 5 votes |
def export_onnx(path, batch_size, seq_len):
    print('The model is also exported in ONNX format at {}'.
          format(os.path.realpath(args.onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)

# Loop over epochs.
Example #17
Source File: main.py From PyTorch with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Example #18
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #19
Source File: eval_ood.py From outlier-exposure with Apache License 2.0 | 5 votes |
def evaluate(data_source, corpus, batch_size=10, ood=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    loss_accum = 0
    losses = []
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        if (i >= ood_num_examples // test_batch_size) and (ood is True):
            break

        hidden = model.init_hidden(batch_size)
        hidden = repackage_hidden(hidden)

        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)

        logits = model.decoder(output)
        smaxes = F.softmax(logits - torch.max(logits, dim=1, keepdim=True)[0], dim=1)
        tmp = smaxes[range(targets.size(0)), targets]
        log_prob = torch.log(tmp).mean(0)  # divided by seq len, so this is the negative nats per char
        loss = -log_prob.data.cpu().numpy()[0]

        loss_accum += loss
        # losses.append(loss)

        # Experimental!
        # anomaly_score = -torch.max(smaxes, dim=1)[0].mean()  # negative MSP
        anomaly_score = ((smaxes).add(1e-18).log() * uniform_base_rates.unsqueeze(0)).sum(1).mean(0)  # negative KL to uniform
        losses.append(anomaly_score.data.cpu().numpy()[0])
        #

    return loss_accum / (len(data_source) // args.bptt), losses

# Run on test data.
Example #20
Source File: train.py From outlier-exposure with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            data.unsqueeze(-1)
            data_oe.unsqueeze(-1)

        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data

        # OE loss
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        #

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
Example #21
Source File: train_base_rates.py From outlier-exposure with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            data.unsqueeze(-1)
            data_oe.unsqueeze(-1)

        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data

        # OE loss
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        #

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
Example #22
Source File: main.py From dni-pytorch with MIT License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #23
Source File: main.py From dni-pytorch with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        with dni.defer_backward():
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            dni.backward(loss)

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #24
Source File: finetune.py From lm-context-analysis with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    if args.model == 'QRNN':
        model.reset()
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #25
Source File: main.py From Count-Sketch-Optimizers with Apache License 2.0 | 5 votes |
def export_onnx(path, batch_size, seq_len):
    print('The model is also exported in ONNX format at {}'.
          format(os.path.realpath(args.onnx_export)))
    model.eval()
    dummy_input = torch.LongTensor(seq_len * batch_size).zero_().view(-1, batch_size).to(device)
    hidden = model.init_hidden(batch_size)
    torch.onnx.export(model, (dummy_input, hidden), path)

# Loop over epochs.
Example #26
Source File: main.py From Count-Sketch-Optimizers with Apache License 2.0 | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            sys.stdout.flush()
            total_loss = 0
            start_time = time.time()
Example #27
Source File: main.py From Count-Sketch-Optimizers with Apache License 2.0 | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #28
Source File: main.py From fraternal-dropout with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #29
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
            output = output.view(-1, ntokens)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output, targets)
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        if args.dry_run:
            break
Example #30
Source File: train.py From l2w with GNU General Public License v3.0 | 5 votes |
def evaluate(split):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss, nbatches = 0, 0
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.eval_batch_size)
    for source, target in corpus.iter(split, args.eval_batch_size, args.bptt, use_cuda=args.cuda):
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        total_loss += criterion(output, target.view(-1)).data.sum()
        hidden = repackage_hidden(hidden)
        nbatches += 1
    return total_loss / nbatches