Python data.size() Examples
The following are 30 code examples of data.size(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module data, or try the search function.
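In the snippets below, data is almost always a torch.Tensor, so data.size() returns a torch.Size (a tuple of dimension lengths) and data.size(0) returns the length of dimension 0 as an int. A minimal, self-contained sketch of the call itself (not taken from any of the projects listed below):

import torch

data = torch.arange(12).view(6, 2)   # a toy [seq_len, batch] tensor
print(data.size())                   # torch.Size([6, 2])
print(data.size(0))                  # 6 -- length of dimension 0
nbatch = data.size(0) // 4           # integer division, as in the batchify() examples
print(nbatch)                        # 1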
Example #1
Source File: main.py From Character-Level-Language-Modeling-with-Deeper-Self-Attention-pytorch with MIT License | 7 votes |
def get_batch(source, i, train):
    if train:
        i = torch.randint(low=0, high=(len(source) - args.bptt), size=(1,)).long().item()
        seq_len = args.bptt
        target = source[i + 1:i + 1 + seq_len].t()
    else:
        seq_len = min(args.bptt, len(source) - 1 - i)
        target = source[i + seq_len, :]

    data = source[i:i + seq_len].t()

    data_mask = (data != pad).unsqueeze(-2)
    target_mask = make_std_mask(data.long())

    # reshape target to match what cross_entropy expects
    target = target.contiguous().view(-1)

    return data, target, data_mask, target_mask
Example #2
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            if args.model == 'Transformer':
                output = model(data)
                output = output.view(-1, ntokens)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            total_loss += len(data) * criterion(output, targets).item()
    return total_loss / (len(data_source) - 1)
Example #3
Source File: main.py From PyTorch with MIT License | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            if args.model == 'Transformer':
                output = model(data)
            else:
                output, hidden = model(data, hidden)
                hidden = repackage_hidden(hidden)
            output_flat = output.view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).item()
    return total_loss / (len(data_source) - 1)
Example #4
Source File: train_rnn.py From relational-rnn-pytorch with Apache License 2.0 | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            if not args.adaptivesoftmax:
                loss = criterion(output.view(-1, ntokens), targets)
            else:
                _, loss = criterion_adaptive(output.view(-1, args.nhid), targets)
            total_loss += len(data) * loss.item()
            hidden = repackage_hidden(hidden)
    return total_loss / len(data_source)
Example #5
Source File: main.py From LM_syneval with MIT License | 6 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    if isinstance(data, tuple):
        nbatch = data[0].size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        tag_data = data[1].narrow(0, 0, nbatch * bsz)
        data = data[0].narrow(0, 0, nbatch * bsz)
        # Evenly divide the data across the bsz batches.
        tag_data = tag_data.view(bsz, -1).t().contiguous()
    else:
        nbatch = data.size(0) // bsz
        # Trim off any extra elements that wouldn't cleanly fit (remainders).
        data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    # Turning the data over to CUDA at this point may lead to more OOM errors
    #if args.cuda:
    #    data = data.cuda()
    if isinstance(data, tuple):
        return data, tag_data
    return data
Example #6
Source File: train_rmc.py From relational-rnn-pytorch with Apache License 2.0 | 6 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.
    ntokens = len(corpus.dictionary)
    memory = model.module.initial_state(eval_batch_size, trainable=False).to(device)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i)
            data = torch.t(data)
            loss, memory = model(data, memory, targets)
            loss = torch.mean(loss)
            # data has shape [T * B, N]
            total_loss += args.bptt * loss.item()
    return total_loss / len(data_source)
Example #7
Source File: main.py From LM_syneval with MIT License | 6 votes |
def evaluate(lm_data_source, ccg_data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    if (not args.single) and (torch.cuda.device_count() > 1):
        # "module" is necessary when using DataParallel
        hidden = model.module.init_hidden(eval_batch_size)
    else:
        hidden = model.init_hidden(eval_batch_size)
    for i in range(0, lm_data_source.size(0) + ccg_data_source.size(0) - 1, args.bptt):
        # TAG
        if i > lm_data_source.size(0):
            data, targets = get_batch(ccg_data_source, i - lm_data_source.size(0), evaluation=True)
        # LM
        else:
            data, targets = get_batch(lm_data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        curr_loss = len(data) * criterion(output_flat, targets).data
        total_loss += curr_loss
        hidden = repackage_hidden(hidden)
    if len(ccg_data_source) == 0:
        return total_loss / len(lm_data_source)
    return total_loss[0] / (len(lm_data_source) + len(ccg_data_source))
Example #8
Source File: main.py From lung_nodule_detector with MIT License | 6 votes |
def singletest(data, net, config, splitfun, combinefun, n_per_run, margin=64):
    z, h, w = data.size(2), data.size(3), data.size(4)
    print(data.size())
    data = splitfun(data, config['max_stride'], margin)
    data = Variable(data.cuda(async=True), volatile=True, requires_grad=False)
    splitlist = range(0, args.split + 1, n_per_run)
    outputlist = []
    for i in range(len(splitlist) - 1):
        output = net(data[splitlist[i]:splitlist[i + 1]])
        output = output.data.cpu().numpy()
        outputlist.append(output)
    output = np.concatenate(outputlist, 0)
    output = combinefun(output, z / config['stride'], h / config['stride'], w / config['stride'])
    return output
Example #9
Source File: main.py From Character-Level-Language-Modeling-with-Deeper-Self-Attention-pytorch with MIT License | 6 votes |
def make_std_mask(tgt):
    """Create a mask to hide padding and future words."""
    tgt_mask = (tgt != pad).unsqueeze(-2)
    tgt_mask = tgt_mask & subsequent_mask(tgt.size(-1)).type_as(tgt_mask)
    return tgt_mask

# get_batch subdivides the source data into chunks of length args.bptt.
# If source is equal to the example output of the batchify function, with
# a bptt-limit of 2, we'd get the following two Variables for i = 0:
# ┌ a g m s ┐ ┌ b h n t ┐
# └ b h n t ┘ └ c i o u ┘
# Note that despite the name of the function, the subdivision of data is not
# done along the batch dimension (i.e. dimension 1), since that was handled
# by the batchify function. The chunks are along dimension 0, corresponding
# to the seq_len dimension in the LSTM.
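The comment above describes how batchify lays the token stream out in columns and how get_batch then slices chunks of length bptt along dimension 0. A standalone, shape-only illustration of that layout (a sketch with made-up sizes, not code from the project):

import torch

stream = torch.arange(26)                 # pretend token ids
bsz, bptt = 4, 2
nbatch = stream.size(0) // bsz            # 6 full rows per column
batched = stream.narrow(0, 0, nbatch * bsz).view(bsz, -1).t().contiguous()
print(batched.size())                     # torch.Size([6, 4]) -> [seq_len, batch]

i = 0
data = batched[i:i + bptt]                # chunk along dimension 0 (seq_len)
target = batched[i + 1:i + 1 + bptt].reshape(-1)
print(data.size(), target.size())         # torch.Size([2, 4]) torch.Size([8])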
Example #10
Source File: main.py From DeepLung with GNU General Public License v3.0 | 6 votes |
def singletest(data, net, config, splitfun, combinefun, n_per_run, margin=64, isfeat=False):
    z, h, w = data.size(2), data.size(3), data.size(4)
    print(data.size())
    data = splitfun(data, config['max_stride'], margin)
    data = Variable(data.cuda(async=True), volatile=True, requires_grad=False)
    splitlist = range(0, args.split + 1, n_per_run)
    outputlist = []
    featurelist = []
    for i in range(len(splitlist) - 1):
        if isfeat:
            output, feature = net(data[splitlist[i]:splitlist[i + 1]])
            featurelist.append(feature)
        else:
            output = net(data[splitlist[i]:splitlist[i + 1]])
        output = output.data.cpu().numpy()
        outputlist.append(output)
    output = np.concatenate(outputlist, 0)
    output = combinefun(output, z / config['stride'], h / config['stride'], w / config['stride'])
    if isfeat:
        feature = np.concatenate(featurelist, 0).transpose([0, 2, 3, 4, 1])
        feature = combinefun(feature, z / config['stride'], h / config['stride'], w / config['stride'])
        return output, feature
    else:
        return output
Example #11
Source File: main_LM.py From PRPN-Analysis with MIT License | 5 votes |
def batchify(data, bsz, random_start_idx=False):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    if random_start_idx:
        start_idx = random.randint(0, data.size(0) % bsz - 1)
    else:
        start_idx = 0
    data = data.narrow(0, start_idx, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #12
Source File: main_LM.py From PRPN-Analysis with MIT License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #13
Source File: main_LM.py From PRPN-Analysis with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    train_data = batchify(corpus.train, args.batch_size, random_start_idx=True)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
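Several of the train() and evaluate() functions on this page call repackage_hidden to detach the recurrent state between batches, as the comment inside the loop explains, but the helper itself is not shown here. A common definition, along the lines of the PyTorch word-language-model example (treat this as a sketch rather than the exact code of any project above):

import torch

def repackage_hidden(h):
    """Wraps hidden states in new Tensors, detaching them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)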
Example #14
Source File: main_LM.py From PRPN with MIT License | 5 votes |
def batchify(data, bsz, random_start_idx=False):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    if random_start_idx:
        start_idx = random.randint(0, data.size(0) % bsz - 1)
    else:
        start_idx = 0
    data = data.narrow(0, start_idx, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #15
Source File: main_LM.py From PRPN with MIT License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #16
Source File: main_LM.py From PRPN with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    train_data = batchify(corpus.train, args.batch_size, random_start_idx=True)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #17
Source File: main.py From PyTorch with MIT License | 5 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #18
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_loss += loss.data
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #19
Source File: main.py From PyTorch with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    if args.model != 'Transformer':
        hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        model.zero_grad()
        if args.model == 'Transformer':
            output = model(data)
        else:
            hidden = repackage_hidden(hidden)
            output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_loss += loss.item()
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
Example #20
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #21
Source File: main.py From word-language-model with BSD 3-Clause "New" or "Revised" License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #22
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 5 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    return data.to(device)
Example #23
Source File: dynamiceval.py From dynamic-evaluation with BSD 2-Clause "Simplified" License | 5 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data

#######################################################################
Example #24
Source File: train.py From outlier-exposure with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            data.unsqueeze(-1)
            data_oe.unsqueeze(-1)

        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data

        # OE loss
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        #

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
Example #25
Source File: train_base_rates.py From outlier-exposure with Apache License 2.0 | 5 votes |
def evaluate(data_source, batch_size=10, test=False):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    if args.model == 'QRNN':
        model.reset()
    total_loss = 0
    total_oe_loss = 0
    num_batches = 0
    ntokens = len(corpus.dictionary)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, args, evaluation=True)
        data_oe, _ = get_batch(oe_val_dataset, i, args, evaluation=True)

        if len(data.size()) == 1:  # happens for test set?
            data.unsqueeze(-1)
            data_oe.unsqueeze(-1)

        if data.size(0) != data_oe.size(0):
            continue

        bs = test_batch_size if test else eval_batch_size
        hidden = model.init_hidden(2 * bs)
        hidden = repackage_hidden(hidden)

        output, hidden, rnn_hs, dropped_rnn_hs = model(torch.cat([data, data_oe], dim=1), hidden, return_h=True)
        output, output_oe = torch.chunk(dropped_rnn_hs[-1], dim=1, chunks=2)
        output, output_oe = output.contiguous(), output_oe.contiguous()
        output = output.view(output.size(0) * output.size(1), output.size(2))

        loss = criterion(model.decoder.weight, model.decoder.bias, output, targets).data

        # OE loss
        logits_oe = model.decoder(output_oe)
        smaxes_oe = F.softmax(logits_oe - torch.max(logits_oe, dim=-1, keepdim=True)[0], dim=-1)
        loss_oe = -smaxes_oe.log().mean(-1)
        loss_oe = loss_oe.mean().data
        #

        total_loss += loss
        total_oe_loss += loss_oe
        num_batches += 1
    return total_loss[0] / num_batches, total_oe_loss[0] / num_batches
Example #26
Source File: train_base_rates.py From outlier-exposure with Apache License 2.0 | 5 votes |
def get_base_rates():
    batch, i = 0, 0
    seq_len = args.bptt
    ntokens = len(corpus.dictionary)
    token_counts = np.zeros(ntokens)
    total_count = 0
    for i in range(0, train_data.size(0), args.bptt):  # Assume OE dataset is larger. It is, because we're using wikitext-2.
        data, targets = get_batch(train_data, i, args, seq_len=seq_len)
        for j in range(targets.numel()):
            token_counts[targets[j].data.cpu().numpy()[0]] += 1
            total_count += 1
        batch += 1
    return token_counts / total_count
Example #27
Source File: main.py From dni-pytorch with MIT License | 5 votes |
def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that wouldn't cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches.
    data = data.view(bsz, -1).t().contiguous()
    if args.cuda:
        data = data.cuda()
    return data
Example #28
Source File: main.py From dni-pytorch with MIT License | 5 votes |
def evaluate(data_source):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    for i in range(0, data_source.size(0) - 1, args.bptt):
        data, targets = get_batch(data_source, i, evaluation=True)
        output, hidden = model(data, hidden)
        output_flat = output.view(-1, ntokens)
        total_loss += len(data) * criterion(output_flat, targets).data
        hidden = repackage_hidden(hidden)
    return total_loss[0] / len(data_source)
Example #29
Source File: main.py From dni-pytorch with MIT License | 5 votes |
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        with dni.defer_backward():
            output, hidden = model(data, hidden)
            loss = criterion(output.view(-1, ntokens), targets)
            dni.backward(loss)
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data
        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()

# Loop over epochs.
Example #30
Source File: train.py From reversible-rnn with MIT License | 5 votes |
def evaluate(data_source, batch_size=10):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hiddens(batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, args.bptt):
            data, targets = get_batch(data_source, i, args)
            output_dict = model(data, hidden)
            output_flat = output_dict['decoded'].view(-1, ntokens)
            total_loss += len(data) * criterion(output_flat, targets).data
            hidden = repackage_hidden(output_dict['last_h'])
    return total_loss.item() / len(data_source)