Python torch.optim.zero_grad() Examples
The following are 11 code examples of the optimizer method zero_grad() from torch.optim, collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other functions and classes available in the torch.optim module.
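All of the examples follow the same basic pattern: zero_grad() clears the gradients that accumulated on the optimizer's parameters during the previous iteration, backward() then accumulates fresh gradients, and step() applies the update. The sketch below shows that pattern in isolation; the model, data, and hyperparameters are hypothetical placeholders, not code from any of the projects listed here.

import torch
import torch.nn as nn
import torch.optim as optim

# Hypothetical model, data, and hyperparameters -- placeholders for illustration only.
model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

for _ in range(100):
    inputs = torch.randn(16, 10)           # dummy batch of 16 feature vectors
    targets = torch.randint(0, 2, (16,))   # dummy class labels
    optimizer.zero_grad()                  # clear gradients left over from the previous iteration
    loss = criterion(model(inputs), targets)
    loss.backward()                        # accumulate new gradients into each parameter's .grad
    optimizer.step()                       # apply the parameter update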
Example #1
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_steps(self, steps):
    self.model.train()
    optimizer = optim.SGD(self.model.parameters(), lr=4.5e-3, momentum=0.9,
                          weight_decay=4e-5, nesterov=True)
    criterion = torch.nn.CrossEntropyLoss()

    s = 0
    avg_loss = []
    iterator = iter(self.train_loader)
    while s < steps:
        try:
            batch, label = next(iterator)
        except StopIteration:
            iterator = iter(self.train_loader)
            batch, label = next(iterator)
        batch, label = batch.to('cuda'), label.to('cuda')

        optimizer.zero_grad()
        out = self.model(batch)
        loss = criterion(out, label)
        loss.backward()
        avg_loss.append(loss.item())
        optimizer.step()
        s += 1
    print('Avg Loss: {:.3f}'.format(np.mean(avg_loss)))
Example #2
Source File: run.py, from the MobileNetV3-pytorch project (MIT License)

def train(model, loader, mixup, epoch, optim, criterion, device, dtype,
          batch_size, log_interval, child):
    model.train()
    correct1, correct5 = 0, 0
    enum_load = enumerate(loader) if child else enumerate(tqdm(loader))

    for batch_idx, (data, t) in enum_load:
        data, t = data.to(device=device, dtype=dtype), t.to(device=device)
        data, target = mixup(data, t)

        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.batch_step()

        corr = correct(output, t, topk=(1, 5))
        correct1 += corr[0]
        correct5 += corr[1]

        if batch_idx % log_interval == 0 and not child:
            tqdm.write(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}. '
                'Top-1 accuracy: {:.2f}%({:.2f}%). '
                'Top-5 accuracy: {:.2f}%({:.2f}%).'.format(
                    epoch, batch_idx, len(loader),
                    100. * batch_idx / len(loader), loss.item(),
                    100. * corr[0] / batch_size,
                    100. * correct1 / (batch_size * (batch_idx + 1)),
                    100. * corr[1] / batch_size,
                    100. * correct5 / (batch_size * (batch_idx + 1))))

    return loss.item(), correct1 / len(loader.sampler), correct5 / len(loader.sampler)
Example #3
Source File: run.py, from the MobileNetV3-pytorch project (MIT License)

def find_bounds_clr(model, loader, optimizer, criterion, device, dtype, min_lr=8e-6,
                    max_lr=8e-5, step_size=2000, mode='triangular', save_path='.'):
    model.train()
    correct1, correct5 = 0, 0
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size_up=step_size, mode=mode)
    epoch_count = step_size // len(loader)  # Assuming step_size is a multiple of batches per epoch
    accuracy = []

    for _ in trange(epoch_count):
        for batch_idx, (data, target) in enumerate(tqdm(loader)):
            if scheduler is not None:
                scheduler.step()
            data, target = data.to(device=device, dtype=dtype), target.to(device=device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            corr = correct(output, target)
            accuracy.append(corr[0] / data.shape[0])

    lrs = np.linspace(min_lr, max_lr, step_size)
    plt.plot(lrs, accuracy)
    plt.show()
    plt.savefig(os.path.join(save_path, 'find_bounds_clr.pdf'))
    np.save(os.path.join(save_path, 'acc.npy'), accuracy)
    return
Example #4
Source File: lstm.py, from the Gesture-Recognition project (MIT License)

def train(model, optim, criterion, datum, label, states, num_classes):
    ''' Modify weights based on the cost from one datapoint '''
    optim.zero_grad()
    output, states = model(datum, states)
    output = output.view(1, num_classes)
    is_correct = accuracy(output, label, num_classes)
    loss = criterion(output, label)
    loss.backward()
    states = (states[0].detach(), states[1].detach())
    optim.step()
    return loss.item(), states, is_correct
Example #5
Source File: popnn_torch.py, from the Gesture-Recognition project (MIT License)

def train(model, optim, criterion, datum, label):
    ''' Modify weights based on the cost from one datapoint '''
    optim.zero_grad()
    output = model(datum)
    output = output.view(1, num_classes)
    is_correct = accuracy(output, label)
    loss = criterion(output, label)
    loss.backward()
    optim.step()
    return loss.item(), is_correct
Example #6
Source File: orthogonal_layer.py, from the FrEIA project (MIT License)

def test_param_update(self):
    for i in range(2500):
        optim.zero_grad()
        x = torch.randn(self.batch_size, inp_size)
        y = test_net(x)
        loss = torch.mean((y - x)**2)
        loss.backward()

        for name, p in test_net.named_parameters():
            if 'weights' in name:
                gp = torch.mm(p.grad, p.data.t())
                p.grad = torch.mm(gp - gp.t(), p.data)
                weights = p.data

        optim.step()

        if i % 25 == 0:
            print(loss.item(), end='\t')
            WWt = torch.mm(weights, weights.t())
            WWt -= torch.eye(weights.shape[0])
            print(torch.max(torch.abs(WWt)).item(), end='\t')
            print(torch.mean(WWt**2).item(), end='\t')
            print()
Example #7
Source File: morphnet.py, from the LeGR project (Apache License 2.0)

def train_epoch(model, optim, criterion, loader, lbda=None, cbns=None, maps=None, constraint=None):
    model.train()
    total = 0
    top1 = 0

    for i, (batch, label) in enumerate(loader):
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = model(batch)
        _, pred = out.max(dim=1)
        top1 += pred.eq(label).sum()

        if constraint:
            reg = lbda * regularizer(model, constraint, cbns, maps)
            loss = criterion(out, label) + reg
        else:
            loss = criterion(out, label)
        loss.backward()
        optim.step()

        if (i % 100 == 0) or (i == len(loader)-1):
            print('Train | Batch ({}/{}) | Top-1: {:.2f} ({}/{})'.format(
                i+1, len(loader), float(top1)/total*100, top1, total))
        if constraint:
            truncate_smallbeta(model, cbns)
Example #8
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_step(self, policy_loss):
    self.net.zero_grad()
    policy_loss.backward()
    self.optimizer.step()
Example #9
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_step(self, state, action, target):
    self.net.zero_grad()
    pred = self.net(state, action)
    loss = self.criterion(pred, target)
    loss.backward()
    self.optimizer.step()
    return pred
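Examples #8 and #9 call zero_grad() on the network module (self.net) rather than on the optimizer. Module.zero_grad() clears the .grad buffers of every parameter the module owns, so the two forms behave the same whenever the optimizer was constructed over exactly those parameters. A minimal sketch of that equivalence, using a hypothetical linear model:

import torch
import torch.nn as nn
import torch.optim as optim

# Hypothetical model; the optimizer holds exactly the module's parameters.
net = nn.Linear(4, 1)
optimizer = optim.SGD(net.parameters(), lr=0.01)

loss = net(torch.randn(8, 4)).sum()
loss.backward()

net.zero_grad()        # clears .grad on every parameter of the module...
optimizer.zero_grad()  # ...which here has the same effect as clearing via the optimizer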
Example #10
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_epoch(self, optim, criterion):
    self.model.train()
    total = 0
    top1 = 0
    data_t = 0
    train_t = 0
    total_loss = 0
    s = time.time()

    for i, (batch, label) in enumerate(self.train_loader):
        data_t += time.time() - s
        s = time.time()

        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = self.model(batch)
        loss = criterion(out, label)
        loss.backward()
        total_loss += loss.item()
        optim.step()
        train_t += time.time() - s

        if (i % 100 == 0) or (i == len(self.train_loader)-1):
            print('Batch ({}/{}) | Loss: {:.3f} | (PerBatch) Data: {:.3f}s, Network: {:.3f}s'.format(
                i+1, len(self.train_loader), total_loss/(i+1), data_t/(i+1), train_t/(i+1)))
        s = time.time()
Example #11
Source File: main.py, from the Text-Classification-Pytorch project (MIT License)

def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()

    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != 32:
            # One of the batches returned by BucketIterator has a length different from 32.
            continue

        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1

        if steps % 100 == 0:
            print(f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')

        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()

    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)
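All of the examples above reset gradients by zeroing the existing .grad tensors. Recent PyTorch releases also support optimizer.zero_grad(set_to_none=True), which releases the gradient tensors instead of filling them with zeros and can save memory and a write kernel; whether it is the default depends on the PyTorch version, so check the documentation for the release in use. A small sketch with a hypothetical model:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(3, 1)                        # hypothetical model for illustration
optimizer = optim.SGD(model.parameters(), lr=0.1)

model(torch.randn(2, 3)).sum().backward()
optimizer.zero_grad(set_to_none=True)          # .grad becomes None rather than a zero tensor
print(next(model.parameters()).grad)           # prints: None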