Python torch.optim.zero_grad() Examples

The following are 11 code examples that call zero_grad() on a torch.optim optimizer. zero_grad() clears the gradients accumulated in the .grad fields of the parameters the optimizer manages; it is typically called once per training iteration, before loss.backward(), so that gradients from the previous step do not leak into the current one. You can follow the source-file reference above each example to the original project. Examples #8 and #9 instead call zero_grad() on the network itself (an nn.Module), which has the same effect when the optimizer holds all of the model's parameters.
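Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: zero the gradients, run the forward pass, compute the loss, backpropagate, and step the optimizer. The model, data, and hyperparameters below are placeholders chosen for illustration, not taken from any of the projects that follow.

import torch
import torch.nn as nn
import torch.optim as optim

# Placeholder model and data; any nn.Module and DataLoader follow the same pattern.
model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
criterion = nn.CrossEntropyLoss()

inputs = torch.randn(32, 10)           # a dummy batch of 32 samples with 10 features
targets = torch.randint(0, 2, (32,))   # dummy class labels

optimizer.zero_grad()                  # clear gradients left over from the previous step
outputs = model(inputs)                # forward pass
loss = criterion(outputs, targets)     # compute the loss
loss.backward()                        # accumulate new gradients into each parameter's .grad
optimizer.step()                       # update the parameters using those gradients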
Example #1
Source File: amc.py    From LeGR with Apache License 2.0
def train_steps(self, steps):
        self.model.train()
        optimizer = optim.SGD(self.model.parameters(), lr=4.5e-3, momentum=0.9, weight_decay=4e-5, nesterov=True)
        criterion = torch.nn.CrossEntropyLoss()

        s = 0
        avg_loss = []
        iterator = iter(self.train_loader)
        while s < steps:
            try:
                batch, label = next(iterator)
            except StopIteration:
                iterator = iter(self.train_loader)
                batch, label = next(iterator)
            batch, label = batch.to('cuda'), label.to('cuda')
            optimizer.zero_grad()
            out = self.model(batch)
            loss = criterion(out, label)
            loss.backward()
            avg_loss.append(loss.item())
            optimizer.step()
            s += 1
        print('Avg Loss: {:.3f}'.format(np.mean(avg_loss))) 
Example #2
Source File: run.py    From MobileNetV3-pytorch with MIT License
def train(model, loader, mixup, epoch, optim, criterion, device, dtype, batch_size, log_interval, child):
    model.train()
    correct1, correct5 = 0, 0

    enum_load = enumerate(loader) if child else enumerate(tqdm(loader))
    for batch_idx, (data, t) in enum_load:
        data, t = data.to(device=device, dtype=dtype), t.to(device=device)
        data, target = mixup(data, t)

        optim.zero_grad()
        output = model(data)

        loss = criterion(output, target)
        loss.backward()
        optim.batch_step()

        corr = correct(output, t, topk=(1, 5))
        correct1 += corr[0]
        correct5 += corr[1]
        if batch_idx % log_interval == 0 and not child:
            tqdm.write(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}. '
                'Top-1 accuracy: {:.2f}%({:.2f}%). '
                'Top-5 accuracy: {:.2f}%({:.2f}%).'.format(epoch, batch_idx, len(loader),
                                                           100. * batch_idx / len(loader), loss.item(),
                                                           100. * corr[0] / batch_size,
                                                           100. * correct1 / (batch_size * (batch_idx + 1)),
                                                           100. * corr[1] / batch_size,
                                                           100. * correct5 / (batch_size * (batch_idx + 1))))
    return loss.item(), correct1 / len(loader.sampler), correct5 / len(loader.sampler) 
Example #3
Source File: run.py    From MobileNetV3-pytorch with MIT License
def find_bounds_clr(model, loader, optimizer, criterion, device, dtype, min_lr=8e-6, max_lr=8e-5, step_size=2000,
                    mode='triangular', save_path='.'):
    model.train()
    correct1, correct5 = 0, 0
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr, step_size_up=step_size, mode=mode)
    epoch_count = step_size // len(loader)  # Assumes step_size is a multiple of the number of batches per epoch
    accuracy = []
    for _ in trange(epoch_count):
        for batch_idx, (data, target) in enumerate(tqdm(loader)):
            if scheduler is not None:
                scheduler.step()
            data, target = data.to(device=device, dtype=dtype), target.to(device=device)

            optimizer.zero_grad()
            output = model(data)

            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            corr = correct(output, target)
            accuracy.append(corr[0] / data.shape[0])

    lrs = np.linspace(min_lr, max_lr, step_size)
    plt.plot(lrs, accuracy)
    plt.savefig(os.path.join(save_path, 'find_bounds_clr.pdf'))  # save before show(), otherwise the saved figure is blank
    plt.show()
    np.save(os.path.join(save_path, 'acc.npy'), accuracy)
    return 
Example #4
Source File: lstm.py    From Gesture-Recognition with MIT License
def train(model, optim, criterion, datum, label, states, num_classes):
	''' Modify weights based off cost from one datapoint '''
	optim.zero_grad()
	output, states = model(datum, states)
	output = output.view(1, num_classes)
	is_correct = accuracy(output, label, num_classes)
	loss = criterion(output, label)
	loss.backward()
	states = (states[0].detach(), states[1].detach())
	optim.step()

	return loss.item(), states, is_correct 
Example #5
Source File: popnn_torch.py    From Gesture-Recognition with MIT License
def train(model, optim, criterion, datum, label):
	''' Modify weights based off cost from one datapoint '''
	optim.zero_grad()
	output = model(datum)
	output = output.view(1, num_classes)
	is_correct = accuracy(output, label)
	loss = criterion(output, label)
	loss.backward()
	optim.step()

	return loss.item(), is_correct 
Example #6
Source File: orthogonal_layer.py    From FrEIA with MIT License
def test_param_update(self):

        for i in range(2500):
            optim.zero_grad()

            x = torch.randn(self.batch_size, inp_size)
            y = test_net(x)

            loss = torch.mean((y-x)**2)
            loss.backward()

            # Project the gradient onto the tangent space of the orthogonal group:
            # with gp = G W^T, the new gradient (gp - gp^T) W is a skew-symmetric matrix
            # times W, so the parameter update keeps W (approximately) orthogonal.
            for name, p in test_net.named_parameters():
                if 'weights' in name:
                    gp = torch.mm(p.grad, p.data.t())
                    p.grad = torch.mm(gp - gp.t(), p.data)

                    weights = p.data

            optim.step()

            # Every 25 iterations, report the loss and how far W W^T deviates from the identity.
            if i % 25 == 0:
                print(loss.item(), end='\t')
                WWt = torch.mm(weights, weights.t())
                WWt -= torch.eye(weights.shape[0])
                print(torch.max(torch.abs(WWt)).item(), end='\t')
                print(torch.mean(WWt**2).item(), end='\t')
                print() 
Example #7
Source File: morphnet.py    From LeGR with Apache License 2.0
def train_epoch(model, optim, criterion, loader, lbda=None, cbns=None, maps=None, constraint=None):
    model.train()
    total = 0
    top1 = 0
    for i, (batch, label) in enumerate(loader):
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)

        out = model(batch)
        _, pred = out.max(dim=1)
        top1 += pred.eq(label).sum()
        if constraint:
            reg = lbda * regularizer(model, constraint, cbns, maps)
            loss = criterion(out, label) + reg
        else:
            loss = criterion(out, label)
        loss.backward()
        optim.step()

        if (i % 100 == 0) or (i == len(loader)-1):
            print('Train | Batch ({}/{}) | Top-1: {:.2f} ({}/{})'.format(
                i+1, len(loader),
                float(top1)/total*100, top1, total))
    if constraint:
        truncate_smallbeta(model, cbns) 
Example #8
Source File: amc.py    From LeGR with Apache License 2.0
def train_step(self, policy_loss):
        self.net.zero_grad()
        policy_loss.backward()
        self.optimizer.step() 
Example #9
Source File: amc.py    From LeGR with Apache License 2.0
def train_step(self, state, action, target):
        self.net.zero_grad()
        pred = self.net(state, action)
        loss = self.criterion(pred, target)
        loss.backward()
        self.optimizer.step()
        return pred 
Example #10
Source File: amc.py    From LeGR with Apache License 2.0
def train_epoch(self, optim, criterion):
        self.model.train()
        total = 0
        top1 = 0

        data_t = 0
        train_t = 0
        total_loss = 0
        s = time.time()
        for i, (batch, label) in enumerate(self.train_loader):
            data_t += time.time()-s
            s = time.time()
            optim.zero_grad()
            batch, label = batch.to('cuda'), label.to('cuda')
            total += batch.size(0)

            out = self.model(batch)
            loss = criterion(out, label)
            loss.backward()
            total_loss += loss.item()
            optim.step()
            train_t += time.time()-s

            if (i % 100 == 0) or (i == len(self.train_loader)-1):
                print('Batch ({}/{}) | Loss: {:.3f} | (PerBatch) Data: {:.3f}s,  Network: {:.3f}s'.format(
                    i+1, len(self.train_loader), total_loss/(i+1), data_t/(i+1), train_t/(i+1)))
            s = time.time() 
Example #11
Source File: main.py    From Text-Classification-Pytorch with MIT License
def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != 32:  # Skip batches whose size is not 32; BucketIterator can return one batch of a different length.
            continue
        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects/len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        
        if steps % 100 == 0:
            print(f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')
        
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
        
    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)