Python torch.optim.zero_grad() Examples
The following are 11 code examples of the optimizer method zero_grad() from torch.optim, collected from open-source projects. The original project and source file are noted above each example. You may also want to check out the other functions and classes available in the torch.optim module.
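All of the examples follow the same basic pattern: zero_grad() clears the gradients that accumulated on the optimizer's parameters during the previous iteration, backward() then accumulates fresh gradients, and step() applies the update. The sketch below shows that pattern in isolation; the model, data, and hyperparameters are hypothetical placeholders, not code from any of the projects listed here.

import torch
import torch.nn as nn
import torch.optim as optim

# Hypothetical model, data, and hyperparameters -- placeholders for illustration only.
model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

for _ in range(100):
    inputs = torch.randn(16, 10)           # dummy batch of 16 feature vectors
    targets = torch.randint(0, 2, (16,))   # dummy class labels
    optimizer.zero_grad()                  # clear gradients left over from the previous iteration
    loss = criterion(model(inputs), targets)
    loss.backward()                        # accumulate new gradients into each parameter's .grad
    optimizer.step()                       # apply the parameter update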
Example #1
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_steps(self, steps):
    self.model.train()
    optimizer = optim.SGD(self.model.parameters(), lr=4.5e-3, momentum=0.9,
                          weight_decay=4e-5, nesterov=True)
    criterion = torch.nn.CrossEntropyLoss()

    s = 0
    avg_loss = []
    iterator = iter(self.train_loader)
    while s < steps:
        try:
            batch, label = next(iterator)
        except StopIteration:
            iterator = iter(self.train_loader)
            batch, label = next(iterator)
        batch, label = batch.to('cuda'), label.to('cuda')

        optimizer.zero_grad()
        out = self.model(batch)
        loss = criterion(out, label)
        loss.backward()
        avg_loss.append(loss.item())
        optimizer.step()
        s += 1
    print('Avg Loss: {:.3f}'.format(np.mean(avg_loss)))
Example #2
Source File: run.py, from the MobileNetV3-pytorch project (MIT License)

def train(model, loader, mixup, epoch, optim, criterion, device, dtype,
          batch_size, log_interval, child):
    model.train()
    correct1, correct5 = 0, 0
    enum_load = enumerate(loader) if child else enumerate(tqdm(loader))

    for batch_idx, (data, t) in enum_load:
        data, t = data.to(device=device, dtype=dtype), t.to(device=device)
        data, target = mixup(data, t)

        optim.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optim.batch_step()

        corr = correct(output, t, topk=(1, 5))
        correct1 += corr[0]
        correct5 += corr[1]

        if batch_idx % log_interval == 0 and not child:
            tqdm.write(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}. '
                'Top-1 accuracy: {:.2f}%({:.2f}%). '
                'Top-5 accuracy: {:.2f}%({:.2f}%).'.format(
                    epoch, batch_idx, len(loader),
                    100. * batch_idx / len(loader), loss.item(),
                    100. * corr[0] / batch_size,
                    100. * correct1 / (batch_size * (batch_idx + 1)),
                    100. * corr[1] / batch_size,
                    100. * correct5 / (batch_size * (batch_idx + 1))))

    return loss.item(), correct1 / len(loader.sampler), correct5 / len(loader.sampler)
Example #3
Source File: run.py, from the MobileNetV3-pytorch project (MIT License)

def find_bounds_clr(model, loader, optimizer, criterion, device, dtype, min_lr=8e-6,
                    max_lr=8e-5, step_size=2000, mode='triangular', save_path='.'):
    model.train()
    correct1, correct5 = 0, 0
    scheduler = CyclicLR(optimizer, base_lr=min_lr, max_lr=max_lr,
                         step_size_up=step_size, mode=mode)
    epoch_count = step_size // len(loader)  # Assuming step_size is a multiple of batches per epoch
    accuracy = []

    for _ in trange(epoch_count):
        for batch_idx, (data, target) in enumerate(tqdm(loader)):
            if scheduler is not None:
                scheduler.step()
            data, target = data.to(device=device, dtype=dtype), target.to(device=device)

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            corr = correct(output, target)
            accuracy.append(corr[0] / data.shape[0])

    lrs = np.linspace(min_lr, max_lr, step_size)
    plt.plot(lrs, accuracy)
    plt.show()
    plt.savefig(os.path.join(save_path, 'find_bounds_clr.pdf'))
    np.save(os.path.join(save_path, 'acc.npy'), accuracy)
    return
Example #4
Source File: lstm.py, from the Gesture-Recognition project (MIT License)

def train(model, optim, criterion, datum, label, states, num_classes):
    ''' Modify weights based on the cost from one datapoint '''
    optim.zero_grad()
    output, states = model(datum, states)
    output = output.view(1, num_classes)
    is_correct = accuracy(output, label, num_classes)
    loss = criterion(output, label)
    loss.backward()
    states = (states[0].detach(), states[1].detach())
    optim.step()
    return loss.item(), states, is_correct
Example #5
Source File: popnn_torch.py, from the Gesture-Recognition project (MIT License)

def train(model, optim, criterion, datum, label):
    ''' Modify weights based on the cost from one datapoint '''
    optim.zero_grad()
    output = model(datum)
    output = output.view(1, num_classes)
    is_correct = accuracy(output, label)
    loss = criterion(output, label)
    loss.backward()
    optim.step()
    return loss.item(), is_correct
Example #6
Source File: orthogonal_layer.py, from the FrEIA project (MIT License)

def test_param_update(self):
    for i in range(2500):
        optim.zero_grad()
        x = torch.randn(self.batch_size, inp_size)
        y = test_net(x)
        loss = torch.mean((y - x)**2)
        loss.backward()

        for name, p in test_net.named_parameters():
            if 'weights' in name:
                gp = torch.mm(p.grad, p.data.t())
                p.grad = torch.mm(gp - gp.t(), p.data)
                weights = p.data

        optim.step()

        if i % 25 == 0:
            print(loss.item(), end='\t')
            WWt = torch.mm(weights, weights.t())
            WWt -= torch.eye(weights.shape[0])
            print(torch.max(torch.abs(WWt)).item(), end='\t')
            print(torch.mean(WWt**2).item(), end='\t')
            print()
Example #7
Source File: morphnet.py, from the LeGR project (Apache License 2.0)

def train_epoch(model, optim, criterion, loader, lbda=None, cbns=None, maps=None, constraint=None):
    model.train()
    total = 0
    top1 = 0

    for i, (batch, label) in enumerate(loader):
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = model(batch)
        _, pred = out.max(dim=1)
        top1 += pred.eq(label).sum()

        if constraint:
            reg = lbda * regularizer(model, constraint, cbns, maps)
            loss = criterion(out, label) + reg
        else:
            loss = criterion(out, label)
        loss.backward()
        optim.step()

        if (i % 100 == 0) or (i == len(loader)-1):
            print('Train | Batch ({}/{}) | Top-1: {:.2f} ({}/{})'.format(
                i+1, len(loader), float(top1)/total*100, top1, total))
        if constraint:
            truncate_smallbeta(model, cbns)
Example #8
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_step(self, policy_loss):
    self.net.zero_grad()
    policy_loss.backward()
    self.optimizer.step()
Example #9
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_step(self, state, action, target):
    self.net.zero_grad()
    pred = self.net(state, action)
    loss = self.criterion(pred, target)
    loss.backward()
    self.optimizer.step()
    return pred
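Examples #8 and #9 call zero_grad() on the network module (self.net) rather than on the optimizer. Module.zero_grad() clears the .grad buffers of every parameter the module owns, so the two forms behave the same whenever the optimizer was constructed over exactly those parameters. A minimal sketch of that equivalence, using a hypothetical linear model:

import torch
import torch.nn as nn
import torch.optim as optim

# Hypothetical model; the optimizer holds exactly the module's parameters.
net = nn.Linear(4, 1)
optimizer = optim.SGD(net.parameters(), lr=0.01)

loss = net(torch.randn(8, 4)).sum()
loss.backward()

net.zero_grad()        # clears .grad on every parameter of the module...
optimizer.zero_grad()  # ...which here has the same effect as clearing via the optimizer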
Example #10
Source File: amc.py, from the LeGR project (Apache License 2.0)

def train_epoch(self, optim, criterion):
    self.model.train()
    total = 0
    top1 = 0
    data_t = 0
    train_t = 0
    total_loss = 0
    s = time.time()

    for i, (batch, label) in enumerate(self.train_loader):
        data_t += time.time() - s
        s = time.time()

        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = self.model(batch)
        loss = criterion(out, label)
        loss.backward()
        total_loss += loss.item()
        optim.step()
        train_t += time.time() - s

        if (i % 100 == 0) or (i == len(self.train_loader)-1):
            print('Batch ({}/{}) | Loss: {:.3f} | (PerBatch) Data: {:.3f}s, Network: {:.3f}s'.format(
                i+1, len(self.train_loader), total_loss/(i+1), data_t/(i+1), train_t/(i+1)))
        s = time.time()
Example #11
Source File: main.py, from the Text-Classification-Pytorch project (MIT License)

def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()

    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != 32:
            # One of the batches returned by BucketIterator has a length different from 32.
            continue

        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1

        if steps % 100 == 0:
            print(f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item(): .2f}%')

        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()

    return total_epoch_loss/len(train_iter), total_epoch_acc/len(train_iter)
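All of the examples above reset gradients by zeroing the existing .grad tensors. Recent PyTorch releases also support optimizer.zero_grad(set_to_none=True), which releases the gradient tensors instead of filling them with zeros and can save memory and a write kernel; whether it is the default depends on the PyTorch version, so check the documentation for the release in use. A small sketch with a hypothetical model:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(3, 1)                        # hypothetical model for illustration
optimizer = optim.SGD(model.parameters(), lr=0.1)

model(torch.randn(2, 3)).sum().backward()
optimizer.zero_grad(set_to_none=True)          # .grad becomes None rather than a zero tensor
print(next(model.parameters()).grad)           # prints: None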