Python torch.nn.NLLLoss() Examples

The following are 30 code examples of torch.nn.NLLLoss(), collected from open-source projects. You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the torch.nn module.
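As a quick orientation before the examples, here is a minimal sketch (not taken from any of the projects below) of what NLLLoss expects: log-probabilities as input, so it is typically paired with log_softmax, and that combination matches CrossEntropyLoss applied to raw logits.

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 5)             # batch of 4 examples, 5 classes
targets = torch.tensor([1, 0, 4, 2])   # target class indices

log_probs = F.log_softmax(logits, dim=1)
nll = nn.NLLLoss()(log_probs, targets)          # expects log-probabilities
xent = nn.CrossEntropyLoss()(logits, targets)   # same value, computed from raw logits
assert torch.allclose(nll, xent)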
Example #1
Source File: model.py    From reinvent-randomized with MIT License
def __init__(self, vocabulary, tokenizer, network_params=None, max_sequence_length=256, no_cuda=False,
                 mode="train"):
        """
        Implements an RNN.
        :param vocabulary: Vocabulary to use.
        :param tokenizer: Tokenizer to use.
        :param network_params: Network params to initialize the RNN.
        :param max_sequence_length: Sequences longer than this value will not be processed.
        :param no_cuda: The model is explicitly initialized as not using cuda, even if cuda is available.
        :param mode: Training or eval mode.
        """
        self.vocabulary = vocabulary
        self.tokenizer = tokenizer
        self.max_sequence_length = max_sequence_length

        if not isinstance(network_params, dict):
            network_params = {}

        self.network = RNN(**network_params)
        if torch.cuda.is_available() and not no_cuda:
            self.network.cuda()

        self.nll_loss = tnn.NLLLoss(reduction="none", ignore_index=0)

        self.set_mode(mode) 
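The reduction="none" / ignore_index=0 combination above yields one loss value per token, with padded positions contributing zero, so per-sequence NLLs can be summed afterwards. A minimal sketch (not from the project) of what that produces:

import torch
import torch.nn as nn

nll_loss = nn.NLLLoss(reduction="none", ignore_index=0)
log_probs = torch.log_softmax(torch.randn(4, 10), dim=1)  # 4 tokens, vocab of 10
targets = torch.tensor([3, 7, 0, 2])                      # index 0 marks padding

per_token = nll_loss(log_probs, targets)   # shape (4,); per_token[2] is 0.0 (ignored)
sequence_nll = per_token.sum()             # per-sequence NLL, padding excluded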
Example #2
Source File: mgru_rte_model.py    From Recognizing-Textual-Entailment with MIT License
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
        if not hasattr(self, 'criterion'):
            self.criterion = nn.NLLLoss()
        if not hasattr(self, 'optimizer'):
            self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'], betas=(0.9, 0.999), eps=1e-08, weight_decay=self.options['L2'])

        self.optimizer.zero_grad()
        preds = self.__call__(premise_batch, hypothesis_batch, training=True)
        loss = self.criterion(preds, y_batch)
        loss.backward()
        self.optimizer.step()

        _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
        y_true = self._get_numpy_array_from_variable(y_batch)
        y_pred = self._get_numpy_array_from_variable(pred_labels)
        acc = accuracy_score(y_true, y_pred)

        ret_loss = self._get_numpy_array_from_variable(loss)[0]
        return ret_loss, acc 
Example #3
Source File: rte_model.py    From Recognizing-Textual-Entailment with MIT License
def fit_batch(self, premise_batch, hypothesis_batch, y_batch):
        if not hasattr(self, 'criterion'):
            self.criterion = nn.NLLLoss()
        if not hasattr(self, 'optimizer'):
            self.optimizer = optim.Adam(self.parameters(), lr=self.options['LR'], betas=(0.9, 0.999), eps=1e-08, weight_decay=self.options['L2'])
        
        self.optimizer.zero_grad()
        preds = self.__call__(premise_batch, hypothesis_batch, training=True)
        loss = self.criterion(preds, y_batch)
        loss.backward()
        self.optimizer.step()
        
        _, pred_labels = torch.max(preds, dim=-1, keepdim=True)
        y_true = self._get_numpy_array_from_variable(y_batch)
        y_pred = self._get_numpy_array_from_variable(pred_labels)
        acc = accuracy_score(y_true, y_pred)

        ret_loss = self._get_numpy_array_from_variable(loss)[0]
        return ret_loss, acc 
Example #4
Source File: tutorial.py    From TaskBot with GNU General Public License v3.0
def trainIters(encoder, decoder, epochs, dataset, init_epochs, learning_rate=0.01):
    plot_losses = []

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for epoch in range(init_epochs, epochs+init_epochs):
        for i, (input_tensor, target_tensor) in enumerate(dataset.gen()):
            loss = train(input_tensor, target_tensor, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion)
            if loss:
                plot_losses.append(loss)
                if i % 1000 == 0:
                    print("epoch {}, step: {}, loss: {}".format(
                        epoch, i, loss
                    ))
            else:
                print(input_tensor, target_tensor)
        print("save model")
        torch.save(encoder.state_dict(), "epoch_{}_step_{}_encoder_loss_{}.pkl".format(epoch, i, loss))
        torch.save(decoder.state_dict(), "epoch_{}_step_{}_decoder_loss_{}.pkl".format(epoch, i, loss)) 
Example #5
Source File: Loss.py    From video-caption-openNMT.pytorch with MIT License
def __init__(self, generator, tgt_vocab, normalization="sents",
                 label_smoothing=0.0):
        super(NMTLossCompute, self).__init__(generator, tgt_vocab)
        assert (label_smoothing >= 0.0 and label_smoothing <= 1.0)
        if label_smoothing > 0:
            # When label smoothing is turned on,
            # KL-divergence between q_{smoothed ground truth prob.}(w)
            # and p_{prob. computed by model}(w) is minimized.
            # If label smoothing value is set to zero, the loss
            # is equivalent to NLLLoss or CrossEntropyLoss.
            # All non-true labels are uniformly set to low-confidence.
            self.criterion = nn.KLDivLoss(size_average=False)
            one_hot = torch.randn(1, len(tgt_vocab))
            one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
            one_hot[0][self.padding_idx] = 0
            self.register_buffer('one_hot', one_hot)
        else:
            weight = torch.ones(len(tgt_vocab))
            weight[self.padding_idx] = 0
            self.criterion = nn.NLLLoss(weight, size_average=False)
        self.confidence = 1.0 - label_smoothing 
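For reference, a hedged sketch (not taken from the project) of how a one_hot buffer like the one built above is typically expanded into a per-token smoothed target and fed to KLDivLoss; the variable names and sizes here are illustrative assumptions:

import torch
import torch.nn as nn

vocab_size, padding_idx, label_smoothing = 10, 1, 0.1

one_hot = torch.full((1, vocab_size), label_smoothing / (vocab_size - 2))
one_hot[0][padding_idx] = 0

target = torch.tensor([4, 7])                        # gold token ids
smoothed = one_hot.repeat(target.size(0), 1)         # one smoothed row per token
smoothed.scatter_(1, target.unsqueeze(1), 1.0 - label_smoothing)

log_probs = torch.log_softmax(torch.randn(2, vocab_size), dim=1)
loss = nn.KLDivLoss(reduction="sum")(log_probs, smoothed)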
Example #6
Source File: models.py    From gandissect with MIT License
def __init__(self, net_enc, net_dec, labeldata, loss_scale=None):
        super(SegmentationModule, self).__init__()
        self.encoder = net_enc
        self.decoder = net_dec
        self.crit_dict = nn.ModuleDict()
        if loss_scale is None:
            self.loss_scale = {"object": 1, "part": 0.5, "scene": 0.25, "material": 1}
        else:
            self.loss_scale = loss_scale

        # criterion
        self.crit_dict["object"] = nn.NLLLoss(ignore_index=0)  # ignore background 0
        self.crit_dict["material"] = nn.NLLLoss(ignore_index=0)  # ignore background 0
        self.crit_dict["scene"] = nn.NLLLoss(ignore_index=-1)  # ignore unlabelled -1

        # Label data - read from json
        self.labeldata = labeldata
        object_to_num = {k: v for v, k in enumerate(labeldata['object'])}
        part_to_num = {k: v for v, k in enumerate(labeldata['part'])}
        self.object_part = {object_to_num[k]:
                [part_to_num[p] for p in v]
                for k, v in labeldata['object_part'].items()}
        self.object_with_part = sorted(self.object_part.keys())
        self.decoder.object_part = self.object_part
        self.decoder.object_with_part = self.object_with_part 
Example #7
Source File: utilz.py    From tamil-lm2 with GNU General Public License v2.0
def train(config, argv, name, ROOT_DIR,  model, dataset):
    _batchop = partial(batchop, VOCAB=dataset.input_vocab, LABELS=dataset.output_vocab)
    predictor_feed = DataFeed(name, dataset.testset, batchop=_batchop, batch_size=1)
    train_feed     = DataFeed(name, portion(dataset.trainset, config.HPCONFIG.trainset_size),
                              batchop=_batchop, batch_size=config.CONFIG.batch_size)
    
    predictor = Predictor(name,
                          model=model,
                          directory=ROOT_DIR,
                          feed=predictor_feed,
                          repr_function=partial(repr_function
                                                , VOCAB=dataset.input_vocab
                                                , LABELS=dataset.output_vocab
                                                , dataset=dataset.testset_dict))

    loss_ = partial(loss, loss_function=nn.NLLLoss()) 
Example #8
Source File: Loss.py    From DC-NeuralConversation with MIT License
def __init__(self, generator, tgt_vocab, normalization="sents",
                 label_smoothing=0.0):
        super(NMTLossCompute, self).__init__(generator, tgt_vocab)
        assert (label_smoothing >= 0.0 and label_smoothing <= 1.0)

        if label_smoothing > 0:
            # When label smoothing is turned on,
            # KL-divergence between q_{smoothed ground truth prob.}(w)
            # and p_{prob. computed by model}(w) is minimized.
            # If label smoothing value is set to zero, the loss
            # is equivalent to NLLLoss or CrossEntropyLoss.
            # All non-true labels are uniformly set to low-confidence.
            self.criterion = nn.KLDivLoss(size_average=False)
            one_hot = torch.randn(1, len(tgt_vocab))
            one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
            one_hot[0][self.padding_idx] = 0
            self.register_buffer('one_hot', one_hot)
        else:
            weight = torch.ones(len(tgt_vocab))
            weight[self.padding_idx] = 0
            self.criterion = nn.NLLLoss(weight, size_average=False)
        self.confidence = 1.0 - label_smoothing 
Example #9
Source File: action_models.py    From Rita with BSD 3-Clause "New" or "Revised" License
def train(output, input, ann, learning_rate=0.005):
    # function for training the neural net
    criterion = nn.NLLLoss()
    ann.zero_grad()  # initializing gradients with zeros
    # predicting the output
    output_p = ann(input)  # input --> hidden_layer --> output
    loss = criterion(output_p, output)
    # comparing the guessed output with actual output
    loss.backward()  # backpropagating to compute gradients with respect to loss

    for p in ann.parameters():
        # manual SGD step: move each parameter against its gradient, scaled by the learning rate
        p.data.add_(-learning_rate, p.grad.data)
    return output, loss.data[0]  # returning predicted output and loss

#n_iters=100000 
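Note that p.data.add_(-learning_rate, p.grad.data) and loss.data[0] use APIs from older PyTorch releases. A hedged sketch of the equivalent step on current PyTorch (the module and names below are illustrative stand-ins, not from the project):

import torch
import torch.nn as nn

ann = nn.Linear(8, 3)                 # stand-in module for illustration
learning_rate = 0.005
loss = ann(torch.randn(4, 8)).sum()
loss.backward()

with torch.no_grad():
    for p in ann.parameters():
        p.add_(p.grad, alpha=-learning_rate)   # same SGD step, current signature
loss_value = loss.item()                       # replaces loss.data[0]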
Example #10
Source File: LstmModel.py    From Medical-named-entity-recognition-for-ccks2017 with MIT License
def __init__(self, parameter):
        super(LSTMTagger, self).__init__()
        self.hidden_dim = parameter['hidden_dim']

        self.word_embeddings = nn.Embedding(parameter['vocab_size'],
                                            parameter['embedding_dim'])

        self.embedding_dim = parameter['embedding_dim']

        # The LSTM takes word embeddings and capital embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm = nn.LSTM(self.embedding_dim, parameter['hidden_dim'])

        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(parameter['hidden_dim'], parameter['tagset_size'])
        self.hidden = self.init_hidden()
        self.loss_function = nn.NLLLoss() 
Example #11
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_criterion_training_set_correctly(self, net_cls, module_cls, data):
        # check that criterion's training attribute is set correctly

        X, y = data[0][:50], data[1][:50]  # don't need all the data
        side_effect = []

        class MyCriterion(nn.NLLLoss):
            """Criterion that records its training attribute"""
            def forward(self, *args, **kwargs):
                side_effect.append(self.training)
                return super().forward(*args, **kwargs)

        net = net_cls(module_cls, criterion=MyCriterion, max_epochs=1)
        net.fit(X, y)

        # called once with training=True for train step, once with
        # training=False for validation step
        assert side_effect == [True, False]

        net.partial_fit(X, y)
        # same logic as before
        assert side_effect == [True, False, True, False] 
Example #12
Source File: Loss.py    From reversible-rnn with MIT License
def __init__(self, generator, tgt_vocab, label_smoothing=0.0):
        super(NMTLossCompute, self).__init__(generator, tgt_vocab)
        assert (label_smoothing >= 0.0 and label_smoothing <= 1.0)

        self.tgt_vocab_len = len(tgt_vocab)

        if label_smoothing > 0:
            # When label smoothing is turned on,
            # KL-divergence between q_{smoothed ground truth prob.}(w)
            # and p_{prob. computed by model}(w) is minimized.
            # If label smoothing value is set to zero, the loss
            # is equivalent to NLLLoss or CrossEntropyLoss.
            # All non-true labels are uniformly set to low-confidence.
            self.criterion = nn.KLDivLoss(size_average=False)
            one_hot = torch.randn(1, len(tgt_vocab))
            one_hot.fill_(label_smoothing / (len(tgt_vocab) - 2))
            one_hot[0][self.padding_idx] = 0
            self.register_buffer('one_hot', one_hot)
        else:
            weight = torch.ones(len(tgt_vocab))
            weight[self.padding_idx] = 0
            self.criterion = nn.NLLLoss(weight, size_average=False)  # IMPORTANT: NLLLoss is what we use. Interesting that size_average=False
            # ipdb.set_trace()
        self.confidence = 1.0 - label_smoothing 
Example #13
Source File: losses.py    From second.pytorch with MIT License
def _sigmoid_cross_entropy_with_logits(logits, labels):
  # to be compatible with tensorflow, we don't use ignore_idx
  loss = torch.clamp(logits, min=0) - logits * labels.type_as(logits)
  loss += torch.log1p(torch.exp(-torch.abs(logits)))
  # loss = nn.BCEWithLogitsLoss(reduce="none")(logits, labels.type_as(logits))
  # transpose_param = [0] + [param[-1]] + param[1:-1]
  # logits = logits.permute(*transpose_param)
  # loss_ftor = nn.NLLLoss(reduce=False)
  # loss = loss_ftor(F.logsigmoid(logits), labels)
  return loss 
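The clamp/log1p expression above is the numerically stable form of sigmoid cross-entropy. A quick hedged check (not from the project) that it matches nn.BCEWithLogitsLoss elementwise:

import torch
import torch.nn as nn

logits = torch.randn(3, 5)
labels = torch.randint(0, 2, (3, 5)).float()

manual = torch.clamp(logits, min=0) - logits * labels \
         + torch.log1p(torch.exp(-torch.abs(logits)))
reference = nn.BCEWithLogitsLoss(reduction="none")(logits, labels)
assert torch.allclose(manual, reference, atol=1e-6)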
Example #14
Source File: loss.py    From joeynmt with Apache License 2.0
def __init__(self, pad_index: int, smoothing: float = 0.0):
        super(XentLoss, self).__init__()
        self.smoothing = smoothing
        self.pad_index = pad_index
        if self.smoothing <= 0.0:
            # standard xent loss
            self.criterion = nn.NLLLoss(ignore_index=self.pad_index,
                                        reduction='sum')
        else:
            # custom label-smoothed loss, computed with KL divergence loss
            self.criterion = nn.KLDivLoss(reduction='sum') 
Example #15
Source File: layers.py    From mead-baseline with Apache License 2.0
def __init__(self, LossFn: nn.Module = nn.NLLLoss, avg: str = "token"):
        """A class that applies a Loss function to sequence via the folding trick.

        :param LossFn: A loss function to apply (defaults to `nn.NLLLoss`)
        :param avg: A divisor to apply, valid values are `token` and `batch`
        """
        super().__init__()
        self.avg = avg
        if avg == "token":
            self.crit = LossFn(ignore_index=Offsets.PAD, reduction="mean")
            self._norm = self._no_norm
        else:
            self.crit = LossFn(ignore_index=Offsets.PAD, reduction="sum")
            self._norm = self._batch_norm 
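The "folding trick" mentioned in the docstring refers to flattening the batch and time dimensions into one before the criterion is applied. A minimal hedged sketch (the shapes and the PAD value are assumptions for illustration, not the project's code):

import torch
import torch.nn as nn

B, T, V, PAD = 2, 5, 7, 0
crit = nn.NLLLoss(ignore_index=PAD, reduction="mean")

log_probs = torch.log_softmax(torch.randn(B, T, V), dim=-1)
targets = torch.randint(1, V, (B, T))

loss = crit(log_probs.view(B * T, V), targets.view(B * T))  # fold batch and time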
Example #16
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_criterion_params_on_device(self, net_cls, module_cls, device):
        # attributes like criterion.weight should be automatically moved
        # to the Net's device.
        criterion = torch.nn.NLLLoss
        weight = torch.ones(2)
        net = net_cls(
            module_cls,
            criterion=criterion,
            criterion__weight=weight,
            device=device,
        )

        assert weight.device.type == 'cpu'
        net.initialize()
        assert net.criterion_.weight.device.type == device 
Example #17
Source File: seq2seq.py    From ParlAI with MIT License
def build_criterion(self):
        # set up criteria
        if self.opt.get('numsoftmax', 1) > 1:
            return nn.NLLLoss(ignore_index=self.NULL_IDX, reduction='none')
        else:
            return nn.CrossEntropyLoss(ignore_index=self.NULL_IDX, reduction='none') 
Example #18
Source File: update.py    From Federated-Learning-PyTorch with MIT License
def test_inference(args, model, test_dataset):
    """ Returns the test accuracy and loss.
    """

    model.eval()
    loss, total, correct = 0.0, 0.0, 0.0

    device = 'cuda' if args.gpu else 'cpu'
    criterion = nn.NLLLoss().to(device)
    testloader = DataLoader(test_dataset, batch_size=128,
                            shuffle=False)

    for batch_idx, (images, labels) in enumerate(testloader):
        images, labels = images.to(device), labels.to(device)

        # Inference
        outputs = model(images)
        batch_loss = criterion(outputs, labels)
        loss += batch_loss.item()

        # Prediction
        _, pred_labels = torch.max(outputs, 1)
        pred_labels = pred_labels.view(-1)
        correct += torch.sum(torch.eq(pred_labels, labels)).item()
        total += len(labels)

    accuracy = correct/total
    return accuracy, loss 
Example #19
Source File: seq2seq.py    From neural_chat with MIT License
def build_criterion(self):
        # set up criteria
        if self.opt.get('numsoftmax', 1) > 1:
            return nn.NLLLoss(ignore_index=self.NULL_IDX, reduction='sum')
        else:
            return nn.CrossEntropyLoss(ignore_index=self.NULL_IDX, reduction='sum') 
Example #20
Source File: update.py    From Federated-Learning-PyTorch with MIT License
def __init__(self, args, dataset, idxs, logger):
        self.args = args
        self.logger = logger
        self.trainloader, self.validloader, self.testloader = self.train_val_test(
            dataset, list(idxs))
        self.device = 'cuda' if args.gpu else 'cpu'
        # Default criterion set to NLL loss function
        self.criterion = nn.NLLLoss().to(self.device) 
Example #21
Source File: model.py    From conv-emotion with MIT License
def __init__(self, weight=None):
        super(MaskedNLLLoss, self).__init__()
        self.weight = weight
        self.loss = nn.NLLLoss(weight=weight,
                               reduction='sum') 
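MaskedNLLLoss wraps nn.NLLLoss(reduction='sum'); a hedged sketch (an assumption about the forward pass, not the project's exact code) of how the summed loss is typically normalized by the mask:

import torch
import torch.nn as nn

loss_fn = nn.NLLLoss(reduction="sum")
log_probs = torch.log_softmax(torch.randn(5, 3), dim=1)
target = torch.tensor([0, 2, 1, 1, 0])
mask = torch.tensor([1.0, 1.0, 1.0, 0.0, 0.0])   # last two positions are padding

# zero out masked rows, sum the NLL, then average over the unmasked positions
loss = loss_fn(log_probs * mask.unsqueeze(1), target) / mask.sum()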
Example #22
Source File: model.py    From conv-emotion with MIT License
def __init__(self, weight=None):
        super(UnMaskedWeightedNLLLoss, self).__init__()
        self.weight = weight
        self.loss = nn.NLLLoss(weight=weight,
                               reduction='sum') 
Example #23
Source File: model.py    From conv-emotion with MIT License
def __init__(self, weight=None):
        super(MaskedNLLLoss, self).__init__()
        self.weight = weight
        self.loss = nn.NLLLoss(weight=weight,
                               reduction='sum') 
Example #24
Source File: model.py    From conv-emotion with MIT License
def __init__(self, weight=None):
        super(UnMaskedWeightedNLLLoss, self).__init__()
        self.weight = weight
        self.loss = nn.NLLLoss(weight=weight,
                               reduction='sum') 
Example #25
Source File: model.py    From conv-emotion with MIT License
def __init__(self, weight=None):
        super(MaskedNLLLoss, self).__init__()
        self.weight = weight
        self.loss = nn.NLLLoss(weight=weight,
                               reduction='sum') 
Example #26
Source File: Update.py    From fed-att with MIT License
def __init__(self, args, dataset, idxs, nround, user):
        self.args = args
        self.round = nround
        self.user = user
        self.loss_func = nn.NLLLoss()
        self.data_loader = DataLoader(DatasetSplitLM(dataset, list(idxs)), batch_size=self.args.local_bs, shuffle=True) 
Example #27
Source File: nll.py    From bootstrap.pytorch with BSD 3-Clause "New" or "Revised" License
def __init__(self):
        super(NLLLoss, self).__init__()
        self.loss = nn.NLLLoss() 
Example #28
Source File: loss.py    From FasterSeg with MIT License
def __init__(self, weight=None, size_average=True, ignore_index=-100):
        super(CrossEntropyLoss2d, self).__init__()
        self.nll_loss = nn.NLLLoss(weight, size_average, ignore_index) 
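CrossEntropyLoss2d above passes weight, size_average, and ignore_index straight through to nn.NLLLoss; presumably its forward applies log_softmax over the channel dimension. A hedged sketch (an assumption, not the project's exact forward) of NLLLoss on dense 2D predictions:

import torch
import torch.nn as nn
import torch.nn.functional as F

N, C, H, W = 2, 4, 8, 8
logits = torch.randn(N, C, H, W)             # per-pixel class scores
targets = torch.randint(0, C, (N, H, W))     # per-pixel class indices

nll = nn.NLLLoss(ignore_index=-100)
loss = nll(F.log_softmax(logits, dim=1), targets)  # NLLLoss accepts (N, C, H, W) inputs directly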
Example #29
Source File: loss.py    From FasterSeg with MIT License
def __init__(self, weight=None, size_average=True, ignore_index=-100):
        super(CrossEntropyLoss2d, self).__init__()
        self.nll_loss = nn.NLLLoss(weight, size_average, ignore_index) 
Example #30
Source File: test_net.py    From skorch with BSD 3-Clause "New" or "Revised" License
def test_passes_y_to_train_split_when_not_none(
            self, needs_y, train_split, raises):
        from skorch.net import NeuralNet
        from skorch.toy import MLPModule

        # By default, `train_split=CVSplit(5)` in the `NeuralNet` definition
        kwargs = {} if train_split == 'default' else {
            'train_split': train_split}

        # Dummy loss that ignores y_true
        class UnsupervisedLoss(torch.nn.NLLLoss):
            def forward(self, y_pred, _):
                return y_pred.mean()

        # Generate the dummy dataset
        n_samples, n_features = 128, 10
        X = np.random.rand(n_samples, n_features).astype(np.float32)
        y = np.random.binomial(n=1, p=0.5, size=n_samples) if needs_y else None

        # The `NeuralNetClassifier` or `NeuralNetRegressor` always require `y`
        # Only `NeuralNet` can transfer `y=None` to `train_split` method.
        net = NeuralNet(
            MLPModule,  # Any model, it's not important here
            module__input_units=n_features,
            max_epochs=2,  # Run train loop twice to detect possible errors
            criterion=UnsupervisedLoss,
            **kwargs,
        )

        # Check if the code should fail or not
        with raises:
            net.fit(X, y)