Python torch.nn.CrossEntropyLoss() Examples
The following are 30 code examples of torch.nn.CrossEntropyLoss(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.nn, or try the search function.
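Before the individual project examples, here is a minimal, self-contained sketch of how nn.CrossEntropyLoss() is typically called. The shapes and values below are illustrative only and are not taken from any of the projects listed: the criterion expects raw, unnormalized logits of shape (batch_size, num_classes) and integer class indices of shape (batch_size,), and applies log-softmax internally.

import torch
import torch.nn as nn

# Minimal usage sketch: raw logits (no softmax applied) and integer class targets.
criterion = nn.CrossEntropyLoss()                 # common options: weight=..., ignore_index=..., reduction=...
logits = torch.randn(4, 10, requires_grad=True)   # (batch_size, num_classes)
targets = torch.randint(0, 10, (4,))              # class indices in [0, num_classes)
loss = criterion(logits, targets)
loss.backward()                                   # gradients flow back to the logits
print(loss.item())

Several of the examples below use the options shown in the comments: weight= for class weighting, ignore_index= to skip padding or out-of-range labels, and reduction= ('mean', 'sum', or 'none') to control how per-element losses are aggregated.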
Example #1
Source File: modeling.py From BERT-for-Chinese-Question-Answering with Apache License 2.0 | 7 votes |
def forward(self, input_ids, token_type_ids, attention_mask, labels=None):
    pooled_outputs = []
    for i in range(input_ids.size(1)):
        _, pooled_output = self.bert(input_ids[:, i, :], token_type_ids[:, i, :], attention_mask[:, i, :])
        pooled_output = self.dropout(pooled_output)
        pooled_outputs.append(pooled_output.unsqueeze_(1))
    logits = self.classifier(torch.cat(pooled_outputs, 1).view(-1, self.hidden_size))
    logits = logits.view(-1, input_ids.size(1))
    if labels is not None:
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(logits, labels)
        return loss, logits
    else:
        return logits
Example #2
Source File: train.py From pytorch-multigpu with MIT License | 7 votes |
def main():
    best_acc = 0

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    print('==> Preparing data..')
    transforms_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

    dataset_train = CIFAR10(root='../data', train=True, download=True, transform=transforms_train)

    train_loader = DataLoader(dataset_train, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_worker)

    # there are 10 classes so the dataset name is cifar-10
    classes = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')

    print('==> Making model..')

    net = pyramidnet()
    net = nn.DataParallel(net)
    net = net.to(device)
    num_params = sum(p.numel() for p in net.parameters() if p.requires_grad)
    print('The number of parameters of model is', num_params)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
    # optimizer = optim.SGD(net.parameters(), lr=args.lr,
    #                       momentum=0.9, weight_decay=1e-4)

    train(net, criterion, optimizer, train_loader, device)
Example #3
Source File: Patient2Vec.py From Patient2Vec with MIT License | 6 votes |
def get_loss(pred, y, criterion, mtr, a=0.5):
    """
    To calculate loss
    :param pred: predicted value
    :param y: actual value
    :param criterion: nn.CrossEntropyLoss
    :param mtr: beta matrix
    """
    mtr_t = torch.transpose(mtr, 1, 2)
    aa = torch.bmm(mtr, mtr_t)
    loss_fn = 0
    for i in range(aa.size()[0]):
        aai = torch.add(aa[i, ], Variable(torch.neg(torch.eye(mtr.size()[1]))))
        loss_fn += torch.trace(torch.mul(aai, aai).data)
    loss_fn /= aa.size()[0]
    loss = torch.add(criterion(pred, y), Variable(torch.FloatTensor([loss_fn * a])))
    return loss
Example #4
Source File: modeling.py From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, start_positions=None, end_positions=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = logits.split(1, dim=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    if start_positions is not None and end_positions is not None:
        # If we are on multi-GPU, split adds a dimension
        if len(start_positions.size()) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.size()) > 1:
            end_positions = end_positions.squeeze(-1)
        # sometimes the start/end positions are outside our model inputs; we ignore these terms
        ignored_index = start_logits.size(1)
        start_positions.clamp_(0, ignored_index)
        end_positions.clamp_(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2
        return total_loss
    else:
        return start_logits, end_logits
Example #5
Source File: run_cmrc2019_baseline.py From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, answer_mask=None, positions=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    answer_mask = answer_mask.to(dtype=next(self.parameters()).dtype)
    logits = self.qa_outputs(sequence_output).squeeze(-1)
    #logits = logits*answer_mask_
    logits = logits + (1 - answer_mask) * -10000.0

    if positions is not None:
        # If we are on multi-GPU, split adds a dimension
        if len(positions.size()) > 1:
            positions = positions.squeeze(-1)
        # sometimes the positions are outside our model inputs; we ignore these terms
        ignored_index = logits.size(1)
        positions.clamp_(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        total_loss = loss_fct(logits, positions)
        return total_loss
    else:
        return logits
Example #6
Source File: utils.py From deep-learning-note with MIT License | 6 votes |
def train_cnn(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print('training on', device)
    loss = nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc, time.time() - start))
Example #7
Source File: trainer.py From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0 | 6 votes |
def _loss(self, learning_algorithm, label_paddingId, use_crf=False):
    """
    :param learning_algorithm:
    :param label_paddingId:
    :param use_crf:
    :return:
    """
    if use_crf:
        loss_function = self.model.crf_layer.neg_log_likelihood_loss
        return loss_function
    elif learning_algorithm == "SGD":
        loss_function = nn.CrossEntropyLoss(ignore_index=label_paddingId, reduction="sum")
        return loss_function
    else:
        loss_function = nn.CrossEntropyLoss(ignore_index=label_paddingId, reduction="mean")
        return loss_function
Example #8
Source File: loss.py From overhaul-distillation with MIT License | 6 votes |
def FocalLoss(self, logit, target, gamma=2, alpha=0.5):
    n, c, h, w = logit.size()
    criterion = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_index,
                                    size_average=self.size_average)
    if self.cuda:
        criterion = criterion.cuda()

    logpt = -criterion(logit, target.long())
    pt = torch.exp(logpt)
    if alpha is not None:
        logpt *= alpha
    loss = -((1 - pt) ** gamma) * logpt

    if self.batch_average:
        loss /= n

    return loss
Example #9
Source File: 53_machine_translation.py From deep-learning-note with MIT License | 6 votes |
def train(encoder, decoder, dataset, lr, batch_size, num_epochs):
    enc_optimizer = torch.optim.Adam(encoder.parameters(), lr=lr)
    dec_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss(reduction='none')
    data_iter = Data.DataLoader(dataset, batch_size, shuffle=True)
    for epoch in range(num_epochs):
        l_sum = 0.0
        start = time.time()
        for X, Y in data_iter:
            enc_optimizer.zero_grad()
            dec_optimizer.zero_grad()
            l = batch_loss(encoder, decoder, X, Y, loss)
            l.backward()
            enc_optimizer.step()
            dec_optimizer.step()
            l_sum += l.item()
        if (epoch + 1) % 10 == 0:
            print("epoch %d, loss %.3f, time: %.1f sec"
                  % (epoch + 1, l_sum / len(data_iter), time.time() - start))
Example #10
Source File: main.py From transferlearning with MIT License | 6 votes |
def test(model, data_tar, e):
    total_loss_test = 0
    correct = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for batch_id, (data, target) in enumerate(data_tar):
            data, target = data.view(-1, 28 * 28).to(DEVICE), target.to(DEVICE)
            model.eval()
            ypred, _, _ = model(data, data)
            loss = criterion(ypred, target)
            pred = ypred.data.max(1)[1]  # get the index of the max log-probability
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()
            total_loss_test += loss.data
        accuracy = correct * 100. / len(data_tar.dataset)
        res = 'Test: total loss: {:.6f}, correct: [{}/{}], testing accuracy: {:.4f}%'.format(
            total_loss_test, correct, len(data_tar.dataset), accuracy
        )
        tqdm.write(res)
        RESULT_TEST.append([e, total_loss_test, accuracy])
        log_test.write(res + '\n')
Example #11
Source File: components.py From interpret-text with MIT License | 6 votes |
def __init__(self, args, model):
    """Initialize an instance of the wrapper

    :param args: arguments containing training and structure parameters
    :type args: ModelArguments
    :param model: A classifier module, ex. BERT or RNN classifier module
    :type model: BertForSequenceClassification or ClassifierModule
    """
    self.args = args
    self.model = model
    self.opt = None
    self.num_epochs = args.num_pretrain_epochs
    self.epochs_since_improv = 0
    self.best_test_acc = 0
    self.avg_accuracy = 0
    self.test_accs = []
    self.train_accs = []
    self.loss_func = nn.CrossEntropyLoss(reduction="none")
Example #12
Source File: bert_tagger.py From mrc-for-flat-nested-ner with Apache License 2.0 | 6 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None, input_mask=None):
    last_bert_layer, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                               output_all_encoded_layers=False)
    last_bert_layer = last_bert_layer.view(-1, self.hidden_size)
    last_bert_layer = self.dropout(last_bert_layer)
    logits = self.classifier(last_bert_layer)

    if labels is not None:
        loss_fct = CrossEntropyLoss()
        if input_mask is not None:
            masked_logits = torch.masked_select(logits, input_mask)
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        else:
            loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return loss
    else:
        return logits
Example #13
Source File: bert_mrc_ner.py From mrc-for-flat-nested-ner with Apache License 2.0 | 6 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, start_positions=None, end_positions=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    sequence_output = sequence_output.view(-1, self.hidden_size)

    start_logits = self.start_outputs(sequence_output)
    end_logits = self.end_outputs(sequence_output)

    if start_positions is not None and end_positions is not None:
        loss_fct = CrossEntropyLoss()
        start_loss = loss_fct(start_logits.view(-1, 2), start_positions.view(-1))
        end_loss = loss_fct(end_logits.view(-1, 2), end_positions.view(-1))
        # NOTE: span_loss is not defined in this excerpt; it is presumably computed elsewhere in the original model.
        total_loss = start_loss + end_loss + span_loss
        # total_loss = (start_loss + end_loss) / 2
        return total_loss
    else:
        return start_logits, end_logits
Example #14
Source File: __init__.py From dfw with MIT License | 6 votes |
def get_loss(args):
    if args.loss == 'svm':
        loss_fn = MultiClassHingeLoss()
    elif args.loss == 'ce':
        loss_fn = nn.CrossEntropyLoss()
    else:
        raise ValueError

    print('L2 regularization: \t {}'.format(args.l2))
    print('\nLoss function:')
    print(loss_fn)

    if args.cuda:
        loss_fn = loss_fn.cuda()

    return loss_fn
Example #15
Source File: SemBranch.py From Semantic-Aware-Scene-Recognition with MIT License | 6 votes |
def __init__(self, scene_classes, semantic_classes=151):
    super(SemBranch, self).__init__()

    # Semantic Branch
    self.in_block_sem = nn.Sequential(
        nn.Conv2d(semantic_classes + 1, 64, kernel_size=7, stride=2, padding=3, bias=False),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )
    self.in_block_sem_1 = BasicBlockSem(64, 128, kernel_size=3, stride=2, padding=1)
    self.in_block_sem_2 = BasicBlockSem(128, 256, kernel_size=3, stride=2, padding=1)
    self.in_block_sem_3 = BasicBlockSem(256, 512, kernel_size=3, stride=2, padding=1)

    # Semantic Scene Classification Layers
    self.dropout = nn.Dropout(0.3)
    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.fc_SEM = nn.Linear(512, scene_classes)

    # Loss
    self.criterion = nn.CrossEntropyLoss()
Example #16
Source File: utility_functions.py From MaskTrack with MIT License | 6 votes |
def cross_entropy_loss_weighted(output, labels):
    temp = labels.data.cpu().numpy()
    freqCount = scipystats.itemfreq(temp)
    total = freqCount[0][1] + freqCount[1][1]
    perc_1 = freqCount[1][1] / total
    perc_0 = freqCount[0][1] / total

    weight_array = [perc_1, perc_0]

    if torch.cuda.is_available():
        weight_tensor = torch.FloatTensor(weight_array).cuda()
    else:
        weight_tensor = torch.FloatTensor(weight_array)

    ce_loss = nn.CrossEntropyLoss(weight=weight_tensor)
    images, channels, height, width = output.data.shape
    loss = ce_loss(output, labels.long().view(images, height, width))
    return loss
Example #17
Source File: loss.py From Fast_Seg with Apache License 2.0 | 6 votes |
def __init__(self, ignore_label, reduction='elementwise_mean', thresh=0.6, min_kept=256,
             down_ratio=1, use_weight=False):
    super(OhemCrossEntropy2dTensor, self).__init__()
    self.ignore_label = ignore_label
    self.thresh = float(thresh)
    self.min_kept = int(min_kept)
    self.down_ratio = down_ratio
    if use_weight:
        weight = torch.FloatTensor(
            [0.8373, 0.918, 0.866, 1.0345, 1.0166, 0.9969, 0.9754, 1.0489,
             0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, 1.0865, 1.0955,
             1.0865, 1.1529, 1.0507])
        self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction, weight=weight,
                                                   ignore_index=ignore_label)
    else:
        self.criterion = torch.nn.CrossEntropyLoss(reduction=reduction,
                                                   ignore_index=ignore_label)
Example #18
Source File: modeling.py From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    _, pooled_output = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)

    if labels is not None:
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
        return loss
    else:
        return logits
Example #19
Source File: loss_funcs.py From mrc-for-flat-nested-ner with Apache License 2.0 | 5 votes |
def cross_entropy_loss():
    # loss
    loss = nn.CrossEntropyLoss()
    input = torch.randn(3, 5, requires_grad=True)
    target = torch.empty(3, dtype=torch.long).random_(5)
    output = loss(input, target)
    output.backward()
Example #20
Source File: bert_basic_layer.py From mrc-for-flat-nested-ner with Apache License 2.0 | 5 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
            next_sentence_label=None):
    sequence_output, pooled_output, attn = self.bert(input_ids, token_type_ids, attention_mask,
                                                     output_all_encoded_layers=False)
    prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

    if masked_lm_labels is not None and next_sentence_label is not None:
        loss_fct = CrossEntropyLoss(ignore_index=-1)
        masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
        next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
        total_loss = masked_lm_loss + next_sentence_loss
        return total_loss, attn
    else:
        return prediction_scores, seq_relationship_score, attn
Example #21
Source File: modeling.py From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None):
    flat_input_ids = input_ids.view(-1, input_ids.size(-1))
    flat_token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
    flat_attention_mask = attention_mask.view(-1, attention_mask.size(-1))
    _, pooled_output = self.bert(flat_input_ids, flat_token_type_ids, flat_attention_mask,
                                 output_all_encoded_layers=False)
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    reshaped_logits = logits.view(-1, self.num_choices)

    if labels is not None:
        loss_fct = CrossEntropyLoss()
        loss = loss_fct(reshaped_logits, labels)
        return loss
    else:
        return reshaped_logits
Example #22
Source File: bert_qa.py From mrc-for-flat-nested-ner with Apache License 2.0 | 5 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, start_positions=None, end_positions=None):
    sequence_output, _ = self.bert(input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False)
    logits = self.qa_outputs(sequence_output)
    start_logits, end_logits = logits.split(1, dim=-1)
    start_logits = start_logits.squeeze(-1)
    end_logits = end_logits.squeeze(-1)

    if start_positions is not None and end_positions is not None:
        # if we are on multi-GPU, split adds a dimension
        if len(start_positions.size()) > 1:
            start_positions = start_positions.squeeze(-1)
        if len(end_positions.size()) > 1:
            end_positions = end_positions.squeeze(-1)
        # sometimes the start/end positions are outside our model inputs;
        # we ignore these terms
        ignored_index = start_logits.size(1)
        start_positions.clamp_(0, ignored_index)
        end_positions.clamp_(0, ignored_index)

        loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
        start_loss = loss_fct(start_logits, start_positions)
        end_loss = loss_fct(end_logits, end_positions)
        total_loss = (start_loss + end_loss) / 2
        return total_loss
    else:
        return start_logits, end_logits
Example #23
Source File: cross_entropy.py From MobileNetV3-pytorch with MIT License | 5 votes |
def __init__(self, weight=None, ignore_index=-100, reduction='mean'):
    super(CrossEntropyLoss, self).__init__(weight=weight, ignore_index=ignore_index, reduction=reduction)
Example #24
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_pytorch_multiclass_classifier(X, y):
    # Get unique number of classes
    numClasses = np.unique(y).shape[0]

    # create simple (dummy) Pytorch DNN model for multiclass classification
    epochs = 12
    torch_X = torch.Tensor(X).float()
    torch_y = torch.Tensor(y).long()

    # Create network structure
    net = _common_pytorch_generator(X.shape[1], numClasses=numClasses)

    # Train the model
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    return _train_pytorch_model(epochs, criterion, optimizer, net, torch_X, torch_y)
Example #25
Source File: common_utils.py From interpret-text with MIT License | 5 votes |
def create_pytorch_classifier(X, y):
    # create simple (dummy) Pytorch DNN model for binary classification
    epochs = 12
    torch_X = torch.Tensor(X).float()
    torch_y = torch.Tensor(y).long()

    # Create network structure
    net = _common_pytorch_generator(X.shape[1], numClasses=2)

    # Train the model
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    return _train_pytorch_model(epochs, criterion, optimizer, net, torch_X, torch_y)
Example #26
Source File: validate.py From ghostnet with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    model = ghostnet(num_classes=args.num_classes, width=args.width, dropout=args.dropout)
    model.load_state_dict(torch.load('./models/state_dict_93.98.pth'))

    if args.num_gpu > 1:
        model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
    elif args.num_gpu < 1:
        model = model
    else:
        model = model.cuda()
    print('GhostNet created.')

    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    model.eval()

    validate_loss_fn = nn.CrossEntropyLoss().cuda()
    eval_metrics = validate(model, loader, validate_loss_fn, args)
    print(eval_metrics)
Example #27
Source File: trainer.py From Hash-Embeddings with MIT License | 5 votes |
def __init__(self, model, criterion=nn.CrossEntropyLoss, optimizer=torch.optim.Adam, verbose=3, seed=123,
             metric="accuracy", isCuda=torch.cuda.is_available()):
    self.model = model
    np.random.seed(seed)
    torch.random.manual_seed(seed)
    self.isCuda = isCuda
    if self.isCuda:
        assert torch.cuda.is_available()
        print("Using CUDA")
        self.model = self.model.cuda()
        torch.cuda.manual_seed(seed)

    self.criterion = criterion
    self.optimizer = optimizer
    self.verbose = verbose
    self.seed = seed
    self.eval_metric = metric
    if metric == "accuracy":
        self.eval_metric = evaluate_accuracy

    self.criterion = criterion()
    self.optimizer = optimizer(model.parameters())
Example #28
Source File: train.py From MomentumContrast.pytorch with MIT License | 5 votes |
def train(model_q, model_k, device, train_loader, queue, optimizer, epoch, temp=0.07):
    model_q.train()
    total_loss = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        x_q = data[0]
        x_k = data[1]

        x_q, x_k = x_q.to(device), x_k.to(device)
        q = model_q(x_q)
        k = model_k(x_k)
        k = k.detach()

        N = data[0].shape[0]
        K = queue.shape[0]
        l_pos = torch.bmm(q.view(N, 1, -1), k.view(N, -1, 1))
        l_neg = torch.mm(q.view(N, -1), queue.T.view(-1, K))

        logits = torch.cat([l_pos.view(N, 1), l_neg], dim=1)

        labels = torch.zeros(N, dtype=torch.long)
        labels = labels.to(device)

        cross_entropy_loss = nn.CrossEntropyLoss()
        loss = cross_entropy_loss(logits / temp, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        momentum_update(model_q, model_k)

        queue = queue_data(queue, k)
        queue = dequeue_data(queue)

    total_loss /= len(train_loader.dataset)
    print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, total_loss))
Example #29
Source File: modeling.py From cmrc2019 with Creative Commons Attribution Share Alike 4.0 International | 5 votes |
def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None,
            next_sentence_label=None):
    sequence_output, pooled_output = self.bert(input_ids, token_type_ids, attention_mask,
                                               output_all_encoded_layers=False)
    prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

    if masked_lm_labels is not None and next_sentence_label is not None:
        loss_fct = CrossEntropyLoss(ignore_index=-1)
        masked_lm_loss = loss_fct(prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
        next_sentence_loss = loss_fct(seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
        total_loss = masked_lm_loss + next_sentence_loss
        return total_loss
    else:
        return prediction_scores, seq_relationship_score
Example #30
Source File: loss.py From overhaul-distillation with MIT License | 5 votes |
def CrossEntropyLoss(self, logit, target):
    n, c, h, w = logit.size()
    criterion = nn.CrossEntropyLoss(weight=self.weight, ignore_index=self.ignore_index,
                                    size_average=self.size_average)
    if self.cuda:
        criterion = criterion.cuda()

    loss = criterion(logit, target.long())

    if self.batch_average:
        loss /= n

    return loss