Python torch.nn.functional.smooth_l1_loss() Examples
The following are 30 code examples of torch.nn.functional.smooth_l1_loss(), collected from open-source projects. The source project and file are listed above each example. You may also want to check out all available functions and classes of the torch.nn.functional module.
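Before the project examples, a minimal self-contained call may help. smooth_l1_loss computes a Huber-style loss that is quadratic for small element-wise errors and linear for large ones (the threshold is the beta parameter, available from PyTorch 1.7 with default 1.0; older releases fix it at 1). A sketch:

import torch
import torch.nn.functional as F

pred = torch.randn(8, 4, requires_grad=True)
target = torch.randn(8, 4)

# Mean-reduced Smooth L1 loss: 0.5 * x**2 per element where |x| < 1,
# |x| - 0.5 otherwise (x = pred - target).
loss = F.smooth_l1_loss(pred, target, reduction='mean')
loss.backward()
print(loss.item())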
Example #1
Source File: state_controller.py From doom-net-pytorch with MIT License

def backward(self, rewards, nonterminals):
    # calculate step returns in reverse order
    returns = torch.Tensor(len(rewards) - 1, *self.outputs[-1].value.size()).to(device)
    step_return = self.outputs[-1].value.detach().cpu()
    for i in range(len(rewards) - 2, -1, -1):
        step_return.mul_(self.discount * nonterminals[i]).add_(rewards[i])
        returns[i] = step_return

    # calculate losses
    policy_loss = 0
    value_loss = 0
    steps = len(self.outputs) - 1
    for i in range(steps):
        advantage = returns[i] - self.outputs[i].value.detach()
        policy_loss += -self.outputs[i].log_action * advantage
        value_loss += F.smooth_l1_loss(self.outputs[i].value, returns[i])

    loss = policy_loss.mean() / steps + value_loss / steps
    loss.backward()

    # reset state
    self.reset()
Example #2
Source File: mrcnn.py From medicaldetectiontoolkit with Apache License 2.0

def compute_mrcnn_bbox_loss(mrcnn_target_deltas, mrcnn_pred_deltas, target_class_ids):
    """
    :param mrcnn_target_deltas: (n_sampled_rois, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param mrcnn_pred_deltas: (n_sampled_rois, n_classes, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param target_class_ids: (n_sampled_rois)
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(target_class_ids > 0).size():
        positive_roi_ix = torch.nonzero(target_class_ids > 0)[:, 0]
        positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
        target_bbox = mrcnn_target_deltas[positive_roi_ix, :].detach()
        pred_bbox = mrcnn_pred_deltas[positive_roi_ix, positive_roi_class_ids, :]
        loss = F.smooth_l1_loss(pred_bbox, target_bbox)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #3
Source File: ufrcnn.py From medicaldetectiontoolkit with Apache License 2.0

def compute_rpn_bbox_loss(rpn_target_deltas, rpn_pred_deltas, rpn_match):
    """
    :param rpn_target_deltas: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param rpn_pred_deltas: predicted deltas from RPN. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(rpn_match == 1).size():
        indices = torch.nonzero(rpn_match == 1).squeeze(1)
        # Pick bbox deltas that contribute to the loss
        rpn_pred_deltas = rpn_pred_deltas[indices]
        # Trim target bounding box deltas to the same length as rpn_pred_deltas.
        target_deltas = rpn_target_deltas[:rpn_pred_deltas.size()[0], :]
        # Smooth L1 loss
        loss = F.smooth_l1_loss(rpn_pred_deltas, target_deltas)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #4
Source File: models.py From distributed_rl with MIT License

def calc_priorities(self, target_net, transitions, alpha=0.6, gamma=0.999,
                    device=torch.device("cpu")):
    batch = utils.Transition(*zip(*transitions))
    next_state_batch = torch.stack(batch.next_state).to(device)
    state_batch = torch.stack(batch.state).to(device)
    action_batch = torch.stack(batch.action).to(device)
    reward_batch = torch.stack(batch.reward).to(device)
    done_batch = torch.stack(batch.done).to(device)

    state_action_values = self.forward(state_batch).gather(1, action_batch)
    next_action = self.forward(next_state_batch).argmax(dim=1).unsqueeze(1)
    next_state_values = target_net(next_state_batch).gather(1, next_action).detach()
    expected_state_action_values = (next_state_values * gamma * (1.0 - done_batch)) + reward_batch
    delta = F.smooth_l1_loss(state_action_values,
                             expected_state_action_values, reduce=False)
    prios = (delta.abs() + 1e-5).pow(alpha)
    return delta, prios.detach()
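This example keeps per-element losses (reduce=False) so each transition gets its own replay priority. The reduce and size_average arguments are deprecated in current PyTorch; reduction='none' is the modern spelling. A hedged equivalent of the call above:

# Per-transition Smooth L1 (Huber-like) TD error with the non-deprecated API;
# reduction='none' replaces the deprecated reduce=False.
delta = F.smooth_l1_loss(state_action_values,
                         expected_state_action_values,
                         reduction='none')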
Example #5
Source File: smooth_l1_loss.py From DenseMatchingBenchmark with MIT License

def loss_per_level(self, estDisp, gtDisp):
    N, C, H, W = estDisp.shape
    scaled_gtDisp = gtDisp
    scale = 1.0
    if gtDisp.shape[-2] != H or gtDisp.shape[-1] != W:
        # compute scale per level and scale gtDisp
        scale = gtDisp.shape[-1] / (W * 1.0)
        scaled_gtDisp = gtDisp / scale
        scaled_gtDisp = self.scale_func(scaled_gtDisp, (H, W))

    # mask for valid disparity
    # (start disparity, max disparity / scale)
    # Attention: the invalid disparity of KITTI is set as 0, be sure to mask it out
    mask = (scaled_gtDisp > self.start_disp) & (scaled_gtDisp < (self.max_disp / scale))
    if mask.sum() < 1.0:
        print('SmoothL1 loss: no point has a disparity in ({},{})!'.format(
            self.start_disp, self.max_disp / scale))
        loss = (torch.abs(estDisp - scaled_gtDisp) * mask.float()).mean()
        return loss

    # smooth l1 loss
    loss = F.smooth_l1_loss(estDisp[mask], scaled_gtDisp[mask], reduction='mean')
    return loss
Example #6
Source File: actor_critic.py From pytorchrl with MIT License

def finish_episode():
    R = 0
    saved_actions = model.saved_actions
    value_loss = 0
    rewards = []
    for r in model.rewards[::-1]:
        R = r + args.gamma * R
        rewards.insert(0, R)
    rewards = torch.Tensor(rewards)
    rewards = (rewards - rewards.mean()) / (rewards.std() + np.finfo(np.float32).eps)
    for (action, value), r in zip(saved_actions, rewards):
        reward = r - value.data[0, 0]
        action.reinforce(reward)
        value_loss += F.smooth_l1_loss(value, Variable(torch.Tensor([r])))
    optimizer.zero_grad()
    final_nodes = [value_loss] + list(map(lambda p: p.action, saved_actions))
    gradients = [torch.ones(1)] + [None] * len(saved_actions)
    autograd.backward(final_nodes, gradients)
    optimizer.step()
    del model.rewards[:]
    del model.saved_actions[:]
Example #7
Source File: losses.py From centerpose with MIT License

def _reg_loss(regr, gt_regr, mask):
    '''L1 regression loss
    Arguments:
        regr (batch x max_objects x dim)
        gt_regr (batch x max_objects x dim)
        mask (batch x max_objects)
    '''
    num = mask.float().sum()
    mask = mask.unsqueeze(2).expand_as(gt_regr).float()

    regr = regr * mask
    gt_regr = gt_regr * mask

    regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, size_average=False)
    regr_loss = regr_loss / (num + 1e-4)
    return regr_loss
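The size_average=False flag used here (and in several later examples) is likewise deprecated; it sums rather than averages, which reduction='sum' now expresses directly. A hedged equivalent of the call above:

# Summed Smooth L1 loss with the non-deprecated API;
# reduction='sum' replaces the deprecated size_average=False.
regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum')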
Example #8
Source File: DQN.py From BCQ with MIT License

def train(self, replay_buffer):
    # Sample replay buffer
    state, action, next_state, reward, done = replay_buffer.sample()

    # Compute the target Q value
    with torch.no_grad():
        target_Q = reward + done * self.discount * self.Q_target(next_state).max(1, keepdim=True)[0]

    # Get current Q estimate
    current_Q = self.Q(state).gather(1, action)

    # Compute Q loss
    Q_loss = F.smooth_l1_loss(current_Q, target_Q)

    # Optimize the Q
    self.Q_optimizer.zero_grad()
    Q_loss.backward()
    self.Q_optimizer.step()

    # Update target network by polyak or full copy every X iterations.
    self.iterations += 1
    self.maybe_update_target()
Example #9
Source File: train_siamrpn.py From Siamese-RPN-pytorch with MIT License

def forward(self, predictions, targets):
    print('+++++++++++++++++++++++++++++++++++')
    cout, rout = predictions

    """ class """
    class_pred = cout.squeeze().permute(1, 2, 0).reshape(-1, 2)
    class_target = targets[:, 0].long()
    pos_index = list(np.where(class_target == 1)[0])
    neg_index = list(np.where(class_target == 0)[0])
    class_target = class_target[pos_index + neg_index]
    class_pred = class_pred[pos_index + neg_index]

    closs = F.cross_entropy(class_pred, class_target, size_average=False, reduce=False)
    closs = torch.div(torch.sum(closs[np.where(class_target != -100)]), 64)

    reg_pred = rout.view(-1, 4)
    reg_target = targets[:, 1:]  # [1445, 4]
    rloss = F.smooth_l1_loss(reg_pred, reg_target, size_average=False, reduce=False)
    rloss = torch.div(torch.sum(rloss[np.where(class_target == 1)]), 16)

    # debug vis pos anchor
    loss = closs + rloss
    return closs, rloss, loss, reg_pred, reg_target, pos_index, neg_index
Example #10
Source File: train_siamrpn.py From Siamese-RPN-pytorch with MIT License

def forward(self, predictions, targets):
    print('+++++++++++++++++++++++++++++++++++++++++++++++++++')
    cout, rout = predictions

    """ class """
    class_pred, class_target = cout, targets[:, 0].long()
    pos_index, neg_index = list(np.where(class_target == 1)[0]), list(np.where(class_target == 0)[0])
    pos_num, neg_num = len(pos_index), len(neg_index)
    class_pred, class_target = class_pred[pos_index + neg_index], class_target[pos_index + neg_index]

    closs = F.cross_entropy(class_pred, class_target, size_average=False, reduce=False)
    closs = torch.div(torch.sum(closs), 64)

    """ regression """
    reg_pred = rout
    reg_target = targets[:, 1:]
    rloss = F.smooth_l1_loss(reg_pred, reg_target, size_average=False, reduce=False)  # 1445, 4
    rloss = torch.div(torch.sum(rloss, dim=1), 4)
    rloss = torch.div(torch.sum(rloss[pos_index]), 16)

    loss = closs + rloss
    return closs, rloss, loss, reg_pred, reg_target, pos_index, neg_index
Example #11
Source File: mrcnn.py From RegRCNN with Apache License 2.0

def compute_rpn_bbox_loss(rpn_pred_deltas, rpn_target_deltas, rpn_match):
    """
    :param rpn_target_deltas: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param rpn_pred_deltas: predicted deltas from RPN. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(rpn_match == 1).size():
        indices = torch.nonzero(rpn_match == 1).squeeze(1)
        # Pick bbox deltas that contribute to the loss
        rpn_pred_deltas = rpn_pred_deltas[indices]
        # Trim target bounding box deltas to the same length as rpn_pred_deltas.
        target_deltas = rpn_target_deltas[:rpn_pred_deltas.size()[0], :]
        # Smooth L1 loss
        loss = F.smooth_l1_loss(rpn_pred_deltas, target_deltas)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #12
Source File: mrcnn.py From RegRCNN with Apache License 2.0

def compute_mrcnn_regression_loss(tasks, pred, target, target_class_ids):
    """regression loss is a distance metric between target vector and predicted regression vector.
    :param pred: (n_sampled_rois, n_classes, [n_rg_feats if real regression or 1 if rg_bin task])
    :param target: (n_sampled_rois, [n_rg_feats or n_rg_bins])
    :return: differentiable loss, torch 1D tensor on cuda
    """
    if 0 not in target.shape and 0 not in torch.nonzero(target_class_ids > 0).shape:
        positive_roi_ix = torch.nonzero(target_class_ids > 0)[:, 0]
        positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
        target = target[positive_roi_ix].detach()
        pred = pred[positive_roi_ix, positive_roi_class_ids]
        if "regression_bin" in tasks:
            loss = F.cross_entropy(pred, target.long())
        else:
            loss = F.smooth_l1_loss(pred, target)
            # loss = F.mse_loss(pred, target)
    else:
        loss = torch.FloatTensor([0.]).cuda()
    return loss


############################################################
#  Detection Layer
############################################################
Example #13
Source File: retina_net.py From RegRCNN with Apache License 2.0

def compute_bbox_loss(target_deltas, pred_deltas, anchor_matches):
    """
    :param target_deltas: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param pred_deltas: predicted deltas from bbox regression head. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param anchor_matches: tensor (n_anchors). value in [-1, 0, class_ids] for negative, neutral, and positive matched anchors.
        i.e., positively matched anchors are marked by class_id > 0
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(anchor_matches > 0).shape:
        indices = torch.nonzero(anchor_matches > 0).squeeze(1)
        # Pick bbox deltas that contribute to the loss
        pred_deltas = pred_deltas[indices]
        # Trim target bounding box deltas to the same length as pred_deltas.
        target_deltas = target_deltas[:pred_deltas.shape[0], :].detach()
        # Smooth L1 loss
        loss = F.smooth_l1_loss(pred_deltas, target_deltas)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #14
Source File: retina_net.py From RegRCNN with Apache License 2.0

def compute_rg_loss(tasks, target, pred, anchor_matches):
    """
    :param target: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param pred: predicted deltas from bbox regression head. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param anchor_matches: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :return: loss: torch 1D tensor.
    """
    if 0 not in target.shape and 0 not in torch.nonzero(anchor_matches > 0).shape:
        indices = torch.nonzero(anchor_matches > 0).squeeze(1)
        # Pick rgs that contribute to the loss
        pred = pred[indices]
        # Trim target
        target = target[:pred.shape[0]].detach()
        if 'regression_bin' in tasks:
            loss = F.cross_entropy(pred, target.long())
        else:
            loss = F.smooth_l1_loss(pred, target)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #15
Source File: multibox_loss.py From pytorch-ssd with MIT License

def forward(self, confidence, predicted_locations, labels, gt_locations):
    """Compute classification loss and smooth l1 loss.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        predicted_locations (batch_size, num_priors, 4): predicted locations.
        labels (batch_size, num_priors): real labels of all the priors.
        gt_locations (batch_size, num_priors, 4): real boxes corresponding to all the priors.
    """
    num_classes = confidence.size(2)
    with torch.no_grad():
        # derived from cross_entropy = sum(log(p))
        loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
        mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

    confidence = confidence[mask, :]
    classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes),
                                          labels[mask], size_average=False)
    pos_mask = labels > 0
    predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
    gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
    smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, size_average=False)
    num_pos = gt_locations.size(0)
    return smooth_l1_loss / num_pos, classification_loss / num_pos
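box_utils.hard_negative_mining is a project helper. A hedged sketch of the usual scheme it implements (keep all positives, plus the highest-loss negatives up to neg_pos_ratio times the positive count per image):

import torch

def hard_negative_mining(loss, labels, neg_pos_ratio):
    # Assumed behavior: return a mask selecting positives and the hardest negatives.
    pos_mask = labels > 0
    num_pos = pos_mask.long().sum(dim=1, keepdim=True)
    num_neg = num_pos * neg_pos_ratio

    loss = loss.clone()
    loss[pos_mask] = -float('inf')   # exclude positives from the negative ranking
    _, indexes = loss.sort(dim=1, descending=True)
    _, orders = indexes.sort(dim=1)
    neg_mask = orders < num_neg      # top-loss negatives per image
    return pos_mask | neg_mask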
Example #16
Source File: box_utils.py From neural-motifs with MIT License

def bbox_loss(prior_boxes, deltas, gt_boxes, eps=1e-4, scale_before=1):
    """
    Computes the loss for predicting the GT boxes from prior boxes
    :param prior_boxes: [num_boxes, 4] (x1, y1, x2, y2)
    :param deltas: [num_boxes, 4] (tx, ty, th, tw)
    :param gt_boxes: [num_boxes, 4] (x1, y1, x2, y2)
    :return:
    """
    prior_centers = center_size(prior_boxes)  # (cx, cy, w, h)
    gt_centers = center_size(gt_boxes)  # (cx, cy, w, h)

    center_targets = (gt_centers[:, :2] - prior_centers[:, :2]) / prior_centers[:, 2:]
    size_targets = torch.log(gt_centers[:, 2:]) - torch.log(prior_centers[:, 2:])
    all_targets = torch.cat((center_targets, size_targets), 1)

    loss = F.smooth_l1_loss(deltas, all_targets, size_average=False) / (eps + prior_centers.size(0))
    return loss
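center_size is a helper from the neural-motifs project, not part of PyTorch. A minimal sketch of the corner-to-center conversion it is assumed to perform (exact conventions, such as a +1 pixel offset for inclusive coordinates, vary by codebase):

import torch

def center_size(boxes):
    # Assumed behavior: convert (x1, y1, x2, y2) corners to (cx, cy, w, h).
    wh = boxes[:, 2:] - boxes[:, :2]
    return torch.cat((boxes[:, :2] + wh / 2.0, wh), dim=1)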
Example #17
Source File: loss.py From SSD-variants with MIT License

def forward(self, xloc, xconf, loc, label, k=3):
    # xconf is logits
    pos = label > 0
    neg = label == 0
    label = label.clamp(min=0)

    pos_idx = pos.unsqueeze(-1).expand_as(xloc)
    loc_loss = F.smooth_l1_loss(xloc[pos_idx].view(-1, 4), loc[pos_idx].view(-1, 4),
                                size_average=False)

    conf_loss = _softmax_cross_entropy_with_logits(xconf, label)
    hard_neg = self._hard_negative_mining(conf_loss, pos, neg, k)
    conf_loss = conf_loss * (pos + hard_neg).gt(0).float()
    conf_loss = conf_loss.sum()

    N = pos.data.float().sum() + 1e-3  # .clamp(min=1e-3)
    return loc_loss / N, conf_loss / N
Example #18
Source File: mrcnn.py From RegRCNN with Apache License 2.0

def compute_mrcnn_bbox_loss(mrcnn_pred_deltas, mrcnn_target_deltas, target_class_ids):
    """
    :param mrcnn_target_deltas: (n_sampled_rois, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param mrcnn_pred_deltas: (n_sampled_rois, n_classes, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param target_class_ids: (n_sampled_rois)
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(target_class_ids > 0).size():
        positive_roi_ix = torch.nonzero(target_class_ids > 0)[:, 0]
        positive_roi_class_ids = target_class_ids[positive_roi_ix].long()
        target_bbox = mrcnn_target_deltas[positive_roi_ix, :].detach()
        pred_bbox = mrcnn_pred_deltas[positive_roi_ix, positive_roi_class_ids, :]
        loss = F.smooth_l1_loss(pred_bbox, target_bbox)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #19
Source File: Load_Agent.py From FitML with MIT License

def train_step(model, state_transitions, tgt, num_actions):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
    rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
    mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(model.device)
    next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(model.device)
    actions = [s.action for s in state_transitions]
    # import ipdb; ipdb.set_trace()
    with torch.no_grad():
        # actual_Q_values = Qs
        pred_qvals_next = model(next_states).max(-1)[0]
    model.opt.zero_grad()
    pred_qvals = model(cur_states)
    one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # loss = torch.mean(torch.sqrt((torch.sum(pred_qvals*one_hot_actions,-1) - actual_Q_values.view(-1))**2)).to(model.device)
    # loss = F.smooth_l1_loss(torch.sum(pred_qvals*one_hot_actions,-1), actual_Q_values.view(-1))
    loss = F.smooth_l1_loss(torch.sum(pred_qvals * one_hot_actions, -1),
                            rewards.view(-1) + 0.99 * mask[:, 0] * pred_qvals_next.view(-1)).mean()
    loss.backward()
    model.opt.step()
    return loss
Example #20
Source File: ATARI_DQN_CNN.py From FitML with MIT License

def train_step(model, state_transitions, tgt, num_actions, gamma):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
    rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
    mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(model.device)
    next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(model.device)
    actions = [s.action for s in state_transitions]
    # import ipdb; ipdb.set_trace()
    with torch.no_grad():
        actual_Q_values = Qs
        # import ipdb; ipdb.set_trace()
        pred_qvals_next = model(next_states.view(len(state_transitions), 3, 160, 140 * 3)).max(-1)[0]
    model.opt.zero_grad()
    pred_qvals = model(cur_states.view(len(state_transitions), 3, 160, 140 * 3))
    one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # loss = torch.mean(torch.sqrt((torch.sum(pred_qvals*one_hot_actions,-1) - actual_Q_values.view(-1))**2)).to(model.device)
    loss = F.smooth_l1_loss(torch.sum(pred_qvals * one_hot_actions, -1), actual_Q_values.view(-1))
    # loss = F.smooth_l1_loss(torch.sum(pred_qvals*one_hot_actions,-1), rewards.view(-1)+gamma*mask[:,0]*pred_qvals_next.view(-1)).mean()
    loss.backward()
    model.opt.step()
    return loss
Example #21
Source File: mrcnn.py From medicaldetectiontoolkit with Apache License 2.0

def compute_rpn_bbox_loss(rpn_target_deltas, rpn_pred_deltas, rpn_match):
    """
    :param rpn_target_deltas: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param rpn_pred_deltas: predicted deltas from RPN. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param rpn_match: (n_anchors). [-1, 0, 1] for negative, neutral, and positive matched anchors.
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(rpn_match == 1).size():
        indices = torch.nonzero(rpn_match == 1).squeeze(1)
        # Pick bbox deltas that contribute to the loss
        rpn_pred_deltas = rpn_pred_deltas[indices]
        # Trim target bounding box deltas to the same length as rpn_pred_deltas.
        target_deltas = rpn_target_deltas[:rpn_pred_deltas.size()[0], :]
        # Smooth L1 loss
        loss = F.smooth_l1_loss(rpn_pred_deltas, target_deltas)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss
Example #22
Source File: finetune.py From PSMNet with MIT License

def train(imgL, imgR, disp_L):
    model.train()
    imgL = Variable(torch.FloatTensor(imgL))
    imgR = Variable(torch.FloatTensor(imgR))
    disp_L = Variable(torch.FloatTensor(disp_L))

    if args.cuda:
        imgL, imgR, disp_true = imgL.cuda(), imgR.cuda(), disp_L.cuda()

    # ---------
    mask = (disp_true > 0)
    mask.detach_()
    # ----
    optimizer.zero_grad()

    if args.model == 'stackhourglass':
        output1, output2, output3 = model(imgL, imgR)
        output1 = torch.squeeze(output1, 1)
        output2 = torch.squeeze(output2, 1)
        output3 = torch.squeeze(output3, 1)
        loss = 0.5 * F.smooth_l1_loss(output1[mask], disp_true[mask], size_average=True) \
               + 0.7 * F.smooth_l1_loss(output2[mask], disp_true[mask], size_average=True) \
               + F.smooth_l1_loss(output3[mask], disp_true[mask], size_average=True)
    elif args.model == 'basic':
        output = model(imgL, imgR)
        output = torch.squeeze(output, 1)
        loss = F.smooth_l1_loss(output[mask], disp_true[mask], size_average=True)

    loss.backward()
    optimizer.step()

    return loss.data[0]
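This function targets an old PyTorch release: size_average=True is now spelled reduction='mean', and indexing a 0-dim loss with loss.data[0] raises an error on current versions, where loss.item() is used instead. A hedged sketch of the function's tail with the modern API (assumes PyTorch >= 1.0):

    loss = F.smooth_l1_loss(output[mask], disp_true[mask], reduction='mean')
    loss.backward()
    optimizer.step()
    return loss.item()  # .item() replaces loss.data[0] for 0-dim tensors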
Example #23
Source File: aac_noisy.py From doom-net-pytorch with MIT License

def backward(self):
    # calculate step returns in reverse order
    # rewards = torch.stack(self.rewards, dim=0)
    rewards = self.rewards
    returns = torch.Tensor(len(rewards) - 1, *self.outputs[-1].value.size())
    step_return = self.outputs[-1].value.detach().cpu()
    for i in range(len(rewards) - 2, -1, -1):
        step_return.mul_(self.discounts[i]).add_(rewards[i])
        returns[i] = step_return
    returns = returns.to(device)

    # calculate losses
    policy_loss = 0
    value_loss = 0
    steps = len(self.outputs) - 1
    for i in range(steps):
        advantage = returns[i] - self.outputs[i].value.detach()
        policy_loss += -self.outputs[i].log_action * advantage
        value_loss += F.smooth_l1_loss(self.outputs[i].value, returns[i])

    weights_l2 = 0
    for param in self.parameters():
        weights_l2 += param.norm(2)

    loss = policy_loss.mean() / steps + value_loss / steps + 0.00001 * weights_l2
    loss.backward()

    # reset state
    self.reset()
Example #24
Source File: couplenet.py From RFCN_CoupleNet.pytorch with MIT License

def ohem_detect_loss(self, cls_score, rois_label, bbox_pred, rois_target, rois_inside_ws, rois_outside_ws):

    def log_sum_exp(x):
        x_max = x.data.max()
        return torch.log(torch.sum(torch.exp(x - x_max), dim=1, keepdim=True)) + x_max

    num_hard = cfg.TRAIN.BATCH_SIZE * self.batch_size
    pos_idx = rois_label > 0
    num_pos = pos_idx.int().sum()

    # classification loss
    num_classes = cls_score.size(1)
    weight = cls_score.data.new(num_classes).fill_(1.)
    weight[0] = num_pos.data[0] / num_hard

    conf_p = cls_score.detach()
    conf_t = rois_label.detach()

    # rank on cross_entropy loss
    loss_c = log_sum_exp(conf_p) - conf_p.gather(1, conf_t.view(-1, 1))
    loss_c[pos_idx] = 100.  # include all positive samples
    _, topk_idx = torch.topk(loss_c.view(-1), num_hard)
    loss_cls = F.cross_entropy(cls_score[topk_idx], rois_label[topk_idx], weight=weight)

    # bounding box regression L1 loss
    pos_idx = pos_idx.unsqueeze(1).expand_as(bbox_pred)
    loc_p = bbox_pred[pos_idx].view(-1, 4)
    loc_t = rois_target[pos_idx].view(-1, 4)
    loss_box = F.smooth_l1_loss(loc_p, loc_t)

    return loss_cls, loss_box
Example #25
Source File: retina_net.py From medicaldetectiontoolkit with Apache License 2.0

def compute_bbox_loss(target_deltas, pred_deltas, anchor_matches):
    """
    :param target_deltas: (b, n_positive_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd)))).
        Uses 0 padding to fill in unused bbox deltas.
    :param pred_deltas: predicted deltas from bbox regression head. (b, n_anchors, (dy, dx, (dz), log(dh), log(dw), (log(dd))))
    :param anchor_matches: (n_anchors). [-1, 0, class_id] for negative, neutral, and positive matched anchors.
    :return: loss: torch 1D tensor.
    """
    if 0 not in torch.nonzero(anchor_matches > 0).size():
        indices = torch.nonzero(anchor_matches > 0).squeeze(1)
        # Pick bbox deltas that contribute to the loss
        pred_deltas = pred_deltas[indices]
        # Trim target bounding box deltas to the same length as pred_deltas.
        target_deltas = target_deltas[:pred_deltas.size()[0], :]
        # Smooth L1 loss
        loss = F.smooth_l1_loss(pred_deltas, target_deltas)
    else:
        loss = torch.FloatTensor([0]).cuda()
    return loss


############################################################
#  Output Handler
############################################################
Example #26
Source File: DQN_Cartpol_old_1.py From FitML with MIT License

def train_step(model, state_transitions, tgt, num_actions):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
    rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
    mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(model.device)
    next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(model.device)
    actions = [s.action for s in state_transitions]
    with torch.no_grad():
        actual_Q_values = Qs
        # pred_qvals_next = tgt(next_states)
        # pred_qvals_next = pred_qvals_next.max(axis=1)[0]
    model.opt.zero_grad()
    pred_qvals = model(cur_states)
    one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # loss = (rewards + mask[:,0]*pred_qvals_next - torch.sum(pred_qvals*one_hot_actions,-1)).mean()
    # print("loss input", torch.sum(pred_qvals*one_hot_actions,-1))
    # print("loss target", (rewards + 0.98*mask[:,0]*pred_qvals_next))
    # loss = F.smooth_l1_loss(torch.sum(pred_qvals*one_hot_actions,-1), (rewards + 0.98*mask[:,0]*pred_qvals_next)[0])
    loss = F.smooth_l1_loss(torch.sum(pred_qvals * one_hot_actions, -1), actual_Q_values[0])
    loss.backward()
    model.opt.step()
    print("loss ", loss)
    return loss
Example #27
Source File: DQN_tut.py From FitML with MIT License

def train_step(model, state_transitions, tgt, num_actions):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
    rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
    mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(model.device)
    next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(model.device)
    actions = [s.action for s in state_transitions]
    with torch.no_grad():
        actual_Q_values = Qs
    model.opt.zero_grad()
    pred_qvals = model(cur_states)
    one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # loss = torch.mean(torch.sqrt((torch.sum(pred_qvals*one_hot_actions,-1) - actual_Q_values.view(-1))**2)).to(model.device)
    loss = F.smooth_l1_loss(torch.sum(pred_qvals * one_hot_actions, -1), actual_Q_values.view(-1))
    loss.backward()
    model.opt.step()
    return loss

# def train_step3(model, state_transitions, tgt, num_actions):
#     cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
#     rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(model.device)
#     Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
#     mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(model.device)
#     next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(model.device)
#     actions = [s.action for s in state_transitions]
#     with torch.no_grad():
#         qvals_next = tgt(next_states).max(-1)[0]
#     model.opt.zero_grad()
#     qvals = model(cur_states)
#     one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
#     loss = (
#         (rewards + mask[:,0]*qvals_next - torch.sum(qvals*one_hot_actions,-1))
#     ).mean()
#     loss.backward()
#     model.opt.step()
#     return loss
Example #28
Source File: ATARI_DQN_CNN.py From FitML with MIT License

def train_step2(model, state_transitions, tgt, num_actions, gamma):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(s.state) for s in state_transitions])).to(model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(model.device)
    actions = [s.action for s in state_transitions]
    with torch.no_grad():
        actual_Q_values = Qs
    model.opt.zero_grad()
    pred_qvals = model(cur_states.view(len(state_transitions), 3, 160, 140 * 3))
    target_qvals = pred_qvals.clone()
    one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # import ipdb; ipdb.set_trace()
    # for a,t,q in one_hot_actions,target_qvals,Qs:
    #     t[torch.argmax(one_hot_actions).item()] = q
    #     print("t[a]", t[a])
    # import ipdb; ipdb.set_trace()
    for i in range(Qs.shape[0]):
        target_qvals[i][actions[i]] = Qs[i].item()
    # loss = torch.mean(torch.sqrt((torch.sum(pred_qvals*one_hot_actions,-1) - actual_Q_values.view(-1))**2)).to(model.device)
    loss = F.smooth_l1_loss(pred_qvals, target_qvals)
    # loss = F.smooth_l1_loss(torch.sum(pred_qvals*one_hot_actions,-1), rewards.view(-1)+gamma*mask[:,0]*pred_qvals_next.view(-1)).mean()
    loss.backward()
    model.opt.step()
    return loss
Example #29
Source File: main.py From PSMNet with MIT License

def train(imgL, imgR, disp_L):
    model.train()

    if args.cuda:
        imgL, imgR, disp_true = imgL.cuda(), imgR.cuda(), disp_L.cuda()

    # ---------
    mask = disp_true < args.maxdisp
    mask.detach_()
    # ----
    optimizer.zero_grad()

    if args.model == 'stackhourglass':
        output1, output2, output3 = model(imgL, imgR)
        output1 = torch.squeeze(output1, 1)
        output2 = torch.squeeze(output2, 1)
        output3 = torch.squeeze(output3, 1)
        loss = 0.5 * F.smooth_l1_loss(output1[mask], disp_true[mask], size_average=True) \
               + 0.7 * F.smooth_l1_loss(output2[mask], disp_true[mask], size_average=True) \
               + F.smooth_l1_loss(output3[mask], disp_true[mask], size_average=True)
    elif args.model == 'basic':
        output = model(imgL, imgR)
        output = torch.squeeze(output, 1)
        loss = F.smooth_l1_loss(output[mask], disp_true[mask], size_average=True)

    loss.backward()
    optimizer.step()

    return loss.data
Example #30
Source File: Load_AC_model.py From FitML with MIT License

def train_critic(critic_model, state_transitions, num_actions):
    if len(state_transitions) <= 0:
        print("empty state transitions")
        return
    cur_states = torch.stack(([torch.Tensor(torch.cat((torch.Tensor(s.state), torch.Tensor(s.action)), 0)) for s in state_transitions])).to(critic_model.device)
    rewards = torch.stack(([torch.Tensor([s.reward]) for s in state_transitions])).to(critic_model.device)
    Qs = torch.stack(([torch.Tensor([s.qval]) for s in state_transitions])).to(critic_model.device)
    mask = torch.stack(([torch.Tensor([0]) if s.done else torch.Tensor([1]) for s in state_transitions])).to(critic_model.device)
    next_states = torch.stack(([torch.Tensor(s.next_state) for s in state_transitions])).to(critic_model.device)
    actions = [s.action for s in state_transitions]
    # import ipdb; ipdb.set_trace()
    with torch.no_grad():
        actual_Q_values = Qs
        # pred_qvals_next = critic_model(next_states)[0]
    critic_model.opt.zero_grad()
    pred_qvals = critic_model(cur_states)
    # one_hot_actions = F.one_hot(torch.LongTensor(actions), num_actions).to(model.device)
    # loss = torch.mean(torch.sqrt((torch.sum(pred_qvals*one_hot_actions,-1) - actual_Q_values.view(-1))**2)).to(model.device)
    loss = F.smooth_l1_loss(pred_qvals.view(-1), actual_Q_values.view(-1))
    # loss = F.smooth_l1_loss(torch.sum(pred_qvals,-1), rewards.view(-1)+0.98*mask[:,0]*pred_qvals_next.view(-1)).mean()
    loss.backward()
    critic_model.opt.step()
    return loss