Python torch.backends.cudnn.fastest() Examples
The following are 8
code examples of torch.backends.cudnn.fastest().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module
torch.backends.cudnn
, or try the search function.
Example #1
Source File: test.py From 3DMPPE_ROOTNET_RELEASE with MIT License | 6 votes |
def main():
    """Run RootNet inference over the test split and evaluate the predictions."""
    args = parse_args()
    cfg.set_args(args.gpu_ids)

    # Let cuDNN autotune and pick the fastest convolution kernels
    # (input size is fixed, so benchmarking pays off).
    cudnn.fastest = True
    cudnn.benchmark = True

    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()

    outputs = []
    with torch.no_grad():
        for input_img, cam_param in tqdm(tester.batch_generator):
            batch_coords = tester.model(input_img, cam_param)
            outputs.append(batch_coords.cpu().numpy())

    # evaluate: stack per-batch predictions into one array and score them
    tester._evaluate(np.concatenate(outputs, axis=0), cfg.result_dir)
Example #2
Source File: benchmarks.py From dlcookbook-dlbs with Apache License 2.0 | 5 votes |
def benchmark_inference(model, opts):
    """Benchmark the inference phase of *model*.

    :param obj model: A model to benchmark (must expose ``input_shape`` and ``name``).
    :param dict opts: Benchmark parameters: 'phase', 'device', 'world_size',
        'batch_size', 'dtype', 'cudnn_benchmark', 'cudnn_fastest',
        'num_warmup_batches', 'num_batches'.
    :rtype: tuple
    :return: A tuple of (model_name, numpy array of per-batch times in seconds).
    :raises ValueError: If the phase is not 'inference', or GPU inference is
        requested with more than one GPU.
    """
    # BUG FIX: the original code used `raise "<message>"`. Raising a plain
    # string is a TypeError in Python 3 (exceptions must derive from
    # BaseException), so the intended error messages were never delivered.
    if opts['phase'] != 'inference':
        raise ValueError("Phase in benchmark_inference func is '%s'" % opts['phase'])
    if opts['device'] == 'gpu' and opts['world_size'] != 1:
        raise ValueError("GPU inference can only be used with one GPU (world_size: %d)." % opts['world_size'])

    # Batch, Channels, Height, Width
    data = autograd.Variable(torch.randn((opts['batch_size'],) + model.input_shape))
    if opts['device'] == 'gpu':
        # TODO: Is it good to enable cuDNN autotuning (batch size is fixed)?
        # https://github.com/soumith/cudnn.torch#modes
        # https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
        # How many iterations do we need to get cuDNN decide what kernels to use?
        cudnn.benchmark = opts['cudnn_benchmark']
        cudnn.fastest = opts['cudnn_fastest']
        data = data.cuda()
        model = model.cuda()
        if opts['dtype'] == 'float16':
            data = data.half()
            model = model.half()
    model.eval()

    # Warmup round: lets cuDNN autotune and lazy initialization settle
    # before any timing is recorded.
    for _ in range(opts['num_warmup_batches']):
        model(data)

    # Benchmark round: time each forward pass individually.
    batch_times = np.zeros(opts['num_batches'])
    for i in range(opts['num_batches']):
        start_time = timeit.default_timer()
        model(data)
        batch_times[i] = timeit.default_timer() - start_time
    return (model.name, batch_times)
Example #3
Source File: test.py From 3DMPPE_POSENET_RELEASE with MIT License | 4 votes |
def main():
    """Run PoseNet inference on the test set, optionally with horizontal
    flip-test augmentation, then evaluate the accumulated predictions."""
    args = parse_args()
    cfg.set_args(args.gpu_ids)

    # cuDNN: prefer the fastest (non-deterministic) autotuned kernels.
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()

    preds = []
    with torch.no_grad():
        for itr, input_img in enumerate(tqdm(tester.batch_generator)):
            # forward
            coord_out = tester.model(input_img)

            if cfg.flip_test:
                # Run the horizontally flipped image through the network and
                # average the two predictions.
                flipped_input_img = flip(input_img, dims=3)
                flipped_coord_out = tester.model(flipped_input_img)
                # Mirror the x coordinate back into the unflipped frame.
                flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1
                # Swap left/right joint pairs so joint semantics match the
                # unflipped output before averaging.
                for pair in tester.flip_pairs:
                    flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = \
                        flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone()
                coord_out = (coord_out + flipped_coord_out) / 2.

            vis = False
            if vis:
                # Debug visualization: denormalize the first image of the batch
                # and draw the predicted keypoints on it.
                filename = str(itr)
                tmpimg = input_img[0].cpu().numpy()
                tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(3, 1, 1) + np.array(cfg.pixel_mean).reshape(3, 1, 1)
                tmpimg = tmpimg.astype(np.uint8)
                tmpimg = tmpimg[::-1, :, :]
                tmpimg = np.transpose(tmpimg, (1, 2, 0)).copy()
                tmpkps = np.zeros((3, tester.joint_num))
                # Rescale heatmap-space coordinates to input-image space.
                tmpkps[:2, :] = coord_out[0, :, :2].cpu().numpy().transpose(1, 0) / cfg.output_shape[0] * cfg.input_shape[0]
                tmpkps[2, :] = 1
                tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton)
                cv2.imwrite(filename + '_output.jpg', tmpimg)

            preds.append(coord_out.cpu().numpy())

    # evaluate
    tester._evaluate(np.concatenate(preds, axis=0), cfg.result_dir)
Example #4
Source File: train.py From 3DMPPE_POSENET_RELEASE with MIT License | 4 votes |
def main():
    """Train PoseNet: parse arguments, configure cuDNN, then run the epoch
    loop with per-iteration logging and per-epoch checkpointing."""
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    # train
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # forward
            trainer.optimizer.zero_grad()
            targets = {'coord': joint_img, 'vis': joint_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, targets)
            loss_coord = loss_coord.mean()

            # backward
            total_loss = loss_coord
            total_loss.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # Progress line: epoch/iteration, learning rate, timing breakdown
            # (total / gpu / read), ETA per epoch, and the current loss.
            log_fields = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]
            trainer.logger.info(' '.join(log_fields))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint the network and optimizer state at the end of each epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
Example #5
Source File: train.py From 3DMPPE_ROOTNET_RELEASE with MIT License | 4 votes |
def main():
    """Train RootNet: parse arguments, configure cuDNN, then run the epoch
    loop with per-iteration logging and per-epoch checkpointing."""
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    # train
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (input_img, k_value, root_img, root_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # forward
            trainer.optimizer.zero_grad()
            targets = {'coord': root_img, 'vis': root_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, k_value, targets)
            loss_coord = loss_coord.mean()

            # backward
            total_loss = loss_coord
            total_loss.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # Progress line: epoch/iteration, learning rate, timing breakdown
            # (total / gpu / read), ETA per epoch, and the current loss.
            log_fields = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]
            trainer.logger.info(' '.join(log_fields))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint the network and optimizer state at the end of each epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
Example #6
Source File: user.py From lidar-bonnetal with MIT License | 4 votes |
def __init__(self, ARCH, DATA, datadir, logdir, modeldir):
    """Set up the inference user: dataset parser, segmentation model,
    optional KNN post-processing, and device placement."""
    # parameters
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.logdir = logdir
    self.modeldir = modeldir

    # get the data: load the dataset-specific parser module dynamically
    parserModule = imp.load_source(
        "parserModule",
        booger.TRAIN_PATH + '/tasks/semantic/dataset/' + self.DATA["name"] + '/parser.py')
    self.parser = parserModule.Parser(
        root=self.datadir,
        train_sequences=self.DATA["split"]["train"],
        valid_sequences=self.DATA["split"]["valid"],
        test_sequences=self.DATA["split"]["test"],
        labels=self.DATA["labels"],
        color_map=self.DATA["color_map"],
        learning_map=self.DATA["learning_map"],
        learning_map_inv=self.DATA["learning_map_inv"],
        sensor=self.ARCH["dataset"]["sensor"],
        max_points=self.ARCH["dataset"]["max_points"],
        batch_size=1,
        workers=self.ARCH["train"]["workers"],
        gt=True,
        shuffle_train=False)

    # concatenate the encoder and the head (no gradients needed for inference)
    with torch.no_grad():
        self.model = Segmentator(self.ARCH, self.parser.get_n_classes(), self.modeldir)

    # use knn post processing?
    self.post = None
    if self.ARCH["post"]["KNN"]["use"]:
        self.post = KNN(self.ARCH["post"]["KNN"]["params"], self.parser.get_n_classes())

    # GPU?
    self.gpu = False
    self.model_single = self.model
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Infering in device: ", self.device)
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        # cuDNN autotuning is safe here: inference uses a fixed input size.
        cudnn.benchmark = True
        cudnn.fastest = True
        self.gpu = True
        self.model.cuda()
Example #7
Source File: userPytorch.py From bonnetal with MIT License | 4 votes |
def __init__(self, path):
    """Set up the segmentation inference user from a trained-model directory:
    load the config, build the parser and colorizer, load the traced model,
    and move everything to the GPU when one is available."""
    # parameters
    self.path = path

    # config from path
    try:
        yaml_path = self.path + "/cfg.yaml"
        print("Opening config file %s" % yaml_path)
        self.CFG = yaml.safe_load(open(yaml_path, 'r'))
    except Exception as e:
        print(e)
        print("Error opening cfg.yaml file from trained model.")
        quit()

    # make a colorizer
    self.colorizer = Colorizer(self.CFG["dataset"]["color_map"])

    # get the data: load the dataset-specific parser module dynamically
    parserModule = imp.load_source(
        "parserModule",
        booger.TRAIN_PATH + '/tasks/segmentation/dataset/' + self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(
        img_prop=self.CFG["dataset"]["img_prop"],
        img_means=self.CFG["dataset"]["img_means"],
        img_stds=self.CFG["dataset"]["img_stds"],
        classes=self.CFG["dataset"]["labels"],
        train=False)

    # some useful data
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights? (model was saved with torch.jit tracing)
    try:
        self.pytorch_path = os.path.join(self.path, "model.pytorch")
        self.model = torch.jit.load(self.pytorch_path)
        print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
        print("Couldn't load Pytorch-traced network. Error: ", e)
        quit()

    # GPU?
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        self.gpu = True
        # cuDNN autotuning is safe here: inference uses a fixed input size.
        cudnn.benchmark = True
        cudnn.fastest = True
        self.model.cuda()
        self.means = self.means.cuda()
        self.stds = self.stds.cuda()
Example #8
Source File: userPytorch.py From bonnetal with MIT License | 4 votes |
def __init__(self, path):
    """Set up the classification inference user from a trained-model directory:
    load the config, build the parser, load the traced model, and move
    everything to the GPU when one is available."""
    # parameters
    self.path = path

    # config from path
    try:
        yaml_path = self.path + "/cfg.yaml"
        print("Opening config file %s" % yaml_path)
        self.CFG = yaml.safe_load(open(yaml_path, 'r'))
    except Exception as e:
        print(e)
        print("Error opening cfg.yaml file from trained model.")
        quit()

    # get the data: load the dataset-specific parser module dynamically
    parserModule = imp.load_source(
        "parserModule",
        booger.TRAIN_PATH + '/tasks/classification/dataset/' + self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(
        img_prop=self.CFG["dataset"]["img_prop"],
        img_means=self.CFG["dataset"]["img_means"],
        img_stds=self.CFG["dataset"]["img_stds"],
        classes=self.CFG["dataset"]["labels"],
        train=False)

    # some useful data
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights? (model was saved with torch.jit tracing)
    try:
        self.pytorch_path = os.path.join(self.path, "model.pytorch")
        self.model = torch.jit.load(self.pytorch_path)
        print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
        print("Couldn't load Pytorch-traced network. Error: ", e)
        quit()

    # GPU?
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
        self.gpu = True
        # cuDNN autotuning is safe here: inference uses a fixed input size.
        cudnn.benchmark = True
        cudnn.fastest = True
        self.model.cuda()
        self.means = self.means.cuda()
        self.stds = self.stds.cuda()