Python torch.backends.cudnn.fastest Examples

The following are 8 code examples of torch.backends.cudnn.fastest. Despite the page title, fastest is an attribute, not a function: every example sets it by plain assignment (cudnn.fastest = True). You can go to the original project or source file by following the Source File reference above each example, or check out the other available functions and classes of the torch.backends.cudnn module.
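Before the examples, here is a minimal sketch of the idiom they all share. Note that fastest does not appear among PyTorch's documented torch.backends.cudnn flags (it originates from the Lua cudnn.torch bindings referenced in Example #2), so assigning it has no documented effect in PyTorch; cudnn.benchmark is the flag that actually enables cuDNN autotuning.

import torch
import torch.backends.cudnn as cudnn

if torch.cuda.is_available():
    cudnn.enabled = True    # use cuDNN kernels (the default)
    cudnn.benchmark = True  # autotune the fastest kernels for fixed input shapes
    cudnn.fastest = True    # plain attribute assignment; no documented effect in PyTorch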
Example #1
Source File: test.py    From 3DMPPE_ROOTNET_RELEASE with MIT License
def main():

    args = parse_args()
    cfg.set_args(args.gpu_ids)
    cudnn.fastest = True
    cudnn.benchmark = True

    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()

    preds = []
    with torch.no_grad():
        for itr, (input_img, cam_param) in enumerate(tqdm(tester.batch_generator)):
            
            coord_out = tester.model(input_img, cam_param)
            coord_out = coord_out.cpu().numpy()
            preds.append(coord_out)
            
    # evaluate
    preds = np.concatenate(preds, axis=0)
    tester._evaluate(preds, cfg.result_dir) 
Example #2
Source File: benchmarks.py    From dlcookbook-dlbs with Apache License 2.0
def benchmark_inference(model, opts):
    """Benchmarks inference phase.

    :param obj model: A model to benchmark
    :param dict opts: A dictionary of parameters.
    :rtype: tuple
    :return: A tuple of (model_name, list of batch times)
    """
    if opts['phase'] != 'inference':
        raise ValueError("Phase in benchmark_inference func is '%s'" % opts['phase'])
    if opts['device'] == 'gpu' and opts['world_size'] != 1:
        raise ValueError("GPU inference can only be used with one GPU (world_size: %d)." % opts['world_size'])

    # Batch, Channels, Height, Width
    # autograd.Variable has been a no-op wrapper since PyTorch 0.4; a plain tensor suffices.
    data = torch.randn((opts['batch_size'],) + model.input_shape)
    if opts['device'] == 'gpu':
        # TODO: Is it good to enable cuDNN autotuning (batch size is fixed)?
        #   https://github.com/soumith/cudnn.torch#modes
        #   https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
        # How many iterations do we need to get cuDNN decide what kernels to use?
        cudnn.benchmark = opts['cudnn_benchmark']
        cudnn.fastest = opts['cudnn_fastest']

        data = data.cuda()
        model = model.cuda()
    if opts['dtype'] == 'float16':
        data = data.half()
        model = model.half()
    model.eval()
    # Do warmup round
    for i in range(opts['num_warmup_batches']):
        model(data)
    # Do benchmark round
    batch_times = np.zeros(opts['num_batches'])
    for i in range(opts['num_batches']):
        start_time = timeit.default_timer()
        model(data)
        batch_times[i] = timeit.default_timer() - start_time
    return (model.name, batch_times) 
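For reference, a hypothetical invocation of benchmark_inference. The opts keys below are exactly the ones the function reads; the concrete values, and the model object with its name and input_shape attributes, are placeholders for illustration.

# Hypothetical call; keys mirror what benchmark_inference reads above,
# values are illustrative placeholders.
opts = {
    'phase': 'inference',
    'device': 'gpu',           # or 'cpu'
    'world_size': 1,           # GPU inference requires exactly one GPU
    'batch_size': 16,
    'dtype': 'float32',        # or 'float16' for half precision
    'cudnn_benchmark': True,
    'cudnn_fastest': True,
    'num_warmup_batches': 5,
    'num_batches': 50,
}
name, batch_times = benchmark_inference(model, opts)
print('%s: %.4f s/batch' % (name, batch_times.mean()))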
Example #3
Source File: test.py    From 3DMPPE_POSENET_RELEASE with MIT License
def main():

    args = parse_args()
    cfg.set_args(args.gpu_ids)
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    tester = Tester(args.test_epoch)
    tester._make_batch_generator()
    tester._make_model()

    preds = []

    with torch.no_grad():
        for itr, input_img in enumerate(tqdm(tester.batch_generator)):
            
            # forward
            coord_out = tester.model(input_img)

            if cfg.flip_test:
                flipped_input_img = flip(input_img, dims=3)
                flipped_coord_out = tester.model(flipped_input_img)
                flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1
                for pair in tester.flip_pairs:
                    flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone()
                coord_out = (coord_out + flipped_coord_out)/2.

            vis = False
            if vis:
                filename = str(itr)
                tmpimg = input_img[0].cpu().numpy()
                tmpimg = tmpimg * np.array(cfg.pixel_std).reshape(3,1,1) + np.array(cfg.pixel_mean).reshape(3,1,1)
                tmpimg = tmpimg.astype(np.uint8)
                tmpimg = tmpimg[::-1, :, :]
                tmpimg = np.transpose(tmpimg,(1,2,0)).copy()
                tmpkps = np.zeros((3,tester.joint_num))
                tmpkps[:2,:] = coord_out[0,:,:2].cpu().numpy().transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0]
                tmpkps[2,:] = 1
                tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton)
                cv2.imwrite(filename + '_output.jpg', tmpimg)

            coord_out = coord_out.cpu().numpy()
            preds.append(coord_out)
            
    # evaluate
    preds = np.concatenate(preds, axis=0)
    tester._evaluate(preds, cfg.result_dir) 
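The flip test above depends on a project-local flip() helper; in plain PyTorch the same horizontal flip is torch.flip(x, dims=[3]). Here is a minimal sketch of the averaging scheme with that builtin, with illustrative shapes (input of (N, C, H, W), output of (N, num_joints, 3)):

import torch

def flip_test(model, input_img, flip_pairs, output_width):
    # Run under torch.no_grad() at inference time, as in the example above.
    coord = model(input_img)
    flipped = model(torch.flip(input_img, dims=[3]))        # mirror along the width axis
    flipped[:, :, 0] = output_width - flipped[:, :, 0] - 1  # mirror the x coordinate
    for left, right in flip_pairs:                          # swap left/right joint predictions
        flipped[:, left, :], flipped[:, right, :] = (
            flipped[:, right, :].clone(), flipped[:, left, :].clone())
    return (coord + flipped) / 2.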
Example #4
Source File: train.py    From 3DMPPE_POSENET_RELEASE with MIT License
def main():
    
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    # train
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # forward
            trainer.optimizer.zero_grad()
            target = {'coord': joint_img, 'vis': joint_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, target)
            loss_coord = loss_coord.mean()

            # backward
            loss = loss_coord
            loss.backward()
            trainer.optimizer.step()
            
            trainer.gpu_timer.toc()
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
                ]
            trainer.logger.info(' '.join(screen))
            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch) 
Example #5
Source File: train.py    From 3DMPPE_ROOTNET_RELEASE with MIT License
def main():
    
    # argument parse and create log
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    # train
    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (input_img, k_value, root_img, root_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # forward
            trainer.optimizer.zero_grad()
            target = {'coord': root_img, 'vis': root_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, k_value, target)
            loss_coord = loss_coord.mean()

            # backward
            loss = loss_coord

            loss.backward()
            trainer.optimizer.step()
            
            trainer.gpu_timer.toc()

            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
                ]
            trainer.logger.info(' '.join(screen))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch) 
Example #6
Source File: user.py    From lidar-bonnetal with MIT License
def __init__(self, ARCH, DATA, datadir, logdir, modeldir):
    # parameters
    self.ARCH = ARCH
    self.DATA = DATA
    self.datadir = datadir
    self.logdir = logdir
    self.modeldir = modeldir

    # get the data
    parserModule = imp.load_source("parserModule",
                                   booger.TRAIN_PATH + '/tasks/semantic/dataset/' +
                                   self.DATA["name"] + '/parser.py')
    self.parser = parserModule.Parser(root=self.datadir,
                                      train_sequences=self.DATA["split"]["train"],
                                      valid_sequences=self.DATA["split"]["valid"],
                                      test_sequences=self.DATA["split"]["test"],
                                      labels=self.DATA["labels"],
                                      color_map=self.DATA["color_map"],
                                      learning_map=self.DATA["learning_map"],
                                      learning_map_inv=self.DATA["learning_map_inv"],
                                      sensor=self.ARCH["dataset"]["sensor"],
                                      max_points=self.ARCH["dataset"]["max_points"],
                                      batch_size=1,
                                      workers=self.ARCH["train"]["workers"],
                                      gt=True,
                                      shuffle_train=False)

    # concatenate the encoder and the head
    with torch.no_grad():
      self.model = Segmentator(self.ARCH,
                               self.parser.get_n_classes(),
                               self.modeldir)

    # use knn post processing?
    self.post = None
    if self.ARCH["post"]["KNN"]["use"]:
      self.post = KNN(self.ARCH["post"]["KNN"]["params"],
                      self.parser.get_n_classes())

    # GPU?
    self.gpu = False
    self.model_single = self.model
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Infering in device: ", self.device)
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
      cudnn.benchmark = True
      cudnn.fastest = True
      self.gpu = True
      self.model.cuda() 
Example #7
Source File: userPytorch.py    From bonnetal with MIT License
def __init__(self, path):
    # parameters
    self.path = path

    # config from path
    try:
      yaml_path = self.path + "/cfg.yaml"
      print("Opening config file %s" % yaml_path)
      self.CFG = yaml.safe_load(open(yaml_path, 'r'))
    except Exception as e:
      print(e)
      print("Error opening cfg.yaml file from trained model.")
      quit()

    # make a colorizer
    self.colorizer = Colorizer(self.CFG["dataset"]["color_map"])

    # get the data
    parserModule = imp.load_source("parserModule",
                                   booger.TRAIN_PATH + '/tasks/segmentation/dataset/' +
                                   self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(img_prop=self.CFG["dataset"]["img_prop"],
                                      img_means=self.CFG["dataset"]["img_means"],
                                      img_stds=self.CFG["dataset"]["img_stds"],
                                      classes=self.CFG["dataset"]["labels"],
                                      train=False)

    # some useful data
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights?
    try:
      self.pytorch_path = os.path.join(self.path, "model.pytorch")
      self.model = torch.jit.load(self.pytorch_path)
      print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
      print("Couldn't load Pytorch-traced network. Error: ", e)
      quit()

    # GPU?
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
      self.gpu = True
      cudnn.benchmark = True
      cudnn.fastest = True
      self.model.cuda()
      self.means = self.means.cuda()
      self.stds = self.stds.cuda() 
Example #8
Source File: userPytorch.py    From bonnetal with MIT License
def __init__(self, path):
    # parameters
    self.path = path

    # config from path
    try:
      yaml_path = self.path + "/cfg.yaml"
      print("Opening config file %s" % yaml_path)
      self.CFG = yaml.safe_load(open(yaml_path, 'r'))
    except Exception as e:
      print(e)
      print("Error opening cfg.yaml file from trained model.")
      quit()

    # get the data
    parserModule = imp.load_source("parserModule",
                                   booger.TRAIN_PATH + '/tasks/classification/dataset/' +
                                   self.CFG["dataset"]["name"] + '/parser.py')
    self.parser = parserModule.Parser(img_prop=self.CFG["dataset"]["img_prop"],
                                      img_means=self.CFG["dataset"]["img_means"],
                                      img_stds=self.CFG["dataset"]["img_stds"],
                                      classes=self.CFG["dataset"]["labels"],
                                      train=False)

    # some useful data
    self.data_h, self.data_w, self.data_d = self.parser.get_img_size()
    self.means, self.stds = self.parser.get_means_stds()
    self.means = torch.tensor(self.means)
    self.stds = torch.tensor(self.stds)
    self.nclasses = self.parser.get_n_classes()

    # architecture definition
    # get weights?
    try:
      self.pytorch_path = os.path.join(self.path, "model.pytorch")
      self.model = torch.jit.load(self.pytorch_path)
      print("Successfully Pytorch-traced model from ", self.pytorch_path)
    except Exception as e:
      print("Couldn't load Pytorch-traced network. Error: ", e)
      quit()

    # GPU?
    self.gpu = False
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available() and torch.cuda.device_count() > 0:
      self.gpu = True
      cudnn.benchmark = True
      cudnn.fastest = True
      self.model.cuda()
      self.means = self.means.cuda()
      self.stds = self.stds.cuda()