Python torch.utils.data.distributed() Examples
The following are 30 code examples of the torch.utils.data.distributed module, most of which use its DistributedSampler class. The examples are drawn from open-source projects; you can go to the original project or source file by following the link above each example. You may also want to check out all available functions and classes of the torch.utils.data module, or try the search function.
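Before the project examples, here is a minimal sketch of the most common pattern they all share: wrapping a dataset in a DistributedSampler so each process trains on its own shard. The toy dataset, world_size, and rank values below are placeholders; in real distributed code they come from the initialized process group rather than being hard-coded.

import torch
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

# Toy dataset; in the examples below this is usually ImageFolder or MNIST.
dataset = TensorDataset(torch.randn(1000, 10), torch.randint(0, 2, (1000,)))

# Normally num_replicas/rank are read from the initialized process group;
# they are passed explicitly here so the sketch runs as a single process.
world_size, rank = 2, 0
sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
loader = DataLoader(dataset, batch_size=32, sampler=sampler)

for epoch in range(2):
    sampler.set_epoch(epoch)  # reshuffles this rank's shard every epoch
    for inputs, targets in loader:
        pass  # training step would go here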
Example #1
Source File: imagenet_torch_loader.py From pytorch_quantization with MIT License | 6 votes |
def main():
    if cfg.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if cfg.dist_url == "env://" and cfg.world_size == -1:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    cfg.distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if cfg.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        cfg.world_size = ngpus_per_node * cfg.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, cfg))
    else:
        # Simply call main_worker function
        main_worker(cfg.gpu, ngpus_per_node, cfg)
Example #2
Source File: mnist.py From sagemaker-python-sdk with Apache License 2.0 | 6 votes |
def _get_train_data_loader(training_dir, is_distributed, batch_size, **kwargs):
    logger.info("Get train data loader")
    dataset = datasets.MNIST(
        training_dir,
        train=True,
        transform=transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
        ),
        download=False,  # True sets a dependency on an external site for our canaries.
    )
    train_sampler = (
        torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
    )
    train_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=train_sampler is None,
        sampler=train_sampler,
        **kwargs
    )
    return train_sampler, train_loader
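One caveat that applies to this pattern (general DistributedSampler behavior, not something specific to this file): when a sampler is supplied, shuffling is driven by the sampler rather than the loader, so the training loop should call set_epoch at the start of each epoch or every epoch will see the same ordering. A hedged sketch of how the returned objects might be consumed; training_dir, is_distributed, batch_size, epochs, and the inner loop body are placeholders, not part of the original example:

train_sampler, train_loader = _get_train_data_loader(training_dir, is_distributed, batch_size)
for epoch in range(epochs):
    if train_sampler is not None:
        train_sampler.set_epoch(epoch)  # reshuffle this rank's shard
    for data, target in train_loader:
        ...  # forward/backward/optimizer step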
Example #3
Source File: jh_warm.py From imagenet-fast with Apache License 2.0 | 6 votes |
def get_parser():
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--save-dir', type=str, default=Path.home()/'imagenet_training',
                        help='Directory to save logs and models.')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
                        choices=model_names, help='model architecture')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('-b', '--batch-size', default=256, type=int,
                        metavar='N', help='mini-batch size (default: 256)')
    parser.add_argument('--fp16', action='store_true', help='Run model fp16 mode.')
    parser.add_argument('--dist-url', default='file://sync.file', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
    parser.add_argument('--world-size', default=1, type=int,
                        help='Number of GPUs to use. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    parser.add_argument('--rank', default=0, type=int,
                        help='Used for multi-process training. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    return parser
Example #4
Source File: imagenet.py From pytorch-lightning with Apache License 2.0 | 6 votes |
def get_args():
    parent_parser = ArgumentParser(add_help=False)
    parent_parser.add_argument('--data-path', metavar='DIR', type=str,
                               help='path to dataset')
    parent_parser.add_argument('--save-path', metavar='DIR', default=".", type=str,
                               help='path to save output')
    parent_parser.add_argument('--gpus', type=int, default=1,
                               help='how many gpus')
    parent_parser.add_argument('--distributed-backend', type=str, default='dp',
                               choices=('dp', 'ddp', 'ddp2'),
                               help='supports three options dp, ddp, ddp2')
    parent_parser.add_argument('--use-16bit', dest='use_16bit', action='store_true',
                               help='if true uses 16 bit precision')
    parent_parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                               help='evaluate model on validation set')

    parser = ImageNetLightningModel.add_model_specific_args(parent_parser)
    return parser.parse_args()
Example #5
Source File: main.py From sparse_learning with MIT License | 6 votes |
def get_val_step(model_and_loss):
    def _step(input, target):
        input_var = Variable(input)
        target_var = Variable(target)

        with torch.no_grad():
            loss, output = model_and_loss(input_var, target_var)

        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if torch.distributed.is_initialized():
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        torch.cuda.synchronize()

        return reduced_loss, prec1, prec5

    return _step
Example #6
Source File: train_imagenet_nv.py From imagenet18_old with The Unlicense | 6 votes |
def distributed_predict(input, target, model, criterion):
    # Allows distributed prediction on uneven batches.
    # Test set isn't always large enough for every GPU to get a batch
    batch_size = input.size(0)
    output = loss = corr1 = corr5 = valid_batches = 0

    if batch_size:
        with torch.no_grad():
            output = model(input)
            loss = criterion(output, target).data
        # measure accuracy and record loss
        valid_batches = 1
        corr1, corr5 = correct(output.data, target, topk=(1, 5))

    metrics = torch.tensor([batch_size, valid_batches, loss, corr1, corr5]).float().cuda()
    batch_total, valid_batches, reduced_loss, corr1, corr5 = dist_utils.sum_tensor(metrics).cpu().numpy()
    reduced_loss = reduced_loss/valid_batches

    top1 = corr1*(100.0/batch_total)
    top5 = corr5*(100.0/batch_total)
    return top1, top5, reduced_loss, batch_total
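The dist_utils.sum_tensor helper is not reproduced on this page. Based on how its result is consumed above, a typical implementation (an assumption, not this project's exact code) all-reduces the metrics tensor with a SUM op across all ranks:

import torch
import torch.distributed as dist

def sum_tensor(tensor):
    # Hypothetical stand-in for dist_utils.sum_tensor: element-wise sum of
    # `tensor` across every rank in the default process group.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    return rt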
Example #7
Source File: main.py From sparse_learning with MIT License | 6 votes |
def get_train_loader(data_path, batch_size, workers=5, _worker_init_fn=None):
    traindir = os.path.join(data_path, 'train')
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            #transforms.ToTensor(), Too slow
            #normalize,
        ]))

    if torch.distributed.is_initialized():
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=(train_sampler is None),
        num_workers=workers, worker_init_fn=_worker_init_fn, pin_memory=True,
        sampler=train_sampler, collate_fn=fast_collate, drop_last=True)

    return train_loader
Example #8
Source File: main.py From elastic with BSD 3-Clause "New" or "Revised" License | 6 votes |
def initialize_model(
    arch: str, lr: float, momentum: float, weight_decay: float, device_id: int
):
    print(f"=> creating model: {arch}")
    model = models.__dict__[arch]()
    # For multiprocessing distributed, DistributedDataParallel constructor
    # should always set the single device scope, otherwise,
    # DistributedDataParallel will use all available devices.
    model.cuda(device_id)
    cudnn.benchmark = True
    model = DistributedDataParallel(model, device_ids=[device_id])
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(device_id)
    optimizer = SGD(
        model.parameters(), lr, momentum=momentum, weight_decay=weight_decay
    )
    return model, criterion, optimizer
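DistributedDataParallel assumes the default process group has already been initialized before this function runs; in the elastic project that happens elsewhere. A minimal sketch of that prerequisite step, with the helper name, NCCL backend, and env:// init method being assumptions drawn from the common configuration rather than this project's code:

import torch
import torch.distributed as dist

def setup_distributed(global_rank: int, world_size: int, device_id: int):
    # Hypothetical setup helper; expects MASTER_ADDR and MASTER_PORT in the
    # environment when using init_method="env://".
    dist.init_process_group(backend="nccl", init_method="env://",
                            rank=global_rank, world_size=world_size)
    torch.cuda.set_device(device_id)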
Example #9
Source File: main.py From sparse_learning with MIT License | 5 votes |
def __init__(self, args, arch, loss, pretrained_weights=None, state=None,
             cuda=True, fp16=False, distributed=False):
    super(ModelAndLoss, self).__init__()
    self.arch = arch
    self.mask = None

    print("=> creating model '{}'".format(arch))
    model = models.build_resnet(arch[0], arch[1])
    if pretrained_weights is not None:
        print("=> using pre-trained model from a file '{}'".format(arch))
        model.load_state_dict(pretrained_weights)

    if cuda:
        model = model.cuda()
    if fp16:
        model = network_to_half(model)
    if distributed:
        model = DDP(model)

    if not state is None:
        model.load_state_dict(state)

    # define loss function (criterion) and optimizer
    criterion = loss()
    if cuda:
        criterion = criterion.cuda()

    self.model = model
    self.loss = criterion
Example #10
Source File: jh_tmp.py From imagenet-fast with Apache License 2.0 | 5 votes |
def get_parser():
    parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
    parser.add_argument('data', metavar='DIR', help='path to dataset')
    parser.add_argument('--save-dir', type=str, default=Path.home()/'imagenet_training',
                        help='Directory to save logs and models.')
    parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
                        choices=model_names,
                        help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
    parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum')
    parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
                        metavar='W', help='weight decay (default: 1e-4)')
    parser.add_argument('--pretrained', dest='pretrained', action='store_true',
                        help='use pre-trained model')
    parser.add_argument('--fp16', action='store_true', help='Run model fp16 mode.')
    parser.add_argument('--prof', dest='prof', action='store_true',
                        help='Only run a few iters for profiling.')
    parser.add_argument('--dist-url', default='file://sync.file', type=str,
                        help='url used to set up distributed training')
    parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
    parser.add_argument('--world-size', default=1, type=int,
                        help='Number of GPUs to use. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    parser.add_argument('--rank', default=0, type=int,
                        help='Used for multi-process training. Can either be manually set ' +
                             'or automatically set by using \'python -m multiproc\'.')
    return parser
Example #11
Source File: imagenet.py From pytorch-lightning with Apache License 2.0 | 5 votes |
def train_dataloader(self):
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    )
    train_dir = os.path.join(self.data_path, 'train')
    train_dataset = datasets.ImageFolder(
        train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if self.use_ddp:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=self.batch_size,
        shuffle=(train_sampler is None),
        num_workers=0,
        sampler=train_sampler
    )
    return train_loader
Example #12
Source File: train_net.py From DSGN with MIT License | 5 votes |
def main():
    args = get_parser()

    if args.debug:
        args.savemodel = './outputs/debug/'
        args.btrain = 1
        args.workers = 0

    global cfg
    exp = Experimenter(args.savemodel, cfg_path=args.cfg)
    cfg = exp.config
    reset_seed(args.seed)

    cfg.debug = args.debug
    cfg.warmup = getattr(cfg, 'warmup', True) if not args.debug else False

    ### distributed training ###
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    ngpus_per_node = torch.cuda.device_count()
    print('ngpus_per_node: {}'.format(ngpus_per_node))
    args.ngpus_per_node = ngpus_per_node

    args.distributed = ngpus_per_node > 0 and (args.world_size > 1 or args.multiprocessing_distributed)
    args.multiprocessing_distributed = args.distributed

    if args.distributed and args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args, cfg, exp))
    else:
        # Simply call main_worker function
        main_worker(0, ngpus_per_node, args, cfg, exp)
Example #13
Source File: main.py From GroupNorm-reproduce with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
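The main_worker function spawned here is not shown on this page. In the reference PyTorch ImageNet script that this launcher follows, it typically converts the spawned GPU index into a global rank and initializes the process group before building the model and data loaders. A rough, hedged skeleton of that structure (a sketch, not this project's exact code):

import torch.distributed as dist

def main_worker(gpu, ngpus_per_node, args):
    # Sketch only; mirrors the structure of the reference ImageNet example.
    args.gpu = gpu
    if args.distributed:
        if args.multiprocessing_distributed:
            # args.rank starts as the node rank; convert it to a global process rank.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # ... build the model, wrap it in DistributedDataParallel, and create
    # DistributedSampler-backed data loaders ...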
Example #14
Source File: main.py From online-normalization with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main():
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    if args.distributed:
        raise NotImplementedError('multiprocessing with ON not implemented')

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #15
Source File: imagenet.py From Compact-Global-Descriptor with BSD 2-Clause "Simplified" License | 5 votes |
def main():
    # Use CUDA
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    use_cuda = torch.cuda.is_available()
    gpus = list(range(len(args.gpu_id.split(','))))

    # Random seed
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)

    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu_id, ngpus_per_node, args)
Example #16
Source File: train_dist.py From PoseNFS with MIT License | 5 votes |
def args():
    parser = argparse.ArgumentParser(description='Architecture Search')
    parser.add_argument('--cfg', help='experiment configure file name',
                        required=True, default='config.yaml', type=str)
    parser.add_argument('--exp_name', help='experiment name', default='NAS-0', type=str)
    parser.add_argument('--gpu', help='gpu ids', default='0,1', type=str)
    parser.add_argument('--load_ckpt', help='reload the last save ckeckpoint in current directory',
                        action='store_true', default=False)
    parser.add_argument('--debug', help='save batch images ', action='store_true', default=False)
    parser.add_argument('--num_workers', help='workers number (debug=0) ', default=8, type=int)
    parser.add_argument('--param_flop', help=' ', action='store_true', default=False)
    parser.add_argument('--show_arch_value', help='show_arch_value ', action='store_true', default=False)
    parser.add_argument('--search',
                        help='search method: None,random,sync,second_order_gradient,first_order_gradient',
                        type=str)
    parser.add_argument('--batchsize', help='', type=int)
    parser.add_argument('--visualize', help=' ', action='store_true', default=False)
    parser.add_argument('--distributed', help="single node multi-gpus. \
                        see more in https://pytorch.org/tutorials/intermediate/ddp_tutorial.html",
                        action='store_true', default=False)
    parser.add_argument('--local_rank', default=0, type=int,
                        help='node rank for distributed training')
    # parser.add_argument('--world-size', default=-1, type=int,
    #                     help='number of nodes for distributed training')
    # parser.add_argument('--rank', default=-1, type=int,
    #                     help='node rank for distributed training')
    # parser.add_argument('--dist-url', default='tcp://127.0.0.1:FREEPORT', type=str,
    #                     help='url used to set up distributed training')
    args = parser.parse_args()
    return args
Example #17
Source File: main.py From PyTorch with MIT License | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #18
Source File: mnist.py From sagemaker-pytorch-training-toolkit with Apache License 2.0 | 5 votes |
def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs):
    logger.info("Get train data loader")
    dataset = datasets.MNIST(training_dir, train=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ]))
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train_sampler is None,
                                       sampler=train_sampler, **kwargs)
Example #19
Source File: main.py From sparse_learning with MIT License | 5 votes |
def train_loop(args, model_and_loss, optimizer, lr_scheduler, train_loader, val_loader, epochs, fp16,
               logger, should_backup_checkpoint, best_prec1=0, start_epoch=0, prof=False):
    for epoch in range(start_epoch, epochs):
        if torch.distributed.is_initialized():
            train_loader.sampler.set_epoch(epoch)

        lr_scheduler(optimizer, epoch)

        train(train_loader, model_and_loss, optimizer, fp16, logger, epoch, prof=prof)

        prec1 = validate(val_loader, model_and_loss, fp16, logger, epoch, prof=prof)

        if not torch.distributed.is_initialized() or torch.distributed.get_rank() == 0:
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)

            if should_backup_checkpoint(epoch):
                backup_filename = 'checkpoint-{}.pth.tar'.format(epoch + 1)
            else:
                backup_filename = None
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': model_and_loss.arch,
                'state_dict': model_and_loss.model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best, backup_filename=backup_filename)

        if not args.dense and epoch < epochs:
            model_and_loss.mask.at_end_of_epoch()
# }}}

# Data Loading functions {{{
Example #20
Source File: main.py From sparse_learning with MIT License | 5 votes |
def get_train_step(model_and_loss, optimizer, fp16):
    def _step(input, target):
        input_var = Variable(input)
        target_var = Variable(target)

        loss, output = model_and_loss(input_var, target_var)
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))

        if torch.distributed.is_initialized():
            reduced_loss = reduce_tensor(loss.data)
            prec1 = reduce_tensor(prec1)
            prec5 = reduce_tensor(prec5)
        else:
            reduced_loss = loss.data

        optimizer.zero_grad()
        if fp16:
            optimizer.backward(loss)
        else:
            loss.backward()

        if model_and_loss.mask is None:
            optimizer.step()
        else:
            model_and_loss.mask.step()

        torch.cuda.synchronize()

        return reduced_loss, prec1, prec5

    return _step
Example #21
Source File: main.py From sparse_learning with MIT License | 5 votes |
def reduce_tensor(tensor):
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.reduce_op.SUM)
    rt /= torch.distributed.get_world_size()
    return rt
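Note that dist.reduce_op was deprecated in later PyTorch releases in favor of dist.ReduceOp. On a newer version, an equivalent spelling of this helper (same behavior, only the enum name changes) would be:

import torch.distributed as dist

def reduce_tensor(tensor):
    # Average `tensor` across all ranks; newer-API spelling of the helper above.
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= dist.get_world_size()
    return rt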
Example #22
Source File: imagenet.py From pytorch-dp with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed training. "
            "This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! "
            "You may see unexpected behavior when restarting "
            "from checkpoints."
        )

    if args.gpu is not None:
        warnings.warn(
            "You have chosen a specific GPU. This will completely "
            "disable data parallelism."
        )

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #23
Source File: main.py From proxylessnas with Apache License 2.0 | 5 votes |
def save_checkpoint(epoch):
    if hvd.rank() == 0:
        os.remove(args.checkpoint_format.format(epoch=epoch))
        filepath = args.checkpoint_format.format(epoch=epoch + 1)
        state = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(state, filepath)

# Horovod: average metrics from distributed training.
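The trailing comment refers to a metric-averaging helper that is not included in this snippet. In the stock Horovod PyTorch examples it is usually built on hvd.allreduce, roughly as follows; this is a sketch of the common pattern, not necessarily this project's exact code, and val is assumed to be a Python float:

import torch
import horovod.torch as hvd

def metric_average(val, name):
    # All-reduce (average by default) a scalar metric across Horovod workers.
    tensor = torch.tensor(val)
    avg_tensor = hvd.allreduce(tensor, name=name)
    return avg_tensor.item()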
Example #24
Source File: mnist.py From aws-step-functions-data-science-sdk-python with Apache License 2.0 | 5 votes |
def _get_train_data_loader(batch_size, training_dir, is_distributed, **kwargs):
    logger.info("Get train data loader")
    dataset = datasets.MNIST(training_dir, train=True, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ]))
    train_sampler = torch.utils.data.distributed.DistributedSampler(dataset) if is_distributed else None
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=train_sampler is None,
                                       sampler=train_sampler, **kwargs)
Example #25
Source File: main.py From TF2 with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #26
Source File: main.py From TF2 with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #27
Source File: imagenet_pytorch_horovod.py From DistributedDeepLearning with MIT License | 5 votes |
def save_checkpoint(epoch):
    if hvd.rank() == 0:
        filepath = args.checkpoint_format.format(epoch=epoch + 1)
        state = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        torch.save(state, filepath)

# Horovod: average metrics from distributed training.
Example #28
Source File: train_imagenet_nv.py From imagenet18_old with The Unlicense | 5 votes |
def preload_data(self, ep, sz, bs, trndir, valdir, **kwargs):  # dummy ep var to prevent error
    if 'lr' in kwargs: del kwargs['lr']  # in case we mix schedule and data phases
    """Pre-initializes data-loaders. Use set_data to start using it."""
    if sz == 128:
        val_bs = max(bs, 512)
    elif sz == 224:
        val_bs = max(bs, 256)
    else:
        val_bs = max(bs, 128)
    return dataloader.get_loaders(trndir, valdir, bs=bs, val_bs=val_bs, sz=sz, workers=args.workers,
                                  distributed=args.distributed, **kwargs)

# ### Learning rate scheduler
Example #29
Source File: train_imagenet_nv.py From imagenet18_old with The Unlicense | 5 votes |
def validate(val_loader, model, criterion, epoch, start_time):
    timer = TimeMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()
    eval_start_time = time.time()

    for i, (input, target) in enumerate(val_loader):
        if args.short_epoch and (i > 10):
            break
        batch_num = i + 1
        timer.batch_start()
        if args.distributed:
            top1acc, top5acc, loss, batch_total = distributed_predict(input, target, model, criterion)
        else:
            with torch.no_grad():
                output = model(input)
                loss = criterion(output, target).data
            batch_total = input.size(0)
            top1acc, top5acc = accuracy(output.data, target, topk=(1, 5))

        # Eval batch done. Logging results
        timer.batch_end()
        losses.update(to_python_float(loss), to_python_float(batch_total))
        top1.update(to_python_float(top1acc), to_python_float(batch_total))
        top5.update(to_python_float(top5acc), to_python_float(batch_total))

        should_print = (batch_num % args.print_freq == 0) or (batch_num == len(val_loader))
        if args.local_rank == 0 and should_print:
            output = (f'Test: [{epoch}][{batch_num}/{len(val_loader)}]\t'
                      f'Time {timer.batch_time.val:.3f} ({timer.batch_time.avg:.3f})\t'
                      f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                      f'Acc@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      f'Acc@5 {top5.val:.3f} ({top5.avg:.3f})')
            log.verbose(output)

    tb.log_eval(top1.avg, top5.avg, time.time() - eval_start_time)
    tb.log('epoch', epoch)

    return top1.avg, top5.avg
Example #30
Source File: main.py From Count-Sketch-Optimizers with Apache License 2.0 | 5 votes |
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)