Python torch.multiprocessing Examples
The following are code examples of the torch.multiprocessing module, PyTorch's drop-in replacement for Python's standard multiprocessing that additionally supports sharing torch.Tensor memory across processes. Each example is taken from an open-source project; the source file and license are noted above it. You may also want to check out all available functions/classes of the module torch, or try the search function.
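Before the project examples, here is a minimal, self-contained sketch of the module's two most common entry points: mp.spawn for launching one process per worker, and share_memory_() for sharing a tensor between them. The worker function and tensor shapes here are illustrative, not taken from any project below.

import torch
import torch.multiprocessing as mp

def worker(rank, shared, world_size):
    # mp.spawn calls this as worker(rank, *args): the process index comes first.
    shared[rank] = float(rank)  # each process writes its own slot
    print(f"worker {rank}/{world_size} done")

if __name__ == '__main__':
    world_size = 2
    shared = torch.zeros(world_size)
    shared.share_memory_()  # move storage to shared memory before spawning
    mp.spawn(worker, nprocs=world_size, args=(shared, world_size))
    print(shared)  # tensor([0., 1.])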
Example #1
Source File: util.py From ConvLab with MIT License
def set_cuda_id(spec):
    '''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
    # Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
    # see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
    for agent_spec in spec['agent']:
        if 'net' not in agent_spec or not agent_spec['net'].get('gpu'):
            return
    meta_spec = spec['meta']
    trial_idx = meta_spec['trial'] or 0
    session_idx = meta_spec['session'] or 0
    if meta_spec['distributed'] == 'shared':  # shared hogwild uses only global networks, offset them to idx 0
        session_idx = 0
    job_idx = trial_idx * meta_spec['max_session'] + session_idx
    job_idx += meta_spec['cuda_offset']
    device_count = torch.cuda.device_count()
    cuda_id = None if not device_count else job_idx % device_count
    for agent_spec in spec['agent']:
        agent_spec['net']['cuda_id'] = cuda_id
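To make the arithmetic concrete: with max_session=4, cuda_offset=0 and 4 visible devices, trial 1 / session 2 gives job_idx = 1 * 4 + 2 = 6 and cuda_id = 6 % 4 = 2, so successive sessions are spread round-robin across the available GPUs.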
Example #2
Source File: data_silo.py From FARM with Apache License 2.0
def _dataset_from_chunk(cls, chunk, processor):
    """
    Creating a dataset for a chunk (= subset) of dicts. In multiprocessing:
      * we read in all dicts from a file
      * split all dicts into chunks
      * feed *one chunk* to *one process*
        => the *one chunk* gets converted to *one dataset* (that's what we do here)
      * all datasets get collected and concatenated

    :param chunk: Instead of only having a list of dicts here we also supply an
        index (ascending int) for each. => [(0, dict), (1, dict) ...]
    :type chunk: list of tuples
    :param processor: FARM Processor (e.g. TextClassificationProcessor)
    :return: PyTorch Dataset
    """
    dicts = [d[1] for d in chunk]
    indices = [x[0] for x in chunk]
    dataset = processor.dataset_from_dicts(dicts=dicts, indices=indices)
    return dataset
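The docstring above describes the driver side of this pattern but not the driver itself. A minimal sketch of what such a driver could look like, assuming _dataset_from_chunk is a classmethod on a DataSilo class; the chunking helper, function names, and pool sizes below are illustrative assumptions, not FARM's actual code.

from functools import partial

import torch.multiprocessing as mp
from torch.utils.data import ConcatDataset

def chunked_with_indices(dicts, chunk_size):
    # Attach an ascending index to each dict, then split:
    # [(0, d0), (1, d1), ...] -> chunks of `chunk_size` (index, dict) pairs
    indexed = list(enumerate(dicts))
    return [indexed[i:i + chunk_size] for i in range(0, len(indexed), chunk_size)]

def dataset_from_dicts_parallel(dicts, processor, num_processes=4, chunk_size=1000):
    chunks = chunked_with_indices(dicts, chunk_size)
    with mp.Pool(processes=num_processes) as pool:
        # one chunk -> one process -> one dataset
        datasets = pool.map(partial(DataSilo._dataset_from_chunk, processor=processor), chunks)
    return ConcatDataset(datasets)  # all datasets get collected and concatenated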
Example #3
Source File: util.py From SLM-Lab with MIT License
def set_cuda_id(spec):
    '''Use trial and session id to hash and modulo cuda device count for a cuda_id to maximize device usage. Sets the net_spec for the base Net class to pick up.'''
    # Don't trigger any cuda call if not using GPU. Otherwise will break multiprocessing on machines with CUDA.
    # see issues https://github.com/pytorch/pytorch/issues/334 https://github.com/pytorch/pytorch/issues/3491 https://github.com/pytorch/pytorch/issues/9996
    for agent_spec in spec['agent']:
        if not agent_spec['net'].get('gpu'):
            return
    meta_spec = spec['meta']
    trial_idx = meta_spec['trial'] or 0
    session_idx = meta_spec['session'] or 0
    if meta_spec['distributed'] == 'shared':  # shared hogwild uses only global networks, offset them to idx 0
        session_idx = 0
    job_idx = trial_idx * meta_spec['max_session'] + session_idx
    job_idx += meta_spec['cuda_offset']
    device_count = torch.cuda.device_count()
    cuda_id = job_idx % device_count if torch.cuda.is_available() else None
    for agent_spec in spec['agent']:
        agent_spec['net']['cuda_id'] = cuda_id
Example #4
Source File: imagenet_torch_loader.py From pytorch_quantization with MIT License
def main():
    if cfg.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if cfg.dist_url == "env://" and cfg.world_size == -1:
        cfg.world_size = int(os.environ["WORLD_SIZE"])

    cfg.distributed = cfg.world_size > 1 or cfg.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if cfg.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        cfg.world_size = ngpus_per_node * cfg.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, cfg))
    else:
        # Simply call main_worker function
        main_worker(cfg.gpu, ngpus_per_node, cfg)
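A convention used here (and in most of the examples below) is worth spelling out: world_size counts nodes when the script starts, and is rescaled to count processes, one per GPU, before mp.spawn launches ngpus_per_node copies of main_worker on the current node.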
Example #5
Source File: main.py From GroupNorm-reproduce with Apache License 2.0
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #6
Source File: imagenet_train.py From Fixup with BSD 3-Clause "New" or "Revised" License
def main():
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #7
Source File: spatial.py From rising with MIT License
def __init__(self, scheduler: scheduler_type, mode: str = 'nearest',
             align_corners: bool = None, preserve_range: bool = False,
             keys: Sequence = ('data',), grad: bool = False, **kwargs):
    """
    Args:
        scheduler: scheduler which determines the current size. The scheduler
            is called with the current iteration of the transform
        mode: one of ``nearest``, ``linear``, ``bilinear``, ``bicubic``,
            ``trilinear``, ``area`` (for more information see
            :func:`torch.nn.functional.interpolate`)
        align_corners: input and output tensors are aligned by the center
            points of their corner pixels, preserving the values at the
            corner pixels
        preserve_range: output tensor has same range as input tensor
        keys: keys which should be augmented
        grad: enable gradient computation inside transformation
        **kwargs: keyword arguments passed to augment_fn

    Warnings:
        When this transformation is used in combination with multiprocessing,
        the step counter is not perfectly synchronized between multiple
        processes. As a result the step count may jump between values in a
        range of the number of processes used.
    """
    super().__init__(size=0, mode=mode, align_corners=align_corners,
                     preserve_range=preserve_range, keys=keys, grad=grad, **kwargs)
    self.scheduler = scheduler
    self._step = Value('i', 0)
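The warning in the docstring stems from how the shared step counter behaves: every worker increments the same multiprocessing.Value, so any single worker observes the counter jumping by up to the number of workers between its own calls. A standalone sketch of that effect (illustrative, not rising's code):

from multiprocessing import Process, Value

def bump(counter, n):
    for _ in range(n):
        with counter.get_lock():  # Value('i', 0) carries a lock for safe increments
            counter.value += 1

if __name__ == '__main__':
    step = Value('i', 0)
    procs = [Process(target=bump, args=(step, 1000)) for _ in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    print(step.value)  # 4000 in total, but each worker saw it jump in between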
Example #8
Source File: dataloader.py From keyphrase-generation-rl with MIT License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #9
Source File: train_net.py From DSGN with MIT License
def main():
    args = get_parser()

    if args.debug:
        args.savemodel = './outputs/debug/'
        args.btrain = 1
        args.workers = 0

    global cfg
    exp = Experimenter(args.savemodel, cfg_path=args.cfg)
    cfg = exp.config
    reset_seed(args.seed)

    cfg.debug = args.debug
    cfg.warmup = getattr(cfg, 'warmup', True) if not args.debug else False

    ### distributed training ###
    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])
    ngpus_per_node = torch.cuda.device_count()
    print('ngpus_per_node: {}'.format(ngpus_per_node))
    args.ngpus_per_node = ngpus_per_node
    args.distributed = ngpus_per_node > 0 and (args.world_size > 1 or args.multiprocessing_distributed)
    args.multiprocessing_distributed = args.distributed

    if args.distributed and args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args, cfg, exp))
    else:
        # Simply call main_worker function
        main_worker(0, ngpus_per_node, args, cfg, exp)
Example #10
Source File: main.py From mrqa with Apache License 2.0
def main(args):
    # data loading before initializing model
    pickled_folder = args.pickled_folder + "_{}_{}".format(args.bert_model, str(args.skip_no_ans))
    if not os.path.exists(pickled_folder):
        os.mkdir(pickled_folder)
    file_num = iter_main(args)
    args.num_classes = file_num

    # make save and result directory
    save_dir = os.path.join("./save", "{}_{}".format("adv" if args.adv else "base", time.strftime("%m%d%H%M")))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    args.save_dir = save_dir
    result_dir = os.path.join("./result", "{}_{}".format("adv" if args.adv else "base", time.strftime("%m%d%H%M")))
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    args.result_dir = result_dir

    args.devices = [int(gpu) for gpu in args.devices.split('_')]
    args.use_cuda = (args.use_cuda and torch.cuda.is_available())
    args.distributed = (args.use_cuda and args.distributed)

    ngpus_per_node = 0
    if args.use_cuda:
        ngpus_per_node = len(args.devices)
        assert ngpus_per_node <= torch.cuda.device_count(), "GPU device number exceeds max capacity. select device ids correctly."

    if args.distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        worker(None, ngpus_per_node, args)
Example #11
Source File: distributed_run.py From mrqa with Apache License 2.0
def distributed_main(args):
    ngpus_per_node = len(args.devices)
    assert ngpus_per_node <= torch.cuda.device_count(), "GPU device num exceeds max capacity."

    # Since we have ngpus_per_node processes per node, the total world_size
    # needs to be adjusted accordingly
    args.world_size = ngpus_per_node * args.world_size
    # Use torch.multiprocessing.spawn to launch distributed processes: the
    # main_worker process function
    mp.spawn(worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
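mp.spawn invokes its target with the process rank prepended to args, so the worker launched above receives (gpu, ngpus_per_node, args). A minimal skeleton of such a worker; the 'nccl' backend and the rank arithmetic are common conventions assumed here, not taken from mrqa:

import torch
import torch.distributed as dist

def worker(gpu, ngpus_per_node, args):
    # global rank of this process across all nodes
    rank = args.rank * ngpus_per_node + gpu
    dist.init_process_group(backend='nccl', init_method=args.dist_url,
                            world_size=args.world_size, rank=rank)
    torch.cuda.set_device(gpu)
    # ... build the model, wrap it in DistributedDataParallel, train ...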
Example #12
Source File: main.py From EfficientNet-PyTorch with Apache License 2.0
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #13
Source File: main.py From SASA-pytorch with MIT License
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        if not args.cpu:
            cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #14
Source File: dataloader_new.py From MetaFGNet with MIT License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #15
Source File: generic_distributed_util_test.py From ClassyVision with MIT License
def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3  # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results
Example #16
Source File: dataloader.py From EMANet with GNU General Public License v3.0
def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
             batch_sampler=None, num_workers=0, collate_fn=default_collate,
             pin_memory=False, drop_last=False, timeout=0,
             worker_init_fn=None):
    self.dataset = dataset
    self.batch_size = batch_size
    self.num_workers = num_workers
    self.collate_fn = collate_fn
    self.pin_memory = pin_memory
    self.drop_last = drop_last
    self.timeout = timeout
    self.worker_init_fn = worker_init_fn

    if timeout < 0:
        raise ValueError('timeout option should be non-negative')

    if batch_sampler is not None:
        if batch_size > 1 or shuffle or sampler is not None or drop_last:
            raise ValueError('batch_sampler is mutually exclusive with '
                             'batch_size, shuffle, sampler, and drop_last')

    if sampler is not None and shuffle:
        raise ValueError('sampler is mutually exclusive with shuffle')

    if self.num_workers < 0:
        raise ValueError('num_workers cannot be negative; '
                         'use num_workers=0 to disable multiprocessing.')

    if batch_sampler is None:
        if sampler is None:
            if shuffle:
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
        batch_sampler = BatchSampler(sampler, batch_size, drop_last)

    self.sampler = sampler
    self.batch_sampler = batch_sampler
Example #17
Source File: torchloader.py From mxbox with BSD 3-Clause "New" or "Revised" License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.Queue()
        self.data_queue = multiprocessing.Queue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #18
Source File: main.py From online-normalization with BSD 3-Clause "New" or "Revised" License
def main():
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed
    if args.distributed:
        raise NotImplementedError('multiprocessing with ON not implemented')

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #19
Source File: imagenet.py From Compact-Global-Descriptor with BSD 2-Clause "Simplified" License
def main():
    # Use CUDA
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_id
    use_cuda = torch.cuda.is_available()
    gpus = list(range(len(args.gpu_id.split(','))))

    # Random seed
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if use_cuda:
        torch.cuda.manual_seed_all(args.manualSeed)

    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu_id, ngpus_per_node, args)
Example #20
Source File: epi_sampler.py From machina with MIT License
def mp_sample(pol, env, max_steps, max_epis, n_steps_global, n_epis_global,
              epis, exec_flag, deterministic_flag, process_id, prepro=None, seed=256):
    """
    Multiprocess sample.
    Sampling episodes until max_steps or max_epis is achieved.

    Parameters
    ----------
    pol : Pol
    env : gym.Env
    max_steps : int
        maximum number of steps across episodes
    max_epis : int
        maximum number of episodes
    n_steps_global : torch.Tensor
        shared Tensor
    n_epis_global : torch.Tensor
        shared Tensor
    epis : list
        multiprocessing's list for sharing episodes between processes.
    exec_flag : torch.Tensor
        execution flag
    deterministic_flag : torch.Tensor
    process_id : int
    prepro : Prepro
    seed : int
    """
    np.random.seed(seed + process_id)
    torch.manual_seed(seed + process_id)
    torch.set_num_threads(1)

    while True:
        time.sleep(0.1)
        if exec_flag > 0:
            while max_steps > n_steps_global and max_epis > n_epis_global:
                l, epi = one_epi(env, pol, deterministic_flag, prepro)
                n_steps_global += l
                n_epis_global += 1
                epis.append(epi)
            exec_flag.zero_()
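The shared-tensor parameters above are what make this hogwild-style sampler work: the parent creates them once, moves them to shared memory, and starts one mp_sample process per worker. A hedged sketch of what that launching side could look like; the function name, worker count, and use of a Manager list are illustrative assumptions, not machina's actual implementation:

import torch
import torch.multiprocessing as mp

def launch_samplers(pol, env, max_steps, max_epis, num_workers=4):
    n_steps_global = torch.tensor(0).share_memory_()
    n_epis_global = torch.tensor(0).share_memory_()
    exec_flag = torch.tensor(0).share_memory_()
    deterministic_flag = torch.tensor(0).share_memory_()
    epis = mp.Manager().list()  # cross-process list of sampled episodes
    procs = []
    for pid in range(num_workers):
        p = mp.Process(target=mp_sample,
                       args=(pol, env, max_steps, max_epis, n_steps_global,
                             n_epis_global, epis, exec_flag, deterministic_flag, pid))
        p.daemon = True
        p.start()
        procs.append(p)
    exec_flag.fill_(1)  # wake the workers; they zero it when sampling completes
    return procs, epis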
Example #21
Source File: main.py From PyTorch with MIT License
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #22
Source File: utils.py From rl-agents with MIT License
def load_pytorch():
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.enabled = False
    logger.info("Using torch.multiprocessing.set_start_method('spawn')")
    import torch.multiprocessing as multiprocessing
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError as e:
        logger.warning(str(e))
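Two details make this helper robust: the CUDA runtime cannot be re-initialized in a process created with fork, so 'spawn' (which starts each child with a fresh interpreter) is the safe start method once CUDA is involved; and set_start_method raises a RuntimeError if the start method has already been fixed, which the try/except downgrades to a warning.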
Example #23
Source File: dataloader.py From keyphrase-gan with MIT License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #24
Source File: imagenet.py From pytorch-dp with Apache License 2.0
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed training. "
            "This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! "
            "You may see unexpected behavior when restarting "
            "from checkpoints."
        )

    if args.gpu is not None:
        warnings.warn(
            "You have chosen a specific GPU. This will completely "
            "disable data parallelism."
        )

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
Example #25
Source File: my_data_loader.py From ps_pytorch with MIT License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #26
Source File: pytorch_dataloader.py From dlupi-heteroscedastic-dropout with MIT License
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #27
Source File: mcmc.py From hypothesis with BSD 3-Clause "New" or "Revised" License
def __init__(self, sampler, chains=2, workers=torch.multiprocessing.cpu_count()):
    self.chains = chains
    self.sampler = sampler
    self.workers = workers
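Note that the workers default is evaluated only once, when the def statement runs: torch.multiprocessing.cpu_count() (a re-export of the standard library's multiprocessing.cpu_count) is called at class-definition time, not on each instantiation.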
Example #28
Source File: control.py From SLM-Lab with MIT License
def mp_run_session(spec, global_nets, mp_dict):
    '''Wrap for multiprocessing with shared variable'''
    session = Session(spec, global_nets)
    metrics = session.run()
    mp_dict[session.index] = metrics
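mp_dict is expected to be a cross-process mapping so each session can deposit its metrics for the parent. A hedged sketch of the launching side, assuming each spec encodes its own session index (illustrative, not SLM-Lab's actual control flow):

import torch.multiprocessing as mp

def run_sessions(session_specs, global_nets):
    mp_dict = mp.Manager().dict()  # shared {session index: metrics}
    workers = [mp.Process(target=mp_run_session, args=(spec, global_nets, mp_dict))
               for spec in session_specs]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    return dict(mp_dict)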
Example #29
Source File: main.py From TF2 with Apache License 2.0
def main():
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    if args.dist_url == "env://" and args.world_size == -1:
        args.world_size = int(os.environ["WORLD_SIZE"])

    args.distributed = args.world_size > 1 or args.multiprocessing_distributed

    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # Since we have ngpus_per_node processes per node, the total world_size
        # needs to be adjusted accordingly
        args.world_size = ngpus_per_node * args.world_size
        # Use torch.multiprocessing.spawn to launch distributed processes: the
        # main_worker process function
        mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)