Python torch.nn.parallel.scatter_gather.gather() Examples
The following are 30 code examples of torch.nn.parallel.scatter_gather.gather(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.nn.parallel.scatter_gather, or try the search function.
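Before the project examples, here is a minimal sketch of how gather() is typically paired with scatter() in a DataParallel-style forward pass. It assumes a machine with at least one CUDA device; the batch shape and the "multiply by 2" stand-in for per-replica work are illustrative assumptions, not taken from any of the projects below.

import torch
from torch.nn.parallel.scatter_gather import scatter, gather

# Minimal sketch: scatter() splits a batch across the visible GPUs and
# gather() concatenates the per-device results back onto one device.
if torch.cuda.is_available():
    device_ids = list(range(torch.cuda.device_count()))
    batch = torch.randn(32, 4)  # illustrative shape

    chunks = scatter(batch, device_ids, dim=0)      # one chunk per device
    outputs = [chunk * 2 for chunk in chunks]       # stand-in for per-replica work
    merged = gather(outputs, device_ids[0], dim=0)  # concatenate onto cuda:0

    print(merged.shape, merged.device)              # torch.Size([32, 4]) cuda:0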
Example #1
Source File: data_parallel.py From neural_sp with Apache License 2.0 | 6 votes |
def gather(self, outputs, output_device):
    n_returns = len(outputs[0])
    n_gpus = len(outputs)
    if n_returns == 2:
        losses = [output[0] for output in outputs]
        observation_mean = {}
        for output in outputs:
            for k, v in output[1].items():
                if v is None:
                    continue
                if k not in observation_mean.keys():
                    observation_mean[k] = v
                else:
                    observation_mean[k] += v
        observation_mean = {k: v / n_gpus for k, v in observation_mean.items()}
        return gather(losses, output_device, dim=self.dim).mean(), observation_mean
    else:
        raise ValueError(n_returns)
Example #2
Source File: _parallel_utils.py From fastNLP with Apache License 2.0 | 6 votes |
def _data_parallel_wrapper(func_name, device_ids, output_device):
    r"""
    Wrapper for methods that need to run on multiple GPUs, modeled on the
    forward function of nn.DataParallel.

    :param str func_name: name of the network method to run on multiple GPUs
    :param device_ids: same as device_ids in nn.DataParallel
    :param output_device: same as output_device in nn.DataParallel
    :return:
    """
    def wrapper(network, *inputs, **kwargs):
        inputs, kwargs = scatter_kwargs(inputs, kwargs, device_ids, dim=0)
        if len(device_ids) == 1:
            return getattr(network, func_name)(*inputs[0], **kwargs[0])
        replicas = replicate(network, device_ids[:len(inputs)])
        outputs = parallel_apply(replicas, func_name, inputs, kwargs, device_ids[:len(replicas)])
        return gather(outputs, output_device)
    return wrapper
Example #3
Source File: data_parallel.py From centerNet-deep-sort with GNU General Public License v3.0 | 5 votes |
def forward(self, *inputs, **kwargs):
    if not self.device_ids:
        return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #4
Source File: data_parallel.py From flowseq with Apache License 2.0 | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #5
Source File: multitask_trainer.py From scicite with Apache License 2.0 | 5 votes |
def _data_parallel(self, batch):
    """
    Do the forward pass using multiple GPUs.  This is a simplification
    of torch.nn.parallel.data_parallel to support the allennlp model
    interface.
    """
    inputs, module_kwargs = scatter_kwargs((), batch, self._cuda_devices, 0)
    used_device_ids = self._cuda_devices[:len(inputs)]
    replicas = replicate(self._model, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)

    # Only the 'loss' is needed.
    # a (num_gpu, ) tensor with loss on each GPU
    losses = gather([output['loss'].unsqueeze(0) for output in outputs], used_device_ids[0], 0)
    return {'loss': losses.mean()}
Example #6
Source File: multitask_trainer_two_tasks.py From scicite with Apache License 2.0 | 5 votes |
def _data_parallel(self, batch):
    """
    Do the forward pass using multiple GPUs.  This is a simplification
    of torch.nn.parallel.data_parallel to support the allennlp model
    interface.
    """
    inputs, module_kwargs = scatter_kwargs((), batch, self._cuda_devices, 0)
    used_device_ids = self._cuda_devices[:len(inputs)]
    replicas = replicate(self._model, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)

    # Only the 'loss' is needed.
    # a (num_gpu, ) tensor with loss on each GPU
    losses = gather([output['loss'].unsqueeze(0) for output in outputs], used_device_ids[0], 0)
    return {'loss': losses.mean()}
Example #7
Source File: data_parallel.py From CenterNet-CondInst with MIT License | 5 votes |
def forward(self, *inputs, **kwargs):
    if not self.device_ids:
        return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #8
Source File: data_parallel.py From CenterNet-CondInst with MIT License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #9
Source File: data_parallel.py From CenterNet-CondInst with MIT License | 5 votes |
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output.  Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim)
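As a hedged usage sketch of the functional data_parallel above (not part of the CenterNet-CondInst project): it can be called directly on any nn.Module when at least one CUDA device is available. The layer sizes and batch shape below are illustrative assumptions.

import torch
import torch.nn as nn

# Illustrative only: module, sizes, and batch are assumptions, not project code.
if torch.cuda.is_available():
    module = nn.Linear(16, 4).cuda()
    batch = torch.randn(32, 16).cuda()
    out = data_parallel(module, batch)  # scatter, replicate, forward, gather
    print(out.shape, out.device)        # torch.Size([32, 4]) cuda:0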
Example #10
Source File: data_parallel.py From CornerNet with BSD 3-Clause "New" or "Revised" License | 5 votes |
def forward(self, *inputs, **kwargs):
    if not self.device_ids:
        return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #11
Source File: data_parallel.py From CornerNet with BSD 3-Clause "New" or "Revised" License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #12
Source File: data_parallel.py From CornerNet with BSD 3-Clause "New" or "Revised" License | 5 votes |
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output.  Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim)
Example #13
Source File: data_parallel.py From flowseq with Apache License 2.0 | 5 votes |
def backward(self, *inputs, **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
    if not self.device_ids:
        return self.flow.backward(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    if len(self.device_ids) == 1:
        return self.flow.backward(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.flow, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs, backward=True)
    return self.gather(outputs, self.output_device)
Example #14
Source File: data_parallel.py From centerNet-deep-sort with GNU General Public License v3.0 | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #15
Source File: data_parallel.py From centerNet-deep-sort with GNU General Public License v3.0 | 5 votes |
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output.  Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim)
Example #16
Source File: data_parallel_dist.py From ps_pytorch with MIT License | 5 votes |
def forward(self, *inputs, **kwargs):
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    self._sync_params()
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    outputs = self.parallel_apply(self._module_copies, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #17
Source File: data_parallel_dist.py From ps_pytorch with MIT License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #18
Source File: dataparallel.py From mt-dnn with MIT License | 5 votes |
def forward(self, *inputs, **kwargs):
    # if not self.device_ids:
    #     return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    # if len(self.device_ids) == 1:
    #     return self.module(*inputs[0], **kwargs[0])
    # replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(self.replicas[:len(inputs)], inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #19
Source File: dataparallel.py From mt-dnn with MIT License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #20
Source File: data_parallel.py From CenterNet with MIT License | 5 votes |
def forward(self, *inputs, **kwargs):
    if not self.device_ids:
        return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #21
Source File: data_parallel.py From CenterNet with MIT License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #22
Source File: data_parallel.py From CenterNet with MIT License | 5 votes |
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output.  Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim)
Example #23
Source File: data_parallel.py From CenterNet with MIT License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #24
Source File: data_parallel.py From CornerNet-Lite-Pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)
Example #25
Source File: data_parallel.py From CornerNet-Lite-Pytorch with BSD 3-Clause "New" or "Revised" License | 5 votes |
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output.  Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim)
Example #26
Source File: train.py From PyTorch-Encoding with MIT License | 5 votes |
def validation(self, epoch):
    # Fast test during the training
    def eval_batch(model, image, target):
        outputs = model(image)
        outputs = gather(outputs, 0, dim=0)
        pred = outputs[0]
        target = target.cuda()
        correct, labeled = utils.batch_pix_accuracy(pred.data, target)
        inter, union = utils.batch_intersection_union(pred.data, target, self.nclass)
        return correct, labeled, inter, union

    is_best = False
    self.model.eval()
    total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
    tbar = tqdm(self.valloader, desc='\r')
    for i, (image, target) in enumerate(tbar):
        with torch.no_grad():
            correct, labeled, inter, union = eval_batch(self.model, image, target)

        total_correct += correct
        total_label += labeled
        total_inter += inter
        total_union += union
        pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
        IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
        mIoU = IoU.mean()
        tbar.set_description(
            'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))

    new_pred = (pixAcc + mIoU) / 2
    if new_pred > self.best_pred:
        is_best = True
        self.best_pred = new_pred
    utils.save_checkpoint({
        'epoch': epoch + 1,
        'state_dict': self.model.module.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'best_pred': self.best_pred,
    }, self.args, is_best)
Example #27
Source File: data_parallel.py From openseg.pytorch with MIT License | 5 votes |
def gather(self, outputs, output_device):
    if self.gather_:
        return gather(outputs, output_device, dim=self.dim)

    return outputs
Example #28
Source File: module_runner.py From openseg.pytorch with MIT License | 5 votes |
def gather(self, outputs, target_device=None, dim=0):
    r"""
    Gathers tensors from different GPUs on a specified device
    (-1 means the CPU).
    """
    if not self.configer.get('network', 'gathered'):
        if target_device is None:
            target_device = list(range(torch.cuda.device_count()))[0]

        return torch_gather(outputs, target_device, dim=dim)
    else:
        return outputs
Example #29
Source File: data_parallel.py From ExtremeNet with BSD 3-Clause "New" or "Revised" License | 5 votes |
def forward(self, *inputs, **kwargs):
    if not self.device_ids:
        return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
    if len(self.device_ids) == 1:
        return self.module(*inputs[0], **kwargs[0])
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(replicas, inputs, kwargs)
    return self.gather(outputs, self.output_device)
Example #30
Source File: data_parallel.py From ExtremeNet with BSD 3-Clause "New" or "Revised" License | 5 votes |
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim)