Python torch.nn.parallel.scatter_gather.gather() Examples

The following are 30 code examples of torch.nn.parallel.scatter_gather.gather(), drawn from open-source projects; each example notes its source file, project, and license. You may also want to check out all available functions/classes of the module torch.nn.parallel.scatter_gather.
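Before the project examples, here is a minimal, self-contained sketch of the call itself (not taken from any of the projects below, and assuming at least two CUDA devices): gather() moves a list of per-device outputs onto a single target device, concatenating tensors along dim and recursing into tuples and dicts.

import torch
from torch.nn.parallel.scatter_gather import gather

if torch.cuda.device_count() >= 2:
    # one output chunk per device, e.g. as produced by parallel_apply
    outputs = [torch.randn(4, 8, device='cuda:0'),
               torch.randn(4, 8, device='cuda:1')]
    merged = gather(outputs, target_device=0, dim=0)
    print(merged.shape, merged.device)  # torch.Size([8, 8]) cuda:0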
Example #1
Source File: data_parallel.py    From neural_sp with Apache License 2.0
def gather(self, outputs, output_device):
        n_returns = len(outputs[0])
        n_gpus = len(outputs)
        if n_returns == 2:
            losses = [output[0] for output in outputs]
            observation_mean = {}
            for output in outputs:
                for k, v in output[1].items():
                    if v is None:
                        continue
                    if k not in observation_mean:
                        observation_mean[k] = v
                    else:
                        observation_mean[k] += v
            # average each observation over the GPUs once, after accumulating from all of them
            observation_mean = {k: v / n_gpus for k, v in observation_mean.items()}
            return gather(losses, output_device, dim=self.dim).mean(), observation_mean
        else:
            raise ValueError(n_returns) 
Example #2
Source File: _parallel_utils.py    From fastNLP with Apache License 2.0
def _data_parallel_wrapper(func_name, device_ids, output_device):
    r"""
    This is a wrapper for functions of the network that need to run on multiple GPUs.
    It follows the forward function of nn.DataParallel.

    :param str func_name: the function of the network to run on multiple GPUs
    :param device_ids: device_ids, as in nn.DataParallel
    :param output_device: output_device, as in nn.DataParallel
    :return:
    """
    
    def wrapper(network, *inputs, **kwargs):
        inputs, kwargs = scatter_kwargs(inputs, kwargs, device_ids, dim=0)
        if len(device_ids) == 1:
            return getattr(network, func_name)(*inputs[0], **kwargs[0])
        replicas = replicate(network, device_ids[:len(inputs)])
        outputs = parallel_apply(replicas, func_name, inputs, kwargs, device_ids[:len(replicas)])
        return gather(outputs, output_device)
    
    return wrapper 
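A hedged usage sketch for the wrapper above: TinyModel and its predict method are illustrative, not part of fastNLP, and the helpers used by _data_parallel_wrapper (scatter_kwargs, replicate, and fastNLP's own parallel_apply variant) are assumed to be in scope as in _parallel_utils.py.

import torch
import torch.nn as nn

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(128, 10)

    def predict(self, x):
        return self.linear(x)

if torch.cuda.device_count() >= 2:
    model = TinyModel().cuda(0)
    # scatter the batch, run TinyModel.predict on both GPUs, gather the results on cuda:0
    predict_parallel = _data_parallel_wrapper('predict', device_ids=[0, 1], output_device=0)
    batch = torch.randn(16, 128, device='cuda:0')
    out = predict_parallel(model, batch)
    print(out.shape, out.device)  # torch.Size([16, 10]) cuda:0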
Example #3
Source File: data_parallel.py    From centerNet-deep-sort with GNU General Public License v3.0
def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #4
Source File: data_parallel.py    From flowseq with Apache License 2.0
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #5
Source File: multitask_trainer.py    From scicite with Apache License 2.0
def _data_parallel(self, batch):
        """
        Do the forward pass using multiple GPUs.  This is a simplification
        of torch.nn.parallel.data_parallel to support the allennlp model
        interface.
        """
        inputs, module_kwargs = scatter_kwargs((), batch, self._cuda_devices, 0)
        used_device_ids = self._cuda_devices[:len(inputs)]
        replicas = replicate(self._model, used_device_ids)
        outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)

        # Only the 'loss' is needed.
        # a (num_gpu, ) tensor with loss on each GPU
        losses = gather([output['loss'].unsqueeze(0) for output in outputs], used_device_ids[0], 0)
        return {'loss': losses.mean()} 
Example #6
Source File: multitask_trainer_two_tasks.py    From scicite with Apache License 2.0
def _data_parallel(self, batch):
        """
        Do the forward pass using multiple GPUs.  This is a simplification
        of torch.nn.parallel.data_parallel to support the allennlp model
        interface.
        """
        inputs, module_kwargs = scatter_kwargs((), batch, self._cuda_devices, 0)
        used_device_ids = self._cuda_devices[:len(inputs)]
        replicas = replicate(self._model, used_device_ids)
        outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)

        # Only the 'loss' is needed.
        # a (num_gpu, ) tensor with loss on each GPU
        losses = gather([output['loss'].unsqueeze(0) for output in outputs], used_device_ids[0], 0)
        return {'loss': losses.mean()} 
Example #7
Source File: data_parallel.py    From CenterNet-CondInst with MIT License
def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #8
Source File: data_parallel.py    From CenterNet-CondInst with MIT License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #9
Source File: data_parallel.py    From CenterNet-CondInst with MIT License
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim) 
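A hedged usage sketch for the functional form above: TinyNet is an illustrative module, and scatter_kwargs, replicate, parallel_apply, and gather are assumed to be imported as they are in the source file.

import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(32, 4)

    def forward(self, x):
        return self.fc(x)

if torch.cuda.device_count() >= 2:
    net = TinyNet().cuda(0)
    x = torch.randn(64, 32, device='cuda:0')
    # splits x across cuda:0 and cuda:1, runs the replicas, then gathers on cuda:0
    y = data_parallel(net, x, device_ids=[0, 1], output_device=0)
    print(y.shape, y.device)  # torch.Size([64, 4]) cuda:0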
Example #10
Source File: data_parallel.py    From CornerNet with BSD 3-Clause "New" or "Revised" License
def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #11
Source File: data_parallel.py    From CornerNet with BSD 3-Clause "New" or "Revised" License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #12
Source File: data_parallel.py    From CornerNet with BSD 3-Clause "New" or "Revised" License
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim) 
Example #13
Source File: data_parallel.py    From flowseq with Apache License 2.0
def backward(self, *inputs, **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
        if not self.device_ids:
            return self.flow.backward(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        if len(self.device_ids) == 1:
            return self.flow.backward(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.flow, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs, backward=True)
        return self.gather(outputs, self.output_device) 
Example #14
Source File: data_parallel.py    From centerNet-deep-sort with GNU General Public License v3.0
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #15
Source File: data_parallel.py    From centerNet-deep-sort with GNU General Public License v3.0
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim) 
Example #16
Source File: data_parallel_dist.py    From ps_pytorch with MIT License
def forward(self, *inputs, **kwargs):
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        self._sync_params()
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        outputs = self.parallel_apply(self._module_copies, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #17
Source File: data_parallel_dist.py    From ps_pytorch with MIT License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #18
Source File: dataparallel.py    From mt-dnn with MIT License
def forward(self, *inputs, **kwargs):
    #if not self.device_ids:
    #  return self.module(*inputs, **kwargs)
    inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
    #if len(self.device_ids) == 1:
    #  return self.module(*inputs[0], **kwargs[0])
    #replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
    outputs = self.parallel_apply(self.replicas[:len(inputs)], inputs, kwargs)
    return self.gather(outputs, self.output_device) 
Example #19
Source File: dataparallel.py    From mt-dnn with MIT License
def gather(self, outputs, output_device):
    return gather(outputs, output_device, dim=self.dim) 
Example #20
Source File: data_parallel.py    From CenterNet with MIT License
def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #21
Source File: data_parallel.py    From CenterNet with MIT License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #22
Source File: data_parallel.py    From CenterNet with MIT License
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim) 
Example #23
Source File: data_parallel.py    From CenterNet with MIT License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #24
Source File: data_parallel.py    From CornerNet-Lite-Pytorch with BSD 3-Clause "New" or "Revised" License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim) 
Example #25
Source File: data_parallel.py    From CornerNet-Lite-Pytorch with BSD 3-Clause "New" or "Revised" License
def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None):
    r"""Evaluates module(input) in parallel across the GPUs given in device_ids.

    This is the functional version of the DataParallel module.

    Args:
        module: the module to evaluate in parallel
        inputs: inputs to the module
        device_ids: GPU ids on which to replicate module
        output_device: GPU location of the output. Use -1 to indicate the CPU.
            (default: device_ids[0])
    Returns:
        a Variable containing the result of module(input) located on
        output_device
    """
    if not isinstance(inputs, tuple):
        inputs = (inputs,)

    if device_ids is None:
        device_ids = list(range(torch.cuda.device_count()))

    if output_device is None:
        output_device = device_ids[0]

    inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim)
    if len(device_ids) == 1:
        return module(*inputs[0], **module_kwargs[0])
    used_device_ids = device_ids[:len(inputs)]
    replicas = replicate(module, used_device_ids)
    outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids)
    return gather(outputs, output_device, dim) 
Example #26
Source File: train.py    From PyTorch-Encoding with MIT License
def validation(self, epoch):
        # Fast test during the training
        def eval_batch(model, image, target):
            outputs = model(image)
            outputs = gather(outputs, 0, dim=0)
            pred = outputs[0]
            target = target.cuda()
            correct, labeled = utils.batch_pix_accuracy(pred.data, target)
            inter, union = utils.batch_intersection_union(pred.data, target, self.nclass)
            return correct, labeled, inter, union

        is_best = False
        self.model.eval()
        total_inter, total_union, total_correct, total_label = 0, 0, 0, 0
        tbar = tqdm(self.valloader, desc='\r')
        for i, (image, target) in enumerate(tbar):
            with torch.no_grad():
                correct, labeled, inter, union = eval_batch(self.model, image, target)

            total_correct += correct
            total_label += labeled
            total_inter += inter
            total_union += union
            pixAcc = 1.0 * total_correct / (np.spacing(1) + total_label)
            IoU = 1.0 * total_inter / (np.spacing(1) + total_union)
            mIoU = IoU.mean()
            tbar.set_description(
                'pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))

        new_pred = (pixAcc + mIoU)/2
        if new_pred > self.best_pred:
            is_best = True
            self.best_pred = new_pred
        utils.save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': self.model.module.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_pred': self.best_pred,
        }, self.args, is_best) 
Example #27
Source File: data_parallel.py    From openseg.pytorch with MIT License
def gather(self, outputs, output_device):
        if self.gather_:
            return gather(outputs, output_device, dim=self.dim)

        return outputs 
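When gather_ is False, the per-GPU outputs are returned as-is, typically so that a downstream parallel criterion can consume them on their own devices. For comparison, here is a small sketch (not from openseg.pytorch) of what the gathered path does with structured outputs, since gather() recurses into dicts:

import torch
from torch.nn.parallel.scatter_gather import gather

if torch.cuda.device_count() >= 2:
    outputs = [{'seg': torch.randn(2, 19, 64, 64, device='cuda:0')},
               {'seg': torch.randn(2, 19, 64, 64, device='cuda:1')}]
    merged = gather(outputs, target_device=0, dim=0)
    print(merged['seg'].shape)  # torch.Size([4, 19, 64, 64]), on cuda:0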
Example #28
Source File: module_runner.py    From openseg.pytorch with MIT License
def gather(self, outputs, target_device=None, dim=0):
        r"""
        Gathers tensors from different GPUs on a specified device
          (-1 means the CPU).
        """
        if not self.configer.get('network', 'gathered'):
            if target_device is None:
                target_device = list(range(torch.cuda.device_count()))[0]

            return torch_gather(outputs, target_device, dim=dim)

        else:
            return outputs 
Example #29
Source File: data_parallel.py    From ExtremeNet with BSD 3-Clause "New" or "Revised" License
def forward(self, *inputs, **kwargs):
        if not self.device_ids:
            return self.module(*inputs, **kwargs)
        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes)
        if len(self.device_ids) == 1:
            return self.module(*inputs[0], **kwargs[0])
        replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
        outputs = self.parallel_apply(replicas, inputs, kwargs)
        return self.gather(outputs, self.output_device) 
Example #30
Source File: data_parallel.py    From ExtremeNet with BSD 3-Clause "New" or "Revised" License
def gather(self, outputs, output_device):
        return gather(outputs, output_device, dim=self.dim)