Python torch.cuda.device_count() Examples

The following are 14 code examples of torch.cuda.device_count(), drawn from open-source projects. Each example is attributed to its original project and source file. You may also want to check out all available functions/classes of the module torch.cuda, or try the search function.
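Before the project examples, here is a minimal, self-contained sketch of the call itself: torch.cuda.device_count() returns the number of CUDA devices visible to PyTorch and is safe to call even on a CPU-only machine.

import torch

n_gpus = torch.cuda.device_count()
if n_gpus == 0:
    device = torch.device("cpu")        # no CUDA devices visible
else:
    device = torch.device("cuda", 0)    # default to the first GPU
print(f"{n_gpus} CUDA device(s) available; using {device}")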
Example #1
Source File: data_parallel.py    From EMANet with GNU General Public License v3.0
from torch import cuda

_streams = None  # module-level cache of one background stream per CUDA device


def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None  # device -1 means CPU: no CUDA stream needed
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device]
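For context, a minimal usage sketch of the helper above (the device index and the copy are illustrative; pinned host memory would be needed for the copy to be truly asynchronous):

import torch

copy_stream = _get_stream(0)                 # background stream for GPU 0
if copy_stream is not None:
    with torch.cuda.stream(copy_stream):
        # copies issued here run on the background stream, off the default stream
        gpu_tensor = torch.empty(1024, device="cuda:0")
        gpu_tensor.copy_(torch.randn(1024), non_blocking=True)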
Example #2
Source File: data_parallel.py    From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device] 
Example #3
Source File: trainer.py    From allennlp with Apache License 2.0
def __init__(
        self,
        serialization_dir: str,
        cuda_device: Optional[Union[int, torch.device]] = None,
        distributed: bool = False,
        local_rank: int = 0,
        world_size: int = 1,
    ) -> None:
        if cuda_device is None:
            from torch import cuda

            if cuda.device_count() > 0:
                cuda_device = 0
            else:
                cuda_device = -1

        check_for_gpu(cuda_device)
        self._serialization_dir = serialization_dir

        if isinstance(cuda_device, list):
            raise ConfigurationError(
                "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
                "Instead, we use torch's DistributedDataParallel at the command level, meaning "
                "our Trainer always uses a single GPU per process."
            )

        if distributed and world_size <= 1:
            raise ConfigurationError(
                "Distributed training can be performed only with more than 1 device. Check "
                "`cuda_device` key in the experiment configuration."
            )

        self.cuda_device = int_to_device(cuda_device)

        self._distributed = distributed
        self._rank = local_rank
        self._master = self._rank == 0
        self._world_size = world_size 
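The constructor above relies on allennlp's int_to_device helper; a rough sketch of its presumed behaviour (an approximation, not the library's exact code):

from typing import Union
import torch

def int_to_device(device: Union[int, torch.device]) -> torch.device:
    # Presumed mapping: pass torch.device through, a negative index means CPU,
    # and a non-negative integer selects the matching CUDA device.
    if isinstance(device, torch.device):
        return device
    if device < 0:
        return torch.device("cpu")
    return torch.device("cuda", device)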
Example #4
Source File: checks.py    From allennlp with Apache License 2.0
def check_for_gpu(device: Union[int, torch.device, List[Union[int, torch.device]]]):
    if isinstance(device, list):
        for did in device:
            check_for_gpu(did)
    elif device is None:
        return
    else:
        from allennlp.common.util import int_to_device

        device = int_to_device(device)
        if device != torch.device("cpu"):
            num_devices_available = cuda.device_count()
            if num_devices_available == 0:
                # Torch will give a more informative exception than ours, so we want to include
                # that context as well if it's available.  For example, if you try to run torch 1.5
                # on a machine with CUDA10.1 you'll get the following:
                #
                #     The NVIDIA driver on your system is too old (found version 10010).
                #
                torch_gpu_error = ""
                try:
                    cuda._check_driver()
                except Exception as e:
                    torch_gpu_error = "\n{0}".format(e)

                raise ConfigurationError(
                    "Experiment specified a GPU but none is available;"
                    " if you want to run on CPU use the override"
                    " 'trainer.cuda_device=-1' in the json config file." + torch_gpu_error
                )
            elif device.index >= num_devices_available:
                raise ConfigurationError(
                    f"Experiment specified GPU device {device.index}"
                    f" but there are only {num_devices_available} devices "
                    f" available."
                ) 
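A brief usage sketch of this checker (device values are illustrative):

check_for_gpu(torch.device("cpu"))  # CPU never raises
check_for_gpu(-1)                   # -1 resolves to CPU, also fine
check_for_gpu(0)                    # raises ConfigurationError if no CUDA device is visible
check_for_gpu([0, 1])               # a list is validated element by element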
Example #5
Source File: checks.py    From magnitude with MIT License
def check_for_gpu(device_id):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError(u"Experiment specified a GPU but none is available;"
                                 u" if you want to run on CPU use the override"
                                 u" 'trainer.cuda_device=-1' in the json config file.") 
Example #6
Source File: environment.py    From gtos with MIT License
def check_for_gpu(params) -> object:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.") 
Example #7
Source File: checks.py    From gtos with MIT License
def check_for_gpu(device_id: int):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.") 
Example #8
Source File: user_scattered.py    From Jacinle with MIT License
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device] 
Example #9
Source File: data_parallel.py    From Jacinle with MIT License
def __init__(self, module,
                 device_ids=None, output_device=None, dim=0,
                 allow_replication_callback=True,
                 user_scattered=False, use_scatter_stream=True,
                 scatter_func=None,
                 use_dict_gather=True, dict_gather_layout=None,
                 persistent=False, copy_parameters=False, copy_buffers=True):

        super(DataParallel, self).__init__()
        if device_ids is None:
            device_ids = list(range(cuda.device_count()))
        if output_device is None:
            output_device = device_ids[0]

        self.dim = dim
        self.module = module
        self.device_ids = device_ids
        self.output_device = output_device
        if len(self.device_ids) == 1:
            self.module.cuda(device_ids[0])

        self.allow_replication_callback = allow_replication_callback

        self.user_scattered = user_scattered
        self.use_scatter_stream = use_scatter_stream
        self.scatter_func = scatter_func

        self.use_dict_gather = use_dict_gather
        self.dict_gather_layout = dict_gather_layout

        self.persistent = persistent
        self.copy_parameters = copy_parameters
        self.copy_buffers = copy_buffers
        self.replicas = nn.ModuleList() 
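A minimal construction sketch for the wrapper above (the wrapped module is a placeholder, and forward behaviour is assumed to mirror torch.nn.DataParallel):

import torch.nn as nn
from torch import cuda

net = nn.Linear(16, 4)          # placeholder module for illustration
if cuda.device_count() > 0:
    net = DataParallel(net)     # replicates across all visible GPUs by default
# with zero GPUs the defaults would index an empty device_ids list, so keep the plain module on CPU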
Example #10
Source File: environment.py    From homura with Apache License 2.0
def get_num_nodes() -> int:
    # assume all nodes have the same number of gpus
    if not is_distributed():
        return 1
    else:
        return get_world_size() // device_count() 
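For example, with get_world_size() == 8 processes and device_count() == 4 GPUs per node, this returns 8 // 4 = 2 nodes.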
Example #11
Source File: environment.py    From stog with MIT License
def check_for_gpu(params) -> object:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.") 
Example #12
Source File: checks.py    From stog with MIT License
def check_for_gpu(device_id: int):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.") 
Example #13
Source File: gpu_info.py    From dreampower with GNU General Public License v3.0
def get_info():
    """
    Get gpu info.

    :return: <dict> gpu info
    """
    return {
        "has_cuda": cuda.is_available(),
        "devices": [] if not cuda.is_available() else [cuda.get_device_name(i) for i in range(cuda.device_count())],
    } 
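Calling it returns a plain dictionary, for example (device names are illustrative):

info = get_info()
# {"has_cuda": True, "devices": ["GeForce GTX 1080 Ti", "GeForce GTX 1080 Ti"]}
# or, on a CPU-only machine:
# {"has_cuda": False, "devices": []}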
Example #14
Source File: environment.py    From homura with Apache License 2.0
def init_distributed(use_horovod: bool = False,
                     backend: Optional[str] = None,
                     init_method: Optional[str] = None,
                     warning: bool = True):
    """ Simple initializer for distributed training.

    :param use_horovod: if True, initialize Horovod instead of `torch.distributed`
    :param backend: `torch.distributed` backend (defaults to "nccl" when omitted)
    :param init_method: `torch.distributed` init method (defaults to "env://" when omitted)
    :param warning: if True, warn when the process group is already initialized
    :return:
    """

    if not is_distributed_available():
        raise RuntimeError('Distributed training is not available on this machine')

    global _DISTRIBUTED_FLAG
    _DISTRIBUTED_FLAG = True
    if use_horovod:
        if backend is not None or init_method is not None:
            raise RuntimeError('Try to use horovod, but `backend` and `init_method` are not None')

        if is_horovod_available():
            import horovod.torch as hvd

            hvd.init()
            logger.debug("init horovod")
        else:
            raise RuntimeError('horovod is not available!')

    else:
        if backend is None:
            backend = "nccl"
        if init_method is None:
            init_method = "env://"

        if not is_distributed():
            raise RuntimeError(
                f"For distributed training, use `python -m torch.distributed.launch "
                f"--nproc_per_node={device_count()} {args}` ...")

        if distributed.is_initialized():
            if warning:
                logger.warn("`distributed` is already initialized. Skipped.")
        else:
            distributed.init_process_group(backend=backend, init_method=init_method)
        logger.debug("init distributed")

    if not is_master():
        def no_print(*values, **kwargs):
            pass

        builtins.print = no_print
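A minimal usage sketch, assuming a hypothetical training script launched with python -m torch.distributed.launch (one process per GPU) that calls this helper before building the model; `distributed` and `device_count` refer to the same names used in the example above:

import torch

init_distributed()                            # NCCL backend and "env://" rendezvous by default
rank = distributed.get_rank()                 # global rank of this process
torch.cuda.set_device(rank % device_count())  # pin this process to one local GPU (assumes equal GPUs per node)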