Python torch.cuda.device_count() Examples
The following are 14 code examples of torch.cuda.device_count(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the torch.cuda module.
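Before the examples, a minimal sketch of the call itself: torch.cuda.device_count() returns the number of CUDA devices visible to the current process (0 when no GPU or usable driver is present), so it is commonly used to pick a device or to size data-parallel setups.

import torch

# Number of GPUs visible to this process (0 if CUDA is unavailable).
num_gpus = torch.cuda.device_count()

# Common pattern: fall back to CPU when no GPU is present.
device = torch.device("cuda:0" if num_gpus > 0 else "cpu")
print(f"{num_gpus} GPU(s) detected, using {device}")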
Example #1
Source File: data_parallel.py From EMANet with GNU General Public License v3.0
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device]
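A rough usage sketch for the snippet above. It relies on module-level state that the excerpt implies but does not show (e.g. `_streams = None` and `from torch import cuda`); the helper below is hypothetical and not part of the original file.

from torch import cuda

_streams = None  # assumed module-level cache, implied by `global _streams` above

def _async_copy_to(tensor, device):
    """Hypothetical helper: copy a CPU tensor to GPU `device` on its background stream."""
    stream = _get_stream(device)                    # per-device stream from the snippet above
    with cuda.device(device), cuda.stream(stream):  # run the copy on the background stream
        return tensor.cuda(non_blocking=True)       # async only if `tensor` is in pinned memory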
Example #2
Source File: data_parallel.py From semantic-segmentation-pytorch with BSD 3-Clause "New" or "Revised" License
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device]
Example #3
Source File: trainer.py From allennlp with Apache License 2.0
def __init__(
    self,
    serialization_dir: str,
    cuda_device: Optional[Union[int, torch.device]] = None,
    distributed: bool = False,
    local_rank: int = 0,
    world_size: int = 1,
) -> None:
    if cuda_device is None:
        from torch import cuda

        if cuda.device_count() > 0:
            cuda_device = 0
        else:
            cuda_device = -1

    check_for_gpu(cuda_device)

    self._serialization_dir = serialization_dir

    if isinstance(cuda_device, list):
        raise ConfigurationError(
            "In allennlp 1.0, the Trainer can only be assigned a single `cuda_device`. "
            "Instead, we use torch's DistributedDataParallel at the command level, meaning "
            "our Trainer always uses a single GPU per process."
        )

    if distributed and world_size <= 1:
        raise ConfigurationError(
            "Distributed training can be performed only with more than 1 device. Check "
            "`cuda_device` key in the experiment configuration."
        )

    self.cuda_device = int_to_device(cuda_device)

    self._distributed = distributed
    self._rank = local_rank
    self._master = self._rank == 0
    self._world_size = world_size
Example #4
Source File: checks.py From allennlp with Apache License 2.0
def check_for_gpu(device: Union[int, torch.device, List[Union[int, torch.device]]]):
    if isinstance(device, list):
        for did in device:
            check_for_gpu(did)
    elif device is None:
        return
    else:
        from allennlp.common.util import int_to_device

        device = int_to_device(device)
        if device != torch.device("cpu"):
            num_devices_available = cuda.device_count()
            if num_devices_available == 0:
                # Torch will give a more informative exception than ours, so we want to include
                # that context as well if it's available. For example, if you try to run torch 1.5
                # on a machine with CUDA10.1 you'll get the following:
                #
                #     The NVIDIA driver on your system is too old (found version 10010).
                #
                torch_gpu_error = ""
                try:
                    cuda._check_driver()
                except Exception as e:
                    torch_gpu_error = "\n{0}".format(e)

                raise ConfigurationError(
                    "Experiment specified a GPU but none is available;"
                    " if you want to run on CPU use the override"
                    " 'trainer.cuda_device=-1' in the json config file."
                    + torch_gpu_error
                )
            elif device.index >= num_devices_available:
                raise ConfigurationError(
                    f"Experiment specified GPU device {device.index}"
                    f" but there are only {num_devices_available} devices "
                    f" available."
                )
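A short usage sketch (assuming the usual allennlp import paths, which may differ slightly between versions):

from allennlp.common.checks import check_for_gpu, ConfigurationError

try:
    check_for_gpu(0)         # single GPU id
    check_for_gpu([0, 1])    # lists are checked recursively
except ConfigurationError as err:
    print(f"Falling back to CPU: {err}")
    check_for_gpu(-1)        # -1 maps to the CPU device and always passes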
Example #5
Source File: checks.py From magnitude with MIT License
def check_for_gpu(device_id):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError(u"Experiment specified a GPU but none is available;"
                                 u" if you want to run on CPU use the override"
                                 u" 'trainer.cuda_device=-1' in the json config file.")
Example #6
Source File: environment.py From gtos with MIT License
def check_for_gpu(params) -> object:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example #7
Source File: checks.py From gtos with MIT License
def check_for_gpu(device_id: int):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example #8
Source File: user_scattered.py From Jacinle with MIT License
def _get_stream(device):
    """Gets a background stream for copying between CPU and GPU"""
    global _streams
    if device == -1:
        return None
    if _streams is None:
        _streams = [None] * cuda.device_count()
    if _streams[device] is None:
        _streams[device] = cuda.Stream(device)
    return _streams[device]
Example #9
Source File: data_parallel.py From Jacinle with MIT License
def __init__(self, module, device_ids=None, output_device=None, dim=0,
             allow_replication_callback=True,
             user_scattered=False, use_scatter_stream=True,
             scatter_func=None,
             use_dict_gather=True, dict_gather_layout=None,
             persistent=False, copy_parameters=False, copy_buffers=True):
    super(DataParallel, self).__init__()

    if device_ids is None:
        device_ids = list(range(cuda.device_count()))
    if output_device is None:
        output_device = device_ids[0]

    self.dim = dim
    self.module = module
    self.device_ids = device_ids
    self.output_device = output_device

    if len(self.device_ids) == 1:
        self.module.cuda(device_ids[0])

    self.allow_replication_callback = allow_replication_callback

    self.user_scattered = user_scattered
    self.use_scatter_stream = use_scatter_stream
    self.scatter_func = scatter_func

    self.use_dict_gather = use_dict_gather
    self.dict_gather_layout = dict_gather_layout

    self.persistent = persistent
    self.copy_parameters = copy_parameters
    self.copy_buffers = copy_buffers
    self.replicas = nn.ModuleList()
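A hedged usage sketch, assuming the DataParallel class above is in scope (its import path inside the Jacinle project is not shown here). The constructor mirrors torch.nn.DataParallel and defaults device_ids to every visible GPU:

import torch.nn as nn
from torch import cuda

model = nn.Linear(128, 10)
if cuda.device_count() > 0:
    # device_ids defaults to list(range(cuda.device_count()));
    # outputs are gathered on device_ids[0]
    model = DataParallel(model)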
Example #10
Source File: environment.py From homura with Apache License 2.0
def get_num_nodes() -> int:
    # assume all nodes have the same number of gpus
    if not is_distributed():
        return 1
    else:
        return get_world_size() // device_count()
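The division assumes one process per GPU and an identical GPU count on every node: with a world size of 8 and device_count() == 4, this reports 2 nodes. A standalone illustration of the same arithmetic (not homura's API):

from torch.cuda import device_count

def nodes_for(world_size: int) -> int:
    """Illustrative only: one process per GPU, homogeneous nodes."""
    gpus_per_node = max(device_count(), 1)   # guard against CPU-only machines
    return max(world_size // gpus_per_node, 1)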
Example #11
Source File: environment.py From stog with MIT License
def check_for_gpu(params) -> object:
    device_id = params['cuda_device']
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example #12
Source File: checks.py From stog with MIT License
def check_for_gpu(device_id: int):
    if device_id is not None and device_id >= cuda.device_count():
        raise ConfigurationError("Experiment specified a GPU but none is available;"
                                 " if you want to run on CPU use the override"
                                 " 'trainer.cuda_device=-1' in the json config file.")
Example #13
Source File: gpu_info.py From dreampower with GNU General Public License v3.0
def get_info():
    """
    Get gpu info.

    :return: <dict> gpu info
    """
    return {
        "has_cuda": cuda.is_available(),
        "devices": [] if not cuda.is_available()
        else [cuda.get_device_name(i) for i in range(cuda.device_count())],
    }
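A quick usage sketch of the helper above (assuming get_info is imported from the project's gpu_info module):

info = get_info()
print(f"CUDA available: {info['has_cuda']}")
for index, name in enumerate(info["devices"]):
    print(f"  cuda:{index} -> {name}")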
Example #14
Source File: environment.py From homura with Apache License 2.0
def init_distributed(use_horovod: bool = False,
                     backend: Optional[str] = None,
                     init_method: Optional[str] = None,
                     warning: bool = True):
    """ Simple initializer for distributed training.

    :param use_horovod: use horovod instead of `torch.distributed` if True
    :param backend: backend when using `torch.distributed` (defaults to "nccl")
    :param init_method: init method when using `torch.distributed` (defaults to "env://")
    :param warning: warn if `torch.distributed` is already initialized
    :return:
    """

    if not is_distributed_available():
        raise RuntimeError('Distributed training is not available on this machine')

    global _DISTRIBUTED_FLAG
    _DISTRIBUTED_FLAG = True
    if use_horovod:
        if backend is not None or init_method is not None:
            raise RuntimeError('Try to use horovod, but `backend` and `init_method` are not None')

        if is_horovod_available():
            import horovod.torch as hvd

            hvd.init()
            logger.debug("init horovod")
        else:
            raise RuntimeError('horovod is not available!')

    else:
        if backend is None:
            backend = "nccl"
        if init_method is None:
            init_method = "env://"

        if not is_distributed():
            raise RuntimeError(
                f"For distributed training, use `python -m torch.distributed.launch "
                f"--nproc_per_node={device_count()} {args}` ...")

        if distributed.is_initialized():
            if warning:
                logger.warn("`distributed` is already initialized. Skipped.")
        else:
            distributed.init_process_group(backend=backend, init_method=init_method)
        logger.debug("init distributed")

    if not is_master():
        def no_print(*values, **kwargs):
            pass

        builtins.print = no_print
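The function is intended to be called once per process, after launching one process per GPU. A minimal, hypothetical entry point assuming init_distributed (defined above) is in scope:

# Launched with, e.g.:
#   python -m torch.distributed.launch --nproc_per_node=<torch.cuda.device_count()> train.py
def main():
    init_distributed(backend="nccl", init_method="env://")
    # ... build the model, wrap it in DistributedDataParallel, train ...

if __name__ == "__main__":
    main()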