Python torch.distributed.get_backend() Examples
The following are 19 code examples of torch.distributed.get_backend(), collected from open-source projects. Each example lists its source file, project, and license.
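Before the examples, a minimal sketch of the call itself: torch.distributed.get_backend() returns the name of the backend behind the default (or a given) process group, so it must run after init_process_group. The address, port, rank, and world size below are illustrative single-process placeholders, not taken from any example on this page.

import torch.distributed as dist

# Single-process "gloo" group; the address and port are placeholders.
dist.init_process_group(
    backend="gloo",
    init_method="tcp://127.0.0.1:29500",
    rank=0,
    world_size=1,
)
print(dist.get_backend())  # prints "gloo"
dist.destroy_process_group()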
Example #1
Source File: test_native.py From ignite with BSD 3-Clause "New" or "Revised" License
def _test__native_dist_model_create_from_backend_no_dist(backend, true_device):
    from datetime import timedelta

    model = _NativeDistModel.create_from_backend(backend=backend, timeout=timedelta(seconds=20))

    assert dist.is_available() and dist.is_initialized()
    assert dist.get_backend() == backend

    _assert_model(
        model,
        {
            "device": true_device,
            "local_rank": 0,
            "rank": 0,
            "world_size": 1,
            "node_index": 0,
            "nnodes": 1,
            "nproc_per_node": 1,
        },
    )

    model.finalize()
Example #2
Source File: comm.py From fast-reid with Apache License 2.0
def _serialize_to_tensor(data, group):
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    if len(buffer) > 1024 ** 3:
        logger = logging.getLogger(__name__)
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024 ** 3), device
            )
        )
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor
Example #3
Source File: comm.py From detectron2 with Apache License 2.0
def _serialize_to_tensor(data, group):
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    if len(buffer) > 1024 ** 3:
        logger = logging.getLogger(__name__)
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024 ** 3), device
            )
        )
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor
Example #4
Source File: comm.py From detectron2 with Apache License 2.0
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD
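This gloo group is typically paired with _serialize_to_tensor from the earlier examples: pickled payloads travel over gloo on CPU even when training itself uses nccl. The sketch below is illustrative, not code from detectron2; it assumes every rank's byte tensor has already been padded to a common length, and `data` stands for any picklable object.

import pickle
import torch
import torch.distributed as dist

# Illustrative pairing of the two helpers (an assumption, not detectron2 code).
group = _get_global_gloo_group()
tensor = _serialize_to_tensor(data, group)  # `data`: any picklable object
received = [torch.empty_like(tensor) for _ in range(dist.get_world_size(group=group))]
dist.all_gather(received, tensor, group=group)  # requires equal-sized tensors
payloads = [pickle.loads(t.numpy().tobytes()) for t in received]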
Example #5
Source File: dist_utils.py From video_analyst with MIT License
def _serialize_to_tensor(data, group):
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    if len(buffer) > 1024**3:
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024**3), device))
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor
Example #6
Source File: dist_utils.py From video_analyst with MIT License
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD
Example #7
Source File: test_native.py From ignite with BSD 3-Clause "New" or "Revised" License
def _test_dist_spawn_fn(local_rank, backend, world_size, device):
    from ignite.distributed.utils import _model

    assert dist.is_available() and dist.is_initialized()
    assert dist.get_backend() == backend

    assert isinstance(_model, _NativeDistModel), "{} vs _NativeDistModel".format(type(_model))

    assert _model.get_local_rank() == local_rank
    assert _model.get_world_size() == world_size

    if backend == "nccl":
        assert _model.device() == torch.device("{}:{}".format(device, local_rank))
    elif backend == "gloo":
        assert _model.device() == torch.device(device)
Example #8
Source File: test_native.py From ignite with BSD 3-Clause "New" or "Revised" License
def _test__native_dist_model_create_from_backend_dist(local_rank, rank, world_size, backend, true_device):
    import os
    from datetime import timedelta

    timeout = timedelta(seconds=20)
    os.environ["RANK"] = "{}".format(rank)

    assert "MASTER_ADDR" not in os.environ
    assert "MASTER_PORT" not in os.environ

    model = _NativeDistModel.create_from_backend(backend=backend, timeout=timeout)

    assert dist.is_available() and dist.is_initialized()
    assert dist.get_backend() == backend

    with pytest.raises(RuntimeError, match=r"Can not create new distributed process group if default one is"):
        _NativeDistModel.create_from_backend(backend=backend, timeout=timeout)

    _assert_model(
        model,
        {
            "device": true_device,
            "local_rank": local_rank,
            "rank": rank,
            "world_size": world_size,
            "node_index": 0,
            "nnodes": 1,
            "nproc_per_node": world_size,
        },
    )

    model.finalize()

    del os.environ["RANK"]

    assert "MASTER_ADDR" not in os.environ
    assert "MASTER_PORT" not in os.environ
    assert "RANK" not in os.environ
Example #9
Source File: native.py From ignite with BSD 3-Clause "New" or "Revised" License
def backend(self) -> str:
    return dist.get_backend()
Example #10
Source File: distributed.py From SlowFast with Apache License 2.0
def _serialize_to_tensor(data, group):
    """
    Serialize the data to a ByteTensor. Note that only `gloo` and `nccl`
    backends are supported.
    Args:
        data (data): data to be serialized.
        group (group): pytorch dist group.
    Returns:
        tensor (ByteTensor): the serialized tensor.
    """
    backend = dist.get_backend(group)
    assert backend in ["gloo", "nccl"]
    device = torch.device("cpu" if backend == "gloo" else "cuda")

    buffer = pickle.dumps(data)
    if len(buffer) > 1024 ** 3:
        logger = logging.getLogger(__name__)
        logger.warning(
            "Rank {} trying to all-gather {:.2f} GB of data on device {}".format(
                get_rank(), len(buffer) / (1024 ** 3), device
            )
        )
    storage = torch.ByteStorage.from_buffer(buffer)
    tensor = torch.ByteTensor(storage).to(device=device)
    return tensor
Example #11
Source File: distributed.py From SlowFast with Apache License 2.0
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    Returns:
        (group): pytorch dist group.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD
Example #12
Source File: utils.py From training_results_v0.5 with Apache License 2.0
def reduce(self, op):
    """
    Reduces average value over all workers.

    :param op: 'sum' or 'mean', reduction operator
    """
    if op not in ('sum', 'mean'):
        raise NotImplementedError

    distributed = (get_world_size() > 1)
    if distributed:
        if hasattr(dist, "get_backend"):
            backend = dist.get_backend()
        else:
            backend = dist._backend
        cuda = (backend == dist.dist_backend.NCCL)

        if cuda:
            avg = torch.cuda.FloatTensor([self.avg])
            _sum = torch.cuda.FloatTensor([self.sum])
        else:
            avg = torch.FloatTensor([self.avg])
            _sum = torch.FloatTensor([self.sum])

        dist.all_reduce(avg, op=dist.reduce_op.SUM)
        dist.all_reduce(_sum, op=dist.reduce_op.SUM)
        self.avg = avg.item()
        self.sum = _sum.item()

    if op == 'mean':
        self.avg /= get_world_size()
        self.sum /= get_world_size()
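The hasattr check above only matters on very old PyTorch builds, where dist.get_backend, dist.Backend, and dist.ReduceOp did not yet exist. On any recent release the shim collapses to direct calls; a minimal sketch of the modern equivalent (reduce_mean is a hypothetical name, not from the example's project):

import torch
import torch.distributed as dist

def reduce_mean(value: float) -> float:
    # dist.get_backend(), dist.Backend, and dist.ReduceOp all exist directly.
    device = "cuda" if dist.get_backend() == dist.Backend.NCCL else "cpu"
    t = torch.tensor([value], device=device)
    dist.all_reduce(t, op=dist.ReduceOp.SUM)
    return t.item() / dist.get_world_size()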
Example #13
Source File: example.py From examples with BSD 3-Clause "New" or "Revised" License
def spmd_main(local_world_size, local_rank):
    # These are the parameters used to initialize the process group
    env_dict = {
        key: os.environ[key]
        for key in ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE")
    }
    print(f"[{os.getpid()}] Initializing process group with: {env_dict}")
    dist.init_process_group(backend="nccl")
    print(
        f"[{os.getpid()}]: world_size = {dist.get_world_size()}, "
        + f"rank = {dist.get_rank()}, backend={dist.get_backend()}"
    )

    demo_basic(local_world_size, local_rank)

    # Tear down the process group
    dist.destroy_process_group()
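spmd_main reads MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE from the environment; a launcher such as torchrun normally populates them for every process. For a quick single-process smoke test you could set them by hand, as in the sketch below; the values are placeholders, and the hard-coded "nccl" backend still requires a CUDA device.

import os

# Placeholder environment for a single-process run; a real multi-process
# job would let the launcher populate these across workers.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")  # placeholder port
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
spmd_main(local_world_size=1, local_rank=0)  # needs one CUDA device for nccl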
Example #14
Source File: comm.py From fast-reid with Apache License 2.0
def _get_global_gloo_group():
    """
    Return a process group based on gloo backend, containing all the ranks
    The result is cached.
    """
    if dist.get_backend() == "nccl":
        return dist.new_group(backend="gloo")
    else:
        return dist.group.WORLD
Example #15
Source File: utils.py From pipedream with MIT License
def reduce(self, op):
    """
    Reduces average value over all workers.

    :param op: 'sum' or 'mean', reduction operator
    """
    if op not in ('sum', 'mean'):
        raise NotImplementedError

    distributed = (get_world_size() > 1)
    if distributed:
        if hasattr(dist, "get_backend"):
            backend = dist.get_backend()
        else:
            backend = dist._backend
        cuda = (backend == dist.dist_backend.NCCL)

        if cuda:
            avg = torch.cuda.FloatTensor([self.avg])
            _sum = torch.cuda.FloatTensor([self.sum])
        else:
            avg = torch.FloatTensor([self.avg])
            _sum = torch.FloatTensor([self.sum])

        dist.all_reduce(avg, op=dist.reduce_op.SUM)
        dist.all_reduce(_sum, op=dist.reduce_op.SUM)
        self.avg = avg.item()
        self.sum = _sum.item()

    if op == 'mean':
        self.avg /= get_world_size()
        self.sum /= get_world_size()
Example #16
Source File: distributed.py From dataset-distillation with MIT License
def all_gather_coalesced(tensors, buffer_size=256 * MB):
    assert dist.get_backend() == dist.dist_backend.NCCL  # gloo gives some weird device error
    world_size = dist.get_world_size()
    rcv_lsts = [[] for _ in range(world_size)]
    for tensors in _take_tensors(tensors, buffer_size):
        flat_tensors = _flatten_dense_tensors(tensors)
        tmp_rcv_lst = [torch.empty_like(flat_tensors) for _ in range(world_size)]
        dist.all_gather(tmp_rcv_lst, flat_tensors)
        for i, rcv_flat_tensors in enumerate(tmp_rcv_lst):
            for rcv_t in _unflatten_dense_tensors(rcv_flat_tensors, tensors):
                rcv_lsts[i].append(rcv_t)
    return rcv_lsts
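A hedged usage sketch for the helper above, assuming an initialized NCCL process group and that `model` is some torch.nn.Module already on the local GPU (both are assumptions, not part of the original example):

# Gather every rank's parameter tensors; returns one list of tensors per rank.
params = [p.data for p in model.parameters()]  # `model` is illustrative
per_rank = all_gather_coalesced(params, buffer_size=256 * MB)
own = per_rank[dist.get_rank()]  # this rank's own copies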
Example #17
Source File: distributed_communicator.py From CrypTen with MIT License
def get_distributed_backend(self):
    """Returns name of torch.distributed backend used."""
    assert dist.is_initialized(), "initialize the communicator first"
    return dist.get_backend()
Example #18
Source File: utils.py From training with Apache License 2.0
def reduce(self, op):
    """
    Reduces average value over all workers.

    :param op: 'sum' or 'mean', reduction operator
    """
    if op not in ('sum', 'mean'):
        raise NotImplementedError

    distributed = (get_world_size() > 1)
    if distributed:
        # Backward/forward compatibility around
        # https://github.com/pytorch/pytorch/commit/540ef9b1fc5506369a48491af8a285a686689b36 and
        # https://github.com/pytorch/pytorch/commit/044d00516ccd6572c0d6ab6d54587155b02a3b86
        # to accommodate changes in PyTorch's distributed API
        if hasattr(dist, "get_backend"):
            _backend = dist.get_backend()
            if hasattr(dist, "DistBackend"):
                backend_enum_holder = dist.DistBackend
            else:
                backend_enum_holder = dist.Backend
        else:
            _backend = dist._backend
            backend_enum_holder = dist.dist_backend

        cuda = _backend == backend_enum_holder.NCCL

        if cuda:
            avg = torch.cuda.FloatTensor([self.avg])
            _sum = torch.cuda.FloatTensor([self.sum])
        else:
            avg = torch.FloatTensor([self.avg])
            _sum = torch.FloatTensor([self.sum])

        _reduce_op = dist.reduce_op if hasattr(dist, "reduce_op") else dist.ReduceOp
        dist.all_reduce(avg, op=_reduce_op.SUM)
        dist.all_reduce(_sum, op=_reduce_op.SUM)
        self.avg = avg.item()
        self.sum = _sum.item()

    if op == 'mean':
        self.avg /= get_world_size()
        self.sum /= get_world_size()
Example #19
Source File: util.py From allennlp with Apache License 2.0
def peak_memory_mb() -> Dict[int, float]:
    """
    Get peak memory usage for each worker, as measured by max-resident-set size:

    https://unix.stackexchange.com/questions/30940/getrusage-system-call-what-is-maximum-resident-set-size

    Only works on OSX and Linux, otherwise the result will be 0.0 for every worker.
    """
    if resource is None or sys.platform not in ("linux", "darwin"):
        peak_mb = 0.0
    else:
        peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if sys.platform == "darwin":
            # On OSX the result is in bytes.
            peak_mb = peak / 1_000_000
        else:
            # On Linux the result is in kilobytes.
            peak_mb = peak / 1_000

    if is_distributed():
        global_rank = dist.get_rank()
        world_size = dist.get_world_size()

        peak_mb_tensor = torch.tensor([float(global_rank), peak_mb])
        # All of these tensors will be gathered into this list.
        gather_results = [torch.tensor([0.0, 0.0]) for _ in range(world_size)]

        # If the backend is 'nccl', this means we're training on GPUs, so these tensors
        # need to be on GPU.
        if dist.get_backend() == "nccl":
            peak_mb_tensor = peak_mb_tensor.cuda()
            gather_results = [x.cuda() for x in gather_results]

        dist.all_gather(gather_results, peak_mb_tensor)

        results_dict: Dict[int, float] = {}
        for peak_mb_tensor in gather_results:
            worker = int(peak_mb_tensor[0])
            peak_mb = round(float(peak_mb_tensor[1]), 3)
            results_dict[worker] = peak_mb
        return results_dict
    else:
        return {0: peak_mb}