Python torch.distributed.init_process_group() Examples
The following are 30 code examples of torch.distributed.init_process_group(), drawn from open-source projects. Each example lists the source file, project, and license it comes from. You may also want to check out the other available functions and classes of the torch.distributed module.
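Before the project-specific examples, here is the minimal pattern they all build on: point every process at the same master address and port, call init_process_group() with a backend, a unique rank, and a shared world_size, then tear the group down with destroy_process_group() when finished. The sketch below is illustrative only; the names example_setup and example_cleanup do not come from any of the projects listed.

import os

import torch.distributed as dist


def example_setup(rank, world_size, backend="gloo"):
    # All processes must agree on the rendezvous address and the world size.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group(backend, rank=rank, world_size=world_size)


def example_cleanup():
    # Counterpart to init_process_group(); frees the default process group.
    dist.destroy_process_group()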
Example #1
Source File: predict_gpu.py From helen with MIT License | 6 votes |
def setup(rank, total_callers, args, all_input_files, all_devices):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=total_callers)

    # expand the arguments
    output_filepath, model_path, batch_size, num_workers = args

    # call prediction function
    predict(all_input_files[rank],
            output_filepath,
            model_path,
            batch_size,
            num_workers,
            rank,
            all_devices[rank])
    cleanup()
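The cleanup() call refers to a helper defined elsewhere in predict_gpu.py; in this pattern it usually just destroys the process group. Below is a hedged sketch of a typical cleanup() plus a hypothetical launch() driver (not part of helen) showing how such a setup function is normally spawned, one process per caller.

import torch.distributed as dist
import torch.multiprocessing as mp


def cleanup():
    # Typical counterpart to dist.init_process_group().
    dist.destroy_process_group()


def launch(total_callers, args, all_input_files, all_devices):
    # mp.spawn() passes the process index as the first argument, i.e. the rank.
    mp.spawn(setup,
             args=(total_callers, args, all_input_files, all_devices),
             nprocs=total_callers,
             join=True)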
Example #2
Source File: predict_cpu.py From helen with MIT License | 6 votes |
def setup(rank, total_callers, args, all_input_files):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=total_callers)

    # expand the arguments
    output_filepath, model_path, batch_size, num_workers, threads = args

    # call prediction function
    predict(all_input_files[rank],
            output_filepath,
            model_path,
            batch_size,
            num_workers,
            rank,
            threads)
    cleanup()
Example #3
Source File: ray_container.py From adeptRL with GNU General Public License v3.0 | 6 votes |
def __init__(self, rank, learner_ranks, worker_ranks, ip, port):
    world_size = len(learner_ranks) + len(worker_ranks)
    dist.init_process_group(
        "nccl",
        init_method="tcp://{}:{}".format(ip, port),
        rank=rank,
        world_size=world_size,
    )
    groups = {}
    for learner_rank in learner_ranks:
        for worker_rank in worker_ranks:
            g = dist.new_group([learner_rank, worker_rank])
            if worker_rank == rank:
                groups[learner_rank] = g
    dist.new_group(learner_ranks)

    self.groups = groups
    self.device = torch.device(f"cuda:{ray.get_gpu_ids()[0]}")
    self.rank = rank
    self.network = torch.zeros(3).to(self.device)
    self.exp = None
    self.network_handle = None
Example #4
Source File: train.py From gpt-2-output-dataset with MIT License | 6 votes |
def setup_distributed(port=29500):
    if not dist.is_available() or not torch.cuda.is_available() or torch.cuda.device_count() <= 1:
        return 0, 1

    if 'MPIR_CVAR_CH3_INTERFACE_HOSTNAME' in os.environ:
        from mpi4py import MPI
        mpi_rank = MPI.COMM_WORLD.Get_rank()
        mpi_size = MPI.COMM_WORLD.Get_size()

        os.environ["MASTER_ADDR"] = '127.0.0.1'
        os.environ["MASTER_PORT"] = str(port)

        dist.init_process_group(backend="nccl", world_size=mpi_size, rank=mpi_rank)
        return mpi_rank, mpi_size

    dist.init_process_group(backend="nccl", init_method="env://")
    return dist.get_rank(), dist.get_world_size()
Example #5
Source File: distributed.py From Single-Path-NAS-PyTorch with Apache License 2.0 | 6 votes |
def init_process_group(backend):
    comm = MPI.COMM_WORLD
    world_size = comm.Get_size()
    rank = comm.Get_rank()

    info = dict()
    if rank == 0:
        host = socket.gethostname()
        address = socket.gethostbyname(host)
        info.update(dict(MASTER_ADDR=address, MASTER_PORT='1234'))

    info = comm.bcast(info, root=0)
    info.update(dict(WORLD_SIZE=str(world_size), RANK=str(rank)))
    os.environ.update(info)

    distributed.init_process_group(backend=backend)
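This helper assumes that MPI (from mpi4py), socket, os, and torch.distributed (imported as distributed) are available at module level in distributed.py. Below is a sketch of that assumed import block together with a hypothetical call site, worker_main(), which every MPI rank would execute; neither the imports nor worker_main() are copied from the project.

import os
import socket

import torch.distributed as distributed
from mpi4py import MPI


def worker_main():
    # Rank 0's hostname and port are broadcast to all ranks inside the helper.
    init_process_group(backend="gloo")
    print("rank %d of %d is up" % (distributed.get_rank(),
                                   distributed.get_world_size()))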
Example #6
Source File: train_distributed.py From helen with MIT License | 6 votes |
def setup(rank, device_ids, args):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=len(device_ids))

    train_file, test_file, batch_size, epochs, gpu_mode, num_workers, retrain_model, \
        retrain_model_path, gru_layers, hidden_size, learning_rate, weight_decay, model_dir, stats_dir, total_callers, \
        train_mode = args

    # issue with semaphore lock: https://github.com/pytorch/pytorch/issues/2517
    # mp.set_start_method('spawn')

    # Explicitly setting seed to make sure that models created in two processes
    # start from same random weights and biases. https://github.com/pytorch/pytorch/issues/2517
    torch.manual_seed(42)
    train(train_file, test_file, batch_size, epochs, gpu_mode, num_workers, retrain_model,
          retrain_model_path, gru_layers, hidden_size, learning_rate, weight_decay,
          model_dir, stats_dir, train_mode, total_callers, rank, device_ids[rank])
    cleanup()
Example #7
Source File: dist_a2c_atari.py From cherry with Apache License 2.0 | 5 votes |
def main(env='PongNoFrameskip-v4'):
    num_steps = 5000000
    seed = 42

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--local_rank", type=int)
    args = parser.parse_args()

    dist.init_process_group('gloo',
                            init_method='file:///home/seba-1511/.dist_init_' + env,
                            rank=args.local_rank,
                            world_size=16)
    rank = dist.get_rank()
    th.set_num_threads(1)
    random.seed(seed + rank)
    th.manual_seed(seed + rank)
    np.random.seed(seed + rank)

    env = gym.make(env)
    if rank == 0:
        env = envs.Logger(env, interval=1000)
    env = envs.OpenAIAtari(env)
    env = envs.Torch(env)
    env = envs.Runner(env)
    env.seed(seed + rank)

    policy = NatureCNN(env)
    optimizer = optim.RMSprop(policy.parameters(), lr=LR, alpha=0.99, eps=1e-5)
    optimizer = Distributed(policy.parameters(), optimizer)
    get_action = lambda state: get_action_value(state, policy)

    for step in range(num_steps // A2C_STEPS + 1):
        # Sample some transitions
        replay = env.run(get_action, steps=A2C_STEPS)

        # Update policy
        update(replay, optimizer, policy, env=env)
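This example rendezvouses through a shared file (init_method='file://...') with a hard-coded, user-specific path; every participating process must see the same file. A minimal, self-contained sketch of the same idiom using a temporary file instead of cherry's hard-coded path (single process, gloo backend, illustrative only):

import tempfile

import torch.distributed as dist

# Any shared, initially empty file can serve as the rendezvous point.
init_file = tempfile.NamedTemporaryFile(delete=False).name
dist.init_process_group("gloo",
                        init_method="file://" + init_file,
                        rank=0,
                        world_size=1)
dist.destroy_process_group()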
Example #8
Source File: train_IKC.py From IKC with Apache License 2.0 | 5 votes |
def init_dist(backend='nccl', **kwargs):
    ''' initialization for distributed training'''
    # if mp.get_start_method(allow_none=True) is None:
    if mp.get_start_method(allow_none=True) != 'spawn':  # Return the name of start method used for starting processes
        mp.set_start_method('spawn', force=True)  # 'spawn' is the default on Windows
    rank = int(os.environ['RANK'])  # system env process ranks
    num_gpus = torch.cuda.device_count()  # Returns the number of GPUs available
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)  # Initializes the default distributed process group
Example #9
Source File: env.py From RDSNet with Apache License 2.0 | 5 votes |
def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #10
Source File: main.py From examples with BSD 3-Clause "New" or "Revised" License | 5 votes |
def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'

    # initialize the process group
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
Example #11
Source File: pytorch_two_machines.py From ncluster with MIT License | 5 votes |
def worker():
    """ Initialize the distributed environment. """

    import torch
    import torch.distributed as dist
    from torch.multiprocessing import Process
    import numpy as np

    print("Initializing distributed pytorch")
    os.environ['MASTER_ADDR'] = str(args.master_addr)
    os.environ['MASTER_PORT'] = str(args.master_port)
    # Use TCP backend. Gloo needs nightly, where it currently fails with
    #     dist.init_process_group('gloo', rank=args.rank,
    #     AttributeError: module 'torch.distributed' has no attribute 'init_process_group'
    dist.init_process_group('tcp', rank=args.rank,
                            world_size=args.size)

    tensor = torch.ones(args.size_mb*250*1000)*(args.rank+1)
    time_list = []
    outfile = 'out' if args.rank == 0 else '/dev/null'
    log = util.FileLogger(outfile)
    for i in range(args.iters):
        # print('before: rank ', args.rank, ' has data ', tensor[0])
        start_time = time.perf_counter()
        if args.rank == 0:
            dist.send(tensor=tensor, dst=1)
        else:
            dist.recv(tensor=tensor, src=0)

        elapsed_time_ms = (time.perf_counter() - start_time)*1000
        time_list.append(elapsed_time_ms)
        # print('after: rank ', args.rank, ' has data ', tensor[0])
        rate = args.size_mb/(elapsed_time_ms/1000)

        log('%03d/%d added %d MBs in %.1f ms: %.2f MB/second' % (i, args.iters, args.size_mb, elapsed_time_ms, rate))

    min = np.min(time_list)
    median = np.median(time_list)
    log(f"min: {min:8.2f}, median: {median:8.2f}, mean: {np.mean(time_list):8.2f}")
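The 'tcp' backend used here dates from very early PyTorch releases and has since been removed; on current PyTorch the same point-to-point benchmark would typically initialize with gloo instead. A hedged one-line equivalent, assuming the same args object and the MASTER_ADDR/MASTER_PORT variables the snippet already sets:

dist.init_process_group('gloo', rank=args.rank, world_size=args.size)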
Example #12
Source File: env.py From RDSNet with Apache License 2.0 | 5 votes |
def _init_dist_slurm(backend, port=29500, **kwargs):
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
Example #13
Source File: trainer.py From pytorch-asr with GNU General Public License v3.0 | 5 votes |
def init_distributed(use_cuda, backend="nccl", init="slurm", local_rank=-1):
    #try:
    #    mp.set_start_method('spawn')  # spawn, forkserver, and fork
    #except RuntimeError:
    #    pass

    try:
        if local_rank == -1:
            if init == "slurm":
                rank = int(os.environ['SLURM_PROCID'])
                world_size = int(os.environ['SLURM_NTASKS'])
                local_rank = int(os.environ['SLURM_LOCALID'])
                #maser_node = os.environ['SLURM_TOPOLOGY_ADDR']
                #maser_port = '23456'
            elif init == "ompi":
                rank = int(os.environ['OMPI_COMM_WORLD_RANK'])
                world_size = int(os.environ['OMPI_COMM_WORLD_SIZE'])
                local_rank = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])

            if use_cuda:
                device = local_rank % torch.cuda.device_count()
                torch.cuda.set_device(device)
                print(f"set cuda device to cuda:{device}")

            master_node = os.environ["MASTER_ADDR"]
            master_port = os.environ["MASTER_PORT"]
            init_method = f"tcp://{master_node}:{master_port}"
            #init_method = "env://"
            dist.init_process_group(backend=backend, init_method=init_method,
                                    world_size=world_size, rank=rank)
            print(f"initialized as {rank}/{world_size} via {init_method}")
        else:
            if use_cuda:
                torch.cuda.set_device(local_rank)
                print(f"set cuda device to cuda:{local_rank}")
            dist.init_process_group(backend=backend, init_method="env://")
            print(f"initialized as {dist.get_rank()}/{dist.get_world_size()} via env://")
    except Exception as e:
        print(f"initialized as single process")
Example #14
Source File: env.py From IoU-Uniform-R-CNN with Apache License 2.0 | 5 votes |
def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #15
Source File: train.py From nonparaSeq2seqVC_code with MIT License | 5 votes |
def init_distributed(hparams, n_gpus, rank, group_name):
    assert torch.cuda.is_available(), "Distributed mode requires CUDA."
    print("Initializing Distributed")

    # Set cuda device so everything is done on the right GPU.
    torch.cuda.set_device(rank % torch.cuda.device_count())

    # Initialize distributed communication
    dist.init_process_group(
        backend=hparams.dist_backend, init_method=hparams.dist_url,
        world_size=n_gpus, rank=rank, group_name=group_name)

    print("Done initializing distributed")
Example #16
Source File: env.py From IoU-Uniform-R-CNN with Apache License 2.0 | 5 votes |
def _init_dist_slurm(backend, port=29500, **kwargs):
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
Example #17
Source File: example.py From examples with BSD 3-Clause "New" or "Revised" License | 5 votes |
def spmd_main(local_world_size, local_rank):
    # These are the parameters used to initialize the process group
    env_dict = {
        key: os.environ[key]
        for key in ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE")
    }
    print(f"[{os.getpid()}] Initializing process group with: {env_dict}")
    dist.init_process_group(backend="nccl")
    print(
        f"[{os.getpid()}]: world_size = {dist.get_world_size()}, "
        + f"rank = {dist.get_rank()}, backend={dist.get_backend()}"
    )

    demo_basic(local_world_size, local_rank)

    # Tear down the process group
    dist.destroy_process_group()
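spmd_main() reads MASTER_ADDR, MASTER_PORT, RANK, and WORLD_SIZE from the environment; in the original pytorch/examples repository those are supplied by the launcher (torch.distributed.launch / torchrun). Below is a hypothetical self-contained driver that fakes the same environment with torch.multiprocessing, assuming one NCCL-capable GPU per process; _worker is not part of example.py.

import os

import torch.multiprocessing as mp


def _worker(local_rank, local_world_size):
    # Provide the four variables spmd_main() expects to find in os.environ.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29501"
    os.environ["RANK"] = str(local_rank)
    os.environ["WORLD_SIZE"] = str(local_world_size)
    spmd_main(local_world_size, local_rank)


if __name__ == "__main__":
    n = 2
    mp.spawn(_worker, args=(n,), nprocs=n, join=True)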
Example #18
Source File: env.py From kaggle-kuzushiji-recognition with MIT License | 5 votes |
def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #19
Source File: train_SFTMD.py From IKC with Apache License 2.0 | 5 votes |
def init_dist(backend='nccl', **kwargs):
    ''' initialization for distributed training'''
    # if mp.get_start_method(allow_none=True) is None:
    if mp.get_start_method(allow_none=True) != 'spawn':  # Return the name of start method used for starting processes
        mp.set_start_method('spawn', force=True)  # 'spawn' is the default on Windows
    rank = int(os.environ['RANK'])  # system env process ranks
    num_gpus = torch.cuda.device_count()  # Returns the number of GPUs available
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)  # Initializes the default distributed process group
Example #20
Source File: distributed_communicator.py From CrypTen with MIT License | 5 votes |
def __init__(self, init_ttp=False):
    # no need to do anything if we already initialized the communicator:
    if not dist.is_initialized():
        # get configuration variables from environment:
        for key in ["distributed_backend", "rendezvous", "world_size", "rank"]:
            if key.upper() not in os.environ:
                raise ValueError("Environment variable %s must be set." % key)
            setattr(self, key.lower(), os.environ[key.upper()])

        # make sure world size and rank are integers; comms stats are reset:
        self.world_size = int(self.world_size)
        self.rank = int(self.rank)
        self.reset_communication_stats()
        self._name = f"rank{self.rank}"

        # logging:
        logging.info("==================")
        logging.info("DistributedCommunicator with rank %d" % self.rank)
        logging.info("==================")

        # initialize process group:
        total_ws = self.world_size + 1 if init_ttp else self.world_size
        dist.init_process_group(
            backend=self.distributed_backend,
            init_method=self.rendezvous,
            world_size=total_ws,
            rank=self.rank,
        )
        self.ttp_group = dist.new_group(list(range(total_ws)))
        self.main_group = dist.new_group(list(range(self.world_size)))
        self.ttp_initialized = init_ttp
        logging.info("World size = %d" % self.world_size)
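The constructor reads its configuration from upper-cased environment variables. A sketch of what a launcher would have to export before constructing the communicator for a single-process run; the values are illustrative, and only the variable names are implied by the code above.

import os

os.environ.setdefault("DISTRIBUTED_BACKEND", "gloo")
os.environ.setdefault("RENDEZVOUS", "tcp://127.0.0.1:29502")
os.environ.setdefault("WORLD_SIZE", "1")
os.environ.setdefault("RANK", "0")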
Example #21
Source File: distributed_utils.py From Switchable-Whitening with MIT License | 5 votes |
def dist_init(port):
    if mp.get_start_method(allow_none=True) != 'spawn':
        mp.set_start_method('spawn')
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)

    if '[' in node_list:
        beg = node_list.find('[')
        pos1 = node_list.find('-', beg)
        if pos1 < 0:
            pos1 = 1000
        pos2 = node_list.find(',', beg)
        if pos2 < 0:
            pos2 = 1000
        node_list = node_list[:min(pos1, pos2)].replace('[', '')
    addr = node_list[8:].replace('-', '.')
    print(addr)
    os.environ['MASTER_PORT'] = port
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend='nccl')

    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size
Example #22
Source File: env.py From PolarMask with Apache License 2.0 | 5 votes |
def _init_dist_slurm(backend, port=29500, **kwargs):
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
Example #23
Source File: env.py From PolarMask with Apache License 2.0 | 5 votes |
def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #24
Source File: engine.py From Centripetal-SGD with Apache License 2.0 | 5 votes |
def __init__(self):
    self.version = 0.01
    self.state = State()
    self.devices = None
    self.distributed = False
    self.logger = None

    if 'WORLD_SIZE' in os.environ:
        self.distributed = int(os.environ['WORLD_SIZE']) >= 1

    if self.distributed:
        print('Initialize Engine for distributed training.')
        self.local_rank = 0  # TODO we only use single-machine-multi-gpus
        self.world_size = int(os.environ['WORLD_SIZE'])
        self.world_rank = int(os.environ['RANK'])
        torch.cuda.set_device(self.local_rank)
        dist.init_process_group(backend="nccl", init_method='env://')
        dist.barrier()
        self.devices = [i for i in range(self.world_size)]
    else:
        # todo check non-distributed training
        print('Initialize Engine for non-distributed training.')
        self.world_size = 1
        self.world_rank = 1
        self.devices = parse_torch_devices('0')  # TODO correct?
    torch.backends.cudnn.benchmark = True
Example #25
Source File: env.py From mmdetection_with_SENet154 with Apache License 2.0 | 5 votes |
def _init_dist_slurm(backend, port=29500, **kwargs):
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
Example #26
Source File: env.py From mmdetection_with_SENet154 with Apache License 2.0 | 5 votes |
def _init_dist_pytorch(backend, **kwargs):
    # TODO: use local_rank instead of rank % num_gpus
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)
Example #27
Source File: engine.py From ACNet with MIT License | 5 votes |
def __init__(self):
    self.version = 0.01
    self.state = State()
    self.devices = None
    self.distributed = False
    self.logger = None

    if 'WORLD_SIZE' in os.environ:
        self.distributed = int(os.environ['WORLD_SIZE']) >= 1

    if self.distributed:
        print('Initialize Engine for distributed training.')
        self.local_rank = 0  # TODO we only use single-machine-multi-gpus
        self.world_size = int(os.environ['WORLD_SIZE'])
        self.world_rank = int(os.environ['RANK'])
        torch.cuda.set_device(self.local_rank)
        dist.init_process_group(backend="nccl", init_method='env://')
        dist.barrier()
        self.devices = [i for i in range(self.world_size)]
    else:
        # todo check non-distributed training
        print('Initialize Engine for non-distributed training.')
        self.world_size = 1
        self.world_rank = 1
        self.devices = parse_torch_devices('0')  # TODO correct?
    torch.backends.cudnn.benchmark = True
Example #28
Source File: test_syncbn.py From mmcv with Apache License 2.0 | 5 votes |
def dist_init(self):
    rank = int(os.environ['SLURM_PROCID'])
    world_size = int(os.environ['SLURM_NTASKS'])
    local_rank = int(os.environ['SLURM_LOCALID'])
    node_list = str(os.environ['SLURM_NODELIST'])

    node_parts = re.findall('[0-9]+', node_list)
    host_ip = '{}.{}.{}.{}'.format(node_parts[1], node_parts[2],
                                   node_parts[3], node_parts[4])
    port = '12341'
    init_method = 'tcp://{}:{}'.format(host_ip, port)

    dist.init_process_group(
        'nccl', init_method=init_method, world_size=world_size, rank=rank)
    torch.cuda.set_device(local_rank)
Example #29
Source File: dist_utils.py From mmcv with Apache License 2.0 | 5 votes |
def _init_dist_slurm(backend, port=None):
    """Initialize slurm distributed training environment.

    If argument ``port`` is not specified, then the master port will be system
    environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
    environment variable, then a default port ``29500`` will be used.

    Args:
        backend (str): Backend of torch.distributed.
        port (int, optional): Master port. Defaults to None.
    """
    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(proc_id % num_gpus)
    addr = subprocess.getoutput(
        f'scontrol show hostname {node_list} | head -n1')
    # specify master port
    if port is not None:
        os.environ['MASTER_PORT'] = str(port)
    elif 'MASTER_PORT' in os.environ:
        pass  # use MASTER_PORT in the environment variable
    else:
        # 29500 is torch.distributed default port
        os.environ['MASTER_PORT'] = '29500'
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    dist.init_process_group(backend=backend)
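In mmcv-style codebases, helpers like this one are usually reached through a small dispatcher that picks the initializer from a launcher argument. The sketch below shows that pattern only; it is written from the surrounding convention rather than copied from mmcv's dist_utils.py, so treat the exact signature as an assumption.

def init_dist(launcher, backend='nccl', **kwargs):
    # Dispatch to the launcher-specific initializer (sketch; assumed signature).
    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
    elif launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
    else:
        raise ValueError(f'Invalid launcher type: {launcher}')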
Example #30
Source File: train_cifar10.py From mmcv with Apache License 2.0 | 5 votes |
def init_dist(backend='nccl', **kwargs):
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    rank = int(os.environ['RANK'])
    num_gpus = torch.cuda.device_count()
    torch.cuda.set_device(rank % num_gpus)
    dist.init_process_group(backend=backend, **kwargs)