Python torch.distributed.destroy_process_group() Examples

The following are 30 code examples of torch.distributed.destroy_process_group(), collected from open-source projects. You can go to the original project or source file via the link above each example, or browse the other available functions and classes of the torch.distributed module.
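In a typical script, destroy_process_group() is the mirror image of init_process_group(): every rank initializes the group, does its collective work, and tears the group down before exiting. A minimal single-process sketch of that lifecycle (the gloo backend and the loopback address are illustrative assumptions):

import torch
import torch.distributed as dist

# one rank, one process: enough to show the init/work/destroy lifecycle
dist.init_process_group(
    backend="gloo", init_method="tcp://127.0.0.1:29500", world_size=1, rank=0
)
t = torch.ones(1)
dist.all_reduce(t)  # collective work happens while the group is alive
dist.destroy_process_group()  # release the group's resources before exit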
Example #1
Source File: test_native.py    From ignite with BSD 3-Clause "New" or "Revised" License
def _test__native_dist_model_create_from_context_no_dist(true_backend, true_device):

    assert _NativeDistModel.create_from_context() is None

    dist.init_process_group(true_backend, init_method="tcp://0.0.0.0:2222", world_size=1, rank=0)
    dist.barrier()

    _test__native_dist_model_create_from_context_no_local_rank()

    true_conf = {
        "device": true_device,
        "local_rank": 0,
        "rank": 0,
        "world_size": 1,
        "node_index": 0,
        "nnodes": 1,
        "nproc_per_node": 1,
    }

    _test__native_dist_model_create_from_context_env_local_rank(true_conf)
    _test__native_dist_model_create_from_context_set_local_rank(true_conf)

    dist.destroy_process_group() 
Example #2
Source File: test_torch.py    From ray with Apache License 2.0
def ray_start_4_cpus():
    address_info = ray.init(num_cpus=4)
    yield address_info
    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Ensure that tests don't ALL fail
    if dist.is_initialized():
        dist.destroy_process_group() 
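This generator follows the pytest fixture pattern: everything after the yield runs as teardown once the dependent test finishes. A hedged sketch of how such a fixture is registered and consumed (the decorator and imports sit outside the excerpt, so they are assumptions here):

import pytest
import ray
import torch.distributed as dist

@pytest.fixture
def ray_start_4_cpus():
    address_info = ray.init(num_cpus=4)
    yield address_info
    # teardown: runs after the test body completes
    ray.shutdown()
    if dist.is_initialized():
        dist.destroy_process_group()

def test_something(ray_start_4_cpus):
    # the fixture guarantees Ray is up here and torn down afterwards
    assert ray.is_initialized()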
Example #3
Source File: dist_train.py    From video_analyst with MIT License
def cleanup():
    """Cleanup distributed  
       Borrowed from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html
    """
    dist.destroy_process_group() 
Example #4
Source File: dist_train_sat.py    From video_analyst with MIT License
def cleanup():
    """Cleanup distributed  
       Borrowed from https://pytorch.org/tutorials/intermediate/ddp_tutorial.html
    """
    dist.destroy_process_group() 
Example #5
Source File: extractive_summarization_cnndm_distributed_train.py    From nlp-recipes with MIT License
def cleanup():
    dist.destroy_process_group()


# How often statistics reports appear during training, measured in steps.
Example #6
Source File: conftest.py    From ignite with BSD 3-Clause "New" or "Revised" License
def _destroy_mnodes_dist_context():
    dist.barrier()
    dist.destroy_process_group()

    from ignite.distributed.utils import _SerialModel, _set_model

    # We need to set synced model to initial state
    _set_model(_SerialModel()) 
Example #7
Source File: conftest.py    From ignite with BSD 3-Clause "New" or "Revised" License
def _destroy_dist_context():

    dist.barrier()
    dist.destroy_process_group()

    from ignite.distributed.utils import _SerialModel, _set_model

    # We need to set synced model to initial state
    _set_model(_SerialModel()) 
Example #8
Source File: native.py    From ignite with BSD 3-Clause "New" or "Revised" License
def finalize(self):
            dist.destroy_process_group()
            # restore backed-up env
            if self._env_backup is not None:
                os.environ.clear()
                os.environ.update(self._env_backup) 
Example #9
Source File: utils.py    From ignite with BSD 3-Clause "New" or "Revised" License
def finalize():
    """Finalizes distributed configuration. For example, in case of native pytorch distributed configuration,
    it calls ``dist.destroy_process_group()``.
    """
    _model.finalize()
    _set_model(_SerialModel()) 
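ignite wraps the whole lifecycle behind its ignite.distributed helper module; a minimal sketch of the intended call order (assuming a launcher has set the usual RANK / WORLD_SIZE environment variables and the gloo backend is available):

import ignite.distributed as idist

idist.initialize("gloo")  # under the hood: dist.init_process_group("gloo", ...)
# ... distributed training or evaluation ...
idist.finalize()  # calls dist.destroy_process_group() and resets to serial mode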
Example #10
Source File: main.py    From elastic with BSD 3-Clause "New" or "Revised" License
def tmp_process_group(backend):
    cpu_pg = dist.new_group(backend=backend)
    try:
        yield cpu_pg
    finally:
        dist.destroy_process_group(cpu_pg) 
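Since tmp_process_group is evidently a contextlib generator (the @contextmanager decorator sits outside the excerpt), destroy_process_group(cpu_pg) here tears down only the temporary subgroup, never the default group. A hedged usage sketch, assuming the default process group is already initialized:

import torch
import torch.distributed as dist

with tmp_process_group(backend="gloo") as pg:
    t = torch.ones(1)
    dist.all_reduce(t, group=pg)  # collective restricted to the temporary group
# on exit only the temporary subgroup is destroyed; the default group survives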
Example #11
Source File: impl.py    From ftlib with Apache License 2.0
def abort_communicator(self):
        if dist.is_initialized():
            dist.destroy_process_group() 
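The is_initialized() guard makes the teardown idempotent: calling destroy_process_group() when no group exists raises an error, so guarded cleanup can run unconditionally. A minimal sketch of the same idiom:

import torch.distributed as dist

def safe_cleanup():
    # safe to call any number of times, initialized or not
    if dist.is_initialized():
        dist.destroy_process_group()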
Example #12
Source File: test_converters.py    From pytorch-lightning with Apache License 2.0
def test_numpy_metric_ddp():
    tutils.reset_seed()
    tutils.set_random_master_port()
    world_size = 2
    mp.spawn(_ddp_test_numpy_metric, args=(world_size,), nprocs=world_size)
    # dist.destroy_process_group() 
Example #13
Source File: test_converters.py    From pytorch-lightning with Apache License 2.0
def test_tensor_metric_ddp():
    tutils.reset_seed()
    tutils.set_random_master_port()

    world_size = 2
    mp.spawn(_ddp_test_tensor_metric, args=(world_size,), nprocs=world_size)
    # dist.destroy_process_group() 
Example #14
Source File: test_converters.py    From pytorch-lightning with Apache License 2.0
def test_sync_reduce_ddp():
    """Make sure sync-reduce works with DDP"""
    tutils.reset_seed()
    tutils.set_random_master_port()

    worldsize = 2
    mp.spawn(_ddp_test_fn, args=(worldsize,), nprocs=worldsize)

    # dist.destroy_process_group() 
Example #15
Source File: training_loop.py    From pytorch-lightning with Apache License 2.0
def run_training_teardown(self):
        if hasattr(self, '_teardown_already_run') and self._teardown_already_run:
            return

        self._teardown_already_run = True

        # Train end events
        with self.profiler.profile('on_train_end'):
            # callbacks
            self.on_train_end()
            # model hooks
            if self.is_function_implemented('on_train_end'):
                self.get_model().on_train_end()

        if self.logger is not None:
            self.logger.finalize("success")

        # summarize profile results
        if self.global_rank == 0:
            self.profiler.describe()

        if self.global_rank == 0:
            for proc in self.interactive_ddp_procs:
                subprocess.Popen.kill(proc)

        # clean up dist group
        if self.use_ddp or self.use_ddp2:
            torch_distrib.destroy_process_group() 
Example #16
Source File: distributed_torch_runner.py    From ray with Apache License 2.0
def shutdown(self):
        """Attempts to shut down the worker."""
        # Destroying the process group appears to be harmless here,
        # since the worker processes are shut down anyway. This comment
        # can be removed in a future release if the behavior is still
        # not documented in the stable PyTorch docs.
        dist.destroy_process_group()
        super(DistributedTorchRunner, self).shutdown() 
Example #17
Source File: i3d_learner.py    From deep-smoke-machine with BSD 3-Clause "New" or "Revised" License
def clean_mp(self):
        if self.can_parallel:
            dist.destroy_process_group() 
Example #18
Source File: test_torch.py    From ray with Apache License 2.0
def ray_start_2_cpus():
    address_info = ray.init(num_cpus=2)
    yield address_info
    # The code after the yield will run as teardown code.
    ray.shutdown()
    # Ensure that tests don't ALL fail
    if dist.is_initialized():
        dist.destroy_process_group() 
Example #19
Source File: main.py    From examples with BSD 3-Clause "New" or "Revised" License
def cleanup():
    dist.destroy_process_group() 
Example #20
Source File: example.py    From examples with BSD 3-Clause "New" or "Revised" License
def spmd_main(local_world_size, local_rank):
    # These are the parameters used to initialize the process group
    env_dict = {
        key: os.environ[key]
        for key in ("MASTER_ADDR", "MASTER_PORT", "RANK", "WORLD_SIZE")
    }
    print(f"[{os.getpid()}] Initializing process group with: {env_dict}")
    dist.init_process_group(backend="nccl")
    print(
        f"[{os.getpid()}]: world_size = {dist.get_world_size()}, "
        + f"rank = {dist.get_rank()}, backend={dist.get_backend()}"
    )

    demo_basic(local_world_size, local_rank)

    # Tear down the process group
    dist.destroy_process_group() 
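This entry point expects a launcher such as torchrun to have populated MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE before init_process_group(backend="nccl") is called. A single-process stand-in that fills them by hand (the port value is illustrative, and the hard-coded nccl backend still requires a visible CUDA device):

import os

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
spmd_main(local_world_size=1, local_rank=0)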
Example #21
Source File: test_distributed.py    From neural_chat with MIT License
def tearDown(self):
        # we need to de-initialize the distributed world, otherwise other
        # tests will think we're distributed when we're really not.
        dist.destroy_process_group() 
Example #22
Source File: test_distributed.py    From ParlAI with MIT License
def tearDown(self):
        # we need to de-initialize the distributed world, otherwise other
        # tests will think we're distributed when we're really not.
        dist.destroy_process_group() 
Example #23
Source File: distributed_communicator.py    From CrypTen with MIT License
def shutdown(cls):
        if dist.get_rank() == 0 and cls.instance.ttp_initialized:
            cls.instance.send_obj(
                "terminate", cls.instance.get_ttp_rank(), cls.instance.ttp_group
            )
        dist.destroy_process_group(cls.instance.main_group)
        dist.destroy_process_group(cls.instance.ttp_group)
        dist.destroy_process_group()
        cls.instance = None 
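CrypTen destroys its named subgroups before the default group; the final bare destroy_process_group() call deinitializes the distributed package itself. A minimal sketch of the same ordering (the subgroup below is a stand-in for main_group/ttp_group):

import torch.distributed as dist

# assumes the default group is already initialized
sub = dist.new_group(ranks=[0])
dist.destroy_process_group(sub)  # tear down explicit subgroups first
dist.destroy_process_group()     # then the default group, deinitializing dist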
Example #24
Source File: train_ddp.py    From space_time_pde with MIT License
def cleanup():
    dist.destroy_process_group()

# pylint: disable=no-member 
Example #25
Source File: dist_trainer.py    From fastNLP with Apache License 2.0
def close(self):
        r"""关闭Trainer,销毁进程"""
        dist.destroy_process_group() 
Example #26
Source File: predict_cpu.py    From helen with MIT License
def cleanup():
    dist.destroy_process_group() 
Example #27
Source File: predict_gpu.py    From helen with MIT License
def cleanup():
    dist.destroy_process_group() 
Example #28
Source File: train_distributed.py    From helen with MIT License
def cleanup():
    dist.destroy_process_group() 
Example #29
Source File: distributed_data_parallel.py    From torchbearer with MIT License
def cleanup():
    dist.destroy_process_group() 
Example #30
Source File: trainer.py    From pytorch-project-template with Apache License 2.0
def cleanup():
    dist.destroy_process_group()