Python torch.get_num_threads() Examples
The following are 7 code examples of torch.get_num_threads(). The source project and file for each example are noted above it.
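Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of what the pair torch.get_num_threads() / torch.set_num_threads() does: it reads and caps the size of PyTorch's intra-op CPU thread pool.

import torch

print(f"Default intra-op threads: {torch.get_num_threads()}")
torch.set_num_threads(2)             # cap intra-op parallelism at 2 threads
assert torch.get_num_threads() == 2
torch.randn(512, 512) @ torch.randn(512, 512)  # this matmul now uses at most 2 threads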
Example #1
Source File: worker.py From rlpyt with MIT License
def initialize_worker(rank, seed=None, cpu=None, torch_threads=None):
    """Assign CPU affinity, set random seed, set torch_threads if needed to
    prevent MKL deadlock.
    """
    log_str = f"Sampler rank {rank} initialized"
    cpu = [cpu] if isinstance(cpu, int) else cpu
    p = psutil.Process()
    try:
        if cpu is not None:
            p.cpu_affinity(cpu)
        cpu_affin = p.cpu_affinity()
    except AttributeError:
        cpu_affin = "UNAVAILABLE MacOS"
    log_str += f", CPU affinity {cpu_affin}"
    torch_threads = (1 if torch_threads is None and cpu is not None
        else torch_threads)  # Default to 1 to avoid possible MKL hang.
    if torch_threads is not None:
        torch.set_num_threads(torch_threads)
    log_str += f", Torch threads {torch.get_num_threads()}"
    if seed is not None:
        set_seed(seed)
        time.sleep(0.3)  # (so the printing from set_seed is not intermixed)
        log_str += f", Seed {seed}"
    logger.log(log_str)
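The function above is rlpyt's sampler-worker setup. As a rough sketch of the same idea outside rlpyt (the worker function and process count here are hypothetical, not part of the project), each spawned process can pin Torch to one intra-op thread so many workers do not oversubscribe the CPU:

import torch
import torch.multiprocessing as mp

def _worker(rank):
    # One intra-op thread per worker avoids MKL/OpenMP oversubscription
    # when many sampler processes share the same machine.
    torch.set_num_threads(1)
    x = torch.randn(256, 256)
    print(f"worker {rank}: torch threads = {torch.get_num_threads()}, "
          f"norm = {x.norm().item():.2f}")

if __name__ == "__main__":
    mp.spawn(_worker, nprocs=2)  # hypothetical: two illustrative workers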
Example #2
Source File: async_rl.py From rlpyt with MIT License
def optim_startup(self):
    """
    Sets the hardware affinity, moves the agent's model parameters onto
    device and initializes the data-parallel agent, if applicable.
    Computes optimizer throttling settings.
    """
    main_affinity = self.affinity.optimizer[0]
    p = psutil.Process()
    if main_affinity.get("set_affinity", True):
        p.cpu_affinity(main_affinity["cpus"])
    logger.log(f"Optimizer master CPU affinity: {p.cpu_affinity()}.")
    torch.set_num_threads(main_affinity["torch_threads"])
    logger.log(f"Optimizer master Torch threads: {torch.get_num_threads()}.")
    self.agent.to_device(main_affinity.get("cuda_idx", None))
    if self.world_size > 1:
        self.agent.data_parallel()
    self.algo.optim_initialize(rank=0)
    throttle_itr = 1 + getattr(self.algo,
        "min_steps_learn", 0) // self.sampler_batch_size
    delta_throttle_itr = (self.algo.batch_size * self.world_size *
        self.algo.updates_per_optimize /  # (is updates_per_sync)
        (self.sampler_batch_size * self.algo.replay_ratio))
    self.initialize_logging()
    return throttle_itr, delta_throttle_itr
Example #3
Source File: async_rl.py From rlpyt with MIT License
def startup(self):
    torch.distributed.init_process_group(
        backend="nccl",
        rank=self.rank,
        world_size=self.world_size,
        init_method=f"tcp://127.0.0.1:{self.port}",
    )
    p = psutil.Process()
    if self.affinity.get("set_affinity", True):
        p.cpu_affinity(self.affinity["cpus"])
    logger.log(f"Optimizer rank {self.rank} CPU affinity: {p.cpu_affinity()}.")
    torch.set_num_threads(self.affinity["torch_threads"])
    logger.log(f"Optimizer rank {self.rank} Torch threads: {torch.get_num_threads()}.")
    logger.log(f"Optimizer rank {self.rank} CUDA index: "
        f"{self.affinity.get('cuda_idx', None)}.")
    set_seed(self.seed)
    self.agent.to_device(cuda_idx=self.affinity.get("cuda_idx", None))
    self.agent.data_parallel()
    self.algo.optim_initialize(rank=self.rank)
Example #4
Source File: mpi_pytorch.py From spinningup with MIT License
def setup_pytorch_for_mpi():
    """
    Avoid slowdowns caused by each separate process's PyTorch using
    more than its fair share of CPU resources.
    """
    #print('Proc %d: Reporting original number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
    if torch.get_num_threads() == 1:
        return
    fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1)
    torch.set_num_threads(fair_num_threads)
    #print('Proc %d: Reporting new number of Torch threads as %d.'%(proc_id(), torch.get_num_threads()), flush=True)
Example #5
Source File: test_vgsl.py From kraken with Apache License 2.0
def test_helper_threads(self):
    """ Test openmp threads helper method. """
    rnn = vgsl.TorchVGSLModel('[1,1,0,48 Lbx10 Do O1c57]')
    rnn.set_num_threads(4)
    self.assertEqual(torch.get_num_threads(), 4)
Example #6
Source File: mpi_torch.py From firedup with MIT License
def setup_pytorch_for_mpi():
    """
    Avoid slowdowns caused by each separate process's PyTorch using
    more than its fair share of CPU resources.
    """
    if torch.get_num_threads() == 1:
        return
    fair_num_threads = max(int(torch.get_num_threads() / num_procs()), 1)
    torch.set_num_threads(fair_num_threads)
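Examples #4 and #6 are near-identical: each MPI rank shrinks its Torch thread pool to roughly the default thread count divided by the number of ranks, never below one. Below is a minimal standalone sketch of that rule, assuming mpi4py is installed and the script is launched under mpirun; num_procs() in both projects wraps the same MPI size query.

import torch
from mpi4py import MPI

def fair_torch_threads():
    """Give this MPI rank an even share of the default Torch thread pool."""
    n_ranks = MPI.COMM_WORLD.Get_size()
    if torch.get_num_threads() == 1:
        return  # already minimal; nothing left to divide
    torch.set_num_threads(max(torch.get_num_threads() // n_ranks, 1))

if __name__ == "__main__":
    fair_torch_threads()
    print(f"rank {MPI.COMM_WORLD.Get_rank()}: {torch.get_num_threads()} Torch threads")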
Example #7
Source File: minibatch_rl.py From rlpyt with MIT License
def startup(self):
    """
    Sets hardware affinities, initializes the following: 1) sampler (which
    should initialize the agent), 2) agent device and data-parallel wrapper
    (if applicable), 3) algorithm, 4) logger.
    """
    p = psutil.Process()
    try:
        if (self.affinity.get("master_cpus", None) is not None and
                self.affinity.get("set_affinity", True)):
            p.cpu_affinity(self.affinity["master_cpus"])
        cpu_affin = p.cpu_affinity()
    except AttributeError:
        cpu_affin = "UNAVAILABLE MacOS"
    logger.log(f"Runner {getattr(self, 'rank', '')} master CPU affinity: "
        f"{cpu_affin}.")
    if self.affinity.get("master_torch_threads", None) is not None:
        torch.set_num_threads(self.affinity["master_torch_threads"])
    logger.log(f"Runner {getattr(self, 'rank', '')} master Torch threads: "
        f"{torch.get_num_threads()}.")
    if self.seed is None:
        self.seed = make_seed()
    set_seed(self.seed)
    self.rank = rank = getattr(self, "rank", 0)
    self.world_size = world_size = getattr(self, "world_size", 1)
    examples = self.sampler.initialize(
        agent=self.agent,  # Agent gets initialized in sampler.
        affinity=self.affinity,
        seed=self.seed + 1,
        bootstrap_value=getattr(self.algo, "bootstrap_value", False),
        traj_info_kwargs=self.get_traj_info_kwargs(),
        rank=rank,
        world_size=world_size,
    )
    self.itr_batch_size = self.sampler.batch_spec.size * world_size
    n_itr = self.get_n_itr()
    self.agent.to_device(self.affinity.get("cuda_idx", None))
    if world_size > 1:
        self.agent.data_parallel()
    self.algo.initialize(
        agent=self.agent,
        n_itr=n_itr,
        batch_spec=self.sampler.batch_spec,
        mid_batch_reset=self.sampler.mid_batch_reset,
        examples=examples,
        world_size=world_size,
        rank=rank,
    )
    self.initialize_logging()
    return n_itr