Python torch.multiprocessing.Process() Examples
The following are 30 code examples of torch.multiprocessing.Process(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module torch.multiprocessing, or try the search function.
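torch.multiprocessing is a drop-in replacement for Python's standard multiprocessing module: it exposes the same Process/Pipe/Queue API, but tensors passed to child processes (or moved explicitly with share_memory_()) are backed by shared memory instead of being copied. Most of the examples below follow the same basic pattern: define a worker function, start one Process per worker, then join them all. Here is a minimal self-contained sketch of that pattern; the worker function and tensor size are illustrative assumptions, not taken from any project below.

import torch
import torch.multiprocessing as mp

def worker(rank, shared):
    # every process sees the same underlying storage, so this write
    # is visible to the parent without explicit communication
    shared[rank] = float(rank)

if __name__ == '__main__':
    shared = torch.zeros(4).share_memory_()  # move the tensor into shared memory
    processes = []
    for rank in range(4):
        p = mp.Process(target=worker, args=(rank, shared))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    print(shared)  # expected: tensor([0., 1., 2., 3.])

The __main__ guard matters here: with the 'spawn' start method (used in Example #19 below, and the default on Windows), each child re-imports the main module, so unguarded process creation would recurse.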
Example #1
Source File: agent_single_process.py From pytorch-rl with MIT License | 6 votes |
def __init__(self, master, process_id=0):
    super(AgentSingleProcess, self).__init__(name="Process-%d" % process_id)

    # NOTE: self.master.* refers to parameters shared across all processes
    # NOTE: self.* refers to process-specific properties
    # NOTE: we are not copying self.master.* to self.* to keep the code clean
    self.master = master
    self.process_id = process_id

    # env
    self.env = self.master.env_prototype(self.master.env_params, self.process_id)

    # model
    self.model = self.master.model_prototype(self.master.model_params)
    self._sync_local_with_global()

    # experience
    self._reset_experience()
Example #2
Source File: run.py From ConvLab with MIT License | 6 votes |
def read_spec_and_run(spec_file, spec_name, lab_mode):
    '''Read a spec and run it in lab mode'''
    logger.info(f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}')
    if lab_mode in TRAIN_MODES:
        spec = spec_util.get(spec_file, spec_name)
    else:  # eval mode
        if '@' in lab_mode:
            lab_mode, prename = lab_mode.split('@')
            spec = spec_util.get_eval_spec(spec_file, spec_name, prename)
        else:
            spec = spec_util.get(spec_file, spec_name)

    if 'spec_params' not in spec:
        run_spec(spec, lab_mode)
    else:  # spec is parametrized; run them in parallel
        param_specs = spec_util.get_param_specs(spec)
        num_pro = spec['meta']['param_spec_process']
        # can't use Pool since it cannot spawn nested Process, which is needed
        # for VecEnv and parallel sessions. So these will run and wait by chunks
        workers = [mp.Process(target=run_spec, args=(spec, lab_mode)) for spec in param_specs]
        for chunk_w in ps.chunk(workers, num_pro):
            for w in chunk_w:
                w.start()
            for w in chunk_w:
                w.join()
Example #3
Source File: dataloader.py From video-to-pose3D with MIT License | 6 votes |
def start(self):
    # start a thread to read frames from the file video stream
    if self.format == 'ssd':
        if opt.sp:
            p = Thread(target=self.getitem_ssd, args=())
        else:
            p = mp.Process(target=self.getitem_ssd, args=())
    elif self.format == 'yolo':
        if opt.sp:
            p = Thread(target=self.getitem_yolo, args=())
        else:
            p = mp.Process(target=self.getitem_yolo, args=())
    else:
        raise NotImplementedError
    p.daemon = True
    p.start()
    return self
Example #4
Source File: a2c.py From minimalRL with MIT License | 6 votes |
def __init__(self, n_train_processes):
    self.nenvs = n_train_processes
    self.waiting = False
    self.closed = False
    self.workers = list()

    master_ends, worker_ends = zip(*[mp.Pipe() for _ in range(self.nenvs)])
    self.master_ends, self.worker_ends = master_ends, worker_ends

    for worker_id, (master_end, worker_end) in enumerate(zip(master_ends, worker_ends)):
        p = mp.Process(target=worker, args=(worker_id, master_end, worker_end))
        p.daemon = True
        p.start()
        self.workers.append(p)

    # Forbid master to use the worker end for messaging
    for worker_end in worker_ends:
        worker_end.close()
Example #5
Source File: pc_environment.py From malmo-challenge with MIT License | 6 votes |
def __init__(self, rank):
    docker_client = docker.from_env()
    agent_port, partner_port = 10000 + rank, 20000 + rank
    clients = [('127.0.0.1', agent_port), ('127.0.0.1', partner_port)]
    self.agent_type = GlobalVar()

    # Assume Minecraft launched if port has listener, launch otherwise
    if not _port_has_listener(agent_port):
        self._launch_malmo(docker_client, agent_port)
    print('Malmo running on port ' + str(agent_port))
    if not _port_has_listener(partner_port):
        self._launch_malmo(docker_client, partner_port)
    print('Malmo running on port ' + str(partner_port))

    # Set up partner agent env in separate process
    p = mp.Process(target=self._run_partner, args=(clients, ))
    p.daemon = True
    p.start()
    time.sleep(3)

    # Set up agent env
    self.env = PigChaseEnvironment(clients, PigChaseTopDownStateBuilder(gray=False),
                                   role=1, randomize_positions=True)
Example #6
Source File: deepwalk.py From dgl with Apache License 2.0 | 6 votes |
def fast_train_mp(self):
    """ multi-cpu-core or mix cpu & multi-gpu """
    self.init_device_emb()
    self.emb_model.share_memory()

    start_all = time.time()
    ps = []

    for i in range(len(self.args.gpus)):
        p = mp.Process(target=self.fast_train_sp, args=(self.args.gpus[i],))
        ps.append(p)
        p.start()

    for p in ps:
        p.join()

    print("Used time: %.2fs" % (time.time() - start_all))
    if self.args.save_in_txt:
        self.emb_model.save_embedding_txt(self.dataset, self.args.output_emb_file)
    else:
        self.emb_model.save_embedding(self.dataset, self.args.output_emb_file)
Example #7
Source File: BaseAgent.py From DeepRL with MIT License | 6 votes |
def __init__(self, config):
    mp.Process.__init__(self)
    self.config = config
    self.__pipe, self.__worker_pipe = mp.Pipe()

    self._state = None
    self._task = None
    self._network = None
    self._total_steps = 0
    self.__cache_len = 2

    if not config.async_actor:
        self.start = lambda: None
        self.step = self._sample
        self.close = lambda: None
        self._set_up()
        self._task = config.task_fn()
Example #8
Source File: distributed_sgd_test.py From machina with MIT License | 6 votes |
def test_step(self):
    def _run(rank, world_size):
        model = nn.Linear(10, 1)
        optimizer = DistributedSGD(model.parameters())
        optimizer.zero_grad()
        loss = model(torch.ones(10).float())
        loss.backward()
        optimizer.step()

    processes = []
    world_size = 4
    for rank in range(world_size):
        p = Process(target=init_processes, args=(rank, world_size, _run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
Example #9
Source File: distributed_adamw_test.py From machina with MIT License | 6 votes |
def test_step(self):
    def _run(rank, world_size):
        model = nn.Linear(10, 1)
        optimizer = DistributedAdamW(model.parameters())
        optimizer.zero_grad()
        loss = model(torch.ones(10).float())
        loss.backward()
        optimizer.step()

    processes = []
    world_size = 4
    for rank in range(world_size):
        p = Process(target=init_processes, args=(rank, world_size, _run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
Example #10
Source File: multiprocess_iterator.py From gtos with MIT License | 6 votes |
def __init__(self, base_iterator: DataIterator, num_workers: int = 1, output_queue_size: int = 1000) -> None:
    # pylint: disable=protected-access
    super().__init__()
    self.num_workers = num_workers
    self.batch_size = base_iterator._batch_size
    self.output_queue_size = output_queue_size

    # These two options make the iterator stateful, which means it can't be shared
    # across multiple processes.
    if base_iterator._cache_instances:
        raise ConfigurationError("cannot use Multiprocess iterator with cache_instances")
    if base_iterator._instances_per_epoch:
        raise ConfigurationError("cannot use instances_per_epoch with Multiprocess iterator")

    self.iterator = base_iterator
    self.processes: List[Process] = []
    self.queuer: Optional[Process] = None
Example #11
Source File: neat_es.py From DistributedES with Apache License 2.0 | 5 votes |
def __init__(self, id, state_normalizer, task_q, result_q, stop, config, neat_config):
    mp.Process.__init__(self)
    self.id = id
    self.task_q = task_q
    self.result_q = result_q
    self.state_normalizer = state_normalizer
    self.stop = stop
    self.config = config
    self.env = config.env_fn()
    self.evaluator = GenomeEvaluator(config, neat_config, state_normalizer)
Example #12
Source File: train.py From pytorch-parallel with MIT License | 5 votes |
def train(self, size=2):
    processes = []
    for rank in range(size):
        p = Process(target=self.init_processes, args=(rank, size, self.run))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
Example #13
Source File: natural_es.py From DistributedES with Apache License 2.0 | 5 votes |
def __init__(self, id, param, state_normalizer, task_q, result_q, stop, config):
    mp.Process.__init__(self)
    self.id = id
    self.task_q = task_q
    self.param = param
    self.result_q = result_q
    self.stop = stop
    self.config = config
    self.evaluator = Evaluator(config, state_normalizer)
Example #14
Source File: cma_es.py From DistributedES with Apache License 2.0 | 5 votes |
def all_tasks():
    configs = []
    hidden_size = 16
    # config = PendulumConfig(hidden_size)
    # configs.append(config)
    # config = ContinuousLunarLanderConfig(hidden_size)
    # configs.append(config)
    config = BipedalWalkerConfig(hidden_size)
    configs.append(config)
    config = BipedalWalkerHardcore(hidden_size)
    configs.append(config)

    ps = []
    for cf in configs:
        cf.max_steps = int(1e7)
        cf.num_workers = 8
        cf.pop_size = 64
        cf.sigma = 1
        cf.tag = 'CMA-%d' % (hidden_size)
        ps.append(mp.Process(target=multi_runs, args=(cf, )))

    for p in ps:
        p.start()
    for p in ps:
        p.join()
Example #15
Source File: cma_es.py From DistributedES with Apache License 2.0 | 5 votes |
def __init__(self, id, state_normalizer, task_q, result_q, stop, config):
    mp.Process.__init__(self)
    self.task_queue = task_q
    self.result_q = result_q
    self.evaluator = Evaluator(config, state_normalizer)
    self.id = id
    self.stop = stop
Example #16
Source File: natural_es.py From DistributedES with Apache License 2.0 | 5 votes |
def all_tasks():
    configs = []
    hidden_size = 64
    # config = PendulumConfig(hidden_size)
    # configs.append(config)
    # config = ContinuousLunarLanderConfig(hidden_size)
    # configs.append(config)
    config = BipedalWalkerConfig(hidden_size)
    configs.append(config)
    config = BipedalWalkerHardcore(hidden_size)
    configs.append(config)

    ps = []
    for cf in configs:
        cf.num_workers = 8
        cf.pop_size = 64
        cf.sigma = 0.1
        cf.learning_rate = 0.1
        # cf.action_noise_std = 0.02
        cf.max_steps = int(1e7)
        cf.tag = 'NES-%d' % (cf.hidden_size)
        ps.append(mp.Process(target=multi_runs, args=(cf, )))

    for p in ps:
        p.start()
    for p in ps:
        p.join()
Example #17
Source File: epi_sampler.py From machina with MIT License | 5 votes |
def __init__(self, env, pol, num_parallel=8, prepro=None, seed=256):
    self.env = env
    self.pol = copy.deepcopy(pol)
    self.pol.to('cpu')
    self.pol.share_memory()
    self.pol.eval()
    self.num_parallel = num_parallel

    self.n_steps_global = torch.tensor(0, dtype=torch.long).share_memory_()
    self.max_steps = torch.tensor(0, dtype=torch.long).share_memory_()
    self.n_epis_global = torch.tensor(0, dtype=torch.long).share_memory_()
    self.max_epis = torch.tensor(0, dtype=torch.long).share_memory_()

    self.exec_flags = [torch.tensor(0, dtype=torch.long).share_memory_()
                       for _ in range(self.num_parallel)]
    self.deterministic_flag = torch.tensor(0, dtype=torch.uint8).share_memory_()

    self.epis = mp.Manager().list()

    self.processes = []
    for ind in range(self.num_parallel):
        p = mp.Process(target=mp_sample,
                       args=(self.pol, env, self.max_steps, self.max_epis,
                             self.n_steps_global, self.n_epis_global, self.epis,
                             self.exec_flags[ind], self.deterministic_flag,
                             ind, prepro, seed))
        p.start()
        self.processes.append(p)
Example #18
Source File: dataloader.py From keyphrase-generation-rl with MIT License | 5 votes |
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #19
Source File: multiprocess_input_pipeline.py From sigir19-neural-ir with Apache License 2.0 | 5 votes |
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # also set so that windows & linux behave the same
    _queue = ctx.Queue(queue_size)
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting " + str(len(files)) + " data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"], conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    for proc_number, file in enumerate(files):
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                              target=target_function,
                              args=(proc_number, conf, _queue, _finish_notification, file,
                                    fasttext_vocab_cached_mapping, fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
    return _queue, _processes, _finish_notification

#
# training instance generator
# - filling the _queue with ready to run training batches
# - everything is thread local
#
Example #20
Source File: vector_env.py From habitat-api with MIT License | 5 votes |
def _spawn_workers(
    self,
    env_fn_args: Sequence[Tuple],
    make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn,
) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
    parent_connections, worker_connections = zip(
        *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)]
    )
    self._workers = []
    for worker_conn, parent_conn, env_args in zip(
        worker_connections, parent_connections, env_fn_args
    ):
        ps = self._mp_ctx.Process(
            target=self._worker_env,
            args=(
                worker_conn.recv,
                worker_conn.send,
                make_env_fn,
                env_args,
                self._auto_reset_done,
                worker_conn,
                parent_conn,
            ),
        )
        self._workers.append(ps)
        ps.daemon = True
        ps.start()
        worker_conn.close()
    return (
        [p.recv for p in parent_connections],
        [p.send for p in parent_connections],
    )
Example #21
Source File: multiprocess_iterator.py From gtos with MIT License | 5 votes |
def __call__(self, instances: Iterable[Instance], num_epochs: int = None, shuffle: bool = True) -> Iterator[TensorDict]:
    # If you run it forever, the multiprocesses won't shut down correctly.
    # TODO(joelgrus) find a solution for this
    if num_epochs is None:
        raise ConfigurationError("Multiprocess Iterator must be run for a fixed number of epochs")

    manager = Manager()
    output_queue = manager.Queue(self.output_queue_size)
    input_queue = manager.Queue(self.output_queue_size * self.batch_size)

    # Start process that populates the queue.
    self.queuer = Process(target=_queuer, args=(instances, input_queue, self.num_workers, num_epochs))
    self.queuer.start()

    # Start the tensor-dict workers.
    for i in range(self.num_workers):
        args = (input_queue, output_queue, self.iterator, shuffle, i)
        process = Process(target=_create_tensor_dicts, args=args)
        process.start()
        self.processes.append(process)

    num_finished = 0
    while num_finished < self.num_workers:
        item = output_queue.get()
        if isinstance(item, int):
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})")
        else:
            yield item

    for process in self.processes:
        process.join()
    self.processes.clear()

    if self.queuer is not None:
        self.queuer.join()
        self.queuer = None
Example #22
Source File: dataloader.py From video-to-pose3D with MIT License | 5 votes |
def start(self):
    # start a thread to read frames from the file video stream
    if opt.sp:
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
    else:
        p = mp.Process(target=self.update, args=(), daemon=True)
        # p = mp.Process(target=self.update, args=())
        # p.daemon = True
        p.start()
    return self
Example #23
Source File: run_ithemal.py From Ithemal with MIT License | 5 votes |
def graph_model_benchmark(base_params, benchmark_params):
    # type: (BaseParameters, BenchmarkParameters) -> None
    data = load_data(base_params)
    model = load_model(base_params, data)

    train = tr.Train(
        model, data, tr.PredictionType.REGRESSION, ls.mse_loss, 1,
        batch_size=benchmark_params.batch_size, clip=None,
        opt=tr.OptimizerType.ADAM_PRIVATE, lr=0.01,
    )

    model.share_memory()

    mp_config = MPConfig(benchmark_params.threads)
    partition_size = benchmark_params.examples // benchmark_params.trainers

    processes = []
    start_time = time.time()

    with mp_config:
        for rank in range(benchmark_params.trainers):
            mp_config.set_env(rank)
            partition = (rank * partition_size, (rank + 1) * partition_size)
            p = mp.Process(target=train, args=(rank, partition))
            p.daemon = True
            p.start()
            processes.append(p)

    for p in processes:
        p.join()

    end_time = time.time()
    print('Time to process {} examples: {} seconds'.format(
        benchmark_params.examples, end_time - start_time,
    ))
Example #24
Source File: dataloader.py From video-to-pose3D with MIT License | 5 votes |
def start(self):
    # start a thread to read frames from the file video stream
    if opt.sp:
        # t = Thread(target=self.update, args=(), daemon=True)
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
    else:
        p = mp.Process(target=self.update, args=(), daemon=True)
        # p = mp.Process(target=self.update, args=())
        # p.daemon = True
        p.start()
    return self
Example #25
Source File: train.py From CornerNet-Lite with BSD 3-Clause "New" or "Revised" License | 5 votes |
def main(gpu, ngpus_per_node, args):
    args.gpu = gpu

    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    rank = args.rank

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    model_file = "core.models.{}".format(args.cfg_file)
    model_file = importlib.import_module(model_file)
    model = model_file.model()

    train_split = system_config.train_split
    val_split = system_config.val_split

    print("Process {}: loading all datasets...".format(rank))
    dataset = system_config.dataset
    workers = args.workers
    print("Process {}: using {} workers".format(rank, workers))
    training_dbs = [datasets[dataset](config["db"], split=train_split, sys_config=system_config)
                    for _ in range(workers)]
    validation_db = datasets[dataset](config["db"], split=val_split, sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    train(training_dbs, validation_db, system_config, model, args)
Example #26
Source File: train.py From CornerNet-Lite with BSD 3-Clause "New" or "Revised" License | 5 votes |
def init_parallel_jobs(system_config, dbs, queue, fn, data_aug):
    tasks = [Process(target=prefetch_data, args=(system_config, db, queue, fn, data_aug)) for db in dbs]
    for task in tasks:
        task.daemon = True
        task.start()
    return tasks
Example #27
Source File: mcts_base.py From doom-net-pytorch with MIT License | 5 votes |
def run_train(self, args):
    print("training...")
    model = self
    sim = Simulator(model)

    games = []
    for i in range(1):
        games.append(
            args.instance_class(args.vizdoom_config, args.wad_path, args.skiprate,
                                actions=args.action_set, id=i)
        )

    for iter in range(100):
        print("iteration: ", iter)
        #
        # generate data
        #
        processes = []
        for game in games:
            process = Process(target=self.generate_data, args=(game, sim, args))
            process.start()
            processes.append(process)
        for process in processes:
            process.join()
        #
        # train model with new data
        #
        self.train_model(model)
Example #28
Source File: train.py From CenterNet with MIT License | 5 votes |
def init_parallel_jobs(dbs, queue, fn, data_aug):
    tasks = [Process(target=prefetch_data, args=(db, queue, fn, data_aug)) for db in dbs]
    for task in tasks:
        task.daemon = True
        task.start()
    return tasks
Example #29
Source File: dataloader.py From TAKG with MIT License | 5 votes |
def __init__(self, loader):
    self.dataset = loader.dataset
    self.collate_fn = loader.collate_fn
    self.batch_sampler = loader.batch_sampler
    self.num_workers = loader.num_workers
    self.pin_memory = loader.pin_memory
    self.done_event = threading.Event()

    self.sample_iter = iter(self.batch_sampler)

    if self.num_workers > 0:
        self.index_queue = multiprocessing.SimpleQueue()
        self.data_queue = multiprocessing.SimpleQueue()
        self.batches_outstanding = 0
        self.shutdown = False
        self.send_idx = 0
        self.rcvd_idx = 0
        self.reorder_dict = {}

        self.workers = [
            multiprocessing.Process(
                target=_worker_loop,
                args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
            for _ in range(self.num_workers)]

        for w in self.workers:
            w.daemon = True  # ensure that the worker exits on process exit
            w.start()

        if self.pin_memory:
            in_data = self.data_queue
            self.data_queue = queue.Queue()
            self.pin_thread = threading.Thread(
                target=_pin_memory_loop,
                args=(in_data, self.data_queue, self.done_event))
            self.pin_thread.daemon = True
            self.pin_thread.start()

        # prime the prefetch loop
        for _ in range(2 * self.num_workers):
            self._put_indices()
Example #30
Source File: dataloader.py From video-to-pose3D with MIT License | 5 votes |
def start(self):
    # start a thread to read frames from the file video stream
    if opt.sp:
        t = Thread(target=self.update, args=())
        t.daemon = True
        t.start()
    else:
        p = mp.Process(target=self.update, args=())
        p.daemon = True
        p.start()
    return self