Python torch.multiprocessing.Process() Examples

The following are 30 code examples of torch.multiprocessing.Process(), drawn from open-source projects. The original project and source file for each example are listed above it. You may also want to check out the other available functions and classes of the torch.multiprocessing module.
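Before the project examples, here is a minimal, self-contained sketch (not taken from any project below) of the basic pattern most of them share: spawn workers with mp.Process, hand them a tensor placed in shared memory, and join them when they finish.

import torch
import torch.multiprocessing as mp

def worker(shared_tensor, rank):
    # Every process sees the same underlying storage.
    shared_tensor[rank] = rank

if __name__ == '__main__':
    t = torch.zeros(4).share_memory_()
    processes = [mp.Process(target=worker, args=(t, r)) for r in range(4)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    print(t)  # tensor([0., 1., 2., 3.])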
Example #1
Source File: agent_single_process.py    From pytorch-rl with MIT License
def __init__(self, master, process_id=0):
        super(AgentSingleProcess, self).__init__(name="Process-%d" % process_id)
        # NOTE: self.master.* refers to parameters shared across all processes
        # NOTE: self.*        refers to process-specific properties
        # NOTE: we are not copying self.master.* to self.* to keep the code clean

        self.master = master
        self.process_id = process_id

        # env
        self.env = self.master.env_prototype(self.master.env_params, self.process_id)
        # model
        self.model = self.master.model_prototype(self.master.model_params)
        self._sync_local_with_global()

        # experience
        self._reset_experience() 
Example #2
Source File: run.py    From ConvLab with MIT License
def read_spec_and_run(spec_file, spec_name, lab_mode):
    '''Read a spec and run it in lab mode'''
    logger.info(f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}')
    if lab_mode in TRAIN_MODES:
        spec = spec_util.get(spec_file, spec_name)
    else:  # eval mode
        if '@' in lab_mode:
            lab_mode, prename = lab_mode.split('@')
            spec = spec_util.get_eval_spec(spec_file, spec_name, prename)
        else:
            spec = spec_util.get(spec_file, spec_name)

    if 'spec_params' not in spec:
        run_spec(spec, lab_mode)
    else:  # spec is parametrized; run them in parallel
        param_specs = spec_util.get_param_specs(spec)
        num_pro = spec['meta']['param_spec_process']
        # can't use Pool since it cannot spawn nested Process, which is needed for VecEnv and parallel sessions. So these will run and wait by chunks
        workers = [mp.Process(target=run_spec, args=(spec, lab_mode)) for spec in param_specs]
        for chunk_w in ps.chunk(workers, num_pro):
            for w in chunk_w:
                w.start()
            for w in chunk_w:
                w.join() 
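Here ps is presumably the pydash utility library: ps.chunk(workers, num_pro) splits the worker list into groups of at most num_pro processes, so only one chunk runs at a time. The same bounded concurrency can be sketched without pydash using plain slicing (run_in_chunks is a hypothetical helper, not part of the ConvLab source):

def run_in_chunks(workers, chunk_size):
    # Start and join at most chunk_size processes at a time.
    for i in range(0, len(workers), chunk_size):
        chunk = workers[i:i + chunk_size]
        for w in chunk:
            w.start()
        for w in chunk:
            w.join()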
Example #3
Source File: dataloader.py    From video-to-pose3D with MIT License
def start(self):
        # start a thread to read frames from the file video stream
        if self.format == 'ssd':
            if opt.sp:
                p = Thread(target=self.getitem_ssd, args=())
            else:
                p = mp.Process(target=self.getitem_ssd, args=())
        elif self.format == 'yolo':
            if opt.sp:
                p = Thread(target=self.getitem_yolo, args=())
            else:
                p = mp.Process(target=self.getitem_yolo, args=())
        else:
            raise NotImplementedError
        p.daemon = True
        p.start()
        return self 
Example #4
Source File: a2c.py    From minimalRL with MIT License
def __init__(self, n_train_processes):
        self.nenvs = n_train_processes
        self.waiting = False
        self.closed = False
        self.workers = list()

        master_ends, worker_ends = zip(*[mp.Pipe() for _ in range(self.nenvs)])
        self.master_ends, self.worker_ends = master_ends, worker_ends

        for worker_id, (master_end, worker_end) in enumerate(zip(master_ends, worker_ends)):
            p = mp.Process(target=worker,
                           args=(worker_id, master_end, worker_end))
            p.daemon = True
            p.start()
            self.workers.append(p)

        # Forbid master to use the worker end for messaging
        for worker_end in worker_ends:
            worker_end.close() 
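The worker target is not part of this excerpt. Based on the pipe protocol the master sets up, a plausible shape for it is the sketch below; the make_env factory and the command names are assumptions, not the minimalRL source:

def worker(worker_id, master_end, worker_end):
    master_end.close()  # the worker only talks through its own pipe end
    env = make_env(worker_id)  # assumed per-worker env factory
    while True:
        cmd, data = worker_end.recv()
        if cmd == 'step':
            ob, reward, done, info = env.step(data)
            if done:
                ob = env.reset()
            worker_end.send((ob, reward, done, info))
        elif cmd == 'reset':
            worker_end.send(env.reset())
        elif cmd == 'close':
            worker_end.close()
            break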
Example #5
Source File: pc_environment.py    From malmo-challenge with MIT License
def __init__(self, rank):
    docker_client = docker.from_env()
    agent_port, partner_port = 10000 + rank, 20000 + rank
    clients = [('127.0.0.1', agent_port), ('127.0.0.1', partner_port)]
    self.agent_type = GlobalVar()

    # Assume Minecraft launched if port has listener, launch otherwise
    if not _port_has_listener(agent_port):
      self._launch_malmo(docker_client, agent_port)
    print('Malmo running on port ' + str(agent_port))
    if not _port_has_listener(partner_port):
      self._launch_malmo(docker_client, partner_port)
    print('Malmo running on port ' + str(partner_port))

    # Set up partner agent env in separate process
    p = mp.Process(target=self._run_partner, args=(clients, ))
    p.daemon = True
    p.start()
    time.sleep(3)

    # Set up agent env
    self.env = PigChaseEnvironment(clients, PigChaseTopDownStateBuilder(gray=False), role=1, randomize_positions=True) 
Example #6
Source File: deepwalk.py    From dgl with Apache License 2.0
def fast_train_mp(self):
        """ multi-cpu-core or mix cpu & multi-gpu """
        self.init_device_emb()
        self.emb_model.share_memory()

        start_all = time.time()
        ps = []

        for i in range(len(self.args.gpus)):
            p = mp.Process(target=self.fast_train_sp, args=(self.args.gpus[i],))
            ps.append(p)
            p.start()

        for p in ps:
            p.join()
        
        print("Used time: %.2fs" % (time.time()-start_all))
        if self.args.save_in_txt:
            self.emb_model.save_embedding_txt(self.dataset, self.args.output_emb_file)
        else:
            self.emb_model.save_embedding(self.dataset, self.args.output_emb_file) 
Example #7
Source File: BaseAgent.py    From DeepRL with MIT License
def __init__(self, config):
        mp.Process.__init__(self)
        self.config = config
        self.__pipe, self.__worker_pipe = mp.Pipe()

        self._state = None
        self._task = None
        self._network = None
        self._total_steps = 0
        self.__cache_len = 2

        if not config.async_actor:
            self.start = lambda: None
            self.step = self._sample
            self.close = lambda: None
            self._set_up()
            self._task = config.task_fn() 
Example #8
Source File: distributed_sgd_test.py    From machina with MIT License
def test_step(self):

        def _run(rank, world_size):
            model = nn.Linear(10, 1)
            optimizer = DistributedSGD(
                model.parameters())

            optimizer.zero_grad()
            loss = model(torch.ones(10).float())
            loss.backward()
            optimizer.step()

        processes = []
        world_size = 4
        for rank in range(world_size):
            p = Process(target=init_processes,
                        args=(rank,
                              world_size,
                              _run))
            p.start()
            processes.append(p)
        for p in processes:
            p.join() 
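The init_processes helper is not shown in the excerpt. A common shape for it, modelled on the PyTorch distributed tutorial rather than the machina source, is:

import os
import torch.distributed as dist

def init_processes(rank, world_size, fn, backend='gloo'):
    # Rendezvous over localhost, then hand off to the test body.
    os.environ['MASTER_ADDR'] = '127.0.0.1'
    os.environ['MASTER_PORT'] = '29500'
    dist.init_process_group(backend, rank=rank, world_size=world_size)
    fn(rank, world_size)

Example #9 below uses the same helper, with DistributedAdamW in place of DistributedSGD.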
Example #9
Source File: distributed_adamw_test.py    From machina with MIT License
def test_step(self):

        def _run(rank, world_size):
            model = nn.Linear(10, 1)
            optimizer = DistributedAdamW(
                model.parameters())

            optimizer.zero_grad()
            loss = model(torch.ones(10).float())
            loss.backward()
            optimizer.step()

        processes = []
        world_size = 4
        for rank in range(world_size):
            p = Process(target=init_processes,
                        args=(rank,
                              world_size,
                              _run))
            p.start()
            processes.append(p)
        for p in processes:
            p.join() 
Example #10
Source File: multiprocess_iterator.py    From gtos with MIT License
def __init__(self,
                 base_iterator: DataIterator,
                 num_workers: int = 1,
                 output_queue_size: int = 1000) -> None:
        # pylint: disable=protected-access
        super().__init__()
        self.num_workers = num_workers
        self.batch_size = base_iterator._batch_size
        self.output_queue_size = output_queue_size

        # These two options make the iterator stateful, which means it can't be shared
        # across multiple processes.
        if base_iterator._cache_instances:
            raise ConfigurationError("cannot use Multiprocess iterator with cache_instances")
        if base_iterator._instances_per_epoch:
            raise ConfigurationError("cannot use instances_per_epoch with Multiprocess iterator")

        self.iterator = base_iterator

        self.processes: List[Process] = []
        self.queuer: Optional[Process] = None 
Example #11
Source File: neat_es.py    From DistributedES with Apache License 2.0
def __init__(self, id, state_normalizer, task_q, result_q, stop, config, neat_config):
        mp.Process.__init__(self)
        self.id = id
        self.task_q = task_q
        self.result_q = result_q
        self.state_normalizer = state_normalizer
        self.stop = stop
        self.config = config
        self.env = config.env_fn()
        self.evaluator = GenomeEvaluator(config, neat_config, state_normalizer) 
Example #12
Source File: train.py    From pytorch-parallel with MIT License
def train(self, size=2):
        processes = []
        for rank in range(size):
            p = Process(target=self.init_processes, args=(rank, size, self.run))
            p.start()
            processes.append(p)

        for p in processes:
            p.join() 
Example #13
Source File: natural_es.py    From DistributedES with Apache License 2.0
def __init__(self, id, param, state_normalizer, task_q, result_q, stop, config):
        mp.Process.__init__(self)
        self.id = id
        self.task_q = task_q
        self.param = param
        self.result_q = result_q
        self.stop = stop
        self.config = config
        self.evaluator = Evaluator(config, state_normalizer) 
Example #14
Source File: cma_es.py    From DistributedES with Apache License 2.0
def all_tasks():
    configs = []

    hidden_size = 16
    # config = PendulumConfig(hidden_size)
    # configs.append(config)

    # config = ContinuousLunarLanderConfig(hidden_size)
    # configs.append(config)

    config = BipedalWalkerConfig(hidden_size)
    configs.append(config)

    config = BipedalWalkerHardcore(hidden_size)
    configs.append(config)

    ps = []
    for cf in configs:
        cf.max_steps = int(1e7)
        cf.num_workers = 8
        cf.pop_size = 64
        cf.sigma = 1
        cf.tag = 'CMA-%d' % (hidden_size)
        ps.append(mp.Process(target=multi_runs, args=(cf, )))

    for p in ps: p.start()
    for p in ps: p.join() 
Example #15
Source File: cma_es.py    From DistributedES with Apache License 2.0
def __init__(self, id, state_normalizer, task_q, result_q, stop, config):
        mp.Process.__init__(self)
        self.task_queue = task_q
        self.result_q = result_q
        self.evaluator = Evaluator(config, state_normalizer)
        self.id = id
        self.stop = stop 
Example #16
Source File: natural_es.py    From DistributedES with Apache License 2.0
def all_tasks():
    configs = []

    hidden_size = 64
    # config = PendulumConfig(hidden_size)
    # configs.append(config)
    # config = ContinuousLunarLanderConfig(hidden_size)
    # configs.append(config)
    config = BipedalWalkerConfig(hidden_size)
    configs.append(config)
    config = BipedalWalkerHardcore(hidden_size)
    configs.append(config)

    ps = []
    for cf in configs:
        cf.num_workers = 8
        cf.pop_size = 64
        cf.sigma = 0.1
        cf.learning_rate = 0.1
        # cf.action_noise_std = 0.02
        cf.max_steps = int(1e7)
        cf.tag = 'NES-%d' % (cf.hidden_size)
        ps.append(mp.Process(target=multi_runs, args=(cf, )))

    for p in ps: p.start()
    for p in ps: p.join() 
Example #17
Source File: epi_sampler.py    From machina with MIT License
def __init__(self, env, pol, num_parallel=8, prepro=None, seed=256):
        self.env = env
        self.pol = copy.deepcopy(pol)
        self.pol.to('cpu')
        self.pol.share_memory()
        self.pol.eval()
        self.num_parallel = num_parallel

        self.n_steps_global = torch.tensor(0, dtype=torch.long).share_memory_()
        self.max_steps = torch.tensor(0, dtype=torch.long).share_memory_()
        self.n_epis_global = torch.tensor(
            0, dtype=torch.long).share_memory_()
        self.max_epis = torch.tensor(0, dtype=torch.long).share_memory_()

        self.exec_flags = [torch.tensor(
            0, dtype=torch.long).share_memory_() for _ in range(self.num_parallel)]
        self.deterministic_flag = torch.tensor(
            0, dtype=torch.uint8).share_memory_()

        self.epis = mp.Manager().list()
        self.processes = []
        for ind in range(self.num_parallel):
            p = mp.Process(target=mp_sample, args=(self.pol, env, self.max_steps, self.max_epis, self.n_steps_global,
                                                   self.n_epis_global, self.epis, self.exec_flags[ind], self.deterministic_flag, ind, prepro, seed))
            p.start()
            self.processes.append(p) 
Example #18
Source File: dataloader.py    From keyphrase-generation-rl with MIT License
def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory
        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        if self.num_workers > 0:
            self.index_queue = multiprocessing.SimpleQueue()
            self.data_queue = multiprocessing.SimpleQueue()
            self.batches_outstanding = 0
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}

            self.workers = [
                multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
                for _ in range(self.num_workers)]

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            if self.pin_memory:
                in_data = self.data_queue
                self.data_queue = queue.Queue()
                self.pin_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(in_data, self.data_queue, self.done_event))
                self.pin_thread.daemon = True
                self.pin_thread.start()

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices() 
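The _worker_loop target is not included in the excerpt. The send_idx/rcvd_idx/reorder_dict bookkeeping suggests a loop like the following sketch, modelled on the legacy PyTorch DataLoader worker (an assumption, not the keyphrase-generation-rl source):

def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    while True:
        r = index_queue.get()
        if r is None:  # shutdown sentinel
            break
        idx, batch_indices = r  # idx lets the parent restore batch order
        samples = collate_fn([dataset[i] for i in batch_indices])
        data_queue.put((idx, samples))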
Example #19
Source File: multiprocess_input_pipeline.py    From sigir19-neural-ir with Apache License 2.0
def get_multiprocess_batch_queue(name_prefix: str, target_function, files, conf, _logger, queue_size=100) -> Tuple[mp.Queue, List[mp.Process], mp.Event]:
    ctx = mp.get_context('spawn')  # use 'spawn' explicitly so that Windows & Linux behave the same
    _queue = ctx.Queue(queue_size)
    _processes = []
    _finish_notification = ctx.Event()

    if len(files) == 0:
        _logger.error("No files for multiprocess loading specified, for: " + name_prefix)
        exit(1)
    else:
        _logger.info("Starting "+str(len(files))+" data loader processes, for:" + name_prefix)

    if conf["token_embedder_type"] == "fasttext":
        global fasttext_vocab_cached_mapping
        global fasttext_vocab_cached_data
        if fasttext_vocab_cached_data is None:
            fasttext_vocab_cached_mapping, fasttext_vocab_cached_data = FastTextVocab.load_ids(conf["fasttext_vocab_mapping"],conf["fasttext_max_subwords"])
            fasttext_vocab_cached_data.share_memory_()

    for proc_number, file in enumerate(files):
        process = ctx.Process(name=name_prefix + "-" + str(proc_number),
                             target=target_function,
                             args=(proc_number, conf, _queue, _finish_notification, file,fasttext_vocab_cached_mapping,fasttext_vocab_cached_data))
        process.start()
        _processes.append(process)
    return _queue, _processes, _finish_notification


#
# training instance generator
#   - filling the _queue with ready to run training batches
#   - everything is thread local
# 
Example #20
Source File: vector_env.py    From habitat-api with MIT License
def _spawn_workers(
        self,
        env_fn_args: Sequence[Tuple],
        make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn,
    ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]:
        parent_connections, worker_connections = zip(
            *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)]
        )
        self._workers = []
        for worker_conn, parent_conn, env_args in zip(
            worker_connections, parent_connections, env_fn_args
        ):
            ps = self._mp_ctx.Process(
                target=self._worker_env,
                args=(
                    worker_conn.recv,
                    worker_conn.send,
                    make_env_fn,
                    env_args,
                    self._auto_reset_done,
                    worker_conn,
                    parent_conn,
                ),
            )
            self._workers.append(ps)
            ps.daemon = True
            ps.start()
            worker_conn.close()
        return (
            [p.recv for p in parent_connections],
            [p.send for p in parent_connections],
        ) 
Example #21
Source File: multiprocess_iterator.py    From gtos with MIT License
def __call__(self,
                 instances: Iterable[Instance],
                 num_epochs: int = None,
                 shuffle: bool = True) -> Iterator[TensorDict]:

        # If you run it forever, the worker processes won't shut down correctly.
        # TODO(joelgrus) find a solution for this
        if num_epochs is None:
            raise ConfigurationError("Multiprocess Iterator must be run for a fixed number of epochs")

        manager = Manager()
        output_queue = manager.Queue(self.output_queue_size)
        input_queue = manager.Queue(self.output_queue_size * self.batch_size)

        # Start process that populates the queue.
        self.queuer = Process(target=_queuer, args=(instances, input_queue, self.num_workers, num_epochs))
        self.queuer.start()

        # Start the tensor-dict workers.
        for i in range(self.num_workers):
            args = (input_queue, output_queue, self.iterator, shuffle, i)
            process = Process(target=_create_tensor_dicts, args=args)
            process.start()
            self.processes.append(process)

        num_finished = 0
        while num_finished < self.num_workers:
            item = output_queue.get()
            if isinstance(item, int):
                num_finished += 1
                logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})")
            else:
                yield item

        for process in self.processes:
            process.join()
        self.processes.clear()

        if self.queuer is not None:
            self.queuer.join()
            self.queuer = None 
Example #22
Source File: dataloader.py    From video-to-pose3D with MIT License
def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=(), daemon=True)
            # p = mp.Process(target=self.update, args=())
            # p.daemon = True
            p.start()
        return self 
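Note that passing daemon=True to the Process constructor (supported since Python 3.3) is equivalent to setting p.daemon = True before start(), as the commented-out lines show.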
Example #23
Source File: run_ithemal.py    From Ithemal with MIT License
def graph_model_benchmark(base_params, benchmark_params):
    # type: (BaseParameters, BenchmarkParameters) -> None
    data = load_data(base_params)
    model = load_model(base_params, data)

    train = tr.Train(
        model, data, tr.PredictionType.REGRESSION, ls.mse_loss, 1,
        batch_size=benchmark_params.batch_size, clip=None, opt=tr.OptimizerType.ADAM_PRIVATE, lr=0.01,
    )

    model.share_memory()

    mp_config = MPConfig(benchmark_params.threads)
    partition_size = benchmark_params.examples // benchmark_params.trainers

    processes = []

    start_time = time.time()

    with mp_config:
        for rank in range(benchmark_params.trainers):
            mp_config.set_env(rank)

            partition = (rank * partition_size, (rank + 1) * partition_size)

            p = mp.Process(target=train, args=(rank, partition))
            p.daemon = True
            p.start()
            processes.append(p)

    for p in processes:
        p.join()

    end_time = time.time()
    print('Time to process {} examples: {} seconds'.format(
        benchmark_params.examples,
        end_time - start_time,
    )) 
Example #24
Source File: dataloader.py    From video-to-pose3D with MIT License
def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            # t = Thread(target=self.update, args=(), daemon=True)
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=(), daemon=True)
            # p = mp.Process(target=self.update, args=())
            # p.daemon = True
            p.start()
        return self 
Example #25
Source File: train.py    From CornerNet-Lite with BSD 3-Clause "New" or "Revised" License
def main(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.distributed:
        args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)

    rank = args.rank

    cfg_file = os.path.join("./configs", args.cfg_file + ".json")
    with open(cfg_file, "r") as f:
        config = json.load(f)

    config["system"]["snapshot_name"] = args.cfg_file
    system_config = SystemConfig().update_config(config["system"])

    model_file  = "core.models.{}".format(args.cfg_file)
    model_file  = importlib.import_module(model_file)
    model       = model_file.model()

    train_split = system_config.train_split
    val_split   = system_config.val_split

    print("Process {}: loading all datasets...".format(rank))
    dataset = system_config.dataset
    workers = args.workers
    print("Process {}: using {} workers".format(rank, workers))
    training_dbs = [datasets[dataset](config["db"], split=train_split, sys_config=system_config) for _ in range(workers)]
    validation_db = datasets[dataset](config["db"], split=val_split, sys_config=system_config)

    if rank == 0:
        print("system config...")
        pprint.pprint(system_config.full)

        print("db config...")
        pprint.pprint(training_dbs[0].configs)

        print("len of db: {}".format(len(training_dbs[0].db_inds)))
        print("distributed: {}".format(args.distributed))

    train(training_dbs, validation_db, system_config, model, args) 
Example #26
Source File: train.py    From CornerNet-Lite with BSD 3-Clause "New" or "Revised" License
def init_parallel_jobs(system_config, dbs, queue, fn, data_aug):
    tasks = [Process(target=prefetch_data, args=(system_config, db, queue, fn, data_aug)) for db in dbs]
    for task in tasks:
        task.daemon = True
        task.start()
    return tasks 
Example #27
Source File: mcts_base.py    From doom-net-pytorch with MIT License
def run_train(self, args):
        print("training...")

        model = self
        sim = Simulator(model)

        games = []
        for i in range(1):
            games.append(
                args.instance_class(args.vizdoom_config, args.wad_path, args.skiprate, actions=args.action_set, id=i)
            )

        for iter in range(100):
            print("iteration: ", iter)
            #
            # generate data
            #
            processes = []
            for game in games:
                process = Process(target=self.generate_data, args=(game, sim, args))
                process.start()
                processes.append(process)

            for process in processes:
                process.join()
            #
            # train model with new data
            #
            self.train_model(model) 
Example #28
Source File: train.py    From CenterNet with MIT License
def init_parallel_jobs(dbs, queue, fn, data_aug):
    tasks = [Process(target=prefetch_data, args=(db, queue, fn, data_aug)) for db in dbs]
    for task in tasks:
        task.daemon = True
        task.start()
    return tasks 
Example #29
Source File: dataloader.py    From TAKG with MIT License
def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory
        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        if self.num_workers > 0:
            self.index_queue = multiprocessing.SimpleQueue()
            self.data_queue = multiprocessing.SimpleQueue()
            self.batches_outstanding = 0
            self.shutdown = False
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}

            self.workers = [
                multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
                for _ in range(self.num_workers)]

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            if self.pin_memory:
                in_data = self.data_queue
                self.data_queue = queue.Queue()
                self.pin_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(in_data, self.data_queue, self.done_event))
                self.pin_thread.daemon = True
                self.pin_thread.start()

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices() 
Example #30
Source File: dataloader.py    From video-to-pose3D with MIT License
def start(self):
        # start a thread to read frames from the file video stream
        if opt.sp:
            t = Thread(target=self.update, args=())
            t.daemon = True
            t.start()
        else:
            p = mp.Process(target=self.update, args=())
            p.daemon = True
            p.start()
        return self