Python torch.set_num_threads() Examples
The following are 26 code examples of torch.set_num_threads(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions and classes of the torch module, or try the search function.
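Before the examples, here is a minimal sketch of what torch.set_num_threads() controls: it caps the number of threads PyTorch may use for intra-op CPU parallelism, and torch.get_num_threads() reads the current cap. The matmul below is only an illustration; whether a given thread count helps depends entirely on your hardware and workload.

import torch

# Cap intra-op CPU parallelism to a single thread -- the setting almost
# every worker-process example below applies.
torch.set_num_threads(1)
print(torch.get_num_threads())  # -> 1

# CPU-bound ops such as this matmul now run single-threaded.
x = torch.randn(1024, 1024)
y = x @ x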
Example #1
Source File: dataloader_new.py From MetaFGNet with MIT License | 6 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
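For context, loops like _worker_loop above run as the body of a dataloader worker subprocess. Below is a stripped-down sketch of how such a worker might be launched; the queue wiring and the toy collate function are illustrative assumptions, not code from MetaFGNet.

import multiprocessing as mp

import torch

def _toy_collate(batch):  # hypothetical stand-in for a real collate_fn
    return torch.stack(batch) * 2

def _worker(index_queue, data_queue):
    torch.set_num_threads(1)  # keep each worker single-threaded
    while True:
        r = index_queue.get()
        if r is None:  # None is the shutdown signal, as in the examples
            break
        idx, batch = r
        data_queue.put((idx, _toy_collate(batch)))

if __name__ == '__main__':
    index_q, data_q = mp.Queue(), mp.Queue()
    w = mp.Process(target=_worker, args=(index_q, data_q), daemon=True)
    w.start()
    index_q.put((0, [torch.tensor(1.0), torch.tensor(2.0)]))
    print(data_q.get())  # (0, tensor([2., 4.]))
    index_q.put(None)
    w.join()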
Example #2
Source File: buffer.py From rlpyt with MIT License | 6 votes |
def get_example_outputs(agent, env, examples, subprocess=False):
    """Do this in a sub-process to avoid setup conflict in master/workers
    (e.g. MKL)."""
    if subprocess:  # i.e. in subprocess.
        import torch
        torch.set_num_threads(1)  # Some fix to prevent MKL hang.
    o = env.reset()
    a = env.action_space.sample()
    o, r, d, env_info = env.step(a)
    r = np.asarray(r, dtype="float32")  # Must match torch float dtype here.
    agent.reset()
    agent_inputs = torchify_buffer(AgentInputs(o, a, r))
    a, agent_info = agent.step(*agent_inputs)
    if "prev_rnn_state" in agent_info:
        # Agent leaves B dimension in, strip it: [B,N,H] --> [N,H]
        agent_info = agent_info._replace(prev_rnn_state=agent_info.prev_rnn_state[0])
    examples["observation"] = o
    examples["reward"] = r
    examples["done"] = d
    examples["env_info"] = env_info
    examples["action"] = a  # OK to put torch tensor here, could numpify.
    examples["agent_info"] = agent_info
Example #3
Source File: worker.py From rlpyt with MIT License | 6 votes |
def initialize_worker(rank, seed=None, cpu=None, torch_threads=None):
    """Assign CPU affinity, set random seed, set torch_threads if needed to
    prevent MKL deadlock.
    """
    log_str = f"Sampler rank {rank} initialized"
    cpu = [cpu] if isinstance(cpu, int) else cpu
    p = psutil.Process()
    try:
        if cpu is not None:
            p.cpu_affinity(cpu)
        cpu_affin = p.cpu_affinity()
    except AttributeError:
        cpu_affin = "UNAVAILABLE MacOS"
    log_str += f", CPU affinity {cpu_affin}"
    torch_threads = (1 if torch_threads is None and cpu is not None
        else torch_threads)  # Default to 1 to avoid possible MKL hang.
    if torch_threads is not None:
        torch.set_num_threads(torch_threads)
    log_str += f", Torch threads {torch.get_num_threads()}"
    if seed is not None:
        set_seed(seed)
        time.sleep(0.3)  # (so the printing from set_seed is not intermixed)
        log_str += f", Seed {seed}"
    logger.log(log_str)
Example #4
Source File: torch_model.py From rltime with Apache License 2.0 | 6 votes |
def __init__(self, observation_space):
    """Initializes the model with the given observation space

    Currently supported observation spaces are:
    - Box spaces
    - A tuple of box spaces, where the 1st one is the 'main' observation,
      and the rest contain additional 1D vectors of linear features for the
      model which are fed to one of the non-convolutional layers (usually
      the RNN layer)
    """
    super().__init__()
    # When using multiple actors, each with its own CPU copy of the model,
    # we need to limit them to be single-threaded, otherwise they slow each
    # other down. This should not affect training time if training is on
    # the GPU.
    torch.set_num_threads(1)
    self._setup_inputs(observation_space)
Example #5
Source File: async_actor.py From rltime with Apache License 2.0 | 6 votes |
def run(self):
    # TODO Fix this dependency. The policy itself sets the thread limit
    # to 1, but this configuration seems to be per-thread in pytorch,
    # so need to set it here too :(
    import torch
    torch.set_num_threads(1)

    while not self._close_event.is_set():
        # If queue is full, wait for it not to be
        while len(self._queue) >= self._max_pending:
            self._queue_empty_event.wait()
            self._queue_empty_event.clear()
        # Get the next sample(s)
        samples = super().get_samples(1)
        self._queue.extend(samples)
        self._queue_fill_event.set()
Example #6
Source File: remote.py From leap with MIT License | 6 votes |
def __init__(
        self,
        env,
        policy,
        exploration_policy,
        max_path_length,
        train_rollout_function,
        eval_rollout_function,
):
    torch.set_num_threads(1)
    self._env = env
    self._policy = policy
    self._exploration_policy = exploration_policy
    self._max_path_length = max_path_length
    self.train_rollout_function = cloudpickle.loads(train_rollout_function)
    self.eval_rollout_function = cloudpickle.loads(eval_rollout_function)
Example #7
Source File: cpu_sampler.py From rlpyt with MIT License | 6 votes |
def initialize(self, affinity):
    """
    Runs inside the main sampler process.  Sets process hardware affinity
    and calls the ``agent.async_cpu()`` initialization.  Then proceeds with
    usual parallel sampler initialization.
    """
    p = psutil.Process()
    if affinity.get("set_affinity", True):
        p.cpu_affinity(affinity["master_cpus"])
    torch.set_num_threads(1)  # Needed to prevent MKL hang :( .
    self.agent.async_cpu(share_memory=True)
    super().initialize(
        agent=self.agent,
        affinity=affinity,
        seed=self.seed,
        bootstrap_value=None,  # Don't need here.
        traj_info_kwargs=None,  # Already done.
        world_size=1,
        rank=0,
    )
Example #8
Source File: mono_3d_estimation.py From 3d-vehicle-tracking with BSD 3-Clause "New" or "Revised" License | 6 votes |
def main():
    torch.set_num_threads(multiprocessing.cpu_count())
    args = parse_args()

    if args.set == 'gta':
        from model.model import Model
    elif args.set == 'kitti':
        from model.model_cen import Model
    else:
        raise ValueError("Model not found")

    model = Model(args.arch, args.roi_name, args.down_ratio, args.roi_kernel)
    model = nn.DataParallel(model)
    model = model.to(args.device)

    if args.phase == 'train':
        run_training(model, args)
    elif args.phase == 'test':
        test_model(model, args)
Example #9
Source File: dataloader.py From TAKG with MIT License | 6 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #10
Source File: dataloader.py From weakalign with MIT License | 6 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn, rng_seed):
    global _use_shared_memory
    _use_shared_memory = True

    np.random.seed(rng_seed)
    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #11
Source File: async_rl.py From rlpyt with MIT License | 6 votes |
def optim_startup(self):
    """
    Sets the hardware affinity, moves the agent's model parameters onto
    device, and initializes the data-parallel agent, if applicable.
    Computes optimizer throttling settings.
    """
    main_affinity = self.affinity.optimizer[0]
    p = psutil.Process()
    if main_affinity.get("set_affinity", True):
        p.cpu_affinity(main_affinity["cpus"])
    logger.log(f"Optimizer master CPU affinity: {p.cpu_affinity()}.")
    torch.set_num_threads(main_affinity["torch_threads"])
    logger.log(f"Optimizer master Torch threads: {torch.get_num_threads()}.")
    self.agent.to_device(main_affinity.get("cuda_idx", None))
    if self.world_size > 1:
        self.agent.data_parallel()
    self.algo.optim_initialize(rank=0)
    throttle_itr = 1 + getattr(self.algo, "min_steps_learn", 0) // self.sampler_batch_size
    delta_throttle_itr = (self.algo.batch_size * self.world_size *
        self.algo.updates_per_optimize /  # (is updates_per_sync)
        (self.sampler_batch_size * self.algo.replay_ratio))
    self.initialize_logging()
    return throttle_itr, delta_throttle_itr
Example #12
Source File: async_rl.py From rlpyt with MIT License | 6 votes |
def startup(self):
    torch.distributed.init_process_group(
        backend="nccl",
        rank=self.rank,
        world_size=self.world_size,
        init_method=f"tcp://127.0.0.1:{self.port}",
    )
    p = psutil.Process()
    if self.affinity.get("set_affinity", True):
        p.cpu_affinity(self.affinity["cpus"])
    logger.log(f"Optimizer rank {self.rank} CPU affinity: {p.cpu_affinity()}.")
    torch.set_num_threads(self.affinity["torch_threads"])
    logger.log(f"Optimizer rank {self.rank} Torch threads: {torch.get_num_threads()}.")
    logger.log(f"Optimizer rank {self.rank} CUDA index: "
        f"{self.affinity.get('cuda_idx', None)}.")
    set_seed(self.seed)
    self.agent.to_device(cuda_idx=self.affinity.get("cuda_idx", None))
    self.agent.data_parallel()
    self.algo.optim_initialize(rank=self.rank)
Example #13
Source File: dataloader.py From keyphrase-generation-rl with MIT License | 6 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #14
Source File: data_collector.py From torchsupport with MIT License | 6 votes |
def _collector_worker(statistics, buffer, distributor, collector, done, piecewise):
    torch.set_num_threads(1)
    while True:
        if done.value:
            break
        result = collector.sample_trajectory()
        trajectory_statistics = collector.compute_statistics(result)
        trajectory = distributor.commit_trajectory(result)
        if piecewise:
            for item in trajectory:
                buffer.append(item)
        else:
            buffer.append(trajectory)
        statistics.update(trajectory_statistics)
Example #15
Source File: path_collector.py From oac-explore with MIT License | 6 votes |
def __init__(self,
             domain_name,
             env_seed,
             policy_producer,
             max_num_epoch_paths_saved=None,
             render=False,
             render_kwargs=None,
             ):
    torch.set_num_threads(1)

    env = env_producer(domain_name, env_seed)
    self._policy_producer = policy_producer

    super().__init__(env,
                     max_num_epoch_paths_saved=max_num_epoch_paths_saved,
                     render=render,
                     render_kwargs=render_kwargs,
                     )
Example #16
Source File: trainer.py From mrqa with Apache License 2.0 | 6 votes |
def set_random_seed(random_seed):
    if random_seed is not None:
        print("Set random seed as {}".format(random_seed))
        os.environ['PYTHONHASHSEED'] = str(random_seed)
        random.seed(random_seed)
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)
        torch.cuda.manual_seed_all(random_seed)
        torch.set_num_threads(1)
        cudnn.benchmark = False
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')
Example #17
Source File: torch_ranker_agent.py From KBRD with MIT License | 6 votes |
def share(self):
    """Share model parameters."""
    shared = super().share()
    shared['model'] = self.model
    if self.opt.get('numthreads', 1) > 1 and isinstance(self.metrics, dict):
        torch.set_num_threads(1)
        # move metrics and model to shared memory
        self.metrics = SharedTable(self.metrics)
        self.model.share_memory()
    shared['metrics'] = self.metrics
    shared['fixed_candidates'] = self.fixed_candidates
    shared['fixed_candidate_vecs'] = self.fixed_candidate_vecs
    shared['fixed_candidate_encs'] = self.fixed_candidate_encs
    shared['vocab_candidates'] = self.vocab_candidates
    shared['vocab_candidate_vecs'] = self.vocab_candidate_vecs
    shared['optimizer'] = self.optimizer
    return shared
Example #18
Source File: dataloader.py From Dense-CoAttention-Network with MIT License | 6 votes |
def _worker_process_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #19
Source File: dataloader.py From OISR-PyTorch with BSD 2-Clause "Simplified" License | 6 votes |
def _ms_loop(dataset, index_queue, data_queue, collate_fn, scale, seed, init_fn, worker_id):
    global _use_shared_memory
    _use_shared_memory = True
    _set_worker_signal_handlers()

    torch.set_num_threads(1)
    torch.manual_seed(seed)
    while True:
        r = index_queue.get()
        if r is None:
            break
        idx, batch_indices = r
        try:
            idx_scale = 0
            if len(scale) > 1 and dataset.train:
                idx_scale = random.randrange(0, len(scale))
                dataset.set_scale(idx_scale)
            samples = collate_fn([dataset[i] for i in batch_indices])
            samples.append(idx_scale)
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #20
Source File: dataloader.py From seq2seq-keyphrase-pytorch with Apache License 2.0 | 6 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #21
Source File: EndToEnd_Evaluation.py From gnn-comparison with GNU General Public License v3.0 | 6 votes |
def main(config_file, dataset_name,
         outer_k, outer_processes, inner_k, inner_processes, result_folder, debug=False):
    # Needed to avoid thread spawning, conflicts with multi-processing.
    # You may set a number > 1 but take into account
    # the number of processes on the machine
    torch.set_num_threads(1)

    experiment_class = EndToEndExperiment

    model_configurations = Grid(config_file, dataset_name)
    model_configuration = Config(**model_configurations[0])

    exp_path = os.path.join(result_folder, f'{model_configuration.exp_name}_assessment')

    model_selector = HoldOutSelector(max_processes=inner_processes)
    risk_assesser = KFoldAssessment(outer_k, model_selector, exp_path, model_configurations,
                                    outer_processes=outer_processes)

    risk_assesser.risk_assessment(experiment_class, debug=debug)
Example #22
Source File: ppo_pybullet.py From cherry with Apache License 2.0 | 6 votes |
def main(env='MinitaurTrottingEnv-v0'):
    env = gym.make(env)
    env = envs.AddTimestep(env)
    env = envs.Logger(env, interval=PPO_STEPS)
    env = envs.Normalizer(env, states=True, rewards=True)
    env = envs.Torch(env)
    # env = envs.Recorder(env)
    env = envs.Runner(env)
    env.seed(SEED)

    th.set_num_threads(1)
    policy = ActorCriticNet(env)
    optimizer = optim.Adam(policy.parameters(), lr=LR, eps=1e-5)
    num_updates = TOTAL_STEPS // PPO_STEPS + 1
    lr_schedule = optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: 1 - epoch/num_updates)
    get_action = lambda state: get_action_value(state, policy)

    for epoch in range(num_updates):
        # We use the Runner collector, but could've written our own
        replay = env.run(get_action, steps=PPO_STEPS, render=RENDER)

        # Update policy
        update(replay, optimizer, policy, env, lr_schedule)
Example #23
Source File: learner.py From sample-factory with MIT License | 5 votes |
def initialize(self, timing):
    with timing.timeit('init'):
        # initialize the Torch modules
        if self.cfg.seed is None:
            log.info('Starting seed is not provided')
        else:
            log.info('Setting fixed seed %d', self.cfg.seed)
            torch.manual_seed(self.cfg.seed)
            np.random.seed(self.cfg.seed)

        # this does not help with a single experiment
        # but seems to do better when we're running more than one experiment in parallel
        torch.set_num_threads(1)

        if self.cfg.device == 'gpu':
            torch.backends.cudnn.benchmark = True

            # we should already see only one CUDA device, because of env vars
            assert torch.cuda.device_count() == 1
            self.device = torch.device('cuda', index=0)
        else:
            self.device = torch.device('cpu')

        self.init_model(timing)

        self.optimizer = torch.optim.Adam(
            self.actor_critic.parameters(),
            self.cfg.learning_rate,
            betas=(self.cfg.adam_beta1, self.cfg.adam_beta2),
            eps=self.cfg.adam_eps,
        )

        self.load_from_checkpoint(self.policy_id)
        self._broadcast_model_weights()  # sync the very first version of the weights

    self.train_thread_initialized.set()
Example #24
Source File: dataloader.py From 3D_Appearance_SR with MIT License | 5 votes |
def _ms_loop(dataset, index_queue, data_queue, collate_fn, scale, seed, init_fn, worker_id):
    global _use_shared_memory
    _use_shared_memory = True
    _set_worker_signal_handlers()

    torch.set_num_threads(1)
    torch.manual_seed(seed)
    while True:
        r = index_queue.get()
        if r is None:
            break
        idx, batch_indices = r
        try:
            idx_scale = 0
            if len(scale) > 1 and dataset.train:
                idx_scale = random.randrange(0, len(scale))
                dataset.set_scale(idx_scale)
            samples = collate_fn([dataset[i] for i in batch_indices])
            samples.append(idx_scale)  # This is why idx_scale appears in the samples of the train loader
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))
Example #25
Source File: utils.py From robotics-rl-srl with MIT License | 5 votes |
def _run(self, env_kwargs):
    # this is to control the number of CPUs that torch is allowed to use.
    # By default it will use all CPUs, even with GPU acceleration
    th.set_num_threads(1)
    self.model = loadSRLModel(env_kwargs.get("srl_model_path", None),
                              th.cuda.is_available(), self.state_dim, env_object=None)
    # run until the end of the caller thread
    while True:
        # pop an item, get state, and return to sender.
        env_id, var = self.pipe[0].get()
        self.pipe[1][env_id].put(self.model.getState(var, env_id=env_id))
Example #26
Source File: torchloader.py From mxbox with BSD 3-Clause "New" or "Revised" License | 5 votes |
def _worker_loop(dataset, index_queue, data_queue, collate_fn):
    global _use_shared_memory
    _use_shared_memory = True

    # torch.set_num_threads(1)
    while True:
        r = index_queue.get()
        if r is None:
            data_queue.put(None)
            break
        idx, batch_indices = r
        try:
            samples = collate_fn([dataset[i] for i in batch_indices])
        except Exception:
            data_queue.put((idx, ExceptionWrapper(sys.exc_info())))
        else:
            data_queue.put((idx, samples))

# numpy_type_map = {
#     'float64': torch.DoubleTensor,
#     'float32': torch.FloatTensor,
#     'float16': torch.HalfTensor,
#     'int64': torch.LongTensor,
#     'int32': torch.IntTensor,
#     'int16': torch.ShortTensor,
#     'int8': torch.CharTensor,
#     'uint8': torch.ByteTensor,
# }
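A pattern worth noting across these examples: most call torch.set_num_threads(1) because they run many PyTorch processes at once, and letting each default to all cores causes CPU oversubscription (and, in several of the projects above, MKL hangs). As a closing sketch, one alternative is to split the cores evenly among workers; this even-split policy is an assumption of mine, not taken from any project above, which mostly hard-code 1.

import multiprocessing

import torch

def configure_torch_threads(num_workers):
    # Give each of num_workers processes an equal share of the cores,
    # but never fewer than one thread.
    threads = max(1, multiprocessing.cpu_count() // max(1, num_workers))
    torch.set_num_threads(threads)
    return threads

# e.g. with 16 cores and 4 workers, each worker gets 4 threads
print(configure_torch_threads(4))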