Python stable_baselines.common.vec_env.SubprocVecEnv() Examples
The following are 14 code examples of stable_baselines.common.vec_env.SubprocVecEnv(), drawn from open-source projects. The source file and originating project are listed above each example. You may also want to check out the other functions and classes available in the stable_baselines.common.vec_env module.
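Before the examples, here is a minimal sketch of the basic SubprocVecEnv pattern they all share: the constructor takes a list of argument-free callables, each of which builds one environment in its own worker process. The environment ID 'CartPole-v1' and the helper name make_env are illustrative choices rather than part of the examples below, and the __main__ guard matters because the 'spawn' and 'forkserver' start methods re-import the main module in each worker.

import gym
from stable_baselines.common.vec_env import SubprocVecEnv

def make_env(rank, seed=0):
    # Return a thunk that creates and seeds a single environment.
    def _init():
        env = gym.make('CartPole-v1')
        env.seed(seed + rank)
        return env
    return _init

if __name__ == '__main__':
    # Four environments, each stepped in its own worker process.
    env = SubprocVecEnv([make_env(i) for i in range(4)])
    obs = env.reset()
    # step() expects one action per environment and returns batched results.
    obs, rewards, dones, infos = env.step([env.action_space.sample() for _ in range(4)])
    env.close()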
Example #1
Source File: cmd_util.py From stable-baselines with MIT License
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None,
                   start_index=0, allow_early_resets=True,
                   start_method=None, use_subprocess=False):
    """
    Create a wrapped, monitored VecEnv for Atari.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param wrapper_kwargs: (dict) the parameters for the wrap_deepmind function
    :param start_index: (int) start rank index
    :param allow_early_resets: (bool) allows early reset of the environment
    :param start_method: (str) method used to start the subprocesses.
        See SubprocVecEnv doc for more information
    :param use_subprocess: (bool) Whether to use `SubprocVecEnv` or `DummyVecEnv` when
        `num_env` > 1. `DummyVecEnv` is usually faster. Default: False
    :return: (VecEnv) The atari environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                          allow_early_resets=allow_early_resets)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk

    set_global_seeds(seed)

    # When using one environment, no need to start subprocesses
    if num_env == 1 or not use_subprocess:
        return DummyVecEnv([make_env(i + start_index) for i in range(num_env)])

    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)],
                         start_method=start_method)
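As a hedged illustration of how the helper above might be called (the Atari environment ID is an assumption, and the Gym Atari extras must be installed):

# Eight Atari workers in subprocesses; with use_subprocess=False (the default) a DummyVecEnv is returned instead.
env = make_atari_env('BreakoutNoFrameskip-v4', num_env=8, seed=0, use_subprocess=True)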
Example #2
Source File: test_vec_envs.py From stable-baselines with MIT License
def test_subproc_start_method():
    start_methods = [None]
    # Only test thread-safe methods. Others may deadlock tests! (gh/428)
    safe_methods = {'forkserver', 'spawn'}
    available_methods = multiprocessing.get_all_start_methods()
    start_methods += list(safe_methods.intersection(available_methods))
    space = gym.spaces.Discrete(2)

    def obs_assert(obs):
        return check_vecenv_obs(obs, space)

    for start_method in start_methods:
        vec_env_class = functools.partial(SubprocVecEnv, start_method=start_method)
        check_vecenv_spaces(vec_env_class, space, obs_assert)

    with pytest.raises(ValueError, match="cannot find context for 'illegal_method'"):
        vec_env_class = functools.partial(SubprocVecEnv, start_method='illegal_method')
        check_vecenv_spaces(vec_env_class, space, obs_assert)
Example #3
Source File: test_utils.py From stable-baselines with MIT License
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id, n_envs, vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()
Example #4
Source File: evaluator.py From MazeExplorer with MIT License
def load_stable_baselines_env(cfg_path, vector_length, mp, n_stack, number_maps,
                              action_frame_repeat, scaled_resolution):
    env_fn = lambda: MazeExplorer.load_vizdoom_env(cfg_path, number_maps,
                                                   action_frame_repeat, scaled_resolution)

    if mp:
        env = SubprocVecEnv([env_fn for _ in range(vector_length)])
    else:
        env = DummyVecEnv([env_fn for _ in range(vector_length)])

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    return env
Example #5
Source File: test_lstm_policy.py From stable-baselines with MIT License
def test_lstm_train():
    """Test that LSTM models are able to achieve >=150 (out of 500) reward on CartPoleNoVelEnv.

    This environment requires memory to perform well in."""
    def make_env(i):
        env = CartPoleNoVelEnv()
        env = TimeLimit(env, max_episode_steps=500)
        env = bench.Monitor(env, None, allow_early_resets=True)
        env.seed(i)
        return env

    # Bind i by value (default argument) so each worker gets a distinct seed,
    # instead of every closure sharing the final value of i.
    env = SubprocVecEnv([lambda i=i: make_env(i) for i in range(NUM_ENVS)])
    env = VecNormalize(env)
    model = PPO2(MlpLstmPolicy, env, n_steps=128, nminibatches=NUM_ENVS,
                 lam=0.95, gamma=0.99, noptepochs=10, ent_coef=0.0,
                 learning_rate=3e-4, cliprange=0.2, verbose=1)

    eprewmeans = []

    def reward_callback(local, _):
        nonlocal eprewmeans
        eprewmeans.append(safe_mean([ep_info['r'] for ep_info in local['ep_info_buf']]))

    model.learn(total_timesteps=100000, callback=reward_callback)

    # Maximum episode reward is 500.
    # In CartPole-v1, a non-recurrent policy can easily get >= 450.
    # In CartPoleNoVelEnv, a non-recurrent policy doesn't get more than ~50.
    # LSTM policies can reach above 400, but it varies a lot between runs; they consistently get >= 150.
    # See PR #244 for more detailed benchmarks.
    average_reward = sum(eprewmeans[-NUM_EPISODES_FOR_SCORE:]) / NUM_EPISODES_FOR_SCORE
    assert average_reward >= 150, "Mean reward below 150; per-episode rewards {}".format(average_reward)
Example #6
Source File: train_ppo.py From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License
def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset, debug,
                   task_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([
        lambda k=k: Monitor(
            env_temp("sim%d" % (k + 1), StateCollector("sim%s" % (k + 1), "train"),
                     stack_offset, num_stacks, robot_radius, rew_fnc, debug, "train", task_mode),
            '%s/%s/sim_%d' % (path_to_models, agent_name, k + 1),
            allow_early_resets=True)
        for k in range(num_envs)])

    # Normalizing?
    if normalize:
        env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False,
                           clip_obs=100.0, clip_reward=10.0, gamma=0.99, epsilon=1e-08)

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #7
Source File: train_maneuver_DDPG.py From flappy with MIT License
def main(args):
    start = time.time()

    env_id = 'fwmav_maneuver-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )

    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #8
Source File: train_DDPG.py From flappy with MIT License
def main(args):
    start = time.time()

    env_id = 'fwmav_hover-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )

    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #9
Source File: train.py From flappy with MIT License
def main(args):
    try:
        model_cls = getattr(importlib.import_module('stable_baselines'), args.model_type)
    except AttributeError:
        print(args.model_type, "Error: wrong model type")
        return
    try:
        policy_cls = getattr(importlib.import_module('stable_baselines.common.policies'),
                             args.policy_type)
    except AttributeError:
        print(args.policy_type, "Error: wrong policy type")
        return

    start = time.time()

    env_id = 'fwmav_hover-v0'
    # env = DummyVecEnv([make_env(env_id, 1)])
    env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    model = model_cls(policy_cls, env, verbose=0)
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #10
Source File: utils.py From robotics-rl-srl with MIT License
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done,
        usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to loading the rolling average,
        None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = [makeEnv(args.env, args.seed, i, args.log_dir,
                    allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #11
Source File: train.py From flow with MIT License
def run_model_stablebaseline(flow_params, num_cpus=1, rollout_size=50, num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
        The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model
Example #12
Source File: RLTrader.py From RLTrader with GNU General Public License v3.0
def train(self,
          n_epochs: int = 10,
          save_every: int = 1,
          test_trained_model: bool = True,
          render_test_env: bool = False,
          render_report: bool = True,
          save_report: bool = False):
    train_provider, test_provider = self.data_provider.split_data_train_test(
        self.train_split_percentage)
    del test_provider

    train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(self.n_envs)])

    model_params = self.get_model_params()

    model = self.Model(self.Policy,
                       train_env,
                       verbose=self.model_verbose,
                       nminibatches=self.n_minibatches,
                       tensorboard_log=self.tensorboard_path,
                       **model_params)

    self.logger.info(f'Training for {n_epochs} epochs')

    steps_per_epoch = len(train_provider.data_frame)

    for model_epoch in range(0, n_epochs):
        self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

        model.learn(total_timesteps=steps_per_epoch)

        if model_epoch % save_every == 0:
            model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
            model.save(model_path)

            if test_trained_model:
                self.test(model_epoch,
                          render_env=render_test_env,
                          render_report=render_report,
                          save_report=save_report)

    self.logger.info(f'Trained {n_epochs} models')
Example #13
Source File: cmd_util.py From stable-baselines with MIT License
def make_vec_env(env_id, n_envs=1, seed=None, start_index=0,
                 monitor_dir=None, wrapper_class=None,
                 env_kwargs=None, vec_env_cls=None, vec_env_kwargs=None):
    """
    Create a wrapped, monitored `VecEnv`.
    By default it uses a `DummyVecEnv` which is usually faster
    than a `SubprocVecEnv`.

    :param env_id: (str or Type[gym.Env]) the environment ID or the environment class
    :param n_envs: (int) the number of environments you wish to have in parallel
    :param seed: (int) the initial seed for the random number generator
    :param start_index: (int) start rank index
    :param monitor_dir: (str) Path to a folder where the monitor files will be saved.
        If None, no file will be written, however, the env will still be wrapped
        in a Monitor wrapper to provide additional information about training.
    :param wrapper_class: (gym.Wrapper or callable) Additional wrapper to use on the environment.
        This can also be a function with single argument that wraps the environment in many things.
    :param env_kwargs: (dict) Optional keyword argument to pass to the env constructor
    :param vec_env_cls: (Type[VecEnv]) A custom `VecEnv` class constructor. Default: None.
    :param vec_env_kwargs: (dict) Keyword arguments to pass to the `VecEnv` class constructor.
    :return: (VecEnv) The wrapped environment
    """
    env_kwargs = {} if env_kwargs is None else env_kwargs
    vec_env_kwargs = {} if vec_env_kwargs is None else vec_env_kwargs

    def make_env(rank):
        def _init():
            if isinstance(env_id, str):
                env = gym.make(env_id)
                if len(env_kwargs) > 0:
                    warnings.warn("No environment class was passed (only an env ID) "
                                  "so `env_kwargs` will be ignored")
            else:
                env = env_id(**env_kwargs)
            if seed is not None:
                env.seed(seed + rank)
                env.action_space.seed(seed + rank)
            # Wrap the env in a Monitor wrapper
            # to have additional training information
            monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
            # Create the monitor folder if needed
            if monitor_path is not None:
                os.makedirs(monitor_dir, exist_ok=True)
            env = Monitor(env, filename=monitor_path)
            # Optionally, wrap the environment with the provided wrapper
            if wrapper_class is not None:
                env = wrapper_class(env)
            return env
        return _init

    # No custom VecEnv is passed
    if vec_env_cls is None:
        # Default: use a DummyVecEnv
        vec_env_cls = DummyVecEnv

    return vec_env_cls([make_env(i + start_index) for i in range(n_envs)], **vec_env_kwargs)
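For illustration, a call to the helper above that forces subprocess workers might look like this (the environment ID 'CartPole-v1' is an assumed example, not part of the source):

# Four environments run in their own processes instead of the default DummyVecEnv.
env = make_vec_env('CartPole-v1', n_envs=4, seed=0, vec_env_cls=SubprocVecEnv)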
Example #14
Source File: train.py From rl-baselines-zoo with MIT License
def create_env(n_envs, eval_env=False):
    """
    Create the environment and wrap it if necessary

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env else save_path

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper,
                                        log_dir=log_dir, env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most env, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                        wrapper_class=env_wrapper, env_kwargs=env_kwargs)
                               for i in range(n_envs)])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))
        del hyperparams['frame_stack']

    if args.algo == 'her':
        # Wrap the env if need to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)

    return env