Python stable_baselines.common.vec_env.SubprocVecEnv() Examples

The following are 14 code examples of stable_baselines.common.vec_env.SubprocVecEnv(), taken from open-source projects. Each example is labeled with its original project and source file. You may also want to check out the other available functions and classes of the module stable_baselines.common.vec_env.
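Before the project examples, here is a minimal, self-contained sketch of the usage pattern they all share: SubprocVecEnv takes a list of argument-free callables (thunks), each of which constructs one environment inside its own worker process. CartPole-v1 and the worker count of 4 are illustrative choices only, not taken from any of the projects below.

import gym
from stable_baselines.common.vec_env import SubprocVecEnv

def make_env(rank, seed=0):
    # Return a thunk so the environment is built inside the subprocess
    def _init():
        env = gym.make('CartPole-v1')
        env.seed(seed + rank)
        return env
    return _init

if __name__ == '__main__':
    # The __main__ guard matters: SubprocVecEnv starts worker processes
    env = SubprocVecEnv([make_env(i) for i in range(4)])
    obs = env.reset()
    # step() expects one action per worker
    obs, rewards, dones, infos = env.step([env.action_space.sample() for _ in range(4)])
    env.close()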
Example #1
Source File: cmd_util.py    From stable-baselines with MIT License
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None,
                   start_index=0, allow_early_resets=True,
                   start_method=None, use_subprocess=False):
    """
    Create a wrapped, monitored VecEnv for Atari.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param wrapper_kwargs: (dict) the parameters for wrap_deepmind function
    :param start_index: (int) start rank index
    :param allow_early_resets: (bool) allows early reset of the environment
    :param start_method: (str) method used to start the subprocesses.
        See SubprocVecEnv doc for more information
    :param use_subprocess: (bool) Whether to use `SubprocVecEnv` or `DummyVecEnv` when
        `num_env` > 1; `DummyVecEnv` is usually faster. Default: False
    :return: (VecEnv) The atari environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                          allow_early_resets=allow_early_resets)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)

    # When using one environment, no need to start subprocesses
    if num_env == 1 or not use_subprocess:
        return DummyVecEnv([make_env(i + start_index) for i in range(num_env)])

    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)],
                         start_method=start_method) 
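A hedged usage sketch for the function above (it assumes the Atari dependencies and PPO2 are installed; BreakoutNoFrameskip-v4, the worker count, and the timestep budget are illustrative assumptions, not taken from the source file):

from stable_baselines import PPO2
from stable_baselines.common.vec_env import VecFrameStack

# Eight Atari workers in subprocesses, frame-stacked before a CNN policy
env = make_atari_env('BreakoutNoFrameskip-v4', num_env=8, seed=0, use_subprocess=True)
env = VecFrameStack(env, n_stack=4)
model = PPO2('CnnPolicy', env, verbose=1)
model.learn(total_timesteps=10000)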
Example #2
Source File: test_vec_envs.py    From stable-baselines with MIT License
def test_subproc_start_method():
    start_methods = [None]
    # Only test thread-safe methods. Others may deadlock tests! (gh/428)
    safe_methods = {'forkserver', 'spawn'}
    available_methods = multiprocessing.get_all_start_methods()
    start_methods += list(safe_methods.intersection(available_methods))
    space = gym.spaces.Discrete(2)

    def obs_assert(obs):
        return check_vecenv_obs(obs, space)

    for start_method in start_methods:
        vec_env_class = functools.partial(SubprocVecEnv, start_method=start_method)
        check_vecenv_spaces(vec_env_class, space, obs_assert)

    with pytest.raises(ValueError, match="cannot find context for 'illegal_method'"):
        vec_env_class = functools.partial(SubprocVecEnv, start_method='illegal_method')
        check_vecenv_spaces(vec_env_class, space, obs_assert) 
Example #3
Source File: test_utils.py    From stable-baselines with MIT License
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id, n_envs, vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close() 
Example #4
Source File: evaluator.py    From MazeExplorer with MIT License
def load_stable_baselines_env(cfg_path, vector_length, mp, n_stack, number_maps, action_frame_repeat,
                              scaled_resolution):
    env_fn = lambda: MazeExplorer.load_vizdoom_env(cfg_path, number_maps, action_frame_repeat, scaled_resolution)

    if mp:
        env = SubprocVecEnv([env_fn for _ in range(vector_length)])
    else:
        env = DummyVecEnv([env_fn for _ in range(vector_length)])

    if n_stack > 0:
        env = VecFrameStack(env, n_stack=n_stack)

    return env 
Example #5
Source File: test_lstm_policy.py    From stable-baselines with MIT License
def test_lstm_train():
    """Test that LSTM models are able to achieve >=150 (out of 500) reward on CartPoleNoVelEnv.

    This environment requires memory to perform well in."""
    def make_env(i):
        env = CartPoleNoVelEnv()
        env = TimeLimit(env, max_episode_steps=500)
        env = bench.Monitor(env, None, allow_early_resets=True)
        env.seed(i)
        return env

    env = SubprocVecEnv([lambda: make_env(i) for i in range(NUM_ENVS)])
    env = VecNormalize(env)
    model = PPO2(MlpLstmPolicy, env, n_steps=128, nminibatches=NUM_ENVS, lam=0.95, gamma=0.99,
                 noptepochs=10, ent_coef=0.0, learning_rate=3e-4, cliprange=0.2, verbose=1)

    eprewmeans = []
    def reward_callback(local, _):
        nonlocal eprewmeans
        eprewmeans.append(safe_mean([ep_info['r'] for ep_info in local['ep_info_buf']]))

    model.learn(total_timesteps=100000, callback=reward_callback)

    # Maximum episode reward is 500.
    # In CartPole-v1, a non-recurrent policy can easily get >= 450.
    # In CartPoleNoVelEnv, a non-recurrent policy doesn't get more than ~50.
    # LSTM policies can reach above 400, but it varies a lot between runs; consistently get >=150.
    # See PR #244 for more detailed benchmarks.

    average_reward = sum(eprewmeans[-NUM_EPISODES_FOR_SCORE:]) / NUM_EPISODES_FOR_SCORE
    assert average_reward >= 150, "Mean reward below 150: {}".format(average_reward) 
Example #6
Source File: train_ppo.py    From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License
def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset, debug, task_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([
        lambda k=k: Monitor(
            env_temp("sim%d" % (k + 1), StateCollector("sim%s" % (k + 1), "train"),
                     stack_offset, num_stacks, robot_radius, rew_fnc, debug, "train", task_mode),
            '%s/%s/sim_%d' % (path_to_models, agent_name, k + 1),
            allow_early_resets=True)
        for k in range(num_envs)])

    # Normalizing?
    if normalize:
        env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False, clip_obs=100.0, clip_reward=10.0,
                           gamma=0.99, epsilon=1e-08)
    else:
        env = env

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env 
Example #7
Source File: train_maneuver_DDPG.py    From flappy with MIT License
def main(args):

	start = time.time()

	env_id = 'fwmav_maneuver-v0'
	env = DummyVecEnv([make_env(env_id, 0)])
	# env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

	n_actions = env.action_space.shape[-1]
	param_noise = None
	action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

	model = DDPG(
			policy = MyDDPGPolicy,
			env = env,
			gamma = 1.0,
			nb_train_steps=5000,
			nb_rollout_steps=10000,
			nb_eval_steps=10000,
			param_noise=param_noise,
			action_noise=action_noise,
			tau=0.003,
			batch_size=256,
			observation_range=(-np.inf, np.inf),
			actor_lr=0.0001,
			critic_lr=0.001,
			reward_scale=0.05,
			memory_limit=10000000,
			verbose=1,
	)

	model.learn(total_timesteps=args.time_step)
	model.save(args.model_path)

	end = time.time()
	print("Time used: ", end - start) 
Example #8
Source File: train_DDPG.py    From flappy with MIT License
def main(args):

	start = time.time()

	env_id = 'fwmav_hover-v0'
	env = DummyVecEnv([make_env(env_id, 0)])
	# env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

	n_actions = env.action_space.shape[-1]
	param_noise = None
	action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

	model = DDPG(
			policy = MyDDPGPolicy,
			env = env,
			gamma = 1.0,
			nb_train_steps=5000,
			nb_rollout_steps=10000,
			nb_eval_steps=10000,
			param_noise=param_noise,
			action_noise=action_noise,
			tau=0.003,
			batch_size=256,
			observation_range=(-np.inf, np.inf),
			actor_lr=0.0001,
			critic_lr=0.001,
			reward_scale=0.05,
			memory_limit=10000000,
			verbose=1,
	)

	model.learn(total_timesteps=args.time_step)
	model.save(args.model_path)

	end = time.time()
	print("Time used: ", end - start) 
Example #9
Source File: train.py    From flappy with MIT License
def main(args):

    try:
        model_cls = getattr(importlib.import_module(
            'stable_baselines'), args.model_type)
    except AttributeError:
        print(args.model_type, "Error: wrong model type")
        return

    try:
        policy_cls = getattr(importlib.import_module(
            'stable_baselines.common.policies'), args.policy_type)
    except AttributeError:
        print(args.policy_type, "Error: wrong policy type")
        return

    start = time.time()

    env_id = 'fwmav_hover-v0'
    # env = DummyVecEnv([make_env(env_id, 1)])
    env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    model = model_cls(policy_cls, env, verbose=0)
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start) 
Example #10
Source File: utils.py    From robotics-rl-srl with MIT License
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to loading the rolling average, None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe
    envs = [makeEnv(args.env, args.seed, i, args.log_dir, allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs 
Example #11
Source File: train.py    From flow with MIT License
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps
    The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model 
Example #12
Source File: RLTrader.py    From RLTrader with GNU General Public License v3.0
def train(self,
              n_epochs: int = 10,
              save_every: int = 1,
              test_trained_model: bool = True,
              render_test_env: bool = False,
              render_report: bool = True,
              save_report: bool = False):
        train_provider, test_provider = self.data_provider.split_data_train_test(self.train_split_percentage)

        del test_provider

        train_env = SubprocVecEnv([make_env(train_provider, i) for i in range(self.n_envs)])

        model_params = self.get_model_params()

        model = self.Model(self.Policy,
                           train_env,
                           verbose=self.model_verbose,
                           nminibatches=self.n_minibatches,
                           tensorboard_log=self.tensorboard_path,
                           **model_params)

        self.logger.info(f'Training for {n_epochs} epochs')

        steps_per_epoch = len(train_provider.data_frame)

        for model_epoch in range(0, n_epochs):
            self.logger.info(f'[{model_epoch}] Training for: {steps_per_epoch} time steps')

            model.learn(total_timesteps=steps_per_epoch)

            if model_epoch % save_every == 0:
                model_path = path.join('data', 'agents', f'{self.study_name}__{model_epoch}.pkl')
                model.save(model_path)

                if test_trained_model:
                    self.test(model_epoch,
                              render_env=render_test_env,
                              render_report=render_report,
                              save_report=save_report)

        self.logger.info(f'Trained {n_epochs} models') 
Example #13
Source File: cmd_util.py    From stable-baselines with MIT License
def make_vec_env(env_id, n_envs=1, seed=None, start_index=0,
                 monitor_dir=None, wrapper_class=None,
                 env_kwargs=None, vec_env_cls=None, vec_env_kwargs=None):
    """
    Create a wrapped, monitored `VecEnv`.
    By default it uses a `DummyVecEnv` which is usually faster
    than a `SubprocVecEnv`.

    :param env_id: (str or Type[gym.Env]) the environment ID or the environment class
    :param n_envs: (int) the number of environments you wish to have in parallel
    :param seed: (int) the initial seed for the random number generator
    :param start_index: (int) start rank index
    :param monitor_dir: (str) Path to a folder where the monitor files will be saved.
        If None, no file will be written, however, the env will still be wrapped
        in a Monitor wrapper to provide additional information about training.
    :param wrapper_class: (gym.Wrapper or callable) Additional wrapper to use on the environment.
        This can also be a function with single argument that wraps the environment in many things.
    :param env_kwargs: (dict) Optional keyword arguments to pass to the env constructor
    :param vec_env_cls: (Type[VecEnv]) A custom `VecEnv` class constructor. Default: None.
    :param vec_env_kwargs: (dict) Keyword arguments to pass to the `VecEnv` class constructor.
    :return: (VecEnv) The wrapped environment
    """
    env_kwargs = {} if env_kwargs is None else env_kwargs
    vec_env_kwargs = {} if vec_env_kwargs is None else vec_env_kwargs

    def make_env(rank):
        def _init():
            if isinstance(env_id, str):
                env = gym.make(env_id)
                if len(env_kwargs) > 0:
                    warnings.warn("No environment class was passed (only an env ID) so `env_kwargs` will be ignored")
            else:
                env = env_id(**env_kwargs)
            if seed is not None:
                env.seed(seed + rank)
                env.action_space.seed(seed + rank)
            # Wrap the env in a Monitor wrapper
            # to have additional training information
            monitor_path = os.path.join(monitor_dir, str(rank)) if monitor_dir is not None else None
            # Create the monitor folder if needed
            if monitor_path is not None:
                os.makedirs(monitor_dir, exist_ok=True)
            env = Monitor(env, filename=monitor_path)
            # Optionally, wrap the environment with the provided wrapper
            if wrapper_class is not None:
                env = wrapper_class(env)
            return env
        return _init

    # No custom VecEnv is passed
    if vec_env_cls is None:
        # Default: use a DummyVecEnv
        vec_env_cls = DummyVecEnv

    return vec_env_cls([make_env(i + start_index) for i in range(n_envs)], **vec_env_kwargs) 
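A hedged usage sketch for the function above (CartPole-v1 and the env count are illustrative; when vec_env_cls is omitted the function falls back to DummyVecEnv, as the code shows):

from stable_baselines.common.vec_env import SubprocVecEnv

# Pass SubprocVecEnv explicitly to run each of the four environments
# in its own process instead of the default DummyVecEnv
env = make_vec_env('CartPole-v1', n_envs=4, seed=0, vec_env_cls=SubprocVecEnv)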
Example #14
Source File: train.py    From rl-baselines-zoo with MIT License
def create_env(n_envs, eval_env=False):
        """
        Create the environment and wrap it if necessary
        :param n_envs: (int)
        :param eval_env: (bool) Whether it is an environment used for evaluation or not
        :return: (Union[gym.Env, VecEnv])
        """
        global hyperparams
        global env_kwargs

        # Do not log eval env (issue with writing the same file)
        log_dir = None if eval_env else save_path

        if is_atari:
            if args.verbose > 0:
                print("Using Atari wrapper")
            env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
            # Frame-stacking with 4 frames
            env = VecFrameStack(env, n_stack=4)
        elif algo_ in ['dqn', 'ddpg']:
            if hyperparams.get('normalize', False):
                print("WARNING: normalization not supported yet for DDPG/DQN")
            env = gym.make(env_id, **env_kwargs)
            env.seed(args.seed)
            if env_wrapper is not None:
                env = env_wrapper(env)
        else:
            if n_envs == 1:
                env = DummyVecEnv([make_env(env_id, 0, args.seed, wrapper_class=env_wrapper, log_dir=log_dir, env_kwargs=env_kwargs)])
            else:
                # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
                # On most env, SubprocVecEnv does not help and is quite memory hungry
                env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                            wrapper_class=env_wrapper, env_kwargs=env_kwargs) for i in range(n_envs)])
            if normalize:
                if args.verbose > 0:
                    if len(normalize_kwargs) > 0:
                        print("Normalization activated: {}".format(normalize_kwargs))
                    else:
                        print("Normalizing input and reward")
                env = VecNormalize(env, **normalize_kwargs)
        # Optional Frame-stacking
        if hyperparams.get('frame_stack', False):
            n_stack = hyperparams['frame_stack']
            env = VecFrameStack(env, n_stack)
            print("Stacking {} frames".format(n_stack))
            del hyperparams['frame_stack']
        if args.algo == 'her':
            # Wrap the env if need to flatten the dict obs
            if isinstance(env, VecEnv):
                env = _UnvecWrapper(env)
            env = HERGoalEnvWrapper(env)
        return env