Python stable_baselines.common.vec_env.DummyVecEnv() Examples

The following are 30 code examples of stable_baselines.common.vec_env.DummyVecEnv(). You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the module stable_baselines.common.vec_env.
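Before the examples, here is a minimal, self-contained sketch of the typical DummyVecEnv workflow. It is illustrative only: the CartPole environment, the PPO2 algorithm, and the hyper-parameters are assumptions, not taken from any of the examples below.

import gym

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

# DummyVecEnv takes a list of zero-argument callables that build the
# environments, and steps them sequentially in the current process.
env = DummyVecEnv([lambda: gym.make('CartPole-v1')])

model = PPO2('MlpPolicy', env, verbose=0)
model.learn(total_timesteps=1000)

obs = env.reset()                       # batched: shape (num_envs,) + obs shape
action, _states = model.predict(obs)
obs, rewards, dones, infos = env.step(action)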
Example #1
Source File: cmd_util.py    From stable-baselines with MIT License
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None,
                   start_index=0, allow_early_resets=True,
                   start_method=None, use_subprocess=False):
    """
    Create a wrapped, monitored VecEnv for Atari.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param wrapper_kwargs: (dict) the parameters for wrap_deepmind function
    :param start_index: (int) start rank index
    :param allow_early_resets: (bool) allows early reset of the environment
    :param start_method: (str) method used to start the subprocesses.
        See SubprocVecEnv doc for more information
    :param use_subprocess: (bool) Whether to use `SubprocVecEnv` or `DummyVecEnv` when
        `num_env` > 1, `DummyVecEnv` is usually faster. Default: False
    :return: (VecEnv) The atari environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                          allow_early_resets=allow_early_resets)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)

    # When using one environment, no need to start subprocesses
    if num_env == 1 or not use_subprocess:
        return DummyVecEnv([make_env(i + start_index) for i in range(num_env)])

    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)],
                         start_method=start_method) 
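For reference, a call to the helper above might look like the following; the Atari environment ID is illustrative.

# Four Atari environments stepped sequentially inside a DummyVecEnv,
# since use_subprocess defaults to False.
env = make_atari_env('BreakoutNoFrameskip-v4', num_env=4, seed=0)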
Example #2
Source File: test_utils.py    From stable-baselines with MIT License
def test_custom_vec_env():
    """
    Standalone test for a special case (passing a custom VecEnv class) to avoid doubling the number of tests.
    """
    monitor_dir = 'logs/test_make_vec_env/'
    env = make_vec_env('CartPole-v1', n_envs=1,
                       monitor_dir=monitor_dir, seed=0,
                       vec_env_cls=SubprocVecEnv, vec_env_kwargs={'start_method': None})

    assert env.num_envs == 1
    assert isinstance(env, SubprocVecEnv)
    assert os.path.isdir('logs/test_make_vec_env/')
    # Kill subprocess
    env.close()
    # Cleanup folder
    shutil.rmtree(monitor_dir)

    # This should fail because DummyVecEnv does not accept any keyword arguments
    with pytest.raises(TypeError):
        make_vec_env('CartPole-v1', n_envs=1, vec_env_kwargs={'dummy': False}) 
Example #3
Source File: test_utils.py    From stable-baselines with MIT License
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id, n_envs, vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close() 
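Based on the two tests above, a direct call to make_vec_env with its default vectorization might look like this (a sketch assuming the helper is importable from stable_baselines.common.cmd_util, as in recent stable-baselines releases):

from stable_baselines.common.cmd_util import make_vec_env
from stable_baselines.common.vec_env import DummyVecEnv

# With vec_env_cls left at its default, make_vec_env returns a DummyVecEnv
# whose sub-environments are wrapped in a Monitor (see the assertions above).
env = make_vec_env('CartPole-v1', n_envs=2, seed=0)
assert isinstance(env, DummyVecEnv)
assert env.num_envs == 2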
Example #4
Source File: RLTrader.py    From RLTrader with GNU General Public License v3.0
def initialize_optuna(self):
        try:
            train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
            model = self.Model(self.Policy, train_env, nminibatches=1)
            strategy = self.Reward_Strategy()

            self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}__{strategy.__class__.__name__}'
        except Exception:
            self.study_name = 'UnknownModel__UnknownPolicy__UnknownStrategy'

        self.optuna_study = optuna.create_study(
            study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

        self.logger.debug('Initialized Optuna:')

        try:
            self.logger.debug(
                f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
        except Exception:
            self.logger.debug('No trials have been finished yet.') 
Example #5
Source File: test_action_space.py    From stable-baselines with MIT License
def test_identity_multidiscrete(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multidiscrete action space

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000)
    evaluate_policy(model, env, n_eval_episodes=5)
    obs = env.reset()

    assert np.array(model.action_probability(obs)).shape == (2, 1, 10), \
        "Error: action_probability not returning correct shape"
    assert np.prod(model.action_probability(obs, actions=env.action_space.sample()).shape) == 1, \
        "Error: not scalar probability" 
Example #6
Source File: test_vec_envs.py    From stable-baselines with MIT License
def test_vecenv_wrapper_getattr():
    def make_env():
        return CustomGymEnv(gym.spaces.Box(low=np.zeros(2), high=np.ones(2)))
    vec_env = DummyVecEnv([make_env for _ in range(N_ENVS)])
    wrapped = CustomWrapperA(CustomWrapperBB(vec_env))
    assert wrapped.var_a == 'a'
    assert wrapped.var_b == 'b'
    assert wrapped.var_bb == 'bb'
    assert wrapped.func_b() == 'b'
    assert wrapped.name_test() == CustomWrapperBB

    double_wrapped = CustomWrapperA(CustomWrapperB(wrapped))
    dummy = double_wrapped.var_a  # should not raise as it is directly defined here
    with pytest.raises(AttributeError):  # should raise due to ambiguity
        dummy = double_wrapped.var_b
    with pytest.raises(AttributeError):  # should raise as does not exist
        dummy = double_wrapped.nonexistent_attribute
    del dummy  # keep linter happy 
Example #7
Source File: test_multiple_learn.py    From stable-baselines with MIT License
def test_model_multiple_learn_no_reset(model_class):
    """Check that when we call learn multiple times, we don't unnecessarily
    reset the environment.
    """
    if model_class is ACER:
        def make_env():
            return IdentityEnv(ep_length=1e10, dim=2)
    else:
        def make_env():
            return IdentityEnvBox(ep_length=1e10)
    env = make_env()
    venv = DummyVecEnv([lambda: env])
    model = model_class(policy="MlpPolicy", env=venv)
    _check_reset_count(model, env)

    # Try again following a `set_env`.
    env = make_env()
    venv = DummyVecEnv([lambda: env])
    assert env.num_resets == 0

    model.set_env(venv)
    _check_reset_count(model, env) 
Example #8
Source File: test_identity.py    From stable-baselines with MIT License
def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)
    evaluate_policy(model, env, n_eval_episodes=20, reward_threshold=90)

    obs = env.reset()
    assert model.action_probability(obs).shape == (1, 10), "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    # Free memory
    del model, env 
Example #9
Source File: test_simple.py    From flappy with MIT License
def main(args):
	env_id = 'fwmav_hover-v1'

	env = DummyVecEnv([make_env(env_id, 0, random_init = args.rand_init, randomize_sim = args.rand_dynamics, phantom_sensor = args.phantom_sensor)])

	model = LazyModel(env.envs[0],args.model_type)

	obs = env.reset()

	while True:
		if env.envs[0].is_sim_on == False:
			env.envs[0].gui.cv.wait()
		elif env.envs[0].is_sim_on:
			action, _ = model.predict(obs)
			obs, rewards, done, info = env.step(action)
			if done:
				obs = env.reset() 
Example #10
Source File: test_rollout.py    From imitation with MIT License
def test_unwrap_traj():
    """Check that unwrap_traj reverses `ObsRewIncrementWrapper`.

    Also check that unwrapping twice is a no-op.
    """
    env = gym.make("CartPole-v1")
    env = wrappers.RolloutInfoWrapper(env)
    env = ObsRewHalveWrapper(env)
    venv = vec_env.DummyVecEnv([lambda: env])

    with serialize.load_policy("zero", "UNUSED", venv) as policy:
        trajs = rollout.generate_trajectories(policy, venv, rollout.min_episodes(10))
    trajs_unwrapped = [rollout.unwrap_traj(t) for t in trajs]
    trajs_unwrapped_twice = [rollout.unwrap_traj(t) for t in trajs_unwrapped]

    for t, t_unwrapped in zip(trajs, trajs_unwrapped):
        np.testing.assert_allclose(t.acts, t_unwrapped.acts)
        np.testing.assert_allclose(t.obs, t_unwrapped.obs / 2)
        np.testing.assert_allclose(t.rews, t_unwrapped.rews / 2)

    for t1, t2 in zip(trajs_unwrapped, trajs_unwrapped_twice):
        np.testing.assert_equal(t1.acts, t2.acts)
        np.testing.assert_equal(t1.obs, t2.obs)
        np.testing.assert_equal(t1.rews, t2.rews) 
Example #11
Source File: test_rollout.py    From imitation with MIT License
def test_rollout_stats():
    """Applying `ObsRewIncrementWrapper` halves the reward mean.

    `rollout_stats` should reflect this.
    """
    env = gym.make("CartPole-v1")
    env = bench.Monitor(env, None)
    env = ObsRewHalveWrapper(env)
    venv = vec_env.DummyVecEnv([lambda: env])

    with serialize.load_policy("zero", "UNUSED", venv) as policy:
        trajs = rollout.generate_trajectories(policy, venv, rollout.min_episodes(10))
    s = rollout.rollout_stats(trajs)

    np.testing.assert_allclose(s["return_mean"], s["monitor_return_mean"] / 2)
    np.testing.assert_allclose(s["return_std"], s["monitor_return_std"] / 2)
    np.testing.assert_allclose(s["return_min"], s["monitor_return_min"] / 2)
    np.testing.assert_allclose(s["return_max"], s["monitor_return_max"] / 2) 
Example #12
Source File: trpo.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single core only, we need to use the pipe system to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
        envs = VecFrameStack(envs, args.num_stack)

        if args.srl_model != "raw_pixels":
            printYellow("Using MLP policy because working on state representation")
            envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
            envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

        return envs 
Example #13
Source File: ppo1.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single core only, we need to use the pipe system to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
        envs = VecFrameStack(envs, args.num_stack)

        if args.srl_model != "raw_pixels":
            printYellow("Using MLP policy because working on state representation")
            envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
            envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

        return envs 
Example #14
Source File: utils.py    From robotics-rl-srl with MIT License
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to loading the rolling average, None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe
    envs = [makeEnv(args.env, args.seed, i, args.log_dir, allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs 
Example #15
Source File: train.py    From flappy with MIT License
def main(args):

    try:
        model_cls = getattr(importlib.import_module(
            'stable_baselines'), args.model_type)
    except AttributeError:
        print(args.model_type, "Error: wrong model type")
        return

    try:
        policy_cls = getattr(importlib.import_module(
            'stable_baselines.common.policies'), args.policy_type)
    except AttributeError:
        print(args.policy_type, "Error: wrong policy type")
        return

    start = time.time()

    env_id = 'fwmav_hover-v0'
    # env = DummyVecEnv([make_env(env_id, 1)])
    env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    model = model_cls(policy_cls, env, verbose=0)
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start) 
Example #16
Source File: ddpg.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single core only, we need to use the pipe system to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

        if args.srl_model != "raw_pixels":
            env = VecNormalize(env, norm_reward=False)
            env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

        return env 
Example #17
Source File: test_vec_check_nan.py    From stable-baselines with MIT License
def test_check_nan():
    """Test VecCheckNan Object"""

    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)

    env.step([[0]])

    try:
        env.step([[float('NaN')]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[float('inf')]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[-1]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[1]])
    except ValueError:
        pass
    else:
        assert False

    env.step(np.array([[0, 1], [0, 1]])) 
Example #18
Source File: train_DDPG.py    From flappy with MIT License
def main(args):

	start = time.time()

	env_id = 'fwmav_hover-v0'
	env = DummyVecEnv([make_env(env_id, 0)])
	# env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

	n_actions = env.action_space.shape[-1]
	param_noise = None
	action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

	model = DDPG(
			policy = MyDDPGPolicy,
			env = env,
			gamma = 1.0,
			nb_train_steps=5000,
			nb_rollout_steps=10000,
			nb_eval_steps=10000,
			param_noise=param_noise,
			action_noise=action_noise,
			tau=0.003,
			batch_size=256,
			observation_range=(-np.inf, np.inf),
			actor_lr=0.0001,
			critic_lr=0.001,
			reward_scale=0.05,
			memory_limit=10000000,
			verbose=1,
	)

	model.learn(total_timesteps=args.time_step)
	model.save(args.model_path)

	end = time.time()
	print("Time used: ", end - start) 
Example #19
Source File: sac.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single core only, we need to use the pipe system to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

        if args.srl_model != "raw_pixels":
            env = VecNormalize(env, norm_reward=False)
            env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

        return env 
Example #20
Source File: train_maneuver_DDPG.py    From flappy with MIT License
def main(args):

	start = time.time()

	env_id = 'fwmav_maneuver-v0'
	env = DummyVecEnv([make_env(env_id, 0)])
	# env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

	n_actions = env.action_space.shape[-1]
	param_noise = None
	action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

	model = DDPG(
			policy = MyDDPGPolicy,
			env = env,
			gamma = 1.0,
			nb_train_steps=5000,
			nb_rollout_steps=10000,
			nb_eval_steps=10000,
			param_noise=param_noise,
			action_noise=action_noise,
			tau=0.003,
			batch_size=256,
			observation_range=(-np.inf, np.inf),
			actor_lr=0.0001,
			critic_lr=0.001,
			reward_scale=0.05,
			memory_limit=10000000,
			verbose=1,
	)

	model.learn(total_timesteps=args.time_step)
	model.save(args.model_path)

	end = time.time()
	print("Time used: ", end - start) 
Example #21
Source File: test.py    From flappy with MIT License
def main(args):
	env_id = 'fwmav_hover-v0'

	env = DummyVecEnv([make_env(env_id, 0, random_init = args.rand_init, randomize_sim = args.rand_dynamics, phantom_sensor = args.phantom_sensor)])

	if args.model_type != 'PID' and args.model_type != 'ARC':
		try:
			model_cls = getattr(
				importlib.import_module('stable_baselines'), args.model_type)
		except AttributeError:
			print(args.model_type, "Error: wrong model type")
			return
		try:
			model = model_cls.load(args.model_path)
		except:
			print(args.model_path, "Error: wrong model path")
	else:
		model = LazyModel(env.envs[0],args.model_type)

	obs = env.reset()

	while True:
		if env.envs[0].is_sim_on == False:
			env.envs[0].gui.cv.wait()
		elif env.envs[0].is_sim_on:
			action, _ = model.predict(obs)
			obs, rewards, done, info = env.step(action)
			# if done:
			# 	obs = env.reset() 
Example #22
Source File: run_ppo.py    From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License
def load_train_env(ns, state_collector, robot_radius, rew_fnc, num_stacks,
                   stack_offset, debug, task_mode, rl_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy in ["CNN1DPolicy", "CNN1DPolicy2", "CNN1DPolicy3"]:
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel


    env_raw = DummyVecEnv([lambda: env_temp(ns, state_collector, stack_offset, num_stacks, robot_radius, rew_fnc, debug, rl_mode, task_mode)])

    if normalize:
        env = VecNormalize(env_raw, training=True, norm_obs=True, norm_reward=False, clip_obs=100.0, clip_reward=10.0,
                           gamma=0.99, epsilon=1e-08)
    else:
        env = env_raw

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env 
Example #23
Source File: train.py    From flow with MIT License
def run_model_stablebaseline(flow_params,
                             num_cpus=1,
                             rollout_size=50,
                             num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout (the total rollout length is rollout_size)
    num_steps : int
        total number of training steps

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model 
Example #24
Source File: trpo_runner.py    From flow with MIT License
def run_model(params, rollout_size=50, num_steps=50):
    """Perform the training operation.

    Parameters
    ----------
    params : dict
        flow-specific parameters (see flow/utils/registry.py)
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    constructor = env_constructor(params, version=0)()
    env = DummyVecEnv([lambda: constructor])

    model = TRPO(
        'MlpPolicy',
        env,
        verbose=2,
        timesteps_per_batch=rollout_size,
        gamma=0.999,
        policy_kwargs={
            "net_arch": [100, 50, 25]
        },
    )
    model.learn(total_timesteps=num_steps)

    return model 
Example #25
Source File: test_gail.py    From stable-baselines with MIT License
def test_generate_vec_env_non_image_observation():
    env = DummyVecEnv([lambda: gym.make('CartPole-v1')] * 2)

    model = PPO2('MlpPolicy', env)
    model.learn(total_timesteps=5000)

    generate_expert_traj(model, save_path='.', n_timesteps=0, n_episodes=5) 
Example #26
Source File: test_auto_vec_detection.py    From stable-baselines with MIT License
def check_shape(make_env, model_class, shape_1, shape_2):
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([make_env]))

    env0 = make_env()
    env1 = DummyVecEnv([make_env])

    for env, expected_shape in [(env0, shape_1), (env1, shape_2)]:
        def callback(locals_, _globals):
            assert np.array(locals_['action']).shape == expected_shape
        evaluate_policy(model, env, n_eval_episodes=5, callback=callback) 
Example #27
Source File: env_checker.py    From stable-baselines with MIT License
def _check_nan(env: gym.Env) -> None:
    """Check for Inf and NaN using the VecWrapper."""
    vec_env = VecCheckNan(DummyVecEnv([lambda: env]))
    for _ in range(10):
        action = [env.action_space.sample()]
        _, _, _, _ = vec_env.step(action) 
Example #28
Source File: callbacks.py    From stable-baselines with MIT License
def __init__(self, eval_env: Union[gym.Env, VecEnv],
                 callback_on_new_best: Optional[BaseCallback] = None,
                 n_eval_episodes: int = 5,
                 eval_freq: int = 10000,
                 log_path: str = None,
                 best_model_save_path: str = None,
                 deterministic: bool = True,
                 render: bool = False,
                 verbose: int = 1):
        super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose)
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.best_mean_reward = -np.inf
        self.last_mean_reward = -np.inf
        self.deterministic = deterministic
        self.render = render

        # Convert to VecEnv for consistency
        if not isinstance(eval_env, VecEnv):
            eval_env = DummyVecEnv([lambda: eval_env])

        assert eval_env.num_envs == 1, "You must pass only one environment for evaluation"

        self.eval_env = eval_env
        self.best_model_save_path = best_model_save_path
        # Logs will be written in `evaluations.npz`
        if log_path is not None:
            log_path = os.path.join(log_path, 'evaluations')
        self.log_path = log_path
        self.evaluations_results = []
        self.evaluations_timesteps = []
        self.evaluations_length = [] 
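A brief usage sketch for this callback; the environment ID, frequencies, and paths are illustrative. As the constructor above shows, a plain gym.Env passed as eval_env is wrapped in a DummyVecEnv automatically.

import gym

from stable_baselines import PPO2
from stable_baselines.common.callbacks import EvalCallback

eval_env = gym.make('CartPole-v1')   # wrapped in a DummyVecEnv by the callback
eval_callback = EvalCallback(eval_env, n_eval_episodes=5, eval_freq=10000,
                             log_path='./logs/', best_model_save_path='./logs/')

model = PPO2('MlpPolicy', 'CartPole-v1', verbose=0)
model.learn(total_timesteps=20000, callback=eval_callback)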
Example #29
Source File: test_buffering_wrapper.py    From imitation with MIT License
def _make_buffering_venv(error_on_premature_reset: bool,) -> BufferingWrapper:
    venv = DummyVecEnv([_CountingEnv] * 2)
    venv = BufferingWrapper(venv, error_on_premature_reset)
    venv.reset()
    return venv 
Example #30
Source File: test_rollout.py    From imitation with MIT License
def _sample_fixed_length_trajectories(
    episode_lengths: Sequence[int], min_episodes: int, **kwargs,
) -> Sequence[types.Trajectory]:
    venv = vec_env.DummyVecEnv(
        [functools.partial(TerminalSentinelEnv, length) for length in episode_lengths]
    )
    policy = RandomPolicy(venv.observation_space, venv.action_space)
    sample_until = rollout.min_episodes(min_episodes)
    trajectories = rollout.generate_trajectories(
        policy, venv, sample_until=sample_until, **kwargs,
    )
    return trajectories