Python stable_baselines.common.vec_env.DummyVecEnv() Examples
The following are 30 code examples of stable_baselines.common.vec_env.DummyVecEnv(). Each example notes the original project and source file it was taken from. You may also want to check out all available functions and classes of the stable_baselines.common.vec_env module.
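Before the examples, a quick orientation: DummyVecEnv wraps a list of zero-argument environment constructors into a single vectorized environment that steps each environment sequentially in the calling process (no subprocesses). The sketch below is a minimal illustration of that pattern using the standard stable-baselines v2 API; 'CartPole-v1' and the timestep budget are arbitrary stand-ins.

import gym

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv

# DummyVecEnv expects a list of callables, each returning a fresh gym.Env
env = DummyVecEnv([lambda: gym.make('CartPole-v1')])

model = PPO2('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000)

# Vectorized API: observations, rewards and dones are batched over the envs
obs = env.reset()
action, _states = model.predict(obs)
obs, rewards, dones, infos = env.step(action)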
Example #1
Source File: cmd_util.py From stable-baselines with MIT License | 6 votes |
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None,
                   start_index=0, allow_early_resets=True,
                   start_method=None, use_subprocess=False):
    """
    Create a wrapped, monitored VecEnv for Atari.

    :param env_id: (str) the environment ID
    :param num_env: (int) the number of environments you wish to have in subprocesses
    :param seed: (int) the initial seed for RNG
    :param wrapper_kwargs: (dict) the parameters for wrap_deepmind function
    :param start_index: (int) start rank index
    :param allow_early_resets: (bool) allows early reset of the environment
    :param start_method: (str) method used to start the subprocesses.
        See SubprocVecEnv doc for more information
    :param use_subprocess: (bool) Whether to use `SubprocVecEnv` or `DummyVecEnv` when
        `num_env` > 1, `DummyVecEnv` is usually faster. Default: False
    :return: (VecEnv) The atari environment
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def make_env(rank):
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
                          allow_early_resets=allow_early_resets)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)

    # When using one environment, no need to start subprocesses
    if num_env == 1 or not use_subprocess:
        return DummyVecEnv([make_env(i + start_index) for i in range(num_env)])

    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)],
                         start_method=start_method)
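A possible call site for the helper above, shown only as a sketch: the environment ID and hyperparameters are illustrative placeholders, not taken from the source, and PPO2 is assumed to be imported from stable_baselines. With the default use_subprocess=False, all four workers run inside one DummyVecEnv.

# Hypothetical usage of make_atari_env (ID and counts are placeholders)
env = make_atari_env('BreakoutNoFrameskip-v4', num_env=4, seed=0)
model = PPO2('CnnPolicy', env, verbose=1)
model.learn(total_timesteps=100000)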
Example #2
Source File: test_utils.py From stable-baselines with MIT License | 6 votes |
def test_custom_vec_env():
    """
    Stand alone test for a special case (passing a custom VecEnv class)
    to avoid doubling the number of tests.
    """
    monitor_dir = 'logs/test_make_vec_env/'
    env = make_vec_env('CartPole-v1', n_envs=1,
                       monitor_dir=monitor_dir, seed=0,
                       vec_env_cls=SubprocVecEnv, vec_env_kwargs={'start_method': None})

    assert env.num_envs == 1
    assert isinstance(env, SubprocVecEnv)
    assert os.path.isdir('logs/test_make_vec_env/')
    # Kill subprocess
    env.close()
    # Cleanup folder
    shutil.rmtree(monitor_dir)

    # This should fail because DummyVecEnv does not have any keyword argument
    with pytest.raises(TypeError):
        make_vec_env('CartPole-v1', n_envs=1, vec_env_kwargs={'dummy': False})
Example #3
Source File: test_utils.py From stable-baselines with MIT License | 6 votes |
def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class):
    env = make_vec_env(env_id, n_envs, vec_env_cls=vec_env_cls,
                       wrapper_class=wrapper_class, monitor_dir=None, seed=0)

    assert env.num_envs == n_envs

    if vec_env_cls is None:
        assert isinstance(env, DummyVecEnv)
        if wrapper_class is not None:
            assert isinstance(env.envs[0], wrapper_class)
        else:
            assert isinstance(env.envs[0], Monitor)
    else:
        assert isinstance(env, SubprocVecEnv)
    # Kill subprocesses
    env.close()
Example #4
Source File: RLTrader.py From RLTrader with GNU General Public License v3.0 | 6 votes |
def initialize_optuna(self):
    try:
        train_env = DummyVecEnv([lambda: TradingEnv(self.data_provider)])
        model = self.Model(self.Policy, train_env, nminibatches=1)
        strategy = self.Reward_Strategy()

        self.study_name = f'{model.__class__.__name__}__{model.act_model.__class__.__name__}__{strategy.__class__.__name__}'
    except:
        self.study_name = 'UnknownModel__UnknownPolicy__UnknownStrategy'

    self.optuna_study = optuna.create_study(
        study_name=self.study_name, storage=self.params_db_path, load_if_exists=True)

    self.logger.debug('Initialized Optuna:')

    try:
        self.logger.debug(
            f'Best reward in ({len(self.optuna_study.trials)}) trials: {self.optuna_study.best_value}')
    except:
        self.logger.debug('No trials have been finished yet.')
Example #5
Source File: test_action_space.py From stable-baselines with MIT License | 6 votes |
def test_identity_multidiscrete(model_class):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)
    with a multidiscrete action space

    :param model_class: (BaseRLModel) A RL Model
    """
    env = DummyVecEnv([lambda: IdentityEnvMultiDiscrete(10)])

    model = model_class("MlpPolicy", env)
    model.learn(total_timesteps=1000)
    evaluate_policy(model, env, n_eval_episodes=5)

    obs = env.reset()
    assert np.array(model.action_probability(obs)).shape == (2, 1, 10), \
        "Error: action_probability not returning correct shape"
    assert np.prod(model.action_probability(obs, actions=env.action_space.sample()).shape) == 1, \
        "Error: not scalar probability"
Example #6
Source File: test_vec_envs.py From stable-baselines with MIT License | 6 votes |
def test_vecenv_wrapper_getattr():
    def make_env():
        return CustomGymEnv(gym.spaces.Box(low=np.zeros(2), high=np.ones(2)))
    vec_env = DummyVecEnv([make_env for _ in range(N_ENVS)])
    wrapped = CustomWrapperA(CustomWrapperBB(vec_env))
    assert wrapped.var_a == 'a'
    assert wrapped.var_b == 'b'
    assert wrapped.var_bb == 'bb'
    assert wrapped.func_b() == 'b'
    assert wrapped.name_test() == CustomWrapperBB

    double_wrapped = CustomWrapperA(CustomWrapperB(wrapped))
    dummy = double_wrapped.var_a  # should not raise as it is directly defined here
    with pytest.raises(AttributeError):  # should raise due to ambiguity
        dummy = double_wrapped.var_b
    with pytest.raises(AttributeError):  # should raise as does not exist
        dummy = double_wrapped.nonexistent_attribute
    del dummy  # keep linter happy
Example #7
Source File: test_multiple_learn.py From stable-baselines with MIT License | 6 votes |
def test_model_multiple_learn_no_reset(model_class):
    """Check that when we call learn multiple times, we don't unnecessarily
    reset the environment.
    """
    if model_class is ACER:
        def make_env():
            return IdentityEnv(ep_length=1e10, dim=2)
    else:
        def make_env():
            return IdentityEnvBox(ep_length=1e10)
    env = make_env()
    venv = DummyVecEnv([lambda: env])
    model = model_class(policy="MlpPolicy", env=venv)
    _check_reset_count(model, env)

    # Try again following a `set_env`.
    env = make_env()
    venv = DummyVecEnv([lambda: env])
    assert env.num_resets == 0
    model.set_env(venv)
    _check_reset_count(model, env)
Example #8
Source File: test_identity.py From stable-baselines with MIT License | 6 votes |
def test_identity(model_name):
    """
    Test if the algorithm (with a given policy)
    can learn an identity transformation (i.e. return observation as an action)

    :param model_name: (str) Name of the RL model
    """
    env = DummyVecEnv([lambda: IdentityEnv(10)])

    model = LEARN_FUNC_DICT[model_name](env)
    evaluate_policy(model, env, n_eval_episodes=20, reward_threshold=90)

    obs = env.reset()
    assert model.action_probability(obs).shape == (1, 10), \
        "Error: action_probability not returning correct shape"
    action = env.action_space.sample()
    action_prob = model.action_probability(obs, actions=action)
    assert np.prod(action_prob.shape) == 1, "Error: not scalar probability"
    action_logprob = model.action_probability(obs, actions=action, logp=True)
    assert np.allclose(action_prob, np.exp(action_logprob)), (action_prob, action_logprob)

    # Free memory
    del model, env
Example #9
Source File: test_simple.py From flappy with MIT License | 6 votes |
def main(args):
    env_id = 'fwmav_hover-v1'

    env = DummyVecEnv([make_env(env_id, 0, random_init=args.rand_init,
                                randomize_sim=args.rand_dynamics,
                                phantom_sensor=args.phantom_sensor)])

    model = LazyModel(env.envs[0], args.model_type)

    obs = env.reset()

    while True:
        if env.envs[0].is_sim_on == False:
            env.envs[0].gui.cv.wait()
        elif env.envs[0].is_sim_on:
            action, _ = model.predict(obs)
            obs, rewards, done, info = env.step(action)
            if done:
                obs = env.reset()
Example #10
Source File: test_rollout.py From imitation with MIT License | 6 votes |
def test_unwrap_traj():
    """Check that unwrap_traj reverses `ObsRewHalveWrapper`.

    Also check that unwrapping twice is a no-op.
    """
    env = gym.make("CartPole-v1")
    env = wrappers.RolloutInfoWrapper(env)
    env = ObsRewHalveWrapper(env)
    venv = vec_env.DummyVecEnv([lambda: env])

    with serialize.load_policy("zero", "UNUSED", venv) as policy:
        trajs = rollout.generate_trajectories(policy, venv, rollout.min_episodes(10))
    trajs_unwrapped = [rollout.unwrap_traj(t) for t in trajs]
    trajs_unwrapped_twice = [rollout.unwrap_traj(t) for t in trajs_unwrapped]

    for t, t_unwrapped in zip(trajs, trajs_unwrapped):
        np.testing.assert_allclose(t.acts, t_unwrapped.acts)
        np.testing.assert_allclose(t.obs, t_unwrapped.obs / 2)
        np.testing.assert_allclose(t.rews, t_unwrapped.rews / 2)

    for t1, t2 in zip(trajs_unwrapped, trajs_unwrapped_twice):
        np.testing.assert_equal(t1.acts, t2.acts)
        np.testing.assert_equal(t1.obs, t2.obs)
        np.testing.assert_equal(t1.rews, t2.rews)
Example #11
Source File: test_rollout.py From imitation with MIT License | 6 votes |
def test_rollout_stats():
    """Applying `ObsRewHalveWrapper` halves the reward mean.

    `rollout_stats` should reflect this.
    """
    env = gym.make("CartPole-v1")
    env = bench.Monitor(env, None)
    env = ObsRewHalveWrapper(env)
    venv = vec_env.DummyVecEnv([lambda: env])

    with serialize.load_policy("zero", "UNUSED", venv) as policy:
        trajs = rollout.generate_trajectories(policy, venv, rollout.min_episodes(10))
    s = rollout.rollout_stats(trajs)

    np.testing.assert_allclose(s["return_mean"], s["monitor_return_mean"] / 2)
    np.testing.assert_allclose(s["return_std"], s["monitor_return_std"] / 2)
    np.testing.assert_allclose(s["return_min"], s["monitor_return_min"] / 2)
    np.testing.assert_allclose(s["return_max"], s["monitor_return_max"] / 2)
Example #12
Source File: trpo.py From robotics-rl-srl with MIT License | 6 votes |
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #13
Source File: ppo1.py From robotics-rl-srl with MIT License | 6 votes |
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #14
Source File: utils.py From robotics-rl-srl with MIT License | 5 votes |
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done,
        usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to loading the rolling average,
        None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = [makeEnv(args.env, args.seed, i, args.log_dir,
                    allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #15
Source File: train.py From flappy with MIT License | 5 votes |
def main(args):
    try:
        model_cls = getattr(importlib.import_module('stable_baselines'), args.model_type)
    except AttributeError:
        print(args.model_type, "Error: wrong model type")
        return
    try:
        policy_cls = getattr(importlib.import_module('stable_baselines.common.policies'),
                             args.policy_type)
    except AttributeError:
        print(args.policy_type, "Error: wrong policy type")
        return

    start = time.time()

    env_id = 'fwmav_hover-v0'
    # env = DummyVecEnv([make_env(env_id, 1)])
    env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    model = model_cls(policy_cls, env, verbose=0)
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #16
Source File: ddpg.py From robotics-rl-srl with MIT License | 5 votes |
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #17
Source File: test_vec_check_nan.py From stable-baselines with MIT License | 5 votes |
def test_check_nan():
    """Test VecCheckNan Object"""

    env = DummyVecEnv([NanAndInfEnv])
    env = VecCheckNan(env, raise_exception=True)

    env.step([[0]])

    try:
        env.step([[float('NaN')]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[float('inf')]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[-1]])
    except ValueError:
        pass
    else:
        assert False

    try:
        env.step([[1]])
    except ValueError:
        pass
    else:
        assert False

    env.step(np.array([[0, 1], [0, 1]]))
Example #18
Source File: train_DDPG.py From flappy with MIT License | 5 votes |
def main(args):
    start = time.time()

    env_id = 'fwmav_hover-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #19
Source File: sac.py From robotics-rl-srl with MIT License | 5 votes |
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #20
Source File: train_maneuver_DDPG.py From flappy with MIT License | 5 votes |
def main(args):
    start = time.time()

    env_id = 'fwmav_maneuver-v0'
    env = DummyVecEnv([make_env(env_id, 0)])
    # env = SubprocVecEnv([make_env(env_id, i) for i in range(args.n_cpu)])

    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    model = DDPG(
        policy=MyDDPGPolicy,
        env=env,
        gamma=1.0,
        nb_train_steps=5000,
        nb_rollout_steps=10000,
        nb_eval_steps=10000,
        param_noise=param_noise,
        action_noise=action_noise,
        tau=0.003,
        batch_size=256,
        observation_range=(-np.inf, np.inf),
        actor_lr=0.0001,
        critic_lr=0.001,
        reward_scale=0.05,
        memory_limit=10000000,
        verbose=1,
    )
    model.learn(total_timesteps=args.time_step)
    model.save(args.model_path)

    end = time.time()
    print("Time used: ", end - start)
Example #21
Source File: test.py From flappy with MIT License | 5 votes |
def main(args):
    env_id = 'fwmav_hover-v0'

    env = DummyVecEnv([make_env(env_id, 0, random_init=args.rand_init,
                                randomize_sim=args.rand_dynamics,
                                phantom_sensor=args.phantom_sensor)])

    if args.model_type != 'PID' and args.model_type != 'ARC':
        try:
            model_cls = getattr(importlib.import_module('stable_baselines'), args.model_type)
        except AttributeError:
            print(args.model_type, "Error: wrong model type")
            return
        try:
            model = model_cls.load(args.model_path)
        except:
            print(args.model_path, "Error: wrong model path")
    else:
        model = LazyModel(env.envs[0], args.model_type)

    obs = env.reset()

    while True:
        if env.envs[0].is_sim_on == False:
            env.envs[0].gui.cv.wait()
        elif env.envs[0].is_sim_on:
            action, _ = model.predict(obs)
            obs, rewards, done, info = env.step(action)
            # if done:
            #     obs = env.reset()
Example #22
Source File: run_ppo.py From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License | 5 votes |
def load_train_env(ns, state_collector, robot_radius, rew_fnc, num_stacks,
                   stack_offset, debug, task_mode, rl_mode, policy,
                   disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy in ["CNN1DPolicy", "CNN1DPolicy2", "CNN1DPolicy3"]:
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env_raw = DummyVecEnv([lambda: env_temp(ns, state_collector, stack_offset, num_stacks,
                                            robot_radius, rew_fnc, debug, rl_mode, task_mode)])

    if normalize:
        env = VecNormalize(env_raw, training=True, norm_obs=True, norm_reward=False,
                           clip_obs=100.0, clip_reward=10.0, gamma=0.99, epsilon=1e-08)
    else:
        env = env_raw

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #23
Source File: train.py From flow with MIT License | 5 votes |
def run_model_stablebaseline(flow_params, num_cpus=1, rollout_size=50, num_steps=50):
    """Run the model for num_steps if provided.

    Parameters
    ----------
    flow_params : dict
        flow-specific parameters
    num_cpus : int
        number of CPUs used during training
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps

    The total rollout length is rollout_size.

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    from stable_baselines.common.vec_env import DummyVecEnv, SubprocVecEnv
    from stable_baselines import PPO2

    if num_cpus == 1:
        constructor = env_constructor(params=flow_params, version=0)()
        # The algorithms require a vectorized environment to run
        env = DummyVecEnv([lambda: constructor])
    else:
        env = SubprocVecEnv([env_constructor(params=flow_params, version=i)
                             for i in range(num_cpus)])

    train_model = PPO2('MlpPolicy', env, verbose=1, n_steps=rollout_size)
    train_model.learn(total_timesteps=num_steps)
    return train_model
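Note the selection pattern above: a single-process DummyVecEnv when num_cpus == 1, and a SubprocVecEnv otherwise. The same trade-off, avoiding subprocess overhead when only one environment is needed, appears in Examples #1 and #14.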
Example #24
Source File: trpo_runner.py From flow with MIT License | 5 votes |
def run_model(params, rollout_size=50, num_steps=50):
    """Perform the training operation.

    Parameters
    ----------
    params : dict
        flow-specific parameters (see flow/utils/registry.py)
    rollout_size : int
        length of a single rollout
    num_steps : int
        total number of training steps

    Returns
    -------
    stable_baselines.*
        the trained model
    """
    constructor = env_constructor(params, version=0)()
    env = DummyVecEnv([lambda: constructor])

    model = TRPO(
        'MlpPolicy',
        env,
        verbose=2,
        timesteps_per_batch=rollout_size,
        gamma=0.999,
        policy_kwargs={
            "net_arch": [100, 50, 25]
        },
    )
    model.learn(total_timesteps=num_steps)
    return model
Example #25
Source File: test_gail.py From stable-baselines with MIT License | 5 votes |
def test_generate_vec_env_non_image_observation():
    env = DummyVecEnv([lambda: gym.make('CartPole-v1')] * 2)
    model = PPO2('MlpPolicy', env)
    model.learn(total_timesteps=5000)
    generate_expert_traj(model, save_path='.', n_timesteps=0, n_episodes=5)
Example #26
Source File: test_auto_vec_detection.py From stable-baselines with MIT License | 5 votes |
def check_shape(make_env, model_class, shape_1, shape_2):
    model = model_class(policy="MlpPolicy", env=DummyVecEnv([make_env]))

    env0 = make_env()
    env1 = DummyVecEnv([make_env])

    for env, expected_shape in [(env0, shape_1), (env1, shape_2)]:
        def callback(locals_, _globals):
            assert np.array(locals_['action']).shape == expected_shape
        evaluate_policy(model, env, n_eval_episodes=5, callback=callback)
Example #27
Source File: env_checker.py From stable-baselines with MIT License | 5 votes |
def _check_nan(env: gym.Env) -> None:
    """Check for Inf and NaN using the VecWrapper."""
    vec_env = VecCheckNan(DummyVecEnv([lambda: env]))
    for _ in range(10):
        action = [env.action_space.sample()]
        _, _, _, _ = vec_env.step(action)
Example #28
Source File: callbacks.py From stable-baselines with MIT License | 5 votes |
def __init__(self, eval_env: Union[gym.Env, VecEnv],
             callback_on_new_best: Optional[BaseCallback] = None,
             n_eval_episodes: int = 5,
             eval_freq: int = 10000,
             log_path: str = None,
             best_model_save_path: str = None,
             deterministic: bool = True,
             render: bool = False,
             verbose: int = 1):
    super(EvalCallback, self).__init__(callback_on_new_best, verbose=verbose)
    self.n_eval_episodes = n_eval_episodes
    self.eval_freq = eval_freq
    self.best_mean_reward = -np.inf
    self.last_mean_reward = -np.inf
    self.deterministic = deterministic
    self.render = render

    # Convert to VecEnv for consistency
    if not isinstance(eval_env, VecEnv):
        eval_env = DummyVecEnv([lambda: eval_env])

    assert eval_env.num_envs == 1, "You must pass only one environment for evaluation"

    self.eval_env = eval_env
    self.best_model_save_path = best_model_save_path
    # Logs will be written in `evaluations.npz`
    if log_path is not None:
        log_path = os.path.join(log_path, 'evaluations')
    self.log_path = log_path
    self.evaluations_results = []
    self.evaluations_timesteps = []
    self.evaluations_length = []
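Because the constructor above wraps a plain gym.Env in a DummyVecEnv automatically, callers can pass an unvectorized environment straight in. A minimal sketch of that usage follows; the environment ID, paths, and eval_freq are placeholder choices, not values from the source.

import gym
from stable_baselines import PPO2
from stable_baselines.common.callbacks import EvalCallback

eval_env = gym.make('CartPole-v1')  # EvalCallback will wrap this in a DummyVecEnv
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=500)

model = PPO2('MlpPolicy', 'CartPole-v1')
model.learn(total_timesteps=10000, callback=eval_callback)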
Example #29
Source File: test_buffering_wrapper.py From imitation with MIT License | 5 votes |
def _make_buffering_venv(error_on_premature_reset: bool) -> BufferingWrapper:
    venv = DummyVecEnv([_CountingEnv] * 2)
    venv = BufferingWrapper(venv, error_on_premature_reset)
    venv.reset()
    return venv
Example #30
Source File: test_rollout.py From imitation with MIT License | 5 votes |
def _sample_fixed_length_trajectories(
    episode_lengths: Sequence[int],
    min_episodes: int,
    **kwargs,
) -> Sequence[types.Trajectory]:
    venv = vec_env.DummyVecEnv(
        [functools.partial(TerminalSentinelEnv, length) for length in episode_lengths]
    )
    policy = RandomPolicy(venv.observation_space, venv.action_space)
    sample_until = rollout.min_episodes(min_episodes)
    trajectories = rollout.generate_trajectories(
        policy,
        venv,
        sample_until=sample_until,
        **kwargs,
    )
    return trajectories