Python utils.make_env() Examples
The following are 4 code examples of utils.make_env(). You can go to the original project or source file by following the links above each example. You may also want to check out the other available functions and classes of the utils module.
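Note that utils here is a project-local module, not part of the standard library, so each project defines its own make_env(). Purely as orientation, and assuming the classic Gym API that the examples below rely on (env.seed() and a 4-tuple step()), a minimal factory of this kind might look like the sketch that follows; the exact signature differs per project, as the examples show.

import gym

def make_env(env_id, seed=0):
    # Hypothetical minimal factory: build a Gym environment and seed it
    # (and its action space) for reproducibility.
    env = gym.make(env_id)
    env.seed(seed)
    env.action_space.seed(seed)
    return env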
Example #1
Source File: main.py From BCQ with MIT License
def eval_policy(policy, env_name, seed, eval_episodes=10):
    eval_env, _, _, _ = utils.make_env(env_name, atari_preprocessing)
    eval_env.seed(seed + 100)

    avg_reward = 0.
    for _ in range(eval_episodes):
        state, done = eval_env.reset(), False
        while not done:
            action = policy.select_action(np.array(state), eval=True)
            state, reward, done, _ = eval_env.step(action)
            avg_reward += reward

    avg_reward /= eval_episodes

    print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
    print("---------------------------------------")
    return avg_reward
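For context, an evaluation helper like this is usually called at a fixed interval from the training loop. The snippet below is a hypothetical usage sketch, not code from the BCQ repository; max_timesteps, eval_freq and the placeholder training step are assumed names.

# Hypothetical usage: evaluate every eval_freq steps and keep a history of scores.
evaluations = []
for t in range(max_timesteps):
    # ... one training/update step on `policy` would go here ...
    if (t + 1) % eval_freq == 0:
        evaluations.append(eval_policy(policy, env_name, seed))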
Example #2
Source File: run.py From learning-from-human-preferences with MIT License
def make_envs(env_id, n_envs, seed):
    def wrap_make_env(env_id, rank):
        def _thunk():
            return make_env(env_id, seed + rank)
        return _thunk
    set_global_seeds(seed)
    env = SubprocVecEnv(env_id, [wrap_make_env(env_id, i) for i in range(n_envs)])
    return env
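A vectorized environment built this way is stepped with one action per worker process. The snippet below is a hypothetical usage sketch; the environment id and the random actions are placeholders, and it assumes the usual baselines-style VecEnv interface (reset() and step() returning batched values).

# Hypothetical usage: four worker processes sharing a base seed.
envs = make_envs('PongNoFrameskip-v4', n_envs=4, seed=0)
obs = envs.reset()                                  # one observation per worker
actions = [envs.action_space.sample() for _ in range(4)]
obs, rewards, dones, infos = envs.step(actions)
envs.close()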
Example #3
Source File: run_checkpoint.py From learning-from-human-preferences with MIT License
def main():
    args = parse_args()

    env = make_env(args.env)
    model = get_model(args.policy_ckpt_dir)

    if args.reward_predictor_ckpt_dir:
        reward_predictor = get_reward_predictor(args.reward_predictor_ckpt_dir)
    else:
        reward_predictor = None

    run_agent(env, model, reward_predictor, args.frame_interval_ms)
Example #4
Source File: train.py From rl-baselines-zoo with MIT License
def create_env(n_envs, eval_env=False):
    """
    Create the environment and wrap it if necessary.

    :param n_envs: (int)
    :param eval_env: (bool) Whether it is an environment used for evaluation or not
    :return: (Union[gym.Env, VecEnv])
    """
    global hyperparams
    global env_kwargs

    # Do not log eval env (issue with writing the same file)
    log_dir = None if eval_env else save_path

    if is_atari:
        if args.verbose > 0:
            print("Using Atari wrapper")
        env = make_atari_env(env_id, num_env=n_envs, seed=args.seed)
        # Frame-stacking with 4 frames
        env = VecFrameStack(env, n_stack=4)
    elif algo_ in ['dqn', 'ddpg']:
        if hyperparams.get('normalize', False):
            print("WARNING: normalization not supported yet for DDPG/DQN")
        env = gym.make(env_id, **env_kwargs)
        env.seed(args.seed)
        if env_wrapper is not None:
            env = env_wrapper(env)
    else:
        if n_envs == 1:
            env = DummyVecEnv([make_env(env_id, 0, args.seed,
                                        wrapper_class=env_wrapper, log_dir=log_dir,
                                        env_kwargs=env_kwargs)])
        else:
            # env = SubprocVecEnv([make_env(env_id, i, args.seed) for i in range(n_envs)])
            # On most env, SubprocVecEnv does not help and is quite memory hungry
            env = DummyVecEnv([make_env(env_id, i, args.seed, log_dir=log_dir,
                                        wrapper_class=env_wrapper,
                                        env_kwargs=env_kwargs) for i in range(n_envs)])
        if normalize:
            if args.verbose > 0:
                if len(normalize_kwargs) > 0:
                    print("Normalization activated: {}".format(normalize_kwargs))
                else:
                    print("Normalizing input and reward")
            env = VecNormalize(env, **normalize_kwargs)

    # Optional Frame-stacking
    if hyperparams.get('frame_stack', False):
        n_stack = hyperparams['frame_stack']
        env = VecFrameStack(env, n_stack)
        print("Stacking {} frames".format(n_stack))
        del hyperparams['frame_stack']

    if args.algo == 'her':
        # Wrap the env if need to flatten the dict obs
        if isinstance(env, VecEnv):
            env = _UnvecWrapper(env)
        env = HERGoalEnvWrapper(env)
    return env
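In train.py this helper is typically called once for the training environment and, when evaluation is enabled, again with eval_env=True so the evaluation environment is not logged to the same directory (log_dir is set to None in that case). The snippet below is a hypothetical illustration of that pattern, not a verbatim excerpt; args.eval_freq is an assumed flag.

# Hypothetical usage: training env plus an optional single-process eval env.
env = create_env(n_envs)
eval_env = create_env(1, eval_env=True) if args.eval_freq > 0 else None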