Python gym.wrappers Examples
The following are 30 code examples of the gym.wrappers module, collected from open-source projects. The project and source file each example was taken from are noted above it. You may also want to check out the other available functions and classes of the gym module.
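All of the examples share one pattern: an environment is built and then successively rebound through wrapper constructors, so each wrapper intercepts the reset/step calls of the one beneath it. As a minimal sketch of that pattern (the environment id is a placeholder, and the snippet assumes the pre-0.26 Gym API in which reset() returns only the observation and step() returns a 4-tuple):

import gym

env = gym.make('CartPole-v1')  # gym.make already applies TimeLimit for registered envs
env = gym.wrappers.RecordEpisodeStatistics(env)  # records episode return/length into `info`

obs = env.reset()
done = False
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
print(info['episode'])  # e.g. {'r': <return>, 'l': <length>, 't': <elapsed time>}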
Example #1
Source File: train_soft_actor_critic_atlas.py From chainerrl with MIT License | 6 votes |
def make_env(args, seed, test):
    if args.env.startswith('Roboschool'):
        # Check gym version because roboschool does not work with gym>=0.15.6
        from distutils.version import StrictVersion
        gym_version = StrictVersion(gym.__version__)
        if gym_version >= StrictVersion('0.15.6'):
            raise RuntimeError('roboschool does not work with gym>=0.15.6')
        import roboschool  # NOQA
    env = gym.make(args.env)
    # Unwrap TimeLimit wrapper
    assert isinstance(env, gym.wrappers.TimeLimit)
    env = env.env
    # Use different random seeds for train and test envs
    env_seed = 2 ** 32 - 1 - seed if test else seed
    env.seed(int(env_seed))
    # Cast observations to float32 because our model uses float32
    env = chainerrl.wrappers.CastObservationToFloat32(env)
    # Normalize action space to [-1, 1]^n
    env = chainerrl.wrappers.NormalizeActionSpace(env)
    if args.monitor:
        env = chainerrl.wrappers.Monitor(
            env, args.outdir, force=True, video_callable=lambda _: True)
    if args.render:
        env = chainerrl.wrappers.Render(env, mode='human')
    return env
Example #2
Source File: ppo_atari_visual.py From cleanrl with MIT License | 6 votes |
def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = ProbsVisualizationWrapper(env)
                env = Monitor(env, f'videos/{experiment_name}')
        env = wrap_pytorch(
            wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #3
Source File: ppo_atari.py From cleanrl with MIT License | 6 votes |
def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env = wrap_pytorch(
            wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #4
Source File: core.py From chi with MIT License | 6 votes |
def run_episode(self, env: gym.Env):
    meta_wrapper = get_wrapper(env, chi.rl.wrappers.Wrapper)
    done = False
    ob = env.reset()
    a, meta = self.act(ob)
    rs = []
    while not done:
        if meta_wrapper:
            meta_wrapper.set_meta(meta)  # send meta information to wrappers
        ob, r, done, info = env.step(a)
        a, meta = self.act(ob, r, done, info)
        rs.append(r)
    return sum(rs)
Example #5
Source File: gym_utils.py From BERT with Apache License 2.0 | 6 votes |
def remove_time_limit_wrapper(env):
  """Removes top level TimeLimit Wrapper.

  Removes TimeLimit Wrapper from top level if exists, throws error if any
  other TimeLimit Wrapper is present in stack.

  Args:
    env: environment

  Returns:
    the env with removed time limit wrapper.
  """
  if isinstance(env, gym.wrappers.TimeLimit):
    env = env.env
  env_ = env
  while isinstance(env_, gym.Wrapper):
    if isinstance(env_, gym.wrappers.TimeLimit):
      raise ValueError("Can remove only top-level TimeLimit gym.Wrapper.")
    env_ = env_.env
  return env
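A hypothetical usage sketch for the helper above (the environment id is a placeholder): registered environments come out of gym.make already wrapped in a top-level TimeLimit, which this function strips.

env = gym.make('CartPole-v0')
assert isinstance(env, gym.wrappers.TimeLimit)       # applied automatically by gym.make
env = remove_time_limit_wrapper(env)
assert not isinstance(env, gym.wrappers.TimeLimit)   # step limit no longer enforced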
Example #6
Source File: cmd_util.py From stable-baselines with MIT License | 6 votes |
def make_robotics_env(env_id, seed, rank=0, allow_early_resets=True):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.

    :param env_id: (str) the environment ID
    :param seed: (int) the initial seed for RNG
    :param rank: (int) the rank of the environment (for logging)
    :param allow_early_resets: (bool) allows early reset of the environment
    :return: (Gym Environment) The robotic environment
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    keys = ['observation', 'desired_goal']
    # TODO: remove try-except once most users are running modern Gym
    try:  # for modern Gym (>=0.15.4)
        from gym.wrappers import FilterObservation, FlattenObservation
        env = FlattenObservation(FilterObservation(env, keys))
    except ImportError:  # for older gym (<=0.15.3)
        from gym.wrappers import FlattenDictWrapper  # pytype:disable=import-error
        env = FlattenDictWrapper(env, keys)
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)),
        info_keywords=('is_success',),
        allow_early_resets=allow_early_resets)
    env.seed(seed)
    return env
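For goal-based robotics environments the raw observation is a dict with 'observation', 'achieved_goal', and 'desired_goal' entries; the wrapper pair above filters it down to the listed keys and flattens the result into a single Box. A hypothetical call (assuming a MuJoCo robotics env such as FetchReach-v1 is installed):

env = make_robotics_env('FetchReach-v1', seed=0)
obs = env.reset()  # a flat np.ndarray combining 'observation' and 'desired_goal'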
Example #7
Source File: __init__.py From irl-benchmark with GNU General Public License v3.0 | 6 votes |
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.

    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env
Example #8
Source File: gym_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def remove_time_limit_wrapper(env):
  """Removes top level TimeLimit Wrapper.

  Removes TimeLimit Wrapper from top level if exists, throws error if any
  other TimeLimit Wrapper is present in stack.

  Args:
    env: environment

  Returns:
    the env with removed time limit wrapper.
  """
  if isinstance(env, gym.wrappers.TimeLimit):
    env = env.env
  env_ = env
  while isinstance(env_, gym.Wrapper):
    if isinstance(env_, gym.wrappers.TimeLimit):
      raise ValueError("Can remove only top-level TimeLimit gym.Wrapper.")
    env_ = env_.env
  return env
Example #9
Source File: __init__.py From muzero-pytorch with MIT License | 5 votes |
def new_game(self, seed=None, save_video=False, save_path=None, video_callable=None, uid=None):
    env = gym.make(self.env_name)

    if seed is not None:
        env.seed(seed)

    if save_video:
        from gym.wrappers import Monitor
        env = Monitor(env, directory=save_path, force=True, video_callable=video_callable, uid=uid)
    return ClassicControlWrapper(env, discount=self.discount, k=4)
Example #10
Source File: local.py From retro-contest with MIT License | 5 votes |
def make(game, state=retro.State.DEFAULT, discrete_actions=False, bk2dir=None):
    use_restricted_actions = retro.Actions.FILTERED
    if discrete_actions:
        use_restricted_actions = retro.Actions.DISCRETE
    try:
        env = retro.make(game, state, scenario='contest', use_restricted_actions=use_restricted_actions)
    except Exception:
        env = retro.make(game, state, use_restricted_actions=use_restricted_actions)
    if bk2dir:
        env.auto_record(bk2dir)
    env = retro_contest.StochasticFrameSkip(env, n=4, stickprob=0.25)
    env = gym.wrappers.TimeLimit(env, max_episode_steps=4500)
    return env
Example #11
Source File: envs.py From bezos with MIT License | 5 votes |
def step(self, action):
    done = False
    total_reward = 0
    current_step = 0
    while current_step < (self.repeat_count + 1) and not done:
        self.stepcount += 1
        obs, reward, done, info = self.env.step(action)
        total_reward += reward
        current_step += 1
    if 'skip.stepcount' in info:
        raise gym.error.Error('Key "skip.stepcount" already in info. Make sure you are not stacking '
                              'the SkipWrapper wrappers.')
    info['skip.stepcount'] = self.stepcount
    return obs, total_reward, done, info
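The step method above is only a fragment; a minimal sketch of the surrounding frame-skip wrapper class it appears to belong to (attribute names taken from the fragment, everything else assumed):

class SkipWrapper(gym.Wrapper):
    def __init__(self, env, repeat_count):
        super().__init__(env)
        self.repeat_count = repeat_count  # extra repeats of each action per step()
        self.stepcount = 0                # raw env steps taken so far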
Example #12
Source File: gym.py From Jacinle with MIT License | 5 votes |
def __init__(self, name, dump_dir=None, force_dump=False, state_mode='DEFAULT'):
    super().__init__()
    with get_env_lock():
        self._gym = self._make_env(name)

    if dump_dir:
        jacio.mkdir(dump_dir)
        self._gym = gym.wrappers.Monitor(self._gym, dump_dir, force=force_dump)

    assert state_mode in ('DEFAULT', 'RENDER', 'BOTH')
    self._state_mode = state_mode
Example #13
Source File: gym_env.py From A2C with Apache License 2.0 | 5 votes |
def __init__(self, env_name, id, seed):
    super().__init__(env_name, id)
    self.seed = seed
    self.make()
    # Get the inside of the wrappers!
    self.gym_env = self.env.env.env.env.env.env.env
    self.monitor = self.env.env.env.env.env.env.monitor
Example #14
Source File: gym_env.py From mapr2 with Apache License 2.0 | 5 votes |
def __init__(self, env_name, record_video=False, video_schedule=None, log_dir=None,
             record_log=False, force_reset=True):
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
    # the time limit specified for each environment has been passed and
    # therefore the environment is not Markovian (terminal condition depends
    # on time rather than state).
    env = env.env

    self.env = env
    self.env_id = env.spec.id

    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    logger.log("observation space: {}".format(self._observation_space))
    self._action_space = convert_gym_space(env.action_space)
    logger.log("action space: {}".format(self._action_space))
    self._horizon = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
    self._log_dir = log_dir
    self._force_reset = force_reset
Example #15
Source File: wrappers.py From rtrl with MIT License | 5 votes |
def __init__(self, env, max_steps=None, key='reset'):
    super().__init__(env)
    self.reset_key = key
    from gym.wrappers import TimeLimit
    self.enforce = bool(max_steps)
    if max_steps is None:
        tl = get_wrapper_by_class(env, TimeLimit)
        max_steps = 1 << 31 if tl is None else tl._max_episode_steps
        # print("TimeLimitResetWrapper.max_steps =", max_steps)
    self.max_steps = max_steps
    self.t = 0
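get_wrapper_by_class above is a project helper, not part of gym; a minimal sketch of the idea, assuming the standard gym.Wrapper chaining via env.env:

def get_wrapper_by_class(env, cls):
    """Return the first wrapper of type `cls` in the stack, or None."""
    while isinstance(env, gym.Wrapper):
        if isinstance(env, cls):
            return env
        env = env.env
    return None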
Example #16
Source File: openai_gym.py From tensorforce with Apache License 2.0 | 5 votes |
def reset(self):
    import gym.wrappers

    if isinstance(self.environment, gym.wrappers.Monitor):
        self.environment.stats_recorder.done = True
    states = self.environment.reset()
    self.timestep = 0
    states = OpenAIGym.flatten_state(state=states, states_spec=self.states_spec)
    if self.drop_states_indices is not None:
        for index in reversed(self.drop_states_indices):
            states = np.concatenate([states[:index], states[index + 1:]])
    return states
Example #17
Source File: gym_env.py From mapr2 with Apache License 2.0 | 5 votes |
def reset(self):
    if self._force_reset and self.monitoring:
        from gym.wrappers.monitoring import Monitor
        assert isinstance(self.env, Monitor)
        recorder = self.env.stats_recorder
        if recorder is not None:
            recorder.done = True
    return self.env.reset()
Example #18
Source File: rl_utils.py From numpy-ml with GNU General Public License v3.0 | 5 votes |
def is_continuous(env, tuple_action, tuple_obs):
    """
    Check if an `env`'s observation and action spaces are continuous.

    Parameters
    ----------
    env : ``gym.wrappers`` or ``gym.envs`` instance
        The environment to evaluate.
    tuple_action : bool
        Whether the `env`'s action space is an instance of `gym.spaces.Tuple`
        or `gym.spaces.Dict`.
    tuple_obs : bool
        Whether the `env`'s observation space is an instance of
        `gym.spaces.Tuple` or `gym.spaces.Dict`.

    Returns
    -------
    cont_action : bool
        Whether the `env`'s action space is continuous.
    cont_obs : bool
        Whether the `env`'s observation space is continuous.
    """
    Continuous = gym.spaces.box.Box
    if tuple_obs:
        spaces = env.observation_space.spaces
        cont_obs = all(isinstance(s, Continuous) for s in spaces)
    else:
        cont_obs = isinstance(env.observation_space, Continuous)

    if tuple_action:
        spaces = env.action_space.spaces
        cont_action = all(isinstance(s, Continuous) for s in spaces)
    else:
        cont_action = isinstance(env.action_space, Continuous)
    return cont_action, cont_obs
Example #19
Source File: rl_utils.py From numpy-ml with GNU General Public License v3.0 | 5 votes |
def is_multidimensional(env):
    """
    Check if the action and observation spaces for `env` are multidimensional
    or ``Tuple`` spaces.

    Notes
    -----
    A multidimensional space is any space whose actions / observations have
    more than one element in them. This includes ``Tuple`` spaces, but also
    includes single action/observation spaces with several dimensions.

    Parameters
    ----------
    env : ``gym.wrappers`` or ``gym.envs`` instance
        The environment to evaluate.

    Returns
    -------
    md_action : bool
        Whether the `env`'s action space is multidimensional.
    md_obs : bool
        Whether the `env`'s observation space is multidimensional.
    tuple_action : bool
        Whether the `env`'s action space is a ``Tuple`` instance.
    tuple_obs : bool
        Whether the `env`'s observation space is a ``Tuple`` instance.
    """
    md_action, md_obs = True, True
    tuple_action, tuple_obs = is_tuple(env)
    if not tuple_action:
        act = env.action_space.sample()
        md_action = isinstance(act, (list, tuple, np.ndarray)) and len(act) > 1

    if not tuple_obs:
        OS = env.observation_space
        obs = OS.low if "low" in dir(OS) else OS.sample()  # sample causes problems
        md_obs = isinstance(obs, (list, tuple, np.ndarray)) and len(obs) > 1
    return md_action, md_obs, tuple_action, tuple_obs
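A hypothetical call on a standard environment, assuming the module's is_tuple helper behaves as its name suggests: for CartPole-v0 the action space is Discrete(2) and the observation space is a 4-dimensional Box, so the flags come out as in the comments below.

env = gym.make('CartPole-v0')
md_action, md_obs, tuple_action, tuple_obs = is_multidimensional(env)
cont_action, cont_obs = is_continuous(env, tuple_action, tuple_obs)
# md_action=False, md_obs=True, tuple_action=False, tuple_obs=False
# cont_action=False, cont_obs=True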
Example #20
Source File: ppo_self_play.py From cleanrl with MIT License | 5 votes |
def make_env(gym_id, seed, idx):
    def thunk():
        env = SlimeVolleySelfPlayEnv()
        env = MultiBinaryTooMultiDiscrete(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #21
Source File: ppo_continuous_action.py From cleanrl with MIT License | 5 votes |
def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = ClipActionsWrapper(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env = NormalizedEnv(env)
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #22
Source File: ppo.py From cleanrl with MIT License | 5 votes |
def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #23
Source File: behavior_cloning.py From ICML2019-TREX with MIT License | 5 votes |
def eval(args):
    env = gym.make(args.env_id)

    logdir = str(Path(args.logbase_path) / args.env_id)

    policy = Policy(env.observation_space.shape[0], env.action_space.shape[0])

    ### Initialize Parameters
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    # Training configuration
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession()
    sess.run(init_op)

    policy.saver.restore(sess, logdir + '/model.ckpt')

    from performance_checker import gen_traj
    from gym.wrappers import Monitor
    perfs = []
    for j in range(args.num_trajs):
        if j == 0 and args.video_record:
            wrapped = Monitor(env, './video/', force=True)
        else:
            wrapped = env
        perfs.append(gen_traj(wrapped, policy, args.render, args.max_len))
    print(logdir, ',', np.mean(perfs), ',', np.std(perfs))
Example #24
Source File: ssbm_env.py From gym-dolphin with MIT License | 5 votes |
def simpleSSBMEnv(act_every=3, **kwargs):
    env = SSBMEnv(**kwargs)

    # TODO: make this a wrapper
    env.action_space = spaces.Discrete(len(ssbm.simpleControllerStates))
    env.realController = lambda action: ssbm.simpleControllerStates[action].realController()

    from .box_wrapper import BoxWrapper
    env = BoxWrapper(env)

    from gym.wrappers import SkipWrapper
    return SkipWrapper(3)(env)
Example #25
Source File: gym_utils.py From BERT with Apache License 2.0 | 5 votes |
def gym_env_wrapper(env, rl_env_max_episode_steps, maxskip_env, rendered_env,
                    rendered_env_resize_to, sticky_actions, output_dtype):
  """Wraps a gym environment. see make_gym_env for details."""
  # rl_env_max_episode_steps is None or int.
  assert ((not rl_env_max_episode_steps) or
          isinstance(rl_env_max_episode_steps, int))

  wrap_with_time_limit = ((not rl_env_max_episode_steps) or
                          rl_env_max_episode_steps >= 0)

  if wrap_with_time_limit:
    env = remove_time_limit_wrapper(env)

  if sticky_actions:
    env = StickyActionEnv(env)

  if maxskip_env:
    env = MaxAndSkipEnv(env)  # pylint: disable=redefined-variable-type

  if rendered_env:
    env = RenderedEnv(
        env, resize_to=rendered_env_resize_to, output_dtype=output_dtype)

  if wrap_with_time_limit and rl_env_max_episode_steps is not None:
    env = gym.wrappers.TimeLimit(
        env, max_episode_steps=rl_env_max_episode_steps)
  return env
Example #26
Source File: gym_utils.py From tensor2tensor with Apache License 2.0 | 5 votes |
def gym_env_wrapper(env, rl_env_max_episode_steps, maxskip_env, rendered_env,
                    rendered_env_resize_to, sticky_actions, output_dtype,
                    num_actions):
  """Wraps a gym environment. see make_gym_env for details."""
  # rl_env_max_episode_steps is None or int.
  assert ((not rl_env_max_episode_steps) or
          isinstance(rl_env_max_episode_steps, int))

  wrap_with_time_limit = ((not rl_env_max_episode_steps) or
                          rl_env_max_episode_steps >= 0)

  if wrap_with_time_limit:
    env = remove_time_limit_wrapper(env)

  if num_actions is not None:
    logging.log_first_n(
        logging.INFO, "Number of discretized actions: %d", 1, num_actions)
    env = ActionDiscretizeWrapper(env, num_actions=num_actions)

  if sticky_actions:
    env = StickyActionEnv(env)

  if maxskip_env:
    env = MaxAndSkipEnv(env)  # pylint: disable=redefined-variable-type

  if rendered_env:
    env = RenderedEnv(
        env, resize_to=rendered_env_resize_to, output_dtype=output_dtype)

  if wrap_with_time_limit and rl_env_max_episode_steps is not None:
    env = gym.wrappers.TimeLimit(
        env, max_episode_steps=rl_env_max_episode_steps)
  return env
Example #27
Source File: gym_env.py From pytorchrl with MIT License | 5 votes |
def reset(self):
    if self._force_reset and self.monitoring:
        from gym.wrappers.monitoring import Monitor
        assert isinstance(self.env, Monitor)
        recorder = self.env.stats_recorder
        if recorder is not None:
            recorder.done = True
    return self.env.reset()
Example #28
Source File: gym_env.py From pytorchrl with MIT License | 5 votes |
def __init__(self, env_name, record_video=True, video_schedule=None, log_dir=None,
             record_log=True, force_reset=False):
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log("Warning: skipping Gym environment monitoring since snapshot_dir not configured.")
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    self.env = env
    self.env_id = env.spec.id

    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    logger.log("observation space: {}".format(self._observation_space))
    self._action_space = convert_gym_space(env.action_space)
    logger.log("action space: {}".format(self._action_space))
    self._horizon = env.spec.tags['wrapper_config.TimeLimit.max_episode_steps']
    self._log_dir = log_dir
    self._force_reset = force_reset
Example #29
Source File: rl_utils.py From numpy-ml with GNU General Public License v3.0 | 4 votes |
def obs_stats(env, md_obs, cont_obs):
    """
    Get information on the observation space for `env`.

    Parameters
    ----------
    env : ``gym.wrappers`` or ``gym.envs`` instance
        The environment to evaluate.
    md_obs : bool
        Whether the `env`'s observation space is multidimensional.
    cont_obs : bool
        Whether the `env`'s observation space is continuous.

    Returns
    -------
    n_obs_per_dim : list of length (obs_dim,)
        The number of possible observation classes for each dimension of the
        observation space.
    obs_ids : list or None
        A list of all valid observations within the space. If `cont_obs` is
        True, this value will be None.
    obs_dim : int or None
        The number of dimensions in a single observation.
    """
    if cont_obs:
        obs_ids = None
        obs_dim = env.observation_space.shape[0]
        n_obs_per_dim = [np.inf for _ in range(obs_dim)]
    else:
        if md_obs:
            n_obs_per_dim = [
                space.n if hasattr(space, "n") else np.inf
                for space in env.observation_space.spaces
            ]
            obs_ids = (
                None
                if np.inf in n_obs_per_dim
                else list(product(*[range(i) for i in n_obs_per_dim]))
            )
            obs_dim = len(n_obs_per_dim)
        else:
            obs_dim = 1
            n_obs_per_dim = [env.observation_space.n]
            obs_ids = list(range(n_obs_per_dim[0]))

    return n_obs_per_dim, obs_ids, obs_dim
Example #30
Source File: rl_utils.py From numpy-ml with GNU General Public License v3.0 | 4 votes |
def env_stats(env):
    """
    Compute statistics for the current environment.

    Parameters
    ----------
    env : ``gym.wrappers`` or ``gym.envs`` instance
        The environment to evaluate.

    Returns
    -------
    env_info : dict
        A dictionary containing information about the action and observation
        spaces of `env`.
    """
    md_action, md_obs, tuple_action, tuple_obs = is_multidimensional(env)
    cont_action, cont_obs = is_continuous(env, tuple_action, tuple_obs)

    n_actions_per_dim, action_ids, action_dim = action_stats(
        env, md_action, cont_action,
    )
    n_obs_per_dim, obs_ids, obs_dim = obs_stats(env, md_obs, cont_obs)

    env_info = {
        "id": env.spec.id,
        "seed": env.spec.seed if "seed" in dir(env.spec) else None,
        # `not`, rather than `~`: bitwise NOT of a Python bool gives -1 or -2,
        # both of which are truthy, so `bool(~x)` is always True
        "deterministic": bool(not env.spec.nondeterministic),
        "tuple_actions": tuple_action,
        "tuple_observations": tuple_obs,
        "multidim_actions": md_action,
        "multidim_observations": md_obs,
        "continuous_actions": cont_action,
        "continuous_observations": cont_obs,
        "n_actions_per_dim": n_actions_per_dim,
        "action_dim": action_dim,
        "n_obs_per_dim": n_obs_per_dim,
        "obs_dim": obs_dim,
        "action_ids": action_ids,
        "obs_ids": obs_ids,
    }
    return env_info
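As a closing usage sketch, the numpy-ml helpers from Examples #18, #19, #29, and #30 compose into one summary call (the environment id is a placeholder; FrozenLake-v0 has a Discrete(16) observation space):

env = gym.make('FrozenLake-v0')
info = env_stats(env)
# e.g. info['continuous_actions'] is False and info['n_obs_per_dim'] == [16]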