Python gym.Wrapper() Examples
The following are 30 code examples of gym.Wrapper(), drawn from open-source projects. You can go to the original project or source file by following the link above each example.
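All of the examples below follow the same basic pattern: subclass gym.Wrapper, call gym.Wrapper.__init__(self, env) in the constructor, and override step() and/or reset() to change the wrapped environment's behaviour. As a minimal, hypothetical illustration of that pattern (not taken from any of the projects below; the class name and the reward-clipping behaviour are invented for this sketch):

import gym


class ClipRewardWrapper(gym.Wrapper):
    """Hypothetical wrapper that clips every reward to [-1, 1]."""

    def __init__(self, env):
        gym.Wrapper.__init__(self, env)

    def step(self, action):
        obs, reward, done, info = self.env.step(action)
        # Clip the reward before passing it on to the agent.
        return obs, max(-1.0, min(1.0, reward)), done, info

    def reset(self, **kwargs):
        return self.env.reset(**kwargs)


env = ClipRewardWrapper(gym.make("CartPole-v1"))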
Example #1
Source File: retro_wrappers.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    self.action_space = env.action_space
    self.timeout_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)
    self.original_os = env.observation_space
    if isinstance(self.original_os, gym.spaces.Dict):
        import copy
        ordered_dict = copy.deepcopy(self.original_os.spaces)
        ordered_dict['value_estimation_timeout'] = self.timeout_space
        self.observation_space = gym.spaces.Dict(ordered_dict)
        self.dict_mode = True
    else:
        self.observation_space = gym.spaces.Dict({
            'original': self.original_os,
            'value_estimation_timeout': self.timeout_space
        })
        self.dict_mode = False
    self.ac_count = None
    while 1:
        if not hasattr(env, "_max_episode_steps"):  # Looking for TimeLimit wrapper that has this field
            env = env.env
            continue
        break
    self.timeout = env._max_episode_steps
Example #2
Source File: atari_wrappers.py From chainerrl with MIT License | 6 votes |
def __init__(self, env, k, channel_order='hwc'):
    """Stack k last frames.

    Returns lazy array, which is much more memory efficient.

    See Also
    --------
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    self.frames = deque([], maxlen=k)
    self.stack_axis = {'hwc': 2, 'chw': 0}[channel_order]
    orig_obs_space = env.observation_space
    low = np.repeat(orig_obs_space.low, k, axis=self.stack_axis)
    high = np.repeat(orig_obs_space.high, k, axis=self.stack_axis)
    self.observation_space = spaces.Box(
        low=low, high=high, dtype=orig_obs_space.dtype)
Example #3
Source File: misc_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def get_wrapper_by_name(env, classname):
    """Given a gym environment possibly wrapped multiple times, returns a wrapper
    of class named classname or raises ValueError if no such wrapper was applied

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
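A short usage sketch for the helper above. The wrapper stack is hypothetical; it relies on class_name(), which gym.Wrapper classes expose and which returns the class name:

# Hypothetical stack: env = FrameStack(EpisodicLifeEnv(raw_atari_env), k=4)
frame_stack = get_wrapper_by_name(env, "FrameStack")
print(frame_stack.k)  # access an attribute that only the FrameStack level has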
Example #4
Source File: misc_util.py From HardRLWithYoutube with MIT License | 6 votes |
def get_wrapper_by_name(env, classname):
    """Given a gym environment possibly wrapped multiple times, returns a wrapper
    of class named classname or raises ValueError if no such wrapper was applied

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
Example #5
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    self.action_space = env.action_space
    self.timeout_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)
    self.original_os = env.observation_space
    if isinstance(self.original_os, gym.spaces.Dict):
        import copy
        ordered_dict = copy.deepcopy(self.original_os.spaces)
        ordered_dict['value_estimation_timeout'] = self.timeout_space
        self.observation_space = gym.spaces.Dict(ordered_dict)
        self.dict_mode = True
    else:
        self.observation_space = gym.spaces.Dict({
            'original': self.original_os,
            'value_estimation_timeout': self.timeout_space
        })
        self.dict_mode = False
    self.ac_count = None
    while 1:
        if not hasattr(env, "_max_episode_steps"):  # Looking for TimeLimit wrapper that has this field
            env = env.env
            continue
        break
    self.timeout = env._max_episode_steps
Example #6
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    self.action_space = env.action_space
    self.timeout_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)
    self.original_os = env.observation_space
    if isinstance(self.original_os, gym.spaces.Dict):
        import copy
        ordered_dict = copy.deepcopy(self.original_os.spaces)
        ordered_dict['value_estimation_timeout'] = self.timeout_space
        self.observation_space = gym.spaces.Dict(ordered_dict)
        self.dict_mode = True
    else:
        self.observation_space = gym.spaces.Dict({
            'original': self.original_os,
            'value_estimation_timeout': self.timeout_space
        })
        self.dict_mode = False
    self.ac_count = None
    while 1:
        if not hasattr(env, "_max_episode_steps"):  # Looking for TimeLimit wrapper that has this field
            env = env.env
            continue
        break
    self.timeout = env._max_episode_steps
Example #7
Source File: misc_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def get_wrapper_by_name(env, classname):
    """Given a gym environment possibly wrapped multiple times, returns a wrapper
    of class named classname or raises ValueError if no such wrapper was applied

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
Example #8
Source File: misc_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def get_wrapper_by_name(env, classname):
    """Given a gym environment possibly wrapped multiple times, returns a wrapper
    of class named classname or raises ValueError if no such wrapper was applied

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname)
Example #9
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(self, env):
    gym.Wrapper.__init__(self, env)
    self.action_space = env.action_space
    self.timeout_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)
    self.original_os = env.observation_space
    if isinstance(self.original_os, gym.spaces.Dict):
        import copy
        ordered_dict = copy.deepcopy(self.original_os.spaces)
        ordered_dict['value_estimation_timeout'] = self.timeout_space
        self.observation_space = gym.spaces.Dict(ordered_dict)
        self.dict_mode = True
    else:
        self.observation_space = gym.spaces.Dict({
            'original': self.original_os,
            'value_estimation_timeout': self.timeout_space
        })
        self.dict_mode = False
    self.ac_count = None
    while 1:
        if not hasattr(env, "_max_episode_steps"):  # Looking for TimeLimit wrapper that has this field
            env = env.env
            continue
        break
    self.timeout = env._max_episode_steps
Example #10
Source File: gym_utils.py From tensor2tensor with Apache License 2.0 | 6 votes |
def remove_time_limit_wrapper(env):
    """Removes top level TimeLimit Wrapper.

    Removes TimeLimit Wrapper from top level if exists, throws error if any other
    TimeLimit Wrapper is present in stack.

    Args:
      env: environment

    Returns:
      the env with removed time limit wrapper.
    """
    if isinstance(env, gym.wrappers.TimeLimit):
        env = env.env
    env_ = env
    while isinstance(env_, gym.Wrapper):
        if isinstance(env_, gym.wrappers.TimeLimit):
            raise ValueError("Can remove only top-level TimeLimit gym.Wrapper.")
        env_ = env_.env
    return env
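A brief usage sketch: gym.make() normally returns the environment wrapped in gym.wrappers.TimeLimit, which is exactly the layer this helper strips (the environment id is chosen purely for illustration):

env = gym.make("CartPole-v0")          # returned as TimeLimit(CartPoleEnv)
env = remove_time_limit_wrapper(env)
assert not isinstance(env, gym.wrappers.TimeLimit)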
Example #11
Source File: wrapper.py From irl-benchmark with GNU General Public License v3.0 | 6 votes |
def is_unwrappable_to(env: gym.Env, to_wrapper: Type[gym.Wrapper]) -> bool:
    """Check if env can be unwrapped to to_wrapper.

    Parameters
    ----------
    env: gym.Env
        A gym environment (potentially wrapped).
    to_wrapper: Type[gym.Wrapper]
        A wrapper class extending gym.Wrapper.

    Returns
    -------
    bool
        True if env could be unwrapped to desired wrapper, False otherwise.
    """
    if isinstance(env, to_wrapper):
        return True
    while hasattr(env, 'env'):
        env = env.env
        if isinstance(env, to_wrapper):
            return True
    return False
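A short usage sketch (environment id chosen for illustration; gym.make wraps the raw env in a TimeLimit wrapper, so the check succeeds):

env = gym.make("FrozenLake-v0")
assert is_unwrappable_to(env, gym.wrappers.TimeLimit)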
Example #12
Source File: atari_wrapper.py From tf2rl with MIT License | 5 votes |
def __init__(self, env, skip=4):
    """ Return only every `skip`-th frame """
    gym.Wrapper.__init__(self, env)
    # most recent raw observations (for max pooling across time steps)
    self._obs_buffer = np.zeros(
        (2,) + env.observation_space.shape, dtype=np.uint8)
    self._skip = skip
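The constructor above only allocates the two-frame buffer; the skipping itself happens in step(). A sketch of the step() method that typically accompanies this constructor in baselines-style max-and-skip wrappers (the tf2rl version may differ in detail):

def step(self, action):
    """Repeat action, sum reward, and max-pool over the last two raw frames."""
    total_reward = 0.0
    done = None
    for i in range(self._skip):
        obs, reward, done, info = self.env.step(action)
        if i == self._skip - 2:
            self._obs_buffer[0] = obs
        if i == self._skip - 1:
            self._obs_buffer[1] = obs
        total_reward += reward
        if done:
            break
    # The observation on the done=True frame doesn't matter.
    max_frame = self._obs_buffer.max(axis=0)
    return max_frame, total_reward, done, info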
Example #13
Source File: atari_wrapper.py From tf2rl with MIT License | 5 votes |
def __init__(self, env):
    """ Make end-of-life == end-of-episode, but only reset on true game over.
    Done by DeepMind for the DQN and co. since it helps value estimation.
    """
    gym.Wrapper.__init__(self, env)
    self.lives = 0
    self.was_real_done = True
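Only the constructor is shown above; the life-tracking logic lives in step() and reset(). A sketch of the usual baselines-style implementation, which reads the Atari life counter via env.unwrapped.ale.lives() (the exact tf2rl code may differ):

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.was_real_done = done
    # Check current lives and make loss of life terminal.
    lives = self.env.unwrapped.ale.lives()
    if lives < self.lives and lives > 0:
        # Some games report lives == 0 for a few frames, so only treat a
        # positive life count as a real life loss.
        done = True
    self.lives = lives
    return obs, reward, done, info

def reset(self, **kwargs):
    """Reset only when lives are exhausted."""
    if self.was_real_done:
        obs = self.env.reset(**kwargs)
    else:
        # No-op step to advance from the terminal/lost-life state.
        obs, _, _, _ = self.env.step(0)
    self.lives = self.env.unwrapped.ale.lives()
    return obs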
Example #14
Source File: gym_utils.py From tensor2tensor with Apache License 2.0 | 5 votes |
def __init__(self, env, num_actions):
    """Constructs a wrapper for discretizing the action space.

    Args:
      env: environment to wrap.
      num_actions: A np.array of the same shape as the environment's
        action_spec. Elements in the array specify the number of actions to
        discretize to for each dimension.

    Raises:
      ValueError: If the action_spec shape and the limits shape are not equal.
    """
    if not isinstance(env.action_space, gym.spaces.box.Box):
        raise ValueError(
            "The action space is {}, but gym.spaces.box.Box is expected".format(
                env.action_space))
    gym.Wrapper.__init__(self, env)
    # We convert a scalar num_actions to array [num_actions, num_actions, ...]
    self._num_actions = np.broadcast_to(num_actions, env.action_space.shape)
    if env.action_space.shape != self._num_actions.shape:
        raise ValueError("Spec {} and limit shape do not match. Got {}".format(
            env.action_space.shape, self._num_actions.shape))
    self.action_space = gym.spaces.MultiDiscrete(nvec=self._num_actions)
    self._action_map = self._discretize_env(env)
Example #15
Source File: atari_wrapper.py From tf2rl with MIT License | 5 votes |
def __init__(self, env):
    """ Take action on reset for environments that are fixed until firing. """
    gym.Wrapper.__init__(self, env)
    assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
    assert len(env.unwrapped.get_action_meanings()) >= 3
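The actual "fire on reset" behaviour lives in reset(). A sketch of the reset() method that usually accompanies this constructor in baselines-style FireReset wrappers (the tf2rl version may differ):

def reset(self, **kwargs):
    self.env.reset(**kwargs)
    obs, _, done, _ = self.env.step(1)  # press FIRE
    if done:
        self.env.reset(**kwargs)
    obs, _, done, _ = self.env.step(2)
    if done:
        self.env.reset(**kwargs)
    return obs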
Example #16
Source File: gym_utils.py From tensor2tensor with Apache License 2.0 | 5 votes |
def __init__(self, env, p=0.25):
    gym.Wrapper.__init__(self, env)
    self.p = p
    self.last_action = 0
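This constructor only stores the repeat probability p and the last action; the usual "sticky action" behaviour is implemented in step(). A hedged sketch of what such a step() typically looks like, not necessarily the tensor2tensor implementation:

def step(self, action):
    # With probability p, ignore the new action and repeat the previous one.
    if np.random.uniform() < self.p:
        action = self.last_action
    self.last_action = action
    return self.env.step(action)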
Example #17
Source File: atari_wrapper.py From EasyRL with Apache License 2.0 | 5 votes |
def __init__(self, env):
    """ Make end-of-life == end-of-episode, but only reset on true game over.
    Done by DeepMind for the DQN and co. since it helps value estimation.
    """
    gym.Wrapper.__init__(self, env)
    self.lives = 0
    self.was_real_done = True
Example #18
Source File: atari_wrapper.py From tf2rl with MIT License | 5 votes |
def __init__(self, env, k):
    """ Stack k last frames.
    Returns lazy array, which is much more memory efficient.

    See also
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    self.frames = deque([], maxlen=k)
    shp = env.observation_space.shape
    self.observation_space = spaces.Box(
        low=0, high=255, shape=(shp[:-1] + (shp[-1] * k,)),
        dtype=env.observation_space.dtype)
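The stacking itself is done in reset(), step(), and a small helper. A sketch of the methods that typically accompany this constructor, assuming a LazyFrames container like the one in baselines.common.atari_wrappers that the docstring already points to (the tf2rl code may differ):

def reset(self):
    obs = self.env.reset()
    # Fill the deque with k copies of the first frame.
    for _ in range(self.k):
        self.frames.append(obs)
    return self._get_ob()

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.frames.append(obs)
    return self._get_ob(), reward, done, info

def _get_ob(self):
    assert len(self.frames) == self.k
    return LazyFrames(list(self.frames))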
Example #19
Source File: atari_wrapper.py From EasyRL with Apache License 2.0 | 5 votes |
def __init__(self, env):
    """ Take action on reset for environments that are fixed until firing. """
    gym.Wrapper.__init__(self, env)
    assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
    assert len(env.unwrapped.get_action_meanings()) >= 3
Example #20
Source File: atari_wrapper.py From EasyRL with Apache License 2.0 | 5 votes |
def __init__(self, env, noop_max=30):
    """ Sample initial states by taking random number of no-ops on reset.
    no-op is assumed to be action 0.
    """
    gym.Wrapper.__init__(self, env)
    self.noop_max = noop_max
    self.override_num_noops = None
    self.noop_action = 0
    assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
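The random no-op burst happens in reset(). A sketch of the reset() method that usually goes with this constructor in baselines-style NoopReset wrappers; np.random is used here for brevity, whereas the original draws from the environment's own RNG (the EasyRL code may differ):

def reset(self, **kwargs):
    """Do a random number of no-op actions in [1, noop_max] after reset."""
    self.env.reset(**kwargs)
    if self.override_num_noops is not None:
        noops = self.override_num_noops
    else:
        noops = np.random.randint(1, self.noop_max + 1)
    assert noops > 0
    obs = None
    for _ in range(noops):
        obs, _, done, _ = self.env.step(self.noop_action)
        if done:
            obs = self.env.reset(**kwargs)
    return obs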
Example #21
Source File: monte_carlo.py From adversarial-policies with MIT License | 5 votes |
def __init__(self, env):
    """Wraps a MujocoEnv, adding get_state and set_state methods.
    :param env: a MujocoEnv. NOTE: it must not be wrapped in a TimeLimit."""
    if hasattr(env, "_max_episode_steps"):
        raise TypeError(
            "Environment must not have a time limit "
            "(try passing in env.unwrapped instead)."
        )
    gym.Wrapper.__init__(self, env)
    self.sim = env.unwrapped.sim
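The point of this wrapper is the get_state/set_state pair it adds. A hedged sketch of how these are typically implemented on top of mujoco_py's MjSim (the adversarial-policies code may differ):

def get_state(self):
    # MjSimState snapshot of joint positions, velocities, etc.
    return self.sim.get_state()

def set_state(self, state):
    self.sim.set_state(state)
    self.sim.forward()  # recompute derived quantities after overwriting the state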
Example #22
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, k):
    """Stack k last frames.

    Returns lazy array, which is much more memory efficient.

    See Also
    --------
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    self.frames = deque([], maxlen=k)
    shp = env.observation_space.shape
    self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k),
                                        dtype=env.observation_space.dtype)
Example #23
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, skip=4):
    """Return only every `skip`-th frame"""
    gym.Wrapper.__init__(self, env)
    # most recent raw observations (for max pooling across time steps)
    self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)
    self._skip = skip
Example #24
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env):
    """Take action on reset for environments that are fixed until firing."""
    gym.Wrapper.__init__(self, env)
    assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
    assert len(env.unwrapped.get_action_meanings()) >= 3
Example #25
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, noop_max=30):
    """Sample initial states by taking random number of no-ops on reset.
    No-op is assumed to be action 0.
    """
    gym.Wrapper.__init__(self, env)
    self.noop_max = noop_max
    self.override_num_noops = None
    self.noop_action = 0
    assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
Example #26
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, max_random_steps, on_startup=True, every_episode=False):
    gym.Wrapper.__init__(self, env)
    self.on_startup = on_startup
    self.every_episode = every_episode
    self.random_steps = max_random_steps
    self.last_obs = None
    if on_startup:
        self.some_random_steps()
Example #27
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, savedir, k):
    gym.Wrapper.__init__(self, env)
    self.savedir = savedir
    self.k = k
    self.epcount = 0
Example #28
Source File: retro_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, n, stickprob):
    gym.Wrapper.__init__(self, env)
    self.n = n
    self.stickprob = stickprob
    self.curac = None
    self.rng = np.random.RandomState()
    self.supports_want_render = hasattr(env, "supports_want_render")
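This constructor sets up a stochastic frame-skip: repeat each action n times, but with probability stickprob delay the switch to the new action by one substep. A hedged sketch of the step() method, adapted from the baselines retro_wrappers version of this wrapper; the copy in this project may differ:

def step(self, ac):
    done = False
    totrew = 0
    for i in range(self.n):
        if self.curac is None:
            # First step after reset: use the given action.
            self.curac = ac
        elif i == 0:
            # First substep: with probability stickprob, keep the old action.
            if self.rng.rand() > self.stickprob:
                self.curac = ac
        elif i == 1:
            # Second substep: the new action definitely kicks in.
            self.curac = ac
        if self.supports_want_render and i < self.n - 1:
            ob, rew, done, info = self.env.step(self.curac, want_render=False)
        else:
            ob, rew, done, info = self.env.step(self.curac)
        totrew += rew
        if done:
            break
    return ob, totrew, done, info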
Example #29
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, k):
    """Stack k last frames.

    Returns lazy array, which is much more memory efficient.

    See Also
    --------
    baselines.common.atari_wrappers.LazyFrames
    """
    gym.Wrapper.__init__(self, env)
    self.k = k
    self.frames = deque([], maxlen=k)
    shp = env.observation_space.shape
    self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k),
                                        dtype=env.observation_space.dtype)
Example #30
Source File: atari_wrappers.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(self, env, skip=4):
    """Return only every `skip`-th frame"""
    gym.Wrapper.__init__(self, env)
    # most recent raw observations (for max pooling across time steps)
    self._obs_buffer = np.zeros((2,) + env.observation_space.shape, dtype=np.uint8)
    self._skip = skip