Python gym.Wrapper() Examples

The following code examples show how gym.Wrapper() is used in open-source projects. Each example notes the source file, project, and license it was taken from. You may also want to check out the other available functions and classes of the gym module.
Example #1
Source File: retro_wrappers.py    From HardRLWithYoutube with MIT License
def __init__(self, env):
        gym.Wrapper.__init__(self, env)
        self.action_space = env.action_space
        self.timeout_space = gym.spaces.Box(low=np.array([0.0]), high=np.array([1.0]), dtype=np.float32)
        self.original_os = env.observation_space
        if isinstance(self.original_os, gym.spaces.Dict):
            import copy
            ordered_dict = copy.deepcopy(self.original_os.spaces)
            ordered_dict['value_estimation_timeout'] = self.timeout_space
            self.observation_space = gym.spaces.Dict(ordered_dict)
            self.dict_mode = True
        else:
            self.observation_space = gym.spaces.Dict({
                'original': self.original_os,
                'value_estimation_timeout': self.timeout_space
                })
            self.dict_mode = False
        self.ac_count = None
        while 1:
            if not hasattr(env, "_max_episode_steps"):  # Looking for TimeLimit wrapper that has this field
                env = env.env
                continue
            break
        self.timeout = env._max_episode_steps 
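Only the constructor is shown above. Presumably the wrapper's reset() and step() attach the remaining fraction of the episode budget to every observation; a minimal sketch of that idea (the helper name _process_obs and the exact bookkeeping are assumptions, not the project's code):

def reset(self):
    self.ac_count = 0
    return self._process_obs(self.env.reset())

def step(self, action):
    self.ac_count += 1
    obs, reward, done, info = self.env.step(action)
    return self._process_obs(obs), reward, done, info

def _process_obs(self, obs):
    # Fraction of the episode budget still left before the TimeLimit triggers.
    fraction_left = 1.0 - self.ac_count / self.timeout
    if self.dict_mode:
        return dict(obs, value_estimation_timeout=fraction_left)
    return {'original': obs, 'value_estimation_timeout': fraction_left}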
Example #2
Source File: atari_wrappers.py    From chainerrl with MIT License
def __init__(self, env, k, channel_order='hwc'):
        """Stack k last frames.

        Returns lazy array, which is much more memory efficient.

        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        self.stack_axis = {'hwc': 2, 'chw': 0}[channel_order]
        orig_obs_space = env.observation_space
        low = np.repeat(orig_obs_space.low, k, axis=self.stack_axis)
        high = np.repeat(orig_obs_space.high, k, axis=self.stack_axis)
        self.observation_space = spaces.Box(
            low=low, high=high, dtype=orig_obs_space.dtype) 
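The matching reset() and step() keep the deque filled and return the stacked observation. A rough sketch, using eager concatenation along the stack axis (the real wrapper returns a memory-saving LazyFrames object instead, and the helper name _get_obs is assumed):

def reset(self):
    obs = self.env.reset()
    for _ in range(self.k):
        self.frames.append(obs)
    return self._get_obs()

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.frames.append(obs)
    return self._get_obs(), reward, done, info

def _get_obs(self):
    assert len(self.frames) == self.k
    # Stack the k most recent frames along the configured axis ('hwc' -> 2, 'chw' -> 0).
    return np.concatenate(list(self.frames), axis=self.stack_axis)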
Example #3
Source File: misc_util.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def get_wrapper_by_name(env, classname):
    """Given an a gym environment possibly wrapped multiple times, returns a wrapper
    of class named classname or raises ValueError if no such wrapper was applied

    Parameters
    ----------
    env: gym.Env or gym.Wrapper
        gym environment
    classname: str
        name of the wrapper

    Returns
    -------
    wrapper: gym.Wrapper
        wrapper named classname
    """
    currentenv = env
    while True:
        if classname == currentenv.class_name():
            return currentenv
        elif isinstance(currentenv, gym.Wrapper):
            currentenv = currentenv.env
        else:
            raise ValueError("Couldn't find wrapper named %s" % classname) 
Example #4
Source File: gym_utils.py    From tensor2tensor with Apache License 2.0
def remove_time_limit_wrapper(env):
  """Removes top level TimeLimit Wrapper.

  Removes the TimeLimit wrapper from the top level if it exists, and raises an
  error if any other TimeLimit wrapper is present deeper in the stack.

  Args:
    env: environment

  Returns:
    the env with removed time limit wrapper.
  """
  if isinstance(env, gym.wrappers.TimeLimit):
    env = env.env
  env_ = env
  while isinstance(env_, gym.Wrapper):
    if isinstance(env_, gym.wrappers.TimeLimit):
      raise ValueError("Can remove only top-level TimeLimit gym.Wrapper.")
    env_ = env_.env
  return env 
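Hypothetical usage (gym.make typically wraps registered envs in a top-level TimeLimit):

env = remove_time_limit_wrapper(gym.make('CartPole-v1'))
assert not isinstance(env, gym.wrappers.TimeLimit)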
Example #5
Source File: wrapper.py    From irl-benchmark with GNU General Public License v3.0
def is_unwrappable_to(env: gym.Env, to_wrapper: Type[gym.Wrapper]) -> bool:
    """Check if env can be unwrapped to to_wrapper.

    Parameters
    ----------
    env: gym.Env
        A gym environment (potentially wrapped).
    to_wrapper: Type[gym.Wrapper]
        A wrapper class extending gym.Wrapper.

    Returns
    -------
    bool
        True if env could be unwrapped to desired wrapper, False otherwise.
    """
    if isinstance(env, to_wrapper):
        return True
    while hasattr(env, 'env'):
        env = env.env
        if isinstance(env, to_wrapper):
            return True
    return False 
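Hypothetical usage:

env = gym.make('CartPole-v1')                                 # wrapped in TimeLimit by gym.make
assert is_unwrappable_to(env, gym.wrappers.TimeLimit)
assert is_unwrappable_to(env.unwrapped, gym.wrappers.TimeLimit) is False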
Example #6
Source File: atari_wrapper.py    From tf2rl with MIT License
def __init__(self, env, skip=4):
        """
        Return only every `skip`-th frame
        """
        gym.Wrapper.__init__(self, env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = np.zeros(
            (2,)+env.observation_space.shape, dtype=np.uint8)
        self._skip = skip 
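The corresponding step() typically repeats the action for `skip` frames, sums the rewards, and max-pools the last two raw frames to remove Atari flicker. A sketch in the style of baselines' MaxAndSkipEnv (illustrative, not the project's exact code):

def step(self, action):
    total_reward = 0.0
    done = None
    info = {}
    for i in range(self._skip):
        obs, reward, done, info = self.env.step(action)
        if i == self._skip - 2:
            self._obs_buffer[0] = obs
        if i == self._skip - 1:
            self._obs_buffer[1] = obs
        total_reward += reward
        if done:
            break
    # Max-pool over the last two frames to hide sprites that flicker every other frame.
    max_frame = self._obs_buffer.max(axis=0)
    return max_frame, total_reward, done, info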
Example #7
Source File: atari_wrapper.py    From tf2rl with MIT License
def __init__(self, env):
        """
        Make end-of-life == end-of-episode, but only reset on true game over.
        Done by DeepMind for the DQN and co. since it helps value estimation.
        """
        gym.Wrapper.__init__(self, env)
        self.lives = 0
        self.was_real_done = True 
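The corresponding step()/reset() pair usually marks a lost life as terminal for the learner while only performing a full reset on true game over. A sketch following the standard DeepMind/baselines recipe (illustrative):

def step(self, action):
    obs, reward, done, info = self.env.step(action)
    self.was_real_done = done
    lives = self.env.unwrapped.ale.lives()
    if 0 < lives < self.lives:
        done = True  # treat a lost life as end-of-episode for the learner
    self.lives = lives
    return obs, reward, done, info

def reset(self, **kwargs):
    if self.was_real_done:
        obs = self.env.reset(**kwargs)
    else:
        # No-op step to advance from the terminal/lost-life state.
        obs, _, _, _ = self.env.step(0)
    self.lives = self.env.unwrapped.ale.lives()
    return obs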
Example #8
Source File: gym_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, env, num_actions):
    """Constructs a wrapper for discretizing the action space.

    Args:
      env: environment to wrap.
      num_actions: A np.array of the same shape as the environment's
        action_spec. Elements in the array specify the number of actions to
        discretize to for each dimension.

    Raises:
      ValueError: If the action_spec shape and the limits shape are not equal.
    """

    if not isinstance(env.action_space, gym.spaces.box.Box):
      raise ValueError(
          "The action space is {}, but gym.spaces.box.Box is expected".format(
              env.action_space))

    gym.Wrapper.__init__(self, env)

    # We convert a scalar num_actions to array [num_actions, num_actions, ...]
    self._num_actions = np.broadcast_to(num_actions, env.action_space.shape)

    if env.action_space.shape != self._num_actions.shape:
      raise ValueError("Spec {} and limit shape do not match. Got {}".format(
          env.action_space.shape, self._num_actions.shape))
    self.action_space = gym.spaces.MultiDiscrete(nvec=self._num_actions)
    self._action_map = self._discretize_env(env) 
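Neither _discretize_env nor the accompanying step() is shown above. One self-contained way to realize the idea, assuming a 1-D Box action space (hypothetical, not the tensor2tensor implementation), is to precompute evenly spaced values per dimension and look them up when stepping:

def _discretize_env(self, env):
    # For each action dimension, discrete indices 0..n-1 map to evenly
    # spaced points between the continuous bounds.
    return [np.linspace(env.action_space.low[i], env.action_space.high[i], int(n))
            for i, n in enumerate(self._num_actions)]

def step(self, action):
    continuous_action = np.array(
        [self._action_map[i][a] for i, a in enumerate(action)],
        dtype=self.env.action_space.dtype)
    return self.env.step(continuous_action)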
Example #9
Source File: atari_wrapper.py    From tf2rl with MIT License
def __init__(self, env):
        """
        Take action on reset for environments that are fixed until firing.
        """
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3 
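The matching reset() typically presses FIRE (and one extra action) so that games which wait for a fire press actually start. A sketch following the usual baselines FireResetEnv pattern (illustrative):

def reset(self, **kwargs):
    self.env.reset(**kwargs)
    obs, _, done, _ = self.env.step(1)  # FIRE
    if done:
        self.env.reset(**kwargs)
    obs, _, done, _ = self.env.step(2)
    if done:
        self.env.reset(**kwargs)
    return obs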
Example #10
Source File: gym_utils.py    From tensor2tensor with Apache License 2.0
def __init__(self, env, p=0.25):
    gym.Wrapper.__init__(self, env)
    self.p = p
    self.last_action = 0 
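The accompanying step() presumably implements sticky actions: with probability p, the previous action is repeated instead of the requested one. A minimal sketch (illustrative):

def step(self, action):
    if np.random.uniform() < self.p:
        action = self.last_action  # repeat the previous action ("sticky actions")
    self.last_action = action
    return self.env.step(action)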
Example #11
Source File: atari_wrapper.py    From tf2rl with MIT License
def __init__(self, env, k):
        """
        Stack k last frames.
        Returns lazy array, which is much more memory efficient.
        See also baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(shp[:-1] + (shp[-1] * k,)),
            dtype=env.observation_space.dtype) 
Example #12
Source File: atari_wrapper.py    From EasyRL with Apache License 2.0
def __init__(self, env, noop_max=30):
        """
        Sample initial states by taking random number of no-ops on reset.
        no-op is assumed to be action 0.
        """
        gym.Wrapper.__init__(self, env)
        self.noop_max = noop_max
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 
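The matching reset() usually performs a random number of no-ops before handing control to the agent. A sketch in the style of baselines' NoopResetEnv (illustrative):

def reset(self, **kwargs):
    self.env.reset(**kwargs)
    if self.override_num_noops is not None:
        noops = self.override_num_noops
    else:
        noops = np.random.randint(1, self.noop_max + 1)
    obs = None
    for _ in range(noops):
        obs, _, done, _ = self.env.step(self.noop_action)
        if done:
            obs = self.env.reset(**kwargs)
    return obs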
Example #13
Source File: monte_carlo.py    From adversarial-policies with MIT License
def __init__(self, env):
        """Wraps a MujocoEnv, adding get_state and set_state methods.
        :param env: a MujocoEnv. NOTE: it must not be wrapped in a TimeLimit."""
        if hasattr(env, "_max_episode_steps"):
            raise TypeError(
                "Environment must not have a time limit " "(try passing in env.unwrapped instead)."
            )
        gym.Wrapper.__init__(self, env)
        self.sim = env.unwrapped.sim 
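The advertised get_state()/set_state() methods might be implemented as thin passthroughs to the mujoco_py simulator (a sketch, not necessarily the project's code):

def get_state(self):
    # Full simulator snapshot (time, qpos, qvel, ...).
    return self.sim.get_state()

def set_state(self, state):
    self.sim.set_state(state)
    self.sim.forward()  # recompute derived quantities after overwriting the state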
Example #14
Source File: atari_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, k):
        """Stack k last frames.

        Returns lazy array, which is much more memory efficient.

        See Also
        --------
        baselines.common.atari_wrappers.LazyFrames
        """
        gym.Wrapper.__init__(self, env)
        self.k = k
        self.frames = deque([], maxlen=k)
        shp = env.observation_space.shape
        self.observation_space = spaces.Box(low=0, high=255, shape=(shp[0], shp[1], shp[2] * k), dtype=env.observation_space.dtype) 
Example #15
Source File: atari_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, skip=4):
        """Return only every `skip`-th frame"""
        gym.Wrapper.__init__(self, env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = np.zeros((2,)+env.observation_space.shape, dtype=np.uint8)
        self._skip       = skip 
Example #16
Source File: atari_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env):
        """Take action on reset for environments that are fixed until firing."""
        gym.Wrapper.__init__(self, env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3 
Example #17
Source File: atari_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, noop_max=30):
        """Sample initial states by taking random number of no-ops on reset.
        No-op is assumed to be action 0.
        """
        gym.Wrapper.__init__(self, env)
        self.noop_max = noop_max
        self.override_num_noops = None
        self.noop_action = 0
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP' 
Example #18
Source File: retro_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, max_random_steps, on_startup=True, every_episode=False):
        gym.Wrapper.__init__(self, env)
        self.on_startup = on_startup
        self.every_episode = every_episode
        self.random_steps = max_random_steps
        self.last_obs = None
        if on_startup:
            self.some_random_steps() 
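some_random_steps() is not shown; presumably it resets the environment and takes a random number of random actions, keeping the last observation around. A sketch (illustrative):

def some_random_steps(self):
    self.last_obs = self.env.reset()
    n = np.random.randint(self.random_steps)
    for _ in range(n):
        self.last_obs, _, done, _ = self.env.step(self.env.action_space.sample())
        if done:
            self.last_obs = self.env.reset()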
Example #19
Source File: retro_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, savedir, k):
        gym.Wrapper.__init__(self, env)
        self.savedir = savedir
        self.k = k
        self.epcount = 0 
Example #20
Source File: retro_wrappers.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(self, env, n, stickprob):
        gym.Wrapper.__init__(self, env)
        self.n = n
        self.stickprob = stickprob
        self.curac = None
        self.rng = np.random.RandomState()
        self.supports_want_render = hasattr(env, "supports_want_render") 
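The matching step() presumably combines an n-frame skip with sticky actions: on the first sub-step the previous action is kept with probability stickprob, after which the requested action takes over. A sketch (illustrative, not the project's exact code):

def reset(self, **kwargs):
    self.curac = None
    return self.env.reset(**kwargs)

def step(self, action):
    total_reward = 0.0
    done = False
    info = {}
    for i in range(self.n):
        if self.curac is None:
            self.curac = action               # first step after reset: always use the new action
        elif i == 0 and self.rng.rand() > self.stickprob:
            self.curac = action               # otherwise the old action sticks with prob stickprob
        elif i == 1:
            self.curac = action               # from the second sub-step on, use the new action
        obs, reward, done, info = self.env.step(self.curac)
        total_reward += reward
        if done:
            break
    return obs, total_reward, done, info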