Python gym.wrappers.TimeLimit() Examples

The following are 11 code examples of gym.wrappers.TimeLimit(), drawn from open-source projects. The source file and originating project are listed above each example. You may also want to check out the other available functions and classes of the gym.wrappers module.
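Before the project-specific examples, here is a minimal, self-contained sketch of how gym.wrappers.TimeLimit is typically used with the classic gym step API. The environment id and the 50-step budget are arbitrary choices for illustration, not taken from any of the projects below.

import gym
from gym.wrappers import TimeLimit

# Wrap an environment so episodes are cut off after a fixed number of steps.
# 'CartPole-v1' and the 50-step budget are arbitrary illustrative choices.
env = TimeLimit(gym.make('CartPole-v1').unwrapped, max_episode_steps=50)

obs = env.reset()
done, steps = False, 0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    steps += 1

# When the step limit triggers, done is True even if the task itself has not
# ended; newer classic-gym releases also record info['TimeLimit.truncated'].
print(steps, info.get('TimeLimit.truncated', False))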
Example #1
Source File: __init__.py    From irl-benchmark with GNU General Public License v3.0
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.
    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env 
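A brief usage sketch for make_env; it assumes 'FrozenLake-v0' is among the ids in the project's ENV_IDS. Note that for standard gym ids, gym.make() already applies a TimeLimit wrapper based on the registry spec, so only the custom MazeWorld environments are wrapped explicitly above.

# Hypothetical usage, assuming 'FrozenLake-v0' is listed in ENV_IDS.
env = make_env('FrozenLake-v0')
obs = env.reset()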
Example #2
Source File: monitor.py    From rltf with MIT License
def _attach_env_methods(self):
    """Attach self._env_step to the TimeLimit wrapper of the environment or if not present, to
    the unwrapped environment. Attach to TimeLimit in order to track the true env reset signals"""

    # Get the TimeLimit Wrapper or the unwrapped env
    currentenv = self.env
    while True:
      if isinstance(currentenv, (TimeLimit, MaxEpisodeLen)):
        break
      elif isinstance(currentenv, Wrapper):
        currentenv = currentenv.env
      else:
        break

    # Attach the **env** step function
    self.base_env         = currentenv
    self.base_env_step    = self.base_env.step
    self.base_env_reset   = self.base_env.reset
    self.base_env_render  = self.base_env.render
    self.base_env.step    = self._env_step
    self.base_env.reset   = self._env_reset
    self.base_env.render  = self._env_render


  #pylint: disable=method-hidden 
Example #3
Source File: wrappers.py    From rl-baselines-zoo with MIT License
def __init__(self, env, max_steps=1000, test_mode=False):
        assert isinstance(env.observation_space, gym.spaces.Box)
        # Add a time feature to the observation
        low, high = env.observation_space.low, env.observation_space.high
        low, high = np.concatenate((low, [0])), np.concatenate((high, [1.]))
        env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

        super(TimeFeatureWrapper, self).__init__(env)

        if isinstance(env, TimeLimit):
            self._max_steps = env._max_episode_steps
        else:
            self._max_steps = max_steps
        self._current_step = 0
        self._test_mode = test_mode 
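The __init__ above only sets up the bookkeeping. As a rough sketch (not necessarily rl-baselines-zoo's exact code), the rest of such a time-feature wrapper would append the remaining-time fraction to every observation along these lines:

# Sketch of the remaining methods of a time-feature wrapper; illustrative only.
def reset(self):
    self._current_step = 0
    return self._get_obs(self.env.reset())

def step(self, action):
    self._current_step += 1
    obs, reward, done, info = self.env.step(action)
    return self._get_obs(obs), reward, done, info

def _get_obs(self, obs):
    # Remaining time, scaled to [0, 1]; in test mode the feature is held constant.
    time_feature = 1 - (self._current_step / self._max_steps)
    if self._test_mode:
        time_feature = 1.0
    return np.concatenate((obs, [time_feature]))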
Example #4
Source File: dopamine_connector.py    From training_results_v0.5 with Apache License 2.0
def get_create_env_fun(batch_env_fn, time_limit):
  """TODO(konradczechowski): Add doc-string."""

  def create_env_fun(game_name, sticky_actions=True):
    del game_name, sticky_actions
    batch_env = batch_env_fn(in_graph=False)
    env = FlatBatchEnv(batch_env)
    env = TimeLimit(env, max_episode_steps=time_limit)
    env = ResizeObservation(env)  # pylint: disable=redefined-variable-type
    env = GameOverOnDone(env)
    return env

  return create_env_fun 
Example #5
Source File: wrappers.py    From rtrl with MIT License
def __init__(self, env, max_steps=None, key='reset'):
    super().__init__(env)
    self.reset_key = key
    from gym.wrappers import TimeLimit
    self.enforce = bool(max_steps)
    if max_steps is None:
      tl = get_wrapper_by_class(env, TimeLimit)
      max_steps = 1 << 31 if tl is None else tl._max_episode_steps
      # print("TimeLimitResetWrapper.max_steps =", max_steps)

    self.max_steps = max_steps
    self.t = 0 
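get_wrapper_by_class is a helper from the same project; a plausible implementation (shown purely as an illustration, not rtrl's exact code) walks the wrapper chain until it finds an instance of the requested class:

import gym

def get_wrapper_by_class(env, cls):
    # Illustrative helper: follow the .env chain and return the first wrapper
    # that is an instance of cls, or None if no such wrapper exists.
    while isinstance(env, gym.Wrapper):
        if isinstance(env, cls):
            return env
        env = env.env
    return None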
Example #6
Source File: wrappers.py    From pytorch-maml-rl with MIT License
def mujoco_wrapper(entry_point, **kwargs):
    normalization_scale = kwargs.pop('normalization_scale', 1.)
    max_episode_steps = kwargs.pop('max_episode_steps', 200)

    # Load the environment from its entry point
    env_cls = load(entry_point)
    env = env_cls(**kwargs)

    # Normalization wrapper
    env = NormalizedActionWrapper(env, scale=normalization_scale)

    # Time limit
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    return env 
Example #7
Source File: monitor.py    From rltf with MIT License
def _env_step(self, action):
    """Corresponds to the step function of the TimeLimit wrapper or the unwrapped environment"""
    self._before_env_step(action)
    # Call the actual env.step function
    obs, reward, done, info = self.base_env_step(action)
    self._after_env_step(obs, reward, done, info)
    return obs, reward, done, info 
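Only _env_step is shown here; a minimal sketch of what the matching _env_reset hook could look like (an illustration mirroring the step hook, not rltf's actual code; the _before/_after helper names are hypothetical, by analogy with _before_env_step and _after_env_step):

def _env_reset(self, **kwargs):
    # Illustrative counterpart to _env_step: bracket the true environment reset
    # with bookkeeping calls, then return the initial observation.
    self._before_env_reset()          # hypothetical helper
    obs = self.base_env_reset(**kwargs)
    self._after_env_reset(obs)        # hypothetical helper
    return obs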
Example #8
Source File: gym_adapter.py    From mbpo with MIT License
def __init__(self,
                 domain,
                 task,
                 *args,
                 env=None,
                 normalize=True,
                 observation_keys=None,
                 unwrap_time_limit=True,
                 **kwargs):
        assert not args, (
            "Gym environments don't support args. Use kwargs instead.")

        self.normalize = normalize
        self.observation_keys = observation_keys
        self.unwrap_time_limit = unwrap_time_limit

        self._Serializable__initialize(locals())
        super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

        if env is None:
            assert (domain is not None and task is not None), (domain, task)
            env_id = f"{domain}-{task}"
            env = gym.envs.make(env_id, **kwargs)
        else:
            assert domain is None and task is None, (domain, task)

        if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
            # Remove the TimeLimit wrapper that sets 'done = True' when
            # the time limit specified for each environment has been passed and
            # therefore the environment is not Markovian (terminal condition
            # depends on time rather than state).
            env = env.env

        if isinstance(env.observation_space, spaces.Dict):
            observation_keys = (
                observation_keys or list(env.observation_space.spaces.keys()))
        if normalize:
            env = NormalizeActionWrapper(env)

        self._env = env 
Example #9
Source File: ddpg.py    From chi with MIT License
def test_ddpg():
    import gym_mix
    env = gym.make('ContinuousCopyRand-v0')
    env = wrappers.TimeLimit(env, max_episode_steps=0)

    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a

    @model(optimizer=tf.train.AdamOptimizer(.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None, weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)

    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()

        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}') 
Example #10
Source File: test_monitor.py    From chainerrl with MIT License
def test(self):
        steps = 15

        env = gym.make('CartPole-v1')
        # unwrap default TimeLimit and wrap with new one to simulate done=True
        # at step 5
        self.assertIsInstance(env, TimeLimit)
        env = env.env  # unwrap
        env = TimeLimit(env, max_episode_steps=5)  # wrap

        tmpdir = tempfile.mkdtemp()
        try:
            env = chainerrl.wrappers.Monitor(
                env, directory=tmpdir, video_callable=lambda episode_id: True)
            episode_idx = 0
            episode_len = 0
            t = 0
            _ = env.reset()
            while True:
                _, _, done, info = env.step(env.action_space.sample())
                episode_len += 1
                t += 1
                if episode_idx == 1 and episode_len >= 3:
                    info['needs_reset'] = True  # simulate ContinuingTimeLimit
                if done or info.get('needs_reset', False) or t == steps:
                    if episode_idx + 1 == self.n_episodes or t == steps:
                        break
                    env.reset()
                    episode_idx += 1
                    episode_len = 0
            # `env.close()` is called when `env` is garbage-collected
            # (or explicitly deleted/closed).
            del(env)
            # check if videos & meta files were generated
            files = os.listdir(tmpdir)
            mp4s = [f for f in files if f.endswith('.mp4')]
            metas = [f for f in files if f.endswith('.meta.json')]
            stats = [f for f in files if f.endswith('.stats.json')]
            manifests = [f for f in files if f.endswith('.manifest.json')]
            self.assertEqual(len(mp4s), self.n_episodes)
            self.assertEqual(len(metas), self.n_episodes)
            self.assertEqual(len(stats), 1)
            self.assertEqual(len(manifests), 1)

        finally:
            shutil.rmtree(tmpdir) 
Example #11
Source File: monitor.py    From rltf with MIT License
def __init__(self, env, log_dir, mode, log_period=None, video_spec=None, eval_period=None):
    """
    Args:
      log_dir: str. The directory where to save the monitor videos and stats
      mode: str. Either 't' (train) or 'e' (eval) for the mode in which to start the monitor
      log_period: int. The period for logging statistics to stdout and to TensorBoard
      video_spec: lambda, int, False or None. Specifies how often to record episodes.
        - If a lambda, it must take the episode number and return True/False indicating whether a video should be recorded.
        - If an int, it specifies a period in episodes.
        - If False, video recording is disabled.
        - If None, every 1000th episode is recorded.
      eval_period: int. Required only in evaluation mode. Needed to compute the correct logging step.
    """

    assert mode in ['t', 'e']

    super().__init__(env)

    log_dir   = os.path.join(log_dir, "monitor")
    video_dir = os.path.join(log_dir, "videos")

    # Member data
    self.video_dir    = video_dir
    self.log_dir      = log_dir
    self.enable_video = self._get_video_callable(video_spec)

    # Create the monitor directory
    self._make_log_dir()

    # Composition objects
    self.stats_recorder = StatsRecorder(log_dir, mode, log_period, eval_period)
    self.video_plotter  = VideoPlotter(self.env, mode=mode)
    self.video_recorder = None

    # Attach StatsRecorder agent methods
    self._before_agent_step   = self.stats_recorder.before_agent_step
    self._after_agent_step    = self.stats_recorder.after_agent_step
    self._before_agent_reset  = self.stats_recorder.before_agent_reset
    self._after_agent_reset   = self.stats_recorder.after_agent_reset

    # Find TimeLimit wrapper and attach step(), reset() and render()
    self.base_env         = None
    self.base_env_step    = None
    self.base_env_reset   = None
    self.base_env_render  = None
    self._attach_env_methods()

    # Attach member methods
    self.enable_video_plots = self.video_plotter.activate
    self.set_stdout_logs    = self.stats_recorder.set_stdout_logs
    self.set_summary_getter = self.stats_recorder.set_summary_getter
    self.save               = self.stats_recorder.save
    self.log_stats          = self.stats_recorder.log_stats
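The docstring above fully specifies the accepted video_spec values; a sketch of how _get_video_callable could translate that spec into a per-episode predicate (derived from the docstring, not rltf's actual implementation):

def _get_video_callable(self, video_spec):
    # Illustrative mapping from the documented video_spec values to a callable
    # that takes the episode number and returns True/False.
    if video_spec is None:
        return lambda ep: ep % 1000 == 0          # default: every 1000th episode
    if video_spec is False:
        return lambda ep: False                    # video recording disabled
    if isinstance(video_spec, int):
        return lambda ep: ep % video_spec == 0     # fixed period in episodes
    if callable(video_spec):
        return video_spec                          # user-provided predicate
    raise ValueError("Invalid video_spec: {}".format(video_spec))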