Python gym.wrappers.TimeLimit() Examples

The following are 11 code examples of gym.wrappers.TimeLimit(), drawn from open-source projects. The source file and originating project are listed above each example. You may also want to check out the other available functions and classes of the gym.wrappers module.
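Before the project-specific examples, here is a minimal, self-contained sketch of how gym.wrappers.TimeLimit is typically used with the classic gym step API. The environment id and the 50-step budget are arbitrary choices for illustration, not taken from any of the projects below.

import gym
from gym.wrappers import TimeLimit

# Wrap an environment so episodes are cut off after a fixed number of steps.
# 'CartPole-v1' and the 50-step budget are arbitrary illustrative choices.
env = TimeLimit(gym.make('CartPole-v1').unwrapped, max_episode_steps=50)

obs = env.reset()
done, steps = False, 0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    steps += 1

# When the step limit triggers, done is True even if the task itself has not
# ended; newer classic-gym releases also record info['TimeLimit.truncated'].
print(steps, info.get('TimeLimit.truncated', False))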
Example #1
Source File: __init__.py    From irl-benchmark with GNU General Public License v3.0
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.
    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if env_id not in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env 
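A brief usage sketch for make_env; it assumes 'FrozenLake-v0' is among the ids in the project's ENV_IDS. Note that for standard gym ids, gym.make() already applies a TimeLimit wrapper based on the registry spec, so only the custom MazeWorld environments are wrapped explicitly above.

# Hypothetical usage, assuming 'FrozenLake-v0' is listed in ENV_IDS.
env = make_env('FrozenLake-v0')
obs = env.reset()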
Example #2
Source File: monitor.py    From rltf with MIT License
def _attach_env_methods(self):
    """Attach self._env_step to the TimeLimit wrapper of the environment or if not present, to
    the unwrapped environment. Attach to TimeLimit in order to track the true env reset signals"""

    # Get the TimeLimit Wrapper or the unwrapped env
    currentenv = self.env
    while True:
      if isinstance(currentenv, (TimeLimit, MaxEpisodeLen)):
        break
      elif isinstance(currentenv, Wrapper):
        currentenv = currentenv.env
      else:
        break

    # Attach the **env** step function
    self.base_env         = currentenv
    self.base_env_step    = self.base_env.step
    self.base_env_reset   = self.base_env.reset
    self.base_env_render  = self.base_env.render
    self.base_env.step    = self._env_step
    self.base_env.reset   = self._env_reset
    self.base_env.render  = self._env_render


  #pylint: disable=method-hidden 
Example #3
Source File: wrappers.py    From rl-baselines-zoo with MIT License
def __init__(self, env, max_steps=1000, test_mode=False):
        assert isinstance(env.observation_space, gym.spaces.Box)
        # Add a time feature to the observation
        low, high = env.observation_space.low, env.observation_space.high
        low, high = np.concatenate((low, [0])), np.concatenate((high, [1.]))
        env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

        super(TimeFeatureWrapper, self).__init__(env)

        if isinstance(env, TimeLimit):
            self._max_steps = env._max_episode_steps
        else:
            self._max_steps = max_steps
        self._current_step = 0
        self._test_mode = test_mode 
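The __init__ above only sets up the bookkeeping. As a rough sketch (not necessarily rl-baselines-zoo's exact code), the rest of such a time-feature wrapper would append the remaining-time fraction to every observation along these lines:

# Sketch of the remaining methods of a time-feature wrapper; illustrative only.
def reset(self):
    self._current_step = 0
    return self._get_obs(self.env.reset())

def step(self, action):
    self._current_step += 1
    obs, reward, done, info = self.env.step(action)
    return self._get_obs(obs), reward, done, info

def _get_obs(self, obs):
    # Remaining time, scaled to [0, 1]; in test mode the feature is held constant.
    time_feature = 1 - (self._current_step / self._max_steps)
    if self._test_mode:
        time_feature = 1.0
    return np.concatenate((obs, [time_feature]))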
Example #4
Source File: dopamine_connector.py    From training_results_v0.5 with Apache License 2.0
def get_create_env_fun(batch_env_fn, time_limit):
  """TODO(konradczechowski): Add doc-string."""

  def create_env_fun(game_name, sticky_actions=True):
    del game_name, sticky_actions
    batch_env = batch_env_fn(in_graph=False)
    env = FlatBatchEnv(batch_env)
    env = TimeLimit(env, max_episode_steps=time_limit)
    env = ResizeObservation(env)  # pylint: disable=redefined-variable-type
    env = GameOverOnDone(env)
    return env

  return create_env_fun 
Example #5
Source File: wrappers.py    From rtrl with MIT License
def __init__(self, env, max_steps=None, key='reset'):
    super().__init__(env)
    self.reset_key = key
    from gym.wrappers import TimeLimit
    self.enforce = bool(max_steps)
    if max_steps is None:
      tl = get_wrapper_by_class(env, TimeLimit)
      max_steps = 1 << 31 if tl is None else tl._max_episode_steps
      # print("TimeLimitResetWrapper.max_steps =", max_steps)

    self.max_steps = max_steps
    self.t = 0 
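get_wrapper_by_class is a helper from the same project; a plausible implementation (shown purely as an illustration, not rtrl's exact code) walks the wrapper chain until it finds an instance of the requested class:

import gym

def get_wrapper_by_class(env, cls):
    # Illustrative helper: follow the .env chain and return the first wrapper
    # that is an instance of cls, or None if no such wrapper exists.
    while isinstance(env, gym.Wrapper):
        if isinstance(env, cls):
            return env
        env = env.env
    return None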
Example #6
Source File: wrappers.py    From pytorch-maml-rl with MIT License
def mujoco_wrapper(entry_point, **kwargs):
    normalization_scale = kwargs.pop('normalization_scale', 1.)
    max_episode_steps = kwargs.pop('max_episode_steps', 200)

    # Load the environment from its entry point
    env_cls = load(entry_point)
    env = env_cls(**kwargs)

    # Normalization wrapper
    env = NormalizedActionWrapper(env, scale=normalization_scale)

    # Time limit
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    return env 
Example #7
Source File: monitor.py    From rltf with MIT License
def _env_step(self, action):
    """Corresponds to the step function of the TimeLimit wrapper or the unwrapped environment"""
    self._before_env_step(action)
    # Call the actual env.step function
    obs, reward, done, info = self.base_env_step(action)
    self._after_env_step(obs, reward, done, info)
    return obs, reward, done, info 
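Only _env_step is shown here; a minimal sketch of what the matching _env_reset hook could look like (an illustration mirroring the step hook, not rltf's actual code; the _before/_after helper names are hypothetical, by analogy with _before_env_step and _after_env_step):

def _env_reset(self, **kwargs):
    # Illustrative counterpart to _env_step: bracket the true environment reset
    # with bookkeeping calls, then return the initial observation.
    self._before_env_reset()          # hypothetical helper
    obs = self.base_env_reset(**kwargs)
    self._after_env_reset(obs)        # hypothetical helper
    return obs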
Example #8
Source File: gym_adapter.py    From mbpo with MIT License
def __init__(self,
                 domain,
                 task,
                 *args,
                 env=None,
                 normalize=True,
                 observation_keys=None,
                 unwrap_time_limit=True,
                 **kwargs):
        assert not args, (
            "Gym environments don't support args. Use kwargs instead.")

        self.normalize = normalize
        self.observation_keys = observation_keys
        self.unwrap_time_limit = unwrap_time_limit

        self._Serializable__initialize(locals())
        super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

        if env is None:
            assert (domain is not None and task is not None), (domain, task)
            env_id = f"{domain}-{task}"
            env = gym.envs.make(env_id, **kwargs)
        else:
            assert domain is None and task is None, (domain, task)

        if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
            # Remove the TimeLimit wrapper that sets 'done = True' when
            # the time limit specified for each environment has been passed and
            # therefore the environment is not Markovian (terminal condition
            # depends on time rather than state).
            env = env.env

        if isinstance(env.observation_space, spaces.Dict):
            observation_keys = (
                observation_keys or list(env.observation_space.spaces.keys()))
        if normalize:
            env = NormalizeActionWrapper(env)

        self._env = env 
Example #9
Source File: ddpg.py    From chi with MIT License
def test_ddpg():
    import gym_mix
    env = gym.make('ContinuousCopyRand-v0')
    env = wrappers.TimeLimit(env, max_episode_steps=0)

    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a

    @model(optimizer=tf.train.AdamOptimizer(.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None, weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)

    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()

        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}') 
Example #10
Source File: test_monitor.py    From chainerrl with MIT License
def test(self):
        steps = 15

        env = gym.make('CartPole-v1')
        # unwrap default TimeLimit and wrap with new one to simulate done=True
        # at step 5
        self.assertIsInstance(env, TimeLimit)
        env = env.env  # unwrap
        env = TimeLimit(env, max_episode_steps=5)  # wrap

        tmpdir = tempfile.mkdtemp()
        try:
            env = chainerrl.wrappers.Monitor(
                env, directory=tmpdir, video_callable=lambda episode_id: True)
            episode_idx = 0
            episode_len = 0
            t = 0
            _ = env.reset()
            while True:
                _, _, done, info = env.step(env.action_space.sample())
                episode_len += 1
                t += 1
                if episode_idx == 1 and episode_len >= 3:
                    info['needs_reset'] = True  # simulate ContinuingTimeLimit
                if done or info.get('needs_reset', False) or t == steps:
                    if episode_idx + 1 == self.n_episodes or t == steps:
                        break
                    env.reset()
                    episode_idx += 1
                    episode_len = 0
            # `env.close()` is called when `env` is garbage-collected
            # (or explicitly deleted/closed).
            del(env)
            # check if videos & meta files were generated
            files = os.listdir(tmpdir)
            mp4s = [f for f in files if f.endswith('.mp4')]
            metas = [f for f in files if f.endswith('.meta.json')]
            stats = [f for f in files if f.endswith('.stats.json')]
            manifests = [f for f in files if f.endswith('.manifest.json')]
            self.assertEqual(len(mp4s), self.n_episodes)
            self.assertEqual(len(metas), self.n_episodes)
            self.assertEqual(len(stats), 1)
            self.assertEqual(len(manifests), 1)

        finally:
            shutil.rmtree(tmpdir) 
Example #11
Source File: monitor.py    From rltf with MIT License
def __init__(self, env, log_dir, mode, log_period=None, video_spec=None, eval_period=None):
    """
    Args:
      log_dir: str. The directory where to save the monitor videos and stats
      mode: str. Either 't' (train) or 'e' (eval) for the mode in which to start the monitor
      log_period: int. The period for logging statistics to stdout and to TensorBoard
      video_spec: lambda, int, False or None. Specifies how often to record episodes.
        - If a lambda, it must take the episode number and return True/False indicating whether a video should be recorded.
        - If an int, it specifies a period in episodes.
        - If False, video recording is disabled.
        - If None, every 1000th episode is recorded.
      eval_period: int. Required only in evaluation mode. Needed to compute the correct logging step.
    """

    assert mode in ['t', 'e']

    super().__init__(env)

    log_dir   = os.path.join(log_dir, "monitor")
    video_dir = os.path.join(log_dir, "videos")

    # Member data
    self.video_dir    = video_dir
    self.log_dir      = log_dir
    self.enable_video = self._get_video_callable(video_spec)

    # Create the monitor directory
    self._make_log_dir()

    # Composition objects
    self.stats_recorder = StatsRecorder(log_dir, mode, log_period, eval_period)
    self.video_plotter  = VideoPlotter(self.env, mode=mode)
    self.video_recorder = None

    # Attach StatsRecorder agent methods
    self._before_agent_step   = self.stats_recorder.before_agent_step
    self._after_agent_step    = self.stats_recorder.after_agent_step
    self._before_agent_reset  = self.stats_recorder.before_agent_reset
    self._after_agent_reset   = self.stats_recorder.after_agent_reset

    # Find TimeLimit wrapper and attach step(), reset() and render()
    self.base_env         = None
    self.base_env_step    = None
    self.base_env_reset   = None
    self.base_env_render  = None
    self._attach_env_methods()

    # Attach member methods
    self.enable_video_plots = self.video_plotter.activate
    self.set_stdout_logs    = self.stats_recorder.set_stdout_logs
    self.set_summary_getter = self.stats_recorder.set_summary_getter
    self.save               = self.stats_recorder.save
    self.log_stats          = self.stats_recorder.log_stats
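The docstring above fully specifies the accepted video_spec values; a sketch of how _get_video_callable could translate that spec into a per-episode predicate (derived from the docstring, not rltf's actual implementation):

def _get_video_callable(self, video_spec):
    # Illustrative mapping from the documented video_spec values to a callable
    # that takes the episode number and returns True/False.
    if video_spec is None:
        return lambda ep: ep % 1000 == 0          # default: every 1000th episode
    if video_spec is False:
        return lambda ep: False                    # video recording disabled
    if isinstance(video_spec, int):
        return lambda ep: ep % video_spec == 0     # fixed period in episodes
    if callable(video_spec):
        return video_spec                          # user-provided predicate
    raise ValueError("Invalid video_spec: {}".format(video_spec))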