Python gym.wrappers.TimeLimit() Examples
The following are 12 code examples of gym.wrappers.TimeLimit(), collected from open-source projects. Each example lists its source file and the project it comes from. You may also want to check out the other functions and classes available in the gym.wrappers module.
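As a quick orientation before the project examples: TimeLimit wraps an existing environment and forces the episode to end once a fixed number of steps has elapsed. A minimal usage sketch (not taken from any of the projects below; CartPole-v1 is used only as a familiar placeholder environment):

import gym
from gym.wrappers import TimeLimit

# Start from the raw environment so CartPole-v1's default 500-step limit
# does not interfere, then impose a 100-step limit of our own.
env = TimeLimit(gym.make('CartPole-v1').unwrapped, max_episode_steps=100)

obs = env.reset()
done, steps = False, 0
while not done:
    obs, reward, done, info = env.step(env.action_space.sample())
    steps += 1
print(steps)  # at most 100, even if the pole never falls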
Example #1
Source File: __init__.py From irl-benchmark with GNU General Public License v3.0
def make_env(env_id: str):
    """Make a basic gym environment, without any special wrappers.

    Parameters
    ----------
    env_id: str
        The environment's id, e.g. 'FrozenLake-v0'.

    Returns
    -------
    gym.Env
        A gym environment.
    """
    assert env_id in ENV_IDS
    if not env_id in ENV_IDS_NON_GYM:
        env = gym.make(env_id)
    else:
        if env_id == 'MazeWorld0-v0':
            env = TimeLimit(MazeWorld(map_id=0), max_episode_steps=200)
        elif env_id == 'MazeWorld1-v0':
            env = TimeLimit(MazeWorld(map_id=1), max_episode_steps=200)
        else:
            raise NotImplementedError()
    return env
Example #2
Source File: monitor.py From rltf with MIT License
def _attach_env_methods(self):
    """Attach self._env_step to the TimeLimit wrapper of the environment or,
    if not present, to the unwrapped environment. Attach to TimeLimit in order
    to track the true env reset signals"""

    # Get the TimeLimit Wrapper or the unwrapped env
    currentenv = self.env
    while True:
        if isinstance(currentenv, (TimeLimit, MaxEpisodeLen)):
            break
        elif isinstance(currentenv, Wrapper):
            currentenv = currentenv.env
        else:
            break

    # Attach the **env** step function
    self.base_env = currentenv
    self.base_env_step = self.base_env.step
    self.base_env_reset = self.base_env.reset
    self.base_env_render = self.base_env.render

    self.base_env.step = self._env_step
    self.base_env.reset = self._env_reset
    self.base_env.render = self._env_render  # pylint: disable=method-hidden
Example #3
Source File: wrappers.py From rl-baselines-zoo with MIT License
def __init__(self, env, max_steps=1000, test_mode=False):
    assert isinstance(env.observation_space, gym.spaces.Box)

    # Add a time feature to the observation
    low, high = env.observation_space.low, env.observation_space.high
    low, high = np.concatenate((low, [0])), np.concatenate((high, [1.]))
    env.observation_space = gym.spaces.Box(low=low, high=high, dtype=np.float32)

    super(TimeFeatureWrapper, self).__init__(env)

    if isinstance(env, TimeLimit):
        self._max_steps = env._max_episode_steps
    else:
        self._max_steps = max_steps
    self._current_step = 0
    self._test_mode = test_mode
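The excerpt above only shows the constructor, which widens the observation space by one dimension. To round out the picture, here is a hypothetical, stripped-down wrapper (class name and details are illustrative, not rl-baselines-zoo's exact code) showing how such a time feature is typically appended on every step:

import gym
import numpy as np

class SimpleTimeFeatureWrapper(gym.Wrapper):
    """Illustrative sketch: append the remaining-time fraction to observations."""

    def __init__(self, env, max_steps=1000, test_mode=False):
        super().__init__(env)
        self._max_steps = max_steps
        self._current_step = 0
        self._test_mode = test_mode

    def reset(self, **kwargs):
        self._current_step = 0
        return self._get_obs(self.env.reset(**kwargs))

    def step(self, action):
        self._current_step += 1
        obs, reward, done, info = self.env.step(action)
        return self._get_obs(obs), reward, done, info

    def _get_obs(self, obs):
        # Remaining time scaled to [0, 1]; frozen at 1 in test mode so the
        # policy cannot exploit the feature at evaluation time.
        if self._test_mode:
            time_feature = 1.0
        else:
            time_feature = 1 - self._current_step / self._max_steps
        return np.concatenate((obs, [time_feature]))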
Example #4
Source File: dopamine_connector.py From training_results_v0.5 with Apache License 2.0
def get_create_env_fun(batch_env_fn, time_limit):
    """TODO(konradczechowski): Add doc-string."""

    def create_env_fun(game_name, sticky_actions=True):
        del game_name, sticky_actions
        batch_env = batch_env_fn(in_graph=False)
        env = FlatBatchEnv(batch_env)
        env = TimeLimit(env, max_episode_steps=time_limit)
        env = ResizeObservation(env)  # pylint: disable=redefined-variable-type
        env = GameOverOnDone(env)
        return env

    return create_env_fun
Example #5
Source File: dopamine_connector.py From training_results_v0.5 with Apache License 2.0
def get_create_env_fun(batch_env_fn, time_limit):
    """TODO(konradczechowski): Add doc-string."""

    def create_env_fun(game_name, sticky_actions=True):
        del game_name, sticky_actions
        batch_env = batch_env_fn(in_graph=False)
        env = FlatBatchEnv(batch_env)
        env = TimeLimit(env, max_episode_steps=time_limit)
        env = ResizeObservation(env)  # pylint: disable=redefined-variable-type
        env = GameOverOnDone(env)
        return env

    return create_env_fun
Example #6
Source File: wrappers.py From rtrl with MIT License
def __init__(self, env, max_steps=None, key='reset'):
    super().__init__(env)
    self.reset_key = key
    from gym.wrappers import TimeLimit
    self.enforce = bool(max_steps)
    if max_steps is None:
        tl = get_wrapper_by_class(env, TimeLimit)
        max_steps = 1 << 31 if tl is None else tl._max_episode_steps
        # print("TimeLimitResetWrapper.max_steps =", max_steps)
    self.max_steps = max_steps
    self.t = 0
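The get_wrapper_by_class helper used above is defined elsewhere in the rtrl project; a minimal stand-in that walks the wrapper chain might look like this (hypothetical reimplementation for illustration, not the project's exact code):

from gym import Wrapper

def get_wrapper_by_class(env, cls):
    """Return the first wrapper of type `cls` found going inward, or None."""
    while isinstance(env, Wrapper):
        if isinstance(env, cls):
            return env
        env = env.env
    return None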
Example #7
Source File: wrappers.py From pytorch-maml-rl with MIT License
def mujoco_wrapper(entry_point, **kwargs):
    normalization_scale = kwargs.pop('normalization_scale', 1.)
    max_episode_steps = kwargs.pop('max_episode_steps', 200)

    # Load the environment from its entry point
    env_cls = load(entry_point)
    env = env_cls(**kwargs)

    # Normalization wrapper
    env = NormalizedActionWrapper(env, scale=normalization_scale)

    # Time limit
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    return env
Example #8
Source File: monitor.py From rltf with MIT License
def _env_step(self, action):
    """Corresponds to the step function of the TimeLimit wrapper or the
    unwrapped environment"""
    self._before_env_step(action)

    # Call the actual env.step function
    obs, reward, done, info = self.base_env_step(action)

    self._after_env_step(obs, reward, done, info)

    return obs, reward, done, info
Example #9
Source File: gym_adapter.py From mbpo with MIT License
def __init__(self,
             domain,
             task,
             *args,
             env=None,
             normalize=True,
             observation_keys=None,
             unwrap_time_limit=True,
             **kwargs):
    assert not args, (
        "Gym environments don't support args. Use kwargs instead.")

    self.normalize = normalize
    self.observation_keys = observation_keys
    self.unwrap_time_limit = unwrap_time_limit

    self._Serializable__initialize(locals())
    super(GymAdapter, self).__init__(domain, task, *args, **kwargs)

    if env is None:
        assert (domain is not None and task is not None), (domain, task)
        env_id = f"{domain}-{task}"
        env = gym.envs.make(env_id, **kwargs)
    else:
        assert domain is None and task is None, (domain, task)

    if isinstance(env, wrappers.TimeLimit) and unwrap_time_limit:
        # Remove the TimeLimit wrapper that sets 'done = True' when
        # the time limit specified for each environment has been passed and
        # therefore the environment is not Markovian (terminal condition
        # depends on time rather than state).
        env = env.env

    if isinstance(env.observation_space, spaces.Dict):
        observation_keys = (
            observation_keys or list(env.observation_space.spaces.keys()))
    if normalize:
        env = NormalizeActionWrapper(env)

    self._env = env
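The comment in this example explains why unwrapping matters: a done flag raised by TimeLimit signals truncation, not a true terminal state. More recent gym versions also annotate info with a 'TimeLimit.truncated' key, which lets agent code keep the wrapper and still bootstrap correctly. A small sketch assuming that behavior (roughly gym 0.15 and later; treat the version bound as approximate):

import gym
from gym.wrappers import TimeLimit

def step_with_truncation(env, action):
    """Step the env and report whether `done` came from the time limit
    rather than from a genuine terminal state."""
    obs, reward, done, info = env.step(action)
    truncated = info.get('TimeLimit.truncated', False)
    return obs, reward, done and not truncated, truncated, info

env = TimeLimit(gym.make('CartPole-v1').unwrapped, max_episode_steps=10)
env.reset()
for _ in range(10):
    obs, reward, terminal, truncated, info = step_with_truncation(
        env, env.action_space.sample())
    if terminal or truncated:
        break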
Example #10
Source File: ddpg.py From chi with MIT License
def test_ddpg():
    import gym_mix
    env = gym.make('ContinuousCopyRand-v0')
    env = wrappers.TimeLimit(env, max_episode_steps=0)

    @model(optimizer=tf.train.AdamOptimizer(0.0001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def actor(x):
        x = layers.fully_connected(x, 50, biases_initializer=layers.xavier_initializer())
        a = layers.fully_connected(x, env.action_space.shape[0], None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return a

    @model(optimizer=tf.train.AdamOptimizer(.001),
           tracker=tf.train.ExponentialMovingAverage(1 - 0.001))
    def critic(x, a):
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = tf.concat([x, a], axis=1)
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        x = layers.fully_connected(x, 300, biases_initializer=layers.xavier_initializer())
        q = layers.fully_connected(x, 1, None,
                                   weights_initializer=tf.random_normal_initializer(0, 1e-4))
        return tf.squeeze(q, 1)

    agent = DdpgAgent(env, actor, critic)

    for ep in range(10000):
        R, _ = agent.play_episode()

        if ep % 100 == 0:
            print(f'Return after episode {ep} is {R}')
Example #11
Source File: test_monitor.py From chainerrl with MIT License
def test(self):
    steps = 15

    env = gym.make('CartPole-v1')
    # unwrap default TimeLimit and wrap with new one to simulate done=True
    # at step 5
    self.assertIsInstance(env, TimeLimit)
    env = env.env  # unwrap
    env = TimeLimit(env, max_episode_steps=5)  # wrap

    tmpdir = tempfile.mkdtemp()
    try:
        env = chainerrl.wrappers.Monitor(
            env, directory=tmpdir, video_callable=lambda episode_id: True)
        episode_idx = 0
        episode_len = 0
        t = 0
        _ = env.reset()
        while True:
            _, _, done, info = env.step(env.action_space.sample())
            episode_len += 1
            t += 1
            if episode_idx == 1 and episode_len >= 3:
                info['needs_reset'] = True  # simulate ContinuingTimeLimit
            if done or info.get('needs_reset', False) or t == steps:
                if episode_idx + 1 == self.n_episodes or t == steps:
                    break
                env.reset()
                episode_idx += 1
                episode_len = 0

        # `env.close()` is called when `env` is garbage-collected
        # (or explicitly deleted/closed).
        del env

        # check if videos & meta files were generated
        files = os.listdir(tmpdir)
        mp4s = [f for f in files if f.endswith('.mp4')]
        metas = [f for f in files if f.endswith('.meta.json')]
        stats = [f for f in files if f.endswith('.stats.json')]
        manifests = [f for f in files if f.endswith('.manifest.json')]

        self.assertEqual(len(mp4s), self.n_episodes)
        self.assertEqual(len(metas), self.n_episodes)
        self.assertEqual(len(stats), 1)
        self.assertEqual(len(manifests), 1)
    finally:
        shutil.rmtree(tmpdir)
Example #12
Source File: monitor.py From rltf with MIT License
def __init__(self, env, log_dir, mode, log_period=None, video_spec=None, eval_period=None):
    """
    Args:
        log_dir: str. The directory where to save the monitor videos and stats
        log_period: int. The period for logging statistic to stdout and to TensorBoard
        mode: str. Either 't' (train) or 'e' (eval) for the mode in which to start the monitor
        video_spec: lambda, int, False or None. Specifies how often to record episodes.
            - If lambda, it must take the episode number and return True/False if a video should be recorded.
            - `int` specifies a period in episodes
            - `False`, disables video recording
            - If `None`, every 1000th episode is recorded
        eval_period: int. Required only in evaluation mode. Needed to compute the correct logging step.
    """
    assert mode in ['t', 'e']

    super().__init__(env)

    log_dir = os.path.join(log_dir, "monitor")
    video_dir = os.path.join(log_dir, "videos")

    # Member data
    self.video_dir = video_dir
    self.log_dir = log_dir
    self.enable_video = self._get_video_callable(video_spec)

    # Create the monitor directory
    self._make_log_dir()

    # Composition objects
    self.stats_recorder = StatsRecorder(log_dir, mode, log_period, eval_period)
    self.video_plotter = VideoPlotter(self.env, mode=mode)
    self.video_recorder = None

    # Attach StatsRecorder agent methods
    self._before_agent_step = self.stats_recorder.before_agent_step
    self._after_agent_step = self.stats_recorder.after_agent_step
    self._before_agent_reset = self.stats_recorder.before_agent_reset
    self._after_agent_reset = self.stats_recorder.after_agent_reset

    # Find TimeLimit wrapper and attach step(), reset() and render()
    self.base_env = None
    self.base_env_step = None
    self.base_env_reset = None
    self.base_env_render = None
    self._attach_env_methods()

    # Attach member methods
    self.enable_video_plots = self.video_plotter.activate
    self.set_stdout_logs = self.stats_recorder.set_stdout_logs
    self.set_summary_getter = self.stats_recorder.set_summary_getter
    self.save = self.stats_recorder.save
    self.log_stats = self.stats_recorder.log_stats