Python gym.wrappers.Monitor() Examples
The following are 30 code examples of gym.wrappers.Monitor(), drawn from open-source projects; the originating project, source file, and license are noted above each example. You may also want to check out the other available functions and classes of the gym.wrappers module.
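Before the project examples, here is a minimal, self-contained sketch of the usage pattern they all share: wrap an environment with Monitor so that episode statistics and videos are written to a directory. This is a sketch, not taken from any of the projects below; it assumes a gym version that still ships gym.wrappers.Monitor (later gym releases replaced it with wrappers such as RecordVideo), and the environment id, output path, and random policy are illustrative only. Video recording may also require ffmpeg and a renderer to be available.

import gym
from gym import wrappers

# Record every episode to an illustrative output directory.
# force=True clears any previous recordings in that directory.
env = gym.make('CartPole-v0')
env = wrappers.Monitor(env, '/tmp/cartpole-monitor', force=True,
                       video_callable=lambda episode_id: True)

for episode in range(3):
    observation = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()  # random policy, purely for illustration
        observation, reward, done, info = env.step(action)

env.close()  # flushes the monitor's statistics and video files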
Example #1
Source File: viz.py From learning2run with MIT License

def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            if record:
                env.close()
                return
Example #2
Source File: ppo_atari_visual.py From cleanrl with MIT License

def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = ProbsVisualizationWrapper(env)
                env = Monitor(env, f'videos/{experiment_name}')
        env = wrap_pytorch(
            wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #3
Source File: ppo_atari.py From cleanrl with MIT License

def make_env(gym_id, seed, idx):
    def thunk():
        env = gym.make(gym_id)
        env = wrap_atari(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env = wrap_pytorch(
            wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
        )
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #4
Source File: gym.py From malmo-challenge with MIT License

def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
    assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
    assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
    if isinstance(repeat_action, int):
        assert repeat_action >= 1, "repeat_action should be >= 1"
    elif isinstance(repeat_action, tuple):
        assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
        assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

    super(GymEnvironment, self).__init__()

    self._state_builder = state_builder
    self._env = gym.make(env_name)
    self._env.env.frameskip = repeat_action
    self._no_op = max(0, no_op)
    self._done = True

    if monitoring_path is not None:
        self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Example #5
Source File: run_dqn_atari.py From berkeleydeeprlcourse-homework-pytorch with MIT License

def get_env(env_name, exp_name, seed):
    env = gym.make(env_name)

    set_global_seeds(seed)
    env.seed(seed)

    # Set Up Logger
    logdir = 'dqn_' + exp_name + '_' + env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = osp.join('data', logdir)
    logdir = osp.join(logdir, '%d'%seed)
    logz.configure_output_dir(logdir)
    hyperparams = {'exp_name': exp_name, 'env_name': env_name}
    logz.save_hyperparams(hyperparams)

    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
Example #6
Source File: run_dqn_ram.py From berkeleydeeprlcourse-homework-pytorch with MIT License

def get_env(env_name, exp_name, seed):
    env = gym.make(env_name)

    set_global_seeds(seed)
    env.seed(seed)

    # Set Up Logger
    logdir = 'dqn_' + exp_name + '_' + env_name + '_' + time.strftime("%d-%m-%Y_%H-%M-%S")
    logdir = osp.join('data', logdir)
    logdir = osp.join(logdir, '%d'%seed)
    logz.configure_output_dir(logdir)
    hyperparams = {'exp_name': exp_name, 'env_name': env_name}
    logz.save_hyperparams(hyperparams)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
Example #7
Source File: viz.py From evolution-strategies-starter with MIT License

def main(env_id, policy_file, record, stochastic, extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    from es_distributed.policies import MujocoPolicy
    import numpy as np

    env = gym.make(env_id)
    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    with tf.Session():
        pi = MujocoPolicy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            if record:
                env.close()
                return
Example #8
Source File: gym.py From malmo-challenge with MIT License

def __init__(self, env_name, state_builder=ALEStateBuilder(), repeat_action=4, no_op=30, monitoring_path=None):
    assert isinstance(state_builder, StateBuilder), 'state_builder should inherit from StateBuilder'
    assert isinstance(repeat_action, (int, tuple)), 'repeat_action should be int or tuple'
    if isinstance(repeat_action, int):
        assert repeat_action >= 1, "repeat_action should be >= 1"
    elif isinstance(repeat_action, tuple):
        assert len(repeat_action) == 2, 'repeat_action should be a length-2 tuple: (min frameskip, max frameskip)'
        assert repeat_action[0] < repeat_action[1], 'repeat_action[0] should be < repeat_action[1]'

    super(GymEnvironment, self).__init__()

    self._state_builder = state_builder
    self._env = gym.make(env_name)
    self._env.env.frameskip = repeat_action
    self._no_op = max(0, no_op)
    self._done = True

    if monitoring_path is not None:
        self._env = Monitor(self._env, monitoring_path, video_callable=need_record)
Example #9
Source File: test.py From es_on_gke with Apache License 2.0

def main(config):
    """Test policy."""
    rewards = []
    steps = []
    env = utility.load_env()
    env = wrappers.Monitor(
        env, config.video_dir, force=True, video_callable=lambda x: x < 3)
    env.seed(config.seed)
    policy = utility.create_policy(env)
    model_file = os.path.join(config.log_dir,
                              'model_{}.npz'.format(config.checkpoint))
    policy.load_model(model_file)
    for i in range(config.n_episodes):
        ob = env.reset()
        done = False
        ep_reward = 0
        ep_steps = 0
        while not done:
            action = policy.forward(ob)
            ob, reward, done, info = env.step(action)
            ep_reward += reward
            ep_steps += 1
            if config.render:
                env.render()
        print('reward={0:.2f}, steps={1}'.format(ep_reward, ep_steps))
        rewards.append(ep_reward)
        steps.append(ep_steps)
    result_df = pd.DataFrame({'reward': rewards, 'step': steps})
    result_df['avg_reward_per_step'] = result_df.reward / result_df.step
    result_df.to_csv(
        os.path.join(config.log_dir, 'test_scores.csv'), index=False)
Example #10
Source File: dqn.py From MLAlgorithms with MIT License

def init_environment(self, name="CartPole-v0", monitor=False):
    self.env = gym.make(name)
    if monitor:
        self.env = wrappers.Monitor(self.env, name, force=True, video_callable=False)
    self.n_states = self.env.observation_space.shape[0]
    self.n_actions = self.env.action_space.n

    # Experience replay
    self.replay = []
Example #11
Source File: run_dqn_ram.py From deep-reinforcement-learning with MIT License

def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
Example #12
Source File: __init__.py From muzero-pytorch with MIT License

def new_game(self, seed=None, save_video=False, save_path=None, video_callable=None, uid=None):
    env = gym.make(self.env_name)

    if seed is not None:
        env.seed(seed)

    if save_video:
        from gym.wrappers import Monitor
        env = Monitor(env, directory=save_path, force=True, video_callable=video_callable, uid=uid)
    return ClassicControlWrapper(env, discount=self.discount, k=4)
Example #13
Source File: monitor.py From FractalAI with GNU Affero General Public License v3.0

def env(self) -> Monitor:
    return self._env
Example #14
Source File: monitor.py From FractalAI with GNU Affero General Public License v3.0

def _init_monitor(self) -> Monitor:
    env = gym.make(self.name)
    monitor = Monitor(
        env,
        directory=self.directory,
        force=self.force,
        resume=self.resume,
        write_upon_reset=self.write_upon_reset,
        uid=self.uid,
        mode=self.mode,
    )
    return monitor
Example #15
Source File: utils.py From highway-env with MIT License

def record_videos(env, path="videos"):
    return Monitor(env, path, force=True, video_callable=lambda episode: True)
Example #16
Source File: monitor.py From A2C with Apache License 2.0

def monitor(self, is_monitor, is_train, experiment_dir="", record_video_every=10):
    if is_monitor:
        if is_train:
            self.env = wrappers.Monitor(self.env, experiment_dir + 'output', resume=True,
                                        video_callable=lambda count: count % record_video_every == 0)
        else:
            self.env = wrappers.Monitor(self.env, experiment_dir + 'test', resume=True,
                                        video_callable=lambda count: count % record_video_every == 0)
    else:
        self.env = wrappers.Monitor(self.env, experiment_dir + 'output', resume=True,
                                    video_callable=False)
    self.env.reset()
Example #17
Source File: enjoy-adv.py From rl-attack with MIT License

def make_env(game_name):
    env = gym.make(game_name + "NoFrameskip-v4")
    #env = SimpleMonitor(env)
    env = wrappers.Monitor(env, './videos/' + str(time()) + '/')
    env = wrap_dqn(env)
    return env
Example #18
Source File: evaluate.py From reinforce_py with MIT License

def main(args):
    env = gym.make('Walker2d-v1')
    env = wrappers.Monitor(env, './videos/', force=True)
    reward_history = []

    agent = DDPG(env, args)
    agent.construct_model(args.gpu)

    saver = tf.train.Saver()
    if args.model_path is not None:
        # reuse saved model
        saver.restore(agent.sess, args.model_path)
        ep_base = int(args.model_path.split('_')[-1])
        best_avg_rewards = float(args.model_path.split('/')[-1].split('_')[0])
    else:
        raise ValueError('model_path required!')

    for ep in range(args.ep):
        # env init
        state = env.reset()
        ep_rewards = 0
        for step in range(env.spec.timestep_limit):
            env.render()
            action = agent.sample_action(state[np.newaxis, :], noise=False)
            # act
            next_state, reward, done, _ = env.step(action[0])
            ep_rewards += reward
            agent.store_experience(state, action, reward, next_state, done)
            # shift
            state = next_state
            if done:
                break
        reward_history.append(ep_rewards)
        print('Ep%d reward:%d' % (ep + 1, ep_rewards))
    print('Average rewards: ', np.mean(reward_history))
Example #19
Source File: gym_envs.py From sagemaker-rl-container with Apache License 2.0

def run_simulation(env_name, output_dir):
    print('*' * 86)
    print('Running {} simulation.'.format(env_name))

    env_dir = os.path.join(output_dir, env_name)
    print('Saving results to \'{}\'.'.format(env_dir))
    if not os.path.exists(env_dir):
        os.makedirs(env_dir)

    env = gym.make(env_name)
    # record every episode
    # (by default video recorder only captures a sampling of episodes:
    #  those with episodes numbers which are perfect cubes: 1, 8, 27, 64, ... and then every 1000th)
    env = wrappers.Monitor(env, env_dir, force=True, video_callable=lambda episode_id: True)

    for i_episode in range(3):
        print('Start Episode #' + str(i_episode) + '-' * 86)
        env.reset()
        total_reward = 0
        for step in range(1000):
            env.render(mode='ansi')
            action = env.action_space.sample()  # your agent here (this takes random actions)
            observation, reward, done, info = env.step(action)
            total_reward += reward
            if done:
                print("Env: {}, Episode: {},\tSteps: {},\tReward: {}".format(
                    env_name, i_episode, step, total_reward))
                break
        env.reset()
        print('End' + '-' * 86)
    env.close()
Example #20
Source File: run_dqn_atari.py From deep-reinforcement-learning with MIT License

def get_env(task, seed):
    env_id = task.env_id
    env = gym.make(env_id)

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir2/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind(env)

    return env
Example #21
Source File: run_dqn_lander.py From berkeleydeeprlcourse-homework-pytorch with MIT License

def lander_stopping_criterion(num_timesteps):
    def stopping_criterion(env, t):
        # notice that here t is the number of steps of the wrapped env,
        # which is different from the number of steps in the underlying env
        return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps
    return stopping_criterion
Example #22
Source File: ddpg.py From deep-rl with MIT License

def main(args):
    with tf.Session() as sess:

        env = gym.make(args['env'])
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high
        # Ensure action bound is symmetric
        assert (env.action_space.high == -env.action_space.low)

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(
                    env, args['monitor_dir'], video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)

        train(sess, env, args, actor, critic, actor_noise)

        if args['use_gym_monitor']:
            env.monitor.close()
Example #23
Source File: monitor.py From FractalAI with GNU Affero General Public License v3.0

def env(self) -> Monitor:
    return self._env
Example #24
Source File: monitor.py From FractalAI with GNU Affero General Public License v3.0

def _init_monitor(self) -> Monitor:
    env = gym.make(self.name)
    monitor = Monitor(env, directory=self.directory, force=self.force,
                      resume=self.resume, write_upon_reset=self.write_upon_reset,
                      uid=self.uid, mode=self.mode)
    return monitor
Example #25
Source File: gym.py From pytorch-dqn with MIT License

def get_ram_env(env, seed):
    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/gym-results'
    env = wrappers.Monitor(env, expt_dir, force=True)
    env = wrap_deepmind_ram(env)

    return env
Example #26
Source File: es.py From rl_algorithms with MIT License

def test(self, just_one=True):
    """ This is for test-time evaluation. No training is done here. By default,
    iterate through every snapshot. If `just_one` is true, this only runs one
    set of weights, to ensure that we record right away since OpenAI will only
    record subsets and less frequently. Changing the loop over snapshots is
    also needed.
    """
    os.makedirs(self.args.directory+'/videos')
    self.env = wrappers.Monitor(self.env, self.args.directory+'/videos', force=True)
    headdir = self.args.directory+'/snapshots/'
    snapshots = os.listdir(headdir)
    snapshots.sort()
    num_rollouts = 10
    if just_one:
        num_rollouts = 1

    for sn in snapshots:
        print("\n***** Currently on snapshot {} *****".format(sn))

        ### Add your own criteria here.
        # if "800" not in sn:
        #     continue
        ###

        with open(headdir+sn, 'rb') as f:
            weights = pickle.load(f)
        self.sess.run(self.set_params_op, feed_dict={self.new_weights_v: weights})
        returns = []
        for i in range(num_rollouts):
            returns.append(self._compute_return(test=True))
        print("mean: \t{}".format(np.mean(returns)))
        print("std: \t{}".format(np.std(returns)))
        print("max: \t{}".format(np.max(returns)))
        print("min: \t{}".format(np.min(returns)))
        print("returns:\n{}".format(returns))
Example #27
Source File: run_dqn_ram.py From rl_algorithms with MIT License

def get_env(seed):
    env = gym.make('Pong-ram-v0')

    set_global_seeds(seed)
    env.seed(seed)

    expt_dir = '/tmp/hw3_vid_dir/'
    env = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    env = wrap_deepmind_ram(env)

    return env
Example #28
Source File: viz.py From learning2run with MIT License

def main(env_id, policy_type, policy_file, record, stochastic, extra_kwargs, env_extra_kwargs):
    import gym
    from gym import wrappers
    import tensorflow as tf
    import numpy as np
    from es.es_distributed import policies

    if extra_kwargs:
        import json
        extra_kwargs = json.loads(extra_kwargs)

    if env_extra_kwargs:
        import json
        env_extra_kwargs = json.loads(env_extra_kwargs)

    if env_id == 'osim.env.run:RunEnv':
        from osim.env.run import RunEnv
        if env_extra_kwargs:
            env = RunEnv(True, **env_extra_kwargs)
        else:
            env = RunEnv(True)
    else:
        env = gym.make(env_id)

    if record:
        import uuid
        env = wrappers.Monitor(env, '/tmp/' + str(uuid.uuid4()), force=True)

    with tf.Session():
        policy = getattr(policies, policy_type)
        pi = policy.Load(policy_file, extra_kwargs=extra_kwargs)
        while True:
            rews, t = pi.rollout(env, render=True, random_stream=np.random if stochastic else None)
            print('return={:.4f} len={}'.format(rews.sum(), t))
            if record:
                env.close()
                return
Example #29
Source File: ppo_self_play.py From cleanrl with MIT License

def make_env(gym_id, seed, idx):
    def thunk():
        env = SlimeVolleySelfPlayEnv()
        env = MultiBinaryTooMultiDiscrete(env)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        if args.capture_video:
            if idx == 0:
                env = Monitor(env, f'videos/{experiment_name}')
        env.seed(seed)
        env.action_space.seed(seed)
        env.observation_space.seed(seed)
        return env
    return thunk
Example #30
Source File: test_wrappers.py From DRL_DeliveryDuel with MIT License

def test_no_double_wrapping():
    temp = tempfile.mkdtemp()
    try:
        env = gym.make("FrozenLake-v0")
        env = wrappers.Monitor(env, temp)
        try:
            env = wrappers.Monitor(env, temp)
        except error.DoubleWrapperError:
            pass
        else:
            assert False, "Should not allow double wrapping"
        env.close()
    finally:
        shutil.rmtree(temp)