Python baselines.logger.get_dir() Examples
The following are 30 code examples of baselines.logger.get_dir(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module baselines.logger, or try the search function.
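In baselines, logger.get_dir() returns the directory the logger is currently writing to, or None if logging has not been configured, so the common pattern in the examples below is to call logger.configure() first and then build output paths under the returned directory. Here is a minimal sketch of that pattern; the checkpoint file name is only an illustration.

import os
from baselines import logger

# Configure logging; with no arguments this picks a default directory
# (or honors the OPENAI_LOGDIR environment variable if it is set).
logger.configure()

# get_dir() returns the active log directory, or None if nothing is configured.
log_dir = logger.get_dir()

# Typical use: place run artifacts (monitor files, checkpoints) inside it.
if log_dir is not None:
    checkpoint_path = os.path.join(log_dir, "model.pkl")
    print("Writing checkpoints to", checkpoint_path)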
Example #1
Source File: run_mujoco.py From learning2run with MIT License | 6 votes |
def train(env_id, num_timesteps, seed):
    whoami = mpi_fork(num_cpu)
    if whoami == "parent":
        return
    import baselines.common.tf_util as U
    logger.session().__enter__()
    sess = U.single_threaded_session()
    sess.__enter__()
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return MlpPolicy(name=name, ob_space=env.observation_space, ac_space=env.action_space,
                         hid_size=32, num_hid_layers=2)
    env = bench.Monitor(env, osp.join(logger.get_dir(), "%i.monitor.json" % rank))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)
    trpo_mpi.learn(env, policy_fn, timesteps_per_batch=1024, max_kl=0.01, cg_iters=10,
                   cg_damping=0.1, max_timesteps=num_timesteps, gamma=0.99, lam=0.98,
                   vf_iters=5, vf_stepsize=1e-3)
    env.close()
Example #2
Source File: deepq.py From ICML2019-TREX with MIT License | 6 votes |
def save_act(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_variables(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #3
Source File: run_humanoid.py From ICML2019-TREX with MIT License | 6 votes |
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example #4
Source File: tf_util.py From ICML2019-TREX with MIT License | 6 votes |
def launch_tensorboard_in_background(log_dir):
    '''
    To log the Tensorflow graph when using rl-algs
    algorithms, you can run the following code
    in your main script:
        import threading, time
        def start_tensorboard(session):
            time.sleep(10) # Wait until graph is setup
            tb_path = osp.join(logger.get_dir(), 'tb')
            summary_writer = tf.summary.FileWriter(tb_path, graph=session.graph)
            summary_op = tf.summary.merge_all()
            launch_tensorboard_in_background(tb_path)
        session = tf.get_default_session()
        t = threading.Thread(target=start_tensorboard, args=([session]))
        t.start()
    '''
    import subprocess
    subprocess.Popen(['tensorboard', '--logdir', log_dir])
Example #5
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
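The expression logger.get_dir() and os.path.join(logger.get_dir(), ...) in the example above guards against the logger not being configured: get_dir() then returns None, the `and` short-circuits, and Monitor receives None as its filename, so no monitor file is written. A small sketch of the same guard, with a hypothetical worker rank:

import os
from baselines import logger
from baselines.bench import Monitor

rank = 0  # hypothetical worker index for this illustration
# None when the logger is unconfigured, otherwise "<log_dir>/<rank>".
monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
# env = Monitor(env, monitor_path, allow_early_resets=True)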
Example #6
Source File: cmd_util.py From MOREL with MIT License | 6 votes |
def make_atari_env(env_id, num_env, seed, hparams=None, wrapper_kwargs=None, start_index=0, nsteps=5, **kwargs):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            if rank == start_index and 'video_log_dir' in kwargs:
                env = VideoLogMonitor(env, kwargs['video_log_dir'] + '_rgb',
                                      write_attention_video=kwargs['write_attention_video'],
                                      hparams=hparams, nsteps=nsteps)
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)
    env_fns = [make_env(i + start_index) for i in range(num_env)]

    global my_subproc_vec_env
    assert my_subproc_vec_env == None
    my_subproc_vec_env = SubprocVecEnv(env_fns)
    return my_subproc_vec_env
Example #7
Source File: simple.py From lirpg with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #8
Source File: train_pong.py From ICML2019-TREX with MIT License | 6 votes |
def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
    )

    model.save('pong_model.pkl')
    env.close()
Example #9
Source File: deepq.py From HardRLWithYoutube with MIT License | 6 votes |
def save_act(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #10
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #11
Source File: tf_util.py From ICML2019-TREX with MIT License | 6 votes |
def launch_tensorboard_in_background(log_dir):
    '''
    To log the Tensorflow graph when using rl-algs
    algorithms, you can run the following code
    in your main script:
        import threading, time
        def start_tensorboard(session):
            time.sleep(10) # Wait until graph is setup
            tb_path = osp.join(logger.get_dir(), 'tb')
            summary_writer = tf.summary.FileWriter(tb_path, graph=session.graph)
            summary_op = tf.summary.merge_all()
            launch_tensorboard_in_background(tb_path)
        session = tf.get_default_session()
        t = threading.Thread(target=start_tensorboard, args=([session]))
        t.start()
    '''
    import subprocess
    subprocess.Popen(['tensorboard', '--logdir', log_dir])
Example #12
Source File: deepq.py From ICML2019-TREX with MIT License | 6 votes |
def save_act(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_variables(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #13
Source File: tf_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def launch_tensorboard_in_background(log_dir):
    '''
    To log the Tensorflow graph when using rl-algs
    algorithms, you can run the following code
    in your main script:
        import threading, time
        def start_tensorboard(session):
            time.sleep(10) # Wait until graph is setup
            tb_path = osp.join(logger.get_dir(), 'tb')
            summary_writer = tf.summary.FileWriter(tb_path, graph=session.graph)
            summary_op = tf.summary.merge_all()
            launch_tensorboard_in_background(tb_path)
        session = tf.get_default_session()
        t = threading.Thread(target=start_tensorboard, args=([session]))
        t.start()
    '''
    import subprocess
    subprocess.Popen(['tensorboard', '--logdir', log_dir])
Example #14
Source File: tf_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def launch_tensorboard_in_background(log_dir):
    '''
    To log the Tensorflow graph when using rl-algs
    algorithms, you can run the following code
    in your main script:
        import threading, time
        def start_tensorboard(session):
            time.sleep(10) # Wait until graph is setup
            tb_path = osp.join(logger.get_dir(), 'tb')
            summary_writer = tf.summary.FileWriter(tb_path, graph=session.graph)
            summary_op = tf.summary.merge_all()
            launch_tensorboard_in_background(tb_path)
        session = tf.get_default_session()
        t = threading.Thread(target=start_tensorboard, args=([session]))
        t.start()
    '''
    import subprocess
    subprocess.Popen(['tensorboard', '--logdir', log_dir])
Example #15
Source File: tf_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def launch_tensorboard_in_background(log_dir):
    '''
    To log the Tensorflow graph when using rl-algs
    algorithms, you can run the following code
    in your main script:
        import threading, time
        def start_tensorboard(session):
            time.sleep(10) # Wait until graph is setup
            tb_path = osp.join(logger.get_dir(), 'tb')
            summary_writer = tf.summary.FileWriter(tb_path, graph=session.graph)
            summary_op = tf.summary.merge_all()
            launch_tensorboard_in_background(tb_path)
        session = tf.get_default_session()
        t = threading.Thread(target=start_tensorboard, args=([session]))
        t.start()
    '''
    import subprocess
    subprocess.Popen(['tensorboard', '--logdir', log_dir])
Example #16
Source File: train_pong.py From ICML2019-TREX with MIT License | 6 votes |
def main():
    logger.configure()
    env = make_atari('PongNoFrameskip-v4')
    env = bench.Monitor(env, logger.get_dir())
    env = deepq.wrap_atari_dqn(env)

    model = deepq.learn(
        env,
        "conv_only",
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[256],
        dueling=True,
        lr=1e-4,
        total_timesteps=int(1e7),
        buffer_size=10000,
        exploration_fraction=0.1,
        exploration_final_eps=0.01,
        train_freq=4,
        learning_starts=10000,
        target_network_update_freq=1000,
        gamma=0.99,
    )

    model.save('pong_model.pkl')
    env.close()
Example #17
Source File: simple.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #18
Source File: simple.py From DRL_DeliveryDuel with MIT License | 6 votes |
def save(self, path=None):
    """Save model to a pickle located at `path`"""
    if path is None:
        path = os.path.join(logger.get_dir(), "model.pkl")

    with tempfile.TemporaryDirectory() as td:
        save_state(os.path.join(td, "model"))
        arc_name = os.path.join(td, "packed.zip")
        with zipfile.ZipFile(arc_name, 'w') as zipf:
            for root, dirs, files in os.walk(td):
                for fname in files:
                    file_path = os.path.join(root, fname)
                    if file_path != arc_name:
                        zipf.write(file_path, os.path.relpath(file_path, td))
        with open(arc_name, "rb") as f:
            model_data = f.read()
    with open(path, "wb") as f:
        cloudpickle.dump((model_data, self._act_params), f)
Example #19
Source File: run_mujoco.py From learning2run with MIT License | 6 votes |
def train(env_id, num_timesteps, seed):
    from baselines.pposgd import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    logger.session().__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    hid_size=64, num_hid_layers=2)
    env = bench.Monitor(env, osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=num_timesteps,
                        timesteps_per_batch=2048,
                        clip_param=0.2, entcoeff=0.0,
                        optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                        gamma=0.99, lam=0.95,
                        )
    env.close()
Example #20
Source File: run_humanoid.py From ICML2019-TREX with MIT License | 6 votes |
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example #21
Source File: run_humanoid.py From HardRLWithYoutube with MIT License | 6 votes |
def main():
    logger.configure()
    parser = mujoco_arg_parser()
    parser.add_argument('--model-path', default=os.path.join(logger.get_dir(), 'humanoid_policy'))
    parser.set_defaults(num_timesteps=int(2e7))

    args = parser.parse_args()

    if not args.play:
        # train the model
        train(num_timesteps=args.num_timesteps, seed=args.seed, model_path=args.model_path)
    else:
        # construct the model object, load pre-trained model and render
        pi = train(num_timesteps=1, seed=args.seed)
        U.load_state(args.model_path)
        env = make_mujoco_env('Humanoid-v2', seed=0)

        ob = env.reset()
        while True:
            action = pi.act(stochastic=False, ob=ob)[0]
            ob, _, done, _ = env.step(action)
            env.render()
            if done:
                ob = env.reset()
Example #22
Source File: run.py From reinmav-gym with BSD 3-Clause "New" or "Revised" License | 5 votes |
def train(args, extra_args):
    env_type, env_id = get_env_type(args)
    print('env_type: {}'.format(env_type))

    total_timesteps = int(args.num_timesteps)
    seed = args.seed

    learn = get_learn_function(args.alg)
    alg_kwargs = get_learn_function_defaults(args.alg, env_type)
    alg_kwargs.update(extra_args)

    env = build_env(args)
    if args.save_video_interval != 0:
        env = VecVideoRecorder(env, osp.join(logger.get_dir(), "videos"),
                               record_video_trigger=lambda x: x % args.save_video_interval == 0,
                               video_length=args.save_video_length)

    if args.network:
        alg_kwargs['network'] = args.network
    else:
        if alg_kwargs.get('network') is None:
            alg_kwargs['network'] = get_default_network(env_type)

    print('Training {} on {}:{} with arguments \n{}'.format(args.alg, env_type, env_id, alg_kwargs))

    model = learn(
        env=env,
        seed=seed,
        total_timesteps=total_timesteps,
        **alg_kwargs
    )

    return model, env
Example #23
Source File: cmd_util.py From MOREL with MIT License | 5 votes |
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
Example #24
Source File: run_mujoco.py From sonic_contest with MIT License | 5 votes |
def train(env_id, num_timesteps, seed):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    tf.Session(config=config).__enter__()

    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir())
        return env

    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    policy = MlpPolicy
    ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
               lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
               ent_coef=0.0,
               lr=3e-4,
               cliprange=0.2,
               total_timesteps=num_timesteps)
Example #25
Source File: run_atari.py From learning2run with MIT License | 5 votes |
def train(env_id, num_timesteps, seed, num_cpu):
    from baselines.pposgd import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    whoami = mpi_fork(num_cpu)
    if whoami == "parent":
        return
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    logger.session().__enter__()
    if rank != 0:
        logger.set_level(logger.DISABLED)
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, osp.join(logger.get_dir(), "%i.monitor.json" % rank))
    env.seed(workerseed)
    gym.logger.setLevel(logging.WARN)

    env = wrap_train(env)
    num_timesteps /= 4  # because we're wrapping the envs to do frame skip
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=num_timesteps,
                        timesteps_per_batch=256,
                        clip_param=0.2, entcoeff=0.01,
                        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
                        gamma=0.99, lam=0.95,
                        schedule='linear'
                        )
    env.close()
Example #26
Source File: behavior_clone.py From ICML2019-TREX with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    reuse=reuse, hid_size=args.policy_hidden_size, num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
    savedir_fname = learn(env,
                          policy_fn,
                          dataset,
                          max_iters=args.BC_max_iter,
                          ckpt_dir=args.checkpoint_dir,
                          log_dir=args.log_dir,
                          task_name=task_name,
                          verbose=True)
    avg_len, avg_ret = runner(env,
                              policy_fn,
                              savedir_fname,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy,
                              save=args.save_sample,
                              reuse=True)
Example #27
Source File: run_mujoco.py From DRL_DeliveryDuel with MIT License | 5 votes |
def train(env_id, num_timesteps, seed):
    from baselines.common import set_global_seeds
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from baselines.ppo2 import ppo2
    from baselines.ppo2.policies import MlpPolicy
    import gym
    import tensorflow as tf
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    ncpu = 1
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=ncpu,
                            inter_op_parallelism_threads=ncpu)
    tf.Session(config=config).__enter__()

    def make_env():
        env = gym.make(env_id)
        env = bench.Monitor(env, logger.get_dir())
        return env

    env = DummyVecEnv([make_env])
    env = VecNormalize(env)

    set_global_seeds(seed)
    policy = MlpPolicy
    ppo2.learn(policy=policy, env=env, nsteps=2048, nminibatches=32,
               lam=0.95, gamma=0.99, noptepochs=10, log_interval=1,
               ent_coef=0.0,
               lr=3e-4,
               cliprange=0.2,
               total_timesteps=num_timesteps)
Example #28
Source File: run_atari.py From DRL_DeliveryDuel with MIT License | 5 votes |
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import pposgd_simple, cnn_policy
    import baselines.common.tf_util as U
    rank = MPI.COMM_WORLD.Get_rank()
    sess = U.single_threaded_session()
    sess.__enter__()
    if rank == 0:
        logger.configure()
    else:
        logger.configure(format_strs=[])
    workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
    set_global_seeds(workerseed)
    env = make_atari(env_id)
    def policy_fn(name, ob_space, ac_space):  # pylint: disable=W0613
        return cnn_policy.CnnPolicy(name=name, ob_space=ob_space, ac_space=ac_space)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), str(rank)))
    env.seed(workerseed)

    env = wrap_deepmind(env)
    env.seed(workerseed)

    pposgd_simple.learn(env, policy_fn,
                        max_timesteps=int(num_timesteps * 1.1),
                        timesteps_per_actorbatch=256,
                        clip_param=0.2, entcoeff=0.01,
                        optim_epochs=4, optim_stepsize=1e-3, optim_batchsize=64,
                        gamma=0.99, lam=0.95,
                        schedule='linear'
                        )
    env.close()
Example #29
Source File: cmd_util.py From DRL_DeliveryDuel with MIT License | 5 votes |
def make_atari_env(env_id, num_env, seed, wrapper_kwargs=None, start_index=0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id)
            env.seed(seed + rank)
            env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
            return wrap_deepmind(env, **wrapper_kwargs)
        return _thunk
    set_global_seeds(seed)
    return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
Example #30
Source File: behavior_clone.py From DRL_DeliveryDuel with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    reuse=reuse, hid_size=args.policy_hidden_size, num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
    savedir_fname = learn(env,
                          policy_fn,
                          dataset,
                          max_iters=args.BC_max_iter,
                          ckpt_dir=args.checkpoint_dir,
                          log_dir=args.log_dir,
                          task_name=task_name,
                          verbose=True)
    avg_len, avg_ret = runner(env,
                              policy_fn,
                              savedir_fname,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy,
                              save=args.save_sample,
                              reuse=True)