Python baselines.common.tf_util.make_session() Examples
The following are 30 code examples of baselines.common.tf_util.make_session(). The project and source file each example comes from are named in the header above it. You may also want to check out the other available functions and classes of the baselines.common.tf_util module.
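Before the project code, here is a minimal usage sketch of make_session() itself. It is not taken from any of the projects below, and it assumes a Baselines revision in which make_session() accepts num_cpu, make_default, and graph keyword arguments; older revisions expose only a num_cpu argument, which is why several examples call U.make_session(num_cpu=1) or U.make_session(4) directly.

# Minimal usage sketch (not from any project below); assumes a Baselines revision
# where make_session() accepts num_cpu, make_default and graph keyword arguments.
import tensorflow as tf
from baselines.common.tf_util import make_session

# Pattern 1: enter the returned tf.Session so it becomes the default session,
# analogous to the U.make_session(num_cpu=1).__enter__() calls in the examples.
with make_session(num_cpu=1):
    x = tf.constant(42.0)
    print(tf.get_default_session().run(x))

# Pattern 2: install a default session on a fresh graph, as in the
# test_microbatches and test_coexistence examples below.
sess = make_session(make_default=True, graph=tf.Graph())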
Example #1
Source File: run_mujoco.py From BackpropThroughTheVoidRL with MIT License | 6 votes |
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir())
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close()
Example #2
Source File: run_mujoco.py From learning2run with MIT License | 6 votes |
def train(env_id, num_timesteps, seed):
    from baselines.pposgd import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    logger.session().__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = bench.Monitor(env, osp.join(logger.get_dir(), "monitor.json"))
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_batch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95,
        )
    env.close()
Example #3
Source File: test_microbatches.py From baselines with MIT License | 6 votes |
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    # learn_fn(env=env_test)
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=3e-3)
Example #4
Source File: run_mujoco.py From deeprl-baselines with MIT License | 6 votes |
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(seed)
    env = gym.make(env_id)
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir())
    env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close()
Example #5
Source File: wang2015_eval.py From distributional-dqn with MIT License | 6 votes |
def main():
    set_global_seeds(1)
    args = parse_args()
    with U.make_session(4) as sess:  # noqa
        _, env = make_env(args.env)
        model_parent_path = distdeepq.parent_path(args.model_dir)
        old_args = json.load(open(model_parent_path + '/args.json'))

        act = distdeepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
            p_dist_func=distdeepq.models.atari_model(),
            num_actions=env.action_space.n,
            dist_params={'Vmin': old_args['vmin'],
                         'Vmax': old_args['vmax'],
                         'nb_atoms': old_args['nb_atoms']})
        U.load_state(os.path.join(args.model_dir, "saved"))
        wang2015_eval(args.env, act, stochastic=args.stochastic)
Example #6
Source File: test_microbatches.py From ICML2019-TREX with MIT License | 6 votes |
def test_microbatches():
    def env_fn():
        env = gym.make('CartPole-v0')
        env.seed(0)
        return env

    learn_fn = partial(learn, network='mlp', nsteps=32, total_timesteps=32, seed=0)

    env_ref = DummyVecEnv([env_fn])
    sess_ref = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_ref)
    vars_ref = {v.name: sess_ref.run(v) for v in tf.trainable_variables()}

    env_test = DummyVecEnv([env_fn])
    sess_test = make_session(make_default=True, graph=tf.Graph())
    learn_fn(env=env_test, model_fn=partial(MicrobatchedModel, microbatch_size=2))
    vars_test = {v.name: sess_test.run(v) for v in tf.trainable_variables()}

    for v in vars_ref:
        np.testing.assert_allclose(vars_ref[v], vars_test[v], atol=1e-3)
Example #7
Source File: dqfd.py From pysc2-examples with Apache License 2.0 | 5 votes |
def load(path, act_params, num_cpu=16):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act)
Example #8
Source File: simple.py From distributional-dqn with MIT License | 5 votes |
def make_session(num_cpu):
    tf_config = tf.ConfigProto(
        inter_op_parallelism_threads=num_cpu,
        intra_op_parallelism_threads=num_cpu)
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.25
    return tf.Session(config=tf_config)
Example #9
Source File: deepq_mineral_4way.py From pysc2-examples with Apache License 2.0 | 5 votes |
def load(path, act_params, num_cpu=16):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act)
Example #10
Source File: deepq_mineral_shards.py From pysc2-examples with Apache License 2.0 | 5 votes |
def load(path, act_params, num_cpu=16):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act)
Example #11
Source File: behavior_clone.py From baselines with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    reuse=reuse, hid_size=args.policy_hidden_size,
                                    num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
    savedir_fname = learn(env,
                          policy_fn,
                          dataset,
                          max_iters=args.BC_max_iter,
                          ckpt_dir=args.checkpoint_dir,
                          log_dir=args.log_dir,
                          task_name=task_name,
                          verbose=True)
    avg_len, avg_ret = runner(env,
                              policy_fn,
                              savedir_fname,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy,
                              save=args.save_sample,
                              reuse=True)
Example #12
Source File: run_humanoid.py From baselines with MIT License | 5 votes |
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    logger.log("NOTE: reward will be scaled by a factor of 10 in logged stats. Check the monitor for unscaled reward.")
    pi = pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.1, entcoeff=0.0,
            optim_epochs=10,
            optim_stepsize=1e-4,
            optim_batchsize=64,
            gamma=0.99,
            lam=0.95,
            schedule='constant',
        )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi
Example #13
Source File: wang2015_eval.py From emdqn with MIT License | 5 votes |
def main():
    set_global_seeds(1)
    args = parse_args()
    with U.make_session(4) as sess:  # noqa
        _, env = make_env(args.env)
        act = deepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n)
        U.load_state(os.path.join(args.model_dir, "saved"))
        wang2015_eval(args.env, act, stochastic=args.stochastic)
Example #14
Source File: simple.py From emdqn with MIT License | 5 votes |
def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)
Example #15
Source File: wang2015_eval.py From BackpropThroughTheVoidRL with MIT License | 5 votes |
def main():
    set_global_seeds(1)
    args = parse_args()
    with U.make_session(4):  # noqa
        _, env = make_env(args.env)
        act = deepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n)
        U.load_state(os.path.join(args.model_dir, "saved"))
        wang2015_eval(args.env, act, stochastic=args.stochastic)
Example #16
Source File: simple.py From distributional-dqn with MIT License | 5 votes |
def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = distdeepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)
Example #17
Source File: gail-eval.py From lirpg with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    print('Evaluating {}'.format(args.env))
    bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                          args.stochastic_policy, False, 'BC')
    print('Evaluation for {}'.format(args.env))
    print(bc_log)
    gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                            args.stochastic_policy, True, 'gail')
    print('Evaluation for {}'.format(args.env))
    print(gail_log)
    plot(args.env, bc_log, gail_log, args.stochastic_policy)
Example #18
Source File: test_serialization.py From baselines with MIT License | 5 votes |
def test_coexistence(learn_fn, network_fn):
    '''
    Test if more than one model can exist at a time
    '''

    if learn_fn == 'deepq':
        # TODO enable multiple DQN models to be useable at the same time
        # github issue https://github.com/openai/baselines/issues/656
        return

    if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies
        # and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
    make_session(make_default=True, graph=tf.Graph())
    model1 = learn(seed=1)
    make_session(make_default=True, graph=tf.Graph())
    model2 = learn(seed=2)

    model1.step(env.observation_space.sample())
    model2.step(env.observation_space.sample())
Example #19
Source File: behavior_clone.py From self-imitation-learning with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    reuse=reuse, hid_size=args.policy_hidden_size,
                                    num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
    savedir_fname = learn(env,
                          policy_fn,
                          dataset,
                          max_iters=args.BC_max_iter,
                          ckpt_dir=args.checkpoint_dir,
                          log_dir=args.log_dir,
                          task_name=task_name,
                          verbose=True)
    avg_len, avg_ret = runner(env,
                              policy_fn,
                              savedir_fname,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy,
                              save=args.save_sample,
                              reuse=True)
Example #20
Source File: gail-eval.py From self-imitation-learning with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    print('Evaluating {}'.format(args.env))
    bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                          args.stochastic_policy, False, 'BC')
    print('Evaluation for {}'.format(args.env))
    print(bc_log)
    gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                            args.stochastic_policy, True, 'gail')
    print('Evaluation for {}'.format(args.env))
    print(gail_log)
    plot(args.env, bc_log, gail_log, args.stochastic_policy)
Example #21
Source File: deepq_mineral_shards.py From A-Guide-to-DeepMinds-StarCraft-AI-Environment with Apache License 2.0 | 5 votes |
def load(path, act_params, num_cpu=16):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act)
Example #22
Source File: dqfd.py From A-Guide-to-DeepMinds-StarCraft-AI-Environment with Apache License 2.0 | 5 votes |
def load(path, act_params, num_cpu=16):
    with open(path, "rb") as f:
        model_data = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act)
Example #23
Source File: run_mujoco.py From sonic_contest with MIT License | 5 votes |
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = make_mujoco_env(env_id, seed)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close()
Example #24
Source File: behavior_clone.py From sonic_contest with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    env = gym.make(args.env_id)
    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                                    reuse=reuse, hid_size=args.policy_hidden_size,
                                    num_hid_layers=2)
    env = bench.Monitor(env, logger.get_dir() and
                        osp.join(logger.get_dir(), "monitor.json"))
    env.seed(args.seed)
    gym.logger.setLevel(logging.WARN)
    task_name = get_task_name(args)
    args.checkpoint_dir = osp.join(args.checkpoint_dir, task_name)
    args.log_dir = osp.join(args.log_dir, task_name)
    dataset = Mujoco_Dset(expert_path=args.expert_path, traj_limitation=args.traj_limitation)
    savedir_fname = learn(env,
                          policy_fn,
                          dataset,
                          max_iters=args.BC_max_iter,
                          ckpt_dir=args.checkpoint_dir,
                          log_dir=args.log_dir,
                          task_name=task_name,
                          verbose=True)
    avg_len, avg_ret = runner(env,
                              policy_fn,
                              savedir_fname,
                              timesteps_per_batch=1024,
                              number_trajs=10,
                              stochastic_policy=args.stochastic_policy,
                              save=args.save_sample,
                              reuse=True)
Example #25
Source File: gail-eval.py From sonic_contest with MIT License | 5 votes |
def main(args):
    U.make_session(num_cpu=1).__enter__()
    set_global_seeds(args.seed)
    print('Evaluating {}'.format(args.env))
    bc_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                          args.stochastic_policy, False, 'BC')
    print('Evaluation for {}'.format(args.env))
    print(bc_log)
    gail_log = evaluate_env(args.env, args.seed, args.policy_hidden_size,
                            args.stochastic_policy, True, 'gail')
    print('Evaluation for {}'.format(args.env))
    print(gail_log)
    plot(args.env, bc_log, gail_log, args.stochastic_policy)
Example #26
Source File: simple.py From rl-attack-detection with MIT License | 5 votes |
def load(path, num_cpu=16):
    with open(path, "rb") as f:
        model_data, act_params = dill.load(f)
    act = deepq.build_act(**act_params)
    sess = U.make_session(num_cpu=num_cpu)
    sess.__enter__()
    with tempfile.TemporaryDirectory() as td:
        arc_path = os.path.join(td, "packed.zip")
        with open(arc_path, "wb") as f:
            f.write(model_data)

        zipfile.ZipFile(arc_path, 'r', zipfile.ZIP_DEFLATED).extractall(td)
        U.load_state(os.path.join(td, "model"))

    return ActWrapper(act, act_params)
Example #27
Source File: wang2015_eval.py From rl-attack-detection with MIT License | 5 votes |
def main():
    set_global_seeds(1)
    args = parse_args()
    with U.make_session(4) as sess:  # noqa
        _, env = make_env(args.env)
        act = deepq.build_act(
            make_obs_ph=lambda name: U.Uint8Input(env.observation_space.shape, name=name),
            q_func=dueling_model if args.dueling else model,
            num_actions=env.action_space.n)
        U.load_state(os.path.join(args.model_dir, "saved"))
        wang2015_eval(args.env, act, stochastic=args.stochastic)
Example #28
Source File: test_serialization.py From ICML2019-TREX with MIT License | 5 votes |
def test_coexistence(learn_fn, network_fn):
    '''
    Test if more than one model can exist at a time
    '''

    if learn_fn == 'deepq':
        # TODO enable multiple DQN models to be useable at the same time
        # github issue https://github.com/openai/baselines/issues/656
        return

    if network_fn.endswith('lstm') and learn_fn in ['acktr', 'trpo_mpi', 'deepq']:
        # TODO make acktr work with recurrent policies
        # and test
        # github issue: https://github.com/openai/baselines/issues/660
        return

    env = DummyVecEnv([lambda: gym.make('CartPole-v0')])
    learn = get_learn_function(learn_fn)

    kwargs = {}
    kwargs.update(network_kwargs[network_fn])
    kwargs.update(learn_kwargs[learn_fn])

    learn = partial(learn, env=env, network=network_fn, total_timesteps=0, **kwargs)
    make_session(make_default=True, graph=tf.Graph())
    model1 = learn(seed=1)
    make_session(make_default=True, graph=tf.Graph())
    model2 = learn(seed=2)

    model1.step(env.observation_space.sample())
    model2.step(env.observation_space.sample())
Example #29
Source File: run_mujoco.py From ICML2019-TREX with MIT License | 5 votes |
def train(env_id, num_timesteps, seed):
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()
    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)
    env = make_mujoco_env(env_id, seed)
    pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close()
Example #30
Source File: run_humanoid.py From ICML2019-TREX with MIT License | 5 votes |
def train(num_timesteps, seed, model_path=None):
    env_id = 'Humanoid-v2'
    from baselines.ppo1 import mlp_policy, pposgd_simple
    U.make_session(num_cpu=1).__enter__()

    def policy_fn(name, ob_space, ac_space):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
            hid_size=64, num_hid_layers=2)

    env = make_mujoco_env(env_id, seed)

    # parameters below were the best found in a simple random search
    # these are good enough to make humanoid walk, but whether those are
    # an absolute best or not is not certain
    env = RewScale(env, 0.1)
    pi = pposgd_simple.learn(env, policy_fn,
            max_timesteps=num_timesteps,
            timesteps_per_actorbatch=2048,
            clip_param=0.2, entcoeff=0.0,
            optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
            gamma=0.99, lam=0.95, schedule='linear',
        )
    env.close()
    if model_path:
        U.save_state(model_path)

    return pi