Python mpi4py.MPI Examples
The following are 30 code examples of the mpi4py.MPI module, drawn from open-source projects and ordered by popularity. Each example notes its source file, the project it comes from, and that project's license. You may also want to explore the other functions and classes that the mpi4py module provides.
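Before the project-specific examples below, here is a minimal, self-contained sketch of the mpi4py.MPI API that most of them build on: every process queries its rank and the communicator size on MPI.COMM_WORLD, and rank 0 broadcasts a Python object to the others. This snippet is illustrative only and is not taken from any of the projects below; it would typically be launched with something like mpiexec -n 4 python script.py.

# A minimal mpi4py sketch (not from the projects below): query rank/size
# on the world communicator and broadcast a Python object from rank 0.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()      # this process's index within the communicator
size = comm.Get_size()      # total number of MPI processes

# The lowercase bcast works on arbitrary picklable Python objects.
config = {"seed": 0, "env_id": "HalfCheetah-v1"} if rank == 0 else None
config = comm.bcast(config, root=0)

print("rank %d of %d received %s" % (rank, size, config))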
Example #1
Source File: util.py From HardRLWithYoutube with MIT License | 7 votes |
def mpi_fork(n, extra_mpi_args=[]):
    """Re-launches the current script with workers
    Returns "parent" for original parent, "child" for MPI children
    """
    if n <= 1:
        return "child"
    if os.getenv("IN_MPI") is None:
        env = os.environ.copy()
        env.update(
            MKL_NUM_THREADS="1",
            OMP_NUM_THREADS="1",
            IN_MPI="1"
        )
        # "-bind-to core" is crucial for good performance
        args = ["mpirun", "-np", str(n)] + \
            extra_mpi_args + \
            [sys.executable]
        args += sys.argv
        subprocess.check_call(args, env=env)
        return "parent"
    else:
        install_mpi_excepthook()
        return "child"
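A typical way to use a helper like this (a sketch only, not taken from the project; the worker count and the run_training function are illustrative) is to call it at the top of the script so the original parent process exits and only the MPI children run the actual work:

# Illustrative usage of mpi_fork; run_training is a hypothetical entry point.
import sys
from mpi4py import MPI

if mpi_fork(4) == "parent":
    sys.exit(0)            # the re-launched mpirun children do the real work

rank = MPI.COMM_WORLD.Get_rank()
run_training(rank)         # each MPI child continues from here with its own rank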
Example #2
Source File: test_mvpa_voxel_selection.py From brainiak with Apache License 2.0 | 6 votes |
def test_mvpa_voxel_selection():
    data = prng.rand(5, 5, 5, 8).astype(np.float32)
    # all MPI processes read the mask; the mask file is small
    mask = np.ones([5, 5, 5], dtype=bool)  # np.bool is removed in recent NumPy
    mask[0, 0, :] = False
    labels = [0, 1, 0, 1, 0, 1, 0, 1]
    # 2 subjects, 4 epochs per subject
    sl = Searchlight(sl_rad=1)
    mvs = MVPAVoxelSelector(data, mask, labels, 2, sl)
    # for cross validation, use SVM with precomputed kernel
    clf = svm.SVC(kernel='rbf', C=10, gamma='auto')
    result_volume, results = mvs.run(clf)
    if MPI.COMM_WORLD.Get_rank() == 0:
        output = []
        for tuple in results:
            if tuple[1] > 0:
                output.append(int(8 * tuple[1]))
        expected_output = [6, 6, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
                           4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 1]
        assert np.allclose(output, expected_output, atol=1), \
            'voxel selection via SVM does not provide correct results'
Example #3
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0, gamestate=None):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = seed + 10000 * mpi_rank if seed is not None else None
    def make_thunk(rank):
        return lambda: make_env(
            env_id=env_id,
            env_type=env_type,
            subrank=rank,
            seed=seed,
            reward_scale=reward_scale,
            gamestate=gamestate,
            wrapper_kwargs=wrapper_kwargs
        )
    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_thunk(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_thunk(start_index)])
Example #4
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #5
Source File: cmd_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)
            if env_type == 'atari':
                return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1:
                return RewardScaler(env, reward_scale)
            else:
                return env
        return _thunk
    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_env(start_index)])
Example #6
Source File: cmd_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)
            if env_type == 'atari':
                return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1:
                return RewardScaler(env, reward_scale)
            else:
                return env
        return _thunk
    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_env(start_index)])
Example #7
Source File: util.py From ICML2019-TREX with MIT License | 6 votes |
def mpi_fork(n, extra_mpi_args=[]):
    """Re-launches the current script with workers
    Returns "parent" for original parent, "child" for MPI children
    """
    if n <= 1:
        return "child"
    if os.getenv("IN_MPI") is None:
        env = os.environ.copy()
        env.update(
            MKL_NUM_THREADS="1",
            OMP_NUM_THREADS="1",
            IN_MPI="1"
        )
        # "-bind-to core" is crucial for good performance
        args = ["mpirun", "-np", str(n)] + \
            extra_mpi_args + \
            [sys.executable]
        args += sys.argv
        subprocess.check_call(args, env=env)
        return "parent"
    else:
        install_mpi_excepthook()
        return "child"
Example #8
Source File: util.py From ICML2019-TREX with MIT License | 6 votes |
def mpi_fork(n, extra_mpi_args=[]):
    """Re-launches the current script with workers
    Returns "parent" for original parent, "child" for MPI children
    """
    if n <= 1:
        return "child"
    if os.getenv("IN_MPI") is None:
        env = os.environ.copy()
        env.update(
            MKL_NUM_THREADS="1",
            OMP_NUM_THREADS="1",
            IN_MPI="1"
        )
        # "-bind-to core" is crucial for good performance
        args = ["mpirun", "-np", str(n)] + \
            extra_mpi_args + \
            [sys.executable]
        args += sys.argv
        subprocess.check_call(args, env=env)
        return "parent"
    else:
        install_mpi_excepthook()
        return "child"
Example #9
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0, gamestate=None):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    seed = seed + 10000 * mpi_rank if seed is not None else None
    def make_thunk(rank):
        return lambda: make_env(
            env_id=env_id,
            env_type=env_type,
            subrank=rank,
            seed=seed,
            reward_scale=reward_scale,
            gamestate=gamestate,
            wrapper_kwargs=wrapper_kwargs
        )
    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_thunk(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_thunk(start_index)])
Example #10
Source File: cmd_util.py From ICML2019-TREX with MIT License | 6 votes |
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #11
Source File: cmd_util.py From HardRLWithYoutube with MIT License | 6 votes |
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
Example #12
Source File: cmd_util.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def make_vec_env(env_id, env_type, num_env, seed, wrapper_kwargs=None, start_index=0, reward_scale=1.0):
    """
    Create a wrapped, monitored SubprocVecEnv for Atari and MuJoCo.
    """
    if wrapper_kwargs is None:
        wrapper_kwargs = {}
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    def make_env(rank):  # pylint: disable=C0111
        def _thunk():
            env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)
            if env_type == 'atari':
                return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1:
                return RewardScaler(env, reward_scale)
            else:
                return env
        return _thunk
    set_global_seeds(seed)
    if num_env > 1:
        return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)])
    else:
        return DummyVecEnv([make_env(start_index)])
Example #13
Source File: global_except_hook.py From chainer with MIT License | 6 votes |
def _add_hook_if_enabled():
    # An MPI runtime is expected to kill all of its child processes
    # if one of them exits abnormally or without calling `MPI_Finalize()`.
    #
    # However, when a Python program runs on `mpi4py`, the MPI runtime
    # often fails to detect a process failure, and the rest of the processes
    # hang infinitely.
    #
    # It is problematic especially when you run ChainerMN programs on a cloud
    # environment, on which you are charged on a time basis.
    # See https://github.com/chainer/chainermn/issues/236 for more discussion.
    #
    # To activate this handler, set CHAINERMN_FORCE_ABORT_ON_EXCEPTION
    # to a non-empty value.
    # Note that you need to pass an argument to mpiexec (-x for Open MPI)
    # to activate the handler in all processes.
    var = os.environ.get('CHAINERMN_FORCE_ABORT_ON_EXCEPTION')
    if var is not None and len(var) > 0:
        add_hook()
Example #14
Source File: util.py From ICML2019-TREX with MIT License | 5 votes |
def reshape_for_broadcasting(source, target):
    """Reshapes a tensor (source) to have the correct shape and dtype of the target
    before broadcasting it with MPI.
    """
    dim = len(target.get_shape())
    shape = ([1] * (dim - 1)) + [-1]
    return tf.reshape(tf.cast(source, target.dtype), shape)
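The effect of that reshape is easiest to see with concrete shapes. Below is a small NumPy analogue (an illustration only; the original helper operates on TensorFlow tensors): a flat source of length 3 is reshaped to (1, 3) so it broadcasts across every row of a (4, 3) target.

# NumPy illustration of the reshape-for-broadcasting idea (assumption:
# this mirrors the TensorFlow helper above; it is not the original code).
import numpy as np

target = np.zeros((4, 3), dtype=np.float32)
source = np.array([1, 2, 3])

dim = target.ndim                       # len(target.get_shape()) in the TF version
shape = [1] * (dim - 1) + [-1]          # -> [1, -1]
reshaped = source.astype(target.dtype).reshape(shape)

print((target + reshaped).shape)        # (4, 3): source broadcasts over the batch axis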
Example #15
Source File: global_except_hook.py From chainer with MIT License | 5 votes |
def _global_except_hook(exctype, value, traceback):
    """Catches an unhandled exception and calls MPI_Abort()."""
    try:
        if _orig_except_hook:
            _orig_except_hook(exctype, value, traceback)
        else:
            sys.__excepthook__(exctype, value, traceback)

    finally:
        import mpi4py.MPI
        rank = mpi4py.MPI.COMM_WORLD.Get_rank()
        sys.stderr.write('\n')
        sys.stderr.write('******************************************\n')
        sys.stderr.write('ChainerMN:\n')
        sys.stderr.write('   Uncaught exception on rank {}.\n'.format(rank))
        sys.stderr.write('   Calling MPI_Abort() to shut down MPI...\n')
        sys.stderr.write('******************************************\n')
        sys.stderr.write('\n\n')
        sys.stderr.flush()

        try:
            import mpi4py.MPI
            mpi4py.MPI.COMM_WORLD.Abort(1)
        except Exception as e:
            # Something is completely broken...
            # There's nothing we can do any more
            sys.stderr.write('Sorry, failed to stop MPI and the process may hang.\n')
            sys.stderr.flush()
            raise e
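The hook above only takes effect once it has been swapped into sys.excepthook. Below is a simplified sketch of that registration step; the add_hook name matches the call in Example #13, but the body shown here is an assumption for illustration, not the original chainer implementation.

# Simplified sketch of registering the hook (assumed to mirror what the
# add_hook() call in Example #13 does; not the original chainer code).
import sys

_orig_except_hook = None

def add_hook():
    global _orig_except_hook
    if _orig_except_hook is None:
        _orig_except_hook = sys.excepthook     # keep Python's default reporting
        sys.excepthook = _global_except_hook   # then abort MPI on any uncaught exception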
Example #16
Source File: test_node_aware_communicator_base.py From chainer with MIT License | 5 votes |
def test_intra_size_with_env(self):
    if 'MV2_COMM_WORLD_LOCAL_SIZE' in os.environ:
        # MVAPICH
        expected = int(os.environ['MV2_COMM_WORLD_LOCAL_SIZE'])
    elif 'OMPI_COMM_WORLD_LOCAL_SIZE' in os.environ:
        # OpenMPI
        expected = int(os.environ['OMPI_COMM_WORLD_LOCAL_SIZE'])
    else:
        pytest.skip('No MPI specified')

    self.assertEqual(self.communicator.intra_size, expected)
Example #17
Source File: test_node_aware_communicator_base.py From chainer with MIT License | 5 votes |
def test_intra_rank_with_env(self):
    if 'MV2_COMM_WORLD_LOCAL_RANK' in os.environ:
        # MVAPICH
        expected = int(os.environ['MV2_COMM_WORLD_LOCAL_RANK'])
    elif 'OMPI_COMM_WORLD_LOCAL_RANK' in os.environ:
        # OpenMPI
        expected = int(os.environ['OMPI_COMM_WORLD_LOCAL_RANK'])
    else:
        pytest.skip('No MPI specified')

    self.assertEqual(self.communicator.intra_rank, expected)
Example #18
Source File: test_node_aware_communicator_base.py From chainer with MIT License | 5 votes |
def setUp(self):
    self.mpi_comm = mpi4py.MPI.COMM_WORLD
    self.communicator = NodeAwareNaiveCommunicator(self.mpi_comm)
Example #19
Source File: test_communicator.py From chainer with MIT License | 5 votes |
def setUp(self):
    if nccl.get_build_version() < 2000:
        pytest.skip('This test requires NCCL version >= 2.0')
    self.mpi_comm = mpi4py.MPI.COMM_WORLD
Example #20
Source File: run_mujoco.py From lirpg with MIT License | 5 votes |
def train(env, seed, policy_fn, reward_giver, dataset, algo,
          g_step, d_step, policy_entcoeff, num_timesteps, save_per_iter,
          checkpoint_dir, log_dir, pretrained, BC_max_iter, task_name=None):

    pretrained_weight = None
    if pretrained and (BC_max_iter > 0):
        # Pretrain with behavior cloning
        from baselines.gail import behavior_clone
        pretrained_weight = behavior_clone.learn(env, policy_fn, dataset, max_iters=BC_max_iter)

    if algo == 'trpo':
        from baselines.gail import trpo_mpi
        # Set up for MPI seed
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        trpo_mpi.learn(env, policy_fn, reward_giver, dataset, rank,
                       pretrained=pretrained, pretrained_weight=pretrained_weight,
                       g_step=g_step, d_step=d_step,
                       entcoeff=policy_entcoeff,
                       max_timesteps=num_timesteps,
                       ckpt_dir=checkpoint_dir, log_dir=log_dir,
                       save_per_iter=save_per_iter,
                       timesteps_per_batch=1024,
                       max_kl=0.01, cg_iters=10, cg_damping=0.1,
                       gamma=0.995, lam=0.97,
                       vf_iters=5, vf_stepsize=1e-3,
                       task_name=task_name)
    else:
        raise NotImplementedError
Example #21
Source File: main.py From stable-baselines with MIT License | 5 votes |
def parse_args():
    """
    parse the arguments for DDPG training

    :return: (dict) the arguments
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=True)
    boolean_flag(parser, 'render', default=False)
    boolean_flag(parser, 'normalize-returns', default=False)
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
    parser.add_argument('--batch-size', type=int, default=64)  # per MPI worker
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    boolean_flag(parser, 'enable-popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-train-steps', type=int, default=50)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=100)  # per epoch cycle and MPI worker
    # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2')
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    boolean_flag(parser, 'evaluation', default=False)

    args = parser.parse_args()
    dict_args = vars(args)
    return dict_args
Example #22
Source File: main.py From learning2run with MIT License | 5 votes |
def parse_args():
    parser = argparse.ArgumentParser()

    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=True)
    boolean_flag(parser, 'render', default=False)
    parser.add_argument('--num-cpu', type=int, default=1)
    boolean_flag(parser, 'normalize-returns', default=False)
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
    parser.add_argument('--batch-size', type=int, default=64)  # per MPI worker
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    boolean_flag(parser, 'popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-epochs', type=int, default=500)  # with default settings, perform 1M steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=20)
    parser.add_argument('--nb-train-steps', type=int, default=50)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2')  # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--logdir', type=str, default=None)
    boolean_flag(parser, 'gym-monitor', default=False)
    boolean_flag(parser, 'evaluation', default=True)
    boolean_flag(parser, 'bind-to-core', default=False)

    return vars(parser.parse_args())
Example #23
Source File: run_mujoco.py From ICML2019-TREX with MIT License | 5 votes |
def train(env, seed, policy_fn, reward_giver, dataset, algo,
          g_step, d_step, policy_entcoeff, num_timesteps, save_per_iter,
          checkpoint_dir, log_dir, pretrained, BC_max_iter, task_name=None):

    pretrained_weight = None
    if pretrained and (BC_max_iter > 0):
        # Pretrain with behavior cloning
        from baselines.gail import behavior_clone
        pretrained_weight = behavior_clone.learn(env, policy_fn, dataset, max_iters=BC_max_iter)

    if algo == 'trpo':
        from baselines.gail import trpo_mpi
        # Set up for MPI seed
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        trpo_mpi.learn(env, policy_fn, reward_giver, dataset, rank,
                       pretrained=pretrained, pretrained_weight=pretrained_weight,
                       g_step=g_step, d_step=d_step,
                       entcoeff=policy_entcoeff,
                       max_timesteps=num_timesteps,
                       ckpt_dir=checkpoint_dir, log_dir=log_dir,
                       save_per_iter=save_per_iter,
                       timesteps_per_batch=1024,
                       max_kl=0.01, cg_iters=10, cg_damping=0.1,
                       gamma=0.995, lam=0.97,
                       vf_iters=5, vf_stepsize=1e-3,
                       task_name=task_name)
    else:
        raise NotImplementedError
Example #24
Source File: run_mujoco.py From DRL_DeliveryDuel with MIT License | 5 votes |
def train(env, seed, policy_fn, reward_giver, dataset, algo,
          g_step, d_step, policy_entcoeff, num_timesteps, save_per_iter,
          checkpoint_dir, log_dir, pretrained, BC_max_iter, task_name=None):

    pretrained_weight = None
    if pretrained and (BC_max_iter > 0):
        # Pretrain with behavior cloning
        from baselines.gail import behavior_clone
        pretrained_weight = behavior_clone.learn(env, policy_fn, dataset, max_iters=BC_max_iter)

    if algo == 'trpo':
        from baselines.gail import trpo_mpi
        # Set up for MPI seed
        rank = MPI.COMM_WORLD.Get_rank()
        if rank != 0:
            logger.set_level(logger.DISABLED)
        workerseed = seed + 10000 * MPI.COMM_WORLD.Get_rank()
        set_global_seeds(workerseed)
        env.seed(workerseed)
        trpo_mpi.learn(env, policy_fn, reward_giver, dataset, rank,
                       pretrained=pretrained, pretrained_weight=pretrained_weight,
                       g_step=g_step, d_step=d_step,
                       entcoeff=policy_entcoeff,
                       max_timesteps=num_timesteps,
                       ckpt_dir=checkpoint_dir, log_dir=log_dir,
                       save_per_iter=save_per_iter,
                       timesteps_per_batch=1024,
                       max_kl=0.01, cg_iters=10, cg_damping=0.1,
                       gamma=0.995, lam=0.97,
                       vf_iters=5, vf_stepsize=1e-3,
                       task_name=task_name)
    else:
        raise NotImplementedError
Example #25
Source File: util.py From DRL_DeliveryDuel with MIT License | 5 votes |
def install_mpi_excepthook():
    import sys
    from mpi4py import MPI
    old_hook = sys.excepthook

    def new_hook(a, b, c):
        old_hook(a, b, c)
        sys.stdout.flush()
        sys.stderr.flush()
        MPI.COMM_WORLD.Abort()

    sys.excepthook = new_hook
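To see why hooks like this matter, consider the following hedged sketch (the failing-rank scenario is invented for illustration and is not from the original file): without the hook, an exception on rank 1 would leave every other rank blocked in the barrier; with it, MPI.COMM_WORLD.Abort() tears down the whole job.

# Illustrative scenario (not from the original file): install the hook, then
# let one rank raise so Abort() terminates every process instead of leaving
# the others stuck in the collective call.
from mpi4py import MPI

install_mpi_excepthook()

comm = MPI.COMM_WORLD
if comm.Get_rank() == 1:
    raise RuntimeError("simulated failure on rank 1")
comm.Barrier()   # ranks other than 1 would otherwise wait here forever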
Example #26
Source File: util.py From DRL_DeliveryDuel with MIT License | 5 votes |
def mpi_fork(n):
    """Re-launches the current script with workers
    Returns "parent" for original parent, "child" for MPI children
    """
    if n <= 1:
        return "child"
    if os.getenv("IN_MPI") is None:
        env = os.environ.copy()
        env.update(
            MKL_NUM_THREADS="1",
            OMP_NUM_THREADS="1",
            IN_MPI="1"
        )
        # "-bind-to core" is crucial for good performance
        args = [
            "mpirun",
            "-np",
            str(n),
            "-bind-to",
            "core",
            sys.executable
        ]
        args += sys.argv
        subprocess.check_call(args, env=env)
        return "parent"
    else:
        install_mpi_excepthook()
        return "child"
Example #27
Source File: util.py From ICML2019-TREX with MIT License | 5 votes |
def install_mpi_excepthook():
    import sys
    from mpi4py import MPI
    old_hook = sys.excepthook

    def new_hook(a, b, c):
        old_hook(a, b, c)
        sys.stdout.flush()
        sys.stderr.flush()
        MPI.COMM_WORLD.Abort()

    sys.excepthook = new_hook
Example #28
Source File: util.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reshape_for_broadcasting(source, target):
    """Reshapes a tensor (source) to have the correct shape and dtype of the target
    before broadcasting it with MPI.
    """
    dim = len(target.get_shape())
    shape = ([1] * (dim - 1)) + [-1]
    return tf.reshape(tf.cast(source, target.dtype), shape)
Example #29
Source File: run.py From ICML2019-TREX with MIT License | 5 votes |
def main():
    # configure logger, disable logging in child MPI processes (with rank > 0)

    arg_parser = common_arg_parser()
    args, unknown_args = arg_parser.parse_known_args()
    extra_args = parse_cmdline_kwargs(unknown_args)

    if MPI is None or MPI.COMM_WORLD.Get_rank() == 0:
        rank = 0
        logger.configure()
    else:
        logger.configure(format_strs=[])
        rank = MPI.COMM_WORLD.Get_rank()

    model, env = train(args, extra_args)
    env.close()

    if args.save_path is not None and rank == 0:
        save_path = osp.expanduser(args.save_path)
        model.save(save_path)

    if args.play:
        logger.log("Running trained model")
        env = build_env(args)
        obs = env.reset()
        def initialize_placeholders(nlstm=128, **kwargs):
            return np.zeros((args.num_env or 1, 2 * nlstm)), np.zeros((1))
        state, dones = initialize_placeholders(**extra_args)
        while True:
            actions, _, state, _ = model.step(obs, S=state, M=dones)
            obs, _, done, _ = env.step(actions)
            env.render()
            done = done.any() if isinstance(done, np.ndarray) else done
            if done:
                obs = env.reset()
        env.close()
Example #30
Source File: main.py From DRL_DeliveryDuel with MIT License | 5 votes |
def parse_args():
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--env-id', type=str, default='HalfCheetah-v1')
    boolean_flag(parser, 'render-eval', default=False)
    boolean_flag(parser, 'layer-norm', default=True)
    boolean_flag(parser, 'render', default=False)
    boolean_flag(parser, 'normalize-returns', default=False)
    boolean_flag(parser, 'normalize-observations', default=True)
    parser.add_argument('--seed', help='RNG seed', type=int, default=0)
    parser.add_argument('--critic-l2-reg', type=float, default=1e-2)
    parser.add_argument('--batch-size', type=int, default=64)  # per MPI worker
    parser.add_argument('--actor-lr', type=float, default=1e-4)
    parser.add_argument('--critic-lr', type=float, default=1e-3)
    boolean_flag(parser, 'popart', default=False)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--reward-scale', type=float, default=1.)
    parser.add_argument('--clip-norm', type=float, default=None)
    parser.add_argument('--nb-epochs', type=int, default=500)  # with default settings, perform 1M steps total
    parser.add_argument('--nb-epoch-cycles', type=int, default=20)
    parser.add_argument('--nb-train-steps', type=int, default=50)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-eval-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--nb-rollout-steps', type=int, default=100)  # per epoch cycle and MPI worker
    parser.add_argument('--noise-type', type=str, default='adaptive-param_0.2')  # choices are adaptive-param_xx, ou_xx, normal_xx, none
    parser.add_argument('--num-timesteps', type=int, default=None)
    boolean_flag(parser, 'evaluation', default=False)

    args = parser.parse_args()
    # we don't directly specify timesteps for this script, so make sure that if we do specify them
    # they agree with the other parameters
    if args.num_timesteps is not None:
        assert args.num_timesteps == args.nb_epochs * args.nb_epoch_cycles * args.nb_rollout_steps
    dict_args = vars(args)
    del dict_args['num_timesteps']
    return dict_args