Python stable_baselines.A2C Examples
The following are 3 code examples of stable_baselines.A2C().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module stable_baselines, or try the search function.
Example #1
Source File: test_utils.py From stable-baselines with MIT License | 6 votes |
def test_evaluate_policy():
    """
    Exercise ``evaluate_policy`` with an A2C model on 'Pendulum-v0'.

    Checks that the summed episode lengths equal
    ``n_steps_per_episode * n_eval_episodes``, that the callback ran once per
    counted step, that an unreachable ``reward_threshold`` raises an
    ``AssertionError``, and that one reward is returned per evaluated episode.
    """
    model = A2C('MlpPolicy', 'Pendulum-v0', seed=0)
    n_steps_per_episode = 200
    n_eval_episodes = 2

    # The counter lives on the model so the callback can reach it through
    # the ``locals_`` mapping it is handed.
    model.n_callback_calls = 0

    def count_call(locals_, _globals):
        locals_['model'].n_callback_calls += 1

    _, episode_lengths = evaluate_policy(
        model,
        model.get_env(),
        n_eval_episodes,
        deterministic=True,
        render=False,
        callback=count_call,
        reward_threshold=None,
        return_episode_rewards=True,
    )
    total_steps = sum(episode_lengths)
    assert total_steps == n_steps_per_episode * n_eval_episodes
    assert total_steps == model.n_callback_calls

    # Reaching a mean reward of zero is impossible with the Pendulum env
    with pytest.raises(AssertionError):
        evaluate_policy(model, model.get_env(), n_eval_episodes, reward_threshold=0.0)

    episode_rewards, _ = evaluate_policy(
        model, model.get_env(), n_eval_episodes, return_episode_rewards=True
    )
    assert len(episode_rewards) == n_eval_episodes
Example #2
Source File: run_atari.py From stable-baselines with MIT License | 5 votes |
def train(env_id, num_timesteps, seed, policy, lr_schedule, num_env):
    """
    Train A2C model for atari environment, for testing purposes

    :param env_id: (str) Environment ID
    :param num_timesteps: (int) The total number of samples
        (training actually runs for ``int(num_timesteps * 1.1)`` timesteps)
    :param seed: (int) The initial seed for training
    :param policy: (str) The policy model to use: 'cnn', 'lstm' or 'lnlstm'
    :param lr_schedule: (str) The type of scheduler for the learning rate update ('linear', 'constant',
                                 'double_linear_con', 'middle_drop' or 'double_middle_drop')
    :param num_env: (int) The number of environments
    :raises ValueError: if ``policy`` is not one of the supported names
    """
    # Resolve the policy name first so an invalid value fails before any
    # environment is created.
    policy_fn = None
    if policy == 'cnn':
        policy_fn = CnnPolicy
    elif policy == 'lstm':
        policy_fn = CnnLstmPolicy
    elif policy == 'lnlstm':
        policy_fn = CnnLnLstmPolicy
    if policy_fn is None:
        raise ValueError("Error: policy {} not implemented".format(policy))

    env = VecFrameStack(make_atari_env(env_id, num_env, seed), 4)
    try:
        model = A2C(policy_fn, env, lr_schedule=lr_schedule, seed=seed)
        # NOTE(review): the 10% margin over num_timesteps is kept from the
        # original code; its rationale is not visible here.
        model.learn(total_timesteps=int(num_timesteps * 1.1))
    finally:
        # Always release the environment, even when model construction or
        # training raises.
        env.close()
Example #3
Source File: a2c.py From robotics-rl-srl with MIT License | 5 votes |
def __init__(self):
    """Delegate to the parent constructor with name "a2c" and ``A2C`` as the model class."""
    super(A2CModel, self).__init__(
        name="a2c",
        model_class=A2C,
    )