Python stable_baselines.SAC Examples
The following are 12 code examples of stable_baselines.SAC(), each drawn from an open-source project; the source file and project are noted above each example.
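As a primer before the examples, here is a minimal, self-contained sketch of the SAC API they all build on (the Pendulum-v0 environment and the hyperparameters are illustrative choices, not taken from any example below):

import gym
from stable_baselines import SAC

# SAC requires a continuous (Box) action space; Pendulum-v0 is a common toy choice.
env = gym.make('Pendulum-v0')

# 'MlpPolicy' selects the built-in fully-connected policy for SAC.
model = SAC('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=10000)

# Roll out the trained policy for one episode.
obs = env.reset()
done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, _info = env.step(action)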
Example #1
Source File: test_0deterministic.py From stable-baselines with MIT License | 6 votes |
def test_deterministic_training_common(algo):
    results = [[], []]
    rewards = [[], []]
    kwargs = {'n_cpu_tf_sess': 1}
    if algo in [DDPG, TD3, SAC]:
        env_id = 'Pendulum-v0'
        kwargs.update({'action_noise': NormalActionNoise(0.0, 0.1)})
    else:
        env_id = 'CartPole-v1'
        if algo == DQN:
            kwargs.update({'learning_starts': 100})

    for i in range(2):
        model = algo('MlpPolicy', env_id, seed=SEED, **kwargs)
        model.learn(N_STEPS_TRAINING)
        env = model.get_env()
        obs = env.reset()
        for _ in range(100):
            action, _ = model.predict(obs, deterministic=False)
            obs, reward, _, _ = env.step(action)
            results[i].append(action)
            rewards[i].append(reward)

    # With a fixed seed, both training runs must produce identical trajectories.
    assert sum(results[0]) == sum(results[1]), results
    assert sum(rewards[0]) == sum(rewards[1]), rewards
Example #2
Source File: test_her.py From stable-baselines with MIT License | 6 votes |
def test_long_episode(model_class):
    """
    Check that the model does not break when the replay buffer is still
    empty after the first rollout (because the episode is not over).
    """
    # n_bits > nb_rollout_steps
    n_bits = 10
    env = BitFlippingEnv(n_bits, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=n_bits)
    kwargs = {}
    if model_class == DDPG:
        kwargs['nb_rollout_steps'] = 9  # < n_bits
    elif model_class in [DQN, SAC, TD3]:
        kwargs['batch_size'] = 8  # < n_bits
        kwargs['learning_starts'] = 0

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4,
                goal_selection_strategy='future', verbose=0, **kwargs)
    model.learn(200)
Example #3
Source File: hyperparams_opt.py From rl-baselines-zoo with MIT License | 6 votes |
def sample_her_params(trial):
    """
    Sampler for HER hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    if trial.model_class == SAC:
        hyperparams = sample_sac_params(trial)
    elif trial.model_class == DDPG:
        hyperparams = sample_ddpg_params(trial)
    elif trial.model_class == TD3:
        hyperparams = sample_td3_params(trial)

    hyperparams['random_exploration'] = trial.suggest_uniform('random_exploration', 0, 1)
    hyperparams['n_sampled_goal'] = trial.suggest_categorical('n_sampled_goal', [1, 2, 4, 6, 8])

    return hyperparams
Example #4
Source File: loader.py From adversarial-policies with MIT License | 5 votes |
def mpi_unavailable_error(*args, **kwargs):
    raise ImportError("This algorithm requires MPI, which is not available.")

# Lazy import for PPO1 and SAC, which have optional mpi dependency
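The stub above stands in for an algorithm class when mpi4py is missing. A minimal sketch of the lazy-import pattern the comment refers to (the try/except wiring here is an assumption for illustration, not the project's exact code):

# Hypothetical wiring: bind the stub only when the MPI-dependent import fails.
try:
    from stable_baselines import PPO1, SAC  # may pull in mpi4py at import time
except ImportError:
    PPO1 = SAC = mpi_unavailable_error  # calling either now raises a clear error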
Example #5
Source File: train.py From adversarial-policies with MIT License | 5 votes |
def sac(batch_size, learning_rate, **kwargs):
    return _stable(
        stable_baselines.SAC,
        our_type="sac",
        callback_key="step",
        callback_mul=1,
        batch_size=batch_size,
        learning_rate=learning_rate,
        **kwargs,
    )
Example #6
Source File: test_her.py From stable-baselines with MIT License | 5 votes |
def test_her(model_class, goal_selection_strategy, discrete_obs_space):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=N_BITS, discrete_obs_space=discrete_obs_space)

    # Take random actions 10% of the time
    kwargs = {'random_exploration': 0.1} if model_class in [DDPG, SAC, TD3] else {}
    model = HER('MlpPolicy', env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy,
                verbose=0, **kwargs)
    model.learn(1000)
Example #7
Source File: test_action_scaling.py From stable-baselines with MIT License | 5 votes |
def test_buffer_actions_scaling(model_class, model_kwargs):
    """
    Test if actions are scaled to tanh co-domain before being put in a buffer
    for algorithms that use tanh-squashing, i.e., DDPG, TD3, SAC

    :param model_class: (BaseRLModel) A RL Model
    :param model_kwargs: (dict) Dictionary containing named arguments to the given algorithm
    """
    # check random and inferred actions as they possibly have different flows
    for random_coeff in [0.0, 1.0]:
        env = IdentityEnvBox(-2000, 1000)
        model = model_class("MlpPolicy", env, seed=1,
                            random_exploration=random_coeff, **model_kwargs)
        model.learn(total_timesteps=ROLLOUT_STEPS)

        assert hasattr(model, 'replay_buffer')
        buffer = model.replay_buffer
        assert buffer.can_sample(ROLLOUT_STEPS)
        _, actions, _, _, _ = buffer.sample(ROLLOUT_STEPS)

        assert not np.any(actions > np.ones_like(actions))
        assert not np.any(actions < -np.ones_like(actions))
Example #8
Source File: hyperparams_opt.py From rl-baselines-zoo with MIT License | 5 votes |
def sample_sac_params(trial):
    """
    Sampler for SAC hyperparams.

    :param trial: (optuna.trial)
    :return: (dict)
    """
    gamma = trial.suggest_categorical('gamma', [0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999])
    learning_rate = trial.suggest_loguniform('lr', 1e-5, 1)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512])
    buffer_size = trial.suggest_categorical('buffer_size', [int(1e4), int(1e5), int(1e6)])
    learning_starts = trial.suggest_categorical('learning_starts', [0, 1000, 10000, 20000])
    train_freq = trial.suggest_categorical('train_freq', [1, 10, 100, 300])
    # gradient_steps takes too much time
    # gradient_steps = trial.suggest_categorical('gradient_steps', [1, 100, 300])
    gradient_steps = train_freq
    ent_coef = trial.suggest_categorical('ent_coef', ['auto', 0.5, 0.1, 0.05, 0.01, 0.0001])
    net_arch = trial.suggest_categorical('net_arch', ["small", "medium", "big"])

    net_arch = {
        'small': [64, 64],
        'medium': [256, 256],
        'big': [400, 300],
    }[net_arch]

    target_entropy = 'auto'
    if ent_coef == 'auto':
        target_entropy = trial.suggest_categorical('target_entropy', ['auto', -1, -10, -20, -50, -100])

    return {
        'gamma': gamma,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'buffer_size': buffer_size,
        'learning_starts': learning_starts,
        'train_freq': train_freq,
        'gradient_steps': gradient_steps,
        'ent_coef': ent_coef,
        'target_entropy': target_entropy,
        'policy_kwargs': dict(layers=net_arch)
    }
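For context, rl-baselines-zoo feeds samplers like this into an Optuna study. A minimal sketch of that flow, assuming a hypothetical evaluate_model helper that trains a SAC agent with the sampled hyperparameters and returns its mean episode reward:

import optuna

def objective(trial):
    hyperparams = sample_sac_params(trial)
    # evaluate_model is a hypothetical helper: train SAC with these
    # hyperparams and return the mean episode reward.
    return evaluate_model(hyperparams)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)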
Example #9
Source File: sac.py From robotics-rl-srl with MIT License | 5 votes |
def __init__(self):
    super(SACModel, self).__init__(name="sac", model_class=SAC)
Example #10
Source File: train_TD3_pushing_HER_Dyn_Rand.py From pybullet-robot-envs with GNU Lesser General Public License v2.1 | 5 votes |
def main(load_policy=False):
    global log_dir, log_dir_policy
    if load_policy:
        log_dir_policy = '../policies/PUSHING_TD3+HER_FIXED_POSITION_DYN_RAND_FROM_FIXED_PHYSICS'
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    fixed = True
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 1500000
    discreteAction = 0
    rend = False

    env = pandaPushGymEnvHERRand(urdfRoot=robot_data.getDataPath(),
                                 renders=rend, useIK=0,
                                 isDiscrete=discreteAction,
                                 action_space=action_space,
                                 fixedPositionObj=fixed,
                                 includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    # Available strategies (cf paper): future, final, episode, random
    goal_selection_strategy = 'future'  # equivalent to GoalSelectionStrategy.FUTURE
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomPolicy, env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4,
                         goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3+HER_FIXED_DYN_RAND_FROM_FIXED_PHYSICS",
                         buffer_size=1000000, batch_size=256,
                         random_exploration=0.3, action_noise=action_noise)

    # Train the model starting from a previous policy
    model.learn(timesteps, callback=callback)
    model.save("../policies/PUSHING_FIXED_TD3_DYN_RAND")
    print("Finished train1")
Example #11
Source File: train_TD3_pushing_HER.py From pybullet-robot-envs with GNU Lesser General Public License v2.1 | 5 votes |
def main(load_policy=False):
    global log_dir
    model_class = TD3  # works also with SAC and DDPG
    action_space = 7
    normalize_observations = False
    gamma = 0.9
    memory_limit = 1000000
    normalize_returns = True
    timesteps = 8000000
    rend = False
    obj_pose_rnd_std = 0

    env = pandaPushGymGoalEnv(renders=rend, use_IK=0,
                              numControlledJoints=action_space,
                              obj_pose_rnd_std=obj_pose_rnd_std,
                              includeVelObs=True)
    env = Monitor(env, log_dir, allow_early_resets=True)

    goal_selection_strategy = 'future'
    n_actions = env.action_space.shape[-1]
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=float(0.5) * np.ones(n_actions))

    # Wrap the model
    model = HER(CustomTD3Policy, env, model_class, n_sampled_goal=4,
                goal_selection_strategy=goal_selection_strategy, verbose=1,
                tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                buffer_size=1000000, batch_size=256,
                random_exploration=0.3, action_noise=action_noise)

    if load_policy:
        model = HER.load("../policies/USEFUL_POLICIES/PUSHING_TD3+HER_FIXED_POSITIONbest_model.pkl",
                         env=env, n_sampled_goal=4,
                         goal_selection_strategy=goal_selection_strategy,
                         tensorboard_log="../pybullet_logs/panda_push_TD3/stable_baselines/TD3_phase1_target_fixed",
                         buffer_size=1000000, batch_size=256,
                         random_exploration=0.3, action_noise=action_noise)

    model.learn(timesteps, log_interval=100, callback=callback)
    print("Saving Policy PHASE_1")
    model.save("../policies/TD3_phase1_target_fixed")
Example #12
Source File: test_her.py From stable-baselines with MIT License | 4 votes |
def test_model_manipulation(model_class, goal_selection_strategy):
    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=N_BITS)
    env = DummyVecEnv([lambda: env])

    model = HER('MlpPolicy', env, model_class, n_sampled_goal=3,
                goal_selection_strategy=goal_selection_strategy, verbose=0)
    model.learn(1000)

    model_predict(model, env, n_steps=100, additional_check=None)

    model.save('./test_her.zip')
    del model

    # NOTE: HER does not support VecEnvWrapper yet
    with pytest.raises(AssertionError):
        model = HER.load('./test_her.zip', env=VecNormalize(env))

    model = HER.load('./test_her.zip')

    # Check that the model raises an error when the env
    # is not wrapped (or no env passed to the model)
    with pytest.raises(ValueError):
        model.predict(env.reset())

    env_ = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                          max_steps=N_BITS)
    env_ = HERGoalEnvWrapper(env_)

    model_predict(model, env_, n_steps=100, additional_check=None)

    model.set_env(env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    del model

    env = BitFlippingEnv(N_BITS, continuous=model_class in [DDPG, SAC, TD3],
                         max_steps=N_BITS)
    model = HER.load('./test_her', env=env)
    model.learn(1000)

    model_predict(model, env_, n_steps=100, additional_check=None)

    assert model.n_sampled_goal == 3

    if os.path.isfile('./test_her.zip'):
        os.remove('./test_her.zip')