Python stable_baselines.common.vec_env.VecNormalize() Examples
The following are 25 code examples of stable_baselines.common.vec_env.VecNormalize(), collected from open-source projects. The originating project, source file, and license are noted above each example.
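Before the examples, here is a minimal sketch of the pattern most of them share: wrap a vectorized environment in VecNormalize for training, persist its running statistics, and reload them (frozen) for evaluation. The environment id and file paths are placeholders, and the VecNormalize.save/VecNormalize.load calls assume a recent stable-baselines 2.x release; older releases expose save_running_average/load_running_average instead, as several examples below do.

import gym

from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

# Training: normalize observations (and optionally rewards) with running statistics.
venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
venv = VecNormalize(venv, norm_obs=True, norm_reward=True)
model = PPO2("MlpPolicy", venv)
model.learn(total_timesteps=10000)

# Persist the running mean/std alongside the model.
model.save("model.pkl")
venv.save("vec_normalize.pkl")

# Evaluation: reload the statistics and freeze them so they no longer update.
eval_venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])
eval_venv = VecNormalize.load("vec_normalize.pkl", eval_venv)
eval_venv.training = False
eval_venv.norm_reward = False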
Example #1
Source File: loader.py From adversarial-policies with MIT License

def load_stable_baselines(cls):
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        model = load_backward_compatible_model(cls, root_dir, denv)
        try:
            vec_normalize = load_vec_normalize(root_dir, denv)
            model = NormalizeModel(model, vec_normalize)
        except FileNotFoundError:
            # No saved VecNormalize, must have not trained with normalization.
            pass
        return model

    return f
Example #2
Source File: ppo1.py From robotics-rl-srl with MIT License

def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #3
Source File: trpo.py From robotics-rl-srl with MIT License

def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #4
Source File: serialize.py From imitation with MIT License

def save_stable_model(
    output_dir: str,
    model: BaseRLModel,
    vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        policy: The stable baselines policy.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, "model.pkl"))
    if vec_normalize is not None:
        with open(os.path.join(output_dir, "vec_normalize.pkl"), "wb") as f:
            pickle.dump(vec_normalize, f)
    tf.logging.info("Saved policy to %s", output_dir)
Example #5
Source File: buffers.py From stable-baselines with MIT License

def sample(self, batch_size: int, env: Optional[VecNormalize] = None, **_kwargs):
    """
    Sample a batch of experiences.

    :param batch_size: (int) How many transitions to sample.
    :param env: (Optional[VecNormalize]) associated gym VecEnv
        to normalize the observations/rewards when sampling
    :return:
        - obs_batch: (np.ndarray) batch of observations
        - act_batch: (numpy float) batch of actions executed given obs_batch
        - rew_batch: (numpy float) rewards received as results of executing act_batch
        - next_obs_batch: (np.ndarray) next set of observations seen after executing act_batch
        - done_mask: (numpy bool) done_mask[i] = 1 if executing act_batch[i] resulted in
            the end of an episode and 0 otherwise.
    """
    idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
    return self._encode_sample(idxes, env=env)
Example #6
Source File: deepq.py From robotics-rl-srl with MIT License

def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DQN is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #7
Source File: loader.py From adversarial-policies with MIT License

def __init__(
    self,
    model: stable_baselines.common.base_class.BaseRLModel,
    vec_normalize: vec_env.VecNormalize,
):
    super().__init__(model=model)
    self.vec_normalize = vec_normalize
Example #8
Source File: sac.py From robotics-rl-srl with MIT License

def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single core only, we need to use the pipe system to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #9
Source File: utils.py From robotics-rl-srl with MIT License

def loadRunningAverage(envs, load_path_normalise=None):
    if load_path_normalise is not None:
        try:
            printGreen("Loading saved running average")
            envs.load_running_average(load_path_normalise)
            envs.training = False
        except FileNotFoundError:
            envs.training = True
            printYellow("Running Average files not found for VecNormalize, switching to training mode")
    return envs
Example #10
Source File: utils.py From robotics-rl-srl with MIT License

def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to loading the rolling average, None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = [makeEnv(args.env, args.seed, i, args.log_dir,
                    allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #11
Source File: utils.py From robotics-rl-srl with MIT License

def loadRunningAverage(self, path):
    """
    Hack to use VecNormalize
    :param path: (str) path to log dir
    """
    self.venv.load_running_average(path)

# Compatibility with stable-baselines
Example #12
Source File: utils.py From robotics-rl-srl with MIT License

def saveRunningAverage(self, path):
    """
    Hack to use VecNormalize
    :param path: (str) path to log dir
    """
    self.venv.save_running_average(path)
Example #13
Source File: utils.py From robotics-rl-srl with MIT License

def get_original_obs(self):
    """
    Hack to use VecNormalize
    :return: (numpy float)
    """
    return self.venv.get_original_obs()
Example #14
Source File: train_ppo.py From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License

def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset, debug,
                   task_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([lambda k=k: Monitor(env_temp("sim%d" % (k+1),
                                                      StateCollector("sim%s" % (k+1), "train"),
                                                      stack_offset, num_stacks, robot_radius,
                                                      rew_fnc, debug, "train", task_mode),
                                             '%s/%s/sim_%d' % (path_to_models, agent_name, k+1),
                                             allow_early_resets=True)
                         for k in range(num_envs)])

    # Normalizing?
    if normalize:
        env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False,
                           clip_obs=100.0, clip_reward=10.0, gamma=0.99, epsilon=1e-08)
    else:
        env = env

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #15
Source File: run_ppo.py From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License

def load_train_env(ns, state_collector, robot_radius, rew_fnc, num_stacks, stack_offset,
                   debug, task_mode, rl_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy in ["CNN1DPolicy", "CNN1DPolicy2", "CNN1DPolicy3"]:
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env_raw = DummyVecEnv([lambda: env_temp(ns, state_collector, stack_offset, num_stacks,
                                            robot_radius, rew_fnc, debug, rl_mode, task_mode)])

    if normalize:
        env = VecNormalize(env_raw, training=True, norm_obs=True, norm_reward=False,
                           clip_obs=100.0, clip_reward=10.0, gamma=0.99, epsilon=1e-08)
    else:
        env = env_raw

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env
Example #16
Source File: buffers.py From stable-baselines with MIT License

def sample(self, batch_size: int, beta: float = 0, env: Optional[VecNormalize] = None):
    """
    Sample a batch of experiences.

    Compared to ReplayBuffer.sample, it also returns importance weights and idxes
    of sampled experiences.

    :param batch_size: (int) How many transitions to sample.
    :param beta: (float) To what degree to use importance weights
        (0 - no corrections, 1 - full correction)
    :param env: (Optional[VecNormalize]) associated gym VecEnv
        to normalize the observations/rewards when sampling
    :return:
        - obs_batch: (np.ndarray) batch of observations
        - act_batch: (numpy float) batch of actions executed given obs_batch
        - rew_batch: (numpy float) rewards received as results of executing act_batch
        - next_obs_batch: (np.ndarray) next set of observations seen after executing act_batch
        - done_mask: (numpy bool) done_mask[i] = 1 if executing act_batch[i] resulted in
            the end of an episode and 0 otherwise.
        - weights: (numpy float) Array of shape (batch_size,) and dtype np.float32
            denoting importance weight of each sampled transition
        - idxes: (numpy int) Array of shape (batch_size,) and dtype np.int32,
            indexes in buffer of sampled experiences
    """
    assert beta > 0

    idxes = self._sample_proportional(batch_size)
    weights = []
    p_min = self._it_min.min() / self._it_sum.sum()
    max_weight = (p_min * len(self._storage)) ** (-beta)
    p_sample = self._it_sum[idxes] / self._it_sum.sum()
    weights = (p_sample * len(self._storage)) ** (-beta) / max_weight
    encoded_sample = self._encode_sample(idxes, env=env)
    return tuple(list(encoded_sample) + [weights, idxes])
Example #17
Source File: buffers.py From stable-baselines with MIT License

def _encode_sample(self, idxes: Union[List[int], np.ndarray], env: Optional[VecNormalize] = None):
    obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
    for i in idxes:
        data = self._storage[i]
        obs_t, action, reward, obs_tp1, done = data
        obses_t.append(np.array(obs_t, copy=False))
        actions.append(np.array(action, copy=False))
        rewards.append(reward)
        obses_tp1.append(np.array(obs_tp1, copy=False))
        dones.append(done)
    return (self._normalize_obs(np.array(obses_t), env),
            np.array(actions),
            self._normalize_reward(np.array(rewards), env),
            self._normalize_obs(np.array(obses_tp1), env),
            np.array(dones))
Example #18
Source File: buffers.py From stable-baselines with MIT License

def _normalize_reward(reward: np.ndarray, env: Optional[VecNormalize] = None) -> np.ndarray:
    """
    Helper for normalizing the reward.
    """
    if env is not None:
        return env.normalize_reward(reward)
    return reward
Example #19
Source File: buffers.py From stable-baselines with MIT License

def _normalize_obs(obs: np.ndarray, env: Optional[VecNormalize] = None) -> np.ndarray:
    """
    Helper for normalizing the observation.
    """
    if env is not None:
        return env.normalize_obs(obs)
    return obs
Example #20
Source File: common.py From imitation with MIT License

def _reward_fn_normalize_inputs(
    obs: np.ndarray,
    acts: np.ndarray,
    next_obs: np.ndarray,
    dones: np.ndarray,
    *,
    reward_fn: RewardFn,
    vec_normalize: vec_env.VecNormalize,
    norm_reward: bool = True,
) -> np.ndarray:
    """Combine with `functools.partial` to create an input-normalizing RewardFn.

    Args:
        reward_fn: The reward function that normalized inputs are evaluated on.
        vec_normalize: Instance of VecNormalize used to normalize inputs and rewards.
        norm_reward: If True, then also normalize reward before returning.

    Returns:
        The possibly normalized reward.
    """
    norm_obs = vec_normalize.normalize_obs(obs)
    norm_next_obs = vec_normalize.normalize_obs(next_obs)
    rew = reward_fn(norm_obs, acts, norm_next_obs, dones)
    if norm_reward:
        rew = vec_normalize.normalize_reward(rew)
    return rew
Example #21
Source File: serialize.py From imitation with MIT License

def __init__(self, policy: BasePolicy, vec_normalize: VecNormalize):
    super().__init__(
        policy.sess,
        policy.ob_space,
        policy.ac_space,
        policy.n_env,
        policy.n_steps,
        policy.n_batch,
    )
    self._policy = policy
    self.vec_normalize = vec_normalize
Example #22
Source File: loader.py From adversarial-policies with MIT License

def load_vec_normalize(root_dir: str, venv: vec_env.VecEnv) -> vec_env.VecNormalize:
    try:
        normalize_path = os.path.join(root_dir, "vec_normalize.pkl")
        vec_normalize = vec_env.VecNormalize.load(normalize_path, venv)
        vec_normalize.training = False
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
        return vec_normalize
    except FileNotFoundError:
        pass

    # Could not find vec_normalize.pkl: try loading old-style vec normalize.
    vec_normalize = vec_env.VecNormalize(venv, training=False)
    vec_normalize.load_running_average(root_dir)
    pylog.info(f"Loaded normalization statistics from '{root_dir}'")
    return vec_normalize
Example #23
Source File: test_policies.py From imitation with MIT License

def test_serialize_identity(env_name, model_cfg, normalize, tmpdir):
    """Test output actions of deserialized policy are same as original."""
    orig_venv = venv = util.make_vec_env(env_name, n_envs=1, parallel=False)
    vec_normalize = None
    if normalize:
        venv = vec_normalize = VecNormalize(venv)

    model_name, model_cls_name = model_cfg
    try:
        model_cls = registry.load_attr(model_cls_name)
    except (AttributeError, ImportError):  # pragma: no cover
        pytest.skip(
            "Couldn't load stable baselines class. "
            "(Probably because mpi4py not installed.)"
        )

    model = model_cls("MlpPolicy", venv)
    model.learn(1000)

    venv.env_method("seed", 0)
    venv.reset()
    if normalize:
        # don't want statistics to change as we collect rollouts
        vec_normalize.training = False
    orig_rollout = rollout.generate_transitions(
        model,
        venv,
        n_timesteps=1000,
        deterministic_policy=True,
        rng=np.random.RandomState(0),
    )
    serialize.save_stable_model(tmpdir, model, vec_normalize)

    # We use `orig_venv` since `load_policy` automatically wraps `loaded`
    # with a VecNormalize, when appropriate.
    with serialize.load_policy(model_name, tmpdir, orig_venv) as loaded:
        orig_venv.env_method("seed", 0)
        orig_venv.reset()
        new_rollout = rollout.generate_transitions(
            loaded,
            orig_venv,
            n_timesteps=1000,
            deterministic_policy=True,
            rng=np.random.RandomState(0),
        )

    assert np.allclose(orig_rollout.acts, new_rollout.acts)
Example #24
Source File: serialize.py From imitation with MIT License

def _load_stable_baselines(cls: Type[BaseRLModel], policy_attr: str) -> PolicyLoaderFn:
    """Higher-order function, returning a policy loading function.

    Args:
        cls: The RL algorithm, e.g. `stable_baselines.PPO2`.
        policy_attr: The attribute of the RL algorithm containing the policy,
            e.g. `act_model`.

    Returns:
        A function loading policies trained via cls.
    """

    @contextlib.contextmanager
    def f(path: str, venv: VecEnv) -> Iterator[BasePolicy]:
        """Loads a policy saved to path, for environment env."""
        tf.logging.info(f"Loading Stable Baselines policy for '{cls}' from '{path}'")
        model_path = os.path.join(path, "model.pkl")
        model = None
        try:
            model = cls.load(model_path, env=venv)
            policy = getattr(model, policy_attr)

            try:
                normalize_path = os.path.join(path, "vec_normalize.pkl")
                with open(normalize_path, "rb") as f:
                    vec_normalize = pickle.load(f)
                vec_normalize.training = False
                vec_normalize.set_venv(venv)
                policy = NormalizePolicy(policy, vec_normalize)
                tf.logging.info(f"Loaded VecNormalize from '{normalize_path}'")
            except FileNotFoundError:
                # We did not use VecNormalize during training, skip
                pass

            yield policy
        finally:
            if model is not None and model.sess is not None:
                model.sess.close()

    return f
Example #25
Source File: loader.py From adversarial-policies with MIT License

def load_old_ppo2(root_dir, env, env_name, index, transparent_params):
    try:
        from baselines.ppo2 import ppo2 as ppo2_old
    except ImportError as e:
        msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(e)
        raise ImportError(msg)

    denv = FakeSingleSpacesVec(env, agent_id=index)
    possible_fnames = ["model.pkl", "final_model.pkl"]
    model_path = None
    for fname in possible_fnames:
        candidate_path = os.path.join(root_dir, fname)
        if os.path.exists(candidate_path):
            model_path = candidate_path
    if model_path is None:
        raise FileNotFoundError(
            f"Could not find model at '{root_dir}' under any filename '{possible_fnames}'"
        )

    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with sess.as_default():
        with graph.as_default():
            pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'")
            policy = ppo2_old.learn(
                network="mlp",
                env=denv,
                total_timesteps=1,
                seed=0,
                nminibatches=4,
                log_interval=1,
                save_interval=1,
                load_path=model_path,
            )
    stable_policy = OpenAIToStablePolicy(
        policy, ob_space=denv.observation_space, ac_space=denv.action_space
    )
    model = PolicyToModel(stable_policy)

    try:
        normalize_path = os.path.join(root_dir, "normalize.pkl")
        with open(normalize_path, "rb") as f:
            old_vec_normalize = pickle.load(f)
        vec_normalize = vec_env.VecNormalize(denv, training=False)
        vec_normalize.obs_rms = old_vec_normalize.ob_rms
        vec_normalize.ret_rms = old_vec_normalize.ret_rms
        model = NormalizeModel(model, vec_normalize)
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
    except FileNotFoundError:
        # We did not use VecNormalize during training, skip
        pass

    return model