Python stable_baselines.common.vec_env.VecNormalize() Examples

The following are 25 code examples of stable_baselines.common.vec_env.VecNormalize(). You can go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module stable_baselines.common.vec_env, or try the search function.
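VecNormalize wraps a vectorized environment and keeps running estimates of observation and reward statistics, normalizing both on the fly. A minimal usage sketch before the examples (the environment id and hyperparameters here are illustrative, not taken from any example below):

import gym
from stable_baselines import PPO2
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

venv = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
# Maintain running mean/std of observations and returns
venv = VecNormalize(venv, norm_obs=True, norm_reward=True, clip_obs=10.0)

model = PPO2("MlpPolicy", venv)
model.learn(total_timesteps=10000)

# Freeze the statistics before evaluation so they stop updating
venv.training = False
venv.norm_reward = False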
Example #1
Source File: loader.py    From adversarial-policies with MIT License
def load_stable_baselines(cls):
    def f(root_dir, env, env_name, index, transparent_params):
        denv = FakeSingleSpacesVec(env, agent_id=index)
        pylog.info(f"Loading Stable Baselines policy for '{cls}' from '{root_dir}'")
        model = load_backward_compatible_model(cls, root_dir, denv)

        try:
            vec_normalize = load_vec_normalize(root_dir, denv)
            model = NormalizeModel(model, vec_normalize)
        except FileNotFoundError:
            # No saved VecNormalize; the policy must not have been trained with normalization.
            pass

        return model

    return f 
Example #2
Source File: ppo1.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though PPO1 is single core only, we still need to use the pipe system
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    # Note: this calls the module-level makeEnv factory, not this classmethod
    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #3
Source File: trpo.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though TRPO is single core only, we still need to use the pipe system
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
Example #4
Source File: serialize.py    From imitation with MIT License
def save_stable_model(
    output_dir: str, model: BaseRLModel, vec_normalize: Optional[VecNormalize] = None,
) -> None:
    """Serialize policy.

    Load later with `load_policy(..., policy_path=output_dir)`.

    Args:
        output_dir: Path to the save directory.
        model: The stable baselines model.
        vec_normalize: Optionally, a VecNormalize to save statistics for.
            `load_policy` automatically applies `NormalizePolicy` wrapper
            when loading.
    """
    os.makedirs(output_dir, exist_ok=True)
    model.save(os.path.join(output_dir, "model.pkl"))
    if vec_normalize is not None:
        with open(os.path.join(output_dir, "vec_normalize.pkl"), "wb") as f:
            pickle.dump(vec_normalize, f)
    tf.logging.info("Saved policy to %s", output_dir) 
Example #5
Source File: buffers.py    From stable-baselines with MIT License
def sample(self, batch_size: int, env: Optional[VecNormalize] = None, **_kwargs):
    """
    Sample a batch of experiences.

    :param batch_size: (int) How many transitions to sample.
    :param env: (Optional[VecNormalize]) associated gym VecEnv
        to normalize the observations/rewards when sampling
    :return:
        - obs_batch: (np.ndarray) batch of observations
        - act_batch: (numpy float) batch of actions executed given obs_batch
        - rew_batch: (numpy float) rewards received as results of executing act_batch
        - next_obs_batch: (np.ndarray) next set of observations seen after executing act_batch
        - done_mask: (numpy bool) done_mask[i] = 1 if executing act_batch[i] resulted in the end of an episode
            and 0 otherwise.
    """
    idxes = [random.randint(0, len(self._storage) - 1) for _ in range(batch_size)]
    return self._encode_sample(idxes, env=env)
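A call-site sketch, assuming buffer is a filled ReplayBuffer and venv is the VecNormalize wrapper used during training (both names are illustrative):

# Returns observations/rewards normalized with venv's running statistics;
# passing env=None would return the raw stored values instead
obs, actions, rewards, next_obs, dones = buffer.sample(batch_size=32, env=venv)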
Example #6
Source File: deepq.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DQN is single core only, we still need to use the pipe system
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #7
Source File: loader.py    From adversarial-policies with MIT License
def __init__(
    self,
    model: stable_baselines.common.base_class.BaseRLModel,
    vec_normalize: vec_env.VecNormalize,
):
    super().__init__(model=model)
    self.vec_normalize = vec_normalize
Example #8
Source File: sac.py    From robotics-rl-srl with MIT License
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though SAC is single core only, we still need to use the pipe system
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    env = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])

    if args.srl_model != "raw_pixels":
        env = VecNormalize(env, norm_reward=False)
        env = loadRunningAverage(env, load_path_normalise=load_path_normalise)

    return env
Example #9
Source File: utils.py    From robotics-rl-srl with MIT License
def loadRunningAverage(envs, load_path_normalise=None):
    if load_path_normalise is not None:
        try:
            printGreen("Loading saved running average")
            envs.load_running_average(load_path_normalise)
            envs.training = False
        except FileNotFoundError:
            envs.training = True
            printYellow("Running Average files not found for VecNormalize, switching to training mode")
    return envs 
Example #10
Source File: utils.py    From robotics-rl-srl with MIT License
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path for loading the rolling average, None if not available or wanted
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe
    envs = [makeEnv(args.env, args.seed, i, args.log_dir, allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs 
Example #11
Source File: utils.py    From robotics-rl-srl with MIT License
def loadRunningAverage(self, path):
    """
    Hack to use VecNormalize
    :param path: (str) path to log dir
    """
    self.venv.load_running_average(path)

# Compatibility with stable-baselines
Example #12
Source File: utils.py    From robotics-rl-srl with MIT License
def saveRunningAverage(self, path):
    """
    Hack to use VecNormalize
    :param path: (str) path to log dir
    """
    self.venv.save_running_average(path)
Example #13
Source File: utils.py    From robotics-rl-srl with MIT License
def get_original_obs(self):
    """
    Hack to use VecNormalize
    :return: (numpy float)
    """
    return self.venv.get_original_obs()
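Examples #11 to #13 are thin wrappers that forward persistence and inspection calls to the underlying VecNormalize. A sketch of how a training script might pair them, assuming envs is an instance of the wrapper class that defines these methods and log_dir is an existing directory (both names are illustrative):

# After training: persist the running mean/std next to the model weights
envs.saveRunningAverage(log_dir)

# Before evaluation: restore the statistics so normalization is reproducible
envs.loadRunningAverage(log_dir)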
Example #14
Source File: train_ppo.py    From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License
def load_train_env(num_envs, robot_radius, rew_fnc, num_stacks, stack_offset, debug, task_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy == "CNN1DPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env = SubprocVecEnv([lambda k=k: Monitor(env_temp("sim%d" % (k+1), StateCollector("sim%s"%(k+1), "train") , stack_offset, num_stacks, robot_radius, rew_fnc, debug, "train", task_mode), '%s/%s/sim_%d'%(path_to_models, agent_name, k+1), allow_early_resets=True) for k in range(num_envs)])

    # Normalize observations?
    if normalize:
        env = VecNormalize(env, training=True, norm_obs=True, norm_reward=False, clip_obs=100.0, clip_reward=10.0,
                           gamma=0.99, epsilon=1e-08)

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env 
Example #15
Source File: run_ppo.py    From drl_local_planner_ros_stable_baselines with BSD 3-Clause "New" or "Revised" License
def load_train_env(ns, state_collector, robot_radius, rew_fnc, num_stacks,
                   stack_offset, debug, task_mode, rl_mode, policy, disc_action_space, normalize):
    # Choosing environment wrapper according to the policy
    if policy == "CnnPolicy" or policy == "CnnLnLstmPolicy" or policy == "CnnLstmPolicy":
        if disc_action_space:
            env_temp = RosEnvDiscImg
        else:
            env_temp = RosEnvContImg
    elif policy in ["CNN1DPolicy", "CNN1DPolicy2", "CNN1DPolicy3"]:
        if disc_action_space:
            env_temp = RosEnvDiscRawScanPrepWp
        else:
            env_temp = RosEnvContRawScanPrepWp
    elif policy == "CNN1DPolicy_multi_input":
        if disc_action_space:
            env_temp = RosEnvDiscRaw
        else:
            env_temp = RosEnvContRaw
    elif policy == "CnnPolicy_multi_input_vel" or policy == "CnnPolicy_multi_input_vel2":
        if disc_action_space:
            env_temp = RosEnvDiscImgVel
        else:
            env_temp = RosEnvContImgVel

    env_raw = DummyVecEnv([lambda: env_temp(ns, state_collector, stack_offset, num_stacks, robot_radius, rew_fnc, debug, rl_mode, task_mode)])

    if normalize:
        env = VecNormalize(env_raw, training=True, norm_obs=True, norm_reward=False, clip_obs=100.0, clip_reward=10.0,
                           gamma=0.99, epsilon=1e-08)
    else:
        env = env_raw

    # Stack of data?
    if num_stacks > 1:
        env = VecFrameStack(env, n_stack=num_stacks, n_offset=stack_offset)

    return env 
Example #16
Source File: buffers.py    From stable-baselines with MIT License
def sample(self, batch_size: int, beta: float = 0, env: Optional[VecNormalize] = None):
    """
    Sample a batch of experiences.

    compared to ReplayBuffer.sample
    it also returns importance weights and idxes
    of sampled experiences.

    :param batch_size: (int) How many transitions to sample.
    :param beta: (float) To what degree to use importance weights (0 - no corrections, 1 - full correction)
    :param env: (Optional[VecNormalize]) associated gym VecEnv
        to normalize the observations/rewards when sampling
    :return:
        - obs_batch: (np.ndarray) batch of observations
        - act_batch: (numpy float) batch of actions executed given obs_batch
        - rew_batch: (numpy float) rewards received as results of executing act_batch
        - next_obs_batch: (np.ndarray) next set of observations seen after executing act_batch
        - done_mask: (numpy bool) done_mask[i] = 1 if executing act_batch[i] resulted in the end of an episode
            and 0 otherwise.
        - weights: (numpy float) Array of shape (batch_size,) and dtype np.float32 denoting importance weight of
            each sampled transition
        - idxes: (numpy int) Array of shape (batch_size,) and dtype np.int32 indexes in buffer of sampled experiences
    """
    assert beta > 0

    idxes = self._sample_proportional(batch_size)
    p_min = self._it_min.min() / self._it_sum.sum()
    max_weight = (p_min * len(self._storage)) ** (-beta)
    p_sample = self._it_sum[idxes] / self._it_sum.sum()
    weights = (p_sample * len(self._storage)) ** (-beta) / max_weight
    encoded_sample = self._encode_sample(idxes, env=env)
    return tuple(list(encoded_sample) + [weights, idxes])
Example #17
Source File: buffers.py    From stable-baselines with MIT License
def _encode_sample(self, idxes: Union[List[int], np.ndarray], env: Optional[VecNormalize] = None):
    obses_t, actions, rewards, obses_tp1, dones = [], [], [], [], []
    for i in idxes:
        data = self._storage[i]
        obs_t, action, reward, obs_tp1, done = data
        obses_t.append(np.array(obs_t, copy=False))
        actions.append(np.array(action, copy=False))
        rewards.append(reward)
        obses_tp1.append(np.array(obs_tp1, copy=False))
        dones.append(done)
    return (self._normalize_obs(np.array(obses_t), env),
            np.array(actions),
            self._normalize_reward(np.array(rewards), env),
            self._normalize_obs(np.array(obses_tp1), env),
            np.array(dones))
Example #18
Source File: buffers.py    From stable-baselines with MIT License
def _normalize_reward(reward: np.ndarray,
                      env: Optional[VecNormalize] = None) -> np.ndarray:
    """
    Helper for normalizing the reward.
    """
    if env is not None:
        return env.normalize_reward(reward)
    return reward
Example #19
Source File: buffers.py    From stable-baselines with MIT License
def _normalize_obs(obs: np.ndarray,
                   env: Optional[VecNormalize] = None) -> np.ndarray:
    """
    Helper for normalizing the observation.
    """
    if env is not None:
        return env.normalize_obs(obs)
    return obs
Example #20
Source File: common.py    From imitation with MIT License
def _reward_fn_normalize_inputs(
    obs: np.ndarray,
    acts: np.ndarray,
    next_obs: np.ndarray,
    dones: np.ndarray,
    *,
    reward_fn: RewardFn,
    vec_normalize: vec_env.VecNormalize,
    norm_reward: bool = True,
) -> np.ndarray:
    """Combine with `functools.partial` to create an input-normalizing RewardFn.

    Args:
        reward_fn: The reward function that normalized inputs are evaluated on.
        vec_normalize: Instance of VecNormalize used to normalize inputs and
            rewards.
        norm_reward: If True, then also normalize reward before returning.

    Returns:
        The possibly normalized reward.
    """
    norm_obs = vec_normalize.normalize_obs(obs)
    norm_next_obs = vec_normalize.normalize_obs(next_obs)
    rew = reward_fn(norm_obs, acts, norm_next_obs, dones)
    if norm_reward:
        rew = vec_normalize.normalize_reward(rew)
    return rew 
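As the docstring notes, functools.partial binds the keyword-only arguments to yield a plain RewardFn. A sketch, assuming base_reward_fn and vec_normalize already exist in scope:

import functools

normalized_reward_fn = functools.partial(
    _reward_fn_normalize_inputs,
    reward_fn=base_reward_fn,
    vec_normalize=vec_normalize,
    norm_reward=True,
)
# normalized_reward_fn(obs, acts, next_obs, dones) now normalizes its
# inputs before delegating to base_reward_fn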
Example #21
Source File: serialize.py    From imitation with MIT License
def __init__(self, policy: BasePolicy, vec_normalize: VecNormalize):
    super().__init__(
        policy.sess,
        policy.ob_space,
        policy.ac_space,
        policy.n_env,
        policy.n_steps,
        policy.n_batch,
    )
    self._policy = policy
    self.vec_normalize = vec_normalize
Example #22
Source File: loader.py    From adversarial-policies with MIT License
def load_vec_normalize(root_dir: str, venv: vec_env.VecEnv) -> vec_env.VecNormalize:
    try:
        normalize_path = os.path.join(root_dir, "vec_normalize.pkl")
        vec_normalize = vec_env.VecNormalize.load(normalize_path, venv)
        vec_normalize.training = False
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
        return vec_normalize
    except FileNotFoundError:
        pass

    # Could not find vec_normalize.pkl: try loading old-style vec normalize.
    vec_normalize = vec_env.VecNormalize(venv, training=False)
    vec_normalize.load_running_average(root_dir)
    pylog.info(f"Loaded normalization statistics from '{root_dir}'")
    return vec_normalize 
Example #23
Source File: test_policies.py    From imitation with MIT License
def test_serialize_identity(env_name, model_cfg, normalize, tmpdir):
    """Test output actions of deserialized policy are same as original."""
    orig_venv = venv = util.make_vec_env(env_name, n_envs=1, parallel=False)
    vec_normalize = None
    if normalize:
        venv = vec_normalize = VecNormalize(venv)

    model_name, model_cls_name = model_cfg
    try:
        model_cls = registry.load_attr(model_cls_name)
    except (AttributeError, ImportError):  # pragma: no cover
        pytest.skip(
            "Couldn't load stable baselines class. "
            "(Probably because mpi4py not installed.)"
        )

    model = model_cls("MlpPolicy", venv)
    model.learn(1000)

    venv.env_method("seed", 0)
    venv.reset()
    if normalize:
        # don't want statistics to change as we collect rollouts
        vec_normalize.training = False
    orig_rollout = rollout.generate_transitions(
        model,
        venv,
        n_timesteps=1000,
        deterministic_policy=True,
        rng=np.random.RandomState(0),
    )

    serialize.save_stable_model(tmpdir, model, vec_normalize)
    # We use `orig_venv` since `load_policy` automatically wraps `loaded`
    # with a VecNormalize, when appropriate.
    with serialize.load_policy(model_name, tmpdir, orig_venv) as loaded:
        orig_venv.env_method("seed", 0)
        orig_venv.reset()
        new_rollout = rollout.generate_transitions(
            loaded,
            orig_venv,
            n_timesteps=1000,
            deterministic_policy=True,
            rng=np.random.RandomState(0),
        )

    assert np.allclose(orig_rollout.acts, new_rollout.acts) 
Example #24
Source File: serialize.py    From imitation with MIT License
def _load_stable_baselines(cls: Type[BaseRLModel], policy_attr: str) -> PolicyLoaderFn:
    """Higher-order function, returning a policy loading function.

    Args:
        cls: The RL algorithm, e.g. `stable_baselines.PPO2`.
        policy_attr: The attribute of the RL algorithm containing the policy,
            e.g. `act_model`.

    Returns:
        A function loading policies trained via cls.
    """

    @contextlib.contextmanager
    def f(path: str, venv: VecEnv) -> Iterator[BasePolicy]:
        """Loads a policy saved to path, for environment env."""
        tf.logging.info(
            f"Loading Stable Baselines policy for '{cls}' " f"from '{path}'"
        )
        model_path = os.path.join(path, "model.pkl")
        model = None
        try:
            model = cls.load(model_path, env=venv)
            policy = getattr(model, policy_attr)

            try:
                normalize_path = os.path.join(path, "vec_normalize.pkl")
                with open(normalize_path, "rb") as f:
                    vec_normalize = pickle.load(f)
                vec_normalize.training = False
                vec_normalize.set_venv(venv)
                policy = NormalizePolicy(policy, vec_normalize)
                tf.logging.info(f"Loaded VecNormalize from '{normalize_path}'")
            except FileNotFoundError:
                # We did not use VecNormalize during training, skip
                pass

            yield policy
        finally:
            if model is not None and model.sess is not None:
                model.sess.close()

    return f 
Example #25
Source File: loader.py    From adversarial-policies with MIT License
def load_old_ppo2(root_dir, env, env_name, index, transparent_params):
    try:
        from baselines.ppo2 import ppo2 as ppo2_old
    except ImportError as e:
        msg = "{}. HINT: you need to install (OpenAI) Baselines to use old_ppo2".format(e)
        raise ImportError(msg)

    denv = FakeSingleSpacesVec(env, agent_id=index)
    possible_fnames = ["model.pkl", "final_model.pkl"]
    model_path = None
    for fname in possible_fnames:
        candidate_path = os.path.join(root_dir, fname)
        if os.path.exists(candidate_path):
            model_path = candidate_path
    if model_path is None:
        raise FileNotFoundError(
            f"Could not find model at '{root_dir}' " f"under any filename '{possible_fnames}'"
        )

    graph = tf.Graph()
    sess = tf.Session(graph=graph)
    with sess.as_default():
        with graph.as_default():
            pylog.info(f"Loading Baselines PPO2 policy from '{model_path}'")
            policy = ppo2_old.learn(
                network="mlp",
                env=denv,
                total_timesteps=1,
                seed=0,
                nminibatches=4,
                log_interval=1,
                save_interval=1,
                load_path=model_path,
            )
    stable_policy = OpenAIToStablePolicy(
        policy, ob_space=denv.observation_space, ac_space=denv.action_space
    )
    model = PolicyToModel(stable_policy)

    try:
        normalize_path = os.path.join(root_dir, "normalize.pkl")
        with open(normalize_path, "rb") as f:
            old_vec_normalize = pickle.load(f)
        vec_normalize = vec_env.VecNormalize(denv, training=False)
        vec_normalize.obs_rms = old_vec_normalize.ob_rms
        vec_normalize.ret_rms = old_vec_normalize.ret_rms
        model = NormalizeModel(model, vec_normalize)
        pylog.info(f"Loaded normalization statistics from '{normalize_path}'")
    except FileNotFoundError:
        # We did not use VecNormalize during training, skip
        pass

    return model