Python gym.spaces Examples

The following are 30 code examples drawn from open-source projects that use the gym.spaces module. Each example lists its source file, originating project, and license above the code. You may also want to check out the other available functions and classes of the gym module.
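Before diving into the examples, here is a minimal sketch (not taken from any of the projects below) of the core gym.spaces types that recur throughout them, assuming only that gym and numpy are installed:

import numpy as np
import gym.spaces as spaces

box = spaces.Box(low=0.0, high=1.0, shape=(4,), dtype=np.float32)   # bounded continuous vector
disc = spaces.Discrete(3)                                           # integers 0, 1, 2
multi = spaces.MultiDiscrete([2, 2, 2])                             # three binary sub-actions
nested = spaces.Dict({'obs': box, 'mask': multi})                   # keyed collection of spaces

sample = nested.sample()         # draw a random element of the space
assert nested.contains(sample)   # membership test used by env checkers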
Example #1
Source File: stack_frames.py    From garage with MIT License
def __init__(self, env, n_frames):
        if not isinstance(env.observation_space, gym.spaces.Box):
            raise ValueError('Stack frames only works with gym.spaces.Box '
                             'environment.')

        if len(env.observation_space.shape) != 2:
            raise ValueError(
                'Stack frames only works with 2D single channel images')

        super().__init__(env)

        self._n_frames = n_frames
        self._frames = deque(maxlen=n_frames)

        new_obs_space_shape = env.observation_space.shape + (n_frames, )
        _low = env.observation_space.low.flatten()[0]
        _high = env.observation_space.high.flatten()[0]
        self._observation_space = gym.spaces.Box(
            _low,
            _high,
            shape=new_obs_space_shape,
            dtype=env.observation_space.dtype) 
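As a quick illustration (not part of the garage source), the shape arithmetic above appends the frame count as a new trailing dimension of the observation space; the 84x84 resolution below is just a hypothetical example:

import numpy as np
import gym

# Hypothetical 84x84 single-channel observation space, as the wrapper expects.
obs_space = gym.spaces.Box(low=0, high=255, shape=(84, 84), dtype=np.uint8)
n_frames = 4

# Mirrors the new_obs_space_shape computation in __init__ above.
stacked_shape = obs_space.shape + (n_frames,)
print(stacked_shape)  # (84, 84, 4)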
Example #2
Source File: executor.py    From EasyRL with Apache License 2.0
def flatten_obs(self, obs):
        """reshape the multi-channel observations into a flattern array for efficient communication in distributed training.

        Arguments:
            obs (obj): dict or list of numpy array for multi-channel observations.
        Returns:
            flattened_obs (tensor): a flattened array.
        """
        if isinstance(self.ob_ph_spec, list):
            assert len(obs) == len(
                self.observation_space
            ), "{} spaces for obs but {} inputs found".format(
                len(self.observation_space), len(obs))
            flattened_array = np.concatenate(
                [np.asarray(elm).astype(np.float32) for elm in obs], axis=1)
        elif isinstance(self.ob_ph_spec, OrderedDict):
            array_list = []
            for name in self.ob_ph_spec.keys():
                array_list.append(np.asarray(obs[name]).astype(np.float32))
            flattened_array = np.concatenate(array_list, axis=1)
        else:
            flattened_array = obs

        return flattened_array 
Example #3
Source File: executor.py    From EasyRL with Apache License 2.0
def _basic_space_to_ph_spec(self, sp):
        """Translate a gym space object to a tuple to specify data type and shape.
        Arguments:
            sp (obj): basic space object of gym interface.
        Returns:
            a tuple used for building TensorFlow placeholders where the first element specifies `dtype` and the second one specifies `shape`.
        """

        # (jones.wz) TO DO: handle gym Atari input
        if isinstance(sp, gym.spaces.Box):
            if len(sp.shape) == 3:
                return (tf.uint8, (None, ) + sp.shape)
            return (tf.float32, (None, prod(sp.shape)))
        elif isinstance(sp, gym.spaces.Discrete):
            return (tf.int32, (None, sp.n))
        elif isinstance(sp, gym.spaces.MultiDiscrete):
            return (tf.int32, (None, prod(sp.shape)))
        elif isinstance(sp, gym.spaces.MultiBinary):
            return (tf.int32, (None, prod(sp.shape)))
        else:
            raise TypeError(
                "specified an unsupported space type {}".format(sp)) 
Example #4
Source File: run_bcq_on_batchdata.py    From EasyRL with Apache License 2.0
def __init__(self, file_name, batch_size=128, n_step=1):
        # create an offline_env to do fake interaction with agent
        self.num_epoch = 0
        self.num_record = 0
        self._offset = 0

        # how many records to read from table at one time
        self.batch_size = batch_size
        # number of steps reserved for n-step DQN
        self.n_step = n_step

        # define the shape of observation and action
        # we follow the definition of gym.spaces:
        # `Box` for continuous spaces, `Discrete` for discrete spaces and `Dict` for multiple inputs
        # the low/high bounds are not actually used by the agent, but gym.spaces requires them
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
        self.action_space = Discrete(n=2)

        fr = open(file_name)
        self.data = fr.readlines()
        self.num_record = len(self.data)
        fr.close() 
Example #5
Source File: manipulation.py    From multi-agent-emergence-environments with MIT License
def __init__(self, env, body_names, radius_multiplier=1.5, agent_idx_allowed_to_lock=None,
                 lock_type="any_lock", ac_obs_prefix='', obj_in_game_metadata_keys=None,
                 agent_allowed_to_lock_keys=None):
        super().__init__(env)
        self.n_agents = self.unwrapped.n_agents
        self.n_obj = len(body_names)
        self.body_names = body_names
        self.agent_idx_allowed_to_lock = np.arange(self.n_agents) if agent_idx_allowed_to_lock is None else agent_idx_allowed_to_lock
        self.lock_type = lock_type
        self.ac_obs_prefix = ac_obs_prefix
        self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
        self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
        self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = (
            Tuple([MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]))
        self.observation_space = update_obs_space(env, {f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
                                                        f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
                                                        f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1)})
        self.lock_radius = radius_multiplier*self.metadata['box_size']
        self.obj_locked = np.zeros((self.n_obj,), dtype=int) 
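As a side note (not from the original file), the per-agent glue action registered above is a Tuple of MultiDiscrete spaces, so a sample is one binary vector per agent:

from gym.spaces import Tuple, MultiDiscrete

n_agents, n_obj = 2, 3
glue_space = Tuple([MultiDiscrete([2] * n_obj) for _ in range(n_agents)])
print(glue_space.sample())  # e.g. (array([1, 0, 1]), array([0, 0, 1]))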
Example #6
Source File: test_agents.py    From chainerrl with MIT License
def create_state_q_function_for_env(env):
    assert isinstance(env.observation_space, gym.spaces.Box)
    ndim_obs = env.observation_space.low.size
    if isinstance(env.action_space, gym.spaces.Discrete):
        return q_functions.FCStateQFunctionWithDiscreteAction(
            ndim_obs=ndim_obs,
            n_actions=env.action_space.n,
            n_hidden_channels=10,
            n_hidden_layers=1)
    elif isinstance(env.action_space, gym.spaces.Box):
        return q_functions.FCQuadraticStateQFunction(
            n_input_channels=ndim_obs,
            n_dim_action=env.action_space.low.size,
            n_hidden_channels=10,
            n_hidden_layers=1,
            action_space=env.action_space)
    else:
        raise NotImplementedError() 
Example #7
Source File: random_agent.py    From irl-benchmark with GNU General Public License v3.0
def pick_action(self, state: Union[int, float, np.ndarray]
                    ) -> Union[int, float, np.ndarray]:
        """ Pick an action given a state.

        Picks uniformly at random from all possible actions, using the environment's
        action_space.sample() method.

        Parameters
        ----------
        state: int
            An integer corresponding to a state of a DiscreteEnv.
            Not used in this agent.

        Returns
        -------
        Union[int, float, np.ndarray]
            An action
        """
        # if other spaces are needed, check if their sample method conforms with
        # returned type, change if necessary.
        assert isinstance(self.env.action_space,
                          (Box, Discrete, MultiDiscrete, MultiBinary))
        return self.env.action_space.sample() 
Example #8
Source File: food.py    From multi-agent-emergence-environments with MIT License
def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf,
                 food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False):
        super().__init__(env)
        self.eat_thresh = eat_thresh
        self.max_food_health = max_food_health
        self.respawn_time = respawn_time
        self.food_rew_type = food_rew_type
        self.n_agents = self.metadata['n_agents']

        if type(reward_scale) not in [list, tuple, np.ndarray]:
            reward_scale = [reward_scale, reward_scale]
        self.reward_scale = reward_scale
        self.reward_scale_obs = reward_scale_obs

        # Reset obs/action space to match
        self.max_n_food = self.metadata['max_n_food']
        self.curr_n_food = self.metadata['curr_n_food']
        self.max_food_size = self.metadata['food_size']
        food_dim = 5 if self.reward_scale_obs else 4
        self.observation_space = update_obs_space(self.env, {'food_obs': (self.max_n_food, food_dim),
                                                             'food_health': (self.max_n_food, 1),
                                                             'food_eat': (self.max_n_food, 1)})
        self.action_space.spaces['action_eat_food'] = Tuple([MultiDiscrete([2] * self.max_n_food)
                                                             for _ in range(self.n_agents)]) 
Example #9
Source File: multiprocessing_env.py    From ppo-pytorch with MIT License
def __init__(self, env_fns, spaces=None):
        """
        envs: list of gym environments to run in subprocesses
        """
        self.waiting = False
        self.closed = False
        nenvs = len(env_fns)
        self.nenvs = nenvs
        self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
        self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
            for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
        for p in self.ps:
            p.daemon = True # if the main process crashes, we should not cause things to hang
            p.start()
        for remote in self.work_remotes:
            remote.close()

        self.remotes[0].send(('get_spaces', None))
        observation_space, action_space = self.remotes[0].recv()
        VecEnv.__init__(self, len(env_fns), observation_space, action_space) 
Example #10
Source File: env_checker.py    From stable-baselines with MIT License
def _check_image_input(observation_space: spaces.Box) -> None:
    """
    Check that the input will be compatible with Stable-Baselines
    when the observation is apparently an image.
    """
    if observation_space.dtype != np.uint8:
        warnings.warn("It seems that your observation is an image but the `dtype` "
                      "of your observation_space is not `np.uint8`. "
                      "If your observation is not an image, we recommend you to flatten the observation "
                      "to have only a 1D vector")

    if np.any(observation_space.low != 0) or np.any(observation_space.high != 255):
        warnings.warn("It seems that your observation space is an image but the "
                      "upper and lower bounds are not in [0, 255]. "
                      "Because the CNN policy normalize automatically the observation "
                      "you may encounter issue if the values are not in that range."
                      )

    if observation_space.shape[0] < 36 or observation_space.shape[1] < 36:
        warnings.warn("The minimal resolution for an image is 36x36 for the default CnnPolicy. "
                      "You might need to use a custom `cnn_extractor` "
                      "cf https://stable-baselines.readthedocs.io/en/master/guide/custom_policy.html") 
Example #11
Source File: env_checker.py    From stable-baselines with MIT License
def _check_obs(obs: Union[tuple, dict, np.ndarray, int],
               observation_space: spaces.Space,
               method_name: str) -> None:
    """
    Check that the observation returned by the environment
    corresponds to the declared one.
    """
    if not isinstance(observation_space, spaces.Tuple):
        assert not isinstance(obs, tuple), ("The observation returned by the `{}()` "
                                            "method should be a single value, not a tuple".format(method_name))

    # The check for a GoalEnv is done by the base class
    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(obs, int), "The observation returned by `{}()` method must be an int".format(method_name)
    elif _enforce_array_obs(observation_space):
        assert isinstance(obs, np.ndarray), ("The observation returned by `{}()` "
                                             "method must be a numpy array".format(method_name))

    assert observation_space.contains(obs), ("The observation returned by the `{}()` "
                                             "method does not match the given observation space".format(method_name)) 
Example #12
Source File: base_class.py    From stable-baselines with MIT License
def action_probability(self, observation, state=None, mask=None, actions=None, logp=False):
        """
        If ``actions`` is ``None``, then get the model's action probability distribution from a given observation.

        Depending on the action space the output is:
            - Discrete: probability for each possible action
            - Box: mean and standard deviation of the action output

        However if ``actions`` is not ``None``, this function will return the probability that the given actions are
        taken with the given parameters (observation, state, ...) on this model. For discrete action spaces, it
        returns the probability mass; for continuous action spaces, the probability density. This is because the
        probability mass will always be zero in continuous spaces; see http://blog.christianperone.com/2019/01/
        for a good explanation.

        :param observation: (np.ndarray) the input observation
        :param state: (np.ndarray) The last states (can be None, used in recurrent policies)
        :param mask: (np.ndarray) The last masks (can be None, used in recurrent policies)
        :param actions: (np.ndarray) (OPTIONAL) For calculating the likelihood that the given actions are chosen by
            the model for each of the given parameters. Must have the same number of actions and observations.
            (set to None to return the complete action probability distribution)
        :param logp: (bool) (OPTIONAL) When specified with actions, returns probability in log-space.
            This has no effect if actions is None.
        :return: (np.ndarray) the model's (log) action probability
        """
        pass 
Example #13
Source File: resize.py    From garage with MIT License
def __init__(self, env, width, height):
        if not isinstance(env.observation_space, gym.spaces.Box):
            raise ValueError('Resize only works with Box environment.')

        if len(env.observation_space.shape) != 2:
            raise ValueError('Resize only works with 2D single channel image.')

        super().__init__(env)

        _low = env.observation_space.low.flatten()[0]
        _high = env.observation_space.high.flatten()[0]
        self._dtype = env.observation_space.dtype
        self._observation_space = gym.spaces.Box(_low,
                                                 _high,
                                                 shape=[width, height],
                                                 dtype=self._dtype)

        self._width = width
        self._height = height 
Example #14
Source File: base_class.py    From stable-baselines with MIT License
def predict(self, observation, state=None, mask=None, deterministic=False):
        if state is None:
            state = self.initial_state
        if mask is None:
            mask = [False for _ in range(self.n_envs)]
        observation = np.array(observation)
        vectorized_env = self._is_vectorized_observation(observation, self.observation_space)

        observation = observation.reshape((-1,) + self.observation_space.shape)
        actions, _, states, _ = self.step(observation, state, mask, deterministic=deterministic)

        clipped_actions = actions
        # Clip the actions to avoid out of bound error
        if isinstance(self.action_space, gym.spaces.Box):
            clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high)

        if not vectorized_env:
            if state is not None:
                raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
            clipped_actions = clipped_actions[0]

        return clipped_actions, states 
Example #15
Source File: grayscale.py    From garage with MIT License
def __init__(self, env):
        if not isinstance(env.observation_space, gym.spaces.Box):
            raise ValueError(
                'Grayscale only works with gym.spaces.Box environment.')

        if len(env.observation_space.shape) != 3:
            raise ValueError('Grayscale only works with 2D RGB images')

        super().__init__(env)

        _low = env.observation_space.low.flatten()[0]
        _high = env.observation_space.high.flatten()[0]
        assert _low == 0
        assert _high == 255
        self._observation_space = gym.spaces.Box(
            _low,
            _high,
            shape=env.observation_space.shape[:-1],
            dtype=np.uint8) 
Example #16
Source File: ppo_atari.py    From cleanrl with MIT License
def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) 
Example #17
Source File: dummy_box_env.py    From garage with MIT License
def observation_space(self):
        """Return an observation space.

        Returns:
            gym.spaces: The observation space of the environment.

        """
        return gym.spaces.Box(low=-1,
                              high=1,
                              shape=self._obs_dim,
                              dtype=np.float32) 
Example #18
Source File: dqn_atari.py    From cleanrl with MIT License
def __init__(self, env):
        super(ImageToPyTorch, self).__init__(env)
        old_shape = self.observation_space.shape
        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(old_shape[-1], old_shape[0], old_shape[1]),
            dtype=np.uint8,
        ) 
Example #19
Source File: ppo_atari.py    From cleanrl with MIT License
def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None):
        """
        Warp frames to 84x84 as done in the Nature paper and later work.
        If the environment uses dictionary observations, `dict_space_key` can be specified which indicates which
        observation should be warped.
        """
        super().__init__(env)
        self._width = width
        self._height = height
        self._grayscale = grayscale
        self._key = dict_space_key
        if self._grayscale:
            num_colors = 1
        else:
            num_colors = 3

        new_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(self._height, self._width, num_colors),
            dtype=np.uint8,
        )
        if self._key is None:
            original_space = self.observation_space
            self.observation_space = new_space
        else:
            original_space = self.observation_space.spaces[self._key]
            self.observation_space.spaces[self._key] = new_space
        assert original_space.dtype == np.uint8 and len(original_space.shape) == 3 
Example #20
Source File: dqn_atari.py    From cleanrl with MIT License
def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=env.observation_space.shape, dtype=np.float32) 
Example #21
Source File: util.py    From multi-agent-emergence-environments with MIT License
def __init__(self, env, obs_groups):
        super().__init__(env)
        self.obs_groups = obs_groups
        for key_to_save, keys_to_concat in obs_groups.items():
            assert np.all([np.array(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) ==
                           np.array(self.observation_space.spaces[k].shape[:-1])
                           for k in keys_to_concat]), \
                f"Spaces were {[(k, v) for k, v in self.observation_space.spaces.items() if k in keys_to_concat]}"
            new_last_dim = sum([self.observation_space.spaces[k].shape[-1] for k in keys_to_concat])
            new_shape = list(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) + [new_last_dim]
            self.observation_space = update_obs_space(self, {key_to_save: new_shape}) 
Example #22
Source File: util.py    From multi-agent-emergence-environments with MIT License
def __init__(self, env, action_key, nbuckets=11):
        super().__init__(env)
        self.action_key = action_key
        self.discrete_to_continuous_act_map = []
        for i, ac_space in enumerate(self.action_space.spaces[action_key].spaces):
            assert isinstance(ac_space, Box)
            action_map = np.array([np.linspace(low, high, nbuckets)
                                   for low, high in zip(ac_space.low, ac_space.high)])
            _nbuckets = np.ones((len(action_map))) * nbuckets
            self.action_space.spaces[action_key].spaces[i] = gym.spaces.MultiDiscrete(_nbuckets)
            self.discrete_to_continuous_act_map.append(action_map)
        self.discrete_to_continuous_act_map = np.array(self.discrete_to_continuous_act_map) 
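To make the bucket mapping above concrete, here is a standalone sketch (independent of the wrapper) of discretizing a continuous Box into nbuckets evenly spaced values per dimension and recovering a continuous action from a discrete index:

import numpy as np
from gym.spaces import Box

ac_space = Box(low=np.array([-1.0, 0.0]), high=np.array([1.0, 2.0]), dtype=np.float32)
nbuckets = 11

# One row of bucket values per action dimension, mirroring __init__ above.
action_map = np.array([np.linspace(low, high, nbuckets)
                       for low, high in zip(ac_space.low, ac_space.high)])

discrete_action = np.array([0, 5])                            # bucket index per dimension
continuous_action = action_map[np.arange(2), discrete_action]
print(continuous_action)                                      # [-1.  1.]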
Example #23
Source File: util.py    From multi-agent-emergence-environments with MIT License
def update_obs_space(env, delta):
    spaces = env.observation_space.spaces.copy()
    for key, shape in delta.items():
        spaces[key] = Box(-np.inf, np.inf, shape, np.float32)
    return Dict(spaces) 
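A hedged usage sketch of this helper, with a stand-in object carrying a Dict observation space (real callers pass full environments); the key names here are made up:

from types import SimpleNamespace
import numpy as np
from gym.spaces import Box, Dict

fake_env = SimpleNamespace(
    observation_space=Dict({'agent_pos': Box(-np.inf, np.inf, (2, 3), np.float32)}))
new_space = update_obs_space(fake_env, {'lock_state': (2, 1)})
print(sorted(new_space.spaces.keys()))  # ['agent_pos', 'lock_state']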
Example #24
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License
def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5.0, maxWealthM=200.0, maxRoundsMean=300.0, maxRoundsSD=25.0, reseed=True):
        # store the hyperparameters for passing back into __init__() during resets,
        # so the same hyperparameters govern the next game's parameters, as the user expects.
        # TODO: this is boilerplate; is there any more elegant way to do this?
        self.initialWealth=float(initialWealth)
        self.edgePriorAlpha=edgePriorAlpha
        self.edgePriorBeta=edgePriorBeta
        self.maxWealthAlpha=maxWealthAlpha
        self.maxWealthM=maxWealthM
        self.maxRoundsMean=maxRoundsMean
        self.maxRoundsSD=maxRoundsSD

        # draw this game's set of parameters:
        edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
        maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random))
        maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

        # add an additional global variable which is the sufficient statistic for the Pareto distribution on wealth cap;
        # alpha doesn't update, but x_m does, and simply is the highest wealth count we've seen to date:
        self.maxEverWealth = float(self.initialWealth)
        # for the coinflip edge, it is total wins/losses:
        self.wins = 0
        self.losses = 0
        # for the number of rounds, we need to remember how many rounds we've played:
        self.roundsElapsed = 0

        # the rest proceeds as before:
        self.action_space = spaces.Discrete(int(maxWealth*100))
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, shape=[1]), # current wealth
            spaces.Discrete(maxRounds+1), # rounds elapsed
            spaces.Discrete(maxRounds+1), # wins
            spaces.Discrete(maxRounds+1), # losses
            spaces.Box(0, maxWealth, [1]))) # maximum observed wealth
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = self.initialWealth
        self.maxRounds = maxRounds
        self.rounds = self.maxRounds
        self.maxWealth = maxWealth
        if reseed or not hasattr(self, 'np_random') : self.seed() 
Example #25
Source File: kellycoinflip.py    From DRL_DeliveryDuel with MIT License
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):

        self.action_space = spaces.Discrete(int(maxWealth*100)) # betting in penny increments
        self.observation_space = spaces.Tuple((
            spaces.Box(0, maxWealth, [1]), # (w,b)
            spaces.Discrete(maxRounds+1)))
        self.reward_range = (0, maxWealth)
        self.edge = edge
        self.wealth = initialWealth
        self.initialWealth = initialWealth
        self.maxRounds = maxRounds
        self.maxWealth = maxWealth
        self.seed()
        self.reset() 
Example #26
Source File: normalized_env.py    From garage with MIT License
def __init__(
            self,
            env,
            scale_reward=1.,
            normalize_obs=False,
            normalize_reward=False,
            expected_action_scale=1.,
            flatten_obs=True,
            obs_alpha=0.001,
            reward_alpha=0.001,
    ):
        super().__init__(env)

        self._scale_reward = scale_reward
        self._normalize_obs = normalize_obs
        self._normalize_reward = normalize_reward
        self._expected_action_scale = expected_action_scale
        self._flatten_obs = flatten_obs

        self._obs_alpha = obs_alpha
        flat_obs_dim = gym.spaces.utils.flatdim(env.observation_space)
        self._obs_mean = np.zeros(flat_obs_dim)
        self._obs_var = np.ones(flat_obs_dim)

        self._reward_alpha = reward_alpha
        self._reward_mean = 0.
        self._reward_var = 1. 
Example #27
Source File: box_wrapper.py    From gym-dolphin with MIT License
def __init__(self, space):
    assert(isinstance(space, Tuple))
    
    self.in_space = space
    
    self.convertors = list(map(convertor, space.spaces))
    
    low = np.concatenate([c.out_space.low for c in self.convertors])
    high = np.concatenate([c.out_space.high for c in self.convertors])
    
    self.out_space = Box(low, high) 
Example #28
Source File: normalized_env.py    From garage with MIT License
def _update_obs_estimate(self, obs):
        flat_obs = gym.spaces.utils.flatten(self.env.observation_space, obs)
        self._obs_mean = (
            1 - self._obs_alpha) * self._obs_mean + self._obs_alpha * flat_obs
        self._obs_var = (
            1 - self._obs_alpha) * self._obs_var + self._obs_alpha * np.square(
                flat_obs - self._obs_mean) 
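The gym.spaces.utils.flatdim and flatten helpers used in the two garage snippets above work on any gym space, including nested ones; a small illustrative sketch:

import numpy as np
import gym

space = gym.spaces.Dict({
    'position': gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    'goal': gym.spaces.Discrete(4),
})

print(gym.spaces.utils.flatdim(space))   # 7: the Box contributes 3, the Discrete is one-hot encoded into 4
flat = gym.spaces.utils.flatten(space, space.sample())
print(flat.shape)                        # (7,)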
Example #29
Source File: dummy_box_env.py    From garage with MIT License
def action_space(self):
        """Return an action space.

        Returns:
            gym.spaces: The action space of the environment.

        """
        return gym.spaces.Box(low=-5.0,
                              high=5.0,
                              shape=self._action_dim,
                              dtype=np.float32) 
Example #30
Source File: normalized_env.py    From garage with MIT License
def step(self, action):
        """Feed environment with one step of action and get result.

        Args:
            action (np.ndarray): An action fed to the environment.

        Returns:
            tuple:
                * observation (np.ndarray): The observation of the environment.
                * reward (float): The reward acquired at this time step.
                * done (boolean): Whether the environment was completed at this
                    time step.
                * infos (dict): Environment-dependent additional information.

        """
        if isinstance(self.action_space, gym.spaces.Box):
            # rescale the action when the bounds are not inf
            lb, ub = self.action_space.low, self.action_space.high
            if np.all(lb != -np.inf) and np.all(ub != np.inf):
                scaled_action = lb + (action + self._expected_action_scale) * (
                    0.5 * (ub - lb) / self._expected_action_scale)
                scaled_action = np.clip(scaled_action, lb, ub)
            else:
                scaled_action = action
        else:
            scaled_action = action

        next_obs, reward, done, info = self.env.step(scaled_action)

        if self._normalize_obs:
            next_obs = self._apply_normalize_obs(next_obs)
        if self._normalize_reward:
            reward = self._apply_normalize_reward(reward)

        return next_obs, reward * self._scale_reward, done, info
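For completeness, the rescaling above maps an action from [-expected_action_scale, expected_action_scale] affinely onto [lb, ub]; a quick numeric check of the formula (not from the garage source):

import numpy as np

lb, ub = np.array([-2.0]), np.array([3.0])
expected_action_scale = 1.0

for action in (-1.0, 0.0, 1.0):
    scaled = lb + (action + expected_action_scale) * (0.5 * (ub - lb) / expected_action_scale)
    print(action, scaled)   # -1.0 -> [-2.], 0.0 -> [0.5], 1.0 -> [3.]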