Python gym.spaces Examples

The following are 30 code examples of the gym.spaces module, collected from open-source projects. Each example notes its source file, project, and license. You may also want to check out the other available functions and classes of the gym module.
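For orientation, the space types that recur throughout the examples below (Box, Discrete, MultiDiscrete, Dict, Tuple) can be constructed directly. The following is a minimal standalone sketch, not taken from any of the projects listed:

import numpy as np
import gym

# the space types that appear throughout the examples below
box = gym.spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)   # continuous vector
disc = gym.spaces.Discrete(4)                                            # integers 0..3
multi = gym.spaces.MultiDiscrete([3, 5])                                 # two independent discrete dimensions
composite = gym.spaces.Dict({'position': box, 'mode': disc})             # nested observation space

sample = composite.sample()          # an OrderedDict with one entry per sub-space
print(composite.contains(sample))    # True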
Example #1
Source File: stack_frames.py From garage with MIT License
def __init__(self, env, n_frames):
    if not isinstance(env.observation_space, gym.spaces.Box):
        raise ValueError('Stack frames only works with gym.spaces.Box '
                         'environment.')

    if len(env.observation_space.shape) != 2:
        raise ValueError(
            'Stack frames only works with 2D single channel images')

    super().__init__(env)
    self._n_frames = n_frames
    self._frames = deque(maxlen=n_frames)

    new_obs_space_shape = env.observation_space.shape + (n_frames, )
    _low = env.observation_space.low.flatten()[0]
    _high = env.observation_space.high.flatten()[0]
    self._observation_space = gym.spaces.Box(
        _low,
        _high,
        shape=new_obs_space_shape,
        dtype=env.observation_space.dtype)
Example #2
Source File: executor.py From EasyRL with Apache License 2.0
def flatten_obs(self, obs):
    """Reshape the multi-channel observations into a flattened array for
    efficient communication in distributed training.

    Arguments:
        obs (obj): dict or list of numpy arrays for multi-channel observations.

    Returns:
        flattened_obs (tensor): a flattened array.
    """
    if isinstance(self.ob_ph_spec, list):
        assert len(obs) == len(
            self.observation_space
        ), "{} spaces for obs but {} inputs found".format(
            len(self.observation_space), len(obs))
        flattened_array = np.concatenate(
            [np.asarray(elm).astype(np.float32) for elm in obs], axis=1)
    elif isinstance(self.ob_ph_spec, OrderedDict):
        array_list = []
        for name in self.ob_ph_spec.keys():
            array_list.append(np.asarray(obs[name]).astype(np.float32))
        flattened_array = np.concatenate(array_list, axis=1)
    else:
        flattened_array = obs
    return flattened_array
Example #3
Source File: executor.py From EasyRL with Apache License 2.0
def _basic_space_to_ph_spec(self, sp):
    """Translate a gym space object to a tuple to specify data type and shape.

    Arguments:
        sp (obj): basic space object of gym interface.

    Returns:
        a tuple used for building TensorFlow placeholders where the first
        element specifies `dtype` and the second one specifies `shape`.
    """
    # (jones.wz) TO DO: handle gym Atari input
    if isinstance(sp, gym.spaces.Box):
        if len(sp.shape) == 3:
            return (tf.uint8, (None, ) + sp.shape)
        return (tf.float32, (None, prod(sp.shape)))
    elif isinstance(sp, gym.spaces.Discrete):
        return (tf.int32, (None, sp.n))
    elif isinstance(sp, gym.spaces.MultiDiscrete):
        return (tf.int32, (None, prod(sp.shape)))
    elif isinstance(sp, gym.spaces.MultiBinary):
        return (tf.int32, (None, prod(sp.shape)))
    else:
        raise TypeError(
            "specified an unsupported space type {}".format(sp))
Example #4
Source File: run_bcq_on_batchdata.py From EasyRL with Apache License 2.0
def __init__(self, file_name, batch_size=128, n_step=1):
    # create an offline env to do fake interaction with the agent
    self.num_epoch = 0
    self.num_record = 0
    self._offset = 0

    # how many records to read from the table at one time
    self.batch_size = batch_size

    # number of steps reserved for n-step DQN
    self.n_step = n_step

    # define the shape of observation and action following gym.spaces:
    # `Box` for continuous spaces, `Discrete` for discrete spaces and `Dict` for multiple inputs.
    # The low/high limits are not used by the agent but are required by gym.spaces.
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
    self.action_space = Discrete(n=2)

    fr = open(file_name)
    self.data = fr.readlines()
    self.num_record = len(self.data)
    fr.close()
Example #5
Source File: manipulation.py From multi-agent-emergence-environments with MIT License
def __init__(self, env, body_names, radius_multiplier=1.5,
             agent_idx_allowed_to_lock=None, lock_type="any_lock",
             ac_obs_prefix='', obj_in_game_metadata_keys=None,
             agent_allowed_to_lock_keys=None):
    super().__init__(env)
    self.n_agents = self.unwrapped.n_agents
    self.n_obj = len(body_names)
    self.body_names = body_names
    self.agent_idx_allowed_to_lock = np.arange(self.n_agents) if agent_idx_allowed_to_lock is None else agent_idx_allowed_to_lock
    self.lock_type = lock_type
    self.ac_obs_prefix = ac_obs_prefix
    self.obj_in_game_metadata_keys = obj_in_game_metadata_keys
    self.agent_allowed_to_lock_keys = agent_allowed_to_lock_keys
    self.action_space.spaces[f'action_{ac_obs_prefix}glue'] = (
        Tuple([MultiDiscrete([2] * self.n_obj) for _ in range(self.n_agents)]))
    self.observation_space = update_obs_space(
        env, {f'{ac_obs_prefix}obj_lock': (self.n_obj, 1),
              f'{ac_obs_prefix}you_lock': (self.n_agents, self.n_obj, 1),
              f'{ac_obs_prefix}team_lock': (self.n_agents, self.n_obj, 1)})
    self.lock_radius = radius_multiplier * self.metadata['box_size']
    self.obj_locked = np.zeros((self.n_obj,), dtype=int)
Example #6
Source File: test_agents.py From chainerrl with MIT License
def create_state_q_function_for_env(env):
    assert isinstance(env.observation_space, gym.spaces.Box)
    ndim_obs = env.observation_space.low.size
    if isinstance(env.action_space, gym.spaces.Discrete):
        return q_functions.FCStateQFunctionWithDiscreteAction(
            ndim_obs=ndim_obs,
            n_actions=env.action_space.n,
            n_hidden_channels=10,
            n_hidden_layers=1)
    elif isinstance(env.action_space, gym.spaces.Box):
        return q_functions.FCQuadraticStateQFunction(
            n_input_channels=ndim_obs,
            n_dim_action=env.action_space.low.size,
            n_hidden_channels=10,
            n_hidden_layers=1,
            action_space=env.action_space)
    else:
        raise NotImplementedError()
Example #7
Source File: random_agent.py From irl-benchmark with GNU General Public License v3.0
def pick_action(self, state: Union[int, float, np.ndarray]
                ) -> Union[int, float, np.ndarray]:
    """Pick an action given a state.

    Picks uniformly at random from all possible actions, using the
    environment's action_space.sample() method.

    Parameters
    ----------
    state: int
        An integer corresponding to a state of a DiscreteEnv.
        Not used in this agent.

    Returns
    -------
    Union[int, float, np.ndarray]
        An action.
    """
    # if other spaces are needed, check if their sample method conforms with
    # the returned type, change if necessary.
    assert isinstance(self.env.action_space,
                      (Box, Discrete, MultiDiscrete, MultiBinary))
    return self.env.action_space.sample()
Example #8
Source File: food.py From multi-agent-emergence-environments with MIT License
def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf,
             food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False):
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']

    if type(reward_scale) not in [list, tuple, np.ndarray]:
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {'food_obs': (self.max_n_food, food_dim),
                   'food_health': (self.max_n_food, 1),
                   'food_eat': (self.max_n_food, 1)})
    self.action_space.spaces['action_eat_food'] = Tuple(
        [MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)])
Example #9
Source File: multiprocessing_env.py From ppo-pytorch with MIT License
def __init__(self, env_fns, spaces=None):
    """
    envs: list of gym environments to run in subprocesses
    """
    self.waiting = False
    self.closed = False
    nenvs = len(env_fns)
    self.nenvs = nenvs
    self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
    self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
               for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
    for p in self.ps:
        p.daemon = True  # if the main process crashes, we should not cause things to hang
        p.start()
    for remote in self.work_remotes:
        remote.close()

    self.remotes[0].send(('get_spaces', None))
    observation_space, action_space = self.remotes[0].recv()
    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
Example #10
Source File: env_checker.py From stable-baselines with MIT License
def _check_image_input(observation_space: spaces.Box) -> None:
    """
    Check that the input will be compatible with Stable-Baselines
    when the observation is apparently an image.
    """
    if observation_space.dtype != np.uint8:
        warnings.warn("It seems that your observation is an image but the `dtype` "
                      "of your observation_space is not `np.uint8`. "
                      "If your observation is not an image, we recommend you to flatten the observation "
                      "to have only a 1D vector")

    if np.any(observation_space.low != 0) or np.any(observation_space.high != 255):
        warnings.warn("It seems that your observation space is an image but the "
                      "upper and lower bounds are not in [0, 255]. "
                      "Because the CNN policy normalize automatically the observation "
                      "you may encounter issue if the values are not in that range.")

    if observation_space.shape[0] < 36 or observation_space.shape[1] < 36:
        warnings.warn("The minimal resolution for an image is 36x36 for the default CnnPolicy. "
                      "You might need to use a custom `cnn_extractor` "
                      "cf https://stable-baselines.readthedocs.io/en/master/guide/custom_policy.html")
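For reference, an image observation space that satisfies all three checks above can be declared as follows; this is a standalone sketch, not part of stable-baselines:

import numpy as np
from gym import spaces

# passes the checks: uint8 dtype, [0, 255] bounds, at least 36x36 pixels
image_space = spaces.Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)

# would trigger the dtype and bounds warnings above
float_image_space = spaces.Box(low=0.0, high=1.0, shape=(84, 84, 3), dtype=np.float32)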
Example #11
Source File: env_checker.py From stable-baselines with MIT License
def _check_obs(obs: Union[tuple, dict, np.ndarray, int],
               observation_space: spaces.Space,
               method_name: str) -> None:
    """
    Check that the observation returned by the environment
    correspond to the declared one.
    """
    if not isinstance(observation_space, spaces.Tuple):
        assert not isinstance(obs, tuple), ("The observation returned by the `{}()` "
                                            "method should be a single value, not a tuple".format(method_name))

    # The check for a GoalEnv is done by the base class
    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(obs, int), "The observation returned by `{}()` method must be an int".format(method_name)
    elif _enforce_array_obs(observation_space):
        assert isinstance(obs, np.ndarray), ("The observation returned by `{}()` "
                                             "method must be a numpy array".format(method_name))

    assert observation_space.contains(obs), ("The observation returned by the `{}()` "
                                             "method does not match the given observation space".format(method_name))
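The final assertion relies on `contains`, which is the standard way to validate a value against a gym space. A minimal standalone illustration:

import numpy as np
from gym import spaces

obs_space = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
valid_obs = np.zeros(3, dtype=np.float32)
out_of_bounds = np.full(3, 2.0, dtype=np.float32)

print(obs_space.contains(valid_obs))      # True
print(obs_space.contains(out_of_bounds))  # False: values exceed the declared high bound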
Example #12
Source File: base_class.py From stable-baselines with MIT License
def action_probability(self, observation, state=None, mask=None, actions=None, logp=False):
    """
    If ``actions`` is ``None``, then get the model's action probability distribution from a given observation.

    Depending on the action space the output is:
        - Discrete: probability for each possible action
        - Box: mean and standard deviation of the action output

    However if ``actions`` is not ``None``, this function will return the probability that the given actions are
    taken with the given parameters (observation, state, ...) on this model. For discrete action spaces, it
    returns the probability mass; for continuous action spaces, the probability density. This is since the
    probability mass will always be zero in continuous spaces, see http://blog.christianperone.com/2019/01/
    for a good explanation

    :param observation: (np.ndarray) the input observation
    :param state: (np.ndarray) The last states (can be None, used in recurrent policies)
    :param mask: (np.ndarray) The last masks (can be None, used in recurrent policies)
    :param actions: (np.ndarray) (OPTIONAL) For calculating the likelihood that the given actions are chosen by
        the model for each of the given parameters. Must have the same number of actions and observations.
        (set to None to return the complete action probability distribution)
    :param logp: (bool) (OPTIONAL) When specified with actions, returns probability in log-space.
        This has no effect if actions is None.
    :return: (np.ndarray) the model's (log) action probability
    """
    pass
Example #13
Source File: resize.py From garage with MIT License
def __init__(self, env, width, height):
    if not isinstance(env.observation_space, gym.spaces.Box):
        raise ValueError('Resize only works with Box environment.')

    if len(env.observation_space.shape) != 2:
        raise ValueError('Resize only works with 2D single channel image.')

    super().__init__(env)
    _low = env.observation_space.low.flatten()[0]
    _high = env.observation_space.high.flatten()[0]
    self._dtype = env.observation_space.dtype
    self._observation_space = gym.spaces.Box(_low,
                                             _high,
                                             shape=[width, height],
                                             dtype=self._dtype)
    self._width = width
    self._height = height
Example #14
Source File: base_class.py From stable-baselines with MIT License
def predict(self, observation, state=None, mask=None, deterministic=False):
    if state is None:
        state = self.initial_state
    if mask is None:
        mask = [False for _ in range(self.n_envs)]
    observation = np.array(observation)
    vectorized_env = self._is_vectorized_observation(observation, self.observation_space)

    observation = observation.reshape((-1,) + self.observation_space.shape)
    actions, _, states, _ = self.step(observation, state, mask, deterministic=deterministic)

    clipped_actions = actions
    # Clip the actions to avoid out of bound error
    if isinstance(self.action_space, gym.spaces.Box):
        clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high)

    if not vectorized_env:
        if state is not None:
            raise ValueError("Error: The environment must be vectorized when using recurrent policies.")
        clipped_actions = clipped_actions[0]

    return clipped_actions, states
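The clipping step only applies to `Box` action spaces; outside the library the same idea is simply `np.clip` against the space bounds. An illustrative sketch (values are arbitrary):

import numpy as np
import gym

action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
raw_actions = np.array([1.7, -0.3], dtype=np.float32)   # e.g. an unbounded network output
clipped = np.clip(raw_actions, action_space.low, action_space.high)
print(clipped)  # [ 1.  -0.3]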
Example #15
Source File: grayscale.py From garage with MIT License
def __init__(self, env):
    if not isinstance(env.observation_space, gym.spaces.Box):
        raise ValueError(
            'Grayscale only works with gym.spaces.Box environment.')

    if len(env.observation_space.shape) != 3:
        raise ValueError('Grayscale only works with 2D RGB images')

    super().__init__(env)

    _low = env.observation_space.low.flatten()[0]
    _high = env.observation_space.high.flatten()[0]
    assert _low == 0
    assert _high == 255
    self._observation_space = gym.spaces.Box(
        _low,
        _high,
        shape=env.observation_space.shape[:-1],
        dtype=np.uint8)
Example #16
Source File: ppo_atari.py From cleanrl with MIT License
def __init__(self, env):
    gym.ObservationWrapper.__init__(self, env)
    self.observation_space = gym.spaces.Box(low=0, high=1,
                                            shape=env.observation_space.shape,
                                            dtype=np.float32)
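Only the constructor is shown here; an observation wrapper that declares a [0, 1] float space presumably pairs it with an `observation` method that rescales the uint8 frames. A minimal hypothetical sketch of the full wrapper (the class name and method body are assumptions, not copied from ppo_atari.py):

import numpy as np
import gym

class ScaledFloatFrame(gym.ObservationWrapper):
    """Rescale uint8 pixel observations into [0, 1] floats (illustrative)."""

    def __init__(self, env):
        gym.ObservationWrapper.__init__(self, env)
        self.observation_space = gym.spaces.Box(low=0, high=1,
                                                shape=env.observation_space.shape,
                                                dtype=np.float32)

    def observation(self, observation):
        # assumes the wrapped env emits uint8 frames in [0, 255]
        return np.asarray(observation, dtype=np.float32) / 255.0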
Example #17
Source File: dummy_box_env.py From garage with MIT License
def observation_space(self):
    """Return an observation space.

    Returns:
        gym.spaces: The observation space of the environment.
    """
    return gym.spaces.Box(low=-1, high=1, shape=self._obs_dim, dtype=np.float32)
Example #18
Source File: dqn_atari.py From cleanrl with MIT License
def __init__(self, env):
    super(ImageToPyTorch, self).__init__(env)
    old_shape = self.observation_space.shape
    self.observation_space = gym.spaces.Box(
        low=0,
        high=255,
        shape=(old_shape[-1], old_shape[0], old_shape[1]),
        dtype=np.uint8,
    )
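The constructor above only swaps the declared shape from HWC to CHW; the matching observation transform (a hedged sketch, not quoted from dqn_atari.py) would transpose each frame accordingly:

import numpy as np

def to_pytorch_layout(observation):
    # move the channel axis from last (H, W, C) to first (C, H, W)
    return np.transpose(observation, (2, 0, 1))

frame = np.zeros((84, 84, 4), dtype=np.uint8)
print(to_pytorch_layout(frame).shape)  # (4, 84, 84)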
Example #19
Source File: ppo_atari.py From cleanrl with MIT License
def __init__(self, env, width=84, height=84, grayscale=True, dict_space_key=None):
    """
    Warp frames to 84x84 as done in the Nature paper and later work.

    If the environment uses dictionary observations, `dict_space_key` can be specified
    which indicates which observation should be warped.
    """
    super().__init__(env)
    self._width = width
    self._height = height
    self._grayscale = grayscale
    self._key = dict_space_key
    if self._grayscale:
        num_colors = 1
    else:
        num_colors = 3

    new_space = gym.spaces.Box(
        low=0,
        high=255,
        shape=(self._height, self._width, num_colors),
        dtype=np.uint8,
    )
    if self._key is None:
        original_space = self.observation_space
        self.observation_space = new_space
    else:
        original_space = self.observation_space.spaces[self._key]
        self.observation_space.spaces[self._key] = new_space
    assert original_space.dtype == np.uint8 and len(original_space.shape) == 3
Example #20
Source File: dqn_atari.py From cleanrl with MIT License
def __init__(self, env):
    gym.ObservationWrapper.__init__(self, env)
    self.observation_space = gym.spaces.Box(low=0, high=1,
                                            shape=env.observation_space.shape,
                                            dtype=np.float32)
Example #21
Source File: util.py From multi-agent-emergence-environments with MIT License
def __init__(self, env, obs_groups):
    super().__init__(env)
    self.obs_groups = obs_groups
    for key_to_save, keys_to_concat in obs_groups.items():
        assert np.all([np.array(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) ==
                       np.array(self.observation_space.spaces[k].shape[:-1])
                       for k in keys_to_concat]), \
            f"Spaces were {[(k, v) for k, v in self.observation_space.spaces.items() if k in keys_to_concat]}"
        new_last_dim = sum([self.observation_space.spaces[k].shape[-1] for k in keys_to_concat])
        new_shape = list(self.observation_space.spaces[keys_to_concat[0]].shape[:-1]) + [new_last_dim]
        self.observation_space = update_obs_space(self, {key_to_save: new_shape})
Example #22
Source File: util.py From multi-agent-emergence-environments with MIT License
def __init__(self, env, action_key, nbuckets=11):
    super().__init__(env)
    self.action_key = action_key
    self.discrete_to_continuous_act_map = []
    for i, ac_space in enumerate(self.action_space.spaces[action_key].spaces):
        assert isinstance(ac_space, Box)
        action_map = np.array([np.linspace(low, high, nbuckets)
                               for low, high in zip(ac_space.low, ac_space.high)])
        _nbuckets = np.ones((len(action_map))) * nbuckets
        self.action_space.spaces[action_key].spaces[i] = gym.spaces.MultiDiscrete(_nbuckets)
        self.discrete_to_continuous_act_map.append(action_map)
    self.discrete_to_continuous_act_map = np.array(self.discrete_to_continuous_act_map)
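At execution time a wrapper like this has to map each sampled bucket index back to a continuous value through the stored `np.linspace` table. A minimal numeric sketch of that lookup (assumed behavior, not quoted from the project):

import numpy as np

nbuckets = 11
low, high = -1.0, 1.0
bins = np.linspace(low, high, nbuckets)   # one row of discrete_to_continuous_act_map

discrete_index = 7                        # e.g. sampled from MultiDiscrete([11])
continuous_action = bins[discrete_index]
print(continuous_action)                  # approximately 0.4 (with 11 buckets the grid step is 0.2)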
Example #23
Source File: util.py From multi-agent-emergence-environments with MIT License
def update_obs_space(env, delta):
    spaces = env.observation_space.spaces.copy()
    for key, shape in delta.items():
        spaces[key] = Box(-np.inf, np.inf, shape, np.float32)
    return Dict(spaces)
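Usage is just an env whose `observation_space` is a `Dict` plus a mapping of new keys to shapes. A small sketch reusing the helper above; the fake env below is a hypothetical stand-in with the only attribute the helper touches:

import numpy as np
from gym.spaces import Box, Dict

class _FakeEnv:
    # stand-in exposing a Dict observation space (hypothetical)
    observation_space = Dict({'agent_pos': Box(-np.inf, np.inf, (2, 3), np.float32)})

extended = update_obs_space(_FakeEnv(), {'food_obs': (5, 4)})
print(sorted(extended.spaces.keys()))      # ['agent_pos', 'food_obs']
print(extended.spaces['food_obs'].shape)   # (5, 4)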
Example #24
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License
def __init__(self, initialWealth=25.0, edgePriorAlpha=7, edgePriorBeta=3, maxWealthAlpha=5.0,
             maxWealthM=200.0, maxRoundsMean=300.0, maxRoundsSD=25.0, reseed=True):
    # store the hyperparameters for passing back into __init__() during resets so the same
    # hyperparameters govern the next game's parameters, as the user expects:
    # TODO: this is boilerplate, is there any more elegant way to do this?
    self.initialWealth = float(initialWealth)
    self.edgePriorAlpha = edgePriorAlpha
    self.edgePriorBeta = edgePriorBeta
    self.maxWealthAlpha = maxWealthAlpha
    self.maxWealthM = maxWealthM
    self.maxRoundsMean = maxRoundsMean
    self.maxRoundsSD = maxRoundsSD

    # draw this game's set of parameters:
    edge = prng.np_random.beta(edgePriorAlpha, edgePriorBeta)
    maxWealth = round(genpareto.rvs(maxWealthAlpha, maxWealthM, random_state=prng.np_random))
    maxRounds = int(round(prng.np_random.normal(maxRoundsMean, maxRoundsSD)))

    # add an additional global variable which is the sufficient statistic for the Pareto distribution
    # on wealth cap; alpha doesn't update, but x_m does, and simply is the highest wealth count
    # we've seen to date:
    self.maxEverWealth = float(self.initialWealth)
    # for the coinflip edge, it is total wins/losses:
    self.wins = 0
    self.losses = 0
    # for the number of rounds, we need to remember how many rounds we've played:
    self.roundsElapsed = 0

    # the rest proceeds as before:
    self.action_space = spaces.Discrete(int(maxWealth * 100))
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, shape=[1]),  # current wealth
        spaces.Discrete(maxRounds + 1),       # rounds elapsed
        spaces.Discrete(maxRounds + 1),       # wins
        spaces.Discrete(maxRounds + 1),       # losses
        spaces.Box(0, maxWealth, [1])))       # maximum observed wealth
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = self.initialWealth
    self.maxRounds = maxRounds
    self.rounds = self.maxRounds
    self.maxWealth = maxWealth
    if reseed or not hasattr(self, 'np_random'):
        self.seed()
Example #25
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License
def __init__(self, initialWealth=25.0, edge=0.6, maxWealth=250.0, maxRounds=300):
    self.action_space = spaces.Discrete(int(maxWealth * 100))  # betting in penny increments
    self.observation_space = spaces.Tuple((
        spaces.Box(0, maxWealth, [1]),  # (w, b)
        spaces.Discrete(maxRounds + 1)))
    self.reward_range = (0, maxWealth)
    self.edge = edge
    self.wealth = initialWealth
    self.initialWealth = initialWealth
    self.maxRounds = maxRounds
    self.maxWealth = maxWealth
    self.seed()
    self.reset()
Example #26
Source File: normalized_env.py From garage with MIT License
def __init__(self,
             env,
             scale_reward=1.,
             normalize_obs=False,
             normalize_reward=False,
             expected_action_scale=1.,
             flatten_obs=True,
             obs_alpha=0.001,
             reward_alpha=0.001):
    super().__init__(env)
    self._scale_reward = scale_reward
    self._normalize_obs = normalize_obs
    self._normalize_reward = normalize_reward
    self._expected_action_scale = expected_action_scale
    self._flatten_obs = flatten_obs
    self._obs_alpha = obs_alpha
    flat_obs_dim = gym.spaces.utils.flatdim(env.observation_space)
    self._obs_mean = np.zeros(flat_obs_dim)
    self._obs_var = np.ones(flat_obs_dim)
    self._reward_alpha = reward_alpha
    self._reward_mean = 0.
    self._reward_var = 1.
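The `gym.spaces.utils.flatdim` call used here returns the length of the flattened observation; together with `gym.spaces.utils.flatten` (used in Example #28) it reduces a supported space to a 1D vector. A small standalone illustration:

import numpy as np
import gym

obs_space = gym.spaces.Box(low=0.0, high=1.0, shape=(4, 3), dtype=np.float32)
print(gym.spaces.utils.flatdim(obs_space))           # 12

flat = gym.spaces.utils.flatten(obs_space, obs_space.sample())
print(flat.shape)                                    # (12,)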
Example #27
Source File: box_wrapper.py From gym-dolphin with MIT License
def __init__(self, space):
    assert isinstance(space, Tuple)
    self.in_space = space
    self.convertors = list(map(convertor, space.spaces))

    low = np.concatenate([c.out_space.low for c in self.convertors])
    high = np.concatenate([c.out_space.high for c in self.convertors])
    self.out_space = Box(low, high)
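The flattening pattern here, concatenating the per-space `low`/`high` vectors and building one big `Box`, works for any collection of 1D `Box` spaces. A minimal sketch:

import numpy as np
from gym.spaces import Box

a = Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
b = Box(low=0.0, high=5.0, shape=(3,), dtype=np.float32)

low = np.concatenate([a.low, b.low])
high = np.concatenate([a.high, b.high])
combined = Box(low=low, high=high, dtype=np.float32)
print(combined.shape)   # (5,)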
Example #28
Source File: normalized_env.py From garage with MIT License
def _update_obs_estimate(self, obs):
    flat_obs = gym.spaces.utils.flatten(self.env.observation_space, obs)
    self._obs_mean = (
        1 - self._obs_alpha) * self._obs_mean + self._obs_alpha * flat_obs
    self._obs_var = (
        1 - self._obs_alpha) * self._obs_var + self._obs_alpha * np.square(
            flat_obs - self._obs_mean)
Example #29
Source File: dummy_box_env.py From garage with MIT License
def action_space(self):
    """Return an action space.

    Returns:
        gym.spaces: The action space of the environment.
    """
    return gym.spaces.Box(low=-5.0, high=5.0, shape=self._action_dim, dtype=np.float32)
Example #30
Source File: normalized_env.py From garage with MIT License
def step(self, action):
    """Feed environment with one step of action and get result.

    Args:
        action (np.ndarray): An action fed to the environment.

    Returns:
        tuple:
            * observation (np.ndarray): The observation of the environment.
            * reward (float): The reward acquired at this time step.
            * done (boolean): Whether the environment was completed at this time step.
            * infos (dict): Environment-dependent additional information.
    """
    if isinstance(self.action_space, gym.spaces.Box):
        # rescale the action when the bounds are not inf
        lb, ub = self.action_space.low, self.action_space.high
        if np.all(lb != -np.inf) and np.all(ub != -np.inf):
            scaled_action = lb + (action + self._expected_action_scale) * (
                0.5 * (ub - lb) / self._expected_action_scale)
            scaled_action = np.clip(scaled_action, lb, ub)
        else:
            scaled_action = action
    else:
        scaled_action = action

    next_obs, reward, done, info = self.env.step(scaled_action)

    if self._normalize_obs:
        next_obs = self._apply_normalize_obs(next_obs)
    if self._normalize_reward:
        reward = self._apply_normalize_reward(reward)

    return next_obs, reward * self._scale_reward, done, info
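The rescaling line maps an action from [-expected_action_scale, expected_action_scale] onto [lb, ub] linearly. A worked numeric check of that formula with arbitrary example bounds:

import numpy as np

lb = np.array([0.0, -2.0])
ub = np.array([1.0, 2.0])
expected_action_scale = 1.0

action = np.array([0.0, 1.0])   # policy output in [-1, 1]
scaled = lb + (action + expected_action_scale) * (0.5 * (ub - lb) / expected_action_scale)
print(scaled)   # [0.5 2. ] -- the midpoint of [0, 1] and the top of [-2, 2]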