Python gym.spaces.Dict() Examples
The following are 27 code examples of gym.spaces.Dict(), collected from open-source projects. Each example notes the source file and project it comes from, along with that project's license. You may also want to check out the other functions and classes of the gym.spaces module.
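
Before the examples, a brief orientation: gym.spaces.Dict composes named sub-spaces (Box, Discrete, and so on) into a single structured space, and sampling it returns an OrderedDict with one entry per key. The following minimal sketch illustrates this behavior; the key names and bounds are illustrative only and do not come from any of the projects listed below.

import numpy as np
from gym import spaces

# A Dict space maps string keys to arbitrary sub-spaces, which may nest.
observation_space = spaces.Dict({
    "position": spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32),
    "status": spaces.Discrete(3),
    "sensors": spaces.Dict({
        "lidar": spaces.Box(low=0.0, high=10.0, shape=(16,), dtype=np.float32),
    }),
})

sample = observation_space.sample()         # OrderedDict keyed like the space
assert observation_space.contains(sample)   # membership is checked recursively
print(sample["sensors"]["lidar"].shape)     # (16,)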
Example #1
Source File: simple_rpg.py From ray with Apache License 2.0
def __init__(self, config):
    self.cur_pos = 0
    self.action_space = Discrete(4)
    # Represents an item.
    self.item_space = Discrete(5)
    # Represents an effect on the player.
    self.effect_space = Box(9000, 9999, shape=(4, ))
    # Represents a player.
    self.player_space = Dict({
        "location": Box(-100, 100, shape=(2, )),
        "status": Box(-1, 1, shape=(10, )),
        "items": Repeated(self.item_space, max_len=MAX_ITEMS),
        "effects": Repeated(self.effect_space, max_len=MAX_EFFECTS),
    })
    # Observation is a list of players.
    self.observation_space = Repeated(
        self.player_space, max_len=MAX_PLAYERS)
Example #2
Source File: multi_agent.py From multi-agent-emergence-environments with MIT License
def __init__(self, env, keys_self, keys_external, keys_mask=[], flatten=True):
    super().__init__(env)
    self.keys_self = sorted([k + '_self' for k in keys_self])
    self.keys_external = sorted(keys_external)
    self.keys_mask = sorted(keys_mask)
    self.flatten = flatten

    # Change observation space to look like a single agent observation space.
    # This makes constructing policies much easier.
    if flatten:
        size = sum([np.prod(self.env.observation_space.spaces[k].shape[1:])
                    for k in self.keys_self + self.keys_external])
        self.observation_space = Dict(
            {'observation_self': Box(-np.inf, np.inf, (size,), np.float32)})
    else:
        size_self = sum([self.env.observation_space.spaces[k].shape[1]
                         for k in self.keys_self])
        obs_self = {'observation_self': Box(-np.inf, np.inf, (size_self,), np.float32)}
        obs_extern = {k: Box(-np.inf, np.inf, v.shape[1:], np.float32)
                      for k, v in self.observation_space.spaces.items()
                      if k in self.keys_external + self.keys_mask}
        obs_self.update(obs_extern)
        self.observation_space = Dict(obs_self)
Example #3
Source File: _spaces.py From adeptRL with GNU General Public License v3.0
def dtypes_from_gym(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": gym_space.dtype}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": gym_space.dtype}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.dtype}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
    else:
        raise NotImplementedError
Example #4
Source File: env_wrappers.py From sample-factory with MIT License
def __init__(self, env, w, h, grayscale=True, add_channel_dim=False, area_interpolation=False):
    super(ResizeWrapper, self).__init__(env)

    self.w = w
    self.h = h
    self.grayscale = grayscale
    self.add_channel_dim = add_channel_dim
    self.interpolation = cv2.INTER_AREA if area_interpolation else cv2.INTER_NEAREST

    if isinstance(env.observation_space, spaces.Dict):
        # TODO: does this even work?
        new_spaces = {}
        for key, space in env.observation_space.spaces.items():
            new_spaces[key] = self._calc_new_obs_space(space)
        self.observation_space = spaces.Dict(new_spaces)
    else:
        self.observation_space = self._calc_new_obs_space(env.observation_space)
Example #5
Source File: _spaces.py From adeptRL with GNU General Public License v3.0
def _detect_gym_spaces(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": (gym_space.n,)}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": (gym_space.n,)}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.shape}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
Example #6
Source File: env.py From smac with MIT License
def __init__(self, **smac_args):
    """Create a new multi-agent StarCraft env compatible with RLlib.

    Arguments:
        smac_args (dict): Arguments to pass to the underlying
            smac.env.starcraft.StarCraft2Env instance.

    Examples:
        >>> from smac.examples.rllib import RLlibStarCraft2Env
        >>> env = RLlibStarCraft2Env(map_name="8m")
        >>> print(env.reset())
    """
    self._env = StarCraft2Env(**smac_args)
    self._ready_agents = []
    self.observation_space = Dict({
        "obs": Box(-1, 1, shape=(self._env.get_obs_size(),)),
        "action_mask": Box(0, 1, shape=(self._env.get_total_actions(),)),
    })
    self.action_space = Discrete(self._env.get_total_actions())
Example #7
Source File: dummy_vec_env.py From sonic_contest with MIT License
def __init__(self, env_fns):
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    self.level_pred = self.envs[0].level_pred
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space

    if isinstance(obs_space, spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        for key, box in obs_space.spaces.items():
            assert isinstance(box, spaces.Box)
            shapes[key] = box.shape
            dtypes[key] = box.dtype
            self.keys.append(key)
    else:
        box = obs_space
        assert isinstance(box, spaces.Box)
        self.keys = [None]
        shapes, dtypes = {None: box.shape}, {None: box.dtype}

    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
Example #8
Source File: wrappers.py From gym-minigrid with Apache License 2.0
def __init__(self, env, agent_view_size=7):
    super().__init__(env)

    assert agent_view_size % 2 == 1
    assert agent_view_size >= 3

    # Override default view size
    env.unwrapped.agent_view_size = agent_view_size

    # Compute observation space with specified view size
    observation_space = gym.spaces.Box(
        low=0,
        high=255,
        shape=(agent_view_size, agent_view_size, 3),
        dtype='uint8'
    )

    # Override the environment's observation space
    self.observation_space = spaces.Dict({
        'image': observation_space
    })
Example #9
Source File: dm_env_wrapper.py From ray with Apache License 2.0
def _convert_spec_to_space(spec):
    if isinstance(spec, dict):
        return spaces.Dict(
            {k: _convert_spec_to_space(v) for k, v in spec.items()})
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    elif isinstance(spec, specs.BoundedArray):
        return spaces.Box(
            low=np.asscalar(spec.minimum),
            high=np.asscalar(spec.maximum),
            shape=spec.shape,
            dtype=spec.dtype)
    elif isinstance(spec, specs.Array):
        return spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=spec.shape,
            dtype=spec.dtype)

    raise NotImplementedError(
        ("Could not convert `Array` spec of type {} to Gym space. "
         "Attempted to convert: {}").format(type(spec), spec))
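
A hedged usage sketch for the converter above, assuming dm_env is installed; the spec names and bounds here are illustrative, not taken from the project:

import numpy as np
from dm_env import specs

obs_spec = {
    "position": specs.BoundedArray(shape=(3,), dtype=np.float32,
                                   minimum=-1.0, maximum=1.0),
    "step_count": specs.DiscreteArray(num_values=10),
}
space = _convert_spec_to_space(obs_spec)
# -> a spaces.Dict with a Box for "position" and a Discrete(10) for "step_count"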
Example #10
Source File: observation.py From highway-env with MIT License
def __init__(self,
             env: 'AbstractEnv',
             features: Optional[List[str]] = None,
             grid_size: Optional[List[List[float]]] = None,
             grid_step: Optional[List[int]] = None,
             features_range: Dict[str, List[float]] = None,
             absolute: bool = False,
             **kwargs: dict) -> None:
    """
    :param env: The environment to observe
    :param features: Names of features used in the observation
    :param vehicles_count: Number of observed vehicles
    """
    self.env = env
    self.features = features if features is not None else self.FEATURES
    self.grid_size = np.array(grid_size) if grid_size is not None else np.array(self.GRID_SIZE)
    self.grid_step = np.array(grid_step) if grid_step is not None else np.array(self.GRID_STEP)
    # Use self.grid_step here so the default step is applied when grid_step is None.
    grid_shape = np.asarray(np.floor((self.grid_size[:, 1] - self.grid_size[:, 0]) / self.grid_step),
                            dtype=np.int)
    self.grid = np.zeros((len(self.features), *grid_shape))
    self.features_range = features_range
    self.absolute = absolute
Example #11
Source File: dummy_vec_env.py From self-imitation-learning with MIT License
def __init__(self, env_fns):
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)
    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space

    if isinstance(obs_space, spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        subspaces = obs_space.spaces
    else:
        subspaces = {None: obs_space}

    for key, box in subspaces.items():
        shapes[key] = box.shape
        dtypes[key] = box.dtype
        self.keys.append(key)

    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    self.buf_dones = np.zeros((self.num_envs,), dtype=np.bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
Example #12
Source File: bit_flip.py From rlgraph with Apache License 2.0
def __init__(self, bit_length=16, max_steps=None):
    super(BitFlip, self).__init__()
    assert bit_length >= 1, 'bit_length must be >= 1, found {}'.format(bit_length)
    self.bit_length = bit_length

    if max_steps is None:
        self.max_steps = bit_length
    else:
        self.max_steps = max_steps

    self.last_action = -1  # -1 for reset
    self.steps = 0
    self.seed()
    self.action_space = spaces.Discrete(bit_length + 1)  # index = n means to not flip any bit
    # achieved_goal and observation are identical in the bit-flip environment; the
    # Dict layout is kept for compatibility with the OpenAI GoalEnv interface
    self.observation_space = spaces.Dict(dict(
        observation=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        achieved_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        desired_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
    ))
    self.reset()
Example #13
Source File: two_step_game.py From ray with Apache License 2.0
def __init__(self, env_config):
    self.state = None
    self.agent_1 = 0
    self.agent_2 = 1
    # MADDPG emits action logits instead of actual discrete actions
    self.actions_are_logits = env_config.get("actions_are_logits", False)
    self.one_hot_state_encoding = env_config.get("one_hot_state_encoding", False)
    self.with_state = env_config.get("separate_state_space", False)

    if not self.one_hot_state_encoding:
        self.observation_space = Discrete(6)
        self.with_state = False
    else:
        # Each agent gets the full state (one-hot encoding of which of the
        # three states are active) as input with the receiving agent's
        # ID (1 or 2) concatenated onto the end.
        if self.with_state:
            self.observation_space = Dict({
                "obs": MultiDiscrete([2, 2, 2, 3]),
                ENV_STATE: MultiDiscrete([2, 2, 2])
            })
        else:
            self.observation_space = MultiDiscrete([2, 2, 2, 3])
Example #14
Source File: few_shot_robot_env.py From rl_swiss with MIT License
def __init__(self, model_path, initial_qpos, n_actions, n_substeps, terminate_on_success=False):
    if model_path.startswith('/'):
        fullpath = model_path
    else:
        fullpath = os.path.join(os.path.dirname(__file__), 'assets', model_path)
    if not os.path.exists(fullpath):
        raise IOError('File {} does not exist'.format(fullpath))

    model = mujoco_py.load_model_from_path(fullpath)
    self.sim = mujoco_py.MjSim(model, nsubsteps=n_substeps)
    self.viewer = None

    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': int(np.round(1.0 / self.dt))
    }

    self.seed()
    self._env_setup(initial_qpos=initial_qpos)
    self.initial_state = copy.deepcopy(self.sim.get_state())

    obs = self.reset()
    self.action_space = spaces.Box(-1., 1., shape=(n_actions,), dtype='float32')
    self.observation_space = spaces.Dict(dict(
        obs=spaces.Box(-np.inf, np.inf, shape=obs['obs'].shape, dtype='float32'),
        obs_task_params=spaces.Box(-np.inf, np.inf, shape=obs['obs_task_params'].shape, dtype='float32'),
    ))
    self.terminate_on_success = terminate_on_success
Example #15
Source File: parametric_actions_cartpole.py From ray with Apache License 2.0
def __init__(self, max_avail_actions):
    # Use simple random 2-unit action embeddings for [LEFT, RIGHT]
    self.left_action_embed = np.random.randn(2)
    self.right_action_embed = np.random.randn(2)
    self.action_space = Discrete(max_avail_actions)
    self.wrapped = gym.make("CartPole-v0")
    self.observation_space = Dict({
        "action_mask": Box(0, 1, shape=(max_avail_actions, )),
        "avail_actions": Box(-10, 10, shape=(max_avail_actions, 2)),
        "cart": self.wrapped.observation_space,
    })
Example #16
Source File: wrapped_goal_envs.py From rl_swiss with MIT License
def __init__(self, *args, **kwargs):
    super(DebugReachFetchPickAndPlaceEnv, self).__init__(*args, **kwargs)
    fetch_obs_space = self.observation_space
    new_obs_space = spaces.Dict(
        {
            'obs': fetch_obs_space.spaces['observation'],
            'obs_task_params': Box(-np.inf, np.inf, shape=(3,), dtype='float32')
        }
    )
    self.observation_space = new_obs_space
Example #17
Source File: wrapped_goal_envs.py From rl_swiss with MIT License
def __init__(self, *args, **kwargs):
    super(DebugFetchReachAndLiftEnv, self).__init__(*args, **kwargs)
    fetch_obs_space = self.observation_space
    new_obs_space = spaces.Dict(
        {
            'obs': fetch_obs_space.spaces['observation'],
            'obs_task_params': Box(-np.inf, np.inf, shape=(3,), dtype='float32')
        }
    )
    self.observation_space = new_obs_space
Example #18
Source File: cartpole.py From ray with Apache License 2.0
def __init__(self, config=None):
    self.env = gym.make("CartPole-v0")
    self.action_space = Discrete(2)
    self.observation_space = Dict({
        "obs": self.env.observation_space,
        "action_mask": Box(low=0, high=1, shape=(self.action_space.n, ))
    })
    self.running_reward = 0
Example #19
Source File: policy.py From ray with Apache License 2.0
def make_model_and_action_dist(policy, obs_space, action_space, config):
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        action_space, config["model"], framework="torch")
    model_cls = DiscreteLinearModel

    if hasattr(obs_space, "original_space"):
        original_space = obs_space.original_space
    else:
        original_space = obs_space

    exploration_config = config.get("exploration_config")
    # Model is dependent on exploration strategy because of its implicitness
    # TODO: Have a separate model catalogue for bandits
    if exploration_config:
        if exploration_config["type"] == TS_PATH:
            if isinstance(original_space, spaces.Dict):
                assert "item" in original_space.spaces, \
                    "Cannot find 'item' key in observation space"
                model_cls = ParametricLinearModelThompsonSampling
            else:
                model_cls = DiscreteLinearModelThompsonSampling
        elif exploration_config["type"] == UCB_PATH:
            if isinstance(original_space, spaces.Dict):
                assert "item" in original_space.spaces, \
                    "Cannot find 'item' key in observation space"
                model_cls = ParametricLinearModelUCB
            else:
                model_cls = DiscreteLinearModelUCB

    model = model_cls(
        obs_space, action_space, logit_dim, config["model"], name="LinearModel")
    return model, dist_class
Example #20
Source File: parametric.py From ray with Apache License 2.0
def _def_observation_space(self):
    # Embeddings for each item in the candidate pool
    item_obs_space = spaces.Box(
        low=-np.inf,
        high=np.inf,
        shape=(self.num_candidates, self.feature_dim))

    # Can be useful for collaborative filtering based agents
    item_ids_obs_space = spaces.MultiDiscrete(
        [self.num_items] * self.num_candidates)

    # Can be either binary (clicks) or continuous feedback (watch time)
    resp_space = spaces.Box(low=-1, high=1, shape=(self.slate_size, ))

    if self.num_users == 1:
        return spaces.Dict({
            "item": item_obs_space,
            "item_id": item_ids_obs_space,
            "response": resp_space
        })
    else:
        user_obs_space = spaces.Discrete(self.num_users)
        return spaces.Dict({
            "user": user_obs_space,
            "item": item_obs_space,
            "item_id": item_ids_obs_space,
            "response": resp_space
        })
Example #21
Source File: wrapped_goal_envs.py From rl_swiss with MIT License
def __init__(self, *args, **kwargs):
    super(WrappedRotatedFetchReachAnywhereEnv, self).__init__(*args, **kwargs)
    fetch_obs_space = self.observation_space
    new_obs_space = spaces.Dict(
        {
            'obs': fetch_obs_space.spaces['observation'],
            'obs_task_params': fetch_obs_space.spaces['desired_goal']
        }
    )
    self.observation_space = new_obs_space
Example #22
Source File: space_utils.py From ray with Apache License 2.0
def flatten_space(space):
    """Flattens a gym.Space into its primitive components.

    Primitive components are any non Tuple/Dict spaces.

    Args:
        space (gym.Space): The gym.Space to flatten. This may be any
            supported type (including nested Tuples and Dicts).

    Returns:
        List[gym.Space]: The flattened list of primitive Spaces. This list
            does not contain Tuples or Dicts anymore.
    """

    def _helper_flatten(space_, l):
        from ray.rllib.utils.spaces.flexdict import FlexDict
        if isinstance(space_, Tuple):
            for s in space_:
                _helper_flatten(s, l)
        elif isinstance(space_, (Dict, FlexDict)):
            for k in space_.spaces:
                _helper_flatten(space_[k], l)
        else:
            l.append(space_)

    ret = []
    _helper_flatten(space, ret)
    return ret
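
A brief usage sketch for the helper above, assuming the gym.spaces classes are imported as in the surrounding examples; the nested space mirrors the one used in the next example:

space = Tuple([Discrete(2), Dict({"a": Box(-1.0, 1.0, (2, ))})])
primitives = flatten_space(space)
# primitives is [Discrete(2), Box(...)] -- all Tuple/Dict nesting removed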
Example #23
Source File: nested_space_repeat_after_me_env.py From ray with Apache License 2.0
def __init__(self, config):
    self.observation_space = config.get(
        "space",
        Tuple([Discrete(2),
               Dict({"a": Box(-1.0, 1.0, (2, ))})]))
    self.action_space = self.observation_space
    self.flattened_action_space = flatten_space(self.action_space)
    self.episode_len = config.get("episode_len", 100)
Example #24
Source File: environment.py From pypownet with GNU Lesser General Public License v3.0
def array_to_observation(self, array):
    """Converts and returns a pypownet.game.Observation from an array-like
    object (e.g. list, numpy array).

    :param array: array-like object
    :return: an instance of pypownet.game.Observation equivalent to the input array
    :raise ValueError: the input array does not have the expected length
        (self.action_length)
    """
    expected_length = sum(list(map(sum, self.shape)))
    if len(array) != expected_length:
        raise ValueError('Expected observation array of length %d, got %d' % (
            expected_length, len(array)))

    def transform_array(gym_dict, input_array, res):
        # loop through all dicts first
        for k, v in gym_dict.spaces.items():
            if isinstance(v, Dict) or isinstance(v, OrderedDict):
                input_array, res = transform_array(v, input_array, res)
        # then save shapes
        for k, v in gym_dict.spaces.items():
            if not (isinstance(v, Dict) or isinstance(v, OrderedDict)):
                # prod because some containers are flattened
                n_elements = np.prod(v.shape) if not isinstance(v, Discrete) else 1
                res[k] = input_array[:n_elements]
                # shift the array to discard the values just consumed
                input_array = input_array[n_elements:]
        return input_array, res

    _, subobservations = transform_array(self, array, {})
    return Observation(**subobservations)
Example #25
Source File: env_wrappers.py From sample-factory with MIT License
def is_goal_based_env(env):
    dict_obs = isinstance(env.observation_space, spaces.Dict)
    if not dict_obs:
        return False

    for key in ['obs', 'goal']:
        if key not in env.observation_space.spaces:
            return False

    return True
Example #26
Source File: eqa.py From habitat-api with MIT License
def step(
    self, *args: Any, answer_id: int, task: EQATask, **kwargs: Any
) -> Dict[str, Observations]:
    if task.answer is not None:
        task.is_valid = False
        task.invalid_reason = "Agent answered question twice."

    task.answer = answer_id
    return self._sim.get_observations_at()
Example #27
Source File: pyrobot.py From habitat-api with MIT License
def _robot_action_space(self, robot_type, robot_config):
    action_spaces_dict = {}
    for action in robot_config.ACTIONS:
        action_spaces_dict[action] = ACTION_SPACES[robot_type.upper()][action]
    return spaces.Dict(action_spaces_dict)