Python gym.spaces.Dict() Examples
The following are 30
code examples of gym.spaces.Dict().
Example #1
Source File: From ray with Apache License 2.0 | 9 votes |
def __init__(self, config):
    """Define the action space and the nested, variable-length observation space.

    Args:
        config: Environment configuration; it is not read by this constructor.
    """
    self.cur_pos = 0
    self.action_space = Discrete(4)

    # Space describing one item.
    self.item_space = Discrete(5)
    # Space describing one effect applied to a player.
    self.effect_space = Box(9000, 9999, shape=(4, ))

    # A player combines two fixed-size boxes with two variable-length lists,
    # capped at MAX_ITEMS and MAX_EFFECTS entries respectively.
    player_fields = {
        "location": Box(-100, 100, shape=(2, )),
        "status": Box(-1, 1, shape=(10, )),
        "items": Repeated(self.item_space, max_len=MAX_ITEMS),
        "effects": Repeated(self.effect_space, max_len=MAX_EFFECTS),
    }
    self.player_space = Dict(player_fields)

    # The observation is a list of at most MAX_PLAYERS players.
    self.observation_space = Repeated(
        self.player_space, max_len=MAX_PLAYERS)
Example #2
Source File: From multi-agent-emergence-environments with MIT License | 8 votes |
def __init__(self, env, keys_self, keys_external, keys_mask=(), flatten=True):
    """Wrap `env` and rebuild its observation space from selected keys.

    Args:
        env: Environment to wrap; its `observation_space.spaces` is indexed
            by the selected keys.
        keys_self: Key names; each gets the suffix '_self' appended before
            being stored (sorted) in `self.keys_self`.
        keys_external: Key names stored (sorted) in `self.keys_external`.
        keys_mask: Key names stored (sorted) in `self.keys_mask`. The default
            is an immutable empty tuple rather than a shared list.
        flatten: If true, expose one flat 'observation_self' Box covering the
            self and external keys; otherwise expose 'observation_self' plus
            one Box per external/mask key.
    """
    super().__init__(env)
    self.keys_self = sorted([k + '_self' for k in keys_self])
    self.keys_external = sorted(keys_external)
    self.keys_mask = sorted(keys_mask)
    self.flatten = flatten

    # Change observation space to look like a single agent observation space.
    # This makes constructing policies much easier
    if flatten:
        # NOTE(review): this expression was truncated in the source; it is
        # reconstructed by analogy with the non-flatten branch (same space
        # lookup, product over every axis after the first) -- confirm
        # against upstream. int() keeps the Box shape an integer even when
        # shape[1:] is empty (np.prod of an empty tuple is the float 1.0).
        size = sum([int(np.prod(self.env.observation_space.spaces[k].shape[1:]))
                    for k in self.keys_self + self.keys_external])
        self.observation_space = Dict(
            {'observation_self': Box(-np.inf, np.inf, (size,), np.float32)})
    else:
        size_self = sum([self.env.observation_space.spaces[k].shape[1]
                         for k in self.keys_self])
        obs_self = {'observation_self': Box(-np.inf, np.inf, (size_self,), np.float32)}
        # Each external/mask entry keeps its shape minus the first axis.
        obs_extern = {k: Box(-np.inf, np.inf, v.shape[1:], np.float32)
                      for k, v in self.observation_space.spaces.items()
                      if k in self.keys_external + self.keys_mask}
        obs_self.update(obs_extern)
        self.observation_space = Dict(obs_self)
Example #3
Source File: From adeptRL with GNU General Public License v3.0 | 6 votes |
def dtypes_from_gym(gym_space):
    """Map a gym space to a dict of dtypes.

    Primitive spaces yield a single-entry dict keyed by the space kind
    ("Discrete", "MultiBinary", "Box"). Dict and Tuple spaces yield one
    entry per sub-space, keyed by name or index, holding that sub-space's
    dtype.

    Args:
        gym_space: The gym space to inspect.

    Returns:
        dict: Mapping from kind/name/index to dtype.

    Raises:
        NotImplementedError: For MultiDiscrete and any unrecognised space.
    """
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": gym_space.dtype}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": gym_space.dtype}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.dtype}
    elif isinstance(gym_space, spaces.Dict):
        # Recurse with dtypes_from_gym (not _detect_gym_spaces, which returns
        # shapes) so nested entries are dtypes like the primitive branches.
        return {
            name: list(Space.dtypes_from_gym(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space.dtypes_from_gym(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
    else:
        raise NotImplementedError
Example #4
Source File: From sample-factory with MIT License | 6 votes |
def __init__(self, env, w, h, grayscale=True, add_channel_dim=False, area_interpolation=False):
    """Store the resize settings and derive the resized observation space.

    Args:
        env: Environment to wrap.
        w: Stored as `self.w`.
        h: Stored as `self.h`.
        grayscale: Stored as `self.grayscale`.
        add_channel_dim: Stored as `self.add_channel_dim`.
        area_interpolation: Selects cv2.INTER_AREA when true, otherwise
            cv2.INTER_NEAREST.
    """
    super(ResizeWrapper, self).__init__(env)

    self.w = w
    self.h = h
    self.grayscale = grayscale
    self.add_channel_dim = add_channel_dim

    if area_interpolation:
        self.interpolation = cv2.INTER_AREA
    else:
        self.interpolation = cv2.INTER_NEAREST

    source_space = env.observation_space
    if not isinstance(source_space, spaces.Dict):
        self.observation_space = self._calc_new_obs_space(source_space)
        return

    # TODO: does this even work?
    # Dict observation: convert every sub-space independently.
    self.observation_space = spaces.Dict({
        key: self._calc_new_obs_space(space)
        for key, space in source_space.spaces.items()
    })
Example #5
Source File: From adeptRL with GNU General Public License v3.0 | 6 votes |
def _detect_gym_spaces(gym_space):
    """Map a gym space to a dict of shapes.

    Primitive spaces yield a single-entry dict keyed by the space kind
    ("Discrete", "MultiBinary", "Box"). Dict and Tuple spaces yield one
    entry per sub-space, keyed by name or index, holding that sub-space's
    shape.

    Args:
        gym_space: The gym space to inspect.

    Returns:
        dict: Mapping from kind/name/index to shape.

    Raises:
        NotImplementedError: For MultiDiscrete and any unrecognised space.
    """
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": (gym_space.n,)}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": (gym_space.n,)}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.shape}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
    else:
        # Fail loudly instead of returning None, which would otherwise
        # surface later as an AttributeError in the recursive branches.
        raise NotImplementedError
Example #6
Source File: From smac with MIT License | 6 votes |
def __init__(self, **smac_args):
    """Create a new multi-agent StarCraft env compatible with RLlib.

    Arguments:
        smac_args (dict): Keyword arguments forwarded unchanged to the
            underlying smac.env.starcraft.StarCraft2Env instance.

    Examples:
        >>> from smac.examples.rllib import RLlibStarCraft2Env
        >>> env = RLlibStarCraft2Env(map_name="8m")
        >>> print(env.reset())
    """
    self._env = StarCraft2Env(**smac_args)
    self._ready_agents = []

    # Observation pairs the raw observation vector with a mask that has one
    # slot per action.
    obs_box = Box(-1, 1, shape=(self._env.get_obs_size(),))
    mask_box = Box(0, 1, shape=(self._env.get_total_actions(),))
    self.observation_space = Dict({
        "obs": obs_box,
        "action_mask": mask_box,
    })
    self.action_space = Discrete(self._env.get_total_actions())
Example #7
Source File: From sonic_contest with MIT License | 6 votes |
def __init__(self, env_fns):
    """Instantiate the environments and allocate per-environment buffers.

    Args:
        env_fns: Iterable of zero-argument callables, each returning an
            environment. The first environment supplies the observation
            space, action space and `level_pred`.
    """
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    self.level_pred = self.envs[0].level_pred
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)

    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space
    if isinstance(obs_space, spaces.Dict):
        # One buffer per sub-space; every sub-space must be a Box.
        assert isinstance(obs_space.spaces, OrderedDict)
        for key, box in obs_space.spaces.items():
            assert isinstance(box, spaces.Box)
            shapes[key] = box.shape
            dtypes[key] = box.dtype
            self.keys.append(key)
    else:
        # A plain Box is stored under the single key None.
        box = obs_space
        assert isinstance(box, spaces.Box)
        self.keys = [None]
        shapes, dtypes = {None: box.shape}, {None: box.dtype}

    # self.num_envs is presumably set by VecEnv.__init__ -- verify.
    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
    # yields the same dtype.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
Example #8
Source File: From gym-minigrid with Apache License 2.0 | 6 votes |
def __init__(self, env, agent_view_size=7):
    """Wrap `env` and override the agent's view size.

    Args:
        env: Environment to wrap.
        agent_view_size: Side length of the square view; asserted to be odd
            and at least 3.
    """
    super().__init__(env)

    assert agent_view_size % 2 == 1
    assert agent_view_size >= 3

    # Replace the default view size on the unwrapped environment.
    env.unwrapped.agent_view_size = agent_view_size

    # Image space sized for the requested view.
    view_shape = (agent_view_size, agent_view_size, 3)
    image_space = gym.spaces.Box(
        low=0,
        high=255,
        shape=view_shape,
        dtype='uint8'
    )

    # Expose the image under the 'image' key of a Dict observation space.
    self.observation_space = spaces.Dict({'image': image_space})
Example #9
Source File: From ray with Apache License 2.0 | 6 votes |
def _convert_spec_to_space(spec):
    """Recursively convert a spec (or a dict of specs) into a Gym space.

    Args:
        spec: A dict of specs, or a `specs.DiscreteArray`,
            `specs.BoundedArray` or `specs.Array` instance.

    Returns:
        A `spaces.Dict` for dict input, `spaces.Discrete` for a
        DiscreteArray, and `spaces.Box` for bounded or unbounded arrays.

    Raises:
        NotImplementedError: If `spec` is none of the supported types.
    """
    if isinstance(spec, dict):
        return spaces.Dict(
            {k: _convert_spec_to_space(v) for k, v in spec.items()})
    # The order of the isinstance checks is preserved from the original.
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    elif isinstance(spec, specs.BoundedArray):
        # np.asscalar was removed in NumPy 1.23; `.item()` is the documented
        # replacement and likewise requires a size-1 value.
        return spaces.Box(
            low=np.asarray(spec.minimum).item(),
            high=np.asarray(spec.maximum).item(),
            shape=spec.shape,
            dtype=spec.dtype)
    elif isinstance(spec, specs.Array):
        return spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=spec.shape,
            dtype=spec.dtype)

    raise NotImplementedError(
        ("Could not convert `Array` spec of type {} to Gym space. "
         "Attempted to convert: {}").format(type(spec), spec))
Example #10
Source File: From highway-env with MIT License | 6 votes |
def __init__(self,
             env: 'AbstractEnv',
             features: Optional[List[str]] = None,
             grid_size: Optional[List[List[float]]] = None,
             grid_step: Optional[List[int]] = None,
             features_range: Optional[Dict[str, List[float]]] = None,
             absolute: bool = False,
             **kwargs: dict) -> None:
    """
    :param env: The environment to observe
    :param features: Names of features used in the observation; defaults to self.FEATURES
    :param grid_size: [min, max] pair per grid axis; defaults to self.GRID_SIZE
    :param grid_step: Cell size per grid axis; defaults to self.GRID_STEP
    :param features_range: Stored as-is in self.features_range
    :param absolute: Stored as-is in self.absolute
    :param kwargs: Accepted but not used by this constructor
    """
    self.env = env
    self.features = features if features is not None else self.FEATURES
    self.grid_size = np.array(grid_size) if grid_size is not None else np.array(self.GRID_SIZE)
    self.grid_step = np.array(grid_step) if grid_step is not None else np.array(self.GRID_STEP)
    # Number of cells per axis. Divide by self.grid_step (not the raw
    # argument, which may be None) and cast to int because the result is
    # used as an array shape.
    grid_shape = np.asarray(
        np.floor((self.grid_size[:, 1] - self.grid_size[:, 0]) / self.grid_step),
        dtype=int)
    self.grid = np.zeros((len(self.features), *grid_shape))
    self.features_range = features_range
    self.absolute = absolute
Example #11
Source File: From ray with Apache License 2.0 | 6 votes |
def __init__(self, config):
    """Build the spaces of an environment whose observation is a list of players.

    Args:
        config: Environment configuration; unused by this constructor.
    """
    self.cur_pos = 0
    self.action_space = Discrete(4)

    # One item.
    self.item_space = Discrete(5)
    # One effect on a player.
    self.effect_space = Box(9000, 9999, shape=(4, ))

    # One player: fixed-size location/status plus bounded-length lists of
    # items and effects.
    items_space = Repeated(self.item_space, max_len=MAX_ITEMS)
    effects_space = Repeated(self.effect_space, max_len=MAX_EFFECTS)
    self.player_space = Dict({
        "location": Box(-100, 100, shape=(2, )),
        "status": Box(-1, 1, shape=(10, )),
        "items": items_space,
        "effects": effects_space,
    })

    # Observation: up to MAX_PLAYERS players.
    self.observation_space = Repeated(
        self.player_space, max_len=MAX_PLAYERS)
Example #12
Source File: From ray with Apache License 2.0 | 6 votes |
def _convert_spec_to_space(spec):
    """Translate a spec, or a dict of specs, into the matching Gym space.

    Args:
        spec: A dict of specs, or a `specs.DiscreteArray`,
            `specs.BoundedArray` or `specs.Array` instance.

    Returns:
        `spaces.Dict` for a dict (converted recursively), `spaces.Discrete`
        for a DiscreteArray, `spaces.Box` for bounded or unbounded arrays.

    Raises:
        NotImplementedError: If `spec` is none of the supported types.
    """
    if isinstance(spec, dict):
        return spaces.Dict(
            {k: _convert_spec_to_space(v) for k, v in spec.items()})
    # The order of the isinstance checks is preserved from the original.
    if isinstance(spec, specs.DiscreteArray):
        return spaces.Discrete(spec.num_values)
    elif isinstance(spec, specs.BoundedArray):
        # np.asscalar no longer exists (removed in NumPy 1.23); `.item()`
        # extracts the same Python scalar from a size-1 value.
        return spaces.Box(
            low=np.asarray(spec.minimum).item(),
            high=np.asarray(spec.maximum).item(),
            shape=spec.shape,
            dtype=spec.dtype)
    elif isinstance(spec, specs.Array):
        return spaces.Box(
            low=-float("inf"),
            high=float("inf"),
            shape=spec.shape,
            dtype=spec.dtype)

    raise NotImplementedError(
        ("Could not convert `Array` spec of type {} to Gym space. "
         "Attempted to convert: {}").format(type(spec), spec))
Example #13
Source File: From self-imitation-learning with MIT License | 6 votes |
def __init__(self, env_fns):
    """Instantiate the environments and allocate per-environment buffers.

    Args:
        env_fns: Iterable of zero-argument callables, each returning an
            environment. The first environment supplies the observation
            and action spaces.
    """
    self.envs = [fn() for fn in env_fns]
    env = self.envs[0]
    VecEnv.__init__(self, len(env_fns), env.observation_space, env.action_space)

    shapes, dtypes = {}, {}
    self.keys = []
    obs_space = env.observation_space

    if isinstance(obs_space, spaces.Dict):
        assert isinstance(obs_space.spaces, OrderedDict)
        subspaces = obs_space.spaces
    else:
        # A non-Dict space is treated as a single sub-space keyed by None.
        subspaces = {None: obs_space}

    for key, box in subspaces.items():
        shapes[key] = box.shape
        dtypes[key] = box.dtype
        self.keys.append(key)

    # self.num_envs is presumably set by VecEnv.__init__ -- verify.
    self.buf_obs = {
        k: np.zeros((self.num_envs,) + tuple(shapes[k]), dtype=dtypes[k])
        for k in self.keys
    }
    # The `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
    # yields the same dtype.
    self.buf_dones = np.zeros((self.num_envs,), dtype=bool)
    self.buf_rews = np.zeros((self.num_envs,), dtype=np.float32)
    self.buf_infos = [{} for _ in range(self.num_envs)]
    self.actions = None
Example #14
Source File: From rlgraph with Apache License 2.0 | 6 votes |
def __init__(self, bit_length=16, max_steps=None):
    """Configure the bit-flip environment and reset it.

    Args:
        bit_length: Number of bits; asserted to be at least 1.
        max_steps: Step limit stored in `self.max_steps`; falls back to
            `bit_length` when None.
    """
    super(BitFlip, self).__init__()

    assert bit_length >= 1, 'bit_length must be >= 1, found {}'.format(bit_length)
    self.bit_length = bit_length
    self.max_steps = bit_length if max_steps is None else max_steps

    self.last_action = -1  # -1 for reset
    self.steps = 0

    self.seed()

    # index = n means to not flip any bit
    self.action_space = spaces.Discrete(bit_length + 1)

    def _bit_box():
        # A fresh 0/1 integer Box of length bit_length for each entry.
        return spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32)

    # achieved goal and observation are identical in bit_flip environment,
    # however it is made this way to be compatible with Openai GoalEnv
    self.observation_space = spaces.Dict({
        'observation': _bit_box(),
        'achieved_goal': _bit_box(),
        'desired_goal': _bit_box(),
    })

    self.reset()
Example #15
Source File: From ray with Apache License 2.0 | 6 votes |
def __init__(self, env_config):
    """Read the options from `env_config` and pick the observation space.

    Args:
        env_config: Mapping read for the optional keys
            "actions_are_logits", "one_hot_state_encoding" and
            "separate_state_space" (each defaulting to False).
    """
    self.state = None
    self.agent_1 = 0
    self.agent_2 = 1
    # MADDPG emits action logits instead of actual discrete actions
    self.actions_are_logits = env_config.get("actions_are_logits", False)
    self.one_hot_state_encoding = env_config.get("one_hot_state_encoding",
                                                 False)
    self.with_state = env_config.get("separate_state_space", False)

    # With one-hot encoding, each agent gets the full state (one-hot
    # encoding of which of the three states are active) as input with the
    # receiving agent's ID (1 or 2) concatenated onto the end.
    if self.one_hot_state_encoding and self.with_state:
        self.observation_space = Dict({
            "obs": MultiDiscrete([2, 2, 2, 3]),
            ENV_STATE: MultiDiscrete([2, 2, 2])
        })
    elif self.one_hot_state_encoding:
        self.observation_space = MultiDiscrete([2, 2, 2, 3])
    else:
        # Without one-hot encoding a separate state space is never used.
        self.observation_space = Discrete(6)
        self.with_state = False
Example #16
Source File: From rl_swiss with MIT License | 5 votes |
def __init__(self, model_path, initial_qpos, n_actions, n_substeps, terminate_on_success=False):
    """Load the MuJoCo model, set up the simulation and define the spaces.

    Args:
        model_path: Model file path; a path starting with '/' is used as
            is, anything else is resolved inside the 'assets' directory next
            to this file.
        initial_qpos: Forwarded to `self._env_setup`.
        n_actions: Length of the action Box.
        n_substeps: Passed to `mujoco_py.MjSim` as `nsubsteps`.
        terminate_on_success: Stored as `self.terminate_on_success`.

    Raises:
        IOError: If the resolved model file does not exist.
    """
    is_absolute = model_path.startswith('/')
    fullpath = model_path if is_absolute else os.path.join(
        os.path.dirname(__file__), 'assets', model_path)
    if not os.path.exists(fullpath):
        raise IOError('File {} does not exist'.format(fullpath))

    mj_model = mujoco_py.load_model_from_path(fullpath)
    self.sim = mujoco_py.MjSim(mj_model, nsubsteps=n_substeps)
    self.viewer = None

    frames_per_second = int(np.round(1.0 / self.dt))
    self.metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': frames_per_second
    }

    self.seed()
    self._env_setup(initial_qpos=initial_qpos)
    self.initial_state = copy.deepcopy(self.sim.get_state())

    # A first reset supplies a sample observation whose array shapes define
    # the observation Boxes.
    obs = self.reset()
    self.action_space = spaces.Box(-1., 1., shape=(n_actions,), dtype='float32')
    obs_box = spaces.Box(-np.inf, np.inf, shape=obs['obs'].shape, dtype='float32')
    task_params_box = spaces.Box(
        -np.inf, np.inf, shape=obs['obs_task_params'].shape, dtype='float32')
    self.observation_space = spaces.Dict({
        'obs': obs_box,
        'obs_task_params': task_params_box,
    })

    self.terminate_on_success = terminate_on_success
Example #17
Source File: From ray with Apache License 2.0 | 5 votes |
def __init__(self, max_avail_actions):
    """Wrap CartPole-v0 with an action mask and action embeddings.

    Args:
        max_avail_actions: Size of the discrete action space and of the
            mask / embedding observation entries.
    """
    # Use simple random 2-unit action embeddings for [LEFT, RIGHT]
    self.left_action_embed = np.random.randn(2)
    self.right_action_embed = np.random.randn(2)

    self.action_space = Discrete(max_avail_actions)
    self.wrapped = gym.make("CartPole-v0")

    mask_space = Box(0, 1, shape=(max_avail_actions, ))
    embed_space = Box(-10, 10, shape=(max_avail_actions, 2))
    self.observation_space = Dict({
        "action_mask": mask_space,
        "avail_actions": embed_space,
        # The wrapped environment's own observation goes under "cart".
        "cart": self.wrapped.observation_space,
    })
Example #18
Source File: From rl_swiss with MIT License | 5 votes |
def __init__(self, *args, **kwargs):
    """Initialise the parent env, then re-key its observation space.

    All arguments are forwarded to the parent constructor. The parent's
    'observation' sub-space is exposed as 'obs', alongside a 3-element
    unbounded float32 Box under 'obs_task_params'.
    """
    super(DebugReachFetchPickAndPlaceEnv, self).__init__(*args, **kwargs)

    parent_space = self.observation_space
    task_params_space = Box(-np.inf, np.inf, shape=(3,), dtype='float32')
    self.observation_space = spaces.Dict({
        'obs': parent_space.spaces['observation'],
        'obs_task_params': task_params_space,
    })
Example #19
Source File: From rl_swiss with MIT License | 5 votes |
def __init__(self, *args, **kwargs):
    """Initialise the parent env, then re-key its observation space.

    All arguments are forwarded to the parent constructor. The parent's
    'observation' sub-space is exposed as 'obs', alongside a 3-element
    unbounded float32 Box under 'obs_task_params'.
    """
    super(DebugFetchReachAndLiftEnv, self).__init__(*args, **kwargs)

    base_space = self.observation_space
    remapped = {
        'obs': base_space.spaces['observation'],
        'obs_task_params': Box(-np.inf, np.inf, shape=(3,), dtype='float32'),
    }
    self.observation_space = spaces.Dict(remapped)
Example #20
Source File: From ray with Apache License 2.0 | 5 votes |
def __init__(self, config=None):
    """Wrap CartPole-v0 and add an action mask to its observation.

    Args:
        config: Optional configuration; not read by this constructor.
    """
    self.env = gym.make("CartPole-v0")
    self.action_space = Discrete(2)

    # One mask slot per discrete action.
    mask_space = Box(low=0, high=1, shape=(self.action_space.n, ))
    self.observation_space = Dict({
        "obs": self.env.observation_space,
        "action_mask": mask_space,
    })

    self.running_reward = 0
Example #21
Source File: From ray with Apache License 2.0 | 5 votes |
def make_model_and_action_dist(policy, obs_space, action_space, config):
    """Build the linear model and action distribution class for a policy.

    Args:
        policy: Not used by this function.
        obs_space: Observation space; its `original_space` attribute is
            inspected when present.
        action_space: Action space passed to the model catalog and model.
        config: Config dict; reads "model" and the optional
            "exploration_config".

    Returns:
        Tuple of (model instance, action distribution class).
    """
    dist_class, logit_dim = ModelCatalog.get_action_dist(
        action_space, config["model"], framework="torch")

    original_space = getattr(obs_space, "original_space", obs_space)

    def _choose(parametric_cls, discrete_cls):
        # Dict observation spaces use the parametric variant and must
        # expose an 'item' entry; everything else uses the discrete one.
        if not isinstance(original_space, spaces.Dict):
            return discrete_cls
        assert "item" in original_space.spaces, \
            "Cannot find 'item' key in observation space"
        return parametric_cls

    # Model is dependent on exploration strategy because of its implicitness
    # TODO: Have a separate model catalogue for bandits
    model_cls = DiscreteLinearModel
    exploration_config = config.get("exploration_config")
    if exploration_config:
        exploration_type = exploration_config["type"]
        if exploration_type == TS_PATH:
            model_cls = _choose(ParametricLinearModelThompsonSampling,
                                DiscreteLinearModelThompsonSampling)
        elif exploration_type == UCB_PATH:
            model_cls = _choose(ParametricLinearModelUCB,
                                DiscreteLinearModelUCB)

    model = model_cls(
        obs_space,
        action_space,
        logit_dim,
        config["model"],
        name="LinearModel")
    return model, dist_class
Example #22
Source File: From ray with Apache License 2.0 | 5 votes |
def _def_observation_space(self):
    """Return the Dict observation space for this environment.

    The space always has "item", "item_id" and "response" entries; a "user"
    entry is added unless `self.num_users` equals 1.
    """
    # Embeddings for each item in the candidate pool
    item_obs_space = spaces.Box(
        low=-np.inf,
        high=np.inf,
        shape=(self.num_candidates, self.feature_dim))

    # Can be useful for collaborative filtering based agents
    item_ids_obs_space = spaces.MultiDiscrete(
        [self.num_items] * self.num_candidates)

    # Can be either binary (clicks) or continuous feedback (watch time)
    resp_space = spaces.Box(low=-1, high=1, shape=(self.slate_size, ))

    entries = {
        "item": item_obs_space,
        "item_id": item_ids_obs_space,
        "response": resp_space
    }
    if self.num_users != 1:
        # Keep "user" as the first entry, as in the multi-user layout.
        entries = {"user": spaces.Discrete(self.num_users), **entries}
    return spaces.Dict(entries)
Example #23
Source File: From rl_swiss with MIT License | 5 votes |
def __init__(self, *args, **kwargs):
    """Initialise the parent env, then re-key its observation space.

    All arguments are forwarded to the parent constructor. The parent's
    'observation' sub-space is exposed as 'obs' and its 'desired_goal'
    sub-space as 'obs_task_params'.
    """
    super(WrappedRotatedFetchReachAnywhereEnv, self).__init__(*args, **kwargs)

    parent_spaces = self.observation_space.spaces
    self.observation_space = spaces.Dict({
        'obs': parent_spaces['observation'],
        'obs_task_params': parent_spaces['desired_goal'],
    })
Example #24
Source File: From ray with Apache License 2.0 | 5 votes |
def flatten_space(space):
    """Flattens a gym.Space into its primitive components.

    A primitive component is any space that is not a Tuple, Dict or
    FlexDict.

    Args:
        space(gym.Space): The gym.Space to flatten. This may be any
            supported type (including nested Tuples and Dicts).

    Returns:
        List[gym.Space]: The primitive Spaces in traversal order. The list
            contains no Tuples or Dicts.
    """
    from ray.rllib.utils.spaces.flexdict import FlexDict

    primitives = []

    def _collect(node):
        # Depth-first walk; containers are expanded, leaves are recorded.
        if isinstance(node, Tuple):
            for child in node:
                _collect(child)
        elif isinstance(node, (Dict, FlexDict)):
            for key in node.spaces:
                _collect(node[key])
        else:
            primitives.append(node)

    _collect(space)
    return primitives
Example #25
Source File: From ray with Apache License 2.0 | 5 votes |
def __init__(self, config):
    """Set up identical observation and action spaces from `config`.

    Args:
        config: Mapping read for the optional keys "space" (defaults to a
            Tuple of Discrete(2) and a one-entry Dict) and "episode_len"
            (defaults to 100).
    """
    default_space = Tuple([
        Discrete(2),
        Dict({"a": Box(-1.0, 1.0, (2, ))}),
    ])
    self.observation_space = config.get("space", default_space)
    # Actions share the very same space object as observations.
    self.action_space = self.observation_space
    self.flattened_action_space = flatten_space(self.action_space)
    self.episode_len = config.get("episode_len", 100)
Example #26
Source File: From ray with Apache License 2.0 | 5 votes |
def __init__(self, max_avail_actions):
    """Create a CartPole-v0 wrapper with masked, embedded actions.

    Args:
        max_avail_actions: Number of discrete actions; also the leading
            dimension of the "action_mask" and "avail_actions" entries.
    """
    # Use simple random 2-unit action embeddings for [LEFT, RIGHT]
    self.left_action_embed = np.random.randn(2)
    self.right_action_embed = np.random.randn(2)
    self.action_space = Discrete(max_avail_actions)
    self.wrapped = gym.make("CartPole-v0")

    observation_fields = {
        "action_mask": Box(0, 1, shape=(max_avail_actions, )),
        "avail_actions": Box(-10, 10, shape=(max_avail_actions, 2)),
        # Observation space of the wrapped environment, unchanged.
        "cart": self.wrapped.observation_space,
    }
    self.observation_space = Dict(observation_fields)
Example #27
Source File: From pypownet with GNU Lesser General Public License v3.0 | 5 votes |
def array_to_observation(self, array):
    """ Converts a flat array-like object (e.g. list, numpy array) into an Observation.

    :param array: array-style object holding the flattened observation values
    :return: an Observation instance built from consecutive slices of the input array
    :raise ValueError: the input array does not have the expected observation length
    """
    # Function-scope import keeps this block self-contained.
    import numpy as np

    expected_length = sum(list(map(sum, self.shape)))
    if len(array) != expected_length:
        raise ValueError('Expected observation array of length %d, got %d' % (expected_length, len(array)))

    def transform_array(gym_dict, input_array, res):
        # loop through all dicts first
        for v in gym_dict.spaces.values():
            if isinstance(v, (Dict, OrderedDict)):
                input_array, res = transform_array(v, input_array, res)

        # then consume one slice per non-dict entry
        for k, v in gym_dict.spaces.items():
            if not isinstance(v, (Dict, OrderedDict)):
                # prod because some containers are flattened; int() keeps the
                # count usable as a slice bound.
                # NOTE(review): the element-count expression was missing in the
                # source and is reconstructed from the adjacent comment -- confirm.
                n_elements = int(np.prod(v.shape)) if not isinstance(v, Discrete) else 1
                res[k] = input_array[:n_elements]
                input_array = input_array[n_elements:]  # shift array to discard just selected values

        return input_array, res

    _, subobservations = transform_array(self, array, {})
    return Observation(**subobservations)
Example #28
Source File: From sample-factory with MIT License | 5 votes |
def is_goal_based_env(env):
    """Return True when `env` has a Dict observation space with 'obs' and 'goal' keys."""
    obs_space = env.observation_space
    if not isinstance(obs_space, spaces.Dict):
        return False

    return all(key in obs_space.spaces for key in ('obs', 'goal'))
Example #29
Source File: From habitat-api with MIT License | 5 votes |
def step(
    self, *args: Any, answer_id: int, task: EQATask, **kwargs: Any
) -> Dict[str, Observations]:
    """Record the agent's answer on `task` and return the current observations.

    A task that already has an answer is marked invalid before the new
    answer overwrites the previous one.
    """
    already_answered = task.answer is not None
    if already_answered:
        task.is_valid = False
        task.invalid_reason = "Agent answered question twice."

    task.answer = answer_id
    return self._sim.get_observations_at()
Example #30
Source File: From habitat-api with MIT License | 5 votes |
def _robot_action_space(self, robot_type, robot_config):
    """Build a Dict action space from the actions listed in `robot_config`.

    Each action in `robot_config.ACTIONS` is looked up in ACTION_SPACES
    under the upper-cased `robot_type`.
    """
    return spaces.Dict({
        action: ACTION_SPACES[robot_type.upper()][action]
        for action in robot_config.ACTIONS
    })