Python gym.spaces.Tuple() Examples
The following are 27 code examples of gym.spaces.Tuple(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions/classes of the module gym.spaces.
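Before the examples, a minimal sketch (not from any of the projects below) of what gym.spaces.Tuple does: it composes several sub-spaces into a single space whose samples are plain Python tuples, with one entry per sub-space.

import numpy as np
from gym import spaces

# Compose a discrete choice with a 2-D continuous vector.
space = spaces.Tuple((
    spaces.Discrete(3),
    spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32),
))

sample = space.sample()        # e.g. (1, array([ 0.31, -0.74], dtype=float32))
assert space.contains(sample)  # samples are plain Python tuples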
Example #1
Source File: utility.py From playground with Apache License 2.0

def default(self, obj):
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, constants.Item):
        return obj.value
    elif isinstance(obj, constants.Action):
        return obj.value
    elif isinstance(obj, constants.GameType):
        return obj.value
    elif isinstance(obj, np.int64):
        return int(obj)
    elif hasattr(obj, 'to_json'):
        return obj.to_json()
    elif isinstance(obj, spaces.Discrete):
        return obj.n
    elif isinstance(obj, spaces.Tuple):
        return [space.n for space in obj.spaces]
    return json.JSONEncoder.default(self, obj)
Example #2
Source File: gym.py From sonic_contest with MIT License

def gym_space_distribution(space):
    """
    Create a Distribution from a gym.Space.

    If the space is not supported, throws an
    UnsupportedActionSpace exception.
    """
    if isinstance(space, spaces.Discrete):
        return CategoricalSoftmax(space.n)
    elif isinstance(space, spaces.Box):
        return BoxGaussian(space.low, space.high)
    elif isinstance(space, spaces.MultiBinary):
        return MultiBernoulli(space.n)
    elif isinstance(space, spaces.Tuple):
        sub_dists = tuple(gym_space_distribution(s) for s in space.spaces)
        return TupleDistribution(sub_dists)
    elif isinstance(space, spaces.MultiDiscrete):
        discretes = tuple(CategoricalSoftmax(n) for n in space.nvec)
        return TupleDistribution(
            discretes, to_sample=lambda x: np.array(x, dtype=space.dtype))
    raise UnsupportedGymSpace(space)
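Because the Tuple branch recurses, nested Tuple spaces map to nested TupleDistributions. A hypothetical usage sketch (CategoricalSoftmax, MultiBernoulli, and TupleDistribution are classes from this project, not from gym itself):

# Assumes the project's distribution classes are importable alongside gym.spaces.
dist = gym_space_distribution(
    spaces.Tuple([spaces.Discrete(3), spaces.MultiBinary(4)]))
# dist is a TupleDistribution wrapping
# (CategoricalSoftmax(3), MultiBernoulli(4)).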
Example #3
Source File: qmix_policy.py From ray with Apache License 2.0

def _validate(obs_space, action_space):
    if not hasattr(obs_space, "original_space") or \
            not isinstance(obs_space.original_space, Tuple):
        raise ValueError(
            "Obs space must be a Tuple, got {}. Use ".format(obs_space) +
            "MultiAgentEnv.with_agent_groups() to group related "
            "agents for QMix.")
    if not isinstance(action_space, Tuple):
        raise ValueError(
            "Action space must be a Tuple, got {}. ".format(action_space) +
            "Use MultiAgentEnv.with_agent_groups() to group related "
            "agents for QMix.")
    if not isinstance(action_space.spaces[0], Discrete):
        raise ValueError(
            "QMix requires a discrete action space, got {}".format(
                action_space.spaces[0]))
    if len({str(x) for x in obs_space.original_space.spaces}) > 1:
        raise ValueError(
            "Implementation limitation: observations of grouped agents "
            "must be homogeneous, got {}".format(
                obs_space.original_space.spaces))
    if len({str(x) for x in action_space.spaces}) > 1:
        raise ValueError(
            "Implementation limitation: action space of grouped agents "
            "must be homogeneous, got {}".format(action_space.spaces))
Example #4
Source File: food.py From multi-agent-emergence-environments with MIT License

def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf,
             food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False):
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']

    if type(reward_scale) not in [list, tuple, np.ndarray]:
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {'food_obs': (self.max_n_food, food_dim),
                   'food_health': (self.max_n_food, 1),
                   'food_eat': (self.max_n_food, 1)})
    self.action_space.spaces['action_eat_food'] = Tuple(
        [MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)])
Example #5
Source File: meta.py From sonic_contest with MIT License

def __init__(self, env, first_action, num_eps=1, warmup_eps=0):
    """
    Parameters:
      env: the environment to wrap.
      first_action: the action to include in the first observation.
      num_eps: episodes per meta-episode.
      warmup_eps: the number of episodes at the start of a
        meta-episode for which rewards are 0. Negative values
        are added to num_eps.
    """
    if warmup_eps < 0:
        warmup_eps += num_eps
    super(RL2Env, self).__init__(env)
    self.first_action = first_action
    self.observation_space = spaces.Tuple([
        env.observation_space,
        env.action_space,
        spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype='float'),
        spaces.MultiBinary(1)
    ])
    self.num_eps = num_eps
    self.warmup_eps = warmup_eps
    self._done_eps = 0
Example #6
Source File: random_env.py From ray with Apache License 2.0

def step(self, action):
    if self.check_action_bounds and not self.action_space.contains(action):
        raise ValueError("Illegal action for {}: {}".format(
            self.action_space, action))
    if (isinstance(self.action_space, Tuple)
            and len(action) != len(self.action_space.spaces)):
        raise ValueError("Illegal action for {}: {}".format(
            self.action_space, action))
    return self.observation_space.sample(), \
        float(self.reward_space.sample()), \
        bool(np.random.choice(
            [True, False], p=[self.p_done, 1.0 - self.p_done])), {}


# Multi-agent version of the RandomEnv.
Example #7
Source File: _spaces.py From adeptRL with GNU General Public License v3.0

def _detect_gym_spaces(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": (gym_space.n,)}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": (gym_space.n,)}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.shape}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
Example #8
Source File: gym_env.py From RLs with Apache License 2.0

def step(self, actions):
    actions = np.array(actions)
    if not self.is_continuous:
        actions = sth.int2action_index(actions, self.discrete_action_dim_list)
        if self.action_type == 'discrete':
            actions = actions.reshape(-1,)
        elif self.action_type == 'Tuple(Discrete)':
            actions = actions.reshape(self.n, -1).tolist()
    results = Asyn.op_func(self.envs, Asyn.OP.STEP, actions)
    obs, reward, done, info = [np.asarray(e) for e in zip(*results)]
    reward = reward.astype('float32')
    dones_index = np.where(done)[0]
    if dones_index.shape[0] > 0:
        correct_new_obs = self.partial_reset(obs, dones_index)
    else:
        correct_new_obs = obs
    if self.obs_type == 'visual':
        obs = obs[:, np.newaxis, ...]
        correct_new_obs = correct_new_obs[:, np.newaxis, ...]
    return obs, reward, done, info, correct_new_obs
Example #9
Source File: _spaces.py From adeptRL with GNU General Public License v3.0

def dtypes_from_gym(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": gym_space.dtype}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": gym_space.dtype}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.dtype}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
    else:
        raise NotImplementedError
Example #10
Source File: prisoners_dilemma.py From LOLA_DiCE with MIT License

def __init__(self, max_steps, batch_size=1):
    self.max_steps = max_steps
    self.batch_size = batch_size
    self.payout_mat = np.array([[-2, 0], [-3, -1]])
    self.states = np.array([[1, 2], [3, 4]])
    self.action_space = Tuple([
        Discrete(self.NUM_ACTIONS) for _ in range(self.NUM_AGENTS)
    ])
    self.observation_space = Tuple([
        OneHot(self.NUM_STATES) for _ in range(self.NUM_AGENTS)
    ])
    self.available_actions = [
        np.ones((batch_size, self.NUM_ACTIONS), dtype=int)
        for _ in range(self.NUM_AGENTS)
    ]
    self.step_count = None
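Sampling a Tuple action space like the one above yields one action per agent. A minimal sketch with plain gym spaces (NUM_AGENTS = 2 and NUM_ACTIONS = 2 are assumed from the iterated prisoner's dilemma setting):

from gym.spaces import Discrete, Tuple

joint_action_space = Tuple([Discrete(2) for _ in range(2)])
joint_action = joint_action_space.sample()  # e.g. (0, 1): one action per agent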
Example #11
Source File: env_checker.py From stable-baselines with MIT License

def _check_obs(obs: Union[tuple, dict, np.ndarray, int],
               observation_space: spaces.Space,
               method_name: str) -> None:
    """
    Check that the observation returned by the environment
    corresponds to the declared one.
    """
    if not isinstance(observation_space, spaces.Tuple):
        assert not isinstance(obs, tuple), (
            "The observation returned by the `{}()` "
            "method should be a single value, not a tuple".format(method_name))

    # The check for a GoalEnv is done by the base class
    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(obs, int), (
            "The observation returned by `{}()` "
            "method must be an int".format(method_name))
    elif _enforce_array_obs(observation_space):
        assert isinstance(obs, np.ndarray), (
            "The observation returned by `{}()` "
            "method must be a numpy array".format(method_name))

    assert observation_space.contains(obs), (
        "The observation returned by the `{}()` "
        "method does not match the given observation space".format(method_name))
Example #12
Source File: windy_maze_env.py From ray with Apache License 2.0

def __init__(self, env_config):
    self.map = [m for m in MAP_DATA.split("\n") if m]
    self.x_dim = len(self.map)
    self.y_dim = len(self.map[0])
    logger.info("Loaded map {} {}".format(self.x_dim, self.y_dim))
    for x in range(self.x_dim):
        for y in range(self.y_dim):
            if self.map[x][y] == "S":
                self.start_pos = (x, y)
            elif self.map[x][y] == "F":
                self.end_pos = (x, y)
    logger.info("Start pos {} end pos {}".format(self.start_pos,
                                                 self.end_pos))
    self.observation_space = Tuple([
        Box(0, 100, shape=(2, )),  # (x, y)
        Discrete(4),  # wind direction (N, E, S, W)
    ])
    self.action_space = Discrete(2)  # whether to move or not
Example #13
Source File: cliff_env.py From gym-gridworlds with MIT License

def __init__(self):
    self.height = 4
    self.width = 12
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Tuple((
        spaces.Discrete(self.height),
        spaces.Discrete(self.width)
    ))
    self.moves = {
        0: (-1, 0),  # up
        1: (0, 1),   # right
        2: (1, 0),   # down
        3: (0, -1),  # left
    }

    # begin in start state
    self.reset()
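A Tuple of two Discrete spaces is convenient for tabular methods, since each observation can directly index a Q-table. A hypothetical sketch (the class name CliffEnv is assumed, not given in the snippet):

import numpy as np

env = CliffEnv()  # hypothetical name for the class defined above
q_table = np.zeros((env.height, env.width, env.action_space.n))
state = (0, 0)  # observations are (row, col) tuples
greedy_action = int(np.argmax(q_table[state]))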
Example #14
Source File: windy_gridworld_env.py From gym-gridworlds with MIT License

def __init__(self):
    self.height = 7
    self.width = 10
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Tuple((
        spaces.Discrete(self.height),
        spaces.Discrete(self.width)
    ))
    self.moves = {
        0: (-1, 0),  # up
        1: (0, 1),   # right
        2: (1, 0),   # down
        3: (0, -1),  # left
    }

    # begin in start state
    self.reset()
Example #15
Source File: recsim_gym.py From recsim with Apache License 2.0

def observation_space(self):
    """Returns the observation space of the environment.

    Each observation is a dictionary with three keys `user`,
    `doc` and `response` that includes observation about user state,
    document and user response, respectively.
    """
    if isinstance(self._environment, environment.MultiUserEnvironment):
        user_obs_space = self._environment.user_model[0].observation_space()
        resp_obs_space = self._environment.user_model[0].response_space()
        user_obs_space = spaces.Tuple(
            [user_obs_space] * self._environment.num_users)
        resp_obs_space = spaces.Tuple(
            [resp_obs_space] * self._environment.num_users)

    if isinstance(self._environment, environment.SingleUserEnvironment):
        user_obs_space = self._environment.user_model.observation_space()
        resp_obs_space = self._environment.user_model.response_space()

    return spaces.Dict({
        'user': user_obs_space,
        'doc': self._environment.candidate_set.observation_space(),
        'response': resp_obs_space,
    })
Example #16
Source File: cluster_bandit_agent_test.py From recsim with Apache License 2.0

def dummy_observation_space(self):
    single_response_space = spaces.Dict({
        'cluster_id': spaces.Discrete(2),
        'click': spaces.Discrete(2)
    })
    doc_space = spaces.Dict(
        {0: spaces.Dict({'cluster_id': spaces.Discrete(2)})})
    user_space = spaces.Dict({
        'sufficient_statistics': spaces.Dict({
            'impression_count': spaces.Box(
                np.array([0] * 2), np.array([np.inf] * 2)),
            'click_count': spaces.Box(
                np.array([0] * 2), np.array([np.inf] * 2))
        })
    })
    return spaces.Dict({
        'user': user_space,
        'doc': doc_space,
        'response': spaces.Tuple([
            single_response_space,
        ])
    })
Example #17
Source File: mapf_gym_cap.py From distributedRL_MAPF with MIT License

def __init__(self, num_agents=1, observation_size=10, world0=None, goals0=None,
             DIAGONAL_MOVEMENT=False, SIZE=(10, 40), PROB=(0, .5),
             FULL_HELP=False, blank_world=False):
    """
    Args:
        DIAGONAL_MOVEMENT: if the agents are allowed to move diagonally
        SIZE: size of a side of the square grid
        PROB: range of probabilities that a given block is an obstacle
        FULL_HELP
    """
    # Initialize member variables
    self.num_agents = num_agents
    # a way of doing joint rewards
    self.individual_rewards = [0 for i in range(num_agents)]
    self.observation_size = observation_size
    self.SIZE = SIZE
    self.PROB = PROB
    self.fresh = True
    self.FULL_HELP = FULL_HELP
    self.finished = False
    self.mutex = Lock()
    self.DIAGONAL_MOVEMENT = DIAGONAL_MOVEMENT

    # Initialize data structures
    self._setWorld(world0, goals0, blank_world=blank_world)
    if DIAGONAL_MOVEMENT:
        self.action_space = spaces.Tuple([spaces.Discrete(self.num_agents),
                                          spaces.Discrete(9)])
    else:
        self.action_space = spaces.Tuple([spaces.Discrete(self.num_agents),
                                          spaces.Discrete(5)])
    self.viewer = None
Example #18
Source File: tensorforce_agent.py From playground with Apache License 2.0

def initialize(self, env):
    from gym import spaces
    from tensorforce.agents import PPOAgent

    if self.algorithm == "ppo":
        if type(env.action_space) == spaces.Tuple:
            actions = {
                str(num): {
                    'type': int,
                    'num_actions': space.n
                }
                for num, space in enumerate(env.action_space.spaces)
            }
        else:
            actions = dict(type='int', num_actions=env.action_space.n)

        return PPOAgent(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=actions,
            network=[
                dict(type='dense', size=64),
                dict(type='dense', size=64)
            ],
            batching_capacity=1000,
            step_optimizer=dict(type='adam', learning_rate=1e-4))
    return None
Example #19
Source File: iiwa_robotiq_3_finger.py From costar_plan with Apache License 2.0

def getActionSpace(self):
    return spaces.Tuple((spaces.Box(-np.pi, np.pi, 6),
                         spaces.Box(-0.6, 0.6, 1)))
Example #20
Source File: test_qmix.py From ray with Apache License 2.0

def test_avail_actions_qmix(self):
    grouping = {
        "group_1": ["agent_1"],  # trivial grouping for testing
    }
    obs_space = Tuple([AvailActionsTestEnv.observation_space])
    act_space = Tuple([AvailActionsTestEnv.action_space])
    register_env(
        "action_mask_test",
        lambda config: AvailActionsTestEnv(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))
    ray.init()
    agent = QMixTrainer(
        env="action_mask_test",
        config={
            "num_envs_per_worker": 5,  # test with vectorization on
            "env_config": {
                "avail_action": 3,
            },
            "framework": "torch",
        })
    for _ in range(5):
        agent.train()  # OK if it doesn't trip the action assertion error
    assert agent.train()["episode_reward_mean"] == 21.0
    agent.stop()
    ray.shutdown()
Example #21
Source File: test_catalog.py From ray with Apache License 2.0

def test_tuple_preprocessor(self):
    ray.init(object_store_memory=1000 * 1024 * 1024)

    class TupleEnv:
        def __init__(self):
            self.observation_space = Tuple(
                [Discrete(5),
                 Box(0, 5, shape=(3, ), dtype=np.float32)])

    p1 = ModelCatalog.get_preprocessor(TupleEnv())
    self.assertEqual(p1.shape, (8, ))
    self.assertEqual(
        list(p1.transform((0, np.array([1, 2, 3])))),
        [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example #22
Source File: space_utils.py From ray with Apache License 2.0

def get_base_struct_from_space(space):
    """Returns a Tuple/Dict Space as native (equally structured) py tuple/dict.

    Args:
        space (gym.Space): The Space to get the python struct for.

    Returns:
        Union[dict,tuple,gym.Space]: The struct equivalent to the given
            Space. Note that the returned struct still contains all
            original "primitive" Spaces (e.g. Box, Discrete).

    Examples:
        >>> get_base_struct_from_space(Dict({
        >>>     "a": Box(),
        >>>     "b": Tuple([Discrete(2), Discrete(3)])
        >>> }))
        >>> # Will return: dict(a=Box(), b=tuple(Discrete(2), Discrete(3)))
    """

    def _helper_struct(space_):
        if isinstance(space_, Tuple):
            return tuple(_helper_struct(s) for s in space_)
        elif isinstance(space_, Dict):
            return {k: _helper_struct(space_[k]) for k in space_.spaces}
        else:
            return space_

    return _helper_struct(space)
Example #23
Source File: nested_space_repeat_after_me_env.py From ray with Apache License 2.0

def __init__(self, config):
    self.observation_space = config.get(
        "space", Tuple([Discrete(2),
                        Dict({
                            "a": Box(-1.0, 1.0, (2, ))
                        })]))
    self.action_space = self.observation_space
    self.flattened_action_space = flatten_space(self.action_space)
    self.episode_len = config.get("episode_len", 100)
Example #24
Source File: correlated_actions_env.py From ray with Apache License 2.0

def __init__(self, _):
    self.observation_space = Discrete(2)
    self.action_space = Tuple([Discrete(2), Discrete(2)])
    self.last_observation = None
Example #25
Source File: env_utils.py From oac-explore with MIT License

def get_dim(space):
    if isinstance(space, Box):
        return space.low.size
    elif isinstance(space, Discrete):
        return space.n
    elif isinstance(space, Tuple):
        return sum(get_dim(subspace) for subspace in space.spaces)
    elif hasattr(space, 'flat_dim'):
        return space.flat_dim
    else:
        raise TypeError("Unknown space: {}".format(space))
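For a Tuple space, get_dim sums the dimensions of the sub-spaces: a Discrete(n) counts as n (its one-hot width) and a Box as its number of elements. A quick check, assuming the gym spaces are imported as in the example:

import numpy as np
from gym.spaces import Box, Discrete, Tuple

space = Tuple((Discrete(4), Box(-1.0, 1.0, shape=(3,), dtype=np.float32)))
assert get_dim(space) == 7  # 4 (one-hot Discrete) + 3 (Box elements)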
Example #26
Source File: jaco_robotiq.py From costar_plan with Apache License 2.0

def getActionSpace(self):
    return spaces.Tuple((spaces.Box(-np.pi, np.pi, 6),
                         spaces.Box(-0.6, 0.6, 1)))
Example #27
Source File: utils.py From cherry with Apache License 2.0

def is_discrete(space, vectorized=False):
    """
    Returns whether a space is discrete.

    **Arguments**

    * **space** - The space.
    * **vectorized** - Whether to return the discreteness for the
      vectorized environments (True) or just the discreteness of
      the underlying environment (False).
    """
    msg = 'Space type not supported.'
    assert isinstance(space, (Box, Discrete, Dict, Tuple)), msg
    if isinstance(space, Discrete):
        return True
    if isinstance(space, Box):
        return False
    if isinstance(space, Dict):
        dimensions = {
            k[0]: is_discrete(k[1], vectorized)
            for k in space.spaces.items()
        }
        return OrderedDict(dimensions)
    if isinstance(space, Tuple):
        if not vectorized:
            return is_discrete(space[0], vectorized)
        discrete = tuple(
            is_discrete(s) for s in space
        )
        return discrete
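For a Tuple space, the non-vectorized call inspects only the first sub-space, while vectorized=True reports each sub-space separately. A small sketch, assuming a gym version in which Tuple supports indexing and iteration (which the function above already relies on):

from gym.spaces import Box, Discrete, Tuple

space = Tuple((Discrete(2), Box(-1.0, 1.0, shape=(1,))))
assert is_discrete(space) is True  # non-vectorized: checks space[0] only
assert is_discrete(space, vectorized=True) == (True, False)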