Python gym.spaces.Tuple() Examples
The following are 27 code examples of gym.spaces.Tuple(), collected from open-source projects. The source file, project, and license are noted above each example. You may also want to check out all available functions/classes of the module gym.spaces.
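Before the examples, a minimal sketch (not from any of the projects below) of what gym.spaces.Tuple does: it composes several sub-spaces into a single space whose samples are plain Python tuples, with one entry per sub-space.

import numpy as np
from gym import spaces

# Compose a discrete choice with a 2-D continuous vector.
space = spaces.Tuple((
    spaces.Discrete(3),
    spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32),
))

sample = space.sample()        # e.g. (1, array([ 0.31, -0.74], dtype=float32))
assert space.contains(sample)  # samples are plain Python tuples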
Example #1
Source File: utility.py From playground with Apache License 2.0

def default(self, obj):
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, constants.Item):
        return obj.value
    elif isinstance(obj, constants.Action):
        return obj.value
    elif isinstance(obj, constants.GameType):
        return obj.value
    elif isinstance(obj, np.int64):
        return int(obj)
    elif hasattr(obj, 'to_json'):
        return obj.to_json()
    elif isinstance(obj, spaces.Discrete):
        return obj.n
    elif isinstance(obj, spaces.Tuple):
        return [space.n for space in obj.spaces]
    return json.JSONEncoder.default(self, obj)
Example #2
Source File: gym.py From sonic_contest with MIT License

def gym_space_distribution(space):
    """
    Create a Distribution from a gym.Space.

    If the space is not supported, throws an
    UnsupportedActionSpace exception.
    """
    if isinstance(space, spaces.Discrete):
        return CategoricalSoftmax(space.n)
    elif isinstance(space, spaces.Box):
        return BoxGaussian(space.low, space.high)
    elif isinstance(space, spaces.MultiBinary):
        return MultiBernoulli(space.n)
    elif isinstance(space, spaces.Tuple):
        sub_dists = tuple(gym_space_distribution(s) for s in space.spaces)
        return TupleDistribution(sub_dists)
    elif isinstance(space, spaces.MultiDiscrete):
        discretes = tuple(CategoricalSoftmax(n) for n in space.nvec)
        return TupleDistribution(
            discretes, to_sample=lambda x: np.array(x, dtype=space.dtype))
    raise UnsupportedGymSpace(space)
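Because the Tuple branch recurses, nested Tuple spaces map to nested TupleDistributions. A hypothetical usage sketch (CategoricalSoftmax, MultiBernoulli, and TupleDistribution are classes from this project, not from gym itself):

# Assumes the project's distribution classes are importable alongside gym.spaces.
dist = gym_space_distribution(
    spaces.Tuple([spaces.Discrete(3), spaces.MultiBinary(4)]))
# dist is a TupleDistribution wrapping
# (CategoricalSoftmax(3), MultiBernoulli(4)).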
Example #3
Source File: qmix_policy.py From ray with Apache License 2.0

def _validate(obs_space, action_space):
    if not hasattr(obs_space, "original_space") or \
            not isinstance(obs_space.original_space, Tuple):
        raise ValueError(
            "Obs space must be a Tuple, got {}. Use ".format(obs_space) +
            "MultiAgentEnv.with_agent_groups() to group related "
            "agents for QMix.")
    if not isinstance(action_space, Tuple):
        raise ValueError(
            "Action space must be a Tuple, got {}. ".format(action_space) +
            "Use MultiAgentEnv.with_agent_groups() to group related "
            "agents for QMix.")
    if not isinstance(action_space.spaces[0], Discrete):
        raise ValueError(
            "QMix requires a discrete action space, got {}".format(
                action_space.spaces[0]))
    if len({str(x) for x in obs_space.original_space.spaces}) > 1:
        raise ValueError(
            "Implementation limitation: observations of grouped agents "
            "must be homogeneous, got {}".format(
                obs_space.original_space.spaces))
    if len({str(x) for x in action_space.spaces}) > 1:
        raise ValueError(
            "Implementation limitation: action space of grouped agents "
            "must be homogeneous, got {}".format(action_space.spaces))
Example #4
Source File: food.py From multi-agent-emergence-environments with MIT License

def __init__(self, env, eat_thresh=0.5, max_food_health=10, respawn_time=np.inf,
             food_rew_type='selfish', reward_scale=1.0, reward_scale_obs=False):
    super().__init__(env)
    self.eat_thresh = eat_thresh
    self.max_food_health = max_food_health
    self.respawn_time = respawn_time
    self.food_rew_type = food_rew_type
    self.n_agents = self.metadata['n_agents']

    if type(reward_scale) not in [list, tuple, np.ndarray]:
        reward_scale = [reward_scale, reward_scale]
    self.reward_scale = reward_scale
    self.reward_scale_obs = reward_scale_obs

    # Reset obs/action space to match
    self.max_n_food = self.metadata['max_n_food']
    self.curr_n_food = self.metadata['curr_n_food']
    self.max_food_size = self.metadata['food_size']
    food_dim = 5 if self.reward_scale_obs else 4
    self.observation_space = update_obs_space(
        self.env, {'food_obs': (self.max_n_food, food_dim),
                   'food_health': (self.max_n_food, 1),
                   'food_eat': (self.max_n_food, 1)})
    self.action_space.spaces['action_eat_food'] = Tuple(
        [MultiDiscrete([2] * self.max_n_food) for _ in range(self.n_agents)])
Example #5
Source File: meta.py From sonic_contest with MIT License

def __init__(self, env, first_action, num_eps=1, warmup_eps=0):
    """
    Parameters:
      env: the environment to wrap.
      first_action: the action to include in the first observation.
      num_eps: episodes per meta-episode.
      warmup_eps: the number of episodes at the start of a
        meta-episode for which rewards are 0. Negative values
        are added to num_eps.
    """
    if warmup_eps < 0:
        warmup_eps += num_eps
    super(RL2Env, self).__init__(env)
    self.first_action = first_action
    self.observation_space = spaces.Tuple([
        env.observation_space,
        env.action_space,
        spaces.Box(low=-np.inf, high=np.inf, shape=(1,), dtype='float'),
        spaces.MultiBinary(1)
    ])
    self.num_eps = num_eps
    self.warmup_eps = warmup_eps
    self._done_eps = 0
Example #6
Source File: random_env.py From ray with Apache License 2.0

def step(self, action):
    if self.check_action_bounds and not self.action_space.contains(action):
        raise ValueError("Illegal action for {}: {}".format(
            self.action_space, action))
    if (isinstance(self.action_space, Tuple)
            and len(action) != len(self.action_space.spaces)):
        raise ValueError("Illegal action for {}: {}".format(
            self.action_space, action))
    return self.observation_space.sample(), \
        float(self.reward_space.sample()), \
        bool(np.random.choice(
            [True, False], p=[self.p_done, 1.0 - self.p_done])), {}


# Multi-agent version of the RandomEnv.
Example #7
Source File: _spaces.py From adeptRL with GNU General Public License v3.0

def _detect_gym_spaces(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": (gym_space.n,)}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": (gym_space.n,)}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.shape}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
Example #8
Source File: gym_env.py From RLs with Apache License 2.0

def step(self, actions):
    actions = np.array(actions)
    if not self.is_continuous:
        actions = sth.int2action_index(actions, self.discrete_action_dim_list)
        if self.action_type == 'discrete':
            actions = actions.reshape(-1,)
        elif self.action_type == 'Tuple(Discrete)':
            actions = actions.reshape(self.n, -1).tolist()
    results = Asyn.op_func(self.envs, Asyn.OP.STEP, actions)
    obs, reward, done, info = [np.asarray(e) for e in zip(*results)]
    reward = reward.astype('float32')
    dones_index = np.where(done)[0]
    if dones_index.shape[0] > 0:
        correct_new_obs = self.partial_reset(obs, dones_index)
    else:
        correct_new_obs = obs
    if self.obs_type == 'visual':
        obs = obs[:, np.newaxis, ...]
        correct_new_obs = correct_new_obs[:, np.newaxis, ...]
    return obs, reward, done, info, correct_new_obs
Example #9
Source File: _spaces.py From adeptRL with GNU General Public License v3.0

def dtypes_from_gym(gym_space):
    if isinstance(gym_space, spaces.Discrete):
        return {"Discrete": gym_space.dtype}
    elif isinstance(gym_space, spaces.MultiDiscrete):
        raise NotImplementedError
    elif isinstance(gym_space, spaces.MultiBinary):
        return {"MultiBinary": gym_space.dtype}
    elif isinstance(gym_space, spaces.Box):
        return {"Box": gym_space.dtype}
    elif isinstance(gym_space, spaces.Dict):
        return {
            name: list(Space._detect_gym_spaces(s).values())[0]
            for name, s in gym_space.spaces.items()
        }
    elif isinstance(gym_space, spaces.Tuple):
        return {
            idx: list(Space._detect_gym_spaces(s).values())[0]
            for idx, s in enumerate(gym_space.spaces)
        }
    else:
        raise NotImplementedError
Example #10
Source File: prisoners_dilemma.py From LOLA_DiCE with MIT License

def __init__(self, max_steps, batch_size=1):
    self.max_steps = max_steps
    self.batch_size = batch_size
    self.payout_mat = np.array([[-2, 0], [-3, -1]])
    self.states = np.array([[1, 2], [3, 4]])
    self.action_space = Tuple([
        Discrete(self.NUM_ACTIONS) for _ in range(self.NUM_AGENTS)
    ])
    self.observation_space = Tuple([
        OneHot(self.NUM_STATES) for _ in range(self.NUM_AGENTS)
    ])
    self.available_actions = [
        np.ones((batch_size, self.NUM_ACTIONS), dtype=int)
        for _ in range(self.NUM_AGENTS)
    ]
    self.step_count = None
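Sampling a Tuple action space like the one above yields one action per agent. A minimal sketch with plain gym spaces (NUM_AGENTS = 2 and NUM_ACTIONS = 2 are assumed from the iterated prisoner's dilemma setting):

from gym.spaces import Discrete, Tuple

joint_action_space = Tuple([Discrete(2) for _ in range(2)])
joint_action = joint_action_space.sample()  # e.g. (0, 1): one action per agent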
Example #11
Source File: env_checker.py From stable-baselines with MIT License

def _check_obs(obs: Union[tuple, dict, np.ndarray, int],
               observation_space: spaces.Space,
               method_name: str) -> None:
    """
    Check that the observation returned by the environment
    corresponds to the declared one.
    """
    if not isinstance(observation_space, spaces.Tuple):
        assert not isinstance(obs, tuple), (
            "The observation returned by the `{}()` "
            "method should be a single value, not a tuple".format(method_name))

    # The check for a GoalEnv is done by the base class
    if isinstance(observation_space, spaces.Discrete):
        assert isinstance(obs, int), (
            "The observation returned by `{}()` "
            "method must be an int".format(method_name))
    elif _enforce_array_obs(observation_space):
        assert isinstance(obs, np.ndarray), (
            "The observation returned by `{}()` "
            "method must be a numpy array".format(method_name))

    assert observation_space.contains(obs), (
        "The observation returned by the `{}()` "
        "method does not match the given observation space".format(method_name))
Example #12
Source File: windy_maze_env.py From ray with Apache License 2.0

def __init__(self, env_config):
    self.map = [m for m in MAP_DATA.split("\n") if m]
    self.x_dim = len(self.map)
    self.y_dim = len(self.map[0])
    logger.info("Loaded map {} {}".format(self.x_dim, self.y_dim))
    for x in range(self.x_dim):
        for y in range(self.y_dim):
            if self.map[x][y] == "S":
                self.start_pos = (x, y)
            elif self.map[x][y] == "F":
                self.end_pos = (x, y)
    logger.info("Start pos {} end pos {}".format(self.start_pos,
                                                 self.end_pos))
    self.observation_space = Tuple([
        Box(0, 100, shape=(2, )),  # (x, y)
        Discrete(4),  # wind direction (N, E, S, W)
    ])
    self.action_space = Discrete(2)  # whether to move or not
Example #13
Source File: cliff_env.py From gym-gridworlds with MIT License

def __init__(self):
    self.height = 4
    self.width = 12
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Tuple((
        spaces.Discrete(self.height),
        spaces.Discrete(self.width)
    ))
    self.moves = {
        0: (-1, 0),  # up
        1: (0, 1),   # right
        2: (1, 0),   # down
        3: (0, -1),  # left
    }

    # begin in start state
    self.reset()
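A Tuple of two Discrete spaces is convenient for tabular methods, since each observation can directly index a Q-table. A hypothetical sketch (the class name CliffEnv is assumed, not given in the snippet):

import numpy as np

env = CliffEnv()  # hypothetical name for the class defined above
q_table = np.zeros((env.height, env.width, env.action_space.n))
state = (0, 0)  # observations are (row, col) tuples
greedy_action = int(np.argmax(q_table[state]))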
Example #14
Source File: windy_gridworld_env.py From gym-gridworlds with MIT License

def __init__(self):
    self.height = 7
    self.width = 10
    self.action_space = spaces.Discrete(4)
    self.observation_space = spaces.Tuple((
        spaces.Discrete(self.height),
        spaces.Discrete(self.width)
    ))
    self.moves = {
        0: (-1, 0),  # up
        1: (0, 1),   # right
        2: (1, 0),   # down
        3: (0, -1),  # left
    }

    # begin in start state
    self.reset()
Example #15
Source File: recsim_gym.py From recsim with Apache License 2.0

def observation_space(self):
    """Returns the observation space of the environment.

    Each observation is a dictionary with three keys `user`,
    `doc` and `response` that includes observation about user state,
    document and user response, respectively.
    """
    if isinstance(self._environment, environment.MultiUserEnvironment):
        user_obs_space = self._environment.user_model[0].observation_space()
        resp_obs_space = self._environment.user_model[0].response_space()
        user_obs_space = spaces.Tuple(
            [user_obs_space] * self._environment.num_users)
        resp_obs_space = spaces.Tuple(
            [resp_obs_space] * self._environment.num_users)

    if isinstance(self._environment, environment.SingleUserEnvironment):
        user_obs_space = self._environment.user_model.observation_space()
        resp_obs_space = self._environment.user_model.response_space()

    return spaces.Dict({
        'user': user_obs_space,
        'doc': self._environment.candidate_set.observation_space(),
        'response': resp_obs_space,
    })
Example #16
Source File: cluster_bandit_agent_test.py From recsim with Apache License 2.0

def dummy_observation_space(self):
    single_response_space = spaces.Dict({
        'cluster_id': spaces.Discrete(2),
        'click': spaces.Discrete(2)
    })
    doc_space = spaces.Dict(
        {0: spaces.Dict({'cluster_id': spaces.Discrete(2)})})
    user_space = spaces.Dict({
        'sufficient_statistics': spaces.Dict({
            'impression_count': spaces.Box(
                np.array([0] * 2), np.array([np.inf] * 2)),
            'click_count': spaces.Box(
                np.array([0] * 2), np.array([np.inf] * 2))
        })
    })
    return spaces.Dict({
        'user': user_space,
        'doc': doc_space,
        'response': spaces.Tuple([
            single_response_space,
        ])
    })
Example #17
Source File: mapf_gym_cap.py From distributedRL_MAPF with MIT License

def __init__(self, num_agents=1, observation_size=10, world0=None, goals0=None,
             DIAGONAL_MOVEMENT=False, SIZE=(10, 40), PROB=(0, .5),
             FULL_HELP=False, blank_world=False):
    """
    Args:
        DIAGONAL_MOVEMENT: if the agents are allowed to move diagonally
        SIZE: size of a side of the square grid
        PROB: range of probabilities that a given block is an obstacle
        FULL_HELP
    """
    # Initialize member variables
    self.num_agents = num_agents
    # a way of doing joint rewards
    self.individual_rewards = [0 for i in range(num_agents)]
    self.observation_size = observation_size
    self.SIZE = SIZE
    self.PROB = PROB
    self.fresh = True
    self.FULL_HELP = FULL_HELP
    self.finished = False
    self.mutex = Lock()
    self.DIAGONAL_MOVEMENT = DIAGONAL_MOVEMENT

    # Initialize data structures
    self._setWorld(world0, goals0, blank_world=blank_world)
    if DIAGONAL_MOVEMENT:
        self.action_space = spaces.Tuple([spaces.Discrete(self.num_agents),
                                          spaces.Discrete(9)])
    else:
        self.action_space = spaces.Tuple([spaces.Discrete(self.num_agents),
                                          spaces.Discrete(5)])
    self.viewer = None
Example #18
Source File: tensorforce_agent.py From playground with Apache License 2.0

def initialize(self, env):
    from gym import spaces
    from tensorforce.agents import PPOAgent

    if self.algorithm == "ppo":
        if type(env.action_space) == spaces.Tuple:
            actions = {
                str(num): {
                    'type': int,
                    'num_actions': space.n
                }
                for num, space in enumerate(env.action_space.spaces)
            }
        else:
            actions = dict(type='int', num_actions=env.action_space.n)

        return PPOAgent(
            states=dict(type='float', shape=env.observation_space.shape),
            actions=actions,
            network=[
                dict(type='dense', size=64),
                dict(type='dense', size=64)
            ],
            batching_capacity=1000,
            step_optimizer=dict(type='adam', learning_rate=1e-4))
    return None
Example #19
Source File: iiwa_robotiq_3_finger.py From costar_plan with Apache License 2.0

def getActionSpace(self):
    return spaces.Tuple((spaces.Box(-np.pi, np.pi, 6),
                         spaces.Box(-0.6, 0.6, 1)))
Example #20
Source File: test_qmix.py From ray with Apache License 2.0

def test_avail_actions_qmix(self):
    grouping = {
        "group_1": ["agent_1"],  # trivial grouping for testing
    }
    obs_space = Tuple([AvailActionsTestEnv.observation_space])
    act_space = Tuple([AvailActionsTestEnv.action_space])
    register_env(
        "action_mask_test",
        lambda config: AvailActionsTestEnv(config).with_agent_groups(
            grouping, obs_space=obs_space, act_space=act_space))
    ray.init()
    agent = QMixTrainer(
        env="action_mask_test",
        config={
            "num_envs_per_worker": 5,  # test with vectorization on
            "env_config": {
                "avail_action": 3,
            },
            "framework": "torch",
        })
    for _ in range(5):
        agent.train()  # OK if it doesn't trip the action assertion error
    assert agent.train()["episode_reward_mean"] == 21.0
    agent.stop()
    ray.shutdown()
Example #21
Source File: test_catalog.py From ray with Apache License 2.0

def test_tuple_preprocessor(self):
    ray.init(object_store_memory=1000 * 1024 * 1024)

    class TupleEnv:
        def __init__(self):
            self.observation_space = Tuple(
                [Discrete(5),
                 Box(0, 5, shape=(3, ), dtype=np.float32)])

    p1 = ModelCatalog.get_preprocessor(TupleEnv())
    self.assertEqual(p1.shape, (8, ))
    self.assertEqual(
        list(p1.transform((0, np.array([1, 2, 3])))),
        [float(x) for x in [1, 0, 0, 0, 0, 1, 2, 3]])
Example #22
Source File: space_utils.py From ray with Apache License 2.0

def get_base_struct_from_space(space):
    """Returns a Tuple/Dict Space as native (equally structured) py tuple/dict.

    Args:
        space (gym.Space): The Space to get the python struct for.

    Returns:
        Union[dict,tuple,gym.Space]: The struct equivalent to the given
            Space. Note that the returned struct still contains all
            original "primitive" Spaces (e.g. Box, Discrete).

    Examples:
        >>> get_base_struct_from_space(Dict({
        >>>     "a": Box(),
        >>>     "b": Tuple([Discrete(2), Discrete(3)])
        >>> }))
        >>> # Will return: dict(a=Box(), b=tuple(Discrete(2), Discrete(3)))
    """

    def _helper_struct(space_):
        if isinstance(space_, Tuple):
            return tuple(_helper_struct(s) for s in space_)
        elif isinstance(space_, Dict):
            return {k: _helper_struct(space_[k]) for k in space_.spaces}
        else:
            return space_

    return _helper_struct(space)
Example #23
Source File: nested_space_repeat_after_me_env.py From ray with Apache License 2.0

def __init__(self, config):
    self.observation_space = config.get(
        "space", Tuple([Discrete(2),
                        Dict({
                            "a": Box(-1.0, 1.0, (2, ))
                        })]))
    self.action_space = self.observation_space
    self.flattened_action_space = flatten_space(self.action_space)
    self.episode_len = config.get("episode_len", 100)
Example #24
Source File: correlated_actions_env.py From ray with Apache License 2.0

def __init__(self, _):
    self.observation_space = Discrete(2)
    self.action_space = Tuple([Discrete(2), Discrete(2)])
    self.last_observation = None
Example #25
Source File: env_utils.py From oac-explore with MIT License

def get_dim(space):
    if isinstance(space, Box):
        return space.low.size
    elif isinstance(space, Discrete):
        return space.n
    elif isinstance(space, Tuple):
        return sum(get_dim(subspace) for subspace in space.spaces)
    elif hasattr(space, 'flat_dim'):
        return space.flat_dim
    else:
        raise TypeError("Unknown space: {}".format(space))
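For a Tuple space, get_dim sums the dimensions of the sub-spaces: a Discrete(n) counts as n (its one-hot width) and a Box as its number of elements. A quick check, assuming the gym spaces are imported as in the example:

import numpy as np
from gym.spaces import Box, Discrete, Tuple

space = Tuple((Discrete(4), Box(-1.0, 1.0, shape=(3,), dtype=np.float32)))
assert get_dim(space) == 7  # 4 (one-hot Discrete) + 3 (Box elements)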
Example #26
Source File: jaco_robotiq.py From costar_plan with Apache License 2.0

def getActionSpace(self):
    return spaces.Tuple((spaces.Box(-np.pi, np.pi, 6),
                         spaces.Box(-0.6, 0.6, 1)))
Example #27
Source File: utils.py From cherry with Apache License 2.0

def is_discrete(space, vectorized=False):
    """
    Returns whether a space is discrete.

    **Arguments**

    * **space** - The space.
    * **vectorized** - Whether to return the discreteness for the
      vectorized environments (True) or just the discreteness of
      the underlying environment (False).
    """
    msg = 'Space type not supported.'
    assert isinstance(space, (Box, Discrete, Dict, Tuple)), msg
    if isinstance(space, Discrete):
        return True
    if isinstance(space, Box):
        return False
    if isinstance(space, Dict):
        dimensions = {
            k[0]: is_discrete(k[1], vectorized)
            for k in space.spaces.items()
        }
        return OrderedDict(dimensions)
    if isinstance(space, Tuple):
        if not vectorized:
            return is_discrete(space[0], vectorized)
        discrete = tuple(
            is_discrete(s) for s in space
        )
        return discrete
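For a Tuple space, the non-vectorized call inspects only the first sub-space, while vectorized=True reports each sub-space separately. A small sketch, assuming a gym version in which Tuple supports indexing and iteration (which the function above already relies on):

from gym.spaces import Box, Discrete, Tuple

space = Tuple((Discrete(2), Box(-1.0, 1.0, shape=(1,))))
assert is_discrete(space) is True  # non-vectorized: checks space[0] only
assert is_discrete(space, vectorized=True) == (True, False)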