Python gym.spaces.Discrete() Examples

The following are 24 code examples of gym.spaces.Discrete(), collected from open-source projects. The source file and originating project are noted above each example. You may also want to check out the other available functions and classes of the gym.spaces module.
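Before the project examples, here is a minimal, self-contained sketch of how a Discrete space behaves on its own (standard gym API only):

from gym import spaces

# Discrete(n) models the integer set {0, 1, ..., n-1}.
space = spaces.Discrete(5)

print(space.n)            # 5
action = space.sample()   # uniformly random integer in [0, 5)
print(space.contains(3))  # True: 3 is a valid element
print(space.contains(5))  # False: elements must be strictly less than n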
Example #1
Source File: cartpole_bullet.py    From soccer-matlab with BSD 2-Clause "Simplified" License
def __init__(self, renders=True):
    # start the bullet physics server
    self._renders = renders
    if renders:
        p.connect(p.GUI)
    else:
        p.connect(p.DIRECT)

    observation_high = np.array([
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max])
    action_high = np.array([0.1])

    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(-observation_high, observation_high)

    self.theta_threshold_radians = 1
    self.x_threshold = 2.4
    self._seed()
#    self.reset()
    self.viewer = None
    self._configure()
Example #2
Source File: base.py    From ConvLab with MIT License
def set_gym_space_attr(gym_space):
    '''Set missing gym space attributes for standardization'''
    if isinstance(gym_space, spaces.Box):
        setattr(gym_space, 'is_discrete', False)
    elif isinstance(gym_space, spaces.Discrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', 0)
        setattr(gym_space, 'high', gym_space.n)
    elif isinstance(gym_space, spaces.MultiBinary):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.full(gym_space.n, 0))
        setattr(gym_space, 'high', np.full(gym_space.n, 2))
    elif isinstance(gym_space, spaces.MultiDiscrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.zeros_like(gym_space.nvec))
        setattr(gym_space, 'high', np.array(gym_space.nvec))
    else:
        raise ValueError('gym_space not recognized') 
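A short driver for the helper above (hypothetical code, not part of ConvLab) shows the attributes it attaches to a Discrete space:

from gym import spaces

action_space = spaces.Discrete(4)
set_gym_space_attr(action_space)  # helper defined in the example above

# The space is patched in place with uniform attributes, so downstream
# code can treat all space types the same way.
print(action_space.is_discrete)  # True
print(action_space.low)          # 0
print(action_space.high)         # 4, i.e. gym_space.n (valid values are 0..3)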
Example #3
Source File: fixed_sequence_env.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(
            self,
            n_actions=10,
            seed=0,
            episode_len=100
    ):
        self.np_random = np.random.RandomState()
        self.np_random.seed(seed)
        # note: randint's upper bound is exclusive, so this samples from [0, n_actions - 2]
        self.sequence = [self.np_random.randint(0, n_actions-1) for _ in range(episode_len)]

        self.action_space = Discrete(n_actions)
        self.observation_space = Discrete(1)

        self.episode_len = episode_len
        self.time = 0
        self.reset() 
Example #4
Source File: lunar_lander.py    From cs294-112_hws with MIT License
def __init__(self):
        self._seed()
        self.viewer = None

        self.world = Box2D.b2World()
        self.moon = None
        self.lander = None
        self.particles = []

        self.prev_reward = None

        high = np.array([np.inf]*N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(-high, high)

        self.action_space = spaces.Discrete(N_ACT_DIM)

        self.curr_step = None

        self._reset() 
Example #5
Source File: input.py    From HardRLWithYoutube with MIT License
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    ''' 
    Create placeholder to feed observations into of the size appropriate to the observation space
    
    Parameters:
    ----------

    ob_space: gym.Space     observation space
    
    batch_size: int         size of the batch to be fed into input. Can be left None in most cases. 

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''

    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'

    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name) 
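For context, a hypothetical call site for observation_placeholder might look like the following (this assumes the TensorFlow 1.x API, where tf.placeholder exists, and that the function above is in scope):

import gym

env = gym.make('CartPole-v0')

# CartPole has a Box observation space of shape (4,), so this creates
# a float32 placeholder of shape (None, 4).
ob_ph = observation_placeholder(env.observation_space)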
Example #6
Source File: input.py    From HardRLWithYoutube with MIT License
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------
    
    ob_space: gym.Space             observation space
    
    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))

    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError 
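A brief sketch of the Discrete branch (again assuming TensorFlow 1.x and that encode_observation above is in scope): a batch of scalar integer observations becomes a batch of one-hot vectors, which is the input format most policy networks expect.

import tensorflow as tf
from gym.spaces import Discrete

ob_space = Discrete(4)
ob_ph = tf.placeholder(shape=(None,), dtype=ob_space.dtype, name='Ob')

# An observation of 2 out of {0, 1, 2, 3} is encoded as [0., 0., 1., 0.]
encoded = encode_observation(ob_space, ob_ph)  # shape (None, 4), float32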
Example #7
Source File: run_bcq_on_batchdata.py    From EasyRL with Apache License 2.0
def __init__(self, file_name, batch_size=128, n_step=1):
        # create an offline env that fakes interaction with the agent
        self.num_epoch = 0
        self.num_record = 0
        self._offset = 0

        # how many records to read from the table at one time
        self.batch_size = batch_size
        # number of steps reserved for n-step DQN
        self.n_step = n_step

        # define the shapes of observation and action following gym.spaces:
        # `Box` for continuous spaces, `Discrete` for discrete spaces and `Dict` for multiple inputs
        # the low/high limits are not used by the agent but are required by gym.spaces
        self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
        self.action_space = Discrete(n=2)

        fr = open(file_name)
        self.data = fr.readlines()
        self.num_record = len(self.data)
        fr.close() 
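The comment above mentions Dict for multiple inputs; a minimal sketch of such a composite space (generic gym code, not specific to EasyRL) would be:

import numpy as np
from gym.spaces import Box, Dict, Discrete

# A composite observation combining a continuous sensor reading with a discrete mode flag.
obs_space = Dict({
    'sensors': Box(low=-np.inf, high=np.inf, shape=(4,)),
    'mode': Discrete(3),
})
sample = obs_space.sample()  # OrderedDict with a 'sensors' array and a 'mode' integer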
Example #8
Source File: fixed_sequence_env.py    From HardRLWithYoutube with MIT License
def __init__(
            self,
            n_actions=10,
            seed=0,
            episode_len=100
    ):
        self.np_random = np.random.RandomState()
        self.np_random.seed(seed)
        # note: randint's upper bound is exclusive, so this samples from [0, n_actions - 2]
        self.sequence = [self.np_random.randint(0, n_actions-1) for _ in range(episode_len)]

        self.action_space = Discrete(n_actions)
        self.observation_space = Discrete(1)

        self.episode_len = episode_len
        self.time = 0
        self.reset() 
Example #9
Source File: mnist_env.py    From HardRLWithYoutube with MIT License
def __init__(
            self,
            seed=0,
            episode_len=None,
            no_images=None
    ):
        import filelock
        from tensorflow.examples.tutorials.mnist import input_data
        # we could use a temporary directory for this with a context manager and
        # TemporaryDirectory, but then each test that uses mnist would re-download the data;
        # this way the data is not cleaned up, but we only download it once per machine
        mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
        with filelock.FileLock(mnist_path + '.lock'):
            self.mnist = input_data.read_data_sets(mnist_path)

        self.np_random = np.random.RandomState()
        self.np_random.seed(seed)

        self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
        self.action_space = Discrete(10)
        self.episode_len = episode_len
        self.time = 0
        self.no_images = no_images

        self.train_mode()
        self.reset() 
Example #10
Source File: bit_flip.py    From rlgraph with Apache License 2.0
def __init__(self, bit_length=16, max_steps=None):

        super(BitFlip, self).__init__()

        assert bit_length >= 1, 'bit_length must be >= 1, found {}'.format(bit_length)

        self.bit_length = bit_length

        if max_steps is None:
            self.max_steps = bit_length
        else:
            self.max_steps = max_steps

        self.last_action = -1  # -1 for reset
        self.steps = 0
        self.seed()
        self.action_space = spaces.Discrete(bit_length + 1)  # index = n means to not flip any bit
        # achieved_goal and observation are identical in the bit_flip environment; it is
        # structured this way to be compatible with OpenAI's GoalEnv interface
        self.observation_space = spaces.Dict(dict(
            observation=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
            achieved_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
            desired_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        ))

        self.reset() 
Example #11
Source File: safelife_env.py    From safelife with Apache License 2.0
def __init__(self, level_iterator, **kwargs):
        self.level_iterator = level_iterator

        load_kwargs(self, kwargs)

        self.action_space = spaces.Discrete(len(self.action_names))
        if self.output_channels is None:
            self.observation_space = spaces.Box(
                low=0, high=2**15,
                shape=self.view_shape,
                dtype=np.uint16,
            )
        else:
            self.observation_space = spaces.Box(
                low=0, high=1,
                shape=self.view_shape + (len(self.output_channels),),
                dtype=np.uint8,
            )
        self.seed() 
Example #12
Source File: random_agent.py    From irl-benchmark with GNU General Public License v3.0
def pick_action(self, state: Union[int, float, np.ndarray]
                    ) -> Union[int, float, np.ndarray]:
        """ Pick an action given a state.

        Picks uniformly at random from all possible actions, using the
        environment's action_space.sample() method.

        Parameters
        ----------
        state: int
            An integer corresponding to a state of a DiscreteEnv.
            Not used in this agent.

        Returns
        -------
        Union[int, float, np.ndarray]
            An action
        """
        # if other spaces are needed, check if their sample method conforms with
        # returned type, change if necessary.
        assert isinstance(self.env.action_space,
                          (Box, Discrete, MultiDiscrete, MultiBinary))
        return self.env.action_space.sample() 
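The agent delegates entirely to action_space.sample(); for Discrete(n) this draws uniformly from {0, ..., n-1}. A standalone sketch (Space.seed is available in recent gym versions):

from gym.spaces import Discrete

action_space = Discrete(6)
action_space.seed(0)  # seed the space's own RNG for reproducibility
actions = [action_space.sample() for _ in range(5)]  # five integers in [0, 6)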
Example #13
Source File: abc.py    From chainerrl with MIT License
def __init__(self, size=2, discrete=True, partially_observable=False,
                 episodic=True, deterministic=False):
        self.size = size
        self.terminal_state = size
        self.episodic = episodic
        self.partially_observable = partially_observable
        self.deterministic = deterministic
        self.n_max_offset = 1
        # (s_0, ..., s_N) + terminal state + offset
        self.n_dim_obs = self.size + 1 + self.n_max_offset
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.n_dim_obs,), dtype=np.float32,
        )
        if discrete:
            self.action_space = spaces.Discrete(self.size)
        else:
            self.action_space = spaces.Box(
                low=-1.0, high=1.0,
                shape=(self.size,), dtype=np.float32,
            ) 
Example #14
Source File: input.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))

    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError 
Example #15
Source File: gym_env_problem_test.py    From tensor2tensor with Apache License 2.0
def test_setup(self):
    ep = gym_env_problem.GymEnvProblem(
        base_env_name="CartPole-v0", batch_size=5)
    # Checks that environments were created and they are `batch_size` in number.
    ep.assert_common_preconditions()

    # Expectations on the observation space.
    observation_space = ep.observation_space
    self.assertIsInstance(observation_space, Box)
    self.assertEqual(observation_space.shape, (4,))
    self.assertEqual(observation_space.dtype, np.float32)

    # Expectations on the action space.
    action_space = ep.action_space
    self.assertIsInstance(action_space, Discrete)
    self.assertEqual(action_space.shape, ())
    self.assertEqual(action_space.dtype, np.int64)
    self.assertEqual(ep.num_actions, 2)

    # Reward range is infinite here.
    self.assertFalse(ep.is_reward_range_finite) 
Example #16
Source File: input.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''

    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'

    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name) 
Example #17
Source File: policy_util.py    From ConvLab with MIT License
def get_action_type(action_space):
    '''Get the action type, used to choose the probability distribution for sampling actions from the NN logits output'''
    if isinstance(action_space, spaces.Box):
        shape = action_space.shape
        assert len(shape) == 1
        if shape[0] == 1:
            return 'continuous'
        else:
            return 'multi_continuous'
    elif isinstance(action_space, spaces.Discrete):
        return 'discrete'
    elif isinstance(action_space, spaces.MultiDiscrete):
        return 'multi_discrete'
    elif isinstance(action_space, spaces.MultiBinary):
        return 'multi_binary'
    else:
        raise NotImplementedError


# action_policy base methods 
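A quick illustration of the mapping implemented above (hypothetical driver code):

from gym import spaces

print(get_action_type(spaces.Discrete(4)))                        # 'discrete'
print(get_action_type(spaces.Box(low=-1., high=1., shape=(1,))))  # 'continuous'
print(get_action_type(spaces.Box(low=-1., high=1., shape=(3,))))  # 'multi_continuous'
print(get_action_type(spaces.MultiDiscrete([3, 5])))              # 'multi_discrete'
print(get_action_type(spaces.MultiBinary(4)))                     # 'multi_binary'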
Example #18
Source File: core.py    From spinningup with MIT License
def __init__(self, observation_space, action_space, 
                 hidden_sizes=(64,64), activation=nn.Tanh):
        super().__init__()

        obs_dim = observation_space.shape[0]

        # policy builder depends on action space
        if isinstance(action_space, Box):
            self.pi = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation)
        elif isinstance(action_space, Discrete):
            self.pi = MLPCategoricalActor(obs_dim, action_space.n, hidden_sizes, activation)

        # build value function
        self.v  = MLPCritic(obs_dim, hidden_sizes, activation) 
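This __init__ belongs to Spinning Up's MLPActorCritic class; a hedged usage sketch (assuming the class and its actor/critic modules are importable as in that repository):

import gym
from spinup.algos.pytorch.ppo.core import MLPActorCritic  # import path as in the Spinning Up repo

env = gym.make('CartPole-v0')

# CartPole's action space is Discrete(2), so the constructor picks
# MLPCategoricalActor for the policy head.
ac = MLPActorCritic(env.observation_space, env.action_space)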
Example #19
Source File: distributions.py    From rl_graph_generation with BSD 3-Clause "New" or "Revised" License
def make_pdtype(ac_space):
    from gym import spaces
    if isinstance(ac_space, spaces.Box):
        assert len(ac_space.shape) == 1
        return DiagGaussianPdType(ac_space.shape[0])
    elif isinstance(ac_space, spaces.Discrete):
        return CategoricalPdType(ac_space.n)
    elif isinstance(ac_space, spaces.MultiDiscrete):
        return MultiCategoricalPdType(ac_space.nvec)
    elif isinstance(ac_space, spaces.MultiBinary):
        return BernoulliPdType(ac_space.n)
    else:
        raise NotImplementedError 
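For reference, a small sketch of how make_pdtype is typically used downstream (names follow the OpenAI Baselines distributions module, from which this helper derives):

from gym import spaces

ac_space = spaces.Discrete(3)
pdtype = make_pdtype(ac_space)  # a CategoricalPdType over 3 actions

# A policy network would emit flat parameters (here: 3 logits), wrap them in a
# distribution, and sample an action:
#   pd = pdtype.pdfromflat(logits)
#   action = pd.sample()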
Example #20
Source File: environment.py    From 2019-OSS-Summer-RL with MIT License
def __init__(self, maze_file=None, maze_size=(10, 10), mode=None, enable_render=True):
        self.viewer = None
        self.enable_render = enable_render

        has_loops = True
        num_portals = 3

        self.maze_view = MazeView2D(maze_name="OpenAI Gym - Maze (%d x %d)" % maze_size,
                                    maze_size=maze_size, screen_size=(640, 640),
                                    has_loops=has_loops, num_portals=num_portals,
                                    enable_render=enable_render)

        self.maze_size = self.maze_view.maze_size

        self.action_space = spaces.Discrete(2*len(self.maze_size))

        low = np.zeros(len(self.maze_size), dtype=int)
        high = np.array(self.maze_size, dtype=int) - np.ones(len(self.maze_size), dtype=int)
        self.observation_space = spaces.Box(low, high, dtype=np.int64)

        self.state = None
        self.steps_beyond_done = None

        self.seed()
        self.reset()

        self.configure() 
Example #21
Source File: core.py    From spinningup with MIT License
def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh, 
                     output_activation=None, policy=None, action_space=None):

    # default policy builder depends on action space
    if policy is None and isinstance(action_space, Box):
        policy = mlp_gaussian_policy
    elif policy is None and isinstance(action_space, Discrete):
        policy = mlp_categorical_policy

    with tf.variable_scope('pi'):
        pi, logp, logp_pi = policy(x, a, hidden_sizes, activation, output_activation, action_space)
    with tf.variable_scope('v'):
        v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1)
    return pi, logp, logp_pi, v 
Example #22
Source File: core.py    From spinningup with MIT License
def placeholder_from_space(space):
    if isinstance(space, Box):
        return placeholder(space.shape)
    elif isinstance(space, Discrete):
        return tf.placeholder(dtype=tf.int32, shape=(None,))
    raise NotImplementedError 
Example #23
Source File: env_utils.py    From tf2rl with MIT License
def get_act_dim(env):
    if isinstance(env.action_space, Discrete):
        return 1  # env.action_space.n
    elif isinstance(env.action_space, Box):
        return env.action_space.low.size
    else:
        raise NotImplementedError 
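A short check of both branches, using minimal stand-in objects that carry only an action_space attribute (hypothetical driver code):

from types import SimpleNamespace
from gym.spaces import Box, Discrete

disc_env = SimpleNamespace(action_space=Discrete(5))
cont_env = SimpleNamespace(action_space=Box(low=-1.0, high=1.0, shape=(3,)))

print(get_act_dim(disc_env))  # 1  (a Discrete action is a single integer)
print(get_act_dim(cont_env))  # 3  (env.action_space.low.size)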
Example #24
Source File: mnist_env.py    From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0
def __init__(
            self,
            seed=0,
            episode_len=None,
            no_images=None
    ):
        import filelock
        from tensorflow.examples.tutorials.mnist import input_data
        # we could use a temporary directory for this with a context manager and
        # TemporaryDirectory, but then each test that uses mnist would re-download the data;
        # this way the data is not cleaned up, but we only download it once per machine
        mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
        with filelock.FileLock(mnist_path + '.lock'):
            self.mnist = input_data.read_data_sets(mnist_path)

        self.np_random = np.random.RandomState()
        self.np_random.seed(seed)

        self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
        self.action_space = Discrete(10)
        self.episode_len = episode_len
        self.time = 0
        self.no_images = no_images

        self.train_mode()
        self.reset()