Python gym.spaces.Discrete() Examples
The following are 30 code examples of gym.spaces.Discrete(), taken from open-source projects. You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the module gym.spaces.
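Before the examples, here is a minimal sketch (assuming a classic gym install) of what spaces.Discrete(n) represents: a finite set of integer actions or observations {0, 1, ..., n-1}.

from gym import spaces

action_space = spaces.Discrete(4)
print(action_space.n)            # 4
print(action_space.sample())     # a random integer from {0, 1, 2, 3}
print(action_space.contains(3))  # True
print(action_space.contains(4))  # False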
Example #1
Source File: cartpole_bullet.py From soccer-matlab with BSD 2-Clause "Simplified" License | 9 votes |
def __init__(self, renders=True):
    # start the bullet physics server
    self._renders = renders
    if (renders):
        p.connect(p.GUI)
    else:
        p.connect(p.DIRECT)

    observation_high = np.array([
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max])
    action_high = np.array([0.1])

    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(-observation_high, observation_high)

    self.theta_threshold_radians = 1
    self.x_threshold = 2.4
    self._seed()
    # self.reset()
    self.viewer = None
    self._configure()
Example #2
Source File: base.py From ConvLab with MIT License | 8 votes |
def set_gym_space_attr(gym_space):
    '''Set missing gym space attributes for standardization'''
    if isinstance(gym_space, spaces.Box):
        setattr(gym_space, 'is_discrete', False)
    elif isinstance(gym_space, spaces.Discrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', 0)
        setattr(gym_space, 'high', gym_space.n)
    elif isinstance(gym_space, spaces.MultiBinary):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.full(gym_space.n, 0))
        setattr(gym_space, 'high', np.full(gym_space.n, 2))
    elif isinstance(gym_space, spaces.MultiDiscrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.zeros_like(gym_space.nvec))
        setattr(gym_space, 'high', np.array(gym_space.nvec))
    else:
        raise ValueError('gym_space not recognized')
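A minimal usage sketch of the helper above (the call below is hypothetical and assumes numpy and gym.spaces are imported as in the example): after the call, every space exposes a uniform is_discrete/low/high interface.

space = spaces.Discrete(5)
set_gym_space_attr(space)
print(space.is_discrete)      # True
print(space.low, space.high)  # 0 5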
Example #3
Source File: fixed_sequence_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #4
Source File: lunar_lander.py From cs294-112_hws with MIT License | 6 votes |
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf]*N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)
    self.action_space = spaces.Discrete(N_ACT_DIM)

    self.curr_step = None

    self._reset()
Example #5
Source File: input.py From HardRLWithYoutube with MIT License | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #6
Source File: input.py From HardRLWithYoutube with MIT License | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
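To illustrate the Discrete branch above, a small sketch (assuming TensorFlow 1.x graph mode, as in the example, and the observation_placeholder helper from the same file): an integer placeholder becomes a float one-hot tensor of depth ob_space.n.

import numpy as np
import tensorflow as tf
from gym.spaces import Discrete

ob_space = Discrete(4)
ph = observation_placeholder(ob_space)      # int placeholder of shape (None,)
encoded = encode_observation(ob_space, ph)  # float one-hot of shape (None, 4)

with tf.Session() as sess:
    print(sess.run(encoded, feed_dict={ph: np.array([0, 2], dtype=np.int64)}))
    # [[1. 0. 0. 0.]
    #  [0. 0. 1. 0.]]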
Example #7
Source File: run_bcq_on_batchdata.py From EasyRL with Apache License 2.0 | 6 votes |
def __init__(self, file_name, batch_size=128, n_step=1):
    # create an offline_env to do fake interaction with agent
    self.num_epoch = 0
    self.num_record = 0
    self._offset = 0

    # how many records to read from table at one time
    self.batch_size = batch_size
    # number of step to reserved for n-step dqn
    self.n_step = n_step

    # defined the shape of observation and action
    # we follow the definition of gym.spaces
    # `Box` for continue-space, `Discrete` for discrete-space and `Dict` for multiple input
    # actually low/high limitation will not be used by agent but required by gym.spaces
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
    self.action_space = Discrete(n=2)

    fr = open(file_name)
    self.data = fr.readlines()
    self.num_record = len(self.data)
    fr.close()
Example #8
Source File: fixed_sequence_env.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #9
Source File: lunar_lander.py From cs294-112_hws with MIT License | 6 votes |
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf]*N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)
    self.action_space = spaces.Discrete(N_ACT_DIM)

    self.curr_step = None

    self._reset()
Example #10
Source File: mnist_env.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(
        self,
        seed=0,
        episode_len=None,
        no_images=None
):
    from tensorflow.examples.tutorials.mnist import input_data
    # we could use temporary directory for this with a context manager and
    # TemporaryDirectory, but then each test that uses mnist would re-download the data
    # this way the data is not cleaned up, but we only download it once per machine
    mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
    with filelock.FileLock(mnist_path + '.lock'):
        self.mnist = input_data.read_data_sets(mnist_path)

    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)

    self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
    self.action_space = Discrete(10)
    self.episode_len = episode_len
    self.time = 0
    self.no_images = no_images

    self.train_mode()
    self.reset()
Example #11
Source File: bit_flip.py From rlgraph with Apache License 2.0 | 6 votes |
def __init__(self, bit_length=16, max_steps=None):
    super(BitFlip, self).__init__()

    assert bit_length >= 1, 'bit_length must be >= 1, found {}'.format(bit_length)
    self.bit_length = bit_length

    if max_steps is None:
        self.max_steps = bit_length
    else:
        self.max_steps = max_steps

    self.last_action = -1  # -1 for reset
    self.steps = 0

    self.seed()
    self.action_space = spaces.Discrete(bit_length + 1)  # index = n means to not flip any bit
    # achieved goal and observation are identical in bit_flip environment, however it is made this way to be
    # compatible with Openai GoalEnv
    self.observation_space = spaces.Dict(dict(
        observation=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        achieved_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        desired_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
    ))

    self.reset()
Example #12
Source File: safelife_env.py From safelife with Apache License 2.0 | 6 votes |
def __init__(self, level_iterator, **kwargs):
    self.level_iterator = level_iterator

    load_kwargs(self, kwargs)

    self.action_space = spaces.Discrete(len(self.action_names))
    if self.output_channels is None:
        self.observation_space = spaces.Box(
            low=0, high=2**15,
            shape=self.view_shape,
            dtype=np.uint16,
        )
    else:
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=self.view_shape + (len(self.output_channels),),
            dtype=np.uint8,
        )
    self.seed()
Example #13
Source File: random_agent.py From irl-benchmark with GNU General Public License v3.0 | 6 votes |
def pick_action(self, state: Union[int, float, np.ndarray]
                ) -> Union[int, float, np.ndarray]:
    """Pick an action given a state.

    Picks uniformly random from all possible actions, using the environments
    action_space.sample() method.

    Parameters
    ----------
    state: int
        An integer corresponding to a state of a DiscreteEnv.
        Not used in this agent.

    Returns
    -------
    Union[int, float, np.ndarray]
        An action
    """
    # if other spaces are needed, check if their sample method conforms with
    # returned type, change if necessary.
    assert isinstance(self.env.action_space,
                      (Box, Discrete, MultiDiscrete, MultiBinary))
    return self.env.action_space.sample()
Example #14
Source File: abc.py From chainerrl with MIT License | 6 votes |
def __init__(self, size=2, discrete=True, partially_observable=False,
             episodic=True, deterministic=False):
    self.size = size
    self.terminal_state = size
    self.episodic = episodic
    self.partially_observable = partially_observable
    self.deterministic = deterministic
    self.n_max_offset = 1
    # (s_0, ..., s_N) + terminal state + offset
    self.n_dim_obs = self.size + 1 + self.n_max_offset
    self.observation_space = spaces.Box(
        low=-np.inf,
        high=np.inf,
        shape=(self.n_dim_obs,),
        dtype=np.float32,
    )
    if discrete:
        self.action_space = spaces.Discrete(self.size)
    else:
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.size,), dtype=np.float32,
        )
Example #15
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #16
Source File: gym_env_problem_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def test_setup(self):
    ep = gym_env_problem.GymEnvProblem(base_env_name="CartPole-v0",
                                       batch_size=5)
    # Checks that environments were created and they are `batch_size` in number.
    ep.assert_common_preconditions()

    # Expectations on the observation space.
    observation_space = ep.observation_space
    self.assertIsInstance(observation_space, Box)
    self.assertEqual(observation_space.shape, (4,))
    self.assertEqual(observation_space.dtype, np.float32)

    # Expectations on the action space.
    action_space = ep.action_space
    self.assertTrue(isinstance(action_space, Discrete))
    self.assertEqual(action_space.shape, ())
    self.assertEqual(action_space.dtype, np.int64)
    self.assertEqual(ep.num_actions, 2)

    # Reward range is infinite here.
    self.assertFalse(ep.is_reward_range_finite)
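The expectations in the test mirror the underlying CartPole-v0 spaces, which can be checked directly with gym (a sketch, assuming a standard gym install with CartPole-v0 registered):

import gym
from gym.spaces import Box, Discrete

env = gym.make("CartPole-v0")
assert isinstance(env.observation_space, Box)
assert env.observation_space.shape == (4,)
assert isinstance(env.action_space, Discrete)
assert env.action_space.n == 2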
Example #17
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #18
Source File: policy_util.py From ConvLab with MIT License | 6 votes |
def get_action_type(action_space):
    '''Method to get the action type to choose prob. dist. to sample actions from NN logits output'''
    if isinstance(action_space, spaces.Box):
        shape = action_space.shape
        assert len(shape) == 1
        if shape[0] == 1:
            return 'continuous'
        else:
            return 'multi_continuous'
    elif isinstance(action_space, spaces.Discrete):
        return 'discrete'
    elif isinstance(action_space, spaces.MultiDiscrete):
        return 'multi_discrete'
    elif isinstance(action_space, spaces.MultiBinary):
        return 'multi_binary'
    else:
        raise NotImplementedError


# action_policy base methods
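A short sketch of the strings the function above returns for common spaces (hypothetical calls, assuming gym.spaces is imported as in the example):

print(get_action_type(spaces.Discrete(3)))                           # 'discrete'
print(get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(1,))))   # 'continuous'
print(get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(4,))))   # 'multi_continuous'
print(get_action_type(spaces.MultiDiscrete([3, 4])))                 # 'multi_discrete'
print(get_action_type(spaces.MultiBinary(5)))                        # 'multi_binary'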
Example #19
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #20
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #21
Source File: fixed_sequence_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #22
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #23
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #24
Source File: core.py From spinningup with MIT License | 5 votes |
def __init__(self, observation_space, action_space,
             hidden_sizes=(64,64), activation=nn.Tanh):
    super().__init__()

    obs_dim = observation_space.shape[0]

    # policy builder depends on action space
    if isinstance(action_space, Box):
        self.pi = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation)
    elif isinstance(action_space, Discrete):
        self.pi = MLPCategoricalActor(obs_dim, action_space.n, hidden_sizes, activation)

    # build value function
    self.v = MLPCritic(obs_dim, hidden_sizes, activation)
Example #25
Source File: distributions.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_pdtype(ac_space):
    from gym import spaces
    if isinstance(ac_space, spaces.Box):
        assert len(ac_space.shape) == 1
        return DiagGaussianPdType(ac_space.shape[0])
    elif isinstance(ac_space, spaces.Discrete):
        return CategoricalPdType(ac_space.n)
    elif isinstance(ac_space, spaces.MultiDiscrete):
        return MultiCategoricalPdType(ac_space.nvec)
    elif isinstance(ac_space, spaces.MultiBinary):
        return BernoulliPdType(ac_space.n)
    else:
        raise NotImplementedError
Example #26
Source File: environment.py From 2019-OSS-Summer-RL with MIT License | 5 votes |
def __init__(self, maze_file=None, maze_size=(10, 10), mode=None, enable_render=True):
    self.viewer = None
    self.enable_render = enable_render

    has_loops = True
    num_portals = 3

    self.maze_view = MazeView2D(maze_name="OpenAI Gym - Maze (%d x %d)" % maze_size,
                                maze_size=maze_size, screen_size=(640, 640),
                                has_loops=has_loops, num_portals=num_portals,
                                enable_render=enable_render)

    self.maze_size = self.maze_view.maze_size

    self.action_space = spaces.Discrete(2*len(self.maze_size))

    low = np.zeros(len(self.maze_size), dtype=int)
    high = np.array(self.maze_size, dtype=int) - np.ones(len(self.maze_size), dtype=int)
    self.observation_space = spaces.Box(low, high, dtype=np.int64)

    self.state = None
    self.steps_beyond_done = None

    self.seed()
    self.reset()
    self.configure()
Example #27
Source File: core.py From spinningup with MIT License | 5 votes |
def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh,
                     output_activation=None, policy=None, action_space=None):

    # default policy builder depends on action space
    if policy is None and isinstance(action_space, Box):
        policy = mlp_gaussian_policy
    elif policy is None and isinstance(action_space, Discrete):
        policy = mlp_categorical_policy

    with tf.variable_scope('pi'):
        pi, logp, logp_pi = policy(x, a, hidden_sizes, activation, output_activation, action_space)
    with tf.variable_scope('v'):
        v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1)
    return pi, logp, logp_pi, v
Example #28
Source File: core.py From spinningup with MIT License | 5 votes |
def placeholder_from_space(space):
    if isinstance(space, Box):
        return placeholder(space.shape)
    elif isinstance(space, Discrete):
        return tf.placeholder(dtype=tf.int32, shape=(None,))
    raise NotImplementedError
Example #29
Source File: env_utils.py From tf2rl with MIT License | 5 votes |
def get_act_dim(env):
    if isinstance(env.action_space, Discrete):
        return 1  # env.action_space.n
    elif isinstance(env.action_space, Box):
        return env.action_space.low.size
    else:
        raise NotImplementedError
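A small sketch of the convention above (the _DummyEnv holder is hypothetical, used only to attach an action_space): a Discrete space always counts as one action dimension, while a Box counts its number of components.

import numpy as np
from gym.spaces import Box, Discrete

class _DummyEnv:
    pass

env = _DummyEnv()
env.action_space = Discrete(6)
print(get_act_dim(env))  # 1

env.action_space = Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
print(get_act_dim(env))  # 3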
Example #30
Source File: mnist_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(
        self,
        seed=0,
        episode_len=None,
        no_images=None
):
    import filelock
    from tensorflow.examples.tutorials.mnist import input_data
    # we could use temporary directory for this with a context manager and
    # TemporaryDirectory, but then each test that uses mnist would re-download the data
    # this way the data is not cleaned up, but we only download it once per machine
    mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
    with filelock.FileLock(mnist_path + '.lock'):
        self.mnist = input_data.read_data_sets(mnist_path)

    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)

    self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
    self.action_space = Discrete(10)
    self.episode_len = episode_len
    self.time = 0
    self.no_images = no_images

    self.train_mode()
    self.reset()