Python gym.spaces.Discrete() Examples
The following are 30 code examples of gym.spaces.Discrete(), taken from open-source projects. You can go to the original project or source file by following the links above each example, or check out all available functions and classes of the module gym.spaces.
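Before the examples, here is a minimal sketch (assuming a classic gym install) of what spaces.Discrete(n) represents: a finite set of integer actions or observations {0, 1, ..., n-1}.

from gym import spaces

action_space = spaces.Discrete(4)
print(action_space.n)            # 4
print(action_space.sample())     # a random integer from {0, 1, 2, 3}
print(action_space.contains(3))  # True
print(action_space.contains(4))  # False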
Example #1
Source File: cartpole_bullet.py From soccer-matlab with BSD 2-Clause "Simplified" License | 9 votes |
def __init__(self, renders=True):
    # start the bullet physics server
    self._renders = renders
    if (renders):
        p.connect(p.GUI)
    else:
        p.connect(p.DIRECT)

    observation_high = np.array([
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max,
        np.finfo(np.float32).max])
    action_high = np.array([0.1])

    self.action_space = spaces.Discrete(9)
    self.observation_space = spaces.Box(-observation_high, observation_high)

    self.theta_threshold_radians = 1
    self.x_threshold = 2.4
    self._seed()
    # self.reset()
    self.viewer = None
    self._configure()
Example #2
Source File: base.py From ConvLab with MIT License | 8 votes |
def set_gym_space_attr(gym_space):
    '''Set missing gym space attributes for standardization'''
    if isinstance(gym_space, spaces.Box):
        setattr(gym_space, 'is_discrete', False)
    elif isinstance(gym_space, spaces.Discrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', 0)
        setattr(gym_space, 'high', gym_space.n)
    elif isinstance(gym_space, spaces.MultiBinary):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.full(gym_space.n, 0))
        setattr(gym_space, 'high', np.full(gym_space.n, 2))
    elif isinstance(gym_space, spaces.MultiDiscrete):
        setattr(gym_space, 'is_discrete', True)
        setattr(gym_space, 'low', np.zeros_like(gym_space.nvec))
        setattr(gym_space, 'high', np.array(gym_space.nvec))
    else:
        raise ValueError('gym_space not recognized')
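A minimal usage sketch of the helper above (the call below is hypothetical and assumes numpy and gym.spaces are imported as in the example): after the call, every space exposes a uniform is_discrete/low/high interface.

space = spaces.Discrete(5)
set_gym_space_attr(space)
print(space.is_discrete)      # True
print(space.low, space.high)  # 0 5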
Example #3
Source File: fixed_sequence_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #4
Source File: lunar_lander.py From cs294-112_hws with MIT License | 6 votes |
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf]*N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)
    self.action_space = spaces.Discrete(N_ACT_DIM)

    self.curr_step = None

    self._reset()
Example #5
Source File: input.py From HardRLWithYoutube with MIT License | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #6
Source File: input.py From HardRLWithYoutube with MIT License | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
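To illustrate the Discrete branch above, a small sketch (assuming TensorFlow 1.x graph mode, as in the example, and the observation_placeholder helper from the same file): an integer placeholder becomes a float one-hot tensor of depth ob_space.n.

import numpy as np
import tensorflow as tf
from gym.spaces import Discrete

ob_space = Discrete(4)
ph = observation_placeholder(ob_space)      # int placeholder of shape (None,)
encoded = encode_observation(ob_space, ph)  # float one-hot of shape (None, 4)

with tf.Session() as sess:
    print(sess.run(encoded, feed_dict={ph: np.array([0, 2], dtype=np.int64)}))
    # [[1. 0. 0. 0.]
    #  [0. 0. 1. 0.]]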
Example #7
Source File: run_bcq_on_batchdata.py From EasyRL with Apache License 2.0 | 6 votes |
def __init__(self, file_name, batch_size=128, n_step=1):
    # create an offline_env to do fake interaction with agent
    self.num_epoch = 0
    self.num_record = 0
    self._offset = 0

    # how many records to read from table at one time
    self.batch_size = batch_size
    # number of step to reserved for n-step dqn
    self.n_step = n_step

    # defined the shape of observation and action
    # we follow the definition of gym.spaces
    # `Box` for continue-space, `Discrete` for discrete-space and `Dict` for multiple input
    # actually low/high limitation will not be used by agent but required by gym.spaces
    self.observation_space = Box(low=-np.inf, high=np.inf, shape=(4,))
    self.action_space = Discrete(n=2)

    fr = open(file_name)
    self.data = fr.readlines()
    self.num_record = len(self.data)
    fr.close()
Example #8
Source File: fixed_sequence_env.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #9
Source File: lunar_lander.py From cs294-112_hws with MIT License | 6 votes |
def __init__(self):
    self._seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.moon = None
    self.lander = None
    self.particles = []

    self.prev_reward = None

    high = np.array([np.inf]*N_OBS_DIM)  # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(-high, high)
    self.action_space = spaces.Discrete(N_ACT_DIM)

    self.curr_step = None

    self._reset()
Example #10
Source File: mnist_env.py From HardRLWithYoutube with MIT License | 6 votes |
def __init__(
        self,
        seed=0,
        episode_len=None,
        no_images=None
):
    from tensorflow.examples.tutorials.mnist import input_data
    # we could use temporary directory for this with a context manager and
    # TemporaryDirectory, but then each test that uses mnist would re-download the data
    # this way the data is not cleaned up, but we only download it once per machine
    mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
    with filelock.FileLock(mnist_path + '.lock'):
        self.mnist = input_data.read_data_sets(mnist_path)

    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)

    self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
    self.action_space = Discrete(10)
    self.episode_len = episode_len
    self.time = 0
    self.no_images = no_images

    self.train_mode()
    self.reset()
Example #11
Source File: bit_flip.py From rlgraph with Apache License 2.0 | 6 votes |
def __init__(self, bit_length=16, max_steps=None):
    super(BitFlip, self).__init__()

    assert bit_length >= 1, 'bit_length must be >= 1, found {}'.format(bit_length)
    self.bit_length = bit_length

    if max_steps is None:
        self.max_steps = bit_length
    else:
        self.max_steps = max_steps

    self.last_action = -1  # -1 for reset
    self.steps = 0

    self.seed()
    self.action_space = spaces.Discrete(bit_length + 1)  # index = n means to not flip any bit
    # achieved goal and observation are identical in bit_flip environment, however it is made this way to be
    # compatible with Openai GoalEnv
    self.observation_space = spaces.Dict(dict(
        observation=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        achieved_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
        desired_goal=spaces.Box(low=0, high=1, shape=(bit_length,), dtype=np.int32),
    ))

    self.reset()
Example #12
Source File: safelife_env.py From safelife with Apache License 2.0 | 6 votes |
def __init__(self, level_iterator, **kwargs):
    self.level_iterator = level_iterator

    load_kwargs(self, kwargs)

    self.action_space = spaces.Discrete(len(self.action_names))
    if self.output_channels is None:
        self.observation_space = spaces.Box(
            low=0, high=2**15,
            shape=self.view_shape,
            dtype=np.uint16,
        )
    else:
        self.observation_space = spaces.Box(
            low=0, high=1,
            shape=self.view_shape + (len(self.output_channels),),
            dtype=np.uint8,
        )
    self.seed()
Example #13
Source File: random_agent.py From irl-benchmark with GNU General Public License v3.0 | 6 votes |
def pick_action(self, state: Union[int, float, np.ndarray]
                ) -> Union[int, float, np.ndarray]:
    """Pick an action given a state.

    Picks uniformly random from all possible actions, using the environments
    action_space.sample() method.

    Parameters
    ----------
    state: int
        An integer corresponding to a state of a DiscreteEnv.
        Not used in this agent.

    Returns
    -------
    Union[int, float, np.ndarray]
        An action
    """
    # if other spaces are needed, check if their sample method conforms with
    # returned type, change if necessary.
    assert isinstance(self.env.action_space,
                      (Box, Discrete, MultiDiscrete, MultiBinary))
    return self.env.action_space.sample()
Example #14
Source File: abc.py From chainerrl with MIT License | 6 votes |
def __init__(self, size=2, discrete=True, partially_observable=False,
             episodic=True, deterministic=False):
    self.size = size
    self.terminal_state = size
    self.episodic = episodic
    self.partially_observable = partially_observable
    self.deterministic = deterministic
    self.n_max_offset = 1
    # (s_0, ..., s_N) + terminal state + offset
    self.n_dim_obs = self.size + 1 + self.n_max_offset
    self.observation_space = spaces.Box(
        low=-np.inf,
        high=np.inf,
        shape=(self.n_dim_obs,),
        dtype=np.float32,
    )
    if discrete:
        self.action_space = spaces.Discrete(self.size)
    else:
        self.action_space = spaces.Box(
            low=-1.0, high=1.0, shape=(self.size,), dtype=np.float32,
        )
Example #15
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #16
Source File: gym_env_problem_test.py From tensor2tensor with Apache License 2.0 | 6 votes |
def test_setup(self):
    ep = gym_env_problem.GymEnvProblem(base_env_name="CartPole-v0",
                                       batch_size=5)
    # Checks that environments were created and they are `batch_size` in number.
    ep.assert_common_preconditions()

    # Expectations on the observation space.
    observation_space = ep.observation_space
    self.assertIsInstance(observation_space, Box)
    self.assertEqual(observation_space.shape, (4,))
    self.assertEqual(observation_space.dtype, np.float32)

    # Expectations on the action space.
    action_space = ep.action_space
    self.assertTrue(isinstance(action_space, Discrete))
    self.assertEqual(action_space.shape, ())
    self.assertEqual(action_space.dtype, np.int64)
    self.assertEqual(ep.num_actions, 2)

    # Reward range is infinite here.
    self.assertFalse(ep.is_reward_range_finite)
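The expectations in the test mirror the underlying CartPole-v0 spaces, which can be checked directly with gym (a sketch, assuming a standard gym install with CartPole-v0 registered):

import gym
from gym.spaces import Box, Discrete

env = gym.make("CartPole-v0")
assert isinstance(env.observation_space, Box)
assert env.observation_space.shape == (4,)
assert isinstance(env.action_space, Discrete)
assert env.action_space.n == 2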
Example #17
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #18
Source File: policy_util.py From ConvLab with MIT License | 6 votes |
def get_action_type(action_space):
    '''Method to get the action type to choose prob. dist. to sample actions from NN logits output'''
    if isinstance(action_space, spaces.Box):
        shape = action_space.shape
        assert len(shape) == 1
        if shape[0] == 1:
            return 'continuous'
        else:
            return 'multi_continuous'
    elif isinstance(action_space, spaces.Discrete):
        return 'discrete'
    elif isinstance(action_space, spaces.MultiDiscrete):
        return 'multi_discrete'
    elif isinstance(action_space, spaces.MultiBinary):
        return 'multi_binary'
    else:
        raise NotImplementedError


# action_policy base methods
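A short sketch of the strings the function above returns for common spaces (hypothetical calls, assuming gym.spaces is imported as in the example):

print(get_action_type(spaces.Discrete(3)))                           # 'discrete'
print(get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(1,))))   # 'continuous'
print(get_action_type(spaces.Box(low=-1.0, high=1.0, shape=(4,))))   # 'multi_continuous'
print(get_action_type(spaces.MultiDiscrete([3, 4])))                 # 'multi_discrete'
print(get_action_type(spaces.MultiBinary(5)))                        # 'multi_binary'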
Example #19
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #20
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #21
Source File: fixed_sequence_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def __init__(
        self,
        n_actions=10,
        seed=0,
        episode_len=100
):
    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)
    self.sequence = [self.np_random.randint(0, n_actions-1)
                     for _ in range(episode_len)]

    self.action_space = Discrete(n_actions)
    self.observation_space = Discrete(1)

    self.episode_len = episode_len
    self.time = 0
    self.reset()
Example #22
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def observation_placeholder(ob_space, batch_size=None, name='Ob'):
    '''
    Create placeholder to feed observations into of the size appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space     observation space

    batch_size: int         size of the batch to be fed into input. Can be left
                            None in most cases.

    name: str               name of the placeholder

    Returns:
    -------

    tensorflow placeholder tensor
    '''
    assert isinstance(ob_space, Discrete) or isinstance(ob_space, Box), \
        'Can only deal with Discrete and Box observation spaces for now'
    return tf.placeholder(shape=(batch_size,) + ob_space.shape, dtype=ob_space.dtype, name=name)
Example #23
Source File: input.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 6 votes |
def encode_observation(ob_space, placeholder):
    '''
    Encode input in the way that is appropriate to the observation space

    Parameters:
    ----------

    ob_space: gym.Space             observation space

    placeholder: tf.placeholder     observation input placeholder
    '''
    if isinstance(ob_space, Discrete):
        return tf.to_float(tf.one_hot(placeholder, ob_space.n))
    elif isinstance(ob_space, Box):
        return tf.to_float(placeholder)
    else:
        raise NotImplementedError
Example #24
Source File: core.py From spinningup with MIT License | 5 votes |
def __init__(self, observation_space, action_space,
             hidden_sizes=(64,64), activation=nn.Tanh):
    super().__init__()

    obs_dim = observation_space.shape[0]

    # policy builder depends on action space
    if isinstance(action_space, Box):
        self.pi = MLPGaussianActor(obs_dim, action_space.shape[0], hidden_sizes, activation)
    elif isinstance(action_space, Discrete):
        self.pi = MLPCategoricalActor(obs_dim, action_space.n, hidden_sizes, activation)

    # build value function
    self.v = MLPCritic(obs_dim, hidden_sizes, activation)
Example #25
Source File: distributions.py From rl_graph_generation with BSD 3-Clause "New" or "Revised" License | 5 votes |
def make_pdtype(ac_space):
    from gym import spaces
    if isinstance(ac_space, spaces.Box):
        assert len(ac_space.shape) == 1
        return DiagGaussianPdType(ac_space.shape[0])
    elif isinstance(ac_space, spaces.Discrete):
        return CategoricalPdType(ac_space.n)
    elif isinstance(ac_space, spaces.MultiDiscrete):
        return MultiCategoricalPdType(ac_space.nvec)
    elif isinstance(ac_space, spaces.MultiBinary):
        return BernoulliPdType(ac_space.n)
    else:
        raise NotImplementedError
Example #26
Source File: environment.py From 2019-OSS-Summer-RL with MIT License | 5 votes |
def __init__(self, maze_file=None, maze_size=(10, 10), mode=None, enable_render=True):
    self.viewer = None
    self.enable_render = enable_render

    has_loops = True
    num_portals = 3

    self.maze_view = MazeView2D(maze_name="OpenAI Gym - Maze (%d x %d)" % maze_size,
                                maze_size=maze_size, screen_size=(640, 640),
                                has_loops=has_loops, num_portals=num_portals,
                                enable_render=enable_render)

    self.maze_size = self.maze_view.maze_size

    self.action_space = spaces.Discrete(2*len(self.maze_size))

    low = np.zeros(len(self.maze_size), dtype=int)
    high = np.array(self.maze_size, dtype=int) - np.ones(len(self.maze_size), dtype=int)
    self.observation_space = spaces.Box(low, high, dtype=np.int64)

    self.state = None
    self.steps_beyond_done = None

    self.seed()
    self.reset()
    self.configure()
Example #27
Source File: core.py From spinningup with MIT License | 5 votes |
def mlp_actor_critic(x, a, hidden_sizes=(64,64), activation=tf.tanh,
                     output_activation=None, policy=None, action_space=None):

    # default policy builder depends on action space
    if policy is None and isinstance(action_space, Box):
        policy = mlp_gaussian_policy
    elif policy is None and isinstance(action_space, Discrete):
        policy = mlp_categorical_policy

    with tf.variable_scope('pi'):
        pi, logp, logp_pi = policy(x, a, hidden_sizes, activation, output_activation, action_space)
    with tf.variable_scope('v'):
        v = tf.squeeze(mlp(x, list(hidden_sizes)+[1], activation, None), axis=1)
    return pi, logp, logp_pi, v
Example #28
Source File: core.py From spinningup with MIT License | 5 votes |
def placeholder_from_space(space):
    if isinstance(space, Box):
        return placeholder(space.shape)
    elif isinstance(space, Discrete):
        return tf.placeholder(dtype=tf.int32, shape=(None,))
    raise NotImplementedError
Example #29
Source File: env_utils.py From tf2rl with MIT License | 5 votes |
def get_act_dim(env):
    if isinstance(env.action_space, Discrete):
        return 1  # env.action_space.n
    elif isinstance(env.action_space, Box):
        return env.action_space.low.size
    else:
        raise NotImplementedError
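A small sketch of the convention above (the _DummyEnv holder is hypothetical, used only to attach an action_space): a Discrete space always counts as one action dimension, while a Box counts its number of components.

import numpy as np
from gym.spaces import Box, Discrete

class _DummyEnv:
    pass

env = _DummyEnv()
env.action_space = Discrete(6)
print(get_act_dim(env))  # 1

env.action_space = Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
print(get_act_dim(env))  # 3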
Example #30
Source File: mnist_env.py From Reinforcement_Learning_for_Traffic_Light_Control with Apache License 2.0 | 5 votes |
def __init__(
        self,
        seed=0,
        episode_len=None,
        no_images=None
):
    import filelock
    from tensorflow.examples.tutorials.mnist import input_data
    # we could use temporary directory for this with a context manager and
    # TemporaryDirectory, but then each test that uses mnist would re-download the data
    # this way the data is not cleaned up, but we only download it once per machine
    mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data')
    with filelock.FileLock(mnist_path + '.lock'):
        self.mnist = input_data.read_data_sets(mnist_path)

    self.np_random = np.random.RandomState()
    self.np_random.seed(seed)

    self.observation_space = Box(low=0.0, high=1.0, shape=(28,28,1))
    self.action_space = Discrete(10)
    self.episode_len = episode_len
    self.time = 0
    self.no_images = no_images

    self.train_mode()
    self.reset()