Python gym.utils.seeding.np_random() Examples
The following are 30 code examples of gym.utils.seeding.np_random().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions and classes of the module gym.utils.seeding, or try the search function.
Example #1
Source File: blackjack.py From rl_algorithms with MIT License | 7 votes |
def _step(self, action):
    """Advance the blackjack hand by one move: truthy action = hit, falsy = stick."""
    assert self.action_space.contains(action)
    if action:
        # Hit: deal the player one more card; going bust ends the episode.
        self.player.append(draw_card(self.np_random))
        if is_bust(self.player):
            return self._get_obs(), -1, True, {}
        return self._get_obs(), 0, False, {}
    # Stick: dealer draws until reaching 17, then the hands are compared.
    while sum_hand(self.dealer) < 17:
        self.dealer.append(draw_card(self.np_random))
    payoff = cmp(score(self.player), score(self.dealer))
    # A natural blackjack pays 3:2 when the option is enabled.
    if self.natural and is_natural(self.player) and payoff == 1:
        payoff = 1.5
    return self._get_obs(), payoff, True, {}
Example #2
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License | 6 votes |
def step(self, action):
    """Resolve one Kelly coin-flip betting round; returns (obs, reward, done, info)."""
    stake = action / 100.0          # convert pennies to dollars
    if stake > self.wealth:         # betting more than you have == betting everything
        stake = self.wealth
    done, reward = False, 0.0
    if self.wealth < 0.000001:
        # Effectively broke: episode over with no payout.
        done = True
    elif self.rounds == 0:
        # Out of rounds: cash out current wealth as the reward.
        done, reward = True, self.wealth
    else:
        self.rounds = self.rounds - 1
        won = flip(self.edge, self.np_random)
        self.roundsElapsed = self.roundsElapsed + 1
        if won:
            self.wealth = min(self.maxWealth, self.wealth + stake)
            self.maxEverWealth = max(self.wealth, self.maxEverWealth)
            self.wins = self.wins + 1
        else:
            self.wealth = self.wealth - stake
            self.losses = self.losses + 1
    return self._get_obs(), reward, done, {}
Example #3
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License | 6 votes |
def step(self, action):
    """Bet `action` pennies on one biased coin flip; returns (obs, reward, done, info)."""
    bet = action / 100.0  # convert from pennies to dollars
    # Treat attempts to bet more than possessed as betting everything.
    if bet > self.wealth:
        bet = self.wealth
    if self.wealth < 0.000001:
        done, reward = True, 0.0
    elif self.rounds == 0:
        done, reward = True, self.wealth
    else:
        self.rounds = self.rounds - 1
        done, reward = False, 0.0
        if flip(self.edge, self.np_random):
            self.wealth = min(self.maxWealth, self.wealth + bet)
        else:
            self.wealth = self.wealth - bet
    return self._get_obs(), reward, done, {}
Example #4
Source File: memorize_digits.py From DRL_DeliveryDuel with MIT License | 6 votes |
def step(self, action):
    """Score the digit guess, pick a new digit, and render it into an RGB field."""
    reward, done = -1, False
    self.step_n += 1
    if self.digit != -1:
        if self.digit == action:
            reward = +1
        # After 20 steps, end the episode with probability 1/5.
        # NOTE: randint is only drawn when step_n > 20 (short-circuit preserved).
        done = self.step_n > 20 and self.np_random.randint(low=0, high=5) == 0
    self.digit = self.np_random.randint(low=0, high=10)
    obs = np.zeros((FIELD_H, FIELD_W, 3), dtype=np.uint8)
    obs[...] = self.color_bg
    glyph = np.zeros((6, 6, 3), dtype=np.uint8)
    glyph[:] = self.color_bg
    # Pixels marked 42 in the bogus-MNIST bitmap take the digit colour.
    mask = self.bogus_mnist[self.digit] == 42
    glyph[mask] = self.color_digit
    y, x = self.digit_y, self.digit_x
    obs[y - 3:y + 3, x - 3:x + 3] = glyph
    self.last_obs = obs
    return obs, reward, done, {}
Example #5
Source File: tic_tac_toe_env.py From BERT with Apache License 2.0 | 6 votes |
def __init__(self, strict=False):
    """Set up spaces and RNG, then start a fresh game. (What about metadata and spec?)"""
    self.strict = strict
    self.reward_range = (-1.0, 1.0)
    # Action space -- nine board positions we can choose to mark.
    self.action_space = spaces.Discrete(9)
    # Observation space -- this hopefully does what we need.
    self.observation_space = spaces.Box(
        low=-1, high=1, shape=(3, 3), dtype=np.int64)
    # Seed the RNG before any game state is created.
    self.np_random = None
    self.seed()
    # Start the game.
    self.board_state = None
    self.done = False
    self.reset()
Example #6
Source File: blackjack.py From DRL_DeliveryDuel with MIT License | 6 votes |
def step(self, action):
    """One blackjack move: truthy action = hit, falsy = stick."""
    assert self.action_space.contains(action)
    if action:
        # Hit: add a card to the player's hand.
        self.player.append(draw_card(self.np_random))
        done = is_bust(self.player)
        reward = -1 if done else 0
    else:
        # Stick: dealer plays out to at least 17, then both hands are scored.
        done = True
        while sum_hand(self.dealer) < 17:
            self.dealer.append(draw_card(self.np_random))
        reward = cmp(score(self.player), score(self.dealer))
        if self.natural and is_natural(self.player) and reward == 1:
            reward = 1.5  # natural blackjack pays 3:2
    return self._get_obs(), reward, done, {}
Example #7
Source File: block_pushing.py From c-swm with MIT License | 5 votes |
def seed(self, seed=None):
    """Install a seeded RNG on the environment; return the seed actually used."""
    self.np_random, applied = seeding.np_random(seed)
    return [applied]
Example #8
Source File: hotter_colder.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """(Re)initialise self.np_random from `seed` and report the seed used."""
    rng, used = seeding.np_random(seed)
    self.np_random = rng
    return [used]
Example #9
Source File: hotter_colder.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Start a new game: draw a fresh hidden number, zero the counters."""
    lo, hi = -self.range, self.range
    self.number = self.np_random.uniform(lo, hi)
    self.guess_count = 0
    self.observation = 0
    return self.observation
Example #10
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License | 5 votes |
def flip(edge, np_random):
    """Biased coin: True with probability `edge` (one uniform draw in [0, 1))."""
    draw = np_random.uniform()
    return draw < edge
Example #11
Source File: atari_env.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Seed gym's RNG and the ALE emulator; return both seeds used."""
    self.np_random, gym_seed = seeding.np_random(seed)
    # The derived seed gets passed as a uint but is checked as an int
    # elsewhere, so keep it below 2**31.
    ale_seed = seeding.hash_seed(gym_seed + 1) % (2 ** 31)
    # Empirically, seeding must happen before the ROM is loaded.
    self.ale.setInt(b'random_seed', ale_seed)
    self.ale.loadROM(self.game_path)
    return [gym_seed, ale_seed]
Example #12
Source File: blackjack.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Seed the environment's RNG and return [seed_used]."""
    self.np_random, chosen = seeding.np_random(seed)
    return [chosen]
Example #13
Source File: blackjack.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Deal fresh hands and return the first observation.

    The dealer is dealt before the player — keep this order, it fixes the
    RNG draw sequence for a given seed.
    """
    self.dealer = draw_hand(self.np_random)
    self.player = draw_hand(self.np_random)
    return self._get_obs()
Example #14
Source File: roulette.py From DRL_DeliveryDuel with MIT License | 5 votes |
def step(self, action):
    """Spin the roulette wheel, or walk away when action == n - 1."""
    assert self.action_space.contains(action)
    if action == self.n - 1:
        # Walking away ends the episode with no payout: (obs, reward, done, info).
        return 0, 0, True, {}
    # N.B. np.random.randint draws from [A, B) while random.randint draws from [A, B].
    spun = self.np_random.randint(0, self.n - 1)
    if spun == action == 0:
        reward = self.n - 2.0   # betting on zero and hitting it
    elif spun != 0 and action != 0 and spun % 2 == action % 2:
        reward = 1.0            # parity bet wins
    else:
        reward = -1.0
    return 0, reward, False, {}
Example #15
Source File: block_pushing.py From c-swm with MIT License | 5 votes |
def __init__(self, width=5, height=5, render_type='cubes', num_objects=5,
             seed=None):
    """Configure the block-pushing grid world, its spaces and RNG, then reset."""
    self.width = width
    self.height = height
    self.render_type = render_type
    self.num_objects = num_objects
    self.num_actions = 4 * self.num_objects  # one move per direction (NESW) per object
    self.colors = utils.get_colors(num_colors=max(9, self.num_objects))
    self.np_random = None
    self.game = None
    self.target = None
    # Start every object at a position outside the env for easier
    # collision resolution.
    self.objects = [[-1, -1] for _ in range(self.num_objects)]
    # If True, check for collisions and don't allow two objects to
    # occupy the same position.
    self.collisions = True
    self.action_space = spaces.Discrete(self.num_actions)
    self.observation_space = spaces.Box(
        low=0, high=1,
        shape=(3, self.width, self.height),
        dtype=np.float32,
    )
    self.seed(seed)
    self.reset()
Example #16
Source File: quadrotor2d_slungload.py From reinmav-gym with BSD 3-Clause "New" or "Revised" License | 5 votes |
def reset(self):
    """Reset the 9-dim state to uniform noise in [-1, 1] and return a copy."""
    print("reset")
    sampled = self.np_random.uniform(low=-1.0, high=1.0, size=(9,))
    self.state = np.array(sampled)
    return np.array(self.state)
Example #17
Source File: quadrotor3d_slungload.py From reinmav-gym with BSD 3-Clause "New" or "Revised" License | 5 votes |
def reset(self):
    """Reset the 16-dim state to uniform noise in [-1, 1] and return a copy."""
    print("reset")
    fresh = self.np_random.uniform(low=-1.0, high=1.0, size=(16,))
    self.state = np.array(fresh)
    return np.array(self.state)
Example #18
Source File: atari_env.py From DRL_DeliveryDuel with MIT License | 5 votes |
def step(self, a):
    """Repeat the chosen ALE action for the frameskip and accumulate reward."""
    total_reward = 0.0
    action = self._action_set[a]
    if isinstance(self.frameskip, int):
        num_steps = self.frameskip
    else:
        # Stochastic frameskip: sample a count from [low, high).
        num_steps = self.np_random.randint(self.frameskip[0], self.frameskip[1])
    for _ in range(num_steps):
        total_reward += self.ale.act(action)
    ob = self._get_obs()
    return ob, total_reward, self.ale.game_over(), {"ale.lives": self.ale.lives()}
Example #19
Source File: kellycoinflip.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Create a seeded RNG for this env; return the seed in a list."""
    self.np_random, actual = seeding.np_random(seed)
    return [actual]
Example #20
Source File: acrobot.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Start a new episode with small uniform state noise in [-0.1, 0.1]."""
    noise = self.np_random.uniform(low=-0.1, high=0.1, size=(4,))
    self.state = noise
    return self._get_ob()
Example #21
Source File: acrobot.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Seed self.np_random; gym convention is to return the seeds as a list."""
    self.np_random, picked = seeding.np_random(seed)
    return [picked]
Example #22
Source File: cartpole.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Re-sample the 4-dim state uniformly in [-0.05, 0.05]."""
    init = self.np_random.uniform(low=-0.05, high=0.05, size=(4,))
    self.state = init
    # Forget any post-termination step counting from the previous episode.
    self.steps_beyond_done = None
    return np.array(init)
Example #23
Source File: cartpole.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Build and install a seeded RNG; return the list of seeds used."""
    self.np_random, effective = seeding.np_random(seed)
    return [effective]
Example #24
Source File: continuous_mountain_car.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Place the car at a random position in [-0.6, -0.4] with zero velocity."""
    start_pos = self.np_random.uniform(low=-0.6, high=-0.4)
    self.state = np.array([start_pos, 0])
    return np.array(self.state)
Example #25
Source File: mountain_car.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Start a new episode: random position in [-0.6, -0.4], velocity zero."""
    position = self.np_random.uniform(low=-0.6, high=-0.4)
    velocity = 0
    self.state = np.array([position, velocity])
    return np.array(self.state)
Example #26
Source File: mountain_car.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Seed the environment RNG and hand back the seed that was used."""
    rng, seed_used = seeding.np_random(seed)
    self.np_random = rng
    return [seed_used]
Example #27
Source File: pendulum.py From DRL_DeliveryDuel with MIT License | 5 votes |
def reset(self):
    """Re-sample the 2-dim state uniformly within +/-[pi, 1]; clear last action."""
    bounds = np.array([np.pi, 1])
    self.state = self.np_random.uniform(low=-bounds, high=bounds)
    self.last_u = None
    return self._get_obs()
Example #28
Source File: pendulum.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Initialise this env's RNG from `seed`; return [seed] per gym convention."""
    self.np_random, result = seeding.np_random(seed)
    return [result]
Example #29
Source File: bipedal_walker.py From DRL_DeliveryDuel with MIT License | 5 votes |
def _generate_clouds(self):
    """Scatter decorative cloud polygons over the terrain.

    (Sorry for the clouds, couldn't resist.)  The RNG draw order — one
    anchor uniform, then two jitter uniforms per vertex — is preserved so
    seeded runs stay reproducible.
    """
    self.cloud_poly = []
    for _ in range(TERRAIN_LENGTH // 20):
        anchor_x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
        anchor_y = VIEWPORT_H / SCALE * 3 / 4
        # Five jittered points around a circle approximate a puffy outline.
        poly = []
        for a in range(5):
            px = (anchor_x
                  + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
                  + self.np_random.uniform(0, 5 * TERRAIN_STEP))
            py = (anchor_y
                  + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
                  + self.np_random.uniform(0, 5 * TERRAIN_STEP))
            poly.append((px, py))
        x1 = min(p[0] for p in poly)
        x2 = max(p[0] for p in poly)
        self.cloud_poly.append((poly, x1, x2))
Example #30
Source File: car_racing.py From DRL_DeliveryDuel with MIT License | 5 votes |
def seed(self, seed=None):
    """Attach a freshly seeded RNG to the env and return the seed used."""
    self.np_random, in_use = seeding.np_random(seed)
    return [in_use]