Python utils.ReplayBuffer() Examples
The following are 2 code examples of utils.ReplayBuffer(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module utils, or try the search function.
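The exact interface of ReplayBuffer differs between projects, and the two examples below construct it with different signatures. Purely as a point of reference, a minimal replay buffer for off-policy reinforcement learning often looks like the sketch below; the class name, default capacity, and method names here are illustrative assumptions, not the API of either project.

    import random

    class ReplayBuffer:
        # Minimal FIFO experience buffer: stores transitions and samples random mini-batches.
        def __init__(self, max_size=1_000_000):
            self.buffer = []
            self.max_size = max_size

        def add(self, transition):
            # transition is typically a tuple (state, action, reward, next_state, done)
            self.buffer.append(transition)
            if len(self.buffer) > self.max_size:
                self.buffer.pop(0)  # drop the oldest transition

        def sample(self, batch_size):
            # uniform random sample of stored transitions
            return random.sample(self.buffer, batch_size)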
Example #1
Source File: HAC.py from Hierarchical-Actor-Critic-HAC-PyTorch with MIT License
    def __init__(self, k_level, H, state_dim, action_dim, render, threshold,
                 action_bounds, action_offset, state_bounds, state_offset, lr):

        # adding lowest level
        self.HAC = [DDPG(state_dim, action_dim, action_bounds, action_offset, lr, H)]
        self.replay_buffer = [ReplayBuffer()]

        # adding remaining levels
        for _ in range(k_level-1):
            self.HAC.append(DDPG(state_dim, state_dim, state_bounds, state_offset, lr, H))
            self.replay_buffer.append(ReplayBuffer())

        # set some parameters
        self.k_level = k_level
        self.H = H
        self.action_dim = action_dim
        self.state_dim = state_dim
        self.threshold = threshold
        self.render = render

        # logging parameters
        self.goals = [None]*self.k_level
        self.reward = 0
        self.timestep = 0
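In this example the constructor builds one DDPG policy and one ReplayBuffer per level of the hierarchy: the lowest level acts in the original action space, while higher levels output subgoal states. A hedged sketch of how such per-level buffers might later be consumed during training; the method name update and its arguments are assumptions for illustration, not verified against the repository.

    def update(self, n_iter, batch_size):
        # Hypothetical training step: each hierarchy level trains its own
        # DDPG policy from its own replay buffer.
        for i in range(self.k_level):
            self.HAC[i].update(self.replay_buffer[i], n_iter, batch_size)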
Example #2
Source File: main.py from BCQ with MIT License
    def train_BCQ(state_dim, action_dim, max_action, device, args):
        # For saving files
        setting = f"{args.env}_{args.seed}"
        buffer_name = f"{args.buffer_name}_{setting}"

        # Initialize policy
        policy = BCQ.BCQ(state_dim, action_dim, max_action, device, args.discount, args.tau, args.lmbda, args.phi)

        # Load buffer
        replay_buffer = utils.ReplayBuffer(state_dim, action_dim, device)
        replay_buffer.load(f"./buffers/{buffer_name}")

        evaluations = []
        episode_num = 0
        done = True
        training_iters = 0

        while training_iters < args.max_timesteps:
            pol_vals = policy.train(replay_buffer, iterations=int(args.eval_freq), batch_size=args.batch_size)

            evaluations.append(eval_policy(policy, args.env, args.seed))
            np.save(f"./results/BCQ_{setting}", evaluations)

            training_iters += args.eval_freq
            print(f"Training iterations: {training_iters}")


    # Runs policy for X episodes and returns average reward
    # A fixed seed is used for the eval environment
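Unlike the first example, the buffer here is not filled during interaction: it is constructed with the state/action dimensions and a torch device, then populated from previously collected data on disk via load, and policy.train() samples mini-batches from it. A minimal sketch of a buffer with that shape of interface; the array layout, file naming, and method bodies are assumptions for illustration, not the exact BCQ implementation.

    import numpy as np
    import torch

    class ReplayBuffer:
        # Fixed-capacity buffer backed by NumPy arrays; sample() returns torch tensors on `device`.
        def __init__(self, state_dim, action_dim, device, max_size=int(1e6)):
            self.device = device
            self.size = 0
            self.state = np.zeros((max_size, state_dim))
            self.action = np.zeros((max_size, action_dim))
            self.next_state = np.zeros((max_size, state_dim))
            self.reward = np.zeros((max_size, 1))
            self.not_done = np.zeros((max_size, 1))

        def sample(self, batch_size):
            # Uniformly sample a mini-batch and move it to the training device.
            ind = np.random.randint(0, self.size, size=batch_size)
            to_tensor = lambda x: torch.FloatTensor(x[ind]).to(self.device)
            return (to_tensor(self.state), to_tensor(self.action),
                    to_tensor(self.next_state), to_tensor(self.reward),
                    to_tensor(self.not_done))

        def load(self, save_folder):
            # Hypothetical on-disk layout: one .npy file per array,
            # e.g. "<save_folder>_state.npy".
            self.state = np.load(f"{save_folder}_state.npy")
            self.action = np.load(f"{save_folder}_action.npy")
            self.next_state = np.load(f"{save_folder}_next_state.npy")
            self.reward = np.load(f"{save_folder}_reward.npy")
            self.not_done = np.load(f"{save_folder}_not_done.npy")
            self.size = self.state.shape[0]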