Python chainer.optimizers.RMSpropGraves() Examples
The following are 8 code examples of chainer.optimizers.RMSpropGraves().
You may also want to check out all available functions/classes of the module chainer.optimizers.
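Before the project examples, here is a minimal, self-contained sketch of the optimizer's basic usage. The MLP class, the dummy data, and the chosen hyperparameter values are illustrative assumptions only (the lr/alpha/momentum/eps values mirror the DQN-style settings that appear in several examples below); the RMSpropGraves constructor, setup(), cleargrads(), backward(), and update() calls are standard Chainer API.

import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import optimizers


class MLP(chainer.Chain):
    """Tiny two-layer network used only to demonstrate optimizer setup."""

    def __init__(self, n_in=4, n_hidden=16, n_out=2):
        super(MLP, self).__init__()
        with self.init_scope():
            self.l1 = L.Linear(n_in, n_hidden)
            self.l2 = L.Linear(n_hidden, n_out)

    def __call__(self, x):
        return self.l2(F.relu(self.l1(x)))


model = MLP()

# RMSpropGraves is Alex Graves' RMSprop variant with momentum; it accepts
# lr, alpha, momentum and eps as keyword arguments.
optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95, momentum=0.95, eps=0.01)
optimizer.setup(model)

# One manual update step on random data.
x = np.random.rand(8, 4).astype(np.float32)
t = np.random.randint(0, 2, size=8).astype(np.int32)
loss = F.softmax_cross_entropy(model(x), t)
model.cleargrads()
loss.backward()
optimizer.update()

After setup(), the optimizer can also be driven by a chainer.training.Trainer/StandardUpdater loop instead of calling update() manually, which is how most of the projects below use it in practice.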
Example #1
Source File: dqn_agent.py From DQN-chainer with MIT License
def __init__(self, n_history, n_act):
    print("Initializing DQN...")
    self.step = 0  # number of steps that DQN is updated
    self.n_act = n_act
    self.n_history = n_history  # Number of observations used to construct the single state

    print("Model Building")
    self.model = ActionValue(n_history, n_act).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    hs = self.n_history
    ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.bool)]
Example #2
Source File: dqn_agent_cpu.py From DQN-chainer with MIT License
def __init__(self, n_history, n_act):
    print("Initializing DQN...")
    self.step = 0  # number of steps that DQN is updated
    self.n_act = n_act
    self.n_history = n_history  # Number of observations used to construct the single state

    print("Model Building")
    self.model = ActionValue(n_history, n_act)
    self.model_target = copy.deepcopy(self.model)

    print("Initializing Optimizer")
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.01)
    self.optimizer.setup(self.model)

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    hs = self.n_history
    ims = self.img_size
    self.replay_buffer = [np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros(self.data_size, dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.float32),
                          np.zeros((self.data_size, hs, ims, ims), dtype=np.uint8),
                          np.zeros((self.data_size, 1), dtype=np.bool)]
Example #3
Source File: test_pretrained_models.py From chainerrl with MIT License
def _test_load_dqn(self, gpu):
    q_func = links.Sequence(
        links.NatureDQNHead(),
        L.Linear(512, 4),
        DiscreteActionValue)
    opt = optimizers.RMSpropGraves(
        lr=2.5e-4, alpha=0.95, momentum=0.0, eps=1e-2)
    opt.setup(q_func)
    rbuf = replay_buffer.ReplayBuffer(100)
    explorer = explorers.LinearDecayEpsilonGreedy(
        start_epsilon=1.0, end_epsilon=0.1,
        decay_steps=10 ** 6,
        random_action_func=lambda: np.random.randint(4))
    agent = agents.DQN(q_func, opt, rbuf, gpu=gpu, gamma=0.99,
                       explorer=explorer, replay_start_size=50,
                       target_update_interval=10 ** 4,
                       clip_delta=True,
                       update_interval=4,
                       batch_accumulator='sum',
                       phi=lambda x: x)
    model, exists = download_model("DQN", "BreakoutNoFrameskip-v4",
                                   model_type=self.pretrained_type)
    agent.load(model)
    if os.environ.get('CHAINERRL_ASSERT_DOWNLOADED_MODEL_IS_CACHED'):
        assert exists
Example #4
Source File: test_optimizers_by_linear_model.py From chainer with MIT License
def create(self):
    # The first positional argument of RMSpropGraves is the learning rate (lr).
    return optimizers.RMSpropGraves(0.1)
Example #5
Source File: dqn_agent_nips.py From DQN-chainer with MIT License
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    # Initialization for Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 16, ksize=8, stride=4, wscale=np.sqrt(2)),
        l2=F.Convolution2D(16, 32, ksize=4, stride=2, wscale=np.sqrt(2)),
        l3=F.Linear(2592, 256),
        q_value=F.Linear(256, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 256),
                                           dtype=np.float32))
    ).to_gpu()

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.0002, alpha=0.3, momentum=0.2)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
Example #6
Source File: dqn_agent_nature.py From DQN-chainer with MIT License
def __init__(self, enable_controller=[0, 3, 4]):
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller  # Default setting : "Pong"

    print "Initializing DQN..."
    # Initialization of Chainer 1.1.0 or older.
    # print "CUDA init"
    # cuda.init()

    print "Model Building"
    self.model = FunctionSet(
        l1=F.Convolution2D(4, 32, ksize=8, stride=4, nobias=False, wscale=np.sqrt(2)),
        l2=F.Convolution2D(32, 64, ksize=4, stride=2, nobias=False, wscale=np.sqrt(2)),
        l3=F.Convolution2D(64, 64, ksize=3, stride=1, nobias=False, wscale=np.sqrt(2)),
        l4=F.Linear(3136, 512, wscale=np.sqrt(2)),
        q_value=F.Linear(512, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, 512),
                                           dtype=np.float32))
    ).to_gpu()
    self.model_target = copy.deepcopy(self.model)

    print "Initializing Optimizer"
    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.D = [np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, 4, 84, 84), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]
Example #7
Source File: dern.py From der-network with MIT License
def setup_optimizer(self):
    optimizer = optimizers.RMSpropGraves(
        lr=self.args.start_lr, alpha=0.95, momentum=0.9, eps=1e-08)
    optimizer.setup(self)
    optimizer.add_hook(chainer.optimizer.GradientClipping(self.args.grad_clip))
    return optimizer
Example #8
Source File: q_net.py From deel with MIT License
def __init__(self, use_gpu, enable_controller, dim):
    self.use_gpu = use_gpu
    self.num_of_actions = len(enable_controller)
    self.enable_controller = enable_controller
    self.dim = dim

    print("Initializing Q-Network...")

    hidden_dim = 256
    self.model = FunctionSet(
        l4=F.Linear(self.dim*self.hist_size, hidden_dim, wscale=np.sqrt(2)),
        q_value=F.Linear(hidden_dim, self.num_of_actions,
                         initialW=np.zeros((self.num_of_actions, hidden_dim),
                                           dtype=np.float32))
    )
    if self.use_gpu >= 0:
        self.model.to_gpu()
    self.model_target = copy.deepcopy(self.model)

    self.optimizer = optimizers.RMSpropGraves(lr=0.00025, alpha=0.95,
                                              momentum=0.95, eps=0.0001)
    self.optimizer.setup(self.model.collect_parameters())

    # History Data : D=[s, a, r, s_dash, end_episode_flag]
    self.d = [np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros(self.data_size, dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.int8),
              np.zeros((self.data_size, self.hist_size, self.dim), dtype=np.uint8),
              np.zeros((self.data_size, 1), dtype=np.bool)]