Python rl.policy.LinearAnnealedPolicy() Examples
The following are 7 code examples of rl.policy.LinearAnnealedPolicy().
You may also want to check out all available functions/classes of the module rl.policy.
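Every example below follows the same pattern: LinearAnnealedPolicy wraps an inner policy (here always EpsGreedyQPolicy) and linearly anneals one of its attributes, named by attr, from value_max down to value_min over the first nb_steps of training, holding it at value_min afterwards; during testing the attribute is fixed at value_test. A minimal construction sketch (the parameter values are illustrative, not taken from any one example):

from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

# Epsilon starts at 1.0 (pure exploration), decays linearly to 0.1 over the
# first 100,000 training steps, then stays at 0.1. During agent.test() a
# fixed epsilon of 0.05 is used instead.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                              value_max=1.0, value_min=0.1,
                              value_test=0.05, nb_steps=100000)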
Example #1
Source File: dqn_breakout_test.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    model = build_model(INPUT_SHAPE, num_actions)
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    # Anneal epsilon linearly from 1.0 to 0.1 over the first 1M training steps;
    # use a fixed epsilon of 0.05 at test time.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=1000000)

    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)  # built for parity with the training script; unused here

    # Load the weights saved by the training script instead of training from scratch.
    dqn.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

    # Finally, evaluate the agent for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #2
Source File: dqn_lunar_lander.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=0.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=500000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
Example #3
Source File: dqn_breakout.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE  # computed but unused; build_model takes the frame shape

    model = build_model(INPUT_SHAPE, num_actions)
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    # Anneal epsilon linearly from 1.0 to 0.1 over the first 1M training steps;
    # use a fixed epsilon of 0.05 at test time.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=1000000)

    dqn = DQNAgent(model=model, nb_actions=num_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=1750000, log_interval=10000, visualize=False,
            verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate the agent for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
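The AtariProcessor used in both Breakout examples is defined elsewhere in the source file and not shown on this page. In keras-rl's canonical Atari example, a processor like it resizes each RGB frame to the 84x84 grayscale INPUT_SHAPE and clips rewards; the sketch below follows those lines, as an assumption about what the file's version does rather than a copy of it:

import numpy as np
from PIL import Image
from rl.core import Processor

class AtariProcessor(Processor):
    def process_observation(self, observation):
        # RGB frame -> 84x84 grayscale, stored as uint8 to keep replay memory small.
        img = Image.fromarray(observation).resize((84, 84)).convert('L')
        return np.array(img, dtype='uint8')

    def process_state_batch(self, batch):
        # Convert uint8 frames back to floats in [0, 1] only when a batch is sampled.
        return batch.astype('float32') / 255.

    def process_reward(self, reward):
        # Clip rewards to [-1, 1], as in the original DQN paper.
        return np.clip(reward, -1., 1.)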
Example #4
Source File: dqn_cartpole.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'CartPole-v0'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)

    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2, callbacks=callbacks)

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env, nb_episodes=5, visualize=True)
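build_model and build_callbacks are likewise defined elsewhere in these source files. For the low-dimensional environments (CartPole, LunarLander), build_model(state_space, num_actions) has to return a Keras model mapping a (window_length, state_space) observation to one Q-value per action; the following is a hypothetical stand-in, not the repository's exact architecture:

from keras.models import Sequential
from keras.layers import Dense, Flatten

def build_model(state_space, num_actions):
    # Hypothetical stand-in for the repository's build_model: a small MLP.
    # The real file may differ in depth and width.
    model = Sequential()
    model.add(Flatten(input_shape=(1, state_space)))  # window_length=1 in these examples
    model.add(Dense(16, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(num_actions, activation='linear'))  # raw Q-values, no squashing
    return model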
Example #5
Source File: dqn_lunar_lander_test.py From Deep-Learning-Quick-Reference with MIT License
def main():
    ENV_NAME = 'LunarLander-v2'

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n
    state_space = env.observation_space.shape[0]
    print(num_actions)

    model = build_model(state_space, num_actions)
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.05, nb_steps=10000)
    dqn = DQNAgent(model=model, nb_actions=num_actions, memory=memory,
                   nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)  # built for parity with the training script; unused here

    # Load the weights saved by the training script instead of training from scratch.
    dqn.load_weights('dqn_LunarLander-v2_weights_510000.h5f')

    # Finally, evaluate the agent for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #6
Source File: DQN_Agent_LSTM.py From Startcraft_pysc2_minigames with Apache License 2.0
def training_game():
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy: anneal epsilon linearly from 1.0 down to 0.7 over 1M training steps;
    # act fully greedily (eps=0) at test time.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.7, value_test=.0, nb_steps=1e6)

    # Agent
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   enable_double_dqn=False,
                   nb_steps_warmup=500,  # nb_steps_warmup=1,
                   target_model_update=1e-2, policy=policy, batch_size=150,
                   processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback (wrapped in a list: keras-rl's fit() expects a list of callbacks)
    callbacks = [keras.callbacks.TensorBoard(log_dir='./Graph', histogram_freq=0,
                                             write_graph=True, write_images=False)]

    # Save the parameters and upload them when needed
    # (check_w_file and log_file are built but unused in this variant)
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=callbacks, nb_steps=1e7, action_repetition=2,
            log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
Example #7
Source File: DQN_Agent.py From Startcraft_pysc2_minigames with Apache License 2.0
def training_game():
    env = Environment(map_name="HallucinIce",
                      visualize=True,
                      game_steps_per_episode=150,
                      agent_interface_format=features.AgentInterfaceFormat(
                          feature_dimensions=features.Dimensions(screen=64, minimap=32)))

    input_shape = (_SIZE, _SIZE, 1)
    nb_actions = _SIZE * _SIZE  # one discrete action per screen position

    model = neural_network_model(input_shape, nb_actions)

    # Memory: how many subsequent observations should be provided to the network?
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)

    processor = SC2Proc()

    # Policy: the agent's behaviour function, i.e. how it picks actions.
    # LinearAnnealedPolicy is a wrapper that linearly anneals an attribute of the
    # inner policy (here eps) over nb_steps.
    # EpsGreedyQPolicy selects a uniformly random action with probability eps and
    # the highest-Q action otherwise.
    # BoltzmannQPolicy instead samples actions with probabilities given by a
    # Boltzmann (softmax) distribution over the Q-values.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr="eps", value_max=1,
                                  value_min=0.7, value_test=.0, nb_steps=1e6)
    # policy = BoltzmannQPolicy(tau=1., clip=(-500, 500))  # clip bounds the Q-values to (-500, 500)

    # Agent: DQN combines Q-learning (the Bellman equation) with a deep neural network.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
                   nb_steps_warmup=500, target_model_update=1e-2, policy=policy,
                   batch_size=150, processor=processor)
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Save the parameters and reload them when needed
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    check_w_file = "train_w" + name + "_weights.h5f"
    if SAVE_MODEL:
        check_w_file = "train_w" + name + "_weights_{step}.h5f"
    log_file = "training_w_{}_log.json".format(name)

    callbacks = [ModelIntervalCheckpoint(check_w_file, interval=1000)]
    callbacks += [FileLogger(log_file, interval=100)]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(env, callbacks=callbacks, nb_steps=1e7, action_repetition=2,
            log_interval=1e4, verbose=2)
    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
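The schedule these SC2 examples rely on is easy to check by hand: during training, LinearAnnealedPolicy sets eps at step t to max(value_min, value_max - (value_max - value_min) * t / nb_steps). A standalone sanity check of that formula in plain Python (independent of keras-rl):

def annealed_eps(step, value_max=1.0, value_min=0.7, nb_steps=1e6):
    # The same linear interpolation LinearAnnealedPolicy applies while training.
    slope = -(value_max - value_min) / float(nb_steps)
    return max(value_min, slope * step + value_max)

for step in (0, 250000, 500000, 1000000, 2000000):
    print(step, annealed_eps(step))
# 0 -> 1.0, 250000 -> 0.925, 500000 -> 0.85, 1000000 -> 0.7, 2000000 -> 0.7 (floor)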