Python rl.agents.dqn.DQNAgent() Examples
The following are 12 code examples of rl.agents.dqn.DQNAgent().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module rl.agents.dqn, or try the search function.
Example #1
Source File: train_agent_kerasrl.py From gym-malware with MIT License | 7 votes |
def train_dqn_model(layers, rounds=10000, run_test=False, use_score=False):
    """Train a dueling double-DQN agent on the malware gym environment.

    Args:
        layers: Layer specification forwarded to ``generate_dense_model``.
        rounds: Number of steps passed to ``agent.fit``.
        run_test: When true, additionally run the trained agent for 100
            episodes on the matching test environment.
        use_score: Selects the ``-score`` variants of the train and test
            environments.

    Returns:
        Tuple ``(agent, model, history_train, history_test)``.
        ``history_test`` is ``None`` unless ``run_test`` is true.
    """
    train_env_id = 'malware-score-v0' if use_score else 'malware-v0'
    env = gym.make(train_env_id)
    env.seed(123)

    action_count = env.action_space.n
    # An "experience" covers a single observation (one malware sample).
    obs_window = 1

    # Policy network: input is (window, *observation_shape), one output per action.
    net_input_shape = (obs_window,) + env.observation_space.shape
    model = generate_dense_model(net_input_shape, layers, action_count)

    # BoltzmannQPolicy picks actions stochastically, with probabilities
    # obtained by soft-maxing the Q values.
    exploration = BoltzmannQPolicy()

    # Replay memory used during training.
    replay = SequentialMemory(
        limit=32,
        ignore_episode_boundaries=False,
        window_length=obs_window,
    )

    # DQN agent; references cited by the original author:
    #   http://arxiv.org/pdf/1312.5602.pdf
    #   http://arxiv.org/abs/1509.06461
    agent = DQNAgent(
        model=model,
        nb_actions=action_count,
        memory=replay,
        nb_steps_warmup=16,
        enable_double_dqn=True,
        enable_dueling_network=True,
        dueling_type='avg',
        target_model_update=1e-2,
        policy=exploration,
        batch_size=16,
    )

    # keras-rl accepts any built-in Keras optimizer here.
    agent.compile(RMSprop(lr=1e-3), metrics=['mae'])

    # Train, then grab the history recorded by the training environment.
    agent.fit(env, nb_steps=rounds, visualize=False, verbose=2)
    history_train = env.history

    if not run_test:
        return agent, model, history_train, None

    # Evaluate on the held-out environment, drawing from the test samples.
    test_env_id = 'malware-score-test-v0' if use_score else 'malware-test-v0'
    test_env = gym.make(test_env_id)
    agent.test(test_env, nb_episodes=100, visualize=False)

    return agent, model, history_train, test_env.history
Example #2
Source File: dqn_breakout_test.py From Deep-Learning-Quick-Reference with MIT License | 6 votes |
def main():
    """Evaluate a previously trained DQN agent on Breakout.

    Rebuilds the agent with the training-time configuration, loads the
    weights stored in ``dqn_BreakoutDeterministic-v4_weights_1750000.h5f``
    and runs 10 rendered test episodes.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Create the environment and fix the random seeds.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)

    # One network output per discrete action.
    n_actions = env.action_space.n
    model = build_model(INPUT_SHAPE, n_actions)

    replay = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    atari_processor = AtariProcessor()

    # Epsilon-greedy policy whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is used when testing.
    annealed_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=1000000,
    )

    agent = DQNAgent(
        model=model,
        nb_actions=n_actions,
        policy=annealed_policy,
        memory=replay,
        processor=atari_processor,
        nb_steps_warmup=50000,
        gamma=.99,
        target_model_update=10000,
        train_interval=4,
        delta_clip=1.,
    )
    agent.compile(Adam(lr=.00025), metrics=['mae'])

    # NOTE(review): the result is not used during evaluation; the call is
    # kept as in the original in case build_callbacks has side effects.
    callbacks = build_callbacks(ENV_NAME)

    # Restore the weights saved by the training script.
    agent.load_weights('dqn_BreakoutDeterministic-v4_weights_1750000.h5f')

    # Evaluate the agent for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True)
Example #3
Source File: test_dqn.py From keras-rl2 with MIT License | 5 votes |
def test_single_dqn_input():
    """Smoke-test DQNAgent on a single-input model, with and without double DQN."""
    model = Sequential([
        Flatten(input_shape=(2, 3)),
        Dense(2),
    ])
    memory = SequentialMemory(limit=10, window_length=2)

    # The same model and replay memory are reused for both agent variants.
    for use_double in (True, False):
        agent = DQNAgent(
            model,
            memory=memory,
            nb_actions=2,
            nb_steps_warmup=5,
            batch_size=4,
            enable_double_dqn=use_double,
        )
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv((3,)), nb_steps=10)
Example #4
Source File: test_dqn.py From keras-rl2 with MIT License | 5 votes |
def test_multi_dqn_input():
    """Smoke-test DQNAgent on a two-input model, with and without double DQN."""
    # Two inputs are concatenated, flattened and mapped to two outputs.
    first_in = Input(shape=(2, 3))
    second_in = Input(shape=(2, 4))
    merged = Concatenate()([first_in, second_in])
    flat = Flatten()(merged)
    q_values = Dense(2)(flat)
    model = Model(inputs=[first_in, second_in], outputs=q_values)

    memory = SequentialMemory(limit=10, window_length=2)
    processor = MultiInputProcessor(nb_inputs=2)

    # The same model, memory and processor are reused for both variants.
    for use_double in (True, False):
        agent = DQNAgent(
            model,
            memory=memory,
            nb_actions=2,
            nb_steps_warmup=5,
            batch_size=4,
            processor=processor,
            enable_double_dqn=use_double,
        )
        agent.compile('sgd')
        agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10)
Example #5
Source File: test_dqn.py From keras-rl with MIT License | 5 votes |
def test_single_dqn_input():
    """Fit a DQNAgent on a single-input network for both double-DQN settings."""
    net = Sequential()
    net.add(Flatten(input_shape=(2, 3)))
    net.add(Dense(2))

    replay = SequentialMemory(limit=10, window_length=2)

    # Arguments common to both agents; only enable_double_dqn varies.
    shared_kwargs = dict(memory=replay, nb_actions=2, nb_steps_warmup=5, batch_size=4)

    for double_flag in (True, False):
        agent = DQNAgent(net, enable_double_dqn=double_flag, **shared_kwargs)
        agent.compile('sgd')
        env = MultiInputTestEnv((3,))
        agent.fit(env, nb_steps=10)
Example #6
Source File: test_dqn.py From keras-rl with MIT License | 5 votes |
def test_multi_dqn_input():
    """Fit a DQNAgent on a two-input network for both double-DQN settings."""
    # Functional model: concatenate both inputs, flatten, then a Dense(2) head.
    in_a = Input(shape=(2, 3))
    in_b = Input(shape=(2, 4))
    joined = Concatenate()([in_a, in_b])
    flattened = Flatten()(joined)
    head = Dense(2)(flattened)
    net = Model(inputs=[in_a, in_b], outputs=head)

    replay = SequentialMemory(limit=10, window_length=2)
    multi_processor = MultiInputProcessor(nb_inputs=2)

    # Arguments common to both agents; only enable_double_dqn varies.
    shared_kwargs = dict(
        memory=replay,
        nb_actions=2,
        nb_steps_warmup=5,
        batch_size=4,
        processor=multi_processor,
    )

    for double_flag in (True, False):
        agent = DQNAgent(net, enable_double_dqn=double_flag, **shared_kwargs)
        agent.compile('sgd')
        env = MultiInputTestEnv([(3,), (4,)])
        agent.fit(env, nb_steps=10)
Example #7
Source File: dqn_lunar_lander.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN agent on LunarLander-v2, save its weights and evaluate it.

    Trains for 500000 steps, writes the final weights to
    ``dqn_LunarLander-v2_weights.h5f`` and then runs 5 rendered test episodes.
    """
    ENV_NAME = 'LunarLander-v2'

    # Create the environment and fix the random seeds.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)

    # Sizes of the action set and of the observation vector.
    n_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]
    print(n_actions)

    model = build_model(obs_dim, n_actions)
    replay = SequentialMemory(limit=50000, window_length=1)

    # Epsilon-greedy policy whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is used when testing.
    annealed_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )

    agent = DQNAgent(
        model=model,
        nb_actions=n_actions,
        memory=replay,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=annealed_policy,
    )
    agent.compile(Adam(lr=0.00025), metrics=['mae'])

    training_callbacks = build_callbacks(ENV_NAME)
    agent.fit(env, nb_steps=500000, visualize=False, verbose=2, callbacks=training_callbacks)

    # Persist the final weights once training is done.
    weights_path = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    agent.save_weights(weights_path, overwrite=True)

    # Evaluate the trained agent for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True)
Example #8
Source File: dqn_breakout.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN agent on Breakout, save its weights and evaluate it.

    Trains for 1750000 steps, writes the final weights to
    ``dqn_BreakoutDeterministic-v4_weights.h5f`` and then runs 10 rendered
    test episodes.
    """
    ENV_NAME = 'BreakoutDeterministic-v4'
    INPUT_SHAPE = (84, 84)
    WINDOW_LENGTH = 4

    # Get the environment and extract the number of actions.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)
    num_actions = env.action_space.n

    # build_model receives the per-frame shape; the previously computed
    # (WINDOW_LENGTH,) + INPUT_SHAPE tuple was never used and has been removed.
    model = build_model(INPUT_SHAPE, num_actions)

    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = AtariProcessor()

    # Epsilon-greedy policy whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is used when testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=1000000,
    )

    dqn = DQNAgent(
        model=model,
        nb_actions=num_actions,
        policy=policy,
        memory=memory,
        processor=processor,
        nb_steps_warmup=50000,
        gamma=.99,
        target_model_update=10000,
        train_interval=4,
        delta_clip=1.,
    )
    dqn.compile(Adam(lr=.00025), metrics=['mae'])

    callbacks = build_callbacks(ENV_NAME)
    dqn.fit(
        env,
        nb_steps=1750000,
        log_interval=10000,
        visualize=False,
        verbose=2,
        callbacks=callbacks,
    )

    # After training is done, we save the final weights.
    dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=True)
Example #9
Source File: dqn_cartpole.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Train a DQN agent on CartPole-v0, save its weights and evaluate it.

    Trains for 50000 steps, writes the final weights to
    ``dqn_CartPole-v0_weights.h5f`` and then runs 5 rendered test episodes.
    """
    ENV_NAME = 'CartPole-v0'

    # Create the environment and fix the random seeds.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)

    # Sizes of the action set and of the observation vector.
    action_count = env.action_space.n
    observation_size = env.observation_space.shape[0]
    print(action_count)

    q_network = build_model(observation_size, action_count)
    replay_memory = SequentialMemory(limit=50000, window_length=1)

    # Epsilon-greedy policy whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is used when testing.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )

    agent = DQNAgent(
        model=q_network,
        nb_actions=action_count,
        memory=replay_memory,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=exploration,
    )
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    fit_callbacks = build_callbacks(ENV_NAME)
    agent.fit(env, nb_steps=50000, visualize=False, verbose=2, callbacks=fit_callbacks)

    # Persist the final weights once training is done.
    weights_file = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    agent.save_weights(weights_file, overwrite=True)

    # Evaluate the trained agent for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True)
Example #10
Source File: dqn_lunar_lander_test.py From Deep-Learning-Quick-Reference with MIT License | 5 votes |
def main():
    """Evaluate a previously trained DQN agent on LunarLander-v2.

    Rebuilds the agent, loads the weights stored in
    ``dqn_LunarLander-v2_weights_510000.h5f`` and runs 10 rendered test
    episodes.
    """
    ENV_NAME = 'LunarLander-v2'

    # Create the environment and fix the random seeds.
    env = gym.make(ENV_NAME)
    np.random.seed(42)
    env.seed(42)

    # Sizes of the action set and of the observation vector.
    n_actions = env.action_space.n
    obs_dim = env.observation_space.shape[0]
    print(n_actions)

    model = build_model(obs_dim, n_actions)
    replay = SequentialMemory(limit=50000, window_length=1)

    # Epsilon-greedy policy whose `eps` attribute is annealed between
    # value_max and value_min over nb_steps; value_test is used when testing.
    annealed_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.,
        value_min=.1,
        value_test=.05,
        nb_steps=10000,
    )

    agent = DQNAgent(
        model=model,
        nb_actions=n_actions,
        memory=replay,
        nb_steps_warmup=10,
        target_model_update=1e-2,
        policy=annealed_policy,
    )
    agent.compile(Adam(lr=1e-3), metrics=['mae'])

    # NOTE(review): the result is not used during evaluation; the call is
    # kept as in the original in case build_callbacks has side effects.
    callbacks = build_callbacks(ENV_NAME)

    # Restore the weights saved by the training script.
    agent.load_weights('dqn_LunarLander-v2_weights_510000.h5f')

    # Evaluate the agent for 10 episodes.
    agent.test(env, nb_episodes=10, visualize=True)
Example #11
Source File: DQN_Agent_LSTM.py From Startcraft_pysc2_minigames with Apache License 2.0 | 4 votes |
def training_game():
    """Train, save and evaluate a DQN agent on a StarCraft II mini-game.

    The mini-game name comes from ``FLAGS.mini_game`` and determines the
    weights file ``dqn_<name>_weights.h5f``. When ``LOAD_MODEL`` is set,
    training resumes from that file. After training the weights are written
    back to the same file and the agent is run for 30 test episodes.
    """
    env = Environment()

    input_shape = (FLAGS.screen_size, FLAGS.screen_size, 1)
    nb_actions = 12  # Number of actions

    model = neural_network_model(input_shape, nb_actions)
    memory = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    processor = SC2Proc()

    # Policy: epsilon-greedy with `eps` annealed between value_max and
    # value_min over nb_steps; value_test is the value used when testing.
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1,
        value_min=0.7,
        value_test=.0,
        nb_steps=1e6,
    )

    # Agent
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        enable_double_dqn=False,
        nb_steps_warmup=500,
        target_model_update=1e-2,
        policy=policy,
        batch_size=150,
        processor=processor,
    )
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Tensorboard callback. `fit` expects a *list* of callbacks, so the single
    # TensorBoard instance is wrapped in one instead of being passed bare.
    callbacks = [
        keras.callbacks.TensorBoard(
            log_dir='./Graph',
            histogram_freq=0,
            write_graph=True,
            write_images=False,
        )
    ]

    # File used to save the parameters and to reload them when needed.
    # NOTE(review): the checkpoint/log file names that used to be computed
    # here (guarded by SAVE_MODEL) were never passed to any callback and have
    # been removed; add ModelIntervalCheckpoint/FileLogger callbacks if
    # periodic checkpoints are wanted.
    name = FLAGS.mini_game
    w_file = "dqn_{}_weights.h5f".format(name)

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(
        env,
        callbacks=callbacks,
        nb_steps=1e7,
        action_repetition=2,
        log_interval=1e4,
        verbose=2,
    )

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)
Example #12
Source File: DQN_Agent.py From Startcraft_pysc2_minigames with Apache License 2.0 | 4 votes |
def training_game():
    """Train, save and evaluate a DQN agent on the "HallucinIce" mini-game.

    Checkpoints and a JSON training log are written through callbacks during
    training. When ``LOAD_MODEL`` is set, training resumes from
    ``dqn_HallucinIce_weights.h5f``. After training the weights are written
    back to that file and the agent is run for 30 test episodes.
    """
    interface_format = features.AgentInterfaceFormat(
        feature_dimensions=features.Dimensions(screen=64, minimap=32)
    )
    env = Environment(
        map_name="HallucinIce",
        visualize=True,
        game_steps_per_episode=150,
        agent_interface_format=interface_format,
    )

    input_shape = (_SIZE, _SIZE, 1)
    # NOTE(review): the original author asked whether this should be an
    # integer; it is one whenever _SIZE is an int.
    nb_actions = _SIZE * _SIZE

    model = neural_network_model(input_shape, nb_actions)

    # Replay memory; window_length is the number of subsequent observations
    # provided to the network.
    replay = SequentialMemory(limit=5000, window_length=_WINDOW_LENGTH)
    sc2_processor = SC2Proc()

    # Policy (the agent's behaviour function, i.e. how it picks actions):
    # epsilon-greedy with `eps` annealed between value_max and value_min over
    # nb_steps; value_test is the value used when testing.
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1,
        value_min=0.7,
        value_test=.0,
        nb_steps=1e6,
    )

    # Agent: Q-learning combined with a deep neural network.
    dqn = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=replay,
        nb_steps_warmup=500,
        target_model_update=1e-2,
        policy=exploration,
        batch_size=150,
        processor=sc2_processor,
    )
    dqn.compile(Adam(lr=.001), metrics=["mae"])

    # Files used to save the parameters and to reload them when needed.
    name = "HallucinIce"
    w_file = "dqn_{}_weights.h5f".format(name)
    # With SAVE_MODEL the checkpoint name carries a "{step}" placeholder
    # (presumably filled in by ModelIntervalCheckpoint -- verify).
    checkpoint_suffix = "_weights_{step}.h5f" if SAVE_MODEL else "_weights.h5f"
    checkpoint_path = "train_w" + name + checkpoint_suffix
    log_path = "training_w_{}_log.json".format(name)

    callbacks = [
        ModelIntervalCheckpoint(checkpoint_path, interval=1000),
        FileLogger(log_path, interval=100),
    ]

    if LOAD_MODEL:
        dqn.load_weights(w_file)

    dqn.fit(
        env,
        callbacks=callbacks,
        nb_steps=1e7,
        action_repetition=2,
        log_interval=1e4,
        verbose=2,
    )

    dqn.save_weights(w_file, overwrite=True)
    dqn.test(env, action_repetition=2, nb_episodes=30, visualize=False)